1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode.
;; Every 256-bit entry in this list carries a "TARGET_AVX" condition;
;; the 128-bit modes are listed without a condition.  This iterator is
;; used by the mov<mode>, push<mode>1 and movmisalign<mode> patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
31 ;; All vector float modes
;; Per-mode enabling conditions: V4SF needs TARGET_SSE, V2DF needs
;; TARGET_SSE2, and the 256-bit V8SF/V4DF need TARGET_AVX.
32 (define_mode_iterator VF
33 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")
34 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
36 ;; All SFmode vector float modes
;; Single-precision subset of VF: V4SF under TARGET_SSE, V8SF under
;; TARGET_AVX.
37 (define_mode_iterator VF1
38 [(V4SF "TARGET_SSE") (V8SF "TARGET_AVX")])
40 ;; All DFmode vector float modes
;; Double-precision subset of VF: V2DF under TARGET_SSE2, V4DF under
;; TARGET_AVX.
41 (define_mode_iterator VF2
42 [(V2DF "TARGET_SSE2") (V4DF "TARGET_AVX")])
44 ;; All 128bit vector float modes
;; 128-bit subset of VF; used by the scalar "vm" patterns below.
45 (define_mode_iterator VF_128
46 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")])
48 ;; All vector integer modes
;; The 256-bit integer modes are conditional on TARGET_AVX; the 128-bit
;; integer modes are listed without a condition.
49 (define_mode_iterator VI
50 [(V32QI "TARGET_AVX") V16QI
51 (V16HI "TARGET_AVX") V8HI
52 (V8SI "TARGET_AVX") V4SI
53 (V4DI "TARGET_AVX") V2DI])
55 ;; All QImode vector integer modes
;; Byte-element vectors: V32QI (TARGET_AVX only) and V16QI.  Iterated
;; by the movdqu and lddqu patterns.
56 (define_mode_iterator VI1
57 [(V32QI "TARGET_AVX") V16QI])
59 ;; All DImode vector integer modes
;; 64-bit-element vectors: V4DI (TARGET_AVX only) and V2DI.  Iterated
;; by the integer non-temporal store pattern.
60 (define_mode_iterator VI8
61 [(V4DI "TARGET_AVX") V2DI])
63 ;; All 128bit vector integer modes
;; Unconditional list of the four 128-bit integer vector modes.
64 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
66 ;; Random 128bit vector integer mode combinations
;; The digits in each iterator name give the element sizes, in bytes,
;; of the modes it lists: 1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI.
67 (define_mode_iterator VI12_128 [V16QI V8HI])
68 (define_mode_iterator VI14_128 [V16QI V4SI])
69 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
70 (define_mode_iterator VI24_128 [V8HI V4SI])
71 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
73 ;; Mapping from float mode to required SSE level
75 [(SF "sse") (DF "sse2")
76 (V4SF "sse") (V2DF "sse2")
77 (V8SF "avx") (V4DF "avx")])
;; Pattern-name prefix for integer modes: the 128-bit V16QI and V2DI
;; give "sse2", their 256-bit counterparts give "avx".  Substituted as
;; <sse2> in the movdqu and movnt pattern names below.
79 (define_mode_attr sse2
80 [(V16QI "sse2") (V32QI "avx")
81 (V2DI "sse2") (V4DI "avx")])
;; Pattern-name prefix substituted as <sse3> in the lddqu pattern:
;; "sse3" for V16QI, "avx" for V32QI.
83 (define_mode_attr sse3
84 [(V16QI "sse3") (V32QI "avx")])
;; Pattern-name prefix for float vector modes: "sse4_1" for the 128-bit
;; modes, "avx" for the 256-bit modes.
86 (define_mode_attr sse4_1
87 [(V4SF "sse4_1") (V2DF "sse4_1")
88 (V8SF "avx") (V4DF "avx")])
90 ;; Pack/unpack vector modes
;; Maps a 128-bit integer mode to the 128-bit integer mode with half as
;; many elements, each twice as wide.
91 (define_mode_attr sseunpackmode
92 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
;; Inverse of sseunpackmode: maps a 128-bit integer mode to the 128-bit
;; integer mode with twice as many elements, each half as wide.
94 (define_mode_attr ssepackmode
95 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
99 ;; Instruction suffix for sign and zero extensions.
;; Code attribute (keyed on RTL code, not mode): sign_extend gives "sx",
;; zero_extend gives "zx".
100 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
102 ;; All 16-byte vector modes handled by SSE
;; Four integer and two float 128-bit vector modes, all unconditional.
103 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
105 ;; All 32-byte vector modes handled by AVX
;; Four integer and two float 256-bit vector modes, all unconditional.
106 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
108 ;; All vector modes handled by AVX
;; The six 128-bit modes of SSEMODE followed by the six 256-bit modes
;; of AVX256MODE.
109 (define_mode_iterator AVXMODE
110 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; 128-bit subsets: integer modes with 1/2/4-byte elements, integer
;; modes with 1/2/4/8-byte elements, and the two float modes.
113 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
114 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
115 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Assorted AVX subsets.  The AVX256* iterators list only 256-bit modes;
;; the AVXMODEF* iterators mix 128-bit and 256-bit float modes
;; (F2P = both precisions, FDP = double only, FSP = single only).
;; Note that AVX256MODE2P lists V8SI, V8SF and V4DF but not V4DI.
117 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
118 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
119 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
120 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
121 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
122 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
123 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
124 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Scalar SF and DF together with the 128-bit and 256-bit float vector
;; modes.
126 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
128 ;; Int-float size matches
;; Each iterator pairs the float and integer 128-bit modes that have
;; the same element count and element width (4 x 32-bit, 2 x 64-bit).
129 (define_mode_iterator SSEMODE4S [V4SF V4SI])
130 (define_mode_iterator SSEMODE2D [V2DF V2DI])
132 ;; Modes handled by vec_extract_even/odd pattern.
;; Of the entries shown here, the 128-bit integer modes require
;; TARGET_SSE2 and the 256-bit float modes require TARGET_AVX.
133 (define_mode_iterator SSEMODE_EO
136 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
137 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
138 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
140 ;; Modes handled by storent patterns.
;; Iterated by the storent<mode> expander.  Scalar SF/DF require
;; TARGET_SSE4A; SI, V2DI and V2DF require TARGET_SSE2; V4SF requires
;; TARGET_SSE; the 256-bit float modes require TARGET_AVX.
141 (define_mode_iterator STORENT_MODE
142 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
143 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2")
144 (V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")
145 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
147 ;; Modes handled by vector extract patterns.
;; All six 128-bit modes are enabled under plain TARGET_SSE (including
;; the integer and double-precision ones); the 256-bit float modes
;; require TARGET_AVX.
148 (define_mode_iterator VEC_EXTRACT_MODE
149 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
150 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
151 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
152 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
154 ;; Mapping from integer vector mode to mnemonic suffix
;; One-letter element-size suffix for the 128-bit integer modes:
;; b (byte), w (word), d (doubleword), q (quadword).
155 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
157 ;; Mapping of the insn mnemonic suffix
;; Scalar modes give "ss"/"sd", float vector modes give "ps"/"pd".
;; The integer modes V8SI and V4DI also map to the float suffixes
;; "ps" and "pd" respectively.
158 (define_mode_attr ssemodesuffix
159 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
160 (V8SI "ps") (V4DI "pd")])
;; Scalar-operation suffix for each mode: "ss" for 32-bit-element float
;; modes (and V8SI), "sd" for 64-bit-element float modes (and V4DI),
;; and "d" for V4SI.  Used by the <sse>_vm* patterns below.
161 (define_mode_attr ssescalarmodesuffix
162 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
163 (V4DF "sd") (V4SI "d") (V4DI "sd")])
165 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one
;; (8-1, 16-1, 32-1, 64-1).
166 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
168 ;; Mapping of vector modes back to the scalar modes
;; Only the 128-bit vector modes are listed here; avxscalarmode covers
;; the 256-bit modes as well.
169 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
170 (V16QI "QI") (V8HI "HI")
171 (V4SI "SI") (V2DI "DI")])
173 ;; Mapping of vector modes to a vector mode of double size
;; Same element type with twice the element count.  128-bit modes map
;; to 256-bit modes; 256-bit modes map to 512-bit mode names.
174 (define_mode_attr ssedoublesizemode
175 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
176 (V8HI "V16HI") (V16QI "V32QI")
177 (V4DF "V8DF") (V8SF "V16SF")
178 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
180 ;; Number of scalar elements in each vector type
;; Element count as a decimal string, for both 128-bit and 256-bit
;; vector modes.
181 (define_mode_attr ssescalarnum
182 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
183 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the insn "mode" attribute: 128-bit integer modes map
;; to TI, 256-bit integer modes map to OI, and float vector modes map
;; to themselves.
186 (define_mode_attr avxvecmode
187 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
188 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
189 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Same element type with half the element count: 256-bit modes map to
;; 128-bit modes, and the listed 128-bit modes map to 64-bit modes.
190 (define_mode_attr avxhalfvecmode
191 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
192 (V8SF "V4SF") (V4DF "V2DF")
193 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element (scalar) mode of each 128-bit and 256-bit vector mode.
194 (define_mode_attr avxscalarmode
195 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
196 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Maps each float vector mode to the integer vector mode with the same
;; element count and element width.
197 (define_mode_attr avxpermvecmode
198 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Of the entries shown here: double-precision maps to "pd",
;; single-precision to "ps", and 32-bit-element integer modes to "si".
199 (define_mode_attr avxmodesuffixp
200 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Empty string for the listed 128-bit modes, "256" for the listed
;; 256-bit modes.  Appended to pattern names such as
;; <sse>_movu<ssemodesuffix><avxmodesuffix>.
202 (define_mode_attr avxmodesuffix
203 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
204 (V8SI "256") (V8SF "256") (V4DF "256")])
206 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 where N is the element count of the mode,
;; i.e. one set bit per element (V8SF: 255, V4SF/V4DF: 15, V2DF: 3).
207 (define_mode_attr blendbits
208 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
210 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
216 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
218 ;; All of these patterns are enabled for SSE1 as well as SSE2.
219 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in V16.  Both operands accept a
;; register or memory; the expansion itself is delegated to
;; ix86_expand_vector_move.
221 (define_expand "mov<mode>"
222 [(set (match_operand:V16 0 "nonimmediate_operand" "")
223 (match_operand:V16 1 "nonimmediate_operand" ""))]
226 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in V16.
;; Alternatives (see the constraint strings):
;;   0: SSE constant ("C") into an xmm register  -- type sselog1
;;   1: register or memory into an xmm register  -- type ssemov
;;   2: xmm register into memory                 -- type ssemov
;; The insn condition requires at least one operand to be a register.
;; For alternative 0 the mnemonic comes from standard_sse_constant_opcode.
;; Otherwise the output code switches on the insn's "mode" attribute and
;; returns a ps, pd or dq move; the unaligned v-prefixed form is returned
;; when an operand is misaligned, and the aligned pd/dq cases fall back
;; to movaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute itself is <avxvecmode> under TARGET_AVX, and
;; otherwise one of V4SF, V2DF or TI as selected by the cond below.
230 (define_insn "*mov<mode>_internal"
231 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
232 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
234 && (register_operand (operands[0], <MODE>mode)
235 || register_operand (operands[1], <MODE>mode))"
237 switch (which_alternative)
240 return standard_sse_constant_opcode (insn, operands[1]);
243 switch (get_attr_mode (insn))
248 && (misaligned_operand (operands[0], <MODE>mode)
249 || misaligned_operand (operands[1], <MODE>mode)))
250 return "vmovups\t{%1, %0|%0, %1}";
252 return "%vmovaps\t{%1, %0|%0, %1}";
257 && (misaligned_operand (operands[0], <MODE>mode)
258 || misaligned_operand (operands[1], <MODE>mode)))
259 return "vmovupd\t{%1, %0|%0, %1}";
260 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
261 return "%vmovaps\t{%1, %0|%0, %1}";
263 return "%vmovapd\t{%1, %0|%0, %1}";
268 && (misaligned_operand (operands[0], <MODE>mode)
269 || misaligned_operand (operands[1], <MODE>mode)))
270 return "vmovdqu\t{%1, %0|%0, %1}";
271 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
272 return "%vmovaps\t{%1, %0|%0, %1}";
274 return "%vmovdqa\t{%1, %0|%0, %1}";
283 [(set_attr "type" "sselog1,ssemov,ssemov")
284 (set_attr "prefix" "maybe_vex")
286 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
287 (const_string "<avxvecmode>")
289 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
291 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
292 (and (eq_attr "alternative" "2")
293 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
295 (const_string "V4SF")
296 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
297 (const_string "V4SF")
298 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
299 (const_string "V2DF")
301 (const_string "TI")))])
;; Emits (v)movq from operand 1 (xmm register or memory) into an xmm
;; register.  The visible RTL selects element 0 of the V2DI source.
303 (define_insn "sse2_movq128"
304 [(set (match_operand:V2DI 0 "register_operand" "=x")
307 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
308 (parallel [(const_int 0)]))
311 "%vmovq\t{%1, %0|%0, %1}"
312 [(set_attr "type" "ssemov")
313 (set_attr "prefix" "maybe_vex")
314 (set_attr "mode" "TI")])
316 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
317 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
318 ;; from memory, we'd prefer to load the memory directly into the %xmm
319 ;; register. To facilitate this happy circumstance, this pattern won't
320 ;; split until after register allocation. If the 64-bit value didn't
321 ;; come from memory, this is the best we can do. This is much better
322 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from it.
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves.
;; Operand 1 is a DImode value in integer registers ("r") or memory
;; ("m"); operand 2 is an xmm scratch needed only for the register
;; alternative.  The split runs after reload:
;;   - register source: each SImode half (subreg offsets 0 and 4) is
;;     loaded into an xmm register with sse2_loadld, one into operand 0
;;     and one into the scratch, and the two are then combined with
;;     vec_interleave_lowv4si;
;;   - memory source: a single vec_concatv2di of the memory operand
;;     with zero, written to the V2DI low part of operand 0.
325 (define_insn_and_split "movdi_to_sse"
327 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
328 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
329 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
330 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
332 "&& reload_completed"
335 if (register_operand (operands[1], DImode))
337 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
338 Assemble the 64-bit DImode value in an xmm register. */
339 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
340 gen_rtx_SUBREG (SImode, operands[1], 0)));
341 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
342 gen_rtx_SUBREG (SImode, operands[1], 4)));
343 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
346 else if (memory_operand (operands[1], DImode))
347 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
348 operands[1], const0_rtx));
354 [(set (match_operand:V4SF 0 "register_operand" "")
355 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
356 "TARGET_SSE && reload_completed"
359 (vec_duplicate:V4SF (match_dup 1))
363 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
364 operands[2] = CONST0_RTX (V4SFmode);
368 [(set (match_operand:V2DF 0 "register_operand" "")
369 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
370 "TARGET_SSE2 && reload_completed"
371 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
373 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
374 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in V16; takes a single register operand
;; and delegates to ix86_expand_push.
377 (define_expand "push<mode>1"
378 [(match_operand:V16 0 "register_operand" "")]
381 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in V16; delegates to
;; ix86_expand_vector_move_misalign.
385 (define_expand "movmisalign<mode>"
386 [(set (match_operand:V16 0 "nonimmediate_operand" "")
387 (match_operand:V16 1 "nonimmediate_operand" ""))]
390 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float-vector move.  If both operands are
;; memory, the source is first forced into a register, which keeps the
;; matching insn's "no mem-to-mem" condition satisfiable.
394 (define_expand "<sse>_movu<ssemodesuffix><avxmodesuffix>"
395 [(set (match_operand:VF 0 "nonimmediate_operand" "")
397 [(match_operand:VF 1 "nonimmediate_operand" "")]
401 if (MEM_P (operands[0]) && MEM_P (operands[1]))
402 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float-vector move insn: load (reg <- reg/mem) or store
;; (mem <- reg).  Memory-to-memory is rejected by the condition.
;; Emits (v)movups or (v)movupd according to <ssemodesuffix>.
405 (define_insn "*<sse>_movu<ssemodesuffix><avxmodesuffix>"
406 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
408 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
410 "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
411 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
412 [(set_attr "type" "ssemov")
413 (set_attr "movu" "1")
414 (set_attr "prefix" "maybe_vex")
415 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned byte-vector move (VI1 modes).  If both
;; operands are memory, the source is first forced into a register.
417 (define_expand "<sse2>_movdqu<avxmodesuffix>"
418 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
419 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
423 if (MEM_P (operands[0]) && MEM_P (operands[1]))
424 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned byte-vector move insn, emitting (v)movdqu.  Requires
;; TARGET_SSE2 and rejects memory-to-memory.  The prefix_data16
;; attribute is computed from TARGET_AVX.
427 (define_insn "*<sse2>_movdqu<avxmodesuffix>"
428 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
429 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
431 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
432 "%vmovdqu\t{%1, %0|%0, %1}"
433 [(set_attr "type" "ssemov")
434 (set_attr "movu" "1")
435 (set (attr "prefix_data16")
437 (ne (symbol_ref "TARGET_AVX") (const_int 0))
440 (set_attr "prefix" "maybe_vex")
441 (set_attr "mode" "<avxvecmode>")])
;; Load-only unaligned byte-vector move emitting (v)lddqu: the source
;; must be memory and the destination an xmm register.  Both the
;; prefix_data16 and prefix_rep attributes are computed from TARGET_AVX.
443 (define_insn "<sse3>_lddqu<avxmodesuffix>"
444 [(set (match_operand:VI1 0 "register_operand" "=x")
445 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
448 "%vlddqu\t{%1, %0|%0, %1}"
449 [(set_attr "type" "ssemov")
450 (set_attr "movu" "1")
451 (set (attr "prefix_data16")
453 (ne (symbol_ref "TARGET_AVX") (const_int 0))
456 (set (attr "prefix_rep")
458 (ne (symbol_ref "TARGET_AVX") (const_int 0))
461 (set_attr "prefix" "maybe_vex")
462 (set_attr "mode" "<avxvecmode>")])
;; Stores an SImode general register ("r") to memory with movnti.
;; There is no VEX form: the template has no %v prefix.
;; NOTE(review): the "mode" attribute is V2DF although the operands are
;; SImode -- presumably deliberate for scheduling; confirm.
464 (define_insn "sse2_movntsi"
465 [(set (match_operand:SI 0 "memory_operand" "=m")
466 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
469 "movnti\t{%1, %0|%0, %1}"
470 [(set_attr "type" "ssemov")
471 (set_attr "prefix_data16" "0")
472 (set_attr "mode" "V2DF")])
;; Float-vector store from an xmm register to memory, emitting
;; (v)movntps or (v)movntpd according to <ssemodesuffix>.
474 (define_insn "<sse>_movnt<mode>"
475 [(set (match_operand:VF 0 "memory_operand" "=m")
476 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
479 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
480 [(set_attr "type" "ssemov")
481 (set_attr "prefix" "maybe_vex")
482 (set_attr "mode" "<MODE>")])
;; Integer-vector (VI8: V2DI / V4DI) store from an xmm register to
;; memory, emitting (v)movntdq.
;; NOTE(review): type is "ssecvt" here while the float variant uses
;; "ssemov" -- confirm this is intended.
484 (define_insn "<sse2>_movnt<mode>"
485 [(set (match_operand:VI8 0 "memory_operand" "=m")
486 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
489 "%vmovntdq\t{%1, %0|%0, %1}"
490 [(set_attr "type" "ssecvt")
491 (set (attr "prefix_data16")
493 (ne (symbol_ref "TARGET_AVX") (const_int 0))
496 (set_attr "prefix" "maybe_vex")
497 (set_attr "mode" "<avxvecmode>")])
499 ; Expand patterns for non-temporal stores. At the moment, only those
500 ; that directly map to insns are defined; it would be possible to
501 ; define patterns for other modes that would expand to several insns.
;; Iterates over STORENT_MODE; the destination must be memory and the
;; source a register.
503 (define_expand "storent<mode>"
504 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
506 [(match_operand:STORENT_MODE 1 "register_operand" "")]
509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
511 ;; Parallel floating point arithmetic
513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on VF modes whose RTL code comes from a code iterator
;; (<code>/<CODE>).  All work is delegated to
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
515 (define_expand "<code><mode>2"
516 [(set (match_operand:VF 0 "register_operand" "")
518 (match_operand:VF 1 "register_operand" "")))]
520 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an absneg_operator (operand 3) applied to operand 1, with
;; operand 2 attached through a (use ...).  Four alternatives: the
;; first two are non-AVX and tie one input to the output ("0"); the
;; last two are AVX (see the "isa" attribute).
;; The split code orders the two inputs so that a memory operand 1 is
;; placed second, then rewrites the insn as a plain SET of operand 0
;; to (XOR op1 op2) when operand 3 is NEG and (AND op1 op2) otherwise.
522 (define_insn_and_split "*absneg<mode>2"
523 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
524 (match_operator:VF 3 "absneg_operator"
525 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
526 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
532 enum rtx_code absneg_op;
538 if (MEM_P (operands[1]))
539 op1 = operands[2], op2 = operands[1];
541 op1 = operands[1], op2 = operands[2];
546 if (rtx_equal_p (operands[0], operands[1]))
552 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
553 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
554 t = gen_rtx_SET (VOIDmode, operands[0], t);
558 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Packed add/subtract expander on VF modes.  Both inputs may be memory
;; at expand time; ix86_fixup_binary_operands_no_copy adjusts them.
560 (define_expand "<plusminus_insn><mode>3"
561 [(set (match_operand:VF 0 "register_operand" "")
563 (match_operand:VF 1 "nonimmediate_operand" "")
564 (match_operand:VF 2 "nonimmediate_operand" "")))]
566 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract insn.  Alternative 0 is the two-operand non-AVX
;; form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.  <comm> in operand 1's constraint supplies
;; the commutativity marker where the operation allows it.
568 (define_insn "*<plusminus_insn><mode>3"
569 [(set (match_operand:VF 0 "register_operand" "=x,x")
571 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
572 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
573 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
575 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
576 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
577 [(set_attr "isa" "noavx,avx")
578 (set_attr "type" "sseadd")
579 (set_attr "prefix" "orig,vex")
580 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract on the 128-bit float modes: the mnemonic uses
;; <ssescalarmodesuffix> (ss/sd) and the "mode" attribute is the scalar
;; mode.  Alternative 0 is non-AVX (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.
582 (define_insn "<sse>_vm<plusminus_insn><mode>3"
583 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
586 (match_operand:VF_128 1 "register_operand" "0,x")
587 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
592 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
593 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "isa" "noavx,avx")
595 (set_attr "type" "sseadd")
596 (set_attr "prefix" "orig,vex")
597 (set_attr "mode" "<ssescalarmode>")])
;; Packed multiply expander on VF modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code MULT.
599 (define_expand "mul<mode>3"
600 [(set (match_operand:VF 0 "register_operand" "")
602 (match_operand:VF 1 "nonimmediate_operand" "")
603 (match_operand:VF 2 "nonimmediate_operand" "")))]
605 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 is the two-operand non-AVX form; alternative 1 is the
;; three-operand VEX form.
607 (define_insn "*mul<mode>3"
608 [(set (match_operand:VF 0 "register_operand" "=x,x")
610 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
611 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
612 "ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
614 mul<ssemodesuffix>\t{%2, %0|%0, %2}
615 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
616 [(set_attr "isa" "noavx,avx")
617 (set_attr "type" "ssemul")
618 (set_attr "prefix" "orig,vex")
619 (set_attr "mode" "<MODE>")])
;; Scalar multiply (mulss/mulsd and VEX forms) on the 128-bit float
;; modes; the "mode" attribute is the scalar mode.
621 (define_insn "<sse>_vmmul<mode>3"
622 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
625 (match_operand:VF_128 1 "register_operand" "0,x")
626 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
631 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
632 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
633 [(set_attr "isa" "noavx,avx")
634 (set_attr "type" "ssemul")
635 (set_attr "prefix" "orig,vex")
636 (set_attr "mode" "<ssescalarmode>")])
;; Packed divide expander for the double-precision modes (VF2).  The
;; dividend must already be a register; only operand fixup is done.
638 (define_expand "div<mode>3"
639 [(set (match_operand:VF2 0 "register_operand" "")
640 (div:VF2 (match_operand:VF2 1 "register_operand" "")
641 (match_operand:VF2 2 "nonimmediate_operand" "")))]
643 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Packed divide expander for the single-precision modes (VF1).  After
;; operand fixup, the division is emitted through ix86_emit_swdivsf
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing the insn for size, -ffinite-math-only, no trapping math,
;; and unsafe math optimizations enabled.
645 (define_expand "div<mode>3"
646 [(set (match_operand:VF1 0 "register_operand" "")
647 (div:VF1 (match_operand:VF1 1 "register_operand" "")
648 (match_operand:VF1 2 "nonimmediate_operand" "")))]
651 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
653 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
654 && flag_finite_math_only && !flag_trapping_math
655 && flag_unsafe_math_optimizations)
657 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed divide insn.  Not commutative: operand 1 must be a register
;; and carries no "%" marker.  Alternative 0 is non-AVX, alternative 1
;; is the three-operand VEX form.
662 (define_insn "<sse>_div<mode>3"
663 [(set (match_operand:VF 0 "register_operand" "=x,x")
665 (match_operand:VF 1 "register_operand" "0,x")
666 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
669 div<ssemodesuffix>\t{%2, %0|%0, %2}
670 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
671 [(set_attr "isa" "noavx,avx")
672 (set_attr "type" "ssediv")
673 (set_attr "prefix" "orig,vex")
674 (set_attr "mode" "<MODE>")])
;; Scalar divide (divss/divsd and VEX forms) on the 128-bit float
;; modes; the "mode" attribute is the scalar mode.
676 (define_insn "<sse>_vmdiv<mode>3"
677 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
680 (match_operand:VF_128 1 "register_operand" "0,x")
681 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
686 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
687 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
688 [(set_attr "isa" "noavx,avx")
689 (set_attr "type" "ssediv")
690 (set_attr "prefix" "orig,vex")
691 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision reciprocal, represented as UNSPEC_RCP and
;; emitted as (v)rcpps.  Single alternative; source may be memory.
693 (define_insn "<sse>_rcp<mode>2"
694 [(set (match_operand:VF1 0 "register_operand" "=x")
696 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
698 "%vrcpps\t{%1, %0|%0, %1}"
699 [(set_attr "type" "sse")
700 (set_attr "atom_sse_attr" "rcp")
701 (set_attr "prefix" "maybe_vex")
702 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal (rcpss / vrcpss).  Operand 1 is the value the
;; unspec is applied to; operand 2 is a second register input that is
;; tied to the destination in the non-AVX alternative and passed as the
;; extra source in the VEX form.
704 (define_insn "sse_vmrcpv4sf2"
705 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
707 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
709 (match_operand:V4SF 2 "register_operand" "0,x")
713 rcpss\t{%1, %0|%0, %1}
714 vrcpss\t{%1, %2, %0|%0, %2, %1}"
715 [(set_attr "isa" "noavx,avx")
716 (set_attr "type" "sse")
717 (set_attr "atom_sse_attr" "rcp")
718 (set_attr "prefix" "orig,vex")
719 (set_attr "mode" "SF")])
;; Packed square-root expander for the double-precision modes (VF2).
;; No preparation code is visible: the RTL template is emitted as is.
721 (define_expand "sqrt<mode>2"
722 [(set (match_operand:VF2 0 "register_operand" "")
723 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))])
;; Packed square-root expander for the single-precision modes (VF1).
;; Under the same fast-math conditions as the VF1 divide expander the
;; result is emitted through ix86_emit_swsqrtsf, with its last argument
;; false.
725 (define_expand "sqrt<mode>2"
726 [(set (match_operand:VF1 0 "register_operand" "")
727 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
730 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
731 && flag_finite_math_only && !flag_trapping_math
732 && flag_unsafe_math_optimizations)
734 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square-root insn for all VF modes, emitting (v)sqrtps or
;; (v)sqrtpd.  Single alternative; source may be memory.
739 (define_insn "<sse>_sqrt<mode>2"
740 [(set (match_operand:VF 0 "register_operand" "=x")
741 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
743 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
744 [(set_attr "type" "sse")
745 (set_attr "atom_sse_attr" "sqrt")
746 (set_attr "prefix" "maybe_vex")
747 (set_attr "mode" "<MODE>")])
;; Scalar square root (sqrtss/sqrtsd and VEX forms).  Operand 1 is the
;; value being rooted; operand 2 is a second register input, tied to
;; the destination in the non-AVX alternative and passed as the extra
;; source in the VEX form.
749 (define_insn "<sse>_vmsqrt<mode>2"
750 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
753 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
754 (match_operand:VF_128 2 "register_operand" "0,x")
758 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
759 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
760 [(set_attr "isa" "noavx,avx")
761 (set_attr "type" "sse")
762 (set_attr "atom_sse_attr" "sqrt")
763 (set_attr "prefix" "orig,vex")
764 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander for the single-precision modes.
;; Calls ix86_emit_swsqrtsf with its last argument true (the VF1 sqrt
;; expander passes false).
766 (define_expand "rsqrt<mode>2"
767 [(set (match_operand:VF1 0 "register_operand" "")
769 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
772 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root, represented as
;; UNSPEC_RSQRT and emitted as (v)rsqrtps.
776 (define_insn "<sse>_rsqrt<mode>2"
777 [(set (match_operand:VF1 0 "register_operand" "=x")
779 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
781 "%vrsqrtps\t{%1, %0|%0, %1}"
782 [(set_attr "type" "sse")
783 (set_attr "prefix" "maybe_vex")
784 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root (rsqrtss / vrsqrtss).  Operand 2 is a
;; second register input, tied to the destination in the non-AVX
;; alternative and passed as the extra source in the VEX form.
786 (define_insn "sse_vmrsqrtv4sf2"
787 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
789 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
791 (match_operand:V4SF 2 "register_operand" "0,x")
795 rsqrtss\t{%1, %0|%0, %1}
796 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
797 [(set_attr "isa" "noavx,avx")
798 (set_attr "type" "sse")
799 (set_attr "prefix" "orig,vex")
800 (set_attr "mode" "SF")])
802 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
803 ;; isn't really correct, as those rtl operators aren't defined when
804 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed max/min expander on VF modes.  When -ffinite-math-only is
;; off, operand 1 is forced into a register first so that the
;; non-commutative insn below (which requires a register operand 1) can
;; match; operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
806 (define_expand "<code><mode>3"
807 [(set (match_operand:VF 0 "register_operand" "")
809 (match_operand:VF 1 "nonimmediate_operand" "")
810 (match_operand:VF 2 "nonimmediate_operand" "")))]
813 if (!flag_finite_math_only)
814 operands[1] = force_reg (<MODE>mode, operands[1]);
815 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Max/min insn used only with -ffinite-math-only.  Here operand 1 is
;; marked commutative ("%") and may be memory.
818 (define_insn "*<code><mode>3_finite"
819 [(set (match_operand:VF 0 "register_operand" "=x,x")
821 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
822 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
823 "flag_finite_math_only
824 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
826 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
827 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
828 [(set_attr "isa" "noavx,avx")
829 (set_attr "type" "sseadd")
830 (set_attr "prefix" "orig,vex")
831 (set_attr "mode" "<MODE>")])
;; Max/min insn used when -ffinite-math-only is off.  Unlike the
;; _finite variant, operand 1 must be a register and is not marked
;; commutative, so the operand order is preserved.
833 (define_insn "*<code><mode>3"
834 [(set (match_operand:VF 0 "register_operand" "=x,x")
836 (match_operand:VF 1 "register_operand" "0,x")
837 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
838 "!flag_finite_math_only"
840 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
841 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
842 [(set_attr "isa" "noavx,avx")
843 (set_attr "type" "sseadd")
844 (set_attr "prefix" "orig,vex")
845 (set_attr "mode" "<MODE>")])
;; Scalar max/min on the 128-bit float modes; the "mode" attribute is
;; the scalar mode.
;; NOTE(review): type is "sse" here while the packed max/min insns use
;; "sseadd" -- confirm this is intended.
847 (define_insn "<sse>_vm<code><mode>3"
848 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
851 (match_operand:VF_128 1 "register_operand" "0,x")
852 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
857 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
858 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
859 [(set_attr "isa" "noavx,avx")
860 (set_attr "type" "sse")
861 (set_attr "prefix" "orig,vex")
862 (set_attr "mode" "<ssescalarmode>")])
864 ;; These versions of the min/max patterns implement exactly the operations
865 ;; min = (op1 < op2 ? op1 : op2)
866 ;; max = (!(op1 < op2) ? op1 : op2)
867 ;; Their operands are not commutative, and thus they may be used in the
868 ;; presence of -0.0 and NaN.
;; Non-commutative packed minimum: operand 1 must be a register and the
;; operand order is kept exactly as given.  Emits min<suffix> (non-AVX)
;; or vmin<suffix> (VEX).
870 (define_insn "*ieee_smin<mode>3"
871 [(set (match_operand:VF 0 "register_operand" "=x,x")
873 [(match_operand:VF 1 "register_operand" "0,x")
874 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
878 min<ssemodesuffix>\t{%2, %0|%0, %2}
879 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
880 [(set_attr "isa" "noavx,avx")
881 (set_attr "type" "sseadd")
882 (set_attr "prefix" "orig,vex")
883 (set_attr "mode" "<MODE>")])
;; Non-commutative packed maximum: operand 1 must be a register and the
;; operand order is kept exactly as given.  Emits max<suffix> (non-AVX)
;; or vmax<suffix> (VEX).
885 (define_insn "*ieee_smax<mode>3"
886 [(set (match_operand:VF 0 "register_operand" "=x,x")
888 [(match_operand:VF 1 "register_operand" "0,x")
889 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
893 max<ssemodesuffix>\t{%2, %0|%0, %2}
894 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
895 [(set_attr "isa" "noavx,avx")
896 (set_attr "type" "sseadd")
897 (set_attr "prefix" "orig,vex")
898 (set_attr "mode" "<MODE>")])
;; 256-bit double-precision vaddsubpd (VEX only, single alternative).
;; The RTL combines a first arithmetic result on operands 1 and 2 with
;; (minus op1 op2) on the same operands.
900 (define_insn "avx_addsubv4df3"
901 [(set (match_operand:V4DF 0 "register_operand" "=x")
904 (match_operand:V4DF 1 "register_operand" "x")
905 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
906 (minus:V4DF (match_dup 1) (match_dup 2))
909 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
910 [(set_attr "type" "sseadd")
911 (set_attr "prefix" "vex")
912 (set_attr "mode" "V4DF")])
;; 128-bit double-precision addsubpd.  Alternative 0 is the non-AVX
;; two-operand form, alternative 1 the VEX three-operand form.
914 (define_insn "sse3_addsubv2df3"
915 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
918 (match_operand:V2DF 1 "register_operand" "0,x")
919 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
920 (minus:V2DF (match_dup 1) (match_dup 2))
924 addsubpd\t{%2, %0|%0, %2}
925 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
926 [(set_attr "isa" "noavx,avx")
927 (set_attr "type" "sseadd")
928 (set_attr "atom_unit" "complex")
929 (set_attr "prefix" "orig,vex")
930 (set_attr "mode" "V2DF")])
;; 256-bit single-precision vaddsubps (VEX only, single alternative).
932 (define_insn "avx_addsubv8sf3"
933 [(set (match_operand:V8SF 0 "register_operand" "=x")
936 (match_operand:V8SF 1 "register_operand" "x")
937 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
938 (minus:V8SF (match_dup 1) (match_dup 2))
941 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
942 [(set_attr "type" "sseadd")
943 (set_attr "prefix" "vex")
944 (set_attr "mode" "V8SF")])
;; 128-bit single-precision addsubps.  The prefix_rep attribute is "1"
;; for the non-AVX alternative and left at its default ("*") for the
;; VEX alternative.
946 (define_insn "sse3_addsubv4sf3"
947 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
950 (match_operand:V4SF 1 "register_operand" "0,x")
951 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
952 (minus:V4SF (match_dup 1) (match_dup 2))
956 addsubps\t{%2, %0|%0, %2}
957 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
958 [(set_attr "isa" "noavx,avx")
959 (set_attr "type" "sseadd")
960 (set_attr "prefix" "orig,vex")
961 (set_attr "prefix_rep" "1,*")
962 (set_attr "mode" "V4SF")])
;; 256-bit double-precision horizontal add/subtract (vhaddpd/vhsubpd).
;; The vec_select operands pair up elements (0,1) and (2,3) of operand
;; 1, followed by elements (0,1) and (2,3) of operand 2.
;; NOTE(review): as written, both operand-1 pairs come before both
;; operand-2 pairs.  The 256-bit VHADDPD instruction works per 128-bit
;; lane (op1 pair 0-1, op2 pair 0-1, op1 pair 2-3, op2 pair 2-3).
;; Confirm that the RTL element order matches the instruction; callers
;; that pass the same register for both operands are unaffected.
964 (define_insn "avx_h<plusminus_insn>v4df3"
965 [(set (match_operand:V4DF 0 "register_operand" "=x")
970 (match_operand:V4DF 1 "register_operand" "x")
971 (parallel [(const_int 0)]))
972 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
974 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
975 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
979 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
980 (parallel [(const_int 0)]))
981 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
983 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
984 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
986 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
987 [(set_attr "type" "sseadd")
988 (set_attr "prefix" "vex")
989 (set_attr "mode" "V4DF")])
;; 128-bit double-precision horizontal add/subtract (haddpd/hsubpd).
;; Combines elements 0 and 1 of operand 1, then elements 0 and 1 of
;; operand 2.  Alternative 0 is non-AVX, alternative 1 is VEX.
991 (define_insn "sse3_h<plusminus_insn>v2df3"
992 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
996 (match_operand:V2DF 1 "register_operand" "0,x")
997 (parallel [(const_int 0)]))
998 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1001 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1002 (parallel [(const_int 0)]))
1003 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1006 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1007 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1008 [(set_attr "isa" "noavx,avx")
1009 (set_attr "type" "sseadd")
1010 (set_attr "prefix" "orig,vex")
1011 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/subtract (vhaddps/vhsubps).
;; The vec_select operands appear in this order: pairs (0,1),(2,3) of
;; operand 1, pairs (0,1),(2,3) of operand 2, then pairs (4,5),(6,7) of
;; operand 1 and pairs (4,5),(6,7) of operand 2 -- i.e. the low
;; four-element half of each input first, then the high half.
1013 (define_insn "avx_h<plusminus_insn>v8sf3"
1014 [(set (match_operand:V8SF 0 "register_operand" "=x")
1020 (match_operand:V8SF 1 "register_operand" "x")
1021 (parallel [(const_int 0)]))
1022 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1024 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1025 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1029 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1030 (parallel [(const_int 0)]))
1031 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1033 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1034 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1038 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1039 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1041 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1042 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1045 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1046 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1048 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1049 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1051 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "prefix" "vex")
1054 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract (haddps/hsubps).
;; Combines pairs (0,1),(2,3) of operand 1, then pairs (0,1),(2,3) of
;; operand 2.  Alternative 0 is non-AVX (prefix_rep "1"), alternative 1
;; is VEX.
1056 (define_insn "sse3_h<plusminus_insn>v4sf3"
1057 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1062 (match_operand:V4SF 1 "register_operand" "0,x")
1063 (parallel [(const_int 0)]))
1064 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1066 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1067 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1071 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1072 (parallel [(const_int 0)]))
1073 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1075 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1076 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1079 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1080 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "isa" "noavx,avx")
1082 (set_attr "type" "sseadd")
1083 (set_attr "atom_unit" "complex")
1084 (set_attr "prefix" "orig,vex")
1085 (set_attr "prefix_rep" "1,*")
1086 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector (operand 1) into operand 0.
;; Expands to three insns using two fresh V4DF pseudos:
;;   tmp  = avx_haddv4df3 (operand 1, operand 1)
;;   tmp2 = avx_vperm2f128v4df3 (tmp, tmp, selector 1)
;;   operand 0 = addv4df3 (tmp, tmp2)
1088 (define_expand "reduc_splus_v4df"
1089 [(match_operand:V4DF 0 "register_operand" "")
1090 (match_operand:V4DF 1 "register_operand" "")]
1093 rtx tmp = gen_reg_rtx (V4DFmode);
1094 rtx tmp2 = gen_reg_rtx (V4DFmode);
1095 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1096 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1097 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single sse3_haddv2df3 of operand 1 with
;; itself, written directly to operand 0.
1101 (define_expand "reduc_splus_v2df"
1102 [(match_operand:V2DF 0 "register_operand" "")
1103 (match_operand:V2DF 1 "register_operand" "")]
1106 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector (operand 1) into operand 0.
;; Expands to four insns using two V8SF pseudos:
;;   tmp  = avx_haddv8sf3 (operand 1, operand 1)
;;   tmp2 = avx_haddv8sf3 (tmp, tmp)
;;   tmp  = avx_vperm2f128v8sf3 (tmp2, tmp2, selector 1)   (tmp is reused)
;;   operand 0 = addv8sf3 (tmp, tmp2)
1110 (define_expand "reduc_splus_v8sf"
1111 [(match_operand:V8SF 0 "register_operand" "")
1112 (match_operand:V8SF 1 "register_operand" "")]
1115 rtx tmp = gen_reg_rtx (V8SFmode);
1116 rtx tmp2 = gen_reg_rtx (V8SFmode);
1117 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1118 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1119 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1120 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector (operand 1) into operand 0.
;; Two expansions appear in the body: two chained sse3_haddv4sf3 insns
;; through a fresh pseudo, or a call to ix86_expand_reduc_v4sf with
;; gen_addv4sf3.
;; NOTE(review): the test selecting between the two is not visible here --
;; presumably SSE3 availability; confirm.
1124 (define_expand "reduc_splus_v4sf"
1125 [(match_operand:V4SF 0 "register_operand" "")
1126 (match_operand:V4SF 1 "register_operand" "")]
1131 rtx tmp = gen_reg_rtx (V4SFmode);
1132 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1133 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1136 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1141 (define_expand "reduc_smax_v4sf"
1142 [(match_operand:V4SF 0 "register_operand" "")
1143 (match_operand:V4SF 1 "register_operand" "")]
1146 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1150 (define_expand "reduc_smin_v4sf"
1151 [(match_operand:V4SF 0 "register_operand" "")
1152 (match_operand:V4SF 1 "register_operand" "")]
1155 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1161 ;; Parallel floating point comparisons
1163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point compare with an explicit immediate predicate.
;; Iterates over VF; emits vcmp<ssemodesuffix> with operand 3 (an SI constant
;; accepted by const_0_to_31_operand) as the immediate.  Operand 2 may be a
;; register or memory.  VEX-encoded only.
1165 (define_insn "avx_cmp<mode>3"
1166 [(set (match_operand:VF 0 "register_operand" "=x")
1168 [(match_operand:VF 1 "register_operand" "x")
1169 (match_operand:VF 2 "nonimmediate_operand" "xm")
1170 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1173 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1174 [(set_attr "type" "ssecmp")
1175 (set_attr "length_immediate" "1")
1176 (set_attr "prefix" "vex")
1177 (set_attr "mode" "<MODE>")])
;; Scalar variant of the immediate-predicate compare.  Iterates over the
;; 128-bit float modes (VF_128) and emits vcmp<ssescalarmodesuffix>; the
;; "mode" attribute is the scalar mode <ssescalarmode>.
1179 (define_insn "avx_vmcmp<mode>3"
1180 [(set (match_operand:VF_128 0 "register_operand" "=x")
1183 [(match_operand:VF_128 1 "register_operand" "x")
1184 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1185 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1190 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1191 [(set_attr "type" "ssecmp")
1192 (set_attr "length_immediate" "1")
1193 (set_attr "prefix" "vex")
1194 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point compare whose predicate comes from the comparison
;; operator matched as operand 3 (sse_comparison_operator) and is printed
;; into the mnemonic with %D3.  Two alternatives:
;;   0: cmp%D3<ssemodesuffix>, operand 1 tied to the destination (isa noavx)
;;   1: vcmp%D3<ssemodesuffix>, three-operand form (isa avx)
1196 (define_insn "<sse>_maskcmp<mode>3"
1197 [(set (match_operand:VF 0 "register_operand" "=x,x")
1198 (match_operator:VF 3 "sse_comparison_operator"
1199 [(match_operand:VF 1 "register_operand" "0,x")
1200 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1203 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1204 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1205 [(set_attr "isa" "noavx,avx")
1206 (set_attr "type" "ssecmp")
1207 (set_attr "length_immediate" "1")
1208 (set_attr "prefix" "orig,vex")
1209 (set_attr "mode" "<MODE>")])
;; Scalar variant of the operator-driven compare, over the 128-bit float
;; modes (VF_128).  Emits cmp%D3<ssescalarmodesuffix> (operand 1 tied to the
;; destination, isa noavx) or vcmp%D3<ssescalarmodesuffix> (isa avx).
;; NOTE(review): length_immediate is "1,*" here but "1" for both alternatives
;; in <sse>_maskcmp<mode>3 -- confirm the difference is intended.
1211 (define_insn "<sse>_vmmaskcmp<mode>3"
1212 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1214 (match_operator:VF_128 3 "sse_comparison_operator"
1215 [(match_operand:VF_128 1 "register_operand" "0,x")
1216 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1221 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1222 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1223 [(set_attr "isa" "noavx,avx")
1224 (set_attr "type" "ssecmp")
1225 (set_attr "length_immediate" "1,*")
1226 (set_attr "prefix" "orig,vex")
1227 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode.  Both operands
;; are vectors (<ssevecmode>) of which element 0 is selected; operand 1 may
;; be memory.  Emits %vcomis<ssemodefsuffix>.  Enabled when
;; SSE_FLOAT_MODE_P (<MODE>mode).  prefix_data16 is chosen by a test on
;; mode "DF", with "0" as the other value.
1229 (define_insn "<sse>_comi"
1230 [(set (reg:CCFP FLAGS_REG)
1233 (match_operand:<ssevecmode> 0 "register_operand" "x")
1234 (parallel [(const_int 0)]))
1236 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1237 (parallel [(const_int 0)]))))]
1238 "SSE_FLOAT_MODE_P (<MODE>mode)"
1239 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1240 [(set_attr "type" "ssecomi")
1241 (set_attr "prefix" "maybe_vex")
1242 (set_attr "prefix_rep" "0")
1243 (set (attr "prefix_data16")
1244 (if_then_else (eq_attr "mode" "DF")
1246 (const_string "0")))
1247 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but sets the flags register in CCFPU mode and
;; emits %vucomis<ssemodefsuffix>.  Element 0 of each vector operand is
;; compared; operand 1 may be memory.
1249 (define_insn "<sse>_ucomi"
1250 [(set (reg:CCFPU FLAGS_REG)
1253 (match_operand:<ssevecmode> 0 "register_operand" "x")
1254 (parallel [(const_int 0)]))
1256 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1257 (parallel [(const_int 0)]))))]
1258 "SSE_FLOAT_MODE_P (<MODE>mode)"
1259 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1260 [(set_attr "type" "ssecomi")
1261 (set_attr "prefix" "maybe_vex")
1262 (set_attr "prefix_rep" "0")
1263 (set (attr "prefix_data16")
1264 (if_then_else (eq_attr "mode" "DF")
1266 (const_string "0")))
1267 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the VF modes.  Operand 3 is the comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the two values
;; selected between.  All of the work is delegated to ix86_expand_fp_vcond,
;; whose boolean result is captured in `ok'.
1269 (define_expand "vcond<mode>"
1270 [(set (match_operand:VF 0 "register_operand" "")
1272 (match_operator 3 ""
1273 [(match_operand:VF 4 "nonimmediate_operand" "")
1274 (match_operand:VF 5 "nonimmediate_operand" "")])
1275 (match_operand:VF 1 "general_operand" "")
1276 (match_operand:VF 2 "general_operand" "")))]
1279 bool ok = ix86_expand_fp_vcond (operands);
1284 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1286 ;; Parallel floating point logical operations
1288 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not for the VF modes.  The assembler template is
;; built at output time into a static 32-byte buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <ssemodesuffix>.
;;   alternative 0: andn<suffix>, operand 1 tied to the destination (noavx)
;;   alternative 1: vandn<suffix>, three-operand form (avx)
1290 (define_insn "<sse>_andnot<mode>3"
1291 [(set (match_operand:VF 0 "register_operand" "=x,x")
1294 (match_operand:VF 1 "register_operand" "0,x"))
1295 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1298 static char buf[32];
1301 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1303 switch (which_alternative)
1306 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1309 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1315 snprintf (buf, sizeof (buf), insn, suffix);
1318 [(set_attr "isa" "noavx,avx")
1319 (set_attr "type" "sselog")
1320 (set_attr "prefix" "orig,vex")
1321 (set_attr "mode" "<MODE>")])
;; Expander for the VF logical operations named by <code>.  Both inputs may
;; be memory at this point; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy to adjust the operands in place.
1323 (define_expand "<code><mode>3"
1324 [(set (match_operand:VF 0 "register_operand" "")
1326 (match_operand:VF 1 "nonimmediate_operand" "")
1327 (match_operand:VF 2 "nonimmediate_operand" "")))]
1329 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the VF logical operations.  Operands 1 and 2 are commutative
;; ("%"), and the insn is valid only when ix86_binary_operator_ok accepts the
;; operands.  As with <sse>_andnot<mode>3, the template is formatted into a
;; static buffer with suffix "ps" under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
;; and <ssemodesuffix> otherwise:
;;   alternative 0: <logic><suffix>, two-operand form (noavx)
;;   alternative 1: v<logic><suffix>, three-operand form (avx)
1331 (define_insn "*<code><mode>3"
1332 [(set (match_operand:VF 0 "register_operand" "=x,x")
1334 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1335 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1336 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1338 static char buf[32];
1341 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1343 switch (which_alternative)
1346 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1349 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1355 snprintf (buf, sizeof (buf), insn, suffix);
1358 [(set_attr "isa" "noavx,avx")
1359 (set_attr "type" "sselog")
1360 (set_attr "prefix" "orig,vex")
1361 (set_attr "mode" "<MODE>")])
;; Vector copysign for the VF modes, built from three logical operations:
;; (not mask) combined with operand 1, mask AND operand 2, and an inclusive
;; OR of temporaries 4 and 5 into operand 0.  Operand 3 is the mask returned
;; by ix86_build_signbit_mask (<MODE>mode, 1, 0); operands 4 and 5 are fresh
;; pseudos of the same mode.
1363 (define_expand "copysign<mode>3"
1366 (not:VF (match_dup 3))
1367 (match_operand:VF 1 "nonimmediate_operand" "")))
1369 (and:VF (match_dup 3)
1370 (match_operand:VF 2 "nonimmediate_operand" "")))
1371 (set (match_operand:VF 0 "register_operand" "")
1372 (ior:VF (match_dup 4) (match_dup 5)))]
1375 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1377 operands[4] = gen_reg_rtx (<MODE>mode);
1378 operands[5] = gen_reg_rtx (<MODE>mode);
1381 ;; Also define scalar versions. These are used for abs, neg, and
1382 ;; conditional move. Using subregs into vector modes causes register
1383 ;; allocation lossage. These patterns do not allow memory operands
1384 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All operands are registers; no memory
;; alternative is offered.  The template is formatted at output time:
;; andnp<suffix> (noavx, operand 1 tied to the destination) or
;; vandnp<suffix> (avx), where <suffix> is "s" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssemodefsuffix> otherwise.
;; The "mode" attribute is the vector mode <ssevecmode>.
1386 (define_insn "*andnot<mode>3"
1387 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1390 (match_operand:MODEF 1 "register_operand" "0,x"))
1391 (match_operand:MODEF 2 "register_operand" "x,x")))]
1392 "SSE_FLOAT_MODE_P (<MODE>mode)"
1394 static char buf[32];
1397 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : "<ssemodefsuffix>";
1399 switch (which_alternative)
1402 insn = "andnp%s\t{%%2, %%0|%%0, %%2}";
1405 insn = "vandnp%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1411 snprintf (buf, sizeof (buf), insn, suffix);
1414 [(set_attr "isa" "noavx,avx")
1415 (set_attr "type" "sselog")
1416 (set_attr "prefix" "orig,vex")
1417 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations named by <code>; register operands
;; only, with operands 1 and 2 commutative ("%").  Emits <logic>p<suffix>
;; (noavx) or v<logic>p<suffix> (avx), where <suffix> is "s" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssemodefsuffix> otherwise.
;; The "mode" attribute is the vector mode <ssevecmode>.
1419 (define_insn "*<code><mode>3"
1420 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1422 (match_operand:MODEF 1 "register_operand" "%0,x")
1423 (match_operand:MODEF 2 "register_operand" "x,x")))]
1424 "SSE_FLOAT_MODE_P (<MODE>mode)"
1426 static char buf[32];
1429 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : "<ssemodefsuffix>";
1431 switch (which_alternative)
1434 insn = "<logic>p%s\t{%%2, %%0|%%0, %%2}";
1437 insn = "v<logic>p%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1443 snprintf (buf, sizeof (buf), insn, suffix);
1446 [(set_attr "isa" "noavx,avx")
1447 (set_attr "type" "sselog")
1448 (set_attr "prefix" "orig,vex")
1449 (set_attr "mode" "<ssevecmode>")])
1451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1453 ;; FMA4 floating point multiply/accumulate instructions. This
1454 ;; includes the scalar version of the instructions as well as the vector version.
1457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1459 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1460 ;; combine to generate a multiply/add with two memory references. We then
1461 ;; split this insn, into loading up the destination register with one of the
1462 ;; memory operations. If we don't manage to split the insn, reload will
1463 ;; generate the appropriate moves. The reason this is needed, is that combine
1464 ;; has already folded one of the memory references into both the multiply and
1465 ;; add insns, and it can't generate a new pseudo. I.e.:
1466 ;; (set (reg1) (mem (addr1)))
1467 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1468 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1470 ;; ??? This is historic, pre-dating the gimple fma transformation.
1471 ;; We could now properly represent that only one memory operand is
1472 ;; allowed and not be penalized during optimization.
1474 ;; Intrinsic FMA operations.
1476 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander for fused multiply-add over FMAMODE.  All three
;; inputs may be memory at expand time.  Enabled only when FMA or FMA4 is
;; available and TARGET_SSE_MATH is set.
1477 (define_expand "fma<mode>4"
1478 [(set (match_operand:FMAMODE 0 "register_operand")
1480 (match_operand:FMAMODE 1 "nonimmediate_operand")
1481 (match_operand:FMAMODE 2 "nonimmediate_operand")
1482 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1483 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for fused multiply-subtract: same shape as
;; fma<mode>4 but with operand 3 negated.  Same enabling condition.
1486 (define_expand "fms<mode>4"
1487 [(set (match_operand:FMAMODE 0 "register_operand")
1489 (match_operand:FMAMODE 1 "nonimmediate_operand")
1490 (match_operand:FMAMODE 2 "nonimmediate_operand")
1491 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1492 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for negated fused multiply-add: operand 1 is
;; negated, operands 2 and 3 are used as-is.  Same enabling condition as
;; fma<mode>4.
1495 (define_expand "fnma<mode>4"
1496 [(set (match_operand:FMAMODE 0 "register_operand")
1498 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1499 (match_operand:FMAMODE 2 "nonimmediate_operand")
1500 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1501 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for negated fused multiply-subtract: both
;; operand 1 and operand 3 are negated.  Same enabling condition as
;; fma<mode>4.
1504 (define_expand "fnms<mode>4"
1505 [(set (match_operand:FMAMODE 0 "register_operand")
1507 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1508 (match_operand:FMAMODE 2 "nonimmediate_operand")
1509 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1510 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1513 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing expander for fused multiply-add over FMAMODE.  Unlike
;; fma<mode>4, its condition does not require TARGET_SSE_MATH: either FMA
;; or FMA4 being available is sufficient.
1514 (define_expand "fma4i_fmadd_<mode>"
1515 [(set (match_operand:FMAMODE 0 "register_operand")
1517 (match_operand:FMAMODE 1 "nonimmediate_operand")
1518 (match_operand:FMAMODE 2 "nonimmediate_operand")
1519 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1520 "TARGET_FMA || TARGET_FMA4"
;; Four-operand fused multiply-add; emits vfmadd<ssemodesuffix>.
;; Operand 1 is always a register and is commutative with operand 2 ("%").
;; The two alternatives allow memory in operand 3 (alternative 0) or in
;; operand 2 (alternative 1), never in both.
1523 (define_insn "*fma4i_fmadd_<mode>"
1524 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1526 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1527 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1528 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1530 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1531 [(set_attr "type" "ssemuladd")
1532 (set_attr "mode" "<MODE>")])
;; Four-operand fused multiply-subtract; emits vfmsub<ssemodesuffix>.
;; Same operand constraints as *fma4i_fmadd_<mode>: memory is allowed in
;; operand 3 or in operand 2, never in both.
1534 (define_insn "*fma4i_fmsub_<mode>"
1535 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1537 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1538 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1540 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1542 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1543 [(set_attr "type" "ssemuladd")
1544 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-add; emits vfnmadd<ssemodesuffix>.
;; Same operand constraints as *fma4i_fmadd_<mode>.
1546 (define_insn "*fma4i_fnmadd_<mode>"
1547 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1550 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1551 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1552 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1554 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1555 [(set_attr "type" "ssemuladd")
1556 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-subtract; emits
;; vfnmsub<ssemodesuffix>.  Same operand constraints as
;; *fma4i_fmadd_<mode>.
1558 (define_insn "*fma4i_fnmsub_<mode>"
1559 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1562 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1563 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1565 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1567 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1568 [(set_attr "type" "ssemuladd")
1569 (set_attr "mode" "<MODE>")])
1571 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1572 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar ("vm") fused multiply-add over SSEMODEF2P.  The
;; result is a vec_merge; operand 4 is filled in by the preparation code as
;; the all-zero constant CONST0_RTX (<MODE>mode).
1574 (define_expand "fma4i_vmfmadd_<mode>"
1575 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1576 (vec_merge:SSEMODEF2P
1578 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1579 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1580 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1585 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar four-operand fused multiply-add; emits
;; vfmadd<ssescalarmodesuffix>.  The RTL is a vec_merge whose other arm is
;; operand 4, which must satisfy const0_operand.  Memory is allowed in
;; operand 3 or in operand 2, never in both.
1588 (define_insn "*fma4i_vmfmadd_<mode>"
1589 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1590 (vec_merge:SSEMODEF2P
1592 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1593 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1594 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1595 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1598 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1599 [(set_attr "type" "ssemuladd")
1600 (set_attr "mode" "<MODE>")])
;; Scalar four-operand fused multiply-subtract; emits
;; vfmsub<ssescalarmodesuffix>.  Same vec_merge-with-zero shape and operand
;; constraints as *fma4i_vmfmadd_<mode>.
1602 (define_insn "*fma4i_vmfmsub_<mode>"
1603 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1604 (vec_merge:SSEMODEF2P
1606 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1609 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1610 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1613 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1614 [(set_attr "type" "ssemuladd")
1615 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated fused multiply-add; emits
;; vfnmadd<ssescalarmodesuffix>.  Same vec_merge-with-zero shape and operand
;; constraints as *fma4i_vmfmadd_<mode>.
1617 (define_insn "*fma4i_vmfnmadd_<mode>"
1618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1619 (vec_merge:SSEMODEF2P
1622 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1623 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1624 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1625 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1628 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1629 [(set_attr "type" "ssemuladd")
1630 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated fused multiply-subtract; emits
;; vfnmsub<ssescalarmodesuffix>.  Same vec_merge-with-zero shape and operand
;; constraints as *fma4i_vmfmadd_<mode>.
1632 (define_insn "*fma4i_vmfnmsub_<mode>"
1633 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1634 (vec_merge:SSEMODEF2P
1637 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1638 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1640 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1641 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1644 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1645 [(set_attr "type" "ssemuladd")
1646 (set_attr "mode" "<MODE>")])
1648 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1650 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1652 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1654 ;; It would be possible to represent these without the UNSPEC as
1657 ;; (fma op1 op2 op3)
1658 ;; (fma op1 op2 (neg op3))
1661 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/subtract operation over AVXMODEF2P.  The
;; three inputs are grouped in a bracketed operand vector and may be memory
;; at expand time.  Enabled when either FMA or FMA4 is available.
1663 (define_expand "fmaddsub_<mode>"
1664 [(set (match_operand:AVXMODEF2P 0 "register_operand")
1666 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
1667 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
1668 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
1670 "TARGET_FMA || TARGET_FMA4"
;; FMA4 four-operand multiply add/subtract over AVXMODEF2P.
;; Operand 1 is always a register and is commutative with operand 2 ("%");
;; memory is allowed in operand 3 (alternative 0) or operand 2
;; (alternative 1), never in both.
;; The mnemonic suffix comes from <ssemodesuffix> so that each mode of the
;; iterator gets its own ps/pd form, as *fma_fmaddsub_<mode> already does;
;; a hard-coded "ps" would be wrong for any double-precision mode.
1673 (define_insn "*fma4_fmaddsub_<mode>"
1674 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
1676 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
1677 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
1678 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
1681 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1682 [(set_attr "type" "ssemuladd")
1683 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand multiply subtract/add over AVXMODEF2P.
;; Same operand constraints as *fma4_fmaddsub_<mode>: memory is allowed in
;; operand 3 or in operand 2, never in both.
;; The mnemonic suffix comes from <ssemodesuffix> so that each mode of the
;; iterator gets its own ps/pd form, as *fma_fmsubadd_<mode> already does;
;; a hard-coded "ps" would be wrong for any double-precision mode.
1685 (define_insn "*fma4_fmsubadd_<mode>"
1686 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
1688 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
1689 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
1691 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1694 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1695 [(set_attr "type" "ssemuladd")
1696 (set_attr "mode" "<MODE>")])
1698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1700 ;; FMA3 floating point multiply/accumulate instructions.
1702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 fused multiply-add over FMAMODE.  FMA3 is destructive, so each
;; alternative ties the destination to the input constrained "0" and picks
;; the matching operand-order form:
;;   0: vfmadd132  dest = operand 1, operand 2 may be memory
;;   1: vfmadd213  dest = operand 1, operand 3 may be memory
;;   2: vfmadd231  dest = operand 3, operand 2 may be memory
;; FMA3 defines only the 132, 213 and 231 orderings, so the second
;; alternative is spelled 213, as in *fma_fmaddsub_<mode>.
1704 (define_insn "*fma_fmadd_<mode>"
1705 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1707 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1708 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1709 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1712 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1713 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1714 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1715 [(set_attr "type" "ssemuladd")
1716 (set_attr "mode" "<MODE>")])
;; FMA3 fused multiply-subtract over FMAMODE.  Each alternative ties the
;; destination to the input constrained "0":
;;   0: vfmsub132  dest = operand 1, operand 2 may be memory
;;   1: vfmsub213  dest = operand 1, operand 3 may be memory
;;   2: vfmsub231  dest = operand 3, operand 2 may be memory
;; FMA3 defines only the 132, 213 and 231 orderings, so the second
;; alternative is spelled 213, as in *fma_fmaddsub_<mode>.
1718 (define_insn "*fma_fmsub_<mode>"
1719 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1721 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1722 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1724 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1727 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1728 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1729 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1730 [(set_attr "type" "ssemuladd")
1731 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-add over FMAMODE (emits vfnmadd*).
;; Named *fma_fnmadd_<mode> so it does not share a name with the plain
;; fmadd pattern; "*" names are internal, so no generator is affected.
;; Each alternative ties the destination to the input constrained "0":
;;   0: vfnmadd132  dest = operand 1, operand 2 may be memory
;;   1: vfnmadd213  dest = operand 1, operand 3 may be memory
;;   2: vfnmadd231  dest = operand 3, operand 2 may be memory
;; FMA3 defines only the 132, 213 and 231 orderings, so the second
;; alternative is spelled 213, as in *fma_fmaddsub_<mode>.
1733 (define_insn "*fma_fnmadd_<mode>"
1734 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1737 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1738 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1739 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1742 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1743 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1744 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-subtract over FMAMODE (emits vfnmsub*).
;; Named *fma_fnmsub_<mode> so it does not share a name with the plain
;; fmsub pattern; "*" names are internal, so no generator is affected.
;; Each alternative ties the destination to the input constrained "0":
;;   0: vfnmsub132  dest = operand 1, operand 2 may be memory
;;   1: vfnmsub213  dest = operand 1, operand 3 may be memory
;;   2: vfnmsub231  dest = operand 3, operand 2 may be memory
;; FMA3 defines only the 132, 213 and 231 orderings, so the second
;; alternative is spelled 213, as in *fma_fmaddsub_<mode>.
1748 (define_insn "*fma_fnmsub_<mode>"
1749 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1752 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1753 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1755 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1758 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1759 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1760 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1761 [(set_attr "type" "ssemuladd")
1762 (set_attr "mode" "<MODE>")])
;; FMA3 multiply add/subtract over AVXMODEF2P.  Each alternative ties the
;; destination to the input constrained "0":
;;   0: vfmaddsub132  dest = operand 1, operand 2 may be memory
;;   1: vfmaddsub213  dest = operand 1, operand 3 may be memory
;;   2: vfmaddsub231  dest = operand 3, operand 2 may be memory
1764 (define_insn "*fma_fmaddsub_<mode>"
1765 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
1767 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
1768 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
1769 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
1773 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1774 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1775 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1776 [(set_attr "type" "ssemuladd")
1777 (set_attr "mode" "<MODE>")])
;; FMA3 multiply subtract/add over AVXMODEF2P.  Each alternative ties the
;; destination to the input constrained "0":
;;   0: vfmsubadd132  dest = operand 1, operand 2 may be memory
;;   1: vfmsubadd213  dest = operand 1, operand 3 may be memory
;;   2: vfmsubadd231  dest = operand 3, operand 2 may be memory
1779 (define_insn "*fma_fmsubadd_<mode>"
1780 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
1782 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
1783 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
1785 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
1789 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1790 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1791 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
1795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1797 ;; Parallel single-precision floating point conversion operations
1799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts the V2SI in operand 2 (MMX register or memory, "ym") to V2SF
;; and combines it with operand 1, a V4SF register tied to the destination
;; ("0").  Emits cvtpi2ps.
1801 (define_insn "sse_cvtpi2ps"
1802 [(set (match_operand:V4SF 0 "register_operand" "=x")
1805 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1806 (match_operand:V4SF 1 "register_operand" "0")
1809 "cvtpi2ps\t{%2, %0|%0, %2}"
1810 [(set_attr "type" "ssecvt")
1811 (set_attr "mode" "V4SF")])
;; Converts a V4SF (register or memory) through a V4SI unspec and keeps
;; elements 0 and 1 as the V2SI result in an MMX register ("=y").
;; Emits cvtps2pi; unit "mmx".
1813 (define_insn "sse_cvtps2pi"
1814 [(set (match_operand:V2SI 0 "register_operand" "=y")
1816 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1818 (parallel [(const_int 0) (const_int 1)])))]
1820 "cvtps2pi\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx")
1823 (set_attr "mode" "DI")])
;; Truncating counterpart of sse_cvtps2pi: applies fix:V4SI to the V4SF
;; operand and keeps elements 0 and 1 as the V2SI result in an MMX register.
;; Emits cvttps2pi.
;; NOTE(review): the "mode" attribute is "SF" here but "DI" for
;; sse_cvtps2pi -- confirm the difference is intended.
1825 (define_insn "sse_cvttps2pi"
1826 [(set (match_operand:V2SI 0 "register_operand" "=y")
1828 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1829 (parallel [(const_int 0) (const_int 1)])))]
1831 "cvttps2pi\t{%1, %0|%0, %1}"
1832 [(set_attr "type" "ssecvt")
1833 (set_attr "unit" "mmx")
1834 (set_attr "prefix_rep" "0")
1835 (set_attr "mode" "SF")])
;; Converts the SI in operand 2 to SF and combines it with the V4SF in
;; operand 1.  Three alternatives:
;;   0: cvtsi2ss, integer source in a register, operand 1 tied to dest
;;   1: cvtsi2ss, integer source in memory, operand 1 tied to dest
;;   2: vcvtsi2ss, three-operand form, source in register or memory (avx)
;; Alternatives 0 and 1 share a template and differ only in the per-CPU
;; decode attributes.
1837 (define_insn "sse_cvtsi2ss"
1838 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1841 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1842 (match_operand:V4SF 1 "register_operand" "0,0,x")
1846 cvtsi2ss\t{%2, %0|%0, %2}
1847 cvtsi2ss\t{%2, %0|%0, %2}
1848 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1849 [(set_attr "isa" "noavx,noavx,avx")
1850 (set_attr "type" "sseicvt")
1851 (set_attr "athlon_decode" "vector,double,*")
1852 (set_attr "amdfam10_decode" "vector,double,*")
1853 (set_attr "bdver1_decode" "double,direct,*")
1854 (set_attr "prefix" "orig,orig,vex")
1855 (set_attr "mode" "SF")])
;; 64-bit-source variant of sse_cvtsi2ss: converts the DI in operand 2 to
;; SF.  Requires TARGET_SSE && TARGET_64BIT.  Emits cvtsi2ssq (operand 1
;; tied to the destination) or vcvtsi2ssq (three-operand form).  The non-VEX
;; alternatives set prefix_rex to 1; the VEX alternative sets length_vex
;; to 4.
1857 (define_insn "sse_cvtsi2ssq"
1858 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1861 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1862 (match_operand:V4SF 1 "register_operand" "0,0,x")
1864 "TARGET_SSE && TARGET_64BIT"
1866 cvtsi2ssq\t{%2, %0|%0, %2}
1867 cvtsi2ssq\t{%2, %0|%0, %2}
1868 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1869 [(set_attr "isa" "noavx,noavx,avx")
1870 (set_attr "type" "sseicvt")
1871 (set_attr "athlon_decode" "vector,double,*")
1872 (set_attr "amdfam10_decode" "vector,double,*")
1873 (set_attr "bdver1_decode" "double,direct,*")
1874 (set_attr "length_vex" "*,*,4")
1875 (set_attr "prefix_rex" "1,1,*")
1876 (set_attr "prefix" "orig,orig,vex")
1877 (set_attr "mode" "SF")])
;; Converts element 0 of the V4SF in operand 1 to SI via the
;; UNSPEC_FIX_NOTRUNC unspec.  Emits %vcvtss2si; the source may be a
;; register (alternative 0) or memory (alternative 1).
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is set
;; here -- confirm whether that omission is intended.
1879 (define_insn "sse_cvtss2si"
1880 [(set (match_operand:SI 0 "register_operand" "=r,r")
1883 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1884 (parallel [(const_int 0)]))]
1885 UNSPEC_FIX_NOTRUNC))]
1887 "%vcvtss2si\t{%1, %0|%0, %1}"
1888 [(set_attr "type" "sseicvt")
1889 (set_attr "athlon_decode" "double,vector")
1890 (set_attr "bdver1_decode" "double,double")
1891 (set_attr "prefix_rep" "1")
1892 (set_attr "prefix" "maybe_vex")
1893 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SF operand rather than
;; element 0 of a V4SF.  Same UNSPEC_FIX_NOTRUNC unspec and %vcvtss2si
;; template.
1895 (define_insn "sse_cvtss2si_2"
1896 [(set (match_operand:SI 0 "register_operand" "=r,r")
1897 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1898 UNSPEC_FIX_NOTRUNC))]
1900 "%vcvtss2si\t{%1, %0|%0, %1}"
1901 [(set_attr "type" "sseicvt")
1902 (set_attr "athlon_decode" "double,vector")
1903 (set_attr "amdfam10_decode" "double,double")
1904 (set_attr "bdver1_decode" "double,double")
1905 (set_attr "prefix_rep" "1")
1906 (set_attr "prefix" "maybe_vex")
1907 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvtss2si: converts element 0 of the V4SF in
;; operand 1 to DI via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE && TARGET_64BIT.  The template carries a {q} suffix.
1909 (define_insn "sse_cvtss2siq"
1910 [(set (match_operand:DI 0 "register_operand" "=r,r")
1913 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1914 (parallel [(const_int 0)]))]
1915 UNSPEC_FIX_NOTRUNC))]
1916 "TARGET_SSE && TARGET_64BIT"
1917 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1918 [(set_attr "type" "sseicvt")
1919 (set_attr "athlon_decode" "double,vector")
1920 (set_attr "bdver1_decode" "double,double")
1921 (set_attr "prefix_rep" "1")
1922 (set_attr "prefix" "maybe_vex")
1923 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a plain SF operand.  Same
;; UNSPEC_FIX_NOTRUNC unspec, 64-bit condition and {q}-suffixed template.
1925 (define_insn "sse_cvtss2siq_2"
1926 [(set (match_operand:DI 0 "register_operand" "=r,r")
1927 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1928 UNSPEC_FIX_NOTRUNC))]
1929 "TARGET_SSE && TARGET_64BIT"
1930 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1931 [(set_attr "type" "sseicvt")
1932 (set_attr "athlon_decode" "double,vector")
1933 (set_attr "amdfam10_decode" "double,double")
1934 (set_attr "bdver1_decode" "double,double")
1935 (set_attr "prefix_rep" "1")
1936 (set_attr "prefix" "maybe_vex")
1937 (set_attr "mode" "DI")])
;; Converts element 0 of the V4SF in operand 1 to SI; emits %vcvttss2si.
;; The source may be a register (alternative 0) or memory (alternative 1).
1939 (define_insn "sse_cvttss2si"
1940 [(set (match_operand:SI 0 "register_operand" "=r,r")
1943 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1944 (parallel [(const_int 0)]))))]
1946 "%vcvttss2si\t{%1, %0|%0, %1}"
1947 [(set_attr "type" "sseicvt")
1948 (set_attr "athlon_decode" "double,vector")
1949 (set_attr "amdfam10_decode" "double,double")
1950 (set_attr "bdver1_decode" "double,double")
1951 (set_attr "prefix_rep" "1")
1952 (set_attr "prefix" "maybe_vex")
1953 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvttss2si: converts element 0 of the V4SF
;; in operand 1 to DI.  Requires TARGET_SSE && TARGET_64BIT.  The template
;; carries a {q} suffix.
1955 (define_insn "sse_cvttss2siq"
1956 [(set (match_operand:DI 0 "register_operand" "=r,r")
1959 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1960 (parallel [(const_int 0)]))))]
1961 "TARGET_SSE && TARGET_64BIT"
1962 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1963 [(set_attr "type" "sseicvt")
1964 (set_attr "athlon_decode" "double,vector")
1965 (set_attr "amdfam10_decode" "double,double")
1966 (set_attr "bdver1_decode" "double,double")
1967 (set_attr "prefix_rep" "1")
1968 (set_attr "prefix" "maybe_vex")
1969 (set_attr "mode" "DI")])
;; 256-bit integer-to-float conversion: float:V8SF of a V8SI operand
;; (register or memory).  Emits vcvtdq2ps; VEX-encoded only.
1971 (define_insn "avx_cvtdq2ps256"
1972 [(set (match_operand:V8SF 0 "register_operand" "=x")
1973 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1975 "vcvtdq2ps\t{%1, %0|%0, %1}"
1976 [(set_attr "type" "ssecvt")
1977 (set_attr "prefix" "vex")
1978 (set_attr "mode" "V8SF")])
;; 128-bit integer-to-float conversion: float:V4SF of a V4SI operand
;; (register or memory).  Emits %vcvtdq2ps with prefix "maybe_vex".
1980 (define_insn "sse2_cvtdq2ps"
1981 [(set (match_operand:V4SF 0 "register_operand" "=x")
1982 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1984 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1985 [(set_attr "type" "ssecvt")
1986 (set_attr "prefix" "maybe_vex")
1987 (set_attr "mode" "V4SF")])
;; Expander for V4SI-to-V4SF conversion built from four steps: a float
;; conversion of operand 1, a less-than compare of operand 5 against
;; operand 3, an AND of operand 6 with operand 4, and a final add of
;; operands 5 and 7 into operand 0.
;; The preparation code sets:
;;   operands[3] = the V4SF zero constant, forced into a register
;;   operands[4] = a V4SF constant built from 2**32 (dconst1 scaled by
;;                 real_ldexp with exponent 32), forced into a register
;;   operands[5..7] = fresh V4SF pseudos
1989 (define_expand "sse2_cvtudq2ps"
1991 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
1993 (lt:V4SF (match_dup 5) (match_dup 3)))
1995 (and:V4SF (match_dup 6) (match_dup 4)))
1996 (set (match_operand:V4SF 0 "register_operand" "")
1997 (plus:V4SF (match_dup 5) (match_dup 7)))]
2000 REAL_VALUE_TYPE TWO32r;
2004 real_ldexp (&TWO32r, &dconst1, 32);
2005 x = const_double_from_real_value (TWO32r, SFmode);
2007 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2008 operands[4] = force_reg (V4SFmode,
2009 ix86_build_const_vector (V4SFmode, 1, x));
2011 for (i = 5; i < 8; i++)
2012 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit float-to-integer conversion expressed as the UNSPEC_FIX_NOTRUNC
;; unspec of a V8SF operand (register or memory).  Emits vcvtps2dq;
;; VEX-encoded only, "mode" attribute OI.
2015 (define_insn "avx_cvtps2dq256"
2016 [(set (match_operand:V8SI 0 "register_operand" "=x")
2017 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2018 UNSPEC_FIX_NOTRUNC))]
2020 "vcvtps2dq\t{%1, %0|%0, %1}"
2021 [(set_attr "type" "ssecvt")
2022 (set_attr "prefix" "vex")
2023 (set_attr "mode" "OI")])
;; 128-bit float-to-integer conversion expressed as the UNSPEC_FIX_NOTRUNC
;; unspec of a V4SF operand (register or memory).  Emits %vcvtps2dq.
;; prefix_data16 is chosen by a test on TARGET_AVX, with "1" as the
;; non-AVX value.
2025 (define_insn "sse2_cvtps2dq"
2026 [(set (match_operand:V4SI 0 "register_operand" "=x")
2027 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2028 UNSPEC_FIX_NOTRUNC))]
2030 "%vcvtps2dq\t{%1, %0|%0, %1}"
2031 [(set_attr "type" "ssecvt")
2032 (set (attr "prefix_data16")
2034 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2036 (const_string "1")))
2037 (set_attr "prefix" "maybe_vex")
2038 (set_attr "mode" "TI")])
;; 256-bit truncating float-to-integer conversion: fix:V8SI of a V8SF
;; operand (register or memory).  Emits vcvttps2dq; VEX-encoded only.
2040 (define_insn "avx_cvttps2dq256"
2041 [(set (match_operand:V8SI 0 "register_operand" "=x")
2042 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2044 "vcvttps2dq\t{%1, %0|%0, %1}"
2045 [(set_attr "type" "ssecvt")
2046 (set_attr "prefix" "vex")
2047 (set_attr "mode" "OI")])
;; 128-bit truncating float-to-integer conversion: fix:V4SI of a V4SF
;; operand (register or memory).  Emits %vcvttps2dq.  prefix_rep and
;; prefix_data16 are each chosen by a test on TARGET_AVX.
;; NOTE(review): prefix_data16 is set twice -- once by the TARGET_AVX
;; conditional and again unconditionally to "0" just below it.  One of the
;; two looks redundant; confirm which is intended before removing either.
2049 (define_insn "sse2_cvttps2dq"
2050 [(set (match_operand:V4SI 0 "register_operand" "=x")
2051 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2053 "%vcvttps2dq\t{%1, %0|%0, %1}"
2054 [(set_attr "type" "ssecvt")
2055 (set (attr "prefix_rep")
2057 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2059 (const_string "1")))
2060 (set (attr "prefix_data16")
2062 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2064 (const_string "0")))
2065 (set_attr "prefix_data16" "0")
2066 (set_attr "prefix" "maybe_vex")
2067 (set_attr "mode" "TI")])
2069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2071 ;; Parallel double-precision floating point conversion operations
2073 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI to V2DF; emits cvtpi2pd.  The source is an MMX register
;; in alternative 0 ("y") and memory in alternative 1 ("m"); only the
;; register alternative is tagged unit "mmx" and prefix_data16 "1".
2075 (define_insn "sse2_cvtpi2pd"
2076 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2077 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2079 "cvtpi2pd\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx,*")
2082 (set_attr "prefix_data16" "1,*")
2083 (set_attr "mode" "V2DF")])
;; Converts a V2DF (register or memory) to V2SI in an MMX register via the
;; UNSPEC_FIX_NOTRUNC unspec.  Emits cvtpd2pi; unit "mmx".
2085 (define_insn "sse2_cvtpd2pi"
2086 [(set (match_operand:V2SI 0 "register_operand" "=y")
2087 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2088 UNSPEC_FIX_NOTRUNC))]
2090 "cvtpd2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "bdver1_decode" "double")
2094 (set_attr "prefix_data16" "1")
2095 (set_attr "mode" "DI")])
;; Truncating counterpart of sse2_cvtpd2pi: fix:V2SI of a V2DF operand
;; (register or memory) into an MMX register.  Emits cvttpd2pi; unit "mmx".
2097 (define_insn "sse2_cvttpd2pi"
2098 [(set (match_operand:V2SI 0 "register_operand" "=y")
2099 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2101 "cvttpd2pi\t{%1, %0|%0, %1}"
2102 [(set_attr "type" "ssecvt")
2103 (set_attr "unit" "mmx")
2104 (set_attr "bdver1_decode" "double")
2105 (set_attr "prefix_data16" "1")
2106 (set_attr "mode" "TI")])
;; Scalar 32-bit integer -> double conversion into a V2DF register.
;; Operand 2 is the SImode source (GPR or memory); operand 1 is a V2DF
;; register that is tied to the destination ("0") in the two non-AVX
;; alternatives and is a free register in the three-operand AVX
;; alternative.  Per-alternative decode attributes describe the AMD cores.
2108 (define_insn "sse2_cvtsi2sd"
2109 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2112 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2113 (match_operand:V2DF 1 "register_operand" "0,0,x")
2117 cvtsi2sd\t{%2, %0|%0, %2}
2118 cvtsi2sd\t{%2, %0|%0, %2}
2119 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2120 [(set_attr "isa" "noavx,noavx,avx")
2121 (set_attr "type" "sseicvt")
2122 (set_attr "athlon_decode" "double,direct,*")
2123 (set_attr "amdfam10_decode" "vector,double,*")
2124 (set_attr "bdver1_decode" "double,direct,*")
2125 (set_attr "prefix" "orig,orig,vex")
2126 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: the integer source (operand 2) is
;; DImode and the pattern is only enabled for TARGET_SSE2 && TARGET_64BIT.
;; The non-AVX alternatives are marked as needing a REX prefix; the AVX
;; alternative has an explicit "length_vex" of 4.
2128 (define_insn "sse2_cvtsi2sdq"
2129 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2132 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2133 (match_operand:V2DF 1 "register_operand" "0,0,x")
2135 "TARGET_SSE2 && TARGET_64BIT"
2137 cvtsi2sdq\t{%2, %0|%0, %2}
2138 cvtsi2sdq\t{%2, %0|%0, %2}
2139 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2140 [(set_attr "isa" "noavx,noavx,avx")
2141 (set_attr "type" "sseicvt")
2142 (set_attr "athlon_decode" "double,direct,*")
2143 (set_attr "amdfam10_decode" "vector,double,*")
2144 (set_attr "bdver1_decode" "double,direct,*")
2145 (set_attr "length_vex" "*,*,4")
2146 (set_attr "prefix_rex" "1,1,*")
2147 (set_attr "prefix" "orig,orig,vex")
2148 (set_attr "mode" "DF")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of a V2DF
;; operand (register or memory) to a 32-bit integer in a GPR.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm that is intentional.
2150 (define_insn "sse2_cvtsd2si"
2151 [(set (match_operand:SI 0 "register_operand" "=r,r")
2154 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2155 (parallel [(const_int 0)]))]
2156 UNSPEC_FIX_NOTRUNC))]
2158 "%vcvtsd2si\t{%1, %0|%0, %1}"
2159 [(set_attr "type" "sseicvt")
2160 (set_attr "athlon_decode" "double,vector")
2161 (set_attr "bdver1_decode" "double,double")
2162 (set_attr "prefix_rep" "1")
2163 (set_attr "prefix" "maybe_vex")
2164 (set_attr "mode" "SI")])
;; Same instruction as sse2_cvtsd2si, but matching a plain DFmode scalar
;; operand (register or memory) instead of a vec_select out of V2DF.
2166 (define_insn "sse2_cvtsd2si_2"
2167 [(set (match_operand:SI 0 "register_operand" "=r,r")
2168 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2169 UNSPEC_FIX_NOTRUNC))]
2171 "%vcvtsd2si\t{%1, %0|%0, %1}"
2172 [(set_attr "type" "sseicvt")
2173 (set_attr "athlon_decode" "double,vector")
2174 (set_attr "amdfam10_decode" "double,double")
2175 (set_attr "bdver1_decode" "double,double")
2176 (set_attr "prefix_rep" "1")
2177 (set_attr "prefix" "maybe_vex")
2178 (set_attr "mode" "SI")])
;; 64-bit result variant of sse2_cvtsd2si: element 0 of a V2DF operand is
;; converted (UNSPEC_FIX_NOTRUNC) into a DImode GPR.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT; the "{q}" suffix is emitted for AT&T
;; syntax only.
2180 (define_insn "sse2_cvtsd2siq"
2181 [(set (match_operand:DI 0 "register_operand" "=r,r")
2184 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2185 (parallel [(const_int 0)]))]
2186 UNSPEC_FIX_NOTRUNC))]
2187 "TARGET_SSE2 && TARGET_64BIT"
2188 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2189 [(set_attr "type" "sseicvt")
2190 (set_attr "athlon_decode" "double,vector")
2191 (set_attr "bdver1_decode" "double,double")
2192 (set_attr "prefix_rep" "1")
2193 (set_attr "prefix" "maybe_vex")
2194 (set_attr "mode" "DI")])
;; Same instruction as sse2_cvtsd2siq, but matching a plain DFmode scalar
;; operand (register or memory).  64-bit targets only.
2196 (define_insn "sse2_cvtsd2siq_2"
2197 [(set (match_operand:DI 0 "register_operand" "=r,r")
2198 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2199 UNSPEC_FIX_NOTRUNC))]
2200 "TARGET_SSE2 && TARGET_64BIT"
2201 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2202 [(set_attr "type" "sseicvt")
2203 (set_attr "athlon_decode" "double,vector")
2204 (set_attr "amdfam10_decode" "double,double")
2205 (set_attr "bdver1_decode" "double,double")
2206 (set_attr "prefix_rep" "1")
2207 (set_attr "prefix" "maybe_vex")
2208 (set_attr "mode" "DI")])
;; Truncating scalar double -> 32-bit integer conversion (cvttsd2si).
;; The source is element 0 of a V2DF operand held in a register or in
;; memory; the result goes to a GPR.
2210 (define_insn "sse2_cvttsd2si"
2211 [(set (match_operand:SI 0 "register_operand" "=r,r")
2214 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2215 (parallel [(const_int 0)]))))]
2217 "%vcvttsd2si\t{%1, %0|%0, %1}"
2218 [(set_attr "type" "sseicvt")
2219 (set_attr "athlon_decode" "double,vector")
2220 (set_attr "amdfam10_decode" "double,double")
2221 (set_attr "bdver1_decode" "double,double")
2222 (set_attr "prefix_rep" "1")
2223 (set_attr "prefix" "maybe_vex")
2224 (set_attr "mode" "SI")])
;; 64-bit result variant of sse2_cvttsd2si (DImode GPR destination).
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2226 (define_insn "sse2_cvttsd2siq"
2227 [(set (match_operand:DI 0 "register_operand" "=r,r")
2230 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2231 (parallel [(const_int 0)]))))]
2232 "TARGET_SSE2 && TARGET_64BIT"
2233 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2234 [(set_attr "type" "sseicvt")
2235 (set_attr "athlon_decode" "double,vector")
2236 (set_attr "amdfam10_decode" "double,double")
2237 (set_attr "bdver1_decode" "double,double")
2238 (set_attr "prefix_rep" "1")
2239 (set_attr "prefix" "maybe_vex")
2240 (set_attr "mode" "DI")])
;; Convert four packed 32-bit integers (V4SI, register or memory) to four
;; packed doubles (V4DF) with vcvtdq2pd.
2242 (define_insn "avx_cvtdq2pd256"
2243 [(set (match_operand:V4DF 0 "register_operand" "=x")
2244 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2246 "vcvtdq2pd\t{%1, %0|%0, %1}"
2247 [(set_attr "type" "ssecvt")
2248 (set_attr "prefix" "vex")
2249 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtdq2pd256 whose source is a V8SI operand from which
;; elements 0..3 are selected.  The template prints operand 1 with the
;; "%x1" modifier (presumably the 128-bit register name -- confirm against
;; the operand printer).
2251 (define_insn "*avx_cvtdq2pd256_2"
2252 [(set (match_operand:V4DF 0 "register_operand" "=x")
2255 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2256 (parallel [(const_int 0) (const_int 1)
2257 (const_int 2) (const_int 3)]))))]
2259 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2260 [(set_attr "type" "ssecvt")
2261 (set_attr "prefix" "vex")
2262 (set_attr "mode" "V4DF")])
;; Convert elements 0 and 1 of a V4SI operand (register or memory) to two
;; packed doubles (V2DF) with [v]cvtdq2pd.
2264 (define_insn "sse2_cvtdq2pd"
2265 [(set (match_operand:V2DF 0 "register_operand" "=x")
2268 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2269 (parallel [(const_int 0) (const_int 1)]))))]
2271 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2272 [(set_attr "type" "ssecvt")
2273 (set_attr "prefix" "maybe_vex")
2274 (set_attr "mode" "V2DF")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of four packed doubles
;; (V4DF, register or memory) to four 32-bit integers (V4SI).  The "{y}"
;; suffix is emitted for AT&T syntax only.
2276 (define_insn "avx_cvtpd2dq256"
2277 [(set (match_operand:V4SI 0 "register_operand" "=x")
2278 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2279 UNSPEC_FIX_NOTRUNC))]
2281 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix" "vex")
2284 (set_attr "mode" "OI")])
;; Expander for the 128-bit cvtpd2dq: the V2DF source is converted to V2SI
;; via UNSPEC and operands[2] is set to the V2SImode zero constant, which
;; the matching "*sse2_cvtpd2dq" insn expects as its const0 operand.
2286 (define_expand "sse2_cvtpd2dq"
2287 [(set (match_operand:V4SI 0 "register_operand" "")
2289 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2293 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvtpd2dq expander.  The V4SI destination is formed
;; from the V2SI conversion result and a const0 V2SI operand (operand 2).
;; The output code returns either the VEX form "vcvtpd2dq{x}" or the
;; legacy "cvtpd2dq"; the selecting condition is not visible in this chunk.
2295 (define_insn "*sse2_cvtpd2dq"
2296 [(set (match_operand:V4SI 0 "register_operand" "=x")
2298 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2300 (match_operand:V2SI 2 "const0_operand" "")))]
2304 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2306 return "cvtpd2dq\t{%1, %0|%0, %1}";
2308 [(set_attr "type" "ssecvt")
2309 (set_attr "prefix_rep" "1")
2310 (set_attr "prefix_data16" "0")
2311 (set_attr "prefix" "maybe_vex")
2312 (set_attr "mode" "TI")
2313 (set_attr "amdfam10_decode" "double")
2314 (set_attr "athlon_decode" "vector")
2315 (set_attr "bdver1_decode" "double")])
;; Truncating (`fix') conversion of four packed doubles (V4DF, register or
;; memory) to four 32-bit integers (V4SI) with vcvttpd2dq.
2317 (define_insn "avx_cvttpd2dq256"
2318 [(set (match_operand:V4SI 0 "register_operand" "=x")
2319 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2321 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2322 [(set_attr "type" "ssecvt")
2323 (set_attr "prefix" "vex")
2324 (set_attr "mode" "OI")])
;; Expander for the 128-bit truncating cvttpd2dq: the V2DF source is
;; converted with `fix' to V2SI and operands[2] is set to the V2SImode zero
;; constant used by the matching "*sse2_cvttpd2dq" insn.
2326 (define_expand "sse2_cvttpd2dq"
2327 [(set (match_operand:V4SI 0 "register_operand" "")
2329 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2332 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq expander: truncating V2DF -> V2SI
;; conversion combined with a const0 V2SI operand to form the V4SI result.
;; The output code returns either "vcvttpd2dq{x}" or the legacy
;; "cvttpd2dq"; the selecting condition is not visible in this chunk.
2334 (define_insn "*sse2_cvttpd2dq"
2335 [(set (match_operand:V4SI 0 "register_operand" "=x")
2337 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2338 (match_operand:V2SI 2 "const0_operand" "")))]
2342 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2344 return "cvttpd2dq\t{%1, %0|%0, %1}";
2346 [(set_attr "type" "ssecvt")
2347 (set_attr "amdfam10_decode" "double")
2348 (set_attr "athlon_decode" "vector")
2349 (set_attr "bdver1_decode" "double")
2350 (set_attr "prefix" "maybe_vex")
2351 (set_attr "mode" "TI")])
;; Scalar double -> single conversion.  Operand 2 (V2DF, register or
;; memory) is narrowed with float_truncate; operand 1 is a V4SF register
;; tied to the destination in the non-AVX alternatives and free in the
;; three-operand AVX alternative.
2353 (define_insn "sse2_cvtsd2ss"
2354 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2357 (float_truncate:V2SF
2358 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2359 (match_operand:V4SF 1 "register_operand" "0,0,x")
2363 cvtsd2ss\t{%2, %0|%0, %2}
2364 cvtsd2ss\t{%2, %0|%0, %2}
2365 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2366 [(set_attr "isa" "noavx,noavx,avx")
2367 (set_attr "type" "ssecvt")
2368 (set_attr "athlon_decode" "vector,double,*")
2369 (set_attr "amdfam10_decode" "vector,double,*")
2370 (set_attr "bdver1_decode" "direct,direct,*")
2371 (set_attr "prefix" "orig,orig,vex")
2372 (set_attr "mode" "SF")])
;; Scalar single -> double conversion.  The source is taken from elements
;; 0 and 1 of operand 2 (V4SF, register or memory); operand 1 is a V2DF
;; register tied to the destination in the non-AVX alternatives and free
;; in the three-operand AVX alternative.
2374 (define_insn "sse2_cvtss2sd"
2375 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2379 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2380 (parallel [(const_int 0) (const_int 1)])))
2381 (match_operand:V2DF 1 "register_operand" "0,0,x")
2385 cvtss2sd\t{%2, %0|%0, %2}
2386 cvtss2sd\t{%2, %0|%0, %2}
2387 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2388 [(set_attr "isa" "noavx,noavx,avx")
2389 (set_attr "type" "ssecvt")
2390 (set_attr "amdfam10_decode" "vector,double,*")
2391 (set_attr "athlon_decode" "direct,direct,*")
2392 (set_attr "bdver1_decode" "direct,direct,*")
2393 (set_attr "prefix" "orig,orig,vex")
2394 (set_attr "mode" "DF")])
;; Narrow four packed doubles (V4DF, register or memory) to four packed
;; floats (V4SF) with vcvtpd2ps; "{y}" is emitted for AT&T syntax only.
2396 (define_insn "avx_cvtpd2ps256"
2397 [(set (match_operand:V4SF 0 "register_operand" "=x")
2398 (float_truncate:V4SF
2399 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2401 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "ssecvt")
2403 (set_attr "prefix" "vex")
2404 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit cvtpd2ps: the V2DF source is narrowed to V2SF
;; and operands[2] is set to the V2SFmode zero constant used by the
;; matching "*sse2_cvtpd2ps" insn.
2406 (define_expand "sse2_cvtpd2ps"
2407 [(set (match_operand:V4SF 0 "register_operand" "")
2409 (float_truncate:V2SF
2410 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2413 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn behind the sse2_cvtpd2ps expander: V2DF -> V2SF float_truncate
;; combined with a const0 V2SF operand to form the V4SF result.  The output
;; code returns either "vcvtpd2ps{x}" or the legacy "cvtpd2ps"; the
;; selecting condition is not visible in this chunk.
2415 (define_insn "*sse2_cvtpd2ps"
2416 [(set (match_operand:V4SF 0 "register_operand" "=x")
2418 (float_truncate:V2SF
2419 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2420 (match_operand:V2SF 2 "const0_operand" "")))]
2424 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2426 return "cvtpd2ps\t{%1, %0|%0, %1}";
2428 [(set_attr "type" "ssecvt")
2429 (set_attr "amdfam10_decode" "double")
2430 (set_attr "athlon_decode" "vector")
2431 (set_attr "bdver1_decode" "double")
2432 (set_attr "prefix_data16" "1")
2433 (set_attr "prefix" "maybe_vex")
2434 (set_attr "mode" "V4SF")])
;; Widen four packed floats (V4SF, register or memory) to four packed
;; doubles (V4DF) with vcvtps2pd.
2436 (define_insn "avx_cvtps2pd256"
2437 [(set (match_operand:V4DF 0 "register_operand" "=x")
2439 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2441 "vcvtps2pd\t{%1, %0|%0, %1}"
2442 [(set_attr "type" "ssecvt")
2443 (set_attr "prefix" "vex")
2444 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtps2pd256 whose source is a V8SF operand from which
;; elements 0..3 are selected; the template prints operand 1 with the
;; "%x1" modifier.
2446 (define_insn "*avx_cvtps2pd256_2"
2447 [(set (match_operand:V4DF 0 "register_operand" "=x")
2450 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2451 (parallel [(const_int 0) (const_int 1)
2452 (const_int 2) (const_int 3)]))))]
2454 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "prefix" "vex")
2457 (set_attr "mode" "V4DF")])
;; Widen elements 0 and 1 of a V4SF operand (register or memory) to two
;; packed doubles (V2DF) with [v]cvtps2pd.
2459 (define_insn "sse2_cvtps2pd"
2460 [(set (match_operand:V2DF 0 "register_operand" "=x")
2463 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2464 (parallel [(const_int 0) (const_int 1)]))))]
2466 "%vcvtps2pd\t{%1, %0|%0, %1}"
2467 [(set_attr "type" "ssecvt")
2468 (set_attr "amdfam10_decode" "direct")
2469 (set_attr "athlon_decode" "double")
2470 (set_attr "bdver1_decode" "double")
2471 (set_attr "prefix_data16" "0")
2472 (set_attr "prefix" "maybe_vex")
2473 (set_attr "mode" "V2DF")])
;; Widen the high half of a V4SF operand to V2DF.  Two steps are visible:
;; a shuffle selecting elements {6,7,2,3} of a concatenation that involves
;; operand 1 into the V4SF temporary operands[2], followed by a conversion
;; of elements 0 and 1 into the V2DF destination.
2475 (define_expand "vec_unpacks_hi_v4sf"
2480 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2481 (parallel [(const_int 6) (const_int 7)
2482 (const_int 2) (const_int 3)])))
2483 (set (match_operand:V2DF 0 "register_operand" "")
2487 (parallel [(const_int 0) (const_int 1)]))))]
2489 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF operand to V4DF: elements 4..7 of
;; operand 1 are selected (presumably into the V4SF temporary operands[2]
;; allocated below -- the set destination is not visible in this chunk)
;; and then converted into the V4DF destination.
2491 (define_expand "vec_unpacks_hi_v8sf"
2494 (match_operand:V8SF 1 "nonimmediate_operand" "")
2495 (parallel [(const_int 4) (const_int 5)
2496 (const_int 6) (const_int 7)])))
2497 (set (match_operand:V4DF 0 "register_operand" "")
2501 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF operand to V2DF: elements 0 and 1 of
;; operand 1 feed the V2DF destination directly, no temporary is needed.
2503 (define_expand "vec_unpacks_lo_v4sf"
2504 [(set (match_operand:V2DF 0 "register_operand" "")
2507 (match_operand:V4SF 1 "nonimmediate_operand" "")
2508 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF operand to V4DF: elements 0..3 of
;; operand 1 feed the V4DF destination directly.
2511 (define_expand "vec_unpacks_lo_v8sf"
2512 [(set (match_operand:V4DF 0 "register_operand" "")
2515 (match_operand:V8SF 1 "nonimmediate_operand" "")
2516 (parallel [(const_int 0) (const_int 1)
2517 (const_int 2) (const_int 3)]))))]
;; V8HI (high half) -> V4SF: unpack into a fresh V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert the temporary to float with
;; gen_sse2_cvtdq2ps.
2520 (define_expand "vec_unpacks_float_hi_v8hi"
2521 [(match_operand:V4SF 0 "register_operand" "")
2522 (match_operand:V8HI 1 "register_operand" "")]
2525 rtx tmp = gen_reg_rtx (V4SImode);
2527 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2528 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI (low half) -> V4SF: unpack into a fresh V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert the temporary to float with
;; gen_sse2_cvtdq2ps.
2532 (define_expand "vec_unpacks_float_lo_v8hi"
2533 [(match_operand:V4SF 0 "register_operand" "")
2534 (match_operand:V8HI 1 "register_operand" "")]
2537 rtx tmp = gen_reg_rtx (V4SImode);
2539 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2540 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI (high half) -> V4SF: unpack into a fresh V4SI temporary
;; with gen_vec_unpacku_hi_v8hi, then convert with gen_sse2_cvtdq2ps.
2544 (define_expand "vec_unpacku_float_hi_v8hi"
2545 [(match_operand:V4SF 0 "register_operand" "")
2546 (match_operand:V8HI 1 "register_operand" "")]
2549 rtx tmp = gen_reg_rtx (V4SImode);
2551 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2552 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI (low half) -> V4SF: unpack into a fresh V4SI temporary
;; with gen_vec_unpacku_lo_v8hi, then convert with gen_sse2_cvtdq2ps.
2556 (define_expand "vec_unpacku_float_lo_v8hi"
2557 [(match_operand:V4SF 0 "register_operand" "")
2558 (match_operand:V8HI 1 "register_operand" "")]
2561 rtx tmp = gen_reg_rtx (V4SImode);
2563 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2564 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V4SI (high half) -> V2DF.  Elements {2,3,2,3} of operand 1 are selected
;; first (presumably into the V4SI temporary operands[2] allocated below --
;; the set destination is not visible in this chunk); elements 0 and 1 of
;; that result are then converted into the V2DF destination.
2568 (define_expand "vec_unpacks_float_hi_v4si"
2571 (match_operand:V4SI 1 "nonimmediate_operand" "")
2572 (parallel [(const_int 2) (const_int 3)
2573 (const_int 2) (const_int 3)])))
2574 (set (match_operand:V2DF 0 "register_operand" "")
2578 (parallel [(const_int 0) (const_int 1)]))))]
2580 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4SI (low half) -> V2DF: elements 0 and 1 of operand 1 feed the V2DF
;; destination directly.
2582 (define_expand "vec_unpacks_float_lo_v4si"
2583 [(set (match_operand:V2DF 0 "register_operand" "")
2586 (match_operand:V4SI 1 "nonimmediate_operand" "")
2587 (parallel [(const_int 0) (const_int 1)]))))]
;; V8SI (high half) -> V4DF: elements 4..7 of operand 1 are selected
;; (presumably into the V4SI temporary operands[2] allocated below) and
;; then converted into the V4DF destination.
2590 (define_expand "vec_unpacks_float_hi_v8si"
2593 (match_operand:V8SI 1 "nonimmediate_operand" "")
2594 (parallel [(const_int 4) (const_int 5)
2595 (const_int 6) (const_int 7)])))
2596 (set (match_operand:V4DF 0 "register_operand" "")
2600 "operands[2] = gen_reg_rtx (V4SImode);")
;; V8SI (low half) -> V4DF: elements 0..3 of operand 1 feed the V4DF
;; destination directly.
2602 (define_expand "vec_unpacks_float_lo_v8si"
2603 [(set (match_operand:V4DF 0 "register_operand" "")
2606 (match_operand:V8SI 1 "nonimmediate_operand" "")
2607 (parallel [(const_int 0) (const_int 1)
2608 (const_int 2) (const_int 3)]))))]
;; Unsigned V4SI (high half) -> V2DF.  Visible steps: select elements
;; {2,3,2,3} of operand 1, convert elements 0 and 1, compare the converted
;; value (operand 6) against zero (operand 3) with `lt', AND the result
;; (operand 7) with a vector of 2**32 (operand 4), and add that correction
;; (operand 8) to the converted value.  The preparation code builds 2**32
;; with real_ldexp (dconst1, 32), forces the zero and 2**32 vectors into
;; registers, and allocates a V4SI temporary (operand 5) plus three V2DF
;; temporaries (operands 6..8).
;; NOTE(review): the net effect is presumably "convert as signed, then add
;; 2**32 to lanes that came out negative"; the destinations of the
;; intermediate sets are not visible in this chunk.
2611 (define_expand "vec_unpacku_float_hi_v4si"
2614 (match_operand:V4SI 1 "nonimmediate_operand" "")
2615 (parallel [(const_int 2) (const_int 3)
2616 (const_int 2) (const_int 3)])))
2621 (parallel [(const_int 0) (const_int 1)]))))
2623 (lt:V2DF (match_dup 6) (match_dup 3)))
2625 (and:V2DF (match_dup 7) (match_dup 4)))
2626 (set (match_operand:V2DF 0 "register_operand" "")
2627 (plus:V2DF (match_dup 6) (match_dup 8)))]
2630 REAL_VALUE_TYPE TWO32r;
2634 real_ldexp (&TWO32r, &dconst1, 32);
2635 x = const_double_from_real_value (TWO32r, DFmode);
2637 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2638 operands[4] = force_reg (V2DFmode,
2639 ix86_build_const_vector (V2DFmode, 1, x));
2641 operands[5] = gen_reg_rtx (V4SImode);
2643 for (i = 6; i < 9; i++)
2644 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI (low half) -> V2DF.  Same scheme as the _hi_ expander but
;; without the initial shuffle: elements 0 and 1 of operand 1 are
;; converted, the converted value (operand 5) is compared against zero
;; (operand 3) with `lt', the result (operand 6) is ANDed with a vector of
;; 2**32 (operand 4), and that correction (operand 7) is added to the
;; converted value.  Operands 5..7 are fresh V2DF temporaries.
2647 (define_expand "vec_unpacku_float_lo_v4si"
2651 (match_operand:V4SI 1 "nonimmediate_operand" "")
2652 (parallel [(const_int 0) (const_int 1)]))))
2654 (lt:V2DF (match_dup 5) (match_dup 3)))
2656 (and:V2DF (match_dup 6) (match_dup 4)))
2657 (set (match_operand:V2DF 0 "register_operand" "")
2658 (plus:V2DF (match_dup 5) (match_dup 7)))]
2661 REAL_VALUE_TYPE TWO32r;
2665 real_ldexp (&TWO32r, &dconst1, 32);
2666 x = const_double_from_real_value (TWO32r, DFmode);
2668 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2669 operands[4] = force_reg (V2DFmode,
2670 ix86_build_const_vector (V2DFmode, 1, x));
2672 for (i = 5; i < 8; i++)
2673 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF operands into one V8SF: each of operands 1 and 2 is
;; narrowed with float_truncate to V4SF (presumably into the temporaries
;; operands[3] and operands[4] allocated below), and the V8SF destination
;; is then formed from them.
2676 (define_expand "vec_pack_trunc_v4df"
2678 (float_truncate:V4SF
2679 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2681 (float_truncate:V4SF
2682 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2683 (set (match_operand:V8SF 0 "register_operand" "")
2689 operands[3] = gen_reg_rtx (V4SFmode);
2690 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into one V4SF: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary, and gen_sse_movlhps then
;; combines the two temporaries into operand 0.
2693 (define_expand "vec_pack_trunc_v2df"
2694 [(match_operand:V4SF 0 "register_operand" "")
2695 (match_operand:V2DF 1 "nonimmediate_operand" "")
2696 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2701 r1 = gen_reg_rtx (V4SFmode);
2702 r2 = gen_reg_rtx (V4SFmode);
2704 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2705 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2706 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI using the truncating conversion:
;; each input goes through gen_sse2_cvttpd2dq into its own V4SI temporary,
;; and the results are merged with gen_vec_interleave_lowv2di, with all
;; three operands viewed as V2DImode via gen_lowpart.
2710 (define_expand "vec_pack_sfix_trunc_v2df"
2711 [(match_operand:V4SI 0 "register_operand" "")
2712 (match_operand:V2DF 1 "nonimmediate_operand" "")
2713 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2718 r1 = gen_reg_rtx (V4SImode);
2719 r2 = gen_reg_rtx (V4SImode);
2721 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2722 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2723 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2724 gen_lowpart (V2DImode, r1),
2725 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF operands into one V4SI using the non-truncating
;; conversion: same structure as vec_pack_sfix_trunc_v2df, but built on
;; gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.
2729 (define_expand "vec_pack_sfix_v2df"
2730 [(match_operand:V4SI 0 "register_operand" "")
2731 (match_operand:V2DF 1 "nonimmediate_operand" "")
2732 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2737 r1 = gen_reg_rtx (V4SImode);
2738 r2 = gen_reg_rtx (V4SImode);
2740 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2741 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2742 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2743 gen_lowpart (V2DImode, r1),
2744 gen_lowpart (V2DImode, r2)));
2748 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2750 ;; Parallel single-precision floating point element swizzling
2752 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps.  It legitimizes the operands with
;; ix86_fixup_binary_operands, emits the insn into the returned
;; destination, and copies that back to operands[0] when the fixup chose a
;; different destination.
2754 (define_expand "sse_movhlps_exp"
2755 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2758 (match_operand:V4SF 1 "nonimmediate_operand" "")
2759 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2760 (parallel [(const_int 6)
2766 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2768 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2770 /* Fix up the destination if needed. */
2771 if (dst != operands[0])
2772 emit_move_insn (operands[0], dst);
;; movhlps and its memory-operand equivalents.  Alternatives:
;;   0/1: register source, [v]movhlps;
;;   2/3: offsettable-memory source ("o"), implemented as [v]movlps from
;;        the high half of operand 2 (%H2);
;;   4:   memory destination, implemented as [v]movhps storing operand 2.
;; The insn condition rejects the case where operands 1 and 2 are both in
;; memory.
2777 (define_insn "sse_movhlps"
2778 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2781 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2782 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2783 (parallel [(const_int 6)
2787 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2789 movhlps\t{%2, %0|%0, %2}
2790 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2791 movlps\t{%H2, %0|%0, %H2}
2792 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2793 %vmovhps\t{%2, %0|%0, %2}"
2794 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2795 (set_attr "type" "ssemov")
2796 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2797 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps.  It legitimizes the operands with
;; ix86_fixup_binary_operands, emits the insn into the returned
;; destination, and copies that back to operands[0] when the fixup chose a
;; different destination.
2799 (define_expand "sse_movlhps_exp"
2800 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2803 (match_operand:V4SF 1 "nonimmediate_operand" "")
2804 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2805 (parallel [(const_int 0)
2811 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2813 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2815 /* Fix up the destination if needed. */
2816 if (dst != operands[0])
2817 emit_move_insn (operands[0], dst);
;; movlhps and its memory-operand equivalents.  Alternatives:
;;   0/1: register source, [v]movlhps;
;;   2/3: memory source, implemented as [v]movhps loading operand 2 into
;;        the high half of the destination;
;;   4:   offsettable-memory destination, implemented as [v]movlps storing
;;        operand 2 into the high half of operand 0 (%H0).
;; Alternative 3 emits the three-operand vmovhps, whose last source must be
;; a memory operand (its "mode" is V2SF, like alternative 2), so operand 2
;; uses "m" there.  With "x" the alternative merely duplicated alternative
;; 1's constraints and would have produced a register-register vmovhps.
2822 (define_insn "sse_movlhps"
2823 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2826 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2827 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2828 (parallel [(const_int 0)
2832 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2834 movlhps\t{%2, %0|%0, %2}
2835 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2836 movhps\t{%2, %0|%0, %2}
2837 vmovhps\t{%2, %1, %0|%0, %1, %2}
2838 %vmovlps\t{%2, %H0|%H0, %2}"
2839 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2840 (set_attr "type" "ssemov")
2841 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2842 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2844 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit vectors: selects elements {2,10,3,11,6,14,7,15}
;; from the pair of V8SF operands 1 and 2, i.e. the high two elements of
;; each 128-bit lane of both inputs, interleaved.
2845 (define_insn "avx_unpckhps256"
2846 [(set (match_operand:V8SF 0 "register_operand" "=x")
2849 (match_operand:V8SF 1 "register_operand" "x")
2850 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2851 (parallel [(const_int 2) (const_int 10)
2852 (const_int 3) (const_int 11)
2853 (const_int 6) (const_int 14)
2854 (const_int 7) (const_int 15)])))]
2856 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2857 [(set_attr "type" "sselog")
2858 (set_attr "prefix" "vex")
2859 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF operands, built from three
;; shuffles: a per-lane low interleave (indices {0,8,1,9,4,12,5,13}), a
;; per-lane high interleave ({2,10,3,11,6,14,7,15}), and a final selection
;; of elements {4,5,6,7,12,13,14,15} into operand 0.  operands[3] and
;; operands[4] are fresh V8SF temporaries (presumably holding the two
;; intermediate results -- their set destinations are not visible here).
2861 (define_expand "vec_interleave_highv8sf"
2865 (match_operand:V8SF 1 "register_operand" "x")
2866 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2867 (parallel [(const_int 0) (const_int 8)
2868 (const_int 1) (const_int 9)
2869 (const_int 4) (const_int 12)
2870 (const_int 5) (const_int 13)])))
2876 (parallel [(const_int 2) (const_int 10)
2877 (const_int 3) (const_int 11)
2878 (const_int 6) (const_int 14)
2879 (const_int 7) (const_int 15)])))
2880 (set (match_operand:V8SF 0 "register_operand" "")
2885 (parallel [(const_int 4) (const_int 5)
2886 (const_int 6) (const_int 7)
2887 (const_int 12) (const_int 13)
2888 (const_int 14) (const_int 15)])))]
2891 operands[3] = gen_reg_rtx (V8SFmode);
2892 operands[4] = gen_reg_rtx (V8SFmode);
;; [v]unpckhps on 128-bit vectors: selects elements {2,6,3,7} from the
;; pair of V4SF operands 1 and 2.  Alternative 0 is the two-operand legacy
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
2895 (define_insn "vec_interleave_highv4sf"
2896 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2899 (match_operand:V4SF 1 "register_operand" "0,x")
2900 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2901 (parallel [(const_int 2) (const_int 6)
2902 (const_int 3) (const_int 7)])))]
2905 unpckhps\t{%2, %0|%0, %2}
2906 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2907 [(set_attr "isa" "noavx,avx")
2908 (set_attr "type" "sselog")
2909 (set_attr "prefix" "orig,vex")
2910 (set_attr "mode" "V4SF")])
2912 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on 256-bit vectors: selects elements {0,8,1,9,4,12,5,13}
;; from the pair of V8SF operands 1 and 2, i.e. the low two elements of
;; each 128-bit lane of both inputs, interleaved.
2913 (define_insn "avx_unpcklps256"
2914 [(set (match_operand:V8SF 0 "register_operand" "=x")
2917 (match_operand:V8SF 1 "register_operand" "x")
2918 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2919 (parallel [(const_int 0) (const_int 8)
2920 (const_int 1) (const_int 9)
2921 (const_int 4) (const_int 12)
2922 (const_int 5) (const_int 13)])))]
2924 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2925 [(set_attr "type" "sselog")
2926 (set_attr "prefix" "vex")
2927 (set_attr "mode" "V8SF")])
;; Full-width low interleave of two V8SF operands, built from three
;; shuffles: a per-lane low interleave (indices {0,8,1,9,4,12,5,13}), a
;; per-lane high interleave ({2,10,3,11,6,14,7,15}), and a final selection
;; of elements {0,1,2,3,8,9,10,11} into operand 0.  operands[3] and
;; operands[4] are fresh V8SF temporaries (presumably holding the two
;; intermediate results -- their set destinations are not visible here).
2929 (define_expand "vec_interleave_lowv8sf"
2933 (match_operand:V8SF 1 "register_operand" "x")
2934 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2935 (parallel [(const_int 0) (const_int 8)
2936 (const_int 1) (const_int 9)
2937 (const_int 4) (const_int 12)
2938 (const_int 5) (const_int 13)])))
2944 (parallel [(const_int 2) (const_int 10)
2945 (const_int 3) (const_int 11)
2946 (const_int 6) (const_int 14)
2947 (const_int 7) (const_int 15)])))
2948 (set (match_operand:V8SF 0 "register_operand" "")
2953 (parallel [(const_int 0) (const_int 1)
2954 (const_int 2) (const_int 3)
2955 (const_int 8) (const_int 9)
2956 (const_int 10) (const_int 11)])))]
2959 operands[3] = gen_reg_rtx (V8SFmode);
2960 operands[4] = gen_reg_rtx (V8SFmode);
;; [v]unpcklps on 128-bit vectors: selects elements {0,4,1,5} from the
;; pair of V4SF operands 1 and 2.  Alternative 0 is the two-operand legacy
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
2963 (define_insn "vec_interleave_lowv4sf"
2964 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2967 (match_operand:V4SF 1 "register_operand" "0,x")
2968 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2969 (parallel [(const_int 0) (const_int 4)
2970 (const_int 1) (const_int 5)])))]
2973 unpcklps\t{%2, %0|%0, %2}
2974 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2975 [(set_attr "isa" "noavx,avx")
2976 (set_attr "type" "sselog")
2977 (set_attr "prefix" "orig,vex")
2978 (set_attr "mode" "V4SF")])
2980 ;; These are modeled with the same vec_concat as the others so that we
2981 ;; capture users of shufps that can use the new instructions
;; vmovshdup on 256-bit vectors: duplicates the odd-indexed elements of
;; operand 1 (selection {1,1,3,3,5,5,7,7}).
2982 (define_insn "avx_movshdup256"
2983 [(set (match_operand:V8SF 0 "register_operand" "=x")
2986 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2988 (parallel [(const_int 1) (const_int 1)
2989 (const_int 3) (const_int 3)
2990 (const_int 5) (const_int 5)
2991 (const_int 7) (const_int 7)])))]
2993 "vmovshdup\t{%1, %0|%0, %1}"
2994 [(set_attr "type" "sse")
2995 (set_attr "prefix" "vex")
2996 (set_attr "mode" "V8SF")])
;; 128-bit [v]movshdup on a V4SF register-or-memory operand.  Only the
;; first selector index (1) is visible in this chunk; the remaining
;; indices are presumably the 128-bit analogue of avx_movshdup256.
2998 (define_insn "sse3_movshdup"
2999 [(set (match_operand:V4SF 0 "register_operand" "=x")
3002 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3004 (parallel [(const_int 1)
3009 "%vmovshdup\t{%1, %0|%0, %1}"
3010 [(set_attr "type" "sse")
3011 (set_attr "prefix_rep" "1")
3012 (set_attr "prefix" "maybe_vex")
3013 (set_attr "mode" "V4SF")])
;; vmovsldup on 256-bit vectors: duplicates the even-indexed elements of
;; operand 1 (selection {0,0,2,2,4,4,6,6}).
3015 (define_insn "avx_movsldup256"
3016 [(set (match_operand:V8SF 0 "register_operand" "=x")
3019 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3021 (parallel [(const_int 0) (const_int 0)
3022 (const_int 2) (const_int 2)
3023 (const_int 4) (const_int 4)
3024 (const_int 6) (const_int 6)])))]
3026 "vmovsldup\t{%1, %0|%0, %1}"
3027 [(set_attr "type" "sse")
3028 (set_attr "prefix" "vex")
3029 (set_attr "mode" "V8SF")])
;; 128-bit [v]movsldup on a V4SF register-or-memory operand.  Only the
;; first selector index (0) is visible in this chunk; the remaining
;; indices are presumably the 128-bit analogue of avx_movsldup256.
3031 (define_insn "sse3_movsldup"
3032 [(set (match_operand:V4SF 0 "register_operand" "=x")
3035 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3037 (parallel [(const_int 0)
3042 "%vmovsldup\t{%1, %0|%0, %1}"
3043 [(set_attr "type" "sse")
3044 (set_attr "prefix_rep" "1")
3045 (set_attr "prefix" "maybe_vex")
3046 (set_attr "mode" "V4SF")])
;; Expander for the 256-bit shufps builtin.  The 8-bit immediate (operand
;; 3) is split into four 2-bit fields and turned into the eight explicit
;; element indices avx_shufps256_1 expects: fields 0 and 1 index operand 1
;; and fields 2 and 3 index operand 2 (offset 8); the same four selections
;; are repeated for the upper lane with an extra offset of 4.
3048 (define_expand "avx_shufps256"
3049 [(match_operand:V8SF 0 "register_operand" "")
3050 (match_operand:V8SF 1 "register_operand" "")
3051 (match_operand:V8SF 2 "nonimmediate_operand" "")
3052 (match_operand:SI 3 "const_int_operand" "")]
3055 int mask = INTVAL (operands[3]);
3056 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3057 GEN_INT ((mask >> 0) & 3),
3058 GEN_INT ((mask >> 2) & 3),
3059 GEN_INT (((mask >> 4) & 3) + 8),
3060 GEN_INT (((mask >> 6) & 3) + 8),
3061 GEN_INT (((mask >> 0) & 3) + 4),
3062 GEN_INT (((mask >> 2) & 3) + 4),
3063 GEN_INT (((mask >> 4) & 3) + 12),
3064 GEN_INT (((mask >> 6) & 3) + 12)));
3068 ;; One bit in mask selects 2 elements.
;; vshufps on 256-bit vectors with the selection spelled out as eight
;; element indices (operands 3..10).  The insn condition requires the
;; upper-lane indices (operands 7..10) to equal the lower-lane ones
;; (operands 3..6) plus 4, since one immediate drives both lanes.  The
;; output code rebuilds the 8-bit immediate from operands 3..6 and stores
;; it back into operands[3] for printing.
3069 (define_insn "avx_shufps256_1"
3070 [(set (match_operand:V8SF 0 "register_operand" "=x")
3073 (match_operand:V8SF 1 "register_operand" "x")
3074 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3075 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3076 (match_operand 4 "const_0_to_3_operand" "")
3077 (match_operand 5 "const_8_to_11_operand" "")
3078 (match_operand 6 "const_8_to_11_operand" "")
3079 (match_operand 7 "const_4_to_7_operand" "")
3080 (match_operand 8 "const_4_to_7_operand" "")
3081 (match_operand 9 "const_12_to_15_operand" "")
3082 (match_operand 10 "const_12_to_15_operand" "")])))]
3084 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3085 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3086 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3087 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3090 mask = INTVAL (operands[3]);
3091 mask |= INTVAL (operands[4]) << 2;
3092 mask |= (INTVAL (operands[5]) - 8) << 4;
3093 mask |= (INTVAL (operands[6]) - 8) << 6;
3094 operands[3] = GEN_INT (mask);
3096 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3098 [(set_attr "type" "sselog")
3099 (set_attr "length_immediate" "1")
3100 (set_attr "prefix" "vex")
3101 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps builtin.  The 8-bit immediate (operand
;; 3) is split into four 2-bit fields; fields 0 and 1 index operand 1 and
;; fields 2 and 3 index operand 2 (offset 4).  The resulting explicit
;; indices are passed to gen_sse_shufps_v4sf.
3103 (define_expand "sse_shufps"
3104 [(match_operand:V4SF 0 "register_operand" "")
3105 (match_operand:V4SF 1 "register_operand" "")
3106 (match_operand:V4SF 2 "nonimmediate_operand" "")
3107 (match_operand:SI 3 "const_int_operand" "")]
3110 int mask = INTVAL (operands[3]);
3111 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3112 GEN_INT ((mask >> 0) & 3),
3113 GEN_INT ((mask >> 2) & 3),
3114 GEN_INT (((mask >> 4) & 3) + 4),
3115 GEN_INT (((mask >> 6) & 3) + 4)));
;; [v]shufps for every mode in the SSEMODE4S iterator, with the selection
;; spelled out as four element indices: operands 3 and 4 pick from operand
;; 1 (0..3), operands 5 and 6 from operand 2 (4..7).  The output code
;; rebuilds the 8-bit immediate from those indices, stores it into
;; operands[3], and returns the legacy two-operand or the AVX
;; three-operand template according to which_alternative.
3119 (define_insn "sse_shufps_<mode>"
3120 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x,x")
3121 (vec_select:SSEMODE4S
3122 (vec_concat:<ssedoublesizemode>
3123 (match_operand:SSEMODE4S 1 "register_operand" "0,x")
3124 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm,xm"))
3125 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3126 (match_operand 4 "const_0_to_3_operand" "")
3127 (match_operand 5 "const_4_to_7_operand" "")
3128 (match_operand 6 "const_4_to_7_operand" "")])))]
3132 mask |= INTVAL (operands[3]) << 0;
3133 mask |= INTVAL (operands[4]) << 2;
3134 mask |= (INTVAL (operands[5]) - 4) << 4;
3135 mask |= (INTVAL (operands[6]) - 4) << 6;
3136 operands[3] = GEN_INT (mask);
3138 switch (which_alternative)
3141 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3143 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3148 [(set_attr "isa" "noavx,avx")
3149 (set_attr "type" "sselog")
3150 (set_attr "length_immediate" "1")
3151 (set_attr "prefix" "orig,vex")
3152 (set_attr "mode" "V4SF")])
;; Extract the high two elements ({2,3}) of a V4SF operand as V2SF.
;; Alternatives:
;;   0: register -> memory, [v]movhps store;
;;   1: register -> register, [v]movhlps (destination printed with %d0);
;;   2: offsettable memory -> register, [v]movlps from the high half of
;;      operand 1 (%H1).
3154 (define_insn "sse_storehps"
3155 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3157 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3158 (parallel [(const_int 2) (const_int 3)])))]
3161 %vmovhps\t{%1, %0|%0, %1}
3162 %vmovhlps\t{%1, %d0|%d0, %1}
3163 %vmovlps\t{%H1, %d0|%d0, %H1}"
3164 [(set_attr "type" "ssemov")
3165 (set_attr "prefix" "maybe_vex")
3166 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  It legitimizes the operands with
;; ix86_fixup_binary_operands, emits the insn into the returned
;; destination, and copies that back to operands[0] when the fixup chose a
;; different destination.
3168 (define_expand "sse_loadhps_exp"
3169 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3172 (match_operand:V4SF 1 "nonimmediate_operand" "")
3173 (parallel [(const_int 0) (const_int 1)]))
3174 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3177 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3179 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3181 /* Fix up the destination if needed. */
3182 if (dst != operands[0])
3183 emit_move_insn (operands[0], dst);
;; Replace the high half of a V4SF value: elements {0,1} of operand 1 are
;; kept and the V2SF operand 2 supplies the rest.  Alternatives:
;;   0/1: memory source, [v]movhps;
;;   2/3: register source, [v]movlhps;
;;   4:   offsettable-memory destination, [v]movlps storing operand 2 into
;;        the high half of operand 0 (%H0).
3188 (define_insn "sse_loadhps"
3189 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3192 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3193 (parallel [(const_int 0) (const_int 1)]))
3194 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3197 movhps\t{%2, %0|%0, %2}
3198 vmovhps\t{%2, %1, %0|%0, %1, %2}
3199 movlhps\t{%2, %0|%0, %2}
3200 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3201 %vmovlps\t{%2, %H0|%H0, %2}"
3202 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3203 (set_attr "type" "ssemov")
3204 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3205 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two elements ({0,1}) of a V4SF operand as V2SF.
;; Alternatives:
;;   0: register -> memory, [v]movlps store;
;;   1: register -> register, full [v]movaps copy;
;;   2: memory -> register, [v]movlps load (destination printed with %d0).
3207 (define_insn "sse_storelps"
3208 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3210 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3211 (parallel [(const_int 0) (const_int 1)])))]
3214 %vmovlps\t{%1, %0|%0, %1}
3215 %vmovaps\t{%1, %0|%0, %1}
3216 %vmovlps\t{%1, %d0|%d0, %1}"
3217 [(set_attr "type" "ssemov")
3218 (set_attr "prefix" "maybe_vex")
3219 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  It legitimizes the operands with
;; ix86_fixup_binary_operands, emits the insn into the returned
;; destination, and copies that back to operands[0] when the fixup chose a
;; different destination.
3221 (define_expand "sse_loadlps_exp"
3222 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3224 (match_operand:V2SF 2 "nonimmediate_operand" "")
3226 (match_operand:V4SF 1 "nonimmediate_operand" "")
3227 (parallel [(const_int 2) (const_int 3)]))))]
3230 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3232 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3234 /* Fix up the destination if needed. */
3235 if (dst != operands[0])
3236 emit_move_insn (operands[0], dst);
;; Replace the low half of a V4SF value: the V2SF operand 2 supplies the
;; low elements and elements {2,3} of operand 1 are kept.  Alternatives:
;;   0/1: both inputs in registers, [v]shufps with immediate 0xe4;
;;   2/3: memory source, [v]movlps load;
;;   4:   memory destination, [v]movlps store of operand 2.
;; Alternative 3 emits the three-operand vmovlps, whose last source must be
;; a memory operand (its "mode" is V2SF, like alternative 2), so operand 2
;; uses "m" there.  With "x" the alternative merely duplicated alternative
;; 1's constraints and would have produced a register-register vmovlps.
3241 (define_insn "sse_loadlps"
3242 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3244 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3246 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3247 (parallel [(const_int 2) (const_int 3)]))))]
3250 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3251 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3252 movlps\t{%2, %0|%0, %2}
3253 vmovlps\t{%2, %1, %0|%0, %1, %2}
3254 %vmovlps\t{%2, %0|%0, %2}"
3255 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3256 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3257 (set_attr "length_immediate" "1,1,*,*,*")
3258 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3259 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movss: register-only V4SF pattern combining operands 2 and 1 (the
;; combining rtx and mask are not visible in this excerpt).  Emits movss with
;; operand 1 tied to the destination, or the three-operand vmovss for AVX.
3261 (define_insn "sse_movss"
3262 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3264 (match_operand:V4SF 2 "register_operand" " x,x")
3265 (match_operand:V4SF 1 "register_operand" " 0,x")
3269 movss\t{%2, %0|%0, %2}
3270 vmovss\t{%2, %1, %0|%0, %1, %2}"
3271 [(set_attr "isa" "noavx,avx")
3272 (set_attr "type" "ssemov")
3273 (set_attr "prefix" "orig,vex")
3274 (set_attr "mode" "SF")])
;; vec_dupv4sf: expander for a V4SF whose source is the SF operand 1.
;; Operand 1 is an SFmode value, so it has to be forced into an SFmode
;; register; asking force_reg for a V4SFmode register would request a move
;; between mismatched modes whenever operand 1 is not already a register.
3276 (define_expand "vec_dupv4sf"
3277 [(set (match_operand:V4SF 0 "register_operand" "")
3279 (match_operand:SF 1 "nonimmediate_operand" "")))]
3283 operands[1] = force_reg (SFmode, operands[1]);
;; *vec_dupv4sf_avx: VEX-encoded V4SF pattern on SF operand 1.  A register
;; source uses vshufps $0 with the source as both inputs; a memory source
;; uses vbroadcastss.
3286 (define_insn "*vec_dupv4sf_avx"
3287 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3289 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3292 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3293 vbroadcastss\t{%1, %0|%0, %1}"
3294 [(set_attr "type" "sselog1,ssemov")
3295 (set_attr "length_immediate" "1,0")
3296 (set_attr "prefix_extra" "0,1")
3297 (set_attr "prefix" "vex")
3298 (set_attr "mode" "V4SF")])
;; *vec_dupv4sf: V4SF pattern on SF operand 1, which must be a register
;; tied to the destination; emitted as shufps $0 of the destination with
;; itself.
3300 (define_insn "*vec_dupv4sf"
3301 [(set (match_operand:V4SF 0 "register_operand" "=x")
3303 (match_operand:SF 1 "register_operand" "0")))]
3305 "shufps\t{$0, %0, %0|%0, %0, 0}"
3306 [(set_attr "type" "sselog1")
3307 (set_attr "length_immediate" "1")
3308 (set_attr "mode" "V4SF")])
;; *vec_concatv2sf_sse4_1: V2SF built from the SF operands 1 and 2.
;; Alternatives, in order: unpcklps / vunpcklps (operand 2 in a register),
;; insertps / vinsertps $0x10 (operand 2 in memory), movss (operand 1 in
;; memory, operand 2 matching constraint C), then two MMX-register forms
;; (punpckldq, movd).
3310 ;; Although insertps takes register source, we prefer
3311 ;; unpcklps with register source since it is shorter.
3312 (define_insn "*vec_concatv2sf_sse4_1"
3313 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3315 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3316 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3319 unpcklps\t{%2, %0|%0, %2}
3320 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3321 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3322 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3323 %vmovss\t{%1, %0|%0, %1}
3324 punpckldq\t{%2, %0|%0, %2}
3325 movd\t{%1, %0|%0, %1}"
3326 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3327 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3328 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3329 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3330 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3331 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3332 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; *vec_concatv2sf_sse: V2SF built from the SF operands 1 and 2 without the
;; insertps forms.  Alternatives, in order: unpcklps (both in SSE registers),
;; movss (operand 1 in memory, operand 2 matching constraint C), and the MMX
;; forms punpckldq and movd.
3334 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3335 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3336 ;; alternatives pretty much forces the MMX alternative to be chosen.
3337 (define_insn "*vec_concatv2sf_sse"
3338 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3340 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3341 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3344 unpcklps\t{%2, %0|%0, %2}
3345 movss\t{%1, %0|%0, %1}
3346 punpckldq\t{%2, %0|%0, %2}
3347 movd\t{%1, %0|%0, %1}"
3348 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3349 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf_sse: V4SF built from the V2SF operands 1 and 2.  A
;; register operand 2 uses movlhps / vmovlhps; a memory operand 2 uses
;; movhps / vmovhps.
3351 (define_insn "*vec_concatv4sf_sse"
3352 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3354 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3355 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3358 movlhps\t{%2, %0|%0, %2}
3359 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3360 movhps\t{%2, %0|%0, %2}
3361 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3362 [(set_attr "isa" "noavx,avx,noavx,avx")
3363 (set_attr "type" "ssemov")
3364 (set_attr "prefix" "orig,vex,orig,vex")
3365 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander over the SSEMODE iterator that hands the
;; destination register (operand 0) and the initializer (operand 1) to
;; ix86_expand_vector_init, passing false as its first argument.
3367 (define_expand "vec_init<mode>"
3368 [(match_operand:SSEMODE 0 "register_operand" "")
3369 (match_operand 1 "" "")]
3372 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_set<mode>_0_sse4_1: vec_merge of the duplicated scalar operand 2
;; into vector operand 1 (the merge mask and insn condition are not visible
;; in this excerpt).  Ten alternatives: three with operand 1 matching
;; constraint C (insertps $0xe, scalar move, movd), movss / vmovss,
;; pinsrd / vpinsrd, and three memory-destination forms whose templates are
;; not visible here.
3376 ;; Avoid combining registers from different units in a single alternative,
3377 ;; see comment above inline_secondary_memory_needed function in i386.c
3378 (define_insn "*vec_set<mode>_0_sse4_1"
3379 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3380 "=x,x,x ,x,x,x ,x ,m,m,m")
3381 (vec_merge:SSEMODE4S
3382 (vec_duplicate:SSEMODE4S
3383 (match_operand:<ssescalarmode> 2 "general_operand"
3384 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3385 (match_operand:SSEMODE4S 1 "vector_move_operand"
3386 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3390 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3391 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3392 %vmovd\t{%2, %0|%0, %2}
3393 movss\t{%2, %0|%0, %2}
3394 vmovss\t{%2, %1, %0|%0, %1, %2}
3395 pinsrd\t{$0, %2, %0|%0, %2, 0}
3396 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3400 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3401 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3402 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3403 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3404 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3405 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
;; *vec_set<mode>_0_sse2: vec_merge of the duplicated scalar operand 2 into
;; vector operand 1, without the insertps / pinsrd forms.  The constraint
;; strings, merge mask and condition are not visible in this excerpt; the
;; visible templates are a scalar move, movd and movss, and the mode
;; attribute lists six alternatives.
3407 ;; Avoid combining registers from different units in a single alternative,
3408 ;; see comment above inline_secondary_memory_needed function in i386.c
3409 (define_insn "*vec_set<mode>_0_sse2"
3410 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3412 (vec_merge:SSEMODE4S
3413 (vec_duplicate:SSEMODE4S
3414 (match_operand:<ssescalarmode> 2 "general_operand"
3416 (match_operand:SSEMODE4S 1 "vector_move_operand"
3421 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3422 movd\t{%2, %0|%0, %2}
3423 movss\t{%2, %0|%0, %2}
3427 [(set_attr "type" "ssemov")
3428 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
;; vec_set<mode>_0: vec_merge of the duplicated scalar operand 2 into vector
;; operand 1, using only movss for the visible register alternatives.  The
;; constraint strings, merge mask and condition are not visible in this
;; excerpt; the mode attribute lists five alternatives.
3430 ;; Avoid combining registers from different units in a single alternative,
3431 ;; see comment above inline_secondary_memory_needed function in i386.c
3432 (define_insn "vec_set<mode>_0"
3433 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3435 (vec_merge:SSEMODE4S
3436 (vec_duplicate:SSEMODE4S
3437 (match_operand:<ssescalarmode> 2 "general_operand"
3439 (match_operand:SSEMODE4S 1 "vector_move_operand"
3444 movss\t{%2, %0|%0, %2}
3445 movss\t{%2, %0|%0, %2}
3449 [(set_attr "type" "ssemov")
3450 (set_attr "mode" "SF,SF,*,*,*")])
;; *vec_setv4sf_sse4_1: inserts SF operand 2 into V4SF operand 1 at the
;; position given by operand 3, a power of two in the range accepted by
;; const_pow2_1_to_8_operand.  The output code rewrites operand 3 into the
;; instruction immediate as exact_log2 (operand 3) << 4, then emits insertps
;; or vinsertps.
3452 ;; A subset is vec_setv4sf.
3453 (define_insn "*vec_setv4sf_sse4_1"
3454 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3457 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3458 (match_operand:V4SF 1 "register_operand" "0,x")
3459 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
3462 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3463 switch (which_alternative)
3466 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3468 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3473 [(set_attr "isa" "noavx,avx")
3474 (set_attr "type" "sselog")
3475 (set_attr "prefix_data16" "1,*")
3476 (set_attr "prefix_extra" "1")
3477 (set_attr "length_immediate" "1")
3478 (set_attr "prefix" "orig,vex")
3479 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec form of insertps taking the full 8-bit immediate
;; in operand 3.  When operand 2 is in memory, the top two bits of the
;; immediate (count_s = imm >> 6) are folded into the address as an SFmode
;; reference at byte offset count_s * 4, and the immediate is masked to its
;; low six bits before printing.
3481 (define_insn "sse4_1_insertps"
3482 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3483 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3484 (match_operand:V4SF 1 "register_operand" "0,x")
3485 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3489 if (MEM_P (operands[2]))
3491 unsigned count_s = INTVAL (operands[3]) >> 6;
3493 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3494 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3496 switch (which_alternative)
3499 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3501 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3506 [(set_attr "isa" "noavx,avx")
3507 (set_attr "type" "sselog")
3508 (set_attr "prefix_data16" "1,*")
3509 (set_attr "prefix_extra" "1")
3510 (set_attr "length_immediate" "1")
3511 (set_attr "prefix" "orig,vex")
3512 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (TARGET_SSE && reload_completed) for a vec_merge of a
;; duplicated non-memory scalar into a memory destination: emits a scalar
;; move to byte offset 0 of the destination.  The opening line of this
;; definition is not visible in this excerpt - presumably a define_split.
3515 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
3516 (vec_merge:SSEMODE4S
3517 (vec_duplicate:SSEMODE4S
3518 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3521 "TARGET_SSE && reload_completed"
3524 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set<mode>: expander over the SSEMODE iterator.  Passes the vector
;; (operand 0), the scalar register (operand 1) and the constant element
;; index (operand 2) to ix86_expand_vector_set, with false as its first
;; argument.
3529 (define_expand "vec_set<mode>"
3530 [(match_operand:SSEMODE 0 "register_operand" "")
3531 (match_operand:<ssescalarmode> 1 "register_operand" "")
3532 (match_operand 2 "const_int_operand" "")]
3535 ix86_expand_vector_set (false, operands[0], operands[1],
3536 INTVAL (operands[2]));
;; *vec_extractv4sf_0: element 0 of V4SF operand 1 as an SF value; the two
;; operands may not both be memory.  After reload it is split into a plain
;; move from an SFmode view of operand 1, obtained either with gen_rtx_REG
;; on its register number or with gen_lowpart (the test choosing between
;; them is not visible in this excerpt).
3540 (define_insn_and_split "*vec_extractv4sf_0"
3541 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3543 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3544 (parallel [(const_int 0)])))]
3545 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3547 "&& reload_completed"
3550 rtx op1 = operands[1];
3552 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3554 op1 = gen_lowpart (SFmode, op1);
3555 emit_move_insn (operands[0], op1);
;; avx_vextractf128<mode>: expander that dispatches on the constant operand 2
;; (restricted to 0 or 1 by its predicate) to either vec_extract_lo_<mode>
;; or vec_extract_hi_<mode> and emits the chosen insn on operands 0 and 1.
;; The case labels are not visible in this excerpt.
3559 (define_expand "avx_vextractf128<mode>"
3560 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3561 (match_operand:AVX256MODE 1 "register_operand" "")
3562 (match_operand:SI 2 "const_0_to_1_operand" "")]
3565 rtx (*insn)(rtx, rtx);
3567 switch (INTVAL (operands[2]))
3570 insn = gen_vec_extract_lo_<mode>;
3573 insn = gen_vec_extract_hi_<mode>;
3579 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (AVX256MODE4P): selects elements 0 and 1 of
;; operand 1.  After reload it is split into a plain move from a
;; half-vector-mode view of operand 1, obtained either with gen_rtx_REG on
;; its register number or with gen_lowpart (the test choosing between them
;; is not visible in this excerpt).
3583 (define_insn_and_split "vec_extract_lo_<mode>"
3584 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3585 (vec_select:<avxhalfvecmode>
3586 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
3587 (parallel [(const_int 0) (const_int 1)])))]
3590 "&& reload_completed"
3593 rtx op1 = operands[1];
3595 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
3597 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
3598 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (AVX256MODE4P): selects elements 2 and 3 of the
;; register operand 1 into a register or memory destination, emitted as
;; vextractf128 with immediate 1.
3602 (define_insn "vec_extract_hi_<mode>"
3603 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3604 (vec_select:<avxhalfvecmode>
3605 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3606 (parallel [(const_int 2) (const_int 3)])))]
3608 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3609 [(set_attr "type" "sselog")
3610 (set_attr "prefix_extra" "1")
3611 (set_attr "length_immediate" "1")
3612 (set_attr "memory" "none,store")
3613 (set_attr "prefix" "vex")
3614 (set_attr "mode" "V8SF")])
;; vec_extract_lo_<mode> (AVX256MODE8P): selects elements 0 through 3 of
;; operand 1.  After reload it is split into a plain move from a
;; half-vector-mode view of operand 1, obtained either with gen_rtx_REG on
;; its register number or with gen_lowpart (the test choosing between them
;; is not visible in this excerpt).
3616 (define_insn_and_split "vec_extract_lo_<mode>"
3617 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3618 (vec_select:<avxhalfvecmode>
3619 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
3620 (parallel [(const_int 0) (const_int 1)
3621 (const_int 2) (const_int 3)])))]
3624 "&& reload_completed"
3627 rtx op1 = operands[1];
3629 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
3631 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
3632 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (AVX256MODE8P): selects elements 4 through 7 of the
;; register operand 1 into a register or memory destination, emitted as
;; vextractf128 with immediate 1.
3636 (define_insn "vec_extract_hi_<mode>"
3637 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3638 (vec_select:<avxhalfvecmode>
3639 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3640 (parallel [(const_int 4) (const_int 5)
3641 (const_int 6) (const_int 7)])))]
3643 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3644 [(set_attr "type" "sselog")
3645 (set_attr "prefix_extra" "1")
3646 (set_attr "length_immediate" "1")
3647 (set_attr "memory" "none,store")
3648 (set_attr "prefix" "vex")
3649 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v16hi: selects elements 0 through 7 of V16HI operand 1 as
;; a V8HI value.  After reload it is split into a plain move from a V8HImode
;; view of operand 1, obtained either with gen_rtx_REG on its register
;; number or with gen_lowpart (the test choosing between them is not visible
;; in this excerpt).
3651 (define_insn_and_split "vec_extract_lo_v16hi"
3652 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3654 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3655 (parallel [(const_int 0) (const_int 1)
3656 (const_int 2) (const_int 3)
3657 (const_int 4) (const_int 5)
3658 (const_int 6) (const_int 7)])))]
3661 "&& reload_completed"
3664 rtx op1 = operands[1];
3666 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3668 op1 = gen_lowpart (V8HImode, op1);
3669 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: selects elements 8 through 15 of the V16HI register
;; operand 1 as a V8HI value in a register or memory, emitted as
;; vextractf128 with immediate 1.
3673 (define_insn "vec_extract_hi_v16hi"
3674 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3676 (match_operand:V16HI 1 "register_operand" "x,x")
3677 (parallel [(const_int 8) (const_int 9)
3678 (const_int 10) (const_int 11)
3679 (const_int 12) (const_int 13)
3680 (const_int 14) (const_int 15)])))]
3682 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3683 [(set_attr "type" "sselog")
3684 (set_attr "prefix_extra" "1")
3685 (set_attr "length_immediate" "1")
3686 (set_attr "memory" "none,store")
3687 (set_attr "prefix" "vex")
3688 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v32qi: selects elements 0 through 15 of V32QI operand 1 as
;; a V16QI value.  After reload it is split into a plain move from a
;; V16QImode view of operand 1, obtained either with gen_rtx_REG on its
;; register number or with gen_lowpart (the test choosing between them is
;; not visible in this excerpt).
3690 (define_insn_and_split "vec_extract_lo_v32qi"
3691 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3693 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3694 (parallel [(const_int 0) (const_int 1)
3695 (const_int 2) (const_int 3)
3696 (const_int 4) (const_int 5)
3697 (const_int 6) (const_int 7)
3698 (const_int 8) (const_int 9)
3699 (const_int 10) (const_int 11)
3700 (const_int 12) (const_int 13)
3701 (const_int 14) (const_int 15)])))]
3704 "&& reload_completed"
3707 rtx op1 = operands[1];
3709 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3711 op1 = gen_lowpart (V16QImode, op1);
3712 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: selects elements 16 through 31 of the V32QI
;; register operand 1 as a V16QI value in a register or memory, emitted as
;; vextractf128 with immediate 1.
3716 (define_insn "vec_extract_hi_v32qi"
3717 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3719 (match_operand:V32QI 1 "register_operand" "x,x")
3720 (parallel [(const_int 16) (const_int 17)
3721 (const_int 18) (const_int 19)
3722 (const_int 20) (const_int 21)
3723 (const_int 22) (const_int 23)
3724 (const_int 24) (const_int 25)
3725 (const_int 26) (const_int 27)
3726 (const_int 28) (const_int 29)
3727 (const_int 30) (const_int 31)])))]
3729 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3730 [(set_attr "type" "sselog")
3731 (set_attr "prefix_extra" "1")
3732 (set_attr "length_immediate" "1")
3733 (set_attr "memory" "none,store")
3734 (set_attr "prefix" "vex")
3735 (set_attr "mode" "V8SF")])
;; *sse4_1_extractps: extracts the element of V4SF register operand 1 chosen
;; by the constant operand 2 (0..3) into an SF destination constrained to a
;; general register or memory, emitted as extractps / vextractps.
3737 (define_insn "*sse4_1_extractps"
3738 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3740 (match_operand:V4SF 1 "register_operand" "x")
3741 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3743 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3744 [(set_attr "type" "sselog")
3745 (set_attr "prefix_data16" "1")
3746 (set_attr "prefix_extra" "1")
3747 (set_attr "length_immediate" "1")
3748 (set_attr "prefix" "maybe_vex")
3749 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: extracts element operand 2 (0..3) of a V4SF memory
;; operand into an SF register.  The split replaces it with a scalar move
;; from the SFmode memory reference at byte offset i * 4.  The insn and
;; split conditions are not visible in this excerpt.
3751 (define_insn_and_split "*vec_extract_v4sf_mem"
3752 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3754 (match_operand:V4SF 1 "memory_operand" "o")
3755 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3761 int i = INTVAL (operands[2]);
3763 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extract<mode>: expander over VEC_EXTRACT_MODE.  Passes the scalar
;; destination (operand 0), the vector register (operand 1) and the constant
;; element index (operand 2) to ix86_expand_vector_extract, with false as
;; its first argument.
3767 (define_expand "vec_extract<mode>"
3768 [(match_operand:<avxscalarmode> 0 "register_operand" "")
3769 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3770 (match_operand 2 "const_int_operand" "")]
3773 ix86_expand_vector_extract (false, operands[0], operands[1],
3774 INTVAL (operands[2]));
3778 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3780 ;; Parallel double-precision floating point element swizzling
3782 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx_unpckhpd256: selects elements 1, 5, 3 and 7 from the combination of
;; the V4DF operands 1 and 2, emitted as vunpckhpd.
3784 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3785 (define_insn "avx_unpckhpd256"
3786 [(set (match_operand:V4DF 0 "register_operand" "=x")
3789 (match_operand:V4DF 1 "register_operand" "x")
3790 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3791 (parallel [(const_int 1) (const_int 5)
3792 (const_int 3) (const_int 7)])))]
3794 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3795 [(set_attr "type" "sselog")
3796 (set_attr "prefix" "vex")
3797 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: three-step expansion.  Two intermediate
;; selections of operands 1 and 2 use the index lists {0,4,2,6} and
;; {1,5,3,7}; a final selection with {2,3,6,7} writes operand 0.  The
;; preparation code allocates fresh V4DF pseudos for operands 3 and 4 -
;; presumably the two intermediates; their set destinations are not visible
;; in this excerpt.
3799 (define_expand "vec_interleave_highv4df"
3803 (match_operand:V4DF 1 "register_operand" "x")
3804 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3805 (parallel [(const_int 0) (const_int 4)
3806 (const_int 2) (const_int 6)])))
3812 (parallel [(const_int 1) (const_int 5)
3813 (const_int 3) (const_int 7)])))
3814 (set (match_operand:V4DF 0 "register_operand" "")
3819 (parallel [(const_int 2) (const_int 3)
3820 (const_int 6) (const_int 7)])))]
3823 operands[3] = gen_reg_rtx (V4DFmode);
3824 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander for the V2DF selection beginning with
;; element 1 of the operand 1 / operand 2 combination (the second index is
;; not visible in this excerpt).  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operands,
;; operand 2 is forced into a register.
3828 (define_expand "vec_interleave_highv2df"
3829 [(set (match_operand:V2DF 0 "register_operand" "")
3832 (match_operand:V2DF 1 "nonimmediate_operand" "")
3833 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3834 (parallel [(const_int 1)
3838 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3839 operands[2] = force_reg (V2DFmode, operands[2]);
;; *sse3_interleave_highv2df: SSE3 insn for the high V2DF interleave.
;; Alternatives, in order: unpckhpd / vunpckhpd (register forms), movddup
;; from %H1 when operand 2 matches the memory operand 1, movlpd / vmovlpd
;; loading %H1 from a memory operand 1, and a movhpd store when the
;; destination is memory tied to operand 2.
3842 (define_insn "*sse3_interleave_highv2df"
3843 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3846 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3847 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3848 (parallel [(const_int 1)
3850 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3852 unpckhpd\t{%2, %0|%0, %2}
3853 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3854 %vmovddup\t{%H1, %0|%0, %H1}
3855 movlpd\t{%H1, %0|%0, %H1}
3856 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3857 %vmovhpd\t{%1, %0|%0, %1}"
3858 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3859 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3860 (set_attr "prefix_data16" "*,*,*,1,*,1")
3861 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3862 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; *sse2_interleave_highv2df: SSE2 insn for the high V2DF interleave.
;; Alternatives, in order: unpckhpd (register form), movlpd loading %H1 from
;; a memory operand 1 with operand 2 tied to the destination, and a movhpd
;; store when the destination is memory tied to operand 2.
3864 (define_insn "*sse2_interleave_highv2df"
3865 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3868 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3869 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3870 (parallel [(const_int 1)
3872 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3874 unpckhpd\t{%2, %0|%0, %2}
3875 movlpd\t{%H1, %0|%0, %H1}
3876 movhpd\t{%1, %0|%0, %1}"
3877 [(set_attr "type" "sselog,ssemov,ssemov")
3878 (set_attr "prefix_data16" "*,1,1")
3879 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; avx_movddup256: expander whose pattern selects elements 0, 4, 2 and 6;
;; only operand 1 is visible as an input here, so the combined vector is
;; presumably operand 1 paired with itself - confirm against the full
;; definition.
3881 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3882 (define_expand "avx_movddup256"
3883 [(set (match_operand:V4DF 0 "register_operand" "")
3886 (match_operand:V4DF 1 "nonimmediate_operand" "")
3888 (parallel [(const_int 0) (const_int 4)
3889 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander whose pattern selects elements 0, 4, 2 and 6
;; from the combination of the V4DF operands 1 and 2.  No preparation code
;; is visible in this excerpt.
3892 (define_expand "avx_unpcklpd256"
3893 [(set (match_operand:V4DF 0 "register_operand" "")
3896 (match_operand:V4DF 1 "register_operand" "")
3897 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3898 (parallel [(const_int 0) (const_int 4)
3899 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: insn selecting elements 0, 4, 2 and 6 of the operand 1 /
;; operand 2 combination.  When operand 2 matches operand 1 it is emitted as
;; vmovddup, otherwise as vunpcklpd.  The visible part of the condition
;; allows a memory operand 1 only when it is rtx_equal_p to operand 2.
3902 (define_insn "*avx_unpcklpd256"
3903 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3906 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3907 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3908 (parallel [(const_int 0) (const_int 4)
3909 (const_int 2) (const_int 6)])))]
3911 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3913 vmovddup\t{%1, %0|%0, %1}
3914 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3915 [(set_attr "type" "sselog")
3916 (set_attr "prefix" "vex")
3917 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: three-step expansion.  Two intermediate
;; selections of operands 1 and 2 use the index lists {0,4,2,6} and
;; {1,5,3,7}; a final selection with {0,1,4,5} writes operand 0.  The
;; preparation code allocates fresh V4DF pseudos for operands 3 and 4 -
;; presumably the two intermediates; their set destinations are not visible
;; in this excerpt.
3919 (define_expand "vec_interleave_lowv4df"
3923 (match_operand:V4DF 1 "register_operand" "x")
3924 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3925 (parallel [(const_int 0) (const_int 4)
3926 (const_int 2) (const_int 6)])))
3932 (parallel [(const_int 1) (const_int 5)
3933 (const_int 3) (const_int 7)])))
3934 (set (match_operand:V4DF 0 "register_operand" "")
3939 (parallel [(const_int 0) (const_int 1)
3940 (const_int 4) (const_int 5)])))]
3943 operands[3] = gen_reg_rtx (V4DFmode);
3944 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander for the V2DF selection beginning with
;; element 0 of the operand 1 / operand 2 combination (the second index is
;; not visible in this excerpt).  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operands,
;; operand 1 is forced into a register.
3947 (define_expand "vec_interleave_lowv2df"
3948 [(set (match_operand:V2DF 0 "register_operand" "")
3951 (match_operand:V2DF 1 "nonimmediate_operand" "")
3952 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3953 (parallel [(const_int 0)
3957 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3958 operands[1] = force_reg (V2DFmode, operands[1]);
;; *sse3_interleave_lowv2df: SSE3 insn for the low V2DF interleave.
;; Alternatives, in order: unpcklpd / vunpcklpd (register forms), movddup
;; when operand 2 matches the memory operand 1, movhpd / vmovhpd loading a
;; memory operand 2, and a movlpd store of operand 2 to %H0 when the
;; destination is memory tied to operand 1.
3961 (define_insn "*sse3_interleave_lowv2df"
3962 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
3965 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
3966 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
3967 (parallel [(const_int 0)
3969 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3971 unpcklpd\t{%2, %0|%0, %2}
3972 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3973 %vmovddup\t{%1, %0|%0, %1}
3974 movhpd\t{%2, %0|%0, %2}
3975 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3976 %vmovlpd\t{%2, %H0|%H0, %2}"
3977 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3978 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3979 (set_attr "prefix_data16" "*,*,*,1,*,1")
3980 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3981 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; *sse2_interleave_lowv2df: SSE2 insn for the low V2DF interleave, with
;; operand 1 always tied to the destination.  Alternatives, in order:
;; unpcklpd (register operand 2), movhpd (memory operand 2), and a movlpd
;; store of operand 2 to %H0 when the destination is memory.
3983 (define_insn "*sse2_interleave_lowv2df"
3984 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3987 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3988 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3989 (parallel [(const_int 0)
3991 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3993 unpcklpd\t{%2, %0|%0, %2}
3994 movhpd\t{%2, %0|%0, %2}
3995 movlpd\t{%2, %H0|%H0, %2}"
3996 [(set_attr "type" "sselog,ssemov,ssemov")
3997 (set_attr "prefix_data16" "*,1,1")
3998 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from register operand 1: the DFmode view of that register
;; is stored twice, at byte offsets 0 and 8 of the destination.  The opening
;; line of this definition is not visible in this excerpt - presumably a
;; define_split.
4001 [(set (match_operand:V2DF 0 "memory_operand" "")
4004 (match_operand:V2DF 1 "register_operand" "")
4006 (parallel [(const_int 0)
4008 "TARGET_SSE3 && reload_completed"
4011 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4012 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4013 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (TARGET_SSE3) for a two-element selection from memory operand 1
;; whose second index equals the first plus 2: it becomes a vec_duplicate of
;; the DFmode memory reference at byte offset operand 2 * 8.  The opening
;; line of this definition is not visible in this excerpt - presumably a
;; define_split.
4018 [(set (match_operand:V2DF 0 "register_operand" "")
4021 (match_operand:V2DF 1 "memory_operand" "")
4023 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4024 (match_operand:SI 3 "const_int_operand" "")])))]
4025 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4026 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4028 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; avx_shufpd256: expander that decodes the immediate mask in operand 3 into
;; explicit element selectors for avx_shufpd256_1.  Mask bit 1 picks 5 or 4,
;; bit 2 picks 3 or 2, and bit 3 picks 7 or 6; the selector derived from
;; bit 0 is on a line not visible in this excerpt.
4031 (define_expand "avx_shufpd256"
4032 [(match_operand:V4DF 0 "register_operand" "")
4033 (match_operand:V4DF 1 "register_operand" "")
4034 (match_operand:V4DF 2 "nonimmediate_operand" "")
4035 (match_operand:SI 3 "const_int_operand" "")]
4038 int mask = INTVAL (operands[3]);
4039 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4041 GEN_INT (mask & 2 ? 5 : 4),
4042 GEN_INT (mask & 4 ? 3 : 2),
4043 GEN_INT (mask & 8 ? 7 : 6)));
;; avx_shufpd256_1: insn form of the 256-bit shufpd with one selector operand
;; per result element (ranges 0..1, 4..5, 2..3 and 6..7).  The output code
;; re-encodes the four selectors into the 4-bit immediate - each selector
;; minus its range base, shifted to bit positions 0 through 3 - and emits
;; vshufpd.
4047 (define_insn "avx_shufpd256_1"
4048 [(set (match_operand:V4DF 0 "register_operand" "=x")
4051 (match_operand:V4DF 1 "register_operand" "x")
4052 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4053 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4054 (match_operand 4 "const_4_to_5_operand" "")
4055 (match_operand 5 "const_2_to_3_operand" "")
4056 (match_operand 6 "const_6_to_7_operand" "")])))]
4060 mask = INTVAL (operands[3]);
4061 mask |= (INTVAL (operands[4]) - 4) << 1;
4062 mask |= (INTVAL (operands[5]) - 2) << 2;
4063 mask |= (INTVAL (operands[6]) - 6) << 3;
4064 operands[3] = GEN_INT (mask);
4066 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4068 [(set_attr "type" "sselog")
4069 (set_attr "length_immediate" "1")
4070 (set_attr "prefix" "vex")
4071 (set_attr "mode" "V4DF")])
;; sse2_shufpd: expander that decodes the immediate mask in operand 3 into
;; element selectors for sse2_shufpd_v2df.  Mask bit 1 picks 3 or 2; the
;; selector derived from bit 0 is on a line not visible in this excerpt.
4073 (define_expand "sse2_shufpd"
4074 [(match_operand:V2DF 0 "register_operand" "")
4075 (match_operand:V2DF 1 "register_operand" "")
4076 (match_operand:V2DF 2 "nonimmediate_operand" "")
4077 (match_operand:SI 3 "const_int_operand" "")]
4080 int mask = INTVAL (operands[3]);
4081 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4083 GEN_INT (mask & 2 ? 3 : 2)));
;; vec_extract_even<mode>: expander over SSEMODE_EO that forwards its three
;; register operands to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
4087 (define_expand "vec_extract_even<mode>"
4088 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4089 (match_operand:SSEMODE_EO 1 "register_operand" "")
4090 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4093 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd<mode>: expander over SSEMODE_EO that forwards its three
;; register operands to ix86_expand_vec_extract_even_odd with a final
;; argument of 1.
4097 (define_expand "vec_extract_odd<mode>"
4098 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4099 (match_operand:SSEMODE_EO 1 "register_operand" "")
4100 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4103 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
;; vec_interleave_highv2di: V2DI selection beginning with element 1 of the
;; operand 1 / operand 2 combination (the second index is not visible in
;; this excerpt), emitted as punpckhqdq or vpunpckhqdq.
4107 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4109 (define_insn "vec_interleave_highv2di"
4110 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4113 (match_operand:V2DI 1 "register_operand" "0,x")
4114 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4115 (parallel [(const_int 1)
4119 punpckhqdq\t{%2, %0|%0, %2}
4120 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4121 [(set_attr "isa" "noavx,avx")
4122 (set_attr "type" "sselog")
4123 (set_attr "prefix_data16" "1,*")
4124 (set_attr "prefix" "orig,vex")
4125 (set_attr "mode" "TI")])
;; vec_interleave_lowv2di: V2DI selection beginning with element 0 of the
;; operand 1 / operand 2 combination (the second index is not visible in
;; this excerpt), emitted as punpcklqdq or vpunpcklqdq.
4127 (define_insn "vec_interleave_lowv2di"
4128 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4131 (match_operand:V2DI 1 "register_operand" "0,x")
4132 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4133 (parallel [(const_int 0)
4137 punpcklqdq\t{%2, %0|%0, %2}
4138 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4139 [(set_attr "isa" "noavx,avx")
4140 (set_attr "type" "sselog")
4141 (set_attr "prefix_data16" "1,*")
4142 (set_attr "prefix" "orig,vex")
4143 (set_attr "mode" "TI")])
;; sse2_shufpd_<mode>: two-element shuffle over SSEMODE2D with one selector
;; from operand 1 (0..1) and one from operand 2 (2..3).  The output code
;; re-encodes the selectors into the 2-bit immediate (operand 3 in bit 0,
;; operand 4 minus 2 in bit 1) and emits shufpd or vshufpd.
4145 (define_insn "sse2_shufpd_<mode>"
4146 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x,x")
4147 (vec_select:SSEMODE2D
4148 (vec_concat:<ssedoublesizemode>
4149 (match_operand:SSEMODE2D 1 "register_operand" "0,x")
4150 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm,xm"))
4151 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4152 (match_operand 4 "const_2_to_3_operand" "")])))]
4156 mask = INTVAL (operands[3]);
4157 mask |= (INTVAL (operands[4]) - 2) << 1;
4158 operands[3] = GEN_INT (mask);
4160 switch (which_alternative)
4163 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4165 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4170 [(set_attr "isa" "noavx,avx")
4171 (set_attr "type" "sselog")
4172 (set_attr "length_immediate" "1")
4173 (set_attr "prefix" "orig,vex")
4174 (set_attr "mode" "V2DF")])
;; sse2_storehpd: element 1 of V2DF operand 1 as a DF value; the two operands
;; may not both be memory.  Six alternatives; the visible templates are a
;; movhpd store (alternative 0) and vunpckhpd on %d1, and the remaining
;; templates are not visible in this excerpt.  prefix_data16 is a computed
;; attribute whose condition tests alternative 0 with TARGET_AVX equal to 0.
4176 ;; Avoid combining registers from different units in a single alternative,
4177 ;; see comment above inline_secondary_memory_needed function in i386.c
4178 (define_insn "sse2_storehpd"
4179 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4181 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4182 (parallel [(const_int 1)])))]
4183 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4185 %vmovhpd\t{%1, %0|%0, %1}
4187 vunpckhpd\t{%d1, %0|%0, %d1}
4191 [(set_attr "isa" "base,noavx,avx,base,base,base")
4192 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4193 (set (attr "prefix_data16")
4195 (and (eq_attr "alternative" "0")
4196 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4198 (const_string "*")))
4199 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4200 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): extracting
;; element 1 of a V2DF memory operand into a DF register becomes a plain DF
;; load from byte offset 8.  The opening line of this definition is not
;; visible in this excerpt - presumably a define_split.
4203 [(set (match_operand:DF 0 "register_operand" "")
4205 (match_operand:V2DF 1 "memory_operand" "")
4206 (parallel [(const_int 1)])))]
4207 "TARGET_SSE2 && reload_completed"
4208 [(set (match_dup 0) (match_dup 1))]
4209 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; sse2_storelpd: element 0 of V2DF operand 1 as a DF value; the two operands
;; may not both be memory.  Five alternatives; only the movlpd store template
;; (alternative 0) is visible in this excerpt.
4211 ;; Avoid combining registers from different units in a single alternative,
4212 ;; see comment above inline_secondary_memory_needed function in i386.c
4213 (define_insn "sse2_storelpd"
4214 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4216 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4217 (parallel [(const_int 0)])))]
4218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4220 %vmovlpd\t{%1, %0|%0, %1}
4225 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4226 (set_attr "prefix_data16" "1,*,*,*,*")
4227 (set_attr "prefix" "maybe_vex")
4228 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): extracting
;; element 0 of V2DF operand 1 into a DF register becomes a plain move from
;; a DFmode view of operand 1, obtained either with gen_rtx_REG on its
;; register number or with gen_lowpart (the test choosing between them is
;; not visible in this excerpt).  The opening line of this definition is
;; also not visible - presumably a define_split.
4231 [(set (match_operand:DF 0 "register_operand" "")
4233 (match_operand:V2DF 1 "nonimmediate_operand" "")
4234 (parallel [(const_int 0)])))]
4235 "TARGET_SSE2 && reload_completed"
4238 rtx op1 = operands[1];
4240 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4242 op1 = gen_lowpart (DFmode, op1);
4243 emit_move_insn (operands[0], op1);
;; sse2_loadhpd_exp: expander wrapper around sse2_loadhpd.  It lets
;; ix86_fixup_binary_operands choose a destination, emits sse2_loadhpd into
;; it, and copies the result to operands[0] when the two differ.
4247 (define_expand "sse2_loadhpd_exp"
4248 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4251 (match_operand:V2DF 1 "nonimmediate_operand" "")
4252 (parallel [(const_int 0)]))
4253 (match_operand:DF 2 "nonimmediate_operand" "")))]
4256 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4258 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4260 /* Fix up the destination if needed. */
4261 if (dst != operands[0])
4262 emit_move_insn (operands[0], dst);
;; sse2_loadhpd: V2DF result built from element 0 of operand 1 and the DF
;; operand 2; operands 1 and 2 may not both be memory.  Eight alternatives:
;; movhpd / vmovhpd (operand 2 in memory), unpcklpd / vunpcklpd (operand 2 in
;; an SSE register), shufpd $1 (operand 2 tied to the destination), and three
;; memory-destination forms whose templates are not visible in this excerpt.
;; NOTE(review): the shufpd $1 alternative looks suspect - shufpd takes its
;; low result element from the destination register (here operand 2), while
;; this pattern's low element comes from operand 1.  Confirm against the
;; SHUFPD instruction definition before relying on it.
4267 ;; Avoid combining registers from different units in a single alternative,
4268 ;; see comment above inline_secondary_memory_needed function in i386.c
4269 (define_insn "sse2_loadhpd"
4270 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4271 "=x,x,x,x,x,o,o ,o")
4274 (match_operand:V2DF 1 "nonimmediate_operand"
4275 " 0,x,0,x,x,0,0 ,0")
4276 (parallel [(const_int 0)]))
4277 (match_operand:DF 2 "nonimmediate_operand"
4278 " m,m,x,x,0,x,*f,r")))]
4279 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4281 movhpd\t{%2, %0|%0, %2}
4282 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4283 unpcklpd\t{%2, %0|%0, %2}
4284 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4285 shufpd\t{$1, %1, %0|%0, %1, 1}
4289 [(set_attr "isa" "noavx,avx,noavx,avx,noavx,base,base,base")
4290 (set_attr "type" "ssemov,ssemov,sselog,sselog,sselog,ssemov,fmov,imov")
4291 (set_attr "prefix_data16" "1,*,*,*,*,*,*,*")
4292 (set_attr "length_immediate" "*,*,*,*,1,*,*,*")
4293 (set_attr "prefix" "orig,vex,orig,vex,orig,*,*,*")
4294 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): a V2DF memory
;; destination that keeps its own element 0 and takes DF register operand 1
;; as the other element becomes a plain DF store at byte offset 8.  The
;; opening line of this definition is not visible in this excerpt -
;; presumably a define_split.
4297 [(set (match_operand:V2DF 0 "memory_operand" "")
4299 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4300 (match_operand:DF 1 "register_operand" "")))]
4301 "TARGET_SSE2 && reload_completed"
4302 [(set (match_dup 0) (match_dup 1))]
4303 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; sse2_loadlpd_exp: expander wrapper around sse2_loadlpd.  It lets
;; ix86_fixup_binary_operands choose a destination, emits sse2_loadlpd into
;; it, and copies the result to operands[0] when the two differ.
4305 (define_expand "sse2_loadlpd_exp"
4306 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4308 (match_operand:DF 2 "nonimmediate_operand" "")
4310 (match_operand:V2DF 1 "nonimmediate_operand" "")
4311 (parallel [(const_int 1)]))))]
4314 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4316 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4318 /* Fix up the destination if needed. */
4319 if (dst != operands[0])
4320 emit_move_insn (operands[0], dst);
;; sse2_loadlpd: V2DF result built from the DF operand 2 and element 1 of
;; operand 1; operands 1 and 2 may not both be memory.  Eleven alternatives:
;; movsd load when operand 1 matches constraint C, movlpd / vmovlpd (operand
;; 2 in memory), movsd / vmovsd (operand 2 in a register), shufpd $2
;; (operand 2 tied to the destination), movhpd / vmovhpd loading %H1 from a
;; memory operand 1, and three memory-destination forms whose templates are
;; not visible in this excerpt.
4325 ;; Avoid combining registers from different units in a single alternative,
4326 ;; see comment above inline_secondary_memory_needed function in i386.c
4327 (define_insn "sse2_loadlpd"
4328 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4329 "=x,x,x,x,x,x,x,x,m,m ,m")
4331 (match_operand:DF 2 "nonimmediate_operand"
4332 " m,m,m,x,x,0,0,x,x,*f,r")
4334 (match_operand:V2DF 1 "vector_move_operand"
4335 " C,0,x,0,x,x,o,o,0,0 ,0")
4336 (parallel [(const_int 1)]))))]
4337 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4339 %vmovsd\t{%2, %0|%0, %2}
4340 movlpd\t{%2, %0|%0, %2}
4341 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4342 movsd\t{%2, %0|%0, %2}
4343 vmovsd\t{%2, %1, %0|%0, %1, %2}
4344 shufpd\t{$2, %1, %0|%0, %1, 2}
4345 movhpd\t{%H1, %0|%0, %H1}
4346 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4350 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4351 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4352 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4353 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4354 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4355 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (condition: TARGET_SSE2 && reload_completed) for a V2DF
;; memory destination.  The pattern keeps element 1 of operand 0 (match_dup
;; with index 1) and takes the other element, element 0, from DF register
;; operand 1.  Only element 0 changes, and it is the low half of the vector,
;; so the replacement DF store must go to byte offset 0.  Offset 8 would
;; overwrite the element the pattern preserves and leave element 0 stale.
4358 [(set (match_operand:V2DF 0 "memory_operand" "")
4360 (match_operand:DF 1 "register_operand" "")
4361 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4362 "TARGET_SSE2 && reload_completed"
4363 [(set (match_dup 0) (match_dup 1))]
4364 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4366 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 pattern selecting index 1 of V2DF operand 1 into DF
;; operand 0.  It uses the single-precision moves movhps (store), movhlps
;; (register) and movlps from the high half of the memory source (%H1).
;; The condition rejects memory-to-memory.
4368 (define_insn "*vec_extractv2df_1_sse"
4369 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4371 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4372 (parallel [(const_int 1)])))]
4373 "!TARGET_SSE2 && TARGET_SSE
4374 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4376 movhps\t{%1, %0|%0, %1}
4377 movhlps\t{%1, %0|%0, %1}
4378 movlps\t{%H1, %0|%0, %H1}"
4379 [(set_attr "type" "ssemov")
4380 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 pattern selecting index 0 of V2DF operand 1 into DF
;; operand 0.  Store and load alternatives use movlps; the register-to-
;; register alternative copies the whole register with movaps.  The
;; condition rejects memory-to-memory.
4382 (define_insn "*vec_extractv2df_0_sse"
4383 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4385 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4386 (parallel [(const_int 0)])))]
4387 "!TARGET_SSE2 && TARGET_SSE
4388 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4390 movlps\t{%1, %0|%0, %1}
4391 movaps\t{%1, %0|%0, %1}
4392 movlps\t{%1, %0|%0, %1}"
4393 [(set_attr "type" "ssemov")
4394 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: nine alternatives over V2DF operands 0-2, emitted as movsd,
;; movlpd, shufpd or movhps (with VEX variants where the isa attribute says
;; avx).  prefix_data16 is computed rather than listed: its non-default value
;; applies to alternatives 2 and 4, and only when TARGET_AVX is zero.
4396 (define_insn "sse2_movsd"
4397 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4399 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4400 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4404 movsd\t{%2, %0|%0, %2}
4405 vmovsd\t{%2, %1, %0|%0, %1, %2}
4406 movlpd\t{%2, %0|%0, %2}
4407 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4408 %vmovlpd\t{%2, %0|%0, %2}
4409 shufpd\t{$2, %1, %0|%0, %1, 2}
4410 movhps\t{%H1, %0|%0, %H1}
4411 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4412 %vmovhps\t{%1, %H0|%H0, %1}"
4413 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4414 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4415 (set (attr "prefix_data16")
4417 (and (eq_attr "alternative" "2,4")
4418 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4420 (const_string "*")))
4421 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4422 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4423 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE3 variant: DF operand 1 (SSE register or memory) feeds V2DF register
;; operand 0 through a single movddup; the prefix attribute is maybe_vex.
4425 (define_insn "*vec_dupv2df_sse3"
4426 [(set (match_operand:V2DF 0 "register_operand" "=x")
4428 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4430 "%vmovddup\t{%1, %0|%0, %1}"
4431 [(set_attr "type" "sselog1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DF")])
;; Named pattern vec_dupv2df: DF register operand 1 is tied to the V2DF
;; destination register by the "0" constraint.
4435 (define_insn "vec_dupv2df"
4436 [(set (match_operand:V2DF 0 "register_operand" "=x")
4438 (match_operand:DF 1 "register_operand" "0")))]
4441 [(set_attr "type" "sselog1")
4442 (set_attr "mode" "V2DF")])
;; SSE3 variant emitted as a single movddup from DF operand 1 (SSE register
;; or memory) into V2DF register operand 0; the prefix attribute is maybe_vex.
4444 (define_insn "*vec_concatv2df_sse3"
4445 [(set (match_operand:V2DF 0 "register_operand" "=x")
4447 (match_operand:DF 1 "nonimmediate_operand" "xm")
4450 "%vmovddup\t{%1, %0|%0, %1}"
4451 [(set_attr "type" "sselog1")
4452 (set_attr "prefix" "maybe_vex")
4453 (set_attr "mode" "DF")])
;; V2DF from two DF operands, seven alternatives.  Register pairs use
;; unpcklpd, a memory operand 2 uses movhpd, and alternative 4 (operand 1 in
;; memory, operand 2 matching constraint C) is a plain movsd load.  The last
;; two alternatives use movlhps/movhps (V4SF/V2SF modes) with the generic
;; "x" register class instead of "Y2".
4455 (define_insn "*vec_concatv2df"
4456 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4458 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4459 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4462 unpcklpd\t{%2, %0|%0, %2}
4463 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4464 movhpd\t{%2, %0|%0, %2}
4465 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4466 %vmovsd\t{%1, %0|%0, %1}
4467 movlhps\t{%2, %0|%0, %2}
4468 movhps\t{%2, %0|%0, %2}"
4469 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4470 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4471 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4472 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4473 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4477 ;; Parallel integral arithmetic
4479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2 expander for the VI_128 modes.  The preparation statement loads
;; the all-zero vector of the mode into a register and installs it as
;; operand 2.
4481 (define_expand "neg<mode>2"
4482 [(set (match_operand:VI_128 0 "register_operand" "")
4485 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4487 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the plus/minus code iterator on VI_128 modes.  Both inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy adjusts the
;; operand array in place before the insn pattern is matched.
4489 (define_expand "<plusminus_insn><mode>3"
4490 [(set (match_operand:VI_128 0 "register_operand" "")
4492 (match_operand:VI_128 1 "nonimmediate_operand" "")
4493 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4495 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the plus/minus iterator on VI_128 modes.  Alternative 0 is the
;; legacy two-operand encoding (operand 1 tied to the destination),
;; alternative 1 the three-operand VEX encoding.  The <comm> prefix on
;; operand 1's constraint comes from the code iterator.
4497 (define_insn "*<plusminus_insn><mode>3"
4498 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4500 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4501 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4502 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4504 p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}
4505 vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4506 [(set_attr "isa" "noavx,avx")
4507 (set_attr "type" "sseiadd")
4508 (set_attr "prefix_data16" "1,*")
4509 (set_attr "prefix" "orig,vex")
4510 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus code iterator (saturating add/subtract) on
;; the VI12_128 modes; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
4512 (define_expand "sse2_<plusminus_insn><mode>3"
4513 [(set (match_operand:VI12_128 0 "register_operand" "")
4514 (sat_plusminus:VI12_128
4515 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4516 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus iterator on VI12_128 modes: legacy two-operand
;; form (alternative 0) and three-operand VEX form (alternative 1).
4520 (define_insn "*sse2_<plusminus_insn><mode>3"
4521 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4522 (sat_plusminus:VI12_128
4523 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4524 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4525 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4527 p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}
4528 vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4529 [(set_attr "isa" "noavx,avx")
4530 (set_attr "type" "sseiadd")
4531 (set_attr "prefix_data16" "1,*")
4532 (set_attr "prefix" "orig,vex")
4533 (set_attr "mode" "TI")])
;; V16QI multiply, split while new pseudos can still be created.  Each input
;; is interleaved with itself (high and low halves) so every source byte sits
;; in the low byte of a 16-bit lane.  The lanes are multiplied with mulv8hi3
;; and the even bytes of the two products are gathered into operand 0 by
;; ix86_expand_vec_extract_even_odd.  Six V16QI temporaries are used.
4535 (define_insn_and_split "mulv16qi3"
4536 [(set (match_operand:V16QI 0 "register_operand" "")
4537 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4538 (match_operand:V16QI 2 "register_operand" "")))]
4540 && can_create_pseudo_p ()"
4548 for (i = 0; i < 6; ++i)
4549 t[i] = gen_reg_rtx (V16QImode);
4551 /* Unpack data such that we've got a source byte in each low byte of
4552 each word. We don't care what goes into the high byte of each word.
4553 Rather than trying to get zero in there, most convenient is to let
4554 it be a copy of the low byte. */
4555 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4556 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4557 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4558 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4560 /* Multiply words. The end-of-line annotations here give a picture of what
4561 the output of that instruction looks like. Dot means don't care; the
4562 letters are the bytes of the result with A being the most significant. */
4563 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4564 gen_lowpart (V8HImode, t[0]),
4565 gen_lowpart (V8HImode, t[1])));
4566 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4567 gen_lowpart (V8HImode, t[2]),
4568 gen_lowpart (V8HImode, t[3])));
4570 /* Extract the even bytes and merge them back together. */
4571 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; V8HI multiply expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy with code MULT.
4575 (define_expand "mulv8hi3"
4576 [(set (match_operand:V8HI 0 "register_operand" "")
4577 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4578 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4580 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply insn: pmullw (legacy, operand 1 tied to the destination and
;; marked commutative with "%") or vpmullw (three-operand VEX form).
4582 (define_insn "*mulv8hi3"
4583 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4584 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4585 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4586 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4588 pmullw\t{%2, %0|%0, %2}
4589 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4590 [(set_attr "isa" "noavx,avx")
4591 (set_attr "type" "sseimul")
4592 (set_attr "prefix_data16" "1,*")
4593 (set_attr "prefix" "orig,vex")
4594 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply; <s> comes from an iterator, so
;; one definition yields more than one named pattern.  Operands are adjusted
;; in place by ix86_fixup_binary_operands_no_copy with code MULT.
4596 (define_expand "<s>mulv8hi3_highpart"
4597 [(set (match_operand:V8HI 0 "register_operand" "")
4602 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4604 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4607 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI high-part multiply: pmulh<u>w (legacy) or vpmulh<u>w
;; (VEX), with <u> substituted by the iterator.
4609 (define_insn "*<s>mulv8hi3_highpart"
4610 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4615 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4617 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4619 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4621 pmulh<u>w\t{%2, %0|%0, %2}
4622 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4623 [(set_attr "isa" "noavx,avx")
4624 (set_attr "type" "sseimul")
4625 (set_attr "prefix_data16" "1,*")
4626 (set_attr "prefix" "orig,vex")
4627 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each V4SI
;; input; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode).
4629 (define_expand "sse2_umulv2siv2di3"
4630 [(set (match_operand:V2DI 0 "register_operand" "")
4634 (match_operand:V4SI 1 "nonimmediate_operand" "")
4635 (parallel [(const_int 0) (const_int 2)])))
4638 (match_operand:V4SI 2 "nonimmediate_operand" "")
4639 (parallel [(const_int 0) (const_int 2)])))))]
4641 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn form: pmuludq / vpmuludq on elements 0 and 2 of each V4SI input,
;; giving a V2DI result.
4643 (define_insn "*sse2_umulv2siv2di3"
4644 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4648 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4649 (parallel [(const_int 0) (const_int 2)])))
4652 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4653 (parallel [(const_int 0) (const_int 2)])))))]
4654 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4656 pmuludq\t{%2, %0|%0, %2}
4657 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4658 [(set_attr "isa" "noavx,avx")
4659 (set_attr "type" "sseimul")
4660 (set_attr "prefix_data16" "1,*")
4661 (set_attr "prefix" "orig,vex")
4662 (set_attr "mode" "TI")])
;; SSE4.1 counterpart of sse2_umulv2siv2di3 with the same operand shape:
;; elements 0 and 2 of each V4SI input, V2DI result.
4664 (define_expand "sse4_1_mulv2siv2di3"
4665 [(set (match_operand:V2DI 0 "register_operand" "")
4669 (match_operand:V4SI 1 "nonimmediate_operand" "")
4670 (parallel [(const_int 0) (const_int 2)])))
4673 (match_operand:V4SI 2 "nonimmediate_operand" "")
4674 (parallel [(const_int 0) (const_int 2)])))))]
4676 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn form requiring TARGET_SSE4_1: pmuldq / vpmuldq on elements 0 and 2 of
;; each V4SI input.  prefix_extra is 1 for both alternatives.
4678 (define_insn "*sse4_1_mulv2siv2di3"
4679 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4683 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4684 (parallel [(const_int 0) (const_int 2)])))
4687 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4688 (parallel [(const_int 0) (const_int 2)])))))]
4689 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4691 pmuldq\t{%2, %0|%0, %2}
4692 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4693 [(set_attr "isa" "noavx,avx")
4694 (set_attr "type" "sseimul")
4695 (set_attr "prefix_data16" "1,*")
4696 (set_attr "prefix_extra" "1")
4697 (set_attr "prefix" "orig,vex")
4698 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result from two V8HI inputs.  Each input is
;; referenced twice, through V4HI selections starting at index 0 and at
;; index 1 (the second via match_dup).  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode).
4700 (define_expand "sse2_pmaddwd"
4701 [(set (match_operand:V4SI 0 "register_operand" "")
4706 (match_operand:V8HI 1 "nonimmediate_operand" "")
4707 (parallel [(const_int 0)
4713 (match_operand:V8HI 2 "nonimmediate_operand" "")
4714 (parallel [(const_int 0)
4720 (vec_select:V4HI (match_dup 1)
4721 (parallel [(const_int 1)
4726 (vec_select:V4HI (match_dup 2)
4727 (parallel [(const_int 1)
4730 (const_int 7)]))))))]
4732 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn form of pmaddwd / vpmaddwd with the same operand structure as the
;; expander.  Typed sseiadd, with atom_unit set to simul.
4734 (define_insn "*sse2_pmaddwd"
4735 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4740 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4741 (parallel [(const_int 0)
4747 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4748 (parallel [(const_int 0)
4754 (vec_select:V4HI (match_dup 1)
4755 (parallel [(const_int 1)
4760 (vec_select:V4HI (match_dup 2)
4761 (parallel [(const_int 1)
4764 (const_int 7)]))))))]
4765 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4767 pmaddwd\t{%2, %0|%0, %2}
4768 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4769 [(set_attr "isa" "noavx,avx")
4770 (set_attr "type" "sseiadd")
4771 (set_attr "atom_unit" "simul")
4772 (set_attr "prefix_data16" "1,*")
4773 (set_attr "prefix" "orig,vex")
4774 (set_attr "mode" "TI")])
;; V4SI multiply expander with register-only inputs.  The binary-operand
;; fixup is applied only when TARGET_SSE4_1 or TARGET_AVX is set.
4776 (define_expand "mulv4si3"
4777 [(set (match_operand:V4SI 0 "register_operand" "")
4778 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4779 (match_operand:V4SI 2 "register_operand" "")))]
4782 if (TARGET_SSE4_1 || TARGET_AVX)
4783 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; Native V4SI multiply for SSE4.1: pmulld (legacy) or vpmulld (VEX).
4786 (define_insn "*sse4_1_mulv4si3"
4787 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4788 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4789 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4790 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4792 pmulld\t{%2, %0|%0, %2}
4793 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4794 [(set_attr "isa" "noavx,avx")
4795 (set_attr "type" "sseimul")
4796 (set_attr "prefix_extra" "1")
4797 (set_attr "prefix" "orig,vex")
4798 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX, split
;; while pseudos can still be created.  Elements 0 and 2 are multiplied
;; directly with sse2_umulv2siv2di3.  Both inputs are then shifted as whole
;; V1TI values so elements 1 and 3 take those slots, and multiplied again.
;; pshufd gathers the wanted 32-bit pieces and vec_interleave_lowv4si merges
;; them into operand 0.
4800 (define_insn_and_split "*sse2_mulv4si3"
4801 [(set (match_operand:V4SI 0 "register_operand" "")
4802 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4803 (match_operand:V4SI 2 "register_operand" "")))]
4804 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4805 && can_create_pseudo_p ()"
4810 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4816 t1 = gen_reg_rtx (V4SImode);
4817 t2 = gen_reg_rtx (V4SImode);
4818 t3 = gen_reg_rtx (V4SImode);
4819 t4 = gen_reg_rtx (V4SImode);
4820 t5 = gen_reg_rtx (V4SImode);
4821 t6 = gen_reg_rtx (V4SImode);
4822 thirtytwo = GEN_INT (32);
4824 /* Multiply elements 2 and 0. */
4825 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4828 /* Shift both input vectors down one element, so that elements 3
4829 and 1 are now in the slots for elements 2 and 0. For K8, at
4830 least, this is faster than using a shuffle. */
4831 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4832 gen_lowpart (V1TImode, op1),
4834 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4835 gen_lowpart (V1TImode, op2),
4837 /* Multiply elements 3 and 1. */
4838 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4841 /* Move the results in element 2 down to element 1; we don't care
4842 what goes in elements 2 and 3. */
4843 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4844 const0_rtx, const0_rtx));
4845 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4846 const0_rtx, const0_rtx));
4848 /* Merge the parts back together. */
4849 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split while pseudos can still be created.  Two sequences
;; are present.  The first uses XOP helpers: pshufd swap, mulv4si3, phadddq,
;; a 32-bit left shift, then pmacsdql.  The second uses only SSE2 helpers:
;; a low*low product plus the two low*high cross products, each shifted
;; left by 32, summed with addv2di3.
4853 (define_insn_and_split "mulv2di3"
4854 [(set (match_operand:V2DI 0 "register_operand" "")
4855 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4856 (match_operand:V2DI 2 "register_operand" "")))]
4858 && can_create_pseudo_p ()"
4863 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4872 /* op1: A,B,C,D, op2: E,F,G,H */
4873 op1 = gen_lowpart (V4SImode, op1);
4874 op2 = gen_lowpart (V4SImode, op2);
4876 t1 = gen_reg_rtx (V4SImode);
4877 t2 = gen_reg_rtx (V4SImode);
4878 t3 = gen_reg_rtx (V2DImode);
4879 t4 = gen_reg_rtx (V2DImode);
4882 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4888 /* t2: (B*E),(A*F),(D*G),(C*H) */
4889 emit_insn (gen_mulv4si3 (t2, t1, op2));
4891 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4892 emit_insn (gen_xop_phadddq (t3, t2));
4894 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4895 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4897 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4898 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4902 t1 = gen_reg_rtx (V2DImode);
4903 t2 = gen_reg_rtx (V2DImode);
4904 t3 = gen_reg_rtx (V2DImode);
4905 t4 = gen_reg_rtx (V2DImode);
4906 t5 = gen_reg_rtx (V2DImode);
4907 t6 = gen_reg_rtx (V2DImode);
4908 thirtytwo = GEN_INT (32);
4910 /* Multiply low parts. */
4911 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4912 gen_lowpart (V4SImode, op2)));
4914 /* Shift input vectors right 32 bits so we can multiply high parts. */
4915 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4916 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4918 /* Multiply high parts by low parts. */
4919 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4920 gen_lowpart (V4SImode, t3)));
4921 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4922 gen_lowpart (V4SImode, t2)));
4924 /* Shift them back. */
4925 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
4926 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
4928 /* Add the three parts together. */
4929 emit_insn (gen_addv2di3 (t6, t1, t4));
4930 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply V8HI x V8HI -> V4SI, "hi" variant.  mulv8hi3 and
;; smulv8hi3_highpart produce two V8HI temporaries, which
;; vec_interleave_highv8hi combines into operand 0 viewed as V8HI.
4935 (define_expand "vec_widen_smult_hi_v8hi"
4936 [(match_operand:V4SI 0 "register_operand" "")
4937 (match_operand:V8HI 1 "register_operand" "")
4938 (match_operand:V8HI 2 "register_operand" "")]
4941 rtx op1, op2, t1, t2, dest;
4945 t1 = gen_reg_rtx (V8HImode);
4946 t2 = gen_reg_rtx (V8HImode);
4947 dest = gen_lowpart (V8HImode, operands[0]);
4949 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4950 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4951 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same two products
;; as the "hi" variant (mulv8hi3, smulv8hi3_highpart), combined with
;; vec_interleave_lowv8hi instead.
4955 (define_expand "vec_widen_smult_lo_v8hi"
4956 [(match_operand:V4SI 0 "register_operand" "")
4957 (match_operand:V8HI 1 "register_operand" "")
4958 (match_operand:V8HI 2 "register_operand" "")]
4961 rtx op1, op2, t1, t2, dest;
4965 t1 = gen_reg_rtx (V8HImode);
4966 t2 = gen_reg_rtx (V8HImode);
4967 dest = gen_lowpart (V8HImode, operands[0]);
4969 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4970 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4971 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, "hi" variant using
;; umulv8hi3_highpart for the second product.  The two V8HI products are
;; combined with vec_interleave_highv8hi into operand 0 viewed as V8HI.
4975 (define_expand "vec_widen_umult_hi_v8hi"
4976 [(match_operand:V4SI 0 "register_operand" "")
4977 (match_operand:V8HI 1 "register_operand" "")
4978 (match_operand:V8HI 2 "register_operand" "")]
4981 rtx op1, op2, t1, t2, dest;
4985 t1 = gen_reg_rtx (V8HImode);
4986 t2 = gen_reg_rtx (V8HImode);
4987 dest = gen_lowpart (V8HImode, operands[0]);
4989 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4990 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
4991 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, "lo" variant using
;; umulv8hi3_highpart for the second product.  The two V8HI products are
;; combined with vec_interleave_lowv8hi.
4995 (define_expand "vec_widen_umult_lo_v8hi"
4996 [(match_operand:V4SI 0 "register_operand" "")
4997 (match_operand:V8HI 1 "register_operand" "")
4998 (match_operand:V8HI 2 "register_operand" "")]
5001 rtx op1, op2, t1, t2, dest;
5005 t1 = gen_reg_rtx (V8HImode);
5006 t2 = gen_reg_rtx (V8HImode);
5007 dest = gen_lowpart (V8HImode, operands[0]);
5009 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5010 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5011 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Both inputs are
;; permuted with pshufd into fresh V4SI temporaries, which are then passed
;; to the XOP helper xop_mulv2div2di3_high.
5015 (define_expand "vec_widen_smult_hi_v4si"
5016 [(match_operand:V2DI 0 "register_operand" "")
5017 (match_operand:V4SI 1 "register_operand" "")
5018 (match_operand:V4SI 2 "register_operand" "")]
5023 t1 = gen_reg_rtx (V4SImode);
5024 t2 = gen_reg_rtx (V4SImode);
5026 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5031 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5036 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Same shape as the
;; "hi" variant, but the permuted temporaries go to xop_mulv2div2di3_low.
5040 (define_expand "vec_widen_smult_lo_v4si"
5041 [(match_operand:V2DI 0 "register_operand" "")
5042 (match_operand:V4SI 1 "register_operand" "")
5043 (match_operand:V4SI 2 "register_operand" "")]
5048 t1 = gen_reg_rtx (V4SImode);
5049 t2 = gen_reg_rtx (V4SImode);
5051 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5056 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5061 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Each input is
;; interleaved with itself via vec_interleave_highv4si, and the results are
;; multiplied by sse2_umulv2siv2di3 (which reads elements 0 and 2).
5065 (define_expand "vec_widen_umult_hi_v4si"
5066 [(match_operand:V2DI 0 "register_operand" "")
5067 (match_operand:V4SI 1 "register_operand" "")
5068 (match_operand:V4SI 2 "register_operand" "")]
5071 rtx op1, op2, t1, t2;
5075 t1 = gen_reg_rtx (V4SImode);
5076 t2 = gen_reg_rtx (V4SImode);
5078 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5079 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5080 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Each input is
;; interleaved with itself via vec_interleave_lowv4si, and the results are
;; multiplied by sse2_umulv2siv2di3 (which reads elements 0 and 2).
5084 (define_expand "vec_widen_umult_lo_v4si"
5085 [(match_operand:V2DI 0 "register_operand" "")
5086 (match_operand:V4SI 1 "register_operand" "")
5087 (match_operand:V4SI 2 "register_operand" "")]
5090 rtx op1, op2, t1, t2;
5094 t1 = gen_reg_rtx (V4SImode);
5095 t2 = gen_reg_rtx (V4SImode);
5097 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5098 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5099 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander: pmaddwd of the two V8HI inputs into a fresh V4SI
;; temporary, then a V4SI add of that temporary to operand 3, stored in
;; operand 0.
5103 (define_expand "sdot_prodv8hi"
5104 [(match_operand:V4SI 0 "register_operand" "")
5105 (match_operand:V8HI 1 "register_operand" "")
5106 (match_operand:V8HI 2 "register_operand" "")
5107 (match_operand:V4SI 3 "register_operand" "")]
5110 rtx t = gen_reg_rtx (V4SImode);
5111 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5112 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander with a V2DI accumulator (operand 3).  t1 gets the
;; sse2_umulv2siv2di3 product of the inputs plus the accumulator.  Both
;; inputs are then shifted as V1TI values with sse2_lshrv1ti3 and multiplied
;; again into t4.  Operand 0 receives t1 + t4.
5116 (define_expand "udot_prodv4si"
5117 [(match_operand:V2DI 0 "register_operand" "")
5118 (match_operand:V4SI 1 "register_operand" "")
5119 (match_operand:V4SI 2 "register_operand" "")
5120 (match_operand:V2DI 3 "register_operand" "")]
5125 t1 = gen_reg_rtx (V2DImode);
5126 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5127 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5129 t2 = gen_reg_rtx (V4SImode);
5130 t3 = gen_reg_rtx (V4SImode);
5131 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5132 gen_lowpart (V1TImode, operands[1]),
5134 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5135 gen_lowpart (V1TImode, operands[2]),
5138 t4 = gen_reg_rtx (V2DImode);
5139 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5141 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift for the VI24_128 modes, emitted as psra<size> /
;; vpsra<size>.  The SImode count (operand 2) is a non-memory operand with
;; constraint "xN".  length_immediate is computed from whether operand 2 is
;; a const_int.
5145 (define_insn "ashr<mode>3"
5146 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5148 (match_operand:VI24_128 1 "register_operand" "0,x")
5149 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5152 psra<ssevecsize>\t{%2, %0|%0, %2}
5153 vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5154 [(set_attr "isa" "noavx,avx")
5155 (set_attr "type" "sseishft")
5156 (set (attr "length_immediate")
5157 (if_then_else (match_operand 2 "const_int_operand" "")
5159 (const_string "0")))
5160 (set_attr "prefix_data16" "1,*")
5161 (set_attr "prefix" "orig,vex")
5162 (set_attr "mode" "TI")])
;; Per-element shift for the VI248_128 modes, emitted as psrl<size> /
;; vpsrl<size>.  The SImode count (operand 2) is a non-memory operand with
;; constraint "xN".  length_immediate is computed from whether operand 2 is
;; a const_int.
5164 (define_insn "lshr<mode>3"
5165 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5167 (match_operand:VI248_128 1 "register_operand" "0,x")
5168 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5171 psrl<ssevecsize>\t{%2, %0|%0, %2}
5172 vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5173 [(set_attr "isa" "noavx,avx")
5174 (set_attr "type" "sseishft")
5175 (set (attr "length_immediate")
5176 (if_then_else (match_operand 2 "const_int_operand" "")
5178 (const_string "0")))
5179 (set_attr "prefix_data16" "1,*")
5180 (set_attr "prefix" "orig,vex")
5181 (set_attr "mode" "TI")])
;; Per-element shift for the VI248_128 modes, emitted as psll<size> /
;; vpsll<size>.  The SImode count (operand 2) is a non-memory operand with
;; constraint "xN".  length_immediate is computed from whether operand 2 is
;; a const_int.
5183 (define_insn "ashl<mode>3"
5184 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5186 (match_operand:VI248_128 1 "register_operand" "0,x")
5187 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5190 psll<ssevecsize>\t{%2, %0|%0, %2}
5191 vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5192 [(set_attr "isa" "noavx,avx")
5193 (set_attr "type" "sseishft")
5194 (set (attr "length_immediate")
5195 (if_then_else (match_operand 2 "const_int_operand" "")
5197 (const_string "0")))
5198 (set_attr "prefix_data16" "1,*")
5199 (set_attr "prefix" "orig,vex")
5200 (set_attr "mode" "TI")])
;; Whole-vector shift expander for the VI_128 modes.  The count (operand 2)
;; must satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are
;; re-viewed as V1TImode with gen_lowpart.
5202 (define_expand "vec_shl_<mode>"
5203 [(set (match_operand:VI_128 0 "register_operand" "")
5205 (match_operand:VI_128 1 "register_operand" "")
5206 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5209 operands[0] = gen_lowpart (V1TImode, operands[0]);
5210 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI shift emitted as pslldq / vpslldq.  Operand 2 arrives as a multiple
;; of 8 and is divided by 8 before being printed as the instruction's
;; immediate.
5213 (define_insn "sse2_ashlv1ti3"
5214 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5216 (match_operand:V1TI 1 "register_operand" "0,x")
5217 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5220 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5222 switch (which_alternative)
5225 return "pslldq\t{%2, %0|%0, %2}";
5227 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5232 [(set_attr "isa" "noavx,avx")
5233 (set_attr "type" "sseishft")
5234 (set_attr "length_immediate" "1")
5235 (set_attr "prefix_data16" "1,*")
5236 (set_attr "prefix" "orig,vex")
5237 (set_attr "mode" "TI")])
;; Whole-vector shift expander for the VI_128 modes, counterpart of
;; vec_shl_<mode>.  The count must satisfy const_0_to_255_mul_8_operand.
;; Operands 0 and 1 are re-viewed as V1TImode with gen_lowpart.
5239 (define_expand "vec_shr_<mode>"
5240 [(set (match_operand:VI_128 0 "register_operand" "")
5242 (match_operand:VI_128 1 "register_operand" "")
5243 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5246 operands[0] = gen_lowpart (V1TImode, operands[0]);
5247 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI shift emitted as psrldq / vpsrldq.  Operand 2 arrives as a multiple
;; of 8 and is divided by 8 before being printed as the instruction's
;; immediate.  atom_unit is sishuf.
5250 (define_insn "sse2_lshrv1ti3"
5251 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5253 (match_operand:V1TI 1 "register_operand" "0,x")
5254 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5257 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5259 switch (which_alternative)
5262 return "psrldq\t{%2, %0|%0, %2}";
5264 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5269 [(set_attr "isa" "noavx,avx")
5270 (set_attr "type" "sseishft")
5271 (set_attr "length_immediate" "1")
5272 (set_attr "atom_unit" "sishuf")
5273 (set_attr "prefix_data16" "1,*")
5274 (set_attr "prefix" "orig,vex")
5275 (set_attr "mode" "TI")])
;; SSE4.1 max/min insn for the VI14_128 modes, emitted as
;; p<maxmin_int><size> / vp<maxmin_int><size>.  prefix_extra is 1 for the
;; legacy alternative only.
5277 (define_insn "*sse4_1_<code><mode>3"
5278 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5280 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5281 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5282 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5284 p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}
5285 vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5286 [(set_attr "isa" "noavx,avx")
5287 (set_attr "type" "sseiadd")
5288 (set_attr "prefix_extra" "1,*")
5289 (set_attr "prefix" "orig,vex")
5290 (set_attr "mode" "TI")])
;; V8HI max/min insn available with plain SSE2, emitted as p<maxmin_int>w /
;; vp<maxmin_int>w.
5292 (define_insn "*<code>v8hi3"
5293 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5295 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5296 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5297 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5299 p<maxmin_int>w\t{%2, %0|%0, %2}
5300 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5301 [(set_attr "isa" "noavx,avx")
5302 (set_attr "type" "sseiadd")
5303 (set_attr "prefix_data16" "1,*")
5304 (set_attr "prefix_extra" "*,1")
5305 (set_attr "prefix" "orig,vex")
5306 (set_attr "mode" "TI")])
;; smax expander for the VI14_128 modes.  The body contains a SMAX
;; binary-operand fixup and a sequence that builds a six-entry xops[] array
;; for ix86_expand_int_vcond: destination, the two value operands in
;; (operand 1, operand 2) order, a GT comparison of operands 1 and 2, and
;; the two compared operands.
5308 (define_expand "smax<mode>3"
5309 [(set (match_operand:VI14_128 0 "register_operand" "")
5310 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5311 (match_operand:VI14_128 2 "register_operand" "")))]
5315 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5321 xops[0] = operands[0];
5322 xops[1] = operands[1];
5323 xops[2] = operands[2];
5324 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5325 xops[4] = operands[1];
5326 xops[5] = operands[2];
5327 ok = ix86_expand_int_vcond (xops);
;; smin expander for the VI14_128 modes.  Same structure as smax<mode>3,
;; with the same GT comparison but xops[1] and xops[2] exchanged, so the
;; opposite operand is picked.
5333 (define_expand "smin<mode>3"
5334 [(set (match_operand:VI14_128 0 "register_operand" "")
5335 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5336 (match_operand:VI14_128 2 "register_operand" "")))]
5340 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5346 xops[0] = operands[0];
5347 xops[1] = operands[2];
5348 xops[2] = operands[1];
5349 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5350 xops[4] = operands[1];
5351 xops[5] = operands[2];
5352 ok = ix86_expand_int_vcond (xops);
;; V8HI max/min expander driven by a code iterator; operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy.
5358 (define_expand "<code>v8hi3"
5359 [(set (match_operand:V8HI 0 "register_operand" "")
5361 (match_operand:V8HI 1 "nonimmediate_operand" "")
5362 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5364 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; V2DI signed max expander, always lowered through ix86_expand_int_vcond.
;; xops[] holds the destination, the value operands in (1, 2) order, a GT
;; comparison, and the compared operands.
5366 (define_expand "smaxv2di3"
5367 [(set (match_operand:V2DI 0 "register_operand" "")
5368 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5369 (match_operand:V2DI 2 "register_operand" "")))]
5375 xops[0] = operands[0];
5376 xops[1] = operands[1];
5377 xops[2] = operands[2];
5378 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5379 xops[4] = operands[1];
5380 xops[5] = operands[2];
5381 ok = ix86_expand_int_vcond (xops);
;; V2DI signed min expander.  Same as smaxv2di3 except that xops[1] and
;; xops[2] are exchanged while the GT comparison stays the same.
5386 (define_expand "sminv2di3"
5387 [(set (match_operand:V2DI 0 "register_operand" "")
5388 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5389 (match_operand:V2DI 2 "register_operand" "")))]
5395 xops[0] = operands[0];
5396 xops[1] = operands[2];
5397 xops[2] = operands[1];
5398 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5399 xops[4] = operands[1];
5400 xops[5] = operands[2];
5401 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the VI24_128 modes, emitted as
;; p<maxmin_int><size> / vp<maxmin_int><size>.  prefix_extra is 1 for the
;; legacy alternative only.
5406 (define_insn "*sse4_1_<code><mode>3"
5407 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5409 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5410 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5411 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5413 p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}
5414 vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5415 [(set_attr "isa" "noavx,avx")
5416 (set_attr "type" "sseiadd")
5417 (set_attr "prefix_extra" "1,*")
5418 (set_attr "prefix" "orig,vex")
5419 (set_attr "mode" "TI")])
;; V16QI max/min insn available with plain SSE2, emitted as p<maxmin_int>b /
;; vp<maxmin_int>b.
5421 (define_insn "*<code>v16qi3"
5422 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5424 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5425 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5426 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5428 p<maxmin_int>b\t{%2, %0|%0, %2}
5429 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5430 [(set_attr "isa" "noavx,avx")
5431 (set_attr "type" "sseiadd")
5432 (set_attr "prefix_data16" "1,*")
5433 (set_attr "prefix_extra" "*,1")
5434 (set_attr "prefix" "orig,vex")
5435 (set_attr "mode" "TI")])
;; V16QI max/min expander driven by a code iterator; operands are adjusted
;; in place by ix86_fixup_binary_operands_no_copy.
5437 (define_expand "<code>v16qi3"
5438 [(set (match_operand:V16QI 0 "register_operand" "")
5440 (match_operand:V16QI 1 "nonimmediate_operand" "")
5441 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5443 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; V8HI unsigned max expander.  Besides a UMAX operand fixup, the body has a
;; two-instruction sequence: sse2_ussubv8hi3 of operand 1 and operand 2,
;; then addv8hi3 of that result and operand 2.  The intermediate normally
;; reuses operand 0; a fresh register is allocated when operand 0 is
;; rtx_equal_p to operand 2, since operand 2 is still needed by the add.
5445 (define_expand "umaxv8hi3"
5446 [(set (match_operand:V8HI 0 "register_operand" "")
5447 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5448 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5452 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5455 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5456 if (rtx_equal_p (op3, op2))
5457 op3 = gen_reg_rtx (V8HImode);
5458 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5459 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; V4SI unsigned max expander.  The body contains a UMAX operand fixup and a
;; sequence that fills xops[] (destination, values in (1, 2) order, a GTU
;; comparison, compared operands) for ix86_expand_int_vcond.
5464 (define_expand "umaxv4si3"
5465 [(set (match_operand:V4SI 0 "register_operand" "")
5466 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5467 (match_operand:V4SI 2 "register_operand" "")))]
5471 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5477 xops[0] = operands[0];
5478 xops[1] = operands[1];
5479 xops[2] = operands[2];
5480 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5481 xops[4] = operands[1];
5482 xops[5] = operands[2];
5483 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the VI24_128 modes.  One visible path canonicalizes
;; the operands for the umin pattern; the other builds a vcond-style operand
;; array with the two value operands swapped (xops[1] = op2, xops[2] = op1)
;; under a GTU (op1, op2) comparison, i.e. "op1 >u op2 ? op2 : op1", and
;; passes it to ix86_expand_int_vcond.
;; NOTE(review): the condition selecting between the paths is not visible here.
5489 (define_expand "umin<mode>3"
5490 [(set (match_operand:VI24_128 0 "register_operand" "")
5491 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5492 (match_operand:VI24_128 2 "register_operand" "")))]
5496 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5502 xops[0] = operands[0];
5503 xops[1] = operands[2];
5504 xops[2] = operands[1];
5505 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5506 xops[4] = operands[1];
5507 xops[5] = operands[2];
5508 ok = ix86_expand_int_vcond (xops);
;; Unsigned maximum of two V2DI vectors, expanded through
;; ix86_expand_int_vcond as "op1 >u op2 ? op1 : op2": the operand array is
;; (dest, op1, op2, GTU (op1, op2), op1, op2).
5514 (define_expand "umaxv2di3"
5515 [(set (match_operand:V2DI 0 "register_operand" "")
5516 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5517 (match_operand:V2DI 2 "register_operand" "")))]
5523 xops[0] = operands[0];
5524 xops[1] = operands[1];
5525 xops[2] = operands[2];
5526 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5527 xops[4] = operands[1];
5528 xops[5] = operands[2];
5529 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V2DI vectors, expanded through
;; ix86_expand_int_vcond as "op1 >u op2 ? op2 : op1": same GTU comparison as
;; the umax expander, but with the two selected values swapped.
5534 (define_expand "uminv2di3"
5535 [(set (match_operand:V2DI 0 "register_operand" "")
5536 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5537 (match_operand:V2DI 2 "register_operand" "")))]
5543 xops[0] = operands[0];
5544 xops[1] = operands[2];
5545 xops[2] = operands[1];
5546 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5547 xops[4] = operands[1];
5548 xops[5] = operands[2];
5549 ok = ix86_expand_int_vcond (xops);
5554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5556 ;; Parallel integral comparisons
5558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 64-bit element equality compare on V2DI, emitted as pcmpeqq (SSE form,
;; destination tied to operand 1) or vpcmpeqq (VEX three-operand form).
;; Operand 1 is marked commutative.  Requires TARGET_SSE4_1.
5560 (define_insn "*sse4_1_eqv2di3"
5561 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5563 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5564 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5565 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5567 pcmpeqq\t{%2, %0|%0, %2}
5568 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5569 [(set_attr "isa" "noavx,avx")
5570 (set_attr "type" "ssecmp")
5571 (set_attr "prefix_extra" "1")
5572 (set_attr "prefix" "orig,vex")
5573 (set_attr "mode" "TI")])
;; Element-wise equality compare for the VI124_128 modes, emitted as
;; pcmpeq<ssevecsize> / vpcmpeq<ssevecsize>.  Operand 1 is marked
;; commutative.  Enabled for TARGET_SSE2 only when XOP is not in use.
5575 (define_insn "*sse2_eq<mode>3"
5576 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5578 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5579 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5580 "TARGET_SSE2 && !TARGET_XOP
5581 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5583 pcmpeq<ssevecsize>\t{%2, %0|%0, %2}
5584 vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5585 [(set_attr "isa" "noavx,avx")
5586 (set_attr "type" "ssecmp")
5587 (set_attr "prefix_data16" "1,*")
5588 (set_attr "prefix" "orig,vex")
5589 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare.  It canonicalizes the
;; operands with ix86_fixup_binary_operands_no_copy (EQ, ...) and is enabled
;; under the same TARGET_SSE2 && !TARGET_XOP condition as the matching insn.
5591 (define_expand "sse2_eq<mode>3"
5592 [(set (match_operand:VI124_128 0 "register_operand" "")
5594 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5595 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5596 "TARGET_SSE2 && !TARGET_XOP "
5597 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare; canonicalizes the operands
;; with ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands).
5599 (define_expand "sse4_1_eqv2di3"
5600 [(set (match_operand:V2DI 0 "register_operand" "")
5602 (match_operand:V2DI 1 "nonimmediate_operand" "")
5603 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5605 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; 64-bit element compare on V2DI emitted as pcmpgtq / vpcmpgtq.  Unlike the
;; equality patterns, operand 1 must be a register and is not marked
;; commutative.
5607 (define_insn "sse4_2_gtv2di3"
5608 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5610 (match_operand:V2DI 1 "register_operand" "0,x")
5611 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5614 pcmpgtq\t{%2, %0|%0, %2}
5615 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5616 [(set_attr "isa" "noavx,avx")
5617 (set_attr "type" "ssecmp")
5618 (set_attr "prefix_extra" "1")
5619 (set_attr "prefix" "orig,vex")
5620 (set_attr "mode" "TI")])
;; Element-wise compare for the VI124_128 modes emitted as
;; pcmpgt<ssevecsize> / vpcmpgt<ssevecsize>.  Operand 1 must be a register
;; and is not commutative.  Enabled for TARGET_SSE2 when XOP is not in use.
5622 (define_insn "sse2_gt<mode>3"
5623 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5625 (match_operand:VI124_128 1 "register_operand" "0,x")
5626 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5627 "TARGET_SSE2 && !TARGET_XOP"
5629 pcmpgt<ssevecsize>\t{%2, %0|%0, %2}
5630 vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5631 [(set_attr "isa" "noavx,avx")
5632 (set_attr "type" "ssecmp")
5633 (set_attr "prefix_data16" "1,*")
5634 (set_attr "prefix" "orig,vex")
5635 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   operand 0 = (operator 3 applied to operands 4, 5) ? operand 1 : operand 2.
;; The whole expansion is delegated to ix86_expand_int_vcond.
5637 (define_expand "vcond<mode>"
5638 [(set (match_operand:VI124_128 0 "register_operand" "")
5639 (if_then_else:VI124_128
5640 (match_operator 3 ""
5641 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5642 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5643 (match_operand:VI124_128 1 "general_operand" "")
5644 (match_operand:VI124_128 2 "general_operand" "")))]
5647 bool ok = ix86_expand_int_vcond (operands);
;; V2DI variant of the vector conditional select: operator 3 compares
;; operands 4 and 5, and operands 1 / 2 are the values selected between.
;; The expansion is delegated to ix86_expand_int_vcond.
5652 (define_expand "vcondv2di"
5653 [(set (match_operand:V2DI 0 "register_operand" "")
5655 (match_operator 3 ""
5656 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5657 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5658 (match_operand:V2DI 1 "general_operand" "")
5659 (match_operand:V2DI 2 "general_operand" "")))]
5662 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the vector conditional select for the VI124_128
;; modes (by its name, the unsigned-comparison variant).  Operand layout is
;; the same as vcond: operator 3 on operands 4 and 5 selects between
;; operands 1 and 2; expansion is delegated to ix86_expand_int_vcond.
5667 (define_expand "vcondu<mode>"
5668 [(set (match_operand:VI124_128 0 "register_operand" "")
5669 (if_then_else:VI124_128
5670 (match_operator 3 ""
5671 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5672 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5673 (match_operand:VI124_128 1 "general_operand" "")
5674 (match_operand:VI124_128 2 "general_operand" "")))]
5677 bool ok = ix86_expand_int_vcond (operands);
;; V2DI "vcondu" expander (by its name, the unsigned-comparison variant).
;; Operator 3 compares operands 4 and 5 and selects between operands 1 and 2;
;; expansion is delegated to ix86_expand_int_vcond.
5682 (define_expand "vconduv2di"
5683 [(set (match_operand:V2DI 0 "register_operand" "")
5685 (match_operator 3 ""
5686 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5687 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5688 (match_operand:V2DI 1 "general_operand" "")
5689 (match_operand:V2DI 2 "general_operand" "")))]
5692 bool ok = ix86_expand_int_vcond (operands);
5697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5699 ;; Parallel bitwise logical operations
5701 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an integer vector, expressed as an XOR.  The
;; preparation code builds a constant vector with every element set to -1
;; (one element per GET_MODE_NUNITS of the mode), forces it into a register
;; and stores it in operands[2].
;; NOTE(review): the second XOR operand line is not visible in this excerpt;
;; presumably it refers to operands[2].
5703 (define_expand "one_cmpl<mode>2"
5704 [(set (match_operand:VI 0 "register_operand" "")
5705 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5709 int i, n = GET_MODE_NUNITS (<MODE>mode);
5710 rtvec v = rtvec_alloc (n);
5712 for (i = 0; i < n; ++i)
5713 RTVEC_ELT (v, i) = constm1_rtx;
5715 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the 128-bit integer and-not operation: operand 1 is
;; complemented (not:VI_128) and combined with operand 2.
;; NOTE(review): the outer RTL operator and the enabling condition are not
;; visible in this excerpt.
5718 (define_expand "sse2_andnot<mode>3"
5719 [(set (match_operand:VI_128 0 "register_operand" "")
5721 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5722 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
;; And-not for every integer vector mode in VI: operand 1 is complemented
;; and combined with operand 2.  The mnemonic is chosen at output time from
;; the insn's "mode" attribute: "pandn" when the attribute is TI, "andnps"
;; otherwise; alternative 1 adds the VEX "v" prefix and the three-operand
;; form.  The assembled text is formatted into a static 32-byte buffer.
;;
;; The "mode" attribute therefore has to classify 256-bit vectors as V8SF so
;; that they use the float-domain andnps form.  GET_MODE_SIZE is measured in
;; bytes, so the test is "> 16" (wider than a 128-bit register); the former
;; "> 128" comparison could never be true for any mode in VI.
5725 (define_insn "*andnot<mode>3"
5726 [(set (match_operand:VI 0 "register_operand" "=x,x")
5728 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5729 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5732 static char buf[32];
5735 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5737 switch (which_alternative)
5740 ops = "%s\t{%%2, %%0|%%0, %%2}";
5743 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5749 snprintf (buf, sizeof (buf), ops, tmp);
5752 [(set_attr "isa" "noavx,avx")
5753 (set_attr "type" "sselog")
5754 (set (attr "prefix_data16")
5756 (and (eq_attr "alternative" "0")
5757 (eq_attr "mode" "TI"))
5759 (const_string "*")))
5760 (set_attr "prefix" "orig,vex")
;; Size is in bytes: anything larger than 16 bytes is a 256-bit vector.
5762 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5763 (const_string "V8SF")
5764 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5767 (const_string "V4SF")))])
;; Named expander for the bitwise operation selected by <code> on every
;; integer vector mode in VI.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands).
5769 (define_expand "<code><mode>3"
5770 [(set (match_operand:VI 0 "register_operand" "")
5772 (match_operand:VI 1 "nonimmediate_operand" "")
5773 (match_operand:VI 2 "nonimmediate_operand" "")))]
5775 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logical instruction (selected by <code>/<logic>) for every integer
;; vector mode in VI.  Operand 1 is marked commutative.  The mnemonic is
;; chosen at output time from the insn's "mode" attribute: "p<logic>" when
;; the attribute is TI, "<logic>ps" otherwise; alternative 1 adds the VEX "v"
;; prefix and the three-operand form.  The assembled text is formatted into
;; a static 32-byte buffer.
;;
;; The "mode" attribute therefore has to classify 256-bit vectors as V8SF so
;; that they use the float-domain <logic>ps form.  GET_MODE_SIZE is measured
;; in bytes, so the test is "> 16" (wider than a 128-bit register); the
;; former "> 128" comparison could never be true for any mode in VI.
5777 (define_insn "*<code><mode>3"
5778 [(set (match_operand:VI 0 "register_operand" "=x,x")
5780 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5781 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5783 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5785 static char buf[32];
5788 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5790 switch (which_alternative)
5793 ops = "%s\t{%%2, %%0|%%0, %%2}";
5796 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5802 snprintf (buf, sizeof (buf), ops, tmp);
5805 [(set_attr "isa" "noavx,avx")
5806 (set_attr "type" "sselog")
5807 (set (attr "prefix_data16")
5809 (and (eq_attr "alternative" "0")
5810 (eq_attr "mode" "TI"))
5812 (const_string "*")))
5813 (set_attr "prefix" "orig,vex")
;; Size is in bytes: anything larger than 16 bytes is a 256-bit vector.
5815 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5816 (const_string "V8SF")
5817 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5820 (const_string "V4SF")))])
;; And-not on a TFmode value held in an SSE register: operand 1 is
;; complemented and combined with operand 2, emitted as pandn (SSE form) or
;; vpandn (VEX three-operand form).
5822 (define_insn "*andnottf3"
5823 [(set (match_operand:TF 0 "register_operand" "=x,x")
5825 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5826 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5829 pandn\t{%2, %0|%0, %2}
5830 vpandn\t{%2, %1, %0|%0, %1, %2}"
5831 [(set_attr "isa" "noavx,avx")
5832 (set_attr "type" "sselog")
5833 (set_attr "prefix_data16" "1,*")
5834 (set_attr "prefix" "orig,vex")
5835 (set_attr "mode" "TI")])
;; Named expander for the bitwise operation selected by <code> on TFmode;
;; canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands).
5837 (define_expand "<code>tf3"
5838 [(set (match_operand:TF 0 "register_operand" "")
5840 (match_operand:TF 1 "nonimmediate_operand" "")
5841 (match_operand:TF 2 "nonimmediate_operand" "")))]
5843 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logical instruction (selected by <code>/<logic>) on a TFmode
;; value in an SSE register, emitted as p<logic> or vp<logic>.  Operand 1 is
;; marked commutative.
5845 (define_insn "*<code>tf3"
5846 [(set (match_operand:TF 0 "register_operand" "=x,x")
5848 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5849 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5851 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5853 p<logic>\t{%2, %0|%0, %2}
5854 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5855 [(set_attr "isa" "noavx,avx")
5856 (set_attr "type" "sselog")
5857 (set_attr "prefix_data16" "1,*")
5858 (set_attr "prefix" "orig,vex")
5859 (set_attr "mode" "TI")])
5861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5863 ;; Parallel integral element swizzling
5865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_128 vectors into one <ssepackmode> vector.
;; Both inputs are reinterpreted in the narrower-element result mode with
;; gen_lowpart, then ix86_expand_vec_extract_even_odd is called with a final
;; argument of 0.
;; NOTE(review): presumably 0 selects the even-indexed elements, which are
;; the low halves of each wide element on this little-endian target; confirm
;; against ix86_expand_vec_extract_even_odd.
5867 (define_expand "vec_pack_trunc_<mode>"
5868 [(match_operand:<ssepackmode> 0 "register_operand" "")
5869 (match_operand:VI248_128 1 "register_operand" "")
5870 (match_operand:VI248_128 2 "register_operand" "")]
5873 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5874 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5875 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V8HI operands into one V16QI result with packsswb / vpacksswb.
;; Alternative 0 ties the destination to operand 1; alternative 1 is the VEX
;; three-operand form.
5879 (define_insn "sse2_packsswb"
5880 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5883 (match_operand:V8HI 1 "register_operand" "0,x"))
5885 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5888 packsswb\t{%2, %0|%0, %2}
5889 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5890 [(set_attr "isa" "noavx,avx")
5891 (set_attr "type" "sselog")
5892 (set_attr "prefix_data16" "1,*")
5893 (set_attr "prefix" "orig,vex")
5894 (set_attr "mode" "TI")])
;; Pack two V4SI operands into one V8HI result with packssdw / vpackssdw.
;; Alternative 0 ties the destination to operand 1; alternative 1 is the VEX
;; three-operand form.
5896 (define_insn "sse2_packssdw"
5897 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5900 (match_operand:V4SI 1 "register_operand" "0,x"))
5902 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5905 packssdw\t{%2, %0|%0, %2}
5906 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5907 [(set_attr "isa" "noavx,avx")
5908 (set_attr "type" "sselog")
5909 (set_attr "prefix_data16" "1,*")
5910 (set_attr "prefix" "orig,vex")
5911 (set_attr "mode" "TI")])
;; Pack two V8HI operands into one V16QI result with packuswb / vpackuswb.
;; Alternative 0 ties the destination to operand 1; alternative 1 is the VEX
;; three-operand form.
5913 (define_insn "sse2_packuswb"
5914 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5917 (match_operand:V8HI 1 "register_operand" "0,x"))
5919 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5922 packuswb\t{%2, %0|%0, %2}
5923 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5924 [(set_attr "isa" "noavx,avx")
5925 (set_attr "type" "sselog")
5926 (set_attr "prefix_data16" "1,*")
5927 (set_attr "prefix" "orig,vex")
5928 (set_attr "mode" "TI")])
;; Interleave the high halves of two V16QI vectors (punpckhbw / vpunpckhbw).
;; The selection list alternates elements 8..15 with elements 24..31, i.e.
;; bytes 8..15 of operand 1 paired with bytes 8..15 of operand 2 when the two
;; operands are numbered as one 32-element sequence.
5930 (define_insn "vec_interleave_highv16qi"
5931 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5934 (match_operand:V16QI 1 "register_operand" "0,x")
5935 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5936 (parallel [(const_int 8) (const_int 24)
5937 (const_int 9) (const_int 25)
5938 (const_int 10) (const_int 26)
5939 (const_int 11) (const_int 27)
5940 (const_int 12) (const_int 28)
5941 (const_int 13) (const_int 29)
5942 (const_int 14) (const_int 30)
5943 (const_int 15) (const_int 31)])))]
5946 punpckhbw\t{%2, %0|%0, %2}
5947 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5948 [(set_attr "isa" "noavx,avx")
5949 (set_attr "type" "sselog")
5950 (set_attr "prefix_data16" "1,*")
5951 (set_attr "prefix" "orig,vex")
5952 (set_attr "mode" "TI")])
;; Interleave the low halves of two V16QI vectors (punpcklbw / vpunpcklbw).
;; The selection list alternates elements 0..7 with elements 16..23, i.e.
;; bytes 0..7 of operand 1 paired with bytes 0..7 of operand 2 when the two
;; operands are numbered as one 32-element sequence.
5954 (define_insn "vec_interleave_lowv16qi"
5955 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5958 (match_operand:V16QI 1 "register_operand" "0,x")
5959 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5960 (parallel [(const_int 0) (const_int 16)
5961 (const_int 1) (const_int 17)
5962 (const_int 2) (const_int 18)
5963 (const_int 3) (const_int 19)
5964 (const_int 4) (const_int 20)
5965 (const_int 5) (const_int 21)
5966 (const_int 6) (const_int 22)
5967 (const_int 7) (const_int 23)])))]
5970 punpcklbw\t{%2, %0|%0, %2}
5971 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5972 [(set_attr "isa" "noavx,avx")
5973 (set_attr "type" "sselog")
5974 (set_attr "prefix_data16" "1,*")
5975 (set_attr "prefix" "orig,vex")
5976 (set_attr "mode" "TI")])
;; Interleave the high halves of two V8HI vectors (punpckhwd / vpunpckhwd).
;; The selection list alternates elements 4..7 with elements 12..15 of the
;; two operands numbered as one 16-element sequence.
5978 (define_insn "vec_interleave_highv8hi"
5979 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5982 (match_operand:V8HI 1 "register_operand" "0,x")
5983 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5984 (parallel [(const_int 4) (const_int 12)
5985 (const_int 5) (const_int 13)
5986 (const_int 6) (const_int 14)
5987 (const_int 7) (const_int 15)])))]
5990 punpckhwd\t{%2, %0|%0, %2}
5991 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
5992 [(set_attr "isa" "noavx,avx")
5993 (set_attr "type" "sselog")
5994 (set_attr "prefix_data16" "1,*")
5995 (set_attr "prefix" "orig,vex")
5996 (set_attr "mode" "TI")])
;; Interleave the low halves of two V8HI vectors (punpcklwd / vpunpcklwd).
;; The selection list alternates elements 0..3 with elements 8..11 of the
;; two operands numbered as one 16-element sequence.
5998 (define_insn "vec_interleave_lowv8hi"
5999 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6002 (match_operand:V8HI 1 "register_operand" "0,x")
6003 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6004 (parallel [(const_int 0) (const_int 8)
6005 (const_int 1) (const_int 9)
6006 (const_int 2) (const_int 10)
6007 (const_int 3) (const_int 11)])))]
6010 punpcklwd\t{%2, %0|%0, %2}
6011 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6012 [(set_attr "isa" "noavx,avx")
6013 (set_attr "type" "sselog")
6014 (set_attr "prefix_data16" "1,*")
6015 (set_attr "prefix" "orig,vex")
6016 (set_attr "mode" "TI")])
;; Interleave the high halves of two V4SI vectors (punpckhdq / vpunpckhdq).
;; The selection list is elements 2, 6, 3, 7 of the two operands numbered as
;; one 8-element sequence.
6018 (define_insn "vec_interleave_highv4si"
6019 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6022 (match_operand:V4SI 1 "register_operand" "0,x")
6023 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6024 (parallel [(const_int 2) (const_int 6)
6025 (const_int 3) (const_int 7)])))]
6028 punpckhdq\t{%2, %0|%0, %2}
6029 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6030 [(set_attr "isa" "noavx,avx")
6031 (set_attr "type" "sselog")
6032 (set_attr "prefix_data16" "1,*")
6033 (set_attr "prefix" "orig,vex")
6034 (set_attr "mode" "TI")])
;; Interleave the low halves of two V4SI vectors (punpckldq / vpunpckldq).
;; The selection list is elements 0, 4, 1, 5 of the two operands numbered as
;; one 8-element sequence.
6036 (define_insn "vec_interleave_lowv4si"
6037 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6040 (match_operand:V4SI 1 "register_operand" "0,x")
6041 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6042 (parallel [(const_int 0) (const_int 4)
6043 (const_int 1) (const_int 5)])))]
6046 punpckldq\t{%2, %0|%0, %2}
6047 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6048 [(set_attr "isa" "noavx,avx")
6049 (set_attr "type" "sselog")
6050 (set_attr "prefix_data16" "1,*")
6051 (set_attr "prefix" "orig,vex")
6052 (set_attr "mode" "TI")])
;; Insert a QImode scalar (operand 2) into one byte lane of a V16QI vector
;; (operand 1).  Operand 3 arrives as a single-bit mask (a power of two from
;; 1 to 32768) and is converted to the lane index with exact_log2 before
;; output.  The four alternatives are {SSE, VEX} x {register, memory}
;; source; the register forms print operand 2 with the %k modifier.
6054 (define_insn "sse4_1_pinsrb"
6055 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
6057 (vec_duplicate:V16QI
6058 (match_operand:QI 2 "nonimmediate_operand" "r,m,r,m"))
6059 (match_operand:V16QI 1 "register_operand" "0,0,x,x")
6060 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n,n,n,n")))]
6063 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6065 switch (which_alternative)
6068 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6070 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6072 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6074 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6079 [(set_attr "isa" "noavx,noavx,avx,avx")
6080 (set_attr "type" "sselog")
6081 (set_attr "prefix_extra" "1")
6082 (set_attr "length_immediate" "1")
6083 (set_attr "prefix" "orig,orig,vex,vex")
6084 (set_attr "mode" "TI")])
;; Insert an HImode scalar (operand 2) into one word lane of a V8HI vector
;; (operand 1).  Operand 3 arrives as a single-bit mask (a power of two from
;; 1 to 128) and is converted to the lane index with exact_log2 before
;; output.  The four alternatives are {SSE, VEX} x {register, memory}
;; source; the register forms print operand 2 with the %k modifier.
6086 (define_insn "sse2_pinsrw"
6087 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
6090 (match_operand:HI 2 "nonimmediate_operand" "r,m,r,m"))
6091 (match_operand:V8HI 1 "register_operand" "0,0,x,x")
6092 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n,n,n,n")))]
6095 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6097 switch (which_alternative)
6100 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6102 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6104 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6106 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6111 [(set_attr "isa" "noavx,noavx,avx,avx")
6112 (set_attr "type" "sselog")
6113 (set_attr "prefix_data16" "1,1,*,*")
6114 (set_attr "prefix_extra" "*,*,1,1")
6115 (set_attr "length_immediate" "1")
6116 (set_attr "prefix" "orig,orig,vex,vex")
6117 (set_attr "mode" "TI")])
;; Insert an SImode scalar (operand 2, register or memory) into one lane of
;; a V4SI vector (operand 1).  Operand 3 arrives as a single-bit mask (1, 2,
;; 4 or 8) and is converted to the lane index with exact_log2 before output.
6119 ;; It must come before sse2_loadld since it is preferred.
6120 (define_insn "sse4_1_pinsrd"
6121 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6124 (match_operand:SI 2 "nonimmediate_operand" "rm,rm"))
6125 (match_operand:V4SI 1 "register_operand" "0,x")
6126 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
6129 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6131 switch (which_alternative)
6134 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6136 return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6141 [(set_attr "isa" "noavx,avx")
6142 (set_attr "type" "sselog")
6143 (set_attr "prefix_extra" "1")
6144 (set_attr "length_immediate" "1")
6145 (set_attr "prefix" "orig,vex")
6146 (set_attr "mode" "TI")])
;; Insert a DImode scalar (operand 2, register or memory) into one lane of a
;; V2DI vector (operand 1).  Operand 3 arrives as a single-bit mask (1 or 2)
;; and is converted to the lane index with exact_log2 before output.
;; Only available for 64-bit targets with SSE4.1.
6148 (define_insn "sse4_1_pinsrq"
6149 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6152 (match_operand:DI 2 "nonimmediate_operand" "rm,rm"))
6153 (match_operand:V2DI 1 "register_operand" "0,x")
6154 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n,n")))]
6155 "TARGET_SSE4_1 && TARGET_64BIT"
6157 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6159 switch (which_alternative)
6162 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6164 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6169 [(set_attr "isa" "noavx,avx")
6170 (set_attr "type" "sselog")
6171 (set_attr "prefix_rex" "1,*")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "length_immediate" "1")
6174 (set_attr "prefix" "orig,vex")
6175 (set_attr "mode" "TI")])
;; Extract byte lane operand 2 (0..15) of a V16QI register into a general
;; register of an SWI48 mode.  The destination is always printed with the
;; %k modifier, whichever SWI48 mode is being matched.
6177 (define_insn "*sse4_1_pextrb_<mode>"
6178 [(set (match_operand:SWI48 0 "register_operand" "=r")
6181 (match_operand:V16QI 1 "register_operand" "x")
6182 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6184 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6185 [(set_attr "type" "sselog")
6186 (set_attr "prefix_extra" "1")
6187 (set_attr "length_immediate" "1")
6188 (set_attr "prefix" "maybe_vex")
6189 (set_attr "mode" "TI")])
;; Extract byte lane operand 2 (0..15) of a V16QI register and store it
;; directly to a QImode memory location.
6191 (define_insn "*sse4_1_pextrb_memory"
6192 [(set (match_operand:QI 0 "memory_operand" "=m")
6194 (match_operand:V16QI 1 "register_operand" "x")
6195 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6197 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6198 [(set_attr "type" "sselog")
6199 (set_attr "prefix_extra" "1")
6200 (set_attr "length_immediate" "1")
6201 (set_attr "prefix" "maybe_vex")
6202 (set_attr "mode" "TI")])
;; Extract word lane operand 2 (0..7) of a V8HI register into a general
;; register of an SWI48 mode.  The destination is always printed with the
;; %k modifier, whichever SWI48 mode is being matched.
6204 (define_insn "*sse2_pextrw_<mode>"
6205 [(set (match_operand:SWI48 0 "register_operand" "=r")
6208 (match_operand:V8HI 1 "register_operand" "x")
6209 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6211 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6212 [(set_attr "type" "sselog")
6213 (set_attr "prefix_data16" "1")
6214 (set_attr "length_immediate" "1")
6215 (set_attr "prefix" "maybe_vex")
6216 (set_attr "mode" "TI")])
;; Extract word lane operand 2 (0..7) of a V8HI register and store it
;; directly to an HImode memory location.
6218 (define_insn "*sse4_1_pextrw_memory"
6219 [(set (match_operand:HI 0 "memory_operand" "=m")
6221 (match_operand:V8HI 1 "register_operand" "x")
6222 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6224 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6225 [(set_attr "type" "sselog")
6226 (set_attr "prefix_extra" "1")
6227 (set_attr "length_immediate" "1")
6228 (set_attr "prefix" "maybe_vex")
6229 (set_attr "mode" "TI")])
;; Extract dword lane operand 2 (0..3) of a V4SI register into an SImode
;; general register or memory location.
6231 (define_insn "*sse4_1_pextrd"
6232 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6234 (match_operand:V4SI 1 "register_operand" "x")
6235 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6237 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6238 [(set_attr "type" "sselog")
6239 (set_attr "prefix_extra" "1")
6240 (set_attr "length_immediate" "1")
6241 (set_attr "prefix" "maybe_vex")
6242 (set_attr "mode" "TI")])
;; 64-bit variant of the dword extract whose destination is a DImode general
;; register; the template prints that register with the %k modifier.
;; Requires TARGET_64BIT and TARGET_SSE4_1.
6244 (define_insn "*sse4_1_pextrd_zext"
6245 [(set (match_operand:DI 0 "register_operand" "=r")
6248 (match_operand:V4SI 1 "register_operand" "x")
6249 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6250 "TARGET_64BIT && TARGET_SSE4_1"
6251 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6252 [(set_attr "type" "sselog")
6253 (set_attr "prefix_extra" "1")
6254 (set_attr "length_immediate" "1")
6255 (set_attr "prefix" "maybe_vex")
6256 (set_attr "mode" "TI")])
;; Extract qword lane operand 2 (0 or 1) of a V2DI register into a DImode
;; general register or memory location.  Only available for 64-bit targets
;; with SSE4.1.
6258 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
6259 (define_insn "*sse4_1_pextrq"
6260 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6262 (match_operand:V2DI 1 "register_operand" "x")
6263 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6264 "TARGET_SSE4_1 && TARGET_64BIT"
6265 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6266 [(set_attr "type" "sselog")
6267 (set_attr "prefix_rex" "1")
6268 (set_attr "prefix_extra" "1")
6269 (set_attr "length_immediate" "1")
6270 (set_attr "prefix" "maybe_vex")
6271 (set_attr "mode" "TI")])
;; Expander taking the pshufd immediate as a single integer (operand 2).
;; The 8-bit mask is split into four 2-bit lane selectors (bits 0-1, 2-3,
;; 4-5, 6-7) which are passed as separate operands to sse2_pshufd_1.
6273 (define_expand "sse2_pshufd"
6274 [(match_operand:V4SI 0 "register_operand" "")
6275 (match_operand:V4SI 1 "nonimmediate_operand" "")
6276 (match_operand:SI 2 "const_int_operand" "")]
6279 int mask = INTVAL (operands[2]);
6280 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6281 GEN_INT ((mask >> 0) & 3),
6282 GEN_INT ((mask >> 2) & 3),
6283 GEN_INT ((mask >> 4) & 3),
6284 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit lane selectors
;; (operands 2..5, each 0..3).  At output time the selectors are packed back
;; into the 8-bit immediate (two bits each, operand 2 lowest) and stored in
;; operands[2] for the template.
6288 (define_insn "sse2_pshufd_1"
6289 [(set (match_operand:V4SI 0 "register_operand" "=x")
6291 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6292 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6293 (match_operand 3 "const_0_to_3_operand" "")
6294 (match_operand 4 "const_0_to_3_operand" "")
6295 (match_operand 5 "const_0_to_3_operand" "")])))]
6299 mask |= INTVAL (operands[2]) << 0;
6300 mask |= INTVAL (operands[3]) << 2;
6301 mask |= INTVAL (operands[4]) << 4;
6302 mask |= INTVAL (operands[5]) << 6;
6303 operands[2] = GEN_INT (mask);
6305 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6307 [(set_attr "type" "sselog1")
6308 (set_attr "prefix_data16" "1")
6309 (set_attr "prefix" "maybe_vex")
6310 (set_attr "length_immediate" "1")
6311 (set_attr "mode" "TI")])
;; Expander taking the pshuflw immediate as a single integer (operand 2).
;; The 8-bit mask is split into four 2-bit selectors which are passed as
;; separate operands to sse2_pshuflw_1.
6313 (define_expand "sse2_pshuflw"
6314 [(match_operand:V8HI 0 "register_operand" "")
6315 (match_operand:V8HI 1 "nonimmediate_operand" "")
6316 (match_operand:SI 2 "const_int_operand" "")]
6319 int mask = INTVAL (operands[2]);
6320 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6321 GEN_INT ((mask >> 0) & 3),
6322 GEN_INT ((mask >> 2) & 3),
6323 GEN_INT ((mask >> 4) & 3),
6324 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle of the low four words expressed as explicit
;; selectors (operands 2..5, each 0..3).  At output time the selectors are
;; packed back into the 8-bit immediate and stored in operands[2].
;; NOTE(review): the remaining entries of the selection list are not visible
;; in this excerpt; presumably they pass words 4..7 through unchanged.
6328 (define_insn "sse2_pshuflw_1"
6329 [(set (match_operand:V8HI 0 "register_operand" "=x")
6331 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6332 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6333 (match_operand 3 "const_0_to_3_operand" "")
6334 (match_operand 4 "const_0_to_3_operand" "")
6335 (match_operand 5 "const_0_to_3_operand" "")
6343 mask |= INTVAL (operands[2]) << 0;
6344 mask |= INTVAL (operands[3]) << 2;
6345 mask |= INTVAL (operands[4]) << 4;
6346 mask |= INTVAL (operands[5]) << 6;
6347 operands[2] = GEN_INT (mask);
6349 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6351 [(set_attr "type" "sselog")
6352 (set_attr "prefix_data16" "0")
6353 (set_attr "prefix_rep" "1")
6354 (set_attr "prefix" "maybe_vex")
6355 (set_attr "length_immediate" "1")
6356 (set_attr "mode" "TI")])
;; Expander taking the pshufhw immediate as a single integer (operand 2).
;; Each 2-bit selector is extracted from the mask and biased by 4, so the
;; operands handed to sse2_pshufhw_1 are element indices in the range 4..7.
6358 (define_expand "sse2_pshufhw"
6359 [(match_operand:V8HI 0 "register_operand" "")
6360 (match_operand:V8HI 1 "nonimmediate_operand" "")
6361 (match_operand:SI 2 "const_int_operand" "")]
6364 int mask = INTVAL (operands[2]);
6365 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6366 GEN_INT (((mask >> 0) & 3) + 4),
6367 GEN_INT (((mask >> 2) & 3) + 4),
6368 GEN_INT (((mask >> 4) & 3) + 4),
6369 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle of the high four words expressed as explicit
;; element indices (operands 2..5, each 4..7).  At output time the bias of 4
;; is removed from each index and the four 2-bit selectors are packed into
;; the 8-bit immediate, stored in operands[2].
6373 (define_insn "sse2_pshufhw_1"
6374 [(set (match_operand:V8HI 0 "register_operand" "=x")
6376 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6377 (parallel [(const_int 0)
6381 (match_operand 2 "const_4_to_7_operand" "")
6382 (match_operand 3 "const_4_to_7_operand" "")
6383 (match_operand 4 "const_4_to_7_operand" "")
6384 (match_operand 5 "const_4_to_7_operand" "")])))]
6388 mask |= (INTVAL (operands[2]) - 4) << 0;
6389 mask |= (INTVAL (operands[3]) - 4) << 2;
6390 mask |= (INTVAL (operands[4]) - 4) << 4;
6391 mask |= (INTVAL (operands[5]) - 4) << 6;
6392 operands[2] = GEN_INT (mask);
6394 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6396 [(set_attr "type" "sselog")
6397 (set_attr "prefix_rep" "1")
6398 (set_attr "prefix_data16" "0")
6399 (set_attr "prefix" "maybe_vex")
6400 (set_attr "length_immediate" "1")
6401 (set_attr "mode" "TI")])
;; Expander that loads an SImode scalar (operand 1) into a V4SI register.
;; The preparation statement supplies operands[2] as the all-zero V4SI
;; constant.
;; NOTE(review): the part of the pattern that uses operand 2 is not visible
;; in this excerpt.
6403 (define_expand "sse2_loadd"
6404 [(set (match_operand:V4SI 0 "register_operand" "")
6407 (match_operand:SI 1 "nonimmediate_operand" ""))
6411 "operands[2] = CONST0_RTX (V4SImode);")
;; Place an SImode scalar (operand 2) into a V4SI register whose other
;; contents come from operand 1 (a register or constant zero).
;; Alternatives, in order:
;;   0-1: %vmovd from memory / general register, operand 1 is zero ("C");
;;   2:   movss from memory, operand 1 is zero;
;;   3:   movss register-to-register, destination tied to operand 1;
;;   4:   vmovss three-operand VEX form.
6413 (define_insn "sse2_loadld"
6414 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6417 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6418 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6422 %vmovd\t{%2, %0|%0, %2}
6423 %vmovd\t{%2, %0|%0, %2}
6424 movss\t{%2, %0|%0, %2}
6425 movss\t{%2, %0|%0, %2}
6426 vmovss\t{%2, %1, %0|%0, %1, %2}"
6427 [(set_attr "isa" "base,base,noavx,noavx,avx")
6428 (set_attr "type" "ssemov")
6429 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6430 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination.  After
;; reload the insn is split into a plain SImode move, with operand 1
;; rewritten as the SImode register of the same hard register number.  The
;; split is applied when inter-unit moves are allowed, when the destination
;; is memory, or when the destination is not a general register.
6432 (define_insn_and_split "sse2_stored"
6433 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6435 (match_operand:V4SI 1 "register_operand" "x,Yi")
6436 (parallel [(const_int 0)])))]
6439 "&& reload_completed
6440 && (TARGET_INTER_UNIT_MOVES
6441 || MEM_P (operands [0])
6442 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6443 [(set (match_dup 0) (match_dup 1))]
6444 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI value that lives in
;; offsettable memory ("o").  The split replaces the vector access with a
;; scalar SImode move from the same address plus 4 * index bytes.
;; NOTE(review): the insn condition and split condition are not visible in
;; this excerpt.
6446 (define_insn_and_split "*vec_ext_v4si_mem"
6447 [(set (match_operand:SI 0 "register_operand" "=r")
6449 (match_operand:V4SI 1 "memory_operand" "o")
6450 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6456 int i = INTVAL (operands[2]);
6458 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of a V2DI register into a DImode
;; destination.  No preparation code is visible; it only emits the pattern.
6462 (define_expand "sse_storeq"
6463 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6465 (match_operand:V2DI 1 "register_operand" "")
6466 (parallel [(const_int 0)])))]
;; 64-bit-target pattern for reading element 0 of a V2DI operand into a
;; DImode destination; memory-to-memory is rejected by the condition.
;; Only the third alternative (general register from offsettable memory)
;; has a visible template, a DImode integer move; the attribute values for
;; the first two alternatives are all "*".
;; NOTE(review): the templates of the first two alternatives are not visible
;; in this excerpt.
6469 (define_insn "*sse2_storeq_rex64"
6470 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6472 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6473 (parallel [(const_int 0)])))]
6474 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6478 %vmov{q}\t{%1, %0|%0, %1}"
6479 [(set_attr "type" "*,*,imov")
6480 (set_attr "prefix" "*,*,maybe_vex")
6481 (set_attr "mode" "*,*,DI")])
;; Pattern for storing element 0 of a V2DI SSE register into a DImode SSE
;; register or memory location.
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
6483 (define_insn "*sse2_storeq"
6484 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6486 (match_operand:V2DI 1 "register_operand" "x")
6487 (parallel [(const_int 0)])))]
6492 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6494 (match_operand:V2DI 1 "register_operand" "")
6495 (parallel [(const_int 0)])))]
;; Split condition (its first line is not visible in this excerpt): applies
;; when inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register.  The replacement is a plain DImode
;; move, with operand 1 rewritten as the DImode register of the same hard
;; register number.
6498 && (TARGET_INTER_UNIT_MOVES
6499 || MEM_P (operands [0])
6500 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6501 [(set (match_dup 0) (match_dup 1))]
6502 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 (the high quadword) of a V2DI operand on 64-bit
;; targets; memory-to-memory is rejected.  Alternatives, in order:
;;   0: %vmovhps, register to memory;
;;   1: psrldq by 8 bytes in place (destination tied to operand 1);
;;   2: vpsrldq by 8 bytes, three-operand VEX form;
;;   3: %vmovq from the high half of an offsettable memory operand (%H1);
;;   4: integer move of that high half into a general register.
6504 (define_insn "*vec_extractv2di_1_rex64"
6505 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6507 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6508 (parallel [(const_int 1)])))]
6509 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6511 %vmovhps\t{%1, %0|%0, %1}
6512 psrldq\t{$8, %0|%0, 8}
6513 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6514 %vmovq\t{%H1, %0|%0, %H1}
6515 %vmov{q}\t{%H1, %0|%0, %H1}"
6516 [(set_attr "isa" "base,noavx,avx,base,base")
6517 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6518 (set_attr "length_immediate" "*,1,1,*,*")
6519 (set_attr "memory" "*,none,none,*,*")
6520 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,maybe_vex")
6521 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 (the high quadword) of a V2DI operand with SSE2;
;; memory-to-memory is rejected.  Same alternatives as the rex64 variant
;; minus the general-register destination: %vmovhps to memory, psrldq /
;; vpsrldq by 8 bytes, or %vmovq from the high half of memory (%H1).
;; NOTE(review): the first line of the condition is not visible here.
6523 (define_insn "*vec_extractv2di_1_sse2"
6524 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x")
6526 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o")
6527 (parallel [(const_int 1)])))]
6529 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6531 %vmovhps\t{%1, %0|%0, %1}
6532 psrldq\t{$8, %0|%0, 8}
6533 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6534 %vmovq\t{%H1, %0|%0, %H1}"
6535 [(set_attr "isa" "base,noavx,avx,base")
6536 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov")
6537 (set_attr "length_immediate" "*,1,1,*")
6538 (set_attr "memory" "*,none,none,*")
6539 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex")
6540 (set_attr "mode" "V2SF,TI,TI,TI")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DI operand; memory-to-memory is rejected.  Alternatives: movhps to
;; memory, movhlps register to register, movlps from the high half of an
;; offsettable memory operand (%H1).
6542 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
6543 (define_insn "*vec_extractv2di_1_sse"
6544 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6546 (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o")
6547 (parallel [(const_int 1)])))]
6548 "!TARGET_SSE2 && TARGET_SSE
6549 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6551 movhps\t{%1, %0|%0, %1}
6552 movhlps\t{%1, %0|%0, %1}
6553 movlps\t{%H1, %0|%0, %H1}"
6554 [(set_attr "type" "ssemov")
6555 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded broadcast of an SImode scalar into all lanes of a V4SI
;; register: vpshufd with immediate 0 when the source is an SSE register,
;; vbroadcastss when it is in memory.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this excerpt.
6557 (define_insn "*vec_dupv4si_avx"
6558 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6560 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6563 vpshufd\t{$0, %1, %0|%0, %1, 0}
6564 vbroadcastss\t{%1, %0|%0, %1}"
6565 [(set_attr "type" "sselog1,ssemov")
6566 (set_attr "length_immediate" "1,0")
6567 (set_attr "prefix_extra" "0,1")
6568 (set_attr "prefix" "vex")
6569 (set_attr "mode" "TI,V4SF")])
;; Non-VEX broadcast of an SImode register into all lanes of a V4SI
;; register: pshufd with immediate 0 on the "Y2" alternative, or shufps of
;; the destination with itself when the source is tied to the destination.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this excerpt.
6571 (define_insn "*vec_dupv4si"
6572 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6574 (match_operand:SI 1 "register_operand" " Y2,0")))]
6577 pshufd\t{$0, %1, %0|%0, %1, 0}
6578 shufps\t{$0, %0, %0|%0, %0, 0}"
6579 [(set_attr "type" "sselog1")
6580 (set_attr "length_immediate" "1")
6581 (set_attr "mode" "TI,V4SF")])
;; Broadcast of a DImode scalar into both lanes of a V2DI register.  The
;; visible alternatives are vpunpcklqdq of the source with itself (VEX,
;; register source) and %vmovddup from memory.
;; NOTE(review): the template of alternative 0 (non-VEX, source tied to the
;; destination), the RTL operator line and the condition are not visible in
;; this excerpt.
6583 (define_insn "*vec_dupv2di_sse3"
6584 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6586 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6590 vpunpcklqdq\t{%d1, %0|%0, %d1}
6591 %vmovddup\t{%1, %0|%0, %1}"
6592 [(set_attr "isa" "noavx,avx,base")
6593 (set_attr "type" "sselog1")
6594 (set_attr "prefix" "orig,vex,maybe_vex")
6595 (set_attr "mode" "TI,TI,DF")])
;; Builds a V2DI register from a single DI register that is tied to the
;; destination in both alternatives (class Y2 or x).  The first alternative
;; is typed sselog1/TI, the second ssemov/V4SF.
6597 (define_insn "*vec_dupv2di"
6598 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6600 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6605 [(set_attr "type" "sselog1,ssemov")
6606 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SI operands.  Seven alternatives:
;;   0/1  pinsrd / vpinsrd inserting operand 2 (register or memory) at index 1
;;   2/3  punpckldq / vpunpckldq with operand 2 in an SSE register
;;   4    %vmovd of operand 1 when operand 2 matches constraint C
;;   5/6  MMX-register ("y") forms: punpckldq, or movd when operand 2 is C
;; NOTE(review): C is presumably the constant-zero constraint; confirm in
;; constraints.md.
6608 (define_insn "*vec_concatv2si_sse4_1"
6609 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6611 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6612 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6615 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6616 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6617 punpckldq\t{%2, %0|%0, %2}
6618 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6619 %vmovd\t{%1, %0|%0, %1}
6620 punpckldq\t{%2, %0|%0, %2}
6621 movd\t{%1, %0|%0, %1}"
6622 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
6623 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6624 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6625 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6626 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6627 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6629 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6630 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6631 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI value from two SI operands; operand 2 must be a register or
;; zero (reg_or_0_operand).  Alternatives 0/1 use SSE registers (punpckldq,
;; or movd of operand 1 when operand 2 matches C); alternatives 2/3 are the
;; same pair on MMX ("y") registers.
6632 (define_insn "*vec_concatv2si_sse2"
6633 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6635 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6636 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6639 punpckldq\t{%2, %0|%0, %2}
6640 movd\t{%1, %0|%0, %1}
6641 punpckldq\t{%2, %0|%0, %2}
6642 movd\t{%1, %0|%0, %1}"
6643 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6644 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI value from two SI operands using float-domain SSE
;; instructions for the SSE-register alternatives: unpcklps, or movss from
;; memory when operand 2 matches C.  Alternatives 2/3 use MMX ("y")
;; registers with punpckldq / movd.
6646 (define_insn "*vec_concatv2si_sse"
6647 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6649 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6650 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6653 unpcklps\t{%2, %0|%0, %2}
6654 movss\t{%1, %0|%0, %1}
6655 punpckldq\t{%2, %0|%0, %2}
6656 movd\t{%1, %0|%0, %1}"
6657 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6658 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded pattern building a V4SI register from two V2SI operands:
;; vpunpcklqdq when operand 2 is a register, vmovhps when it is in memory.
6660 (define_insn "*vec_concatv4si_1_avx"
6661 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6663 (match_operand:V2SI 1 "register_operand" " x,x")
6664 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
6667 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6668 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6669 [(set_attr "type" "sselog,ssemov")
6670 (set_attr "prefix" "vex")
6671 (set_attr "mode" "TI,V2SF")])
;; Legacy-encoded pattern building a V4SI register from two V2SI operands,
;; with operand 1 tied to the destination.  Alternatives: punpcklqdq
;; (operand 2 in class Y2), movlhps (operand 2 in an x register), movhps
;; (operand 2 in memory).
6673 (define_insn "*vec_concatv4si_1"
6674 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
6676 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
6677 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
6680 punpcklqdq\t{%2, %0|%0, %2}
6681 movlhps\t{%2, %0|%0, %2}
6682 movhps\t{%2, %0|%0, %2}"
6683 [(set_attr "type" "sselog,ssemov,ssemov")
6684 (set_attr "mode" "TI,V4SF,V2SF")])
;; 64-bit, SSE4.1 pattern building a V2DI register from two DI operands.
;; Nine alternatives:
;;   0/1  pinsrq / vpinsrq inserting operand 2 at index 1
;;   2/3  %vmovq of operand 1 (SSE reg/memory, or general register via Yi)
;;        when operand 2 matches C
;;   4    movq2dq from an MMX ("y") register when operand 2 matches C
;;   5/6  punpcklqdq / vpunpcklqdq with operand 2 in an SSE register
;;   7/8  movhps / vmovhps with operand 2 in memory
;; prefix_rex is computed specially for alternatives 0 and 3 when AVX is off.
6686 (define_insn "*vec_concatv2di_rex64_sse4_1"
6687 [(set (match_operand:V2DI 0 "register_operand"
6688 "=x, x, x,Yi,!x,x,x,x,x")
6690 (match_operand:DI 1 "nonimmediate_operand"
6691 " 0, x,xm,r ,*y,0,x,0,x")
6692 (match_operand:DI 2 "vector_move_operand"
6693 "rm,rm, C,C ,C ,x,x,m,m")))]
6694 "TARGET_64BIT && TARGET_SSE4_1"
6696 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6697 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6698 %vmovq\t{%1, %0|%0, %1}
6699 %vmovq\t{%1, %0|%0, %1}
6700 movq2dq\t{%1, %0|%0, %1}
6701 punpcklqdq\t{%2, %0|%0, %2}
6702 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6703 movhps\t{%2, %0|%0, %2}
6704 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6705 [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx")
6706 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
6707 (set (attr "prefix_rex")
6709 (and (eq_attr "alternative" "0,3")
6710 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6712 (const_string "*")))
6713 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6714 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6715 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6716 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 64-bit SSE pattern building a V2DI register from two DI operands.
;; Six alternatives: movq from an SSE register/memory or from a general
;; register (Yi destination, prefix_rex forced to 1), movq2dq from an MMX
;; ("y") register, then punpcklqdq / movlhps / movhps with operand 1 tied to
;; the destination and operand 2 in a Y2 register, x register, or memory.
6718 (define_insn "*vec_concatv2di_rex64_sse"
6719 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x")
6721 (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0")
6722 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
6723 "TARGET_64BIT && TARGET_SSE"
6725 movq\t{%1, %0|%0, %1}
6726 movq\t{%1, %0|%0, %1}
6727 movq2dq\t{%1, %0|%0, %1}
6728 punpcklqdq\t{%2, %0|%0, %2}
6729 movlhps\t{%2, %0|%0, %2}
6730 movhps\t{%2, %0|%0, %2}"
6731 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
6732 (set_attr "prefix_rex" "*,1,*,*,*,*")
6733 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; 32-bit (!TARGET_64BIT) SSE pattern building a V2DI register from two DI
;; operands.  Seven alternatives: %vmovq or movq2dq when operand 2 matches C,
;; punpcklqdq / vpunpcklqdq for register operand 2, movlhps for an x-register
;; operand 2, and movhps / vmovhps when operand 2 is in memory.
6735 (define_insn "vec_concatv2di"
6736 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6738 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6739 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6740 "!TARGET_64BIT && TARGET_SSE"
6742 %vmovq\t{%1, %0|%0, %1}
6743 movq2dq\t{%1, %0|%0, %1}
6744 punpcklqdq\t{%2, %0|%0, %2}
6745 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6746 movlhps\t{%2, %0|%0, %2}
6747 movhps\t{%2, %0|%0, %2}
6748 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6749 [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx")
6750 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6751 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6752 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the "unpacks_lo" operation on VI124_128 modes; the result
;; has mode <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): judging by the sibling expanders, the flags appear to mean
;; (unsigned, high half); confirm against ix86_expand_sse_unpack.
6754 (define_expand "vec_unpacks_lo_<mode>"
6755 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6756 (match_operand:VI124_128 1 "register_operand" "")]
6758 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for the "unpacks_hi" operation on VI124_128 modes; the result
;; has mode <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): judging by the sibling expanders, the flags appear to mean
;; (unsigned, high half); confirm against ix86_expand_sse_unpack.
6760 (define_expand "vec_unpacks_hi_<mode>"
6761 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6762 (match_operand:VI124_128 1 "register_operand" "")]
6764 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for the "unpacku_lo" operation on VI124_128 modes; the result
;; has mode <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): judging by the sibling expanders, the flags appear to mean
;; (unsigned, high half); confirm against ix86_expand_sse_unpack.
6766 (define_expand "vec_unpacku_lo_<mode>"
6767 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6768 (match_operand:VI124_128 1 "register_operand" "")]
6770 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for the "unpacku_hi" operation on VI124_128 modes; the result
;; has mode <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): judging by the sibling expanders, the flags appear to mean
;; (unsigned, high half); confirm against ix86_expand_sse_unpack.
6772 (define_expand "vec_unpacku_hi_<mode>"
6773 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6774 (match_operand:VI124_128 1 "register_operand" "")]
6776 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6778 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6782 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" operation.  The RTL combines two V16QI
;; nonimmediate operands with an all-ones V16QI constant vector; before
;; emission the operands are legitimized as a PLUS-style binary operation
;; via ix86_fixup_binary_operands_no_copy.
6784 (define_expand "sse2_uavgv16qi3"
6785 [(set (match_operand:V16QI 0 "register_operand" "")
6791 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6793 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6794 (const_vector:V16QI [(const_int 1) (const_int 1)
6795 (const_int 1) (const_int 1)
6796 (const_int 1) (const_int 1)
6797 (const_int 1) (const_int 1)
6798 (const_int 1) (const_int 1)
6799 (const_int 1) (const_int 1)
6800 (const_int 1) (const_int 1)
6801 (const_int 1) (const_int 1)]))
6804 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V16QI "uavg" RTL.  Requires SSE2 and operands that
;; pass ix86_binary_operator_ok for PLUS.  Operand 1 is marked commutative
;; ("%").  Emits pavgb (destination tied to operand 1) or the three-operand
;; vpavgb under AVX.
6806 (define_insn "*sse2_uavgv16qi3"
6807 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6813 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6815 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6816 (const_vector:V16QI [(const_int 1) (const_int 1)
6817 (const_int 1) (const_int 1)
6818 (const_int 1) (const_int 1)
6819 (const_int 1) (const_int 1)
6820 (const_int 1) (const_int 1)
6821 (const_int 1) (const_int 1)
6822 (const_int 1) (const_int 1)
6823 (const_int 1) (const_int 1)]))
6825 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6827 pavgb\t{%2, %0|%0, %2}
6828 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6829 [(set_attr "isa" "noavx,avx")
6830 (set_attr "type" "sseiadd")
6831 (set_attr "prefix_data16" "1,*")
6832 (set_attr "prefix" "orig,vex")
6833 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" operation.  The RTL combines two V8HI
;; nonimmediate operands with an all-ones V8HI constant vector; before
;; emission the operands are legitimized as a PLUS-style binary operation
;; via ix86_fixup_binary_operands_no_copy.
6835 (define_expand "sse2_uavgv8hi3"
6836 [(set (match_operand:V8HI 0 "register_operand" "")
6842 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6844 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6845 (const_vector:V8HI [(const_int 1) (const_int 1)
6846 (const_int 1) (const_int 1)
6847 (const_int 1) (const_int 1)
6848 (const_int 1) (const_int 1)]))
6851 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V8HI "uavg" RTL.  Requires SSE2 and operands that
;; pass ix86_binary_operator_ok for PLUS.  Operand 1 is marked commutative
;; ("%").  Emits pavgw (destination tied to operand 1) or the three-operand
;; vpavgw under AVX.
6853 (define_insn "*sse2_uavgv8hi3"
6854 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6860 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6862 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6863 (const_vector:V8HI [(const_int 1) (const_int 1)
6864 (const_int 1) (const_int 1)
6865 (const_int 1) (const_int 1)
6866 (const_int 1) (const_int 1)]))
6868 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6870 pavgw\t{%2, %0|%0, %2}
6871 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6872 [(set_attr "isa" "noavx,avx")
6873 (set_attr "type" "sseiadd")
6874 (set_attr "prefix_data16" "1,*")
6875 (set_attr "prefix" "orig,vex")
6876 (set_attr "mode" "TI")])
6878 ;; The correct representation for this is absolutely enormous, and
6879 ;; surely not generally useful.
;; psadbw is modelled as an opaque unspec taking two V16QI inputs and
;; producing a V2DI result.  Emits psadbw (destination tied to operand 1) or
;; the three-operand vpsadbw under AVX.
6880 (define_insn "sse2_psadbw"
6881 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6882 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6883 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6887 psadbw\t{%2, %0|%0, %2}
6888 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6889 [(set_attr "isa" "noavx,avx")
6890 (set_attr "type" "sseiadd")
6891 (set_attr "atom_unit" "simul")
6892 (set_attr "prefix_data16" "1,*")
6893 (set_attr "prefix" "orig,vex")
6894 (set_attr "mode" "TI")])
;; movmskps/movmskpd family, instantiated for every mode of the VF iterator.
;; Reads a vector float register and writes an SI general register; the
;; mnemonic suffix comes from <ssemodesuffix> and %v selects the VEX form
;; when applicable (prefix is maybe_vex).
6896 (define_insn "<sse>_movmsk<ssemodesuffix><avxmodesuffix>"
6897 [(set (match_operand:SI 0 "register_operand" "=r")
6899 [(match_operand:VF 1 "register_operand" "x")]
6902 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6903 [(set_attr "type" "ssemov")
6904 (set_attr "prefix" "maybe_vex")
6905 (set_attr "mode" "<MODE>")])
;; pmovmskb modelled as an unspec: reads a V16QI SSE register and writes an
;; SI general register.  %v selects the VEX form when applicable.
6907 (define_insn "sse2_pmovmskb"
6908 [(set (match_operand:SI 0 "register_operand" "=r")
6909 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6912 "%vpmovmskb\t{%1, %0|%0, %1}"
6913 [(set_attr "type" "ssemov")
6914 (set_attr "prefix_data16" "1")
6915 (set_attr "prefix" "maybe_vex")
6916 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) is set
;; from an unspec over two V16QI register operands.
;; NOTE(review): the remaining unspec operand(s) and the enabling condition
;; are not visible here; confirm against the complete pattern.
6918 (define_expand "sse2_maskmovdqu"
6919 [(set (match_operand:V16QI 0 "memory_operand" "")
6920 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6921 (match_operand:V16QI 2 "register_operand" "")
;; Insn for maskmovdqu.  The destination is memory addressed by operand 0,
;; a Pmode register constrained to "D"; the unspec also reads the old
;; contents of that memory via (match_dup 0).  The template prints only
;; operands 1 and 2 because the address register is implicit, which is why
;; length_vex is computed by hand below.
6926 (define_insn "*sse2_maskmovdqu"
6927 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6928 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6929 (match_operand:V16QI 2 "register_operand" "x")
6930 (mem:V16QI (match_dup 0))]
6933 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6934 [(set_attr "type" "ssemov")
6935 (set_attr "prefix_data16" "1")
6936 ;; The implicit %rdi operand confuses default length_vex computation.
6937 (set (attr "length_vex")
6938 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6939 (set_attr "prefix" "maybe_vex")
6940 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile that reads an SI memory operand (memory
;; attribute "load").  Scheduled as an MXCSR access on Atom.
6942 (define_insn "sse_ldmxcsr"
6943 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6947 [(set_attr "type" "sse")
6948 (set_attr "atom_sse_attr" "mxcsr")
6949 (set_attr "prefix" "maybe_vex")
6950 (set_attr "memory" "load")])
;; stmxcsr: stores the UNSPECV_STMXCSR unspec_volatile value into an SI
;; memory operand (memory attribute "store").  Scheduled as an MXCSR access
;; on Atom.
6952 (define_insn "sse_stmxcsr"
6953 [(set (match_operand:SI 0 "memory_operand" "=m")
6954 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6957 [(set_attr "type" "sse")
6958 (set_attr "atom_sse_attr" "mxcsr")
6959 (set_attr "prefix" "maybe_vex")
6960 (set_attr "memory" "store")])
;; Expander for sfence, available with SSE or 3DNow!-A.  Operand 0 is
;; synthesized here as a volatile BLKmode MEM on a scratch address, so the
;; fence is represented as a set of that memory from UNSPEC_SFENCE of itself.
6962 (define_expand "sse_sfence"
6964 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6965 "TARGET_SSE || TARGET_3DNOW_A"
6967 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6968 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence RTL: a BLK operand set from UNSPEC_SFENCE of
;; itself.  No address bytes are encoded (length_address 0) and the memory
;; effect is declared "unknown".
6971 (define_insn "*sse_sfence"
6972 [(set (match_operand:BLK 0 "" "")
6973 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6974 "TARGET_SSE || TARGET_3DNOW_A"
6976 [(set_attr "type" "sse")
6977 (set_attr "length_address" "0")
6978 (set_attr "atom_sse_attr" "fence")
6979 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile over an address operand (constraint "p").
;; Treated like a fence for Atom scheduling; memory effect is "unknown".
6981 (define_insn "sse2_clflush"
6982 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6986 [(set_attr "type" "sse")
6987 (set_attr "atom_sse_attr" "fence")
6988 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is synthesized here as a volatile BLKmode
;; MEM on a scratch address, so the fence is represented as a set of that
;; memory from UNSPEC_MFENCE of itself.
6990 (define_expand "sse2_mfence"
6992 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6995 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6996 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence RTL: a BLK operand set from UNSPEC_MFENCE of
;; itself.  Enabled for any 64-bit target or when SSE2 is available.  No
;; address bytes are encoded and the memory effect is declared "unknown".
6999 (define_insn "*sse2_mfence"
7000 [(set (match_operand:BLK 0 "" "")
7001 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7002 "TARGET_64BIT || TARGET_SSE2"
7004 [(set_attr "type" "sse")
7005 (set_attr "length_address" "0")
7006 (set_attr "atom_sse_attr" "fence")
7007 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is synthesized here as a volatile BLKmode
;; MEM on a scratch address, so the fence is represented as a set of that
;; memory from UNSPEC_LFENCE of itself.
7009 (define_expand "sse2_lfence"
7011 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7014 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7015 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence RTL: a BLK operand set from UNSPEC_LFENCE of
;; itself.  Unlike the sfence/mfence insns, the Atom attribute is "lfence"
;; rather than "fence".
7018 (define_insn "*sse2_lfence"
7019 [(set (match_operand:BLK 0 "" "")
7020 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7023 [(set_attr "type" "sse")
7024 (set_attr "length_address" "0")
7025 (set_attr "atom_sse_attr" "lfence")
7026 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile taking two SI register operands pinned to the
;; "a" and "c" register classes.  The instruction length is fixed at 3 bytes.
7028 (define_insn "sse3_mwait"
7029 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7030 (match_operand:SI 1 "register_operand" "c")]
7033 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7034 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7035 ;; we only need to set up 32bit registers.
7037 [(set_attr "length" "3")])
;; 32-bit monitor: an unspec_volatile taking three SI register operands
;; pinned to the "a", "c" and "d" register classes.  Requires SSE3 and a
;; non-64-bit target; the instruction length is fixed at 3 bytes.
7039 (define_insn "sse3_monitor"
7040 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7041 (match_operand:SI 1 "register_operand" "c")
7042 (match_operand:SI 2 "register_operand" "d")]
7044 "TARGET_SSE3 && !TARGET_64BIT"
7045 "monitor\t%0, %1, %2"
7046 [(set_attr "length" "3")])
;; 64-bit monitor: same shape as sse3_monitor, but operand 0 (class "a") is
;; DImode while operands 1 and 2 (classes "c" and "d") remain SImode.
;; Requires SSE3 and a 64-bit target; length is fixed at 3 bytes.
7048 (define_insn "sse3_monitor64"
7049 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7050 (match_operand:SI 1 "register_operand" "c")
7051 (match_operand:SI 2 "register_operand" "d")]
7053 "TARGET_SSE3 && TARGET_64BIT"
7054 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7055 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7056 ;; zero extended to 64bit, we only need to set up 32bit registers.
7058 [(set_attr "length" "3")])
7060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7062 ;; SSSE3 instructions
7064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw on V8HI: the RTL pairs adjacent HI elements (0,1), (2,3), (4,5),
;; (6,7) of operand 1 and then of operand 2.  Emits phaddw (destination tied
;; to operand 1) or the three-operand vphaddw under AVX.
7066 (define_insn "ssse3_phaddwv8hi3"
7067 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7073 (match_operand:V8HI 1 "register_operand" "0,x")
7074 (parallel [(const_int 0)]))
7075 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7077 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7078 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7081 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7082 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7084 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7085 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7090 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7091 (parallel [(const_int 0)]))
7092 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7094 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7095 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7098 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7099 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7101 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7102 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7105 phaddw\t{%2, %0|%0, %2}
7106 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7107 [(set_attr "isa" "noavx,avx")
7108 (set_attr "type" "sseiadd")
7109 (set_attr "atom_unit" "complex")
7110 (set_attr "prefix_data16" "1,*")
7111 (set_attr "prefix_extra" "1")
7112 (set_attr "prefix" "orig,vex")
7113 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phaddw on V4HI: the RTL pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  The
;; destination is tied to operand 1; prefix_rex is derived from whether the
;; insn mentions an extended register.
7115 (define_insn "ssse3_phaddwv4hi3"
7116 [(set (match_operand:V4HI 0 "register_operand" "=y")
7121 (match_operand:V4HI 1 "register_operand" "0")
7122 (parallel [(const_int 0)]))
7123 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7125 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7126 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7130 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7131 (parallel [(const_int 0)]))
7132 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7134 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7135 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7137 "phaddw\t{%2, %0|%0, %2}"
7138 [(set_attr "type" "sseiadd")
7139 (set_attr "atom_unit" "complex")
7140 (set_attr "prefix_extra" "1")
7141 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7142 (set_attr "mode" "DI")])
;; phaddd on V4SI: the RTL pairs adjacent SI elements (0,1), (2,3) of
;; operand 1 and then of operand 2.  Emits phaddd (destination tied to
;; operand 1) or the three-operand vphaddd under AVX.
7144 (define_insn "ssse3_phadddv4si3"
7145 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7150 (match_operand:V4SI 1 "register_operand" "0,x")
7151 (parallel [(const_int 0)]))
7152 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7154 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7155 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7159 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7160 (parallel [(const_int 0)]))
7161 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7163 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7164 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7167 phaddd\t{%2, %0|%0, %2}
7168 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7169 [(set_attr "isa" "noavx,avx")
7170 (set_attr "type" "sseiadd")
7171 (set_attr "atom_unit" "complex")
7172 (set_attr "prefix_data16" "1,*")
7173 (set_attr "prefix_extra" "1")
7174 (set_attr "prefix" "orig,vex")
7175 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phaddd on V2SI: the RTL pairs SI elements
;; (0,1) of operand 1 and then of operand 2.  The destination is tied to
;; operand 1; prefix_rex is derived from whether the insn mentions an
;; extended register.
7177 (define_insn "ssse3_phadddv2si3"
7178 [(set (match_operand:V2SI 0 "register_operand" "=y")
7182 (match_operand:V2SI 1 "register_operand" "0")
7183 (parallel [(const_int 0)]))
7184 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7187 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7188 (parallel [(const_int 0)]))
7189 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7191 "phaddd\t{%2, %0|%0, %2}"
7192 [(set_attr "type" "sseiadd")
7193 (set_attr "atom_unit" "complex")
7194 (set_attr "prefix_extra" "1")
7195 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7196 (set_attr "mode" "DI")])
;; phaddsw on V8HI: the RTL pairs adjacent HI elements (0,1), (2,3), (4,5),
;; (6,7) of operand 1 and then of operand 2.  Emits phaddsw (destination
;; tied to operand 1) or the three-operand vphaddsw under AVX.
7198 (define_insn "ssse3_phaddswv8hi3"
7199 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7205 (match_operand:V8HI 1 "register_operand" "0,x")
7206 (parallel [(const_int 0)]))
7207 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7209 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7210 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7213 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7214 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7216 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7217 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7222 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7223 (parallel [(const_int 0)]))
7224 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7226 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7227 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7230 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7231 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7233 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7234 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7237 phaddsw\t{%2, %0|%0, %2}
7238 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7239 [(set_attr "isa" "noavx,avx")
7240 (set_attr "type" "sseiadd")
7241 (set_attr "atom_unit" "complex")
7242 (set_attr "prefix_data16" "1,*")
7243 (set_attr "prefix_extra" "1")
7244 (set_attr "prefix" "orig,vex")
7245 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phaddsw on V4HI: the RTL pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  The
;; destination is tied to operand 1; prefix_rex is derived from whether the
;; insn mentions an extended register.
7247 (define_insn "ssse3_phaddswv4hi3"
7248 [(set (match_operand:V4HI 0 "register_operand" "=y")
7253 (match_operand:V4HI 1 "register_operand" "0")
7254 (parallel [(const_int 0)]))
7255 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7257 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7258 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7262 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7263 (parallel [(const_int 0)]))
7264 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7266 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7267 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7269 "phaddsw\t{%2, %0|%0, %2}"
7270 [(set_attr "type" "sseiadd")
7271 (set_attr "atom_unit" "complex")
7272 (set_attr "prefix_extra" "1")
7273 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7274 (set_attr "mode" "DI")])
;; phsubw on V8HI: the RTL pairs adjacent HI elements (0,1), (2,3), (4,5),
;; (6,7) of operand 1 and then of operand 2.  Emits phsubw (destination tied
;; to operand 1) or the three-operand vphsubw under AVX.
7276 (define_insn "ssse3_phsubwv8hi3"
7277 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7283 (match_operand:V8HI 1 "register_operand" "0,x")
7284 (parallel [(const_int 0)]))
7285 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7287 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7288 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7291 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7292 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7294 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7295 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7300 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7301 (parallel [(const_int 0)]))
7302 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7304 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7305 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7308 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7309 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7311 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7312 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7315 phsubw\t{%2, %0|%0, %2}
7316 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7317 [(set_attr "isa" "noavx,avx")
7318 (set_attr "type" "sseiadd")
7319 (set_attr "atom_unit" "complex")
7320 (set_attr "prefix_data16" "1,*")
7321 (set_attr "prefix_extra" "1")
7322 (set_attr "prefix" "orig,vex")
7323 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phsubw on V4HI: the RTL pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  The
;; destination is tied to operand 1; prefix_rex is derived from whether the
;; insn mentions an extended register.
7325 (define_insn "ssse3_phsubwv4hi3"
7326 [(set (match_operand:V4HI 0 "register_operand" "=y")
7331 (match_operand:V4HI 1 "register_operand" "0")
7332 (parallel [(const_int 0)]))
7333 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7335 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7336 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7340 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7341 (parallel [(const_int 0)]))
7342 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7344 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7345 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7347 "phsubw\t{%2, %0|%0, %2}"
7348 [(set_attr "type" "sseiadd")
7349 (set_attr "atom_unit" "complex")
7350 (set_attr "prefix_extra" "1")
7351 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7352 (set_attr "mode" "DI")])
;; phsubd on V4SI: the RTL pairs adjacent SI elements (0,1), (2,3) of
;; operand 1 and then of operand 2.  Emits phsubd (destination tied to
;; operand 1) or the three-operand vphsubd under AVX.
7354 (define_insn "ssse3_phsubdv4si3"
7355 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7360 (match_operand:V4SI 1 "register_operand" "0,x")
7361 (parallel [(const_int 0)]))
7362 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7364 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7365 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7369 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7370 (parallel [(const_int 0)]))
7371 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7373 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7374 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7377 phsubd\t{%2, %0|%0, %2}
7378 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7380 [(set_attr "isa" "noavx,avx")
7381 (set_attr "type" "sseiadd")
7382 (set_attr "atom_unit" "complex")
7383 (set_attr "prefix_data16" "1,*")
7384 (set_attr "prefix_extra" "1")
7385 (set_attr "prefix" "orig,vex")
7386 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phsubd on V2SI: the RTL pairs SI elements
;; (0,1) of operand 1 and then of operand 2.  The destination is tied to
;; operand 1; prefix_rex is derived from whether the insn mentions an
;; extended register.
7388 (define_insn "ssse3_phsubdv2si3"
7389 [(set (match_operand:V2SI 0 "register_operand" "=y")
7393 (match_operand:V2SI 1 "register_operand" "0")
7394 (parallel [(const_int 0)]))
7395 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7398 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7399 (parallel [(const_int 0)]))
7400 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7402 "phsubd\t{%2, %0|%0, %2}"
7403 [(set_attr "type" "sseiadd")
7404 (set_attr "atom_unit" "complex")
7405 (set_attr "prefix_extra" "1")
7406 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7407 (set_attr "mode" "DI")])
;; phsubsw on V8HI: the RTL pairs adjacent HI elements (0,1), (2,3), (4,5),
;; (6,7) of operand 1 and then of operand 2.  Emits phsubsw (destination
;; tied to operand 1) or the three-operand vphsubsw under AVX.
7409 (define_insn "ssse3_phsubswv8hi3"
7410 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7416 (match_operand:V8HI 1 "register_operand" "0,x")
7417 (parallel [(const_int 0)]))
7418 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7420 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7421 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7424 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7425 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7427 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7428 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7433 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7434 (parallel [(const_int 0)]))
7435 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7437 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7438 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7441 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7442 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7444 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7445 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7448 phsubsw\t{%2, %0|%0, %2}
7449 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7450 [(set_attr "isa" "noavx,avx")
7451 (set_attr "type" "sseiadd")
7452 (set_attr "atom_unit" "complex")
7453 (set_attr "prefix_data16" "1,*")
7454 (set_attr "prefix_extra" "1")
7455 (set_attr "prefix" "orig,vex")
7456 (set_attr "mode" "TI")])
;; MMX-register ("y") form of phsubsw on V4HI: the RTL pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  The
;; destination is tied to operand 1; prefix_rex is derived from whether the
;; insn mentions an extended register.
7458 (define_insn "ssse3_phsubswv4hi3"
7459 [(set (match_operand:V4HI 0 "register_operand" "=y")
7464 (match_operand:V4HI 1 "register_operand" "0")
7465 (parallel [(const_int 0)]))
7466 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7468 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7469 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7473 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7474 (parallel [(const_int 0)]))
7475 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7477 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7478 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7480 "phsubsw\t{%2, %0|%0, %2}"
7481 [(set_attr "type" "sseiadd")
7482 (set_attr "atom_unit" "complex")
7483 (set_attr "prefix_extra" "1")
7484 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7485 (set_attr "mode" "DI")])
;; 128-bit pmaddubsw: two V16QI inputs produce a V8HI result.  The RTL
;; selects V8QI sub-vectors from each input (one selection list starting at
;; element 0, the other at element 1 and ending at 15).  Emits pmaddubsw
;; (destination tied to operand 1) or the three-operand vpmaddubsw under AVX.
7487 (define_insn "ssse3_pmaddubsw128"
7488 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7493 (match_operand:V16QI 1 "register_operand" "0,x")
7494 (parallel [(const_int 0)
7504 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7505 (parallel [(const_int 0)
7515 (vec_select:V8QI (match_dup 1)
7516 (parallel [(const_int 1)
7525 (vec_select:V8QI (match_dup 2)
7526 (parallel [(const_int 1)
7533 (const_int 15)]))))))]
7536 pmaddubsw\t{%2, %0|%0, %2}
7537 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
7538 [(set_attr "isa" "noavx,avx")
7539 (set_attr "type" "sseiadd")
7540 (set_attr "atom_unit" "simul")
7541 (set_attr "prefix_data16" "1,*")
7542 (set_attr "prefix_extra" "1")
7543 (set_attr "prefix" "orig,vex")
7544 (set_attr "mode" "TI")])
;; MMX-register ("y") pmaddubsw: two V8QI inputs produce a V4HI result.  The
;; RTL selects V4QI sub-vectors from each input (one selection list starting
;; at element 0, the other at element 1 and ending at 7).  The destination
;; is tied to operand 1.
7546 (define_insn "ssse3_pmaddubsw"
7547 [(set (match_operand:V4HI 0 "register_operand" "=y")
7552 (match_operand:V8QI 1 "register_operand" "0")
7553 (parallel [(const_int 0)
7559 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
7560 (parallel [(const_int 0)
7566 (vec_select:V4QI (match_dup 1)
7567 (parallel [(const_int 1)
7572 (vec_select:V4QI (match_dup 2)
7573 (parallel [(const_int 1)
7576 (const_int 7)]))))))]
7578 "pmaddubsw\t{%2, %0|%0, %2}"
7579 [(set_attr "type" "sseiadd")
7580 (set_attr "atom_unit" "simul")
7581 (set_attr "prefix_extra" "1")
7582 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7583 (set_attr "mode" "DI")])
;; Expander for pmulhrsw on V8HI.  The RTL combines two V8HI nonimmediate
;; operands with an all-ones V8HI constant vector; before emission the
;; operands are legitimized as a MULT-style binary operation via
;; ix86_fixup_binary_operands_no_copy.
7585 (define_expand "ssse3_pmulhrswv8hi3"
7586 [(set (match_operand:V8HI 0 "register_operand" "")
7593 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7595 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7597 (const_vector:V8HI [(const_int 1) (const_int 1)
7598 (const_int 1) (const_int 1)
7599 (const_int 1) (const_int 1)
7600 (const_int 1) (const_int 1)]))
7603 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI pmulhrsw RTL.  Requires SSSE3 and operands
;; that pass ix86_binary_operator_ok for MULT.  Operand 1 is marked
;; commutative ("%").  Emits pmulhrsw (destination tied to operand 1) or the
;; three-operand vpmulhrsw under AVX.
7605 (define_insn "*ssse3_pmulhrswv8hi3"
7606 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7613 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7615 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7617 (const_vector:V8HI [(const_int 1) (const_int 1)
7618 (const_int 1) (const_int 1)
7619 (const_int 1) (const_int 1)
7620 (const_int 1) (const_int 1)]))
7622 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7624 pmulhrsw\t{%2, %0|%0, %2}
7625 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
7626 [(set_attr "isa" "noavx,avx")
7627 (set_attr "type" "sseimul")
7628 (set_attr "prefix_data16" "1,*")
7629 (set_attr "prefix_extra" "1")
7630 (set_attr "prefix" "orig,vex")
7631 (set_attr "mode" "TI")])
;; Expander for pmulhrsw on V4HI.  The RTL combines two V4HI nonimmediate
;; operands with an all-ones V4HI constant vector; before emission the
;; operands are legitimized as a MULT-style binary operation via
;; ix86_fixup_binary_operands_no_copy.
7633 (define_expand "ssse3_pmulhrswv4hi3"
7634 [(set (match_operand:V4HI 0 "register_operand" "")
7641 (match_operand:V4HI 1 "nonimmediate_operand" ""))
7643 (match_operand:V4HI 2 "nonimmediate_operand" "")))
7645 (const_vector:V4HI [(const_int 1) (const_int 1)
7646 (const_int 1) (const_int 1)]))
7649 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matching insn for the V4HI pmulhrsw RTL on MMX ("y") registers.  Requires
;; SSSE3 and operands that pass ix86_binary_operator_ok for MULT.  Operand 1
;; is commutative ("%") and tied to the destination.
7651 (define_insn "*ssse3_pmulhrswv4hi3"
7652 [(set (match_operand:V4HI 0 "register_operand" "=y")
7659 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
7661 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
7663 (const_vector:V4HI [(const_int 1) (const_int 1)
7664 (const_int 1) (const_int 1)]))
7666 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
7667 "pmulhrsw\t{%2, %0|%0, %2}"
7668 [(set_attr "type" "sseimul")
7669 (set_attr "prefix_extra" "1")
7670 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7671 (set_attr "mode" "DI")])
;; pshufb on V16QI, modelled as an unspec over two V16QI operands.  Emits
;; pshufb (destination tied to operand 1) or the three-operand vpshufb
;; under AVX.
7673 (define_insn "ssse3_pshufbv16qi3"
7674 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7675 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7676 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
7680 pshufb\t{%2, %0|%0, %2}
7681 vpshufb\t{%2, %1, %0|%0, %1, %2}"
7682 [(set_attr "isa" "noavx,avx")
7683 (set_attr "type" "sselog1")
7684 (set_attr "prefix_data16" "1,*")
7685 (set_attr "prefix_extra" "1")
7686 (set_attr "prefix" "orig,vex")
7687 (set_attr "mode" "TI")])
;; MMX-register ("y") pshufb on V8QI, modelled as an unspec over two V8QI
;; operands with the destination tied to operand 1.
;; NOTE(review): the ';' after the template string starts an empty md
;; comment; it is harmless but looks like a stray C-style terminator.
7689 (define_insn "ssse3_pshufbv8qi3"
7690 [(set (match_operand:V8QI 0 "register_operand" "=y")
7691 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
7692 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
7695 "pshufb\t{%2, %0|%0, %2}";
7696 [(set_attr "type" "sselog1")
7697 (set_attr "prefix_extra" "1")
7698 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7699 (set_attr "mode" "DI")])
;; psign for every mode of the SSEMODE124 iterator; the mnemonic suffix comes
;; from <ssevecsize>.  Alternative 0 (noavx) ties operand 1 to the destination
;; and prints psign*; alternative 1 (avx) prints the three-operand vpsign*.
7701 (define_insn "ssse3_psign<mode>3"
7702 [(set (match_operand:SSEMODE124 0 "register_operand" "=x,x")
7704 [(match_operand:SSEMODE124 1 "register_operand" "0,x")
7705 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm,xm")]
7709 psign<ssevecsize>\t{%2, %0|%0, %2}
7710 vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7711 [(set_attr "isa" "noavx,avx")
7712 (set_attr "type" "sselog1")
7713 (set_attr "prefix_data16" "1,*")
7714 (set_attr "prefix_extra" "1")
7715 (set_attr "prefix" "orig,vex")
7716 (set_attr "mode" "TI")])
;; psign for every mode of the MMXMODEI iterator, using the "y" register
;; class; the mnemonic suffix comes from <mmxvecsize>.  Operand 1 is tied to
;; the destination.  This shares its name template with the SSEMODE124
;; pattern but iterates over a different mode iterator.
;; The ';' after the output template starts an (empty) md comment.
7718 (define_insn "ssse3_psign<mode>3"
7719 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7721 [(match_operand:MMXMODEI 1 "register_operand" "0")
7722 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
7725 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
7726 [(set_attr "type" "sselog1")
7727 (set_attr "prefix_extra" "1")
7728 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7729 (set_attr "mode" "DI")])
;; TImode palignr.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing, so the RTL carries eight
;; times the value that appears in the assembly.  The template is then chosen
;; by which_alternative: palignr (noavx, operand 1 tied to the destination)
;; or the four-operand vpalignr (avx).
7731 (define_insn "ssse3_palignrti"
7732 [(set (match_operand:TI 0 "register_operand" "=x,x")
7733 (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
7734 (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
7735 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
7739 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7741 switch (which_alternative)
7744 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7746 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7751 [(set_attr "isa" "noavx,avx")
7752 (set_attr "type" "sseishft")
7753 (set_attr "atom_unit" "sishuf")
7754 (set_attr "prefix_data16" "1,*")
7755 (set_attr "prefix_extra" "1")
7756 (set_attr "length_immediate" "1")
7757 (set_attr "prefix" "orig,vex")
7758 (set_attr "mode" "TI")])
;; DImode palignr on the "y" register class.  As in the TImode pattern, the
;; output code divides operand 3 by 8 before printing.  There is a single
;; alternative with operand 1 tied to the destination.
7760 (define_insn "ssse3_palignrdi"
7761 [(set (match_operand:DI 0 "register_operand" "=y")
7762 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
7763 (match_operand:DI 2 "nonimmediate_operand" "ym")
7764 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
7768 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7769 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7771 [(set_attr "type" "sseishft")
7772 (set_attr "atom_unit" "sishuf")
7773 (set_attr "prefix_extra" "1")
7774 (set_attr "length_immediate" "1")
7775 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7776 (set_attr "mode" "DI")])
;; abs<mode>2 for the SSEMODE124 iterator.  The source may be a register or
;; memory and the destination is not tied to it.  The template starts with
;; %v and the prefix attribute is maybe_vex, so one pattern serves both the
;; legacy pabs* and the VEX vpabs* spelling.
7778 (define_insn "abs<mode>2"
7779 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7781 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
7783 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
7784 [(set_attr "type" "sselog1")
7785 (set_attr "prefix_data16" "1")
7786 (set_attr "prefix_extra" "1")
7787 (set_attr "prefix" "maybe_vex")
7788 (set_attr "mode" "TI")])
;; abs<mode>2 for the MMXMODEI iterator, using the "y" register class and the
;; plain pabs* mnemonic (suffix from <mmxvecsize>).  prefix_rep is forced to
;; "0" and prefix_rex is computed per insn.
;; The ';' after the output template starts an (empty) md comment.
7790 (define_insn "abs<mode>2"
7791 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7793 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
7795 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
7796 [(set_attr "type" "sselog1")
7797 (set_attr "prefix_rep" "0")
7798 (set_attr "prefix_extra" "1")
7799 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7800 (set_attr "mode" "DI")])
7802 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7804 ;; AMD SSE4A instructions
7806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Scalar store for each MODEF mode: operand 1 (an "x" register) is written
;; to memory operand 0.  Prints movnts<ssemodefsuffix>; the insn mode
;; attribute follows the iterator mode.
7808 (define_insn "sse4a_movnt<mode>"
7809 [(set (match_operand:MODEF 0 "memory_operand" "=m")
7811 [(match_operand:MODEF 1 "register_operand" "x")]
7814 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
7815 [(set_attr "type" "ssemov")
7816 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the scalar store: element 0 of the SSEMODEF2P
;; vector register (vec_select with parallel [0]) is written to a memory
;; operand of the corresponding scalar mode.  Prints
;; movnt<ssescalarmodesuffix>.
7818 (define_insn "sse4a_vmmovnt<mode>"
7819 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
7820 (unspec:<ssescalarmode>
7821 [(vec_select:<ssescalarmode>
7822 (match_operand:SSEMODEF2P 1 "register_operand" "x")
7823 (parallel [(const_int 0)]))]
7826 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
7827 [(set_attr "type" "ssemov")
7828 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq.  Operand 1 is tied to the destination; operands 2
;; and 3 are modeless const_int_operands with empty constraints, and both are
;; printed as immediates (length_immediate is 2).
7830 (define_insn "sse4a_extrqi"
7831 [(set (match_operand:V2DI 0 "register_operand" "=x")
7832 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7833 (match_operand 2 "const_int_operand" "")
7834 (match_operand 3 "const_int_operand" "")]
7837 "extrq\t{%3, %2, %0|%0, %2, %3}"
7838 [(set_attr "type" "sse")
7839 (set_attr "prefix_data16" "1")
7840 (set_attr "length_immediate" "2")
7841 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 is tied to the destination and
;; operand 2 is a V16QI "x" register.
7843 (define_insn "sse4a_extrq"
7844 [(set (match_operand:V2DI 0 "register_operand" "=x")
7845 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7846 (match_operand:V16QI 2 "register_operand" "x")]
7849 "extrq\t{%2, %0|%0, %2}"
7850 [(set_attr "type" "sse")
7851 (set_attr "prefix_data16" "1")
7852 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are modeless
;; const_int_operands printed as immediates (length_immediate is 2).
;; The encoding attributes select a rep prefix and no data16 prefix.
7854 (define_insn "sse4a_insertqi"
7855 [(set (match_operand:V2DI 0 "register_operand" "=x")
7856 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7857 (match_operand:V2DI 2 "register_operand" "x")
7858 (match_operand 3 "const_int_operand" "")
7859 (match_operand 4 "const_int_operand" "")]
7862 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
7863 [(set_attr "type" "sseins")
7864 (set_attr "prefix_data16" "0")
7865 (set_attr "prefix_rep" "1")
7866 (set_attr "length_immediate" "2")
7867 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the destination and
;; operand 2 is a V2DI "x" register.  Same prefix attributes as the immediate
;; form (rep prefix, no data16 prefix).
7869 (define_insn "sse4a_insertq"
7870 [(set (match_operand:V2DI 0 "register_operand" "=x")
7871 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7872 (match_operand:V2DI 2 "register_operand" "x")]
7875 "insertq\t{%2, %0|%0, %2}"
7876 [(set_attr "type" "sseins")
7877 (set_attr "prefix_data16" "0")
7878 (set_attr "prefix_rep" "1")
7879 (set_attr "mode" "TI")])
7881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7883 ;; Intel SSE4.1 instructions
7885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend for every VF mode.  Operand 2 is listed before
;; operand 1 in the RTL.  The immediate (operand 3) is limited per mode by
;; the const_0_to_<blendbits>_operand predicate.  Alternative 0 (noavx) ties
;; operand 1 to the destination and prints blend*; alternative 1 (avx) prints
;; the four-operand vblend*.
7887 (define_insn "<sse4_1>_blend<ssemodesuffix><avxmodesuffix>"
7888 [(set (match_operand:VF 0 "register_operand" "=x,x")
7890 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7891 (match_operand:VF 1 "register_operand" "0,x")
7892 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n,n")))]
7895 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7896 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7897 [(set_attr "isa" "noavx,avx")
7898 (set_attr "type" "ssemov")
7899 (set_attr "length_immediate" "1")
7900 (set_attr "prefix_data16" "1,*")
7901 (set_attr "prefix_extra" "1")
7902 (set_attr "prefix" "orig,vex")
7903 (set_attr "mode" "<MODE>")])
;; Register-controlled blend for every VF mode; the selector is operand 3.
;; Alternative 0 (noavx) requires the selector in the "Yz" class and ties
;; operand 1 to the destination; the *_not_xmm0_*_maybe_avx predicates guard
;; operands 0-2.  Alternative 1 (avx) accepts any "x" register as selector
;; and prints the four-operand vblendv*.
;; NOTE(review): length_immediate is "1" for both alternatives here, whereas
;; sse4_1_pblendvb uses "*,1" -- confirm whether that difference is intended.
7905 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxmodesuffix>"
7906 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7908 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7909 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7910 (match_operand:VF 3 "register_operand" "Yz,x")]
7914 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7915 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7916 [(set_attr "isa" "noavx,avx")
7917 (set_attr "type" "ssemov")
7918 (set_attr "length_immediate" "1")
7919 (set_attr "prefix_data16" "1,*")
7920 (set_attr "prefix_extra" "1")
7921 (set_attr "prefix" "orig,vex")
7922 (set_attr "mode" "<MODE>")])
;; dp* / vdp* for every VF mode with an 8-bit immediate (operand 3).
;; Operand 1 is marked commutative ("%"); in the noavx alternative it is tied
;; to the destination, in the avx alternative it is a separate register.
7924 (define_insn "<sse4_1>_dp<ssemodesuffix><avxmodesuffix>"
7925 [(set (match_operand:VF 0 "register_operand" "=x,x")
7927 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
7928 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7929 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7933 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7934 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7935 [(set_attr "isa" "noavx,avx")
7936 (set_attr "type" "ssemul")
7937 (set_attr "length_immediate" "1")
7938 (set_attr "prefix_data16" "1,*")
7939 (set_attr "prefix_extra" "1")
7940 (set_attr "prefix" "orig,vex")
7941 (set_attr "mode" "<MODE>")])
;; movntdqa load: memory operand 1 is loaded into a V2DI "x" register.  The
;; load is wrapped in an unspec.  Uses the %v / maybe_vex convention, so the
;; same pattern serves the legacy and VEX spellings.
7943 (define_insn "sse4_1_movntdqa"
7944 [(set (match_operand:V2DI 0 "register_operand" "=x")
7945 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
7948 "%vmovntdqa\t{%1, %0|%0, %1}"
7949 [(set_attr "type" "ssemov")
7950 (set_attr "prefix_extra" "1")
7951 (set_attr "prefix" "maybe_vex")
7952 (set_attr "mode" "TI")])
;; mpsadbw / vmpsadbw on V16QI with an 8-bit immediate (operand 3), modelled
;; as an unspec.  Alternative 0 (noavx) ties operand 1 to the destination;
;; alternative 1 (avx) uses the four-operand VEX form.
7954 (define_insn "sse4_1_mpsadbw"
7955 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7956 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7957 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7958 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7962 mpsadbw\t{%3, %2, %0|%0, %2, %3}
7963 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7964 [(set_attr "isa" "noavx,avx")
7965 (set_attr "type" "sselog1")
7966 (set_attr "length_immediate" "1")
7967 (set_attr "prefix_extra" "1")
7968 (set_attr "prefix" "orig,vex")
7969 (set_attr "mode" "TI")])
;; packusdw / vpackusdw: two V4SI inputs produce one V8HI result.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) prints the three-operand VEX form.
7971 (define_insn "sse4_1_packusdw"
7972 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7975 (match_operand:V4SI 1 "register_operand" "0,x"))
7977 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
7980 packusdw\t{%2, %0|%0, %2}
7981 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
7982 [(set_attr "isa" "noavx,avx")
7983 (set_attr "type" "sselog")
7984 (set_attr "prefix_extra" "1")
7985 (set_attr "prefix" "orig,vex")
7986 (set_attr "mode" "TI")])
;; Register-controlled byte blend on V16QI; the selector is operand 3.
;; Alternative 0 (noavx) requires the selector in the "Yz" class and ties
;; operand 1 to the destination; alternative 1 (avx) takes any "x" register
;; and prints the four-operand vpblendvb.  length_immediate is "*,1", so
;; only the avx alternative is given an explicit immediate length.
;; NOTE(review): operand 0 uses reg_not_xmm0_operand while operands 1 and 2
;; use the *_maybe_avx predicates -- confirm this asymmetry is intended.
7988 (define_insn "sse4_1_pblendvb"
7989 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x")
7991 [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7992 (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7993 (match_operand:V16QI 3 "register_operand" "Yz,x")]
7997 pblendvb\t{%3, %2, %0|%0, %2, %3}
7998 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7999 [(set_attr "isa" "noavx,avx")
8000 (set_attr "type" "ssemov")
8001 (set_attr "prefix_extra" "1")
8002 (set_attr "length_immediate" "*,1")
8003 (set_attr "prefix" "orig,vex")
8004 (set_attr "mode" "TI")])
;; Immediate-controlled V8HI blend (pblendw / vpblendw).  Operand 2 is listed
;; before operand 1 in the RTL and the immediate is operand 3 (0..255 by
;; predicate).  Alternative 0 ties operand 1 to the destination.
8006 (define_insn "sse4_1_pblendw"
8007 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8009 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8010 (match_operand:V8HI 1 "register_operand" "0,x")
8011 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8014 pblendw\t{%3, %2, %0|%0, %2, %3}
8015 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8016 [(set_attr "isa" "noavx,avx")
8017 (set_attr "type" "ssemov")
8018 (set_attr "prefix_extra" "1")
8019 (set_attr "length_immediate" "1")
8020 (set_attr "prefix" "orig,vex")
8021 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1 (register
;; or memory).  Uses the %v / maybe_vex convention, so a single alternative
;; covers the legacy and VEX spellings.
8023 (define_insn "sse4_1_phminposuw"
8024 [(set (match_operand:V8HI 0 "register_operand" "=x")
8025 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8026 UNSPEC_PHMINPOSUW))]
8028 "%vphminposuw\t{%1, %0|%0, %1}"
8029 [(set_attr "type" "sselog1")
8030 (set_attr "prefix_extra" "1")
8031 (set_attr "prefix" "maybe_vex")
8032 (set_attr "mode" "TI")])
;; Byte-to-word widening move into V8HI.  The source is a full V16QI operand
;; (register or memory) narrowed by a selection that starts at element 0.
;; <code> and <extsuffix> come from a code iterator -- presumably the
;; sign/zero extension pair; confirm against its definition.
8034 (define_insn "sse4_1_<code>v8qiv8hi2"
8035 [(set (match_operand:V8HI 0 "register_operand" "=x")
8038 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8039 (parallel [(const_int 0)
8048 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8049 [(set_attr "type" "ssemov")
8050 (set_attr "prefix_extra" "1")
8051 (set_attr "prefix" "maybe_vex")
8052 (set_attr "mode" "TI")])
;; Byte-to-doubleword widening move into V4SI.  The source is a full V16QI
;; operand narrowed by a selection that starts at element 0.  Prints
;; %vpmov<extsuffix>bd.
8054 (define_insn "sse4_1_<code>v4qiv4si2"
8055 [(set (match_operand:V4SI 0 "register_operand" "=x")
8058 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8059 (parallel [(const_int 0)
8064 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
8065 [(set_attr "type" "ssemov")
8066 (set_attr "prefix_extra" "1")
8067 (set_attr "prefix" "maybe_vex")
8068 (set_attr "mode" "TI")])
;; Word-to-doubleword widening move into V4SI.  The source is a full V8HI
;; operand narrowed by a selection that starts at element 0.  Prints
;; %vpmov<extsuffix>wd.
8070 (define_insn "sse4_1_<code>v4hiv4si2"
8071 [(set (match_operand:V4SI 0 "register_operand" "=x")
8074 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8075 (parallel [(const_int 0)
8080 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8081 [(set_attr "type" "ssemov")
8082 (set_attr "prefix_extra" "1")
8083 (set_attr "prefix" "maybe_vex")
8084 (set_attr "mode" "TI")])
;; Byte-to-quadword widening move into V2DI.  The source is a full V16QI
;; operand narrowed by a selection that starts at element 0.  Prints
;; %vpmov<extsuffix>bq.
8086 (define_insn "sse4_1_<code>v2qiv2di2"
8087 [(set (match_operand:V2DI 0 "register_operand" "=x")
8090 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8091 (parallel [(const_int 0)
8094 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
8095 [(set_attr "type" "ssemov")
8096 (set_attr "prefix_extra" "1")
8097 (set_attr "prefix" "maybe_vex")
8098 (set_attr "mode" "TI")])
;; Word-to-quadword widening move into V2DI.  The source is a full V8HI
;; operand narrowed by a selection that starts at element 0.  Prints
;; %vpmov<extsuffix>wq.
8100 (define_insn "sse4_1_<code>v2hiv2di2"
8101 [(set (match_operand:V2DI 0 "register_operand" "=x")
8104 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8105 (parallel [(const_int 0)
8108 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
8109 [(set_attr "type" "ssemov")
8110 (set_attr "prefix_extra" "1")
8111 (set_attr "prefix" "maybe_vex")
8112 (set_attr "mode" "TI")])
;; Doubleword-to-quadword widening move into V2DI.  The source is a full
;; V4SI operand narrowed by a selection that starts at element 0.  Prints
;; %vpmov<extsuffix>dq.
8114 (define_insn "sse4_1_<code>v2siv2di2"
8115 [(set (match_operand:V2DI 0 "register_operand" "=x")
8118 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8119 (parallel [(const_int 0)
8122 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8123 [(set_attr "type" "ssemov")
8124 (set_attr "prefix_extra" "1")
8125 (set_attr "prefix" "maybe_vex")
8126 (set_attr "mode" "TI")])
8128 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8129 ;; setting FLAGS_REG. But they are not really compare instructions.
;; The only result is FLAGS_REG (CCmode); neither vector operand is written.
;; Operand 0 must be a register, operand 1 may be a register or memory, for
;; every VF mode.  VEX-only: the template prints vtest<ssemodesuffix>.
8130 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
8131 [(set (reg:CC FLAGS_REG)
8132 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8133 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8136 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8137 [(set_attr "type" "ssecomi")
8138 (set_attr "prefix_extra" "1")
8139 (set_attr "prefix" "vex")
8140 (set_attr "mode" "<MODE>")])
8142 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
8143 ;; But it is not really a compare instruction.
;; V4DI form: sets FLAGS_REG (CCmode) from a register operand 0 and a
;; register-or-memory operand 1.  VEX-only (prints vptest); insn mode is OI.
8144 (define_insn "avx_ptest256"
8145 [(set (reg:CC FLAGS_REG)
8146 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8147 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8150 "vptest\t{%1, %0|%0, %1}"
8151 [(set_attr "type" "ssecomi")
8152 (set_attr "prefix_extra" "1")
8153 (set_attr "prefix" "vex")
8154 (set_attr "mode" "OI")])
;; V2DI ptest: sets FLAGS_REG (CCmode) from a register operand 0 and a
;; register-or-memory operand 1.  Uses the %v / maybe_vex convention, so the
;; same pattern prints ptest or vptest.
8156 (define_insn "sse4_1_ptest"
8157 [(set (reg:CC FLAGS_REG)
8158 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8159 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8162 "%vptest\t{%1, %0|%0, %1}"
8163 [(set_attr "type" "ssecomi")
8164 (set_attr "prefix_extra" "1")
8165 (set_attr "prefix" "maybe_vex")
8166 (set_attr "mode" "TI")])
;; Packed round for every VF mode.  Operand 1 (register or memory) is the
;; source and operand 2 is an immediate limited to 0..15 by its predicate.
;; A single alternative with %v / maybe_vex covers both spellings, so
;; prefix_data16 is computed from TARGET_AVX rather than given as a
;; per-alternative string.
8168 (define_insn "<sse4_1>_round<ssemodesuffix><avxmodesuffix>"
8169 [(set (match_operand:VF 0 "register_operand" "=x")
8171 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8172 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8175 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8176 [(set_attr "type" "ssecvt")
8177 (set (attr "prefix_data16")
8179 (ne (symbol_ref "TARGET_AVX") (const_int 0))
8181 (const_string "1")))
8182 (set_attr "prefix_extra" "1")
8183 (set_attr "length_immediate" "1")
8184 (set_attr "prefix" "maybe_vex")
8185 (set_attr "mode" "<MODE>")])
;; Scalar round for the 128-bit float modes (VF_128).  Operand 2 is the
;; register being rounded and operand 3 the 0..15 immediate.  Operand 1 is
;; tied to the destination in the noavx alternative and is a separate
;; register in the avx alternative, which prints the four-operand vround*.
8187 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8188 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8191 [(match_operand:VF_128 2 "register_operand" "x,x")
8192 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8194 (match_operand:VF_128 1 "register_operand" "0,x")
8198 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8199 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8200 [(set_attr "isa" "noavx,avx")
8201 (set_attr "type" "ssecvt")
8202 (set_attr "length_immediate" "1")
8203 (set_attr "prefix_data16" "1,*")
8204 (set_attr "prefix_extra" "1")
8205 (set_attr "prefix" "orig,vex")
8206 (set_attr "mode" "<MODE>")])
8208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8210 ;; Intel SSE4.2 string/text processing instructions
8212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SImode value in the "c"
;; register (operand 0), a V16QI value in the "Yz" register (operand 1) and
;; FLAGS_REG.  Inputs are two V16QI strings (operands 2 and 4), two SImode
;; values in the "a" and "d" registers (operands 3 and 5) and an 8-bit
;; immediate (operand 6).  It is split while pseudos can still be created.
;; The split code inspects the REG_UNUSED notes on curr_insn to see which of
;; the three results are live, then emits the pcmpestri insn for operand 0
;; and the pcmpestrm insn for operand 1.  The flags-only variant is emitted
;; when the flags are live but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not visible
;; here; presumably they test `ecx' and `xmm0' respectively -- confirm.
8214 (define_insn_and_split "sse4_2_pcmpestr"
8215 [(set (match_operand:SI 0 "register_operand" "=c,c")
8217 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8218 (match_operand:SI 3 "register_operand" "a,a")
8219 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
8220 (match_operand:SI 5 "register_operand" "d,d")
8221 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
8223 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8231 (set (reg:CC FLAGS_REG)
8240 && can_create_pseudo_p ()"
8245 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8246 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8247 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8250 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
8251 operands[3], operands[4],
8252 operands[5], operands[6]));
8254 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
8255 operands[3], operands[4],
8256 operands[5], operands[6]));
8257 if (flags && !(ecx || xmm0))
8258 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
8259 operands[2], operands[3],
8260 operands[4], operands[5],
8264 [(set_attr "type" "sselog")
8265 (set_attr "prefix_data16" "1")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "length_immediate" "1")
8268 (set_attr "memory" "none,load")
8269 (set_attr "mode" "TI")])
;; pcmpestri: produces an SImode result in the "c" register and also sets
;; FLAGS_REG.  Operands 2 and 4 are fixed to the "a" and "d" registers and
;; are not printed; the template names only operands 1, 3 and the immediate
;; operand 5.  The two alternatives differ in whether operand 3 is a register
;; or memory, which the "memory" attribute mirrors (none,load).
8271 (define_insn "sse4_2_pcmpestri"
8272 [(set (match_operand:SI 0 "register_operand" "=c,c")
8274 [(match_operand:V16QI 1 "register_operand" "x,x")
8275 (match_operand:SI 2 "register_operand" "a,a")
8276 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8277 (match_operand:SI 4 "register_operand" "d,d")
8278 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8280 (set (reg:CC FLAGS_REG)
8289 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
8290 [(set_attr "type" "sselog")
8291 (set_attr "prefix_data16" "1")
8292 (set_attr "prefix_extra" "1")
8293 (set_attr "prefix" "maybe_vex")
8294 (set_attr "length_immediate" "1")
8295 (set_attr "memory" "none,load")
8296 (set_attr "mode" "TI")])
;; pcmpestrm: produces a V16QI result in the "Yz" register and also sets
;; FLAGS_REG.  Operand layout matches sse4_2_pcmpestri: operands 2 and 4 are
;; fixed to the "a" and "d" registers and are not printed, and operand 3 may
;; be a register or memory.
8298 (define_insn "sse4_2_pcmpestrm"
8299 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8301 [(match_operand:V16QI 1 "register_operand" "x,x")
8302 (match_operand:SI 2 "register_operand" "a,a")
8303 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8304 (match_operand:SI 4 "register_operand" "d,d")
8305 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8307 (set (reg:CC FLAGS_REG)
8316 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
8317 [(set_attr "type" "sselog")
8318 (set_attr "prefix_data16" "1")
8319 (set_attr "prefix_extra" "1")
8320 (set_attr "length_immediate" "1")
8321 (set_attr "prefix" "maybe_vex")
8322 (set_attr "memory" "none,load")
8323 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the sole set is FLAGS_REG, and operands 0 and 1 are
;; clobbered scratches.  There are four alternatives.  In alternatives 0-1
;; the V16QI scratch is allocated in "Yz" and pcmpestrm is printed; in
;; alternatives 2-3 the SImode scratch is allocated in "c" and pcmpestri is
;; printed.  The unused scratch takes "X" in each case.  Within each pair
;; operand 4 is first a register, then memory, matching the "memory"
;; attribute (none,load,none,load).
8325 (define_insn "sse4_2_pcmpestr_cconly"
8326 [(set (reg:CC FLAGS_REG)
8328 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8329 (match_operand:SI 3 "register_operand" "a,a,a,a")
8330 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
8331 (match_operand:SI 5 "register_operand" "d,d,d,d")
8332 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
8334 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8335 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8338 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8339 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8340 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
8341 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
8342 [(set_attr "type" "sselog")
8343 (set_attr "prefix_data16" "1")
8344 (set_attr "prefix_extra" "1")
8345 (set_attr "length_immediate" "1")
8346 (set_attr "memory" "none,load,none,load")
8347 (set_attr "prefix" "maybe_vex")
8348 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SImode value in the "c"
;; register (operand 0), a V16QI value in the "Yz" register (operand 1) and
;; FLAGS_REG.  Inputs are two V16QI strings (operands 2 and 3) and an 8-bit
;; immediate (operand 4).  It is split while pseudos can still be created.
;; The split code inspects the REG_UNUSED notes on curr_insn to see which of
;; the three results are live, then emits the pcmpistri insn for operand 0
;; and the pcmpistrm insn for operand 1.  The flags-only variant is emitted
;; when the flags are live but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not visible
;; here; presumably they test `ecx' and `xmm0' respectively -- confirm.
8350 (define_insn_and_split "sse4_2_pcmpistr"
8351 [(set (match_operand:SI 0 "register_operand" "=c,c")
8353 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8354 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
8355 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
8357 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8363 (set (reg:CC FLAGS_REG)
8370 && can_create_pseudo_p ()"
8375 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8376 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8377 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8380 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
8381 operands[3], operands[4]));
8383 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
8384 operands[3], operands[4]));
8385 if (flags && !(ecx || xmm0))
8386 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
8387 operands[2], operands[3],
8391 [(set_attr "type" "sselog")
8392 (set_attr "prefix_data16" "1")
8393 (set_attr "prefix_extra" "1")
8394 (set_attr "length_immediate" "1")
8395 (set_attr "memory" "none,load")
8396 (set_attr "mode" "TI")])
;; pcmpistri: produces an SImode result in the "c" register and also sets
;; FLAGS_REG.  Operand 1 is a register, operand 2 a register or memory
;; (mirrored by the "memory" attribute), and operand 3 an 8-bit immediate.
8398 (define_insn "sse4_2_pcmpistri"
8399 [(set (match_operand:SI 0 "register_operand" "=c,c")
8401 [(match_operand:V16QI 1 "register_operand" "x,x")
8402 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8403 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8405 (set (reg:CC FLAGS_REG)
8412 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
8413 [(set_attr "type" "sselog")
8414 (set_attr "prefix_data16" "1")
8415 (set_attr "prefix_extra" "1")
8416 (set_attr "length_immediate" "1")
8417 (set_attr "prefix" "maybe_vex")
8418 (set_attr "memory" "none,load")
8419 (set_attr "mode" "TI")])
;; pcmpistrm: produces a V16QI result in the "Yz" register and also sets
;; FLAGS_REG.  Operand layout matches sse4_2_pcmpistri.
8421 (define_insn "sse4_2_pcmpistrm"
8422 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8424 [(match_operand:V16QI 1 "register_operand" "x,x")
8425 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8426 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8428 (set (reg:CC FLAGS_REG)
8435 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
8436 [(set_attr "type" "sselog")
8437 (set_attr "prefix_data16" "1")
8438 (set_attr "prefix_extra" "1")
8439 (set_attr "length_immediate" "1")
8440 (set_attr "prefix" "maybe_vex")
8441 (set_attr "memory" "none,load")
8442 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the sole set is FLAGS_REG, and operands 0 and 1 are
;; clobbered scratches.  There are four alternatives.  In alternatives 0-1
;; the V16QI scratch is allocated in "Yz" and pcmpistrm is printed; in
;; alternatives 2-3 the SImode scratch is allocated in "c" and pcmpistri is
;; printed.  The unused scratch takes "X" in each case.  Within each pair
;; operand 3 is first a register, then memory, matching the "memory"
;; attribute (none,load,none,load).
8444 (define_insn "sse4_2_pcmpistr_cconly"
8445 [(set (reg:CC FLAGS_REG)
8447 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8448 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
8449 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
8451 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8452 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8455 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8456 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8457 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
8458 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
8459 [(set_attr "type" "sselog")
8460 (set_attr "prefix_data16" "1")
8461 (set_attr "prefix_extra" "1")
8462 (set_attr "length_immediate" "1")
8463 (set_attr "memory" "none,load,none,load")
8464 (set_attr "prefix" "maybe_vex")
8465 (set_attr "mode" "TI")])
8467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8473 ;; XOP parallel integer multiply/add instructions.
8474 ;; Note that the XOP multiply/add instructions
8475 ;; a[i] = b[i] * c[i] + d[i];
8476 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI: operands 1 and 2 are the multiplicands and operand 3
;; the addend.  Operand 1 is marked commutative ("%") and operand 2 may be
;; memory.  Operand 3 has a nonimmediate_operand predicate but an "x"-only
;; constraint, so after allocation it is always a register.
8477 (define_insn "xop_pmacsww"
8478 [(set (match_operand:V8HI 0 "register_operand" "=x")
8481 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8482 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8483 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8485 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8486 [(set_attr "type" "ssemuladd")
8487 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult:V8HI of operands 1 and 2 combined with addend
;; operand 3.  Operand 1 is commutative, operand 2 may be memory, and
;; operand 3 is constrained to an "x" register.
8489 (define_insn "xop_pmacssww"
8490 [(set (match_operand:V8HI 0 "register_operand" "=x")
8492 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8493 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8494 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8496 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8497 [(set_attr "type" "ssemuladd")
8498 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the multiplicands and operand 3
;; the addend.  Operand 1 is commutative, operand 2 may be memory, and
;; operand 3 is constrained to an "x" register.
8500 (define_insn "xop_pmacsdd"
8501 [(set (match_operand:V4SI 0 "register_operand" "=x")
8504 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8505 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8506 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8508 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8509 [(set_attr "type" "ssemuladd")
8510 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult:V4SI of operands 1 and 2 combined with addend
;; operand 3.  Operand 1 is commutative, operand 2 may be memory, and
;; operand 3 is constrained to an "x" register.
8512 (define_insn "xop_pmacssdd"
8513 [(set (match_operand:V4SI 0 "register_operand" "=x")
8515 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8516 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8517 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8519 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8520 [(set_attr "type" "ssemuladd")
8521 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI multiplicands (operands 1 and 2) and a V2DI addend
;; (operand 3) give a V2DI result.  Both multiplicands are narrowed by a
;; selection whose first index is 1.
8523 (define_insn "xop_pmacssdql"
8524 [(set (match_operand:V2DI 0 "register_operand" "=x")
8529 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8530 (parallel [(const_int 1)
8533 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8534 (parallel [(const_int 1)
8536 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8538 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8539 [(set_attr "type" "ssemuladd")
8540 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI multiplicands (operands 1 and 2) and a V2DI addend
;; (operand 3) give a V2DI result.  Both multiplicands are narrowed by a
;; selection whose first index is 0.
8542 (define_insn "xop_pmacssdqh"
8543 [(set (match_operand:V2DI 0 "register_operand" "=x")
8548 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8549 (parallel [(const_int 0)
8553 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8554 (parallel [(const_int 0)
8556 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8558 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8559 [(set_attr "type" "ssemuladd")
8560 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI multiplicands (operands 1 and 2) and a V2DI addend
;; (operand 3) give a V2DI result.  Both multiplicands are narrowed by a
;; selection whose first index is 1.
8562 (define_insn "xop_pmacsdql"
8563 [(set (match_operand:V2DI 0 "register_operand" "=x")
8568 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8569 (parallel [(const_int 1)
8573 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8574 (parallel [(const_int 1)
8576 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8578 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8579 [(set_attr "type" "ssemuladd")
8580 (set_attr "mode" "TI")])
8582 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8583 ;; fake it with a multiply/add. In general, we expect the define_split to
8584 ;; occur before register allocation, so we have to handle the corner case where
8585 ;; the target is the same as operands 1/2
;; The destination is earlyclobber ("=&x").  Operand 2 is narrowed by a
;; selection of elements 1 and 3.  The split preparation code sets
;; operands[3] to CONST0_RTX (V2DImode), i.e. a zero V2DI addend.
;; NOTE(review): the split condition shown is "&& reload_completed", which
;; is after register allocation -- reconcile with the comment above.
8586 (define_insn_and_split "xop_mulv2div2di3_low"
8587 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8591 (match_operand:V4SI 1 "register_operand" "%x")
8592 (parallel [(const_int 1)
8596 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8597 (parallel [(const_int 1)
8598 (const_int 3)])))))]
8601 "&& reload_completed"
8610 (parallel [(const_int 1)
8615 (parallel [(const_int 1)
8619 operands[3] = CONST0_RTX (V2DImode);
8621 [(set_attr "type" "ssemul")
8622 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI multiplicands (operands 1 and 2) and a V2DI addend
;; (operand 3) give a V2DI result.  Both multiplicands are narrowed by a
;; selection whose first index is 0.
8624 (define_insn "xop_pmacsdqh"
8625 [(set (match_operand:V2DI 0 "register_operand" "=x")
8630 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8631 (parallel [(const_int 0)
8635 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8636 (parallel [(const_int 0)
8638 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8640 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8641 [(set_attr "type" "ssemuladd")
8642 (set_attr "mode" "TI")])
8644 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8645 ;; fake it with a multiply/add. In general, we expect the define_split to
8646 ;; occur before register allocation, so we have to handle the corner case where
8647 ;; the target is the same as either operands[1] or operands[2]
;; The destination is earlyclobber ("=&x").  Operand 2 is narrowed by a
;; selection of elements 0 and 2.  The split preparation code sets
;; operands[3] to CONST0_RTX (V2DImode), i.e. a zero V2DI addend.
;; NOTE(review): the split condition shown is "&& reload_completed", which
;; is after register allocation -- reconcile with the comment above.
8648 (define_insn_and_split "xop_mulv2div2di3_high"
8649 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8653 (match_operand:V4SI 1 "register_operand" "%x")
8654 (parallel [(const_int 0)
8658 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8659 (parallel [(const_int 0)
8660 (const_int 2)])))))]
8663 "&& reload_completed"
8672 (parallel [(const_int 0)
8677 (parallel [(const_int 0)
8681 operands[3] = CONST0_RTX (V2DImode);
8683 [(set_attr "type" "ssemul")
8684 (set_attr "mode" "TI")])
8686 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI multiplicands (operands 1 and 2) and a V4SI addend
;; (operand 3) give a V4SI result.  Both multiplicands are narrowed by a
;; selection whose first index is 1.
8687 (define_insn "xop_pmacsswd"
8688 [(set (match_operand:V4SI 0 "register_operand" "=x")
8693 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8694 (parallel [(const_int 1)
8700 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8701 (parallel [(const_int 1)
8705 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8707 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8708 [(set_attr "type" "ssemuladd")
8709 (set_attr "mode" "TI")])
;; vpmacswd: V8HI multiplicands (operands 1 and 2) and a V4SI addend
;; (operand 3) give a V4SI result.  Both multiplicands are narrowed by a
;; selection whose first index is 1.
8711 (define_insn "xop_pmacswd"
8712 [(set (match_operand:V4SI 0 "register_operand" "=x")
8717 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8718 (parallel [(const_int 1)
8724 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8725 (parallel [(const_int 1)
8729 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8731 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8732 [(set_attr "type" "ssemuladd")
8733 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI inputs (operands 1 and 2) and a V4SI addend (operand 3)
;; give a V4SI result.  The pattern contains selections starting at index 0
;; as well as selections starting at index 1.
8735 (define_insn "xop_pmadcsswd"
8736 [(set (match_operand:V4SI 0 "register_operand" "=x")
8742 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8743 (parallel [(const_int 0)
8749 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8750 (parallel [(const_int 0)
8758 (parallel [(const_int 1)
8765 (parallel [(const_int 1)
8769 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8771 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8772 [(set_attr "type" "ssemuladd")
8773 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI inputs (operands 1 and 2) and a V4SI addend (operand 3)
;; give a V4SI result.  The pattern contains selections starting at index 0
;; as well as selections starting at index 1.
8775 (define_insn "xop_pmadcswd"
8776 [(set (match_operand:V4SI 0 "register_operand" "=x")
8782 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8783 (parallel [(const_int 0)
8789 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8790 (parallel [(const_int 0)
8798 (parallel [(const_int 1)
8805 (parallel [(const_int 1)
8809 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8811 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8812 [(set_attr "type" "ssemuladd")
8813 (set_attr "mode" "TI")])
8815 ;; XOP parallel XMM conditional moves
;; vpcmov for every SSEMODE mode, written as an if_then_else with operand 3
;; as the condition and operands 1 and 2 as the two arms.  The two
;; alternatives allow memory in at most one position: operand 2 in
;; alternative 0, operand 3 in alternative 1.
8816 (define_insn "xop_pcmov_<mode>"
8817 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
8818 (if_then_else:SSEMODE
8819 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
8820 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
8821 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
8823 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8824 [(set_attr "type" "sse4arg")])
;; vpcmov for every AVX256MODE mode.  Same if_then_else shape and
;; alternatives as xop_pcmov_<mode>: operand 3 is the condition, and memory
;; is allowed for operand 2 (alternative 0) or operand 3 (alternative 1).
8826 (define_insn "xop_pcmov_<mode>256"
8827 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
8828 (if_then_else:AVX256MODE
8829 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
8830 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
8831 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
8833 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8834 [(set_attr "type" "sse4arg")])
8836 ;; XOP horizontal add/subtract instructions
;; vphaddbw: a single V16QI input (register or memory) gives a V8HI result.
;; The pattern combines a selection starting at index 0 with one starting at
;; index 1 and ending at index 15.
8837 (define_insn "xop_phaddbw"
8838 [(set (match_operand:V8HI 0 "register_operand" "=x")
8842 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8843 (parallel [(const_int 0)
8854 (parallel [(const_int 1)
8861 (const_int 15)])))))]
8863 "vphaddbw\t{%1, %0|%0, %1}"
8864 [(set_attr "type" "sseiadd1")])
;; vphaddbd: a single V16QI input (register or memory) gives a V4SI result.
;; The pattern combines four selections, starting at indices 0, 1, 2 and 3.
8866 (define_insn "xop_phaddbd"
8867 [(set (match_operand:V4SI 0 "register_operand" "=x")
8872 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8873 (parallel [(const_int 0)
8880 (parallel [(const_int 1)
8888 (parallel [(const_int 2)
8895 (parallel [(const_int 3)
8898 (const_int 15)]))))))]
8900 "vphaddbd\t{%1, %0|%0, %1}"
8901 [(set_attr "type" "sseiadd1")])
;; vphaddbq: a single V16QI input (register or memory) gives a V2DI result.
;; The pattern combines eight selections, starting at indices 0-3 and 8-11.
8903 (define_insn "xop_phaddbq"
8904 [(set (match_operand:V2DI 0 "register_operand" "=x")
8910 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8911 (parallel [(const_int 0)
8916 (parallel [(const_int 1)
8922 (parallel [(const_int 2)
8927 (parallel [(const_int 3)
8934 (parallel [(const_int 8)
8939 (parallel [(const_int 9)
8945 (parallel [(const_int 10)
8950 (parallel [(const_int 11)
8951 (const_int 15)])))))))]
8953 "vphaddbq\t{%1, %0|%0, %1}"
8954 [(set_attr "type" "sseiadd1")])
;; vphaddwd: a single V8HI input (register or memory) gives a V4SI result.
;; The pattern combines a selection starting at index 0 with one starting at
;; index 1 and ending at index 7.
8956 (define_insn "xop_phaddwd"
8957 [(set (match_operand:V4SI 0 "register_operand" "=x")
8961 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8962 (parallel [(const_int 0)
8969 (parallel [(const_int 1)
8972 (const_int 7)])))))]
8974 "vphaddwd\t{%1, %0|%0, %1}"
8975 [(set_attr "type" "sseiadd1")])
;; vphaddwq: a single V8HI input (register or memory) gives a V2DI result.
;; The pattern combines four selections, starting at indices 0, 1, 2 and 3.
8977 (define_insn "xop_phaddwq"
8978 [(set (match_operand:V2DI 0 "register_operand" "=x")
8983 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8984 (parallel [(const_int 0)
8989 (parallel [(const_int 1)
8995 (parallel [(const_int 2)
9000 (parallel [(const_int 3)
9001 (const_int 7)]))))))]
9003 "vphaddwq\t{%1, %0|%0, %1}"
9004 [(set_attr "type" "sseiadd1")])
;; vphadddq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI register result.  The two vec_select parallels start at element 0
;; and element 1 respectively.
9006 (define_insn "xop_phadddq"
9007 [(set (match_operand:V2DI 0 "register_operand" "=x")
9011 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9012 (parallel [(const_int 0)
9017 (parallel [(const_int 1)
9018 (const_int 3)])))))]
9020 "vphadddq\t{%1, %0|%0, %1}"
9021 [(set_attr "type" "sseiadd1")])
;; vphaddubw: same operand layout as xop_phaddbw (V16QI source, V8HI
;; register result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant -- confirm.
9023 (define_insn "xop_phaddubw"
9024 [(set (match_operand:V8HI 0 "register_operand" "=x")
9028 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9029 (parallel [(const_int 0)
9040 (parallel [(const_int 1)
9047 (const_int 15)])))))]
9049 "vphaddubw\t{%1, %0|%0, %1}"
9050 [(set_attr "type" "sseiadd1")])
;; vphaddubd: same operand layout as xop_phaddbd (V16QI source, V4SI
;; register result) but emitting the "u" mnemonic.
9052 (define_insn "xop_phaddubd"
9053 [(set (match_operand:V4SI 0 "register_operand" "=x")
9058 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9059 (parallel [(const_int 0)
9066 (parallel [(const_int 1)
9074 (parallel [(const_int 2)
9081 (parallel [(const_int 3)
9084 (const_int 15)]))))))]
9086 "vphaddubd\t{%1, %0|%0, %1}"
9087 [(set_attr "type" "sseiadd1")])
;; vphaddubq: same operand layout as xop_phaddbq (V16QI source, V2DI
;; register result) but emitting the "u" mnemonic.
9089 (define_insn "xop_phaddubq"
9090 [(set (match_operand:V2DI 0 "register_operand" "=x")
9096 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9097 (parallel [(const_int 0)
9102 (parallel [(const_int 1)
9108 (parallel [(const_int 2)
9113 (parallel [(const_int 3)
9120 (parallel [(const_int 8)
9125 (parallel [(const_int 9)
9131 (parallel [(const_int 10)
9136 (parallel [(const_int 11)
9137 (const_int 15)])))))))]
9139 "vphaddubq\t{%1, %0|%0, %1}"
9140 [(set_attr "type" "sseiadd1")])
;; vphadduwd: same operand layout as xop_phaddwd (V8HI source, V4SI
;; register result) but emitting the "u" mnemonic.
9142 (define_insn "xop_phadduwd"
9143 [(set (match_operand:V4SI 0 "register_operand" "=x")
9147 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9148 (parallel [(const_int 0)
9155 (parallel [(const_int 1)
9158 (const_int 7)])))))]
9160 "vphadduwd\t{%1, %0|%0, %1}"
9161 [(set_attr "type" "sseiadd1")])
;; vphadduwq: same operand layout as xop_phaddwq (V8HI source, V2DI
;; register result) but emitting the "u" mnemonic.
9163 (define_insn "xop_phadduwq"
9164 [(set (match_operand:V2DI 0 "register_operand" "=x")
9169 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9170 (parallel [(const_int 0)
9175 (parallel [(const_int 1)
9181 (parallel [(const_int 2)
9186 (parallel [(const_int 3)
9187 (const_int 7)]))))))]
9189 "vphadduwq\t{%1, %0|%0, %1}"
9190 [(set_attr "type" "sseiadd1")])
;; vphaddudq: same operand layout as xop_phadddq (V4SI source, V2DI
;; register result) but emitting the "u" mnemonic.
9192 (define_insn "xop_phaddudq"
9193 [(set (match_operand:V2DI 0 "register_operand" "=x")
9197 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9198 (parallel [(const_int 0)
9203 (parallel [(const_int 1)
9204 (const_int 3)])))))]
9206 "vphaddudq\t{%1, %0|%0, %1}"
9207 [(set_attr "type" "sseiadd1")])
;; vphsubbw: operand 1 is a V16QI register or memory source, operand 0 the
;; V8HI register result.  The two vec_select parallels start at element 0
;; and element 1 respectively.
;; NOTE(review): presumably a horizontal subtract of adjacent bytes -- confirm.
9209 (define_insn "xop_phsubbw"
9210 [(set (match_operand:V8HI 0 "register_operand" "=x")
9214 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9215 (parallel [(const_int 0)
9226 (parallel [(const_int 1)
9233 (const_int 15)])))))]
9235 "vphsubbw\t{%1, %0|%0, %1}"
9236 [(set_attr "type" "sseiadd1")])
;; vphsubwd: operand 1 is a V8HI register or memory source, operand 0 the
;; V4SI register result.  The two vec_select parallels start at element 0
;; and element 1 respectively.
9238 (define_insn "xop_phsubwd"
9239 [(set (match_operand:V4SI 0 "register_operand" "=x")
9243 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9244 (parallel [(const_int 0)
9251 (parallel [(const_int 1)
9254 (const_int 7)])))))]
9256 "vphsubwd\t{%1, %0|%0, %1}"
9257 [(set_attr "type" "sseiadd1")])
;; vphsubdq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI register result.  The two vec_select parallels start at element 0
;; and element 1 respectively.
9259 (define_insn "xop_phsubdq"
9260 [(set (match_operand:V2DI 0 "register_operand" "=x")
9264 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9265 (parallel [(const_int 0)
9270 (parallel [(const_int 1)
9271 (const_int 3)])))))]
9273 "vphsubdq\t{%1, %0|%0, %1}"
9274 [(set_attr "type" "sseiadd1")])
9276 ;; XOP permute instructions
;; Generic vpperm, modeled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 must be a register; either operand 2 or operand 3 may be in
;; memory, and the insn condition rejects both being memory at once.
9277 (define_insn "xop_pperm"
9278 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9280 [(match_operand:V16QI 1 "register_operand" "x,x")
9281 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9282 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9283 UNSPEC_XOP_PERMUTE))]
9284 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9285 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9286 [(set_attr "type" "sse4arg")
9287 (set_attr "mode" "TI")])
9289 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result via vpperm.
;; Operand 3 is the V16QI selector, recorded with a `use' so it becomes the
;; third source of the emitted instruction.  The insn condition rejects
;; operands 2 and 3 both being memory.
9290 (define_insn "xop_pperm_pack_v2di_v4si"
9291 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9294 (match_operand:V2DI 1 "register_operand" "x,x"))
9296 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9297 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9298 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9299 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9300 [(set_attr "type" "sse4arg")
9301 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result via vpperm.
;; Operand 3 is the V16QI selector, recorded with a `use'.  The insn
;; condition rejects operands 2 and 3 both being memory.
9303 (define_insn "xop_pperm_pack_v4si_v8hi"
9304 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9307 (match_operand:V4SI 1 "register_operand" "x,x"))
9309 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9310 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9311 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9312 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9313 [(set_attr "type" "sse4arg")
9314 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result via vpperm.
;; Operand 3 is the V16QI selector, recorded with a `use'.  The insn
;; condition rejects operands 2 and 3 both being memory.
9316 (define_insn "xop_pperm_pack_v8hi_v16qi"
9317 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9320 (match_operand:V8HI 1 "register_operand" "x,x"))
9322 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9323 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9324 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9325 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9326 [(set_attr "type" "sse4arg")
9327 (set_attr "mode" "TI")])
9329 ;; XOP packed rotate instructions
;; Rotate-left expander taking a scalar SImode count (operand 2).
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode if
;; needed, broadcast into a vector with vec_init, and the per-element
;; variable rotate xop_vrotl<mode>3 is emitted instead.
9330 (define_expand "rotl<mode>3"
9331 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
9333 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
9334 (match_operand:SI 2 "general_operand")))]
9337 /* If we were given a scalar, convert it to parallel */
9338 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9340 rtvec vs = rtvec_alloc (<ssescalarnum>);
9341 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9342 rtx reg = gen_reg_rtx (<MODE>mode);
9343 rtx op2 = operands[2];
9346 if (GET_MODE (op2) != <ssescalarmode>mode)
9348 op2 = gen_reg_rtx (<ssescalarmode>mode);
9349 convert_move (op2, operands[2], false);
9352 for (i = 0; i < <ssescalarnum>; i++)
9353 RTVEC_ELT (vs, i) = op2;
9355 emit_insn (gen_vec_init<mode> (reg, par));
9356 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander taking a scalar SImode count (operand 2).
;; When the count is not an accepted immediate, it is broadcast into a
;; vector as in rotl<mode>3, the vector is negated with neg<mode>2, and the
;; variable rotate xop_vrotl<mode>3 is emitted with the negated counts.
9361 (define_expand "rotr<mode>3"
9362 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
9363 (rotatert:SSEMODE1248
9364 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
9365 (match_operand:SI 2 "general_operand")))]
9368 /* If we were given a scalar, convert it to parallel */
9369 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9371 rtvec vs = rtvec_alloc (<ssescalarnum>);
9372 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9373 rtx neg = gen_reg_rtx (<MODE>mode);
9374 rtx reg = gen_reg_rtx (<MODE>mode);
9375 rtx op2 = operands[2];
9378 if (GET_MODE (op2) != <ssescalarmode>mode)
9380 op2 = gen_reg_rtx (<ssescalarmode>mode);
9381 convert_move (op2, operands[2], false);
9384 for (i = 0; i < <ssescalarnum>; i++)
9385 RTVEC_ELT (vs, i) = op2;
9387 emit_insn (gen_vec_init<mode> (reg, par));
9388 emit_insn (gen_neg<mode>2 (neg, reg));
9389 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the vprot
;; immediate (length_immediate is 1).
9394 (define_insn "xop_rotl<mode>3"
9395 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9397 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
9398 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9400 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9401 [(set_attr "type" "sseishft")
9402 (set_attr "length_immediate" "1")
9403 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The instruction emitted is the
;; rotate-left form of vprot, so the immediate printed (operand 3) is the
;; element width in bits minus the requested right-rotate count.
;; The width must come from the element mode: <ssescalarnum> is the number
;; of elements, and <ssescalarnum> * 8 only equals the element bit width
;; for V4SI (it yields 128 for V16QI, 64 for V8HI and 16 for V2DI).
9405 (define_insn "xop_rotr<mode>3"
9406 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9407 (rotatert:SSEMODE1248
9408 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
9409 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9412 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9413 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
9415 [(set_attr "type" "sseishft")
9416 (set_attr "length_immediate" "1")
9417 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negate the count vector (operand 2)
;; into a fresh register and emit the variable rotate-left pattern
;; xop_vrotl<mode>3 with the negated counts.
9419 (define_expand "vrotr<mode>3"
9420 [(match_operand:SSEMODE1248 0 "register_operand" "")
9421 (match_operand:SSEMODE1248 1 "register_operand" "")
9422 (match_operand:SSEMODE1248 2 "register_operand" "")]
9425 rtx reg = gen_reg_rtx (<MODE>mode);
9426 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9427 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
9431 (define_expand "vrotl<mode>3"
9432 [(match_operand:SSEMODE1248 0 "register_operand" "")
9433 (match_operand:SSEMODE1248 1 "register_operand" "")
9434 (match_operand:SSEMODE1248 2 "register_operand" "")]
9437 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable vprot with a per-element count vector in operand 2.  The RTL is
;; an if_then_else keyed on operand 2; the arm shown below is a rotatert by
;; the negation of operand 2.  Either operand 1 or operand 2 may be memory,
;; and the insn condition rejects both being memory.
9441 (define_insn "xop_vrotl<mode>3"
9442 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
9443 (if_then_else:SSEMODE1248
9445 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
9448 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
9450 (rotatert:SSEMODE1248
9452 (neg:SSEMODE1248 (match_dup 2)))))]
9453 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9454 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9455 [(set_attr "type" "sseishft")
9456 (set_attr "prefix_data16" "0")
9457 (set_attr "prefix_extra" "2")
9458 (set_attr "mode" "TI")])
9460 ;; XOP packed shift instructions.
9461 ;; FIXME: add V2DI back in
;; Per-element logical shift right (SSEMODE124 only): negate the count
;; vector and emit xop_lshl<mode>3 with the negated counts.
9462 (define_expand "vlshr<mode>3"
9463 [(match_operand:SSEMODE124 0 "register_operand" "")
9464 (match_operand:SSEMODE124 1 "register_operand" "")
9465 (match_operand:SSEMODE124 2 "register_operand" "")]
9468 rtx neg = gen_reg_rtx (<MODE>mode);
9469 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9470 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right (SSEMODE124 only): negate the count
;; vector and emit xop_ashl<mode>3 with the negated counts.
9474 (define_expand "vashr<mode>3"
9475 [(match_operand:SSEMODE124 0 "register_operand" "")
9476 (match_operand:SSEMODE124 1 "register_operand" "")
9477 (match_operand:SSEMODE124 2 "register_operand" "")]
9480 rtx neg = gen_reg_rtx (<MODE>mode);
9481 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9482 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left (SSEMODE124 only): forwards all three operands
;; unchanged to xop_ashl<mode>3.
9486 (define_expand "vashl<mode>3"
9487 [(match_operand:SSEMODE124 0 "register_operand" "")
9488 (match_operand:SSEMODE124 1 "register_operand" "")
9489 (match_operand:SSEMODE124 2 "register_operand" "")]
9492 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable vpsha with a per-element count vector in operand 2.  The RTL is
;; an if_then_else keyed on operand 2; the arm shown below is an ashiftrt by
;; the negation of operand 2.  Either operand 1 or operand 2 may be memory,
;; and the insn condition rejects both being memory.
9496 (define_insn "xop_ashl<mode>3"
9497 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
9498 (if_then_else:SSEMODE1248
9500 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
9503 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
9505 (ashiftrt:SSEMODE1248
9507 (neg:SSEMODE1248 (match_dup 2)))))]
9508 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9509 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9510 [(set_attr "type" "sseishft")
9511 (set_attr "prefix_data16" "0")
9512 (set_attr "prefix_extra" "2")
9513 (set_attr "mode" "TI")])
;; Variable vpshl with a per-element count vector in operand 2.  The RTL is
;; an if_then_else keyed on operand 2; the arm shown below is an lshiftrt by
;; the negation of operand 2.  Either operand 1 or operand 2 may be memory,
;; and the insn condition rejects both being memory.
9515 (define_insn "xop_lshl<mode>3"
9516 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
9517 (if_then_else:SSEMODE1248
9519 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
9522 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
9524 (lshiftrt:SSEMODE1248
9526 (neg:SSEMODE1248 (match_dup 2)))))]
9527 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9528 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9529 [(set_attr "type" "sseishft")
9530 (set_attr "prefix_data16" "0")
9531 (set_attr "prefix_extra" "2")
9532 (set_attr "mode" "TI")])
9534 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count: operand 2 is placed in all 16
;; slots of a PARALLEL, materialized with vec_initv16qi, and passed as the
;; count vector to xop_ashlv16qi3.
;; NOTE(review): unlike rotl<mode>3, the SImode count is not converted to
;; the element mode before vec_init -- confirm this is intended.
9535 (define_expand "ashlv16qi3"
9536 [(match_operand:V16QI 0 "register_operand" "")
9537 (match_operand:V16QI 1 "register_operand" "")
9538 (match_operand:SI 2 "nonmemory_operand" "")]
9541 rtvec vs = rtvec_alloc (16);
9542 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9543 rtx reg = gen_reg_rtx (V16QImode);
9545 for (i = 0; i < 16; i++)
9546 RTVEC_ELT (vs, i) = operands[2];
9548 emit_insn (gen_vec_initv16qi (reg, par));
9549 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count: operand 2 is placed in all
;; 16 slots of a PARALLEL, materialized with vec_initv16qi, and passed as
;; the count vector to xop_lshlv16qi3.
9553 (define_expand "lshlv16qi3"
9554 [(match_operand:V16QI 0 "register_operand" "")
9555 (match_operand:V16QI 1 "register_operand" "")
9556 (match_operand:SI 2 "nonmemory_operand" "")]
9559 rtvec vs = rtvec_alloc (16);
9560 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9561 rtx reg = gen_reg_rtx (V16QImode);
9563 for (i = 0; i < 16; i++)
9564 RTVEC_ELT (vs, i) = operands[2];
9566 emit_insn (gen_vec_initv16qi (reg, par));
9567 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented as
;; xop_ashlv16qi3 with negated counts.  A CONST_INT count is negated at
;; expand time before being broadcast; a non-constant count is broadcast
;; first and the resulting vector is negated with negv16qi2.
9571 (define_expand "ashrv16qi3"
9572 [(match_operand:V16QI 0 "register_operand" "")
9573 (match_operand:V16QI 1 "register_operand" "")
9574 (match_operand:SI 2 "nonmemory_operand" "")]
9577 rtvec vs = rtvec_alloc (16);
9578 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9579 rtx reg = gen_reg_rtx (V16QImode);
9581 rtx ele = ((CONST_INT_P (operands[2]))
9582 ? GEN_INT (- INTVAL (operands[2]))
9585 for (i = 0; i < 16; i++)
9586 RTVEC_ELT (vs, i) = ele;
9588 emit_insn (gen_vec_initv16qi (reg, par));
9590 if (!CONST_INT_P (operands[2]))
9592 rtx neg = gen_reg_rtx (V16QImode);
9593 emit_insn (gen_negv16qi2 (neg, reg));
9594 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
9597 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, implemented as
;; xop_ashlv2di3 with a negated count in both elements.  Three cases set the
;; element: a CONST_INT is negated at expand time; a count whose mode is not
;; DImode is first converted with convert_move and then negated with
;; negdi2; a DImode count is negated with negdi2 directly.
9602 (define_expand "ashrv2di3"
9603 [(match_operand:V2DI 0 "register_operand" "")
9604 (match_operand:V2DI 1 "register_operand" "")
9605 (match_operand:DI 2 "nonmemory_operand" "")]
9608 rtvec vs = rtvec_alloc (2);
9609 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
9610 rtx reg = gen_reg_rtx (V2DImode);
9613 if (CONST_INT_P (operands[2]))
9614 ele = GEN_INT (- INTVAL (operands[2]));
9615 else if (GET_MODE (operands[2]) != DImode)
9617 rtx move = gen_reg_rtx (DImode);
9618 ele = gen_reg_rtx (DImode);
9619 convert_move (move, operands[2], false);
9620 emit_insn (gen_negdi2 (ele, move));
9624 ele = gen_reg_rtx (DImode);
9625 emit_insn (gen_negdi2 (ele, operands[2]));
9628 RTVEC_ELT (vs, 0) = ele;
9629 RTVEC_ELT (vs, 1) = ele;
9630 emit_insn (gen_vec_initv2di (reg, par));
9631 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
;; vfrcz over the FMAMODE modes: one register-or-memory source (operand 1),
;; register destination; the mnemonic suffix comes from <ssemodesuffix>.
9636 (define_insn "xop_frcz<mode>2"
9637 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9639 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9642 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9643 [(set_attr "type" "ssecvt1")
9644 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form of vfrcz.  The preparation code sets
;; operands[3] to the all-zero constant of the vector mode.
9647 (define_expand "xop_vmfrcz<mode>2"
9648 [(set (match_operand:SSEMODEF2P 0 "register_operand")
9649 (vec_merge:SSEMODEF2P
9651 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
9657 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the vec_merge form of vfrcz: operand 2 must be a zero constant
;; (const0_operand), and the mnemonic uses the scalar suffix
;; <ssescalarmodesuffix>.
9660 (define_insn "*xop_vmfrcz_<mode>"
9661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9662 (vec_merge:SSEMODEF2P
9664 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
9666 (match_operand:SSEMODEF2P 2 "const0_operand")
9669 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9670 [(set_attr "type" "ssecvt1")
9671 (set_attr "mode" "<MODE>")])
;; vpcom compare of operand 2 (register) with operand 3 (register or
;; memory); operand 1 is the comparison, matched by
;; ix86_comparison_int_operator and printed into the mnemonic through %Y1.
;; NOTE(review): "type" is sse4arg here but ssecmp on the unsigned
;; variants -- confirm whether the difference is intentional.
9673 (define_insn "xop_maskcmp<mode>3"
9674 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9675 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
9676 [(match_operand:SSEMODE1248 2 "register_operand" "x")
9677 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
9679 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
9680 [(set_attr "type" "sse4arg")
9681 (set_attr "prefix_data16" "0")
9682 (set_attr "prefix_rep" "0")
9683 (set_attr "prefix_extra" "2")
9684 (set_attr "length_immediate" "1")
9685 (set_attr "mode" "TI")])
;; Unsigned vpcom compare: same operand layout as xop_maskcmp<mode>3, with
;; the comparison matched by ix86_comparison_uns_operator and a "u" inserted
;; into the mnemonic before the element-size suffix.
9687 (define_insn "xop_maskcmp_uns<mode>3"
9688 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9689 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
9690 [(match_operand:SSEMODE1248 2 "register_operand" "x")
9691 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
9693 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
9694 [(set_attr "type" "ssecmp")
9695 (set_attr "prefix_data16" "0")
9696 (set_attr "prefix_rep" "0")
9697 (set_attr "prefix_extra" "2")
9698 (set_attr "length_immediate" "1")
9699 (set_attr "mode" "TI")])
9701 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9702 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9703 ;; the exact instruction generated for the intrinsic.
;; Same output as xop_maskcmp_uns<mode>3, but the comparison is wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP so the RTL is opaque to the optimizers.
;; NOTE(review): no "prefix_rep" attribute is set here, unlike
;; xop_maskcmp_uns<mode>3 -- confirm whether that is intentional.
9704 (define_insn "xop_maskcmp_uns2<mode>3"
9705 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9707 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
9708 [(match_operand:SSEMODE1248 2 "register_operand" "x")
9709 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
9710 UNSPEC_XOP_UNSIGNED_CMP))]
9712 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
9713 [(set_attr "type" "ssecmp")
9714 (set_attr "prefix_data16" "0")
9715 (set_attr "prefix_extra" "2")
9716 (set_attr "length_immediate" "1")
9717 (set_attr "mode" "TI")])
9719 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9720 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modeled as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a
;; constant selector: nonzero emits vpcomtrue, zero emits vpcomfalse.
9721 (define_insn "xop_pcom_tf<mode>3"
9722 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
9724 [(match_operand:SSEMODE1248 1 "register_operand" "x")
9725 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
9726 (match_operand:SI 3 "const_int_operand" "n")]
9727 UNSPEC_XOP_TRUEFALSE))]
9730 return ((INTVAL (operands[3]) != 0)
9731 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9732 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
9734 [(set_attr "type" "ssecmp")
9735 (set_attr "prefix_data16" "0")
9736 (set_attr "prefix_extra" "2")
9737 (set_attr "length_immediate" "1")
9738 (set_attr "mode" "TI")])
;; vpermil2ps / vpermil2pd: two float-vector sources (operands 1 and 2), a
;; selector vector in <avxpermvecmode> (operand 3, register or memory) and a
;; 0..3 immediate (operand 4).
;; NOTE(review): operand 2 uses a nonimmediate_operand predicate with an
;; "%x" constraint, which only admits registers -- confirm the "%" is wanted.
9740 (define_insn "xop_vpermil2<mode>3"
9741 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9743 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9744 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
9745 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
9746 (match_operand:SI 4 "const_0_to_3_operand" "n")]
9749 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
9750 [(set_attr "type" "sse4arg")
9751 (set_attr "length_immediate" "1")
9752 (set_attr "mode" "<MODE>")])
9754 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encrypt round.  Alternative 0 ("noavx") is the two-operand legacy
;; form with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand VEX form.  Operand 2 may be a register or memory in both.
9756 (define_insn "aesenc"
9757 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9758 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9759 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9763 aesenc\t{%2, %0|%0, %2}
9764 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9765 [(set_attr "isa" "noavx,avx")
9766 (set_attr "type" "sselog1")
9767 (set_attr "prefix_extra" "1")
9768 (set_attr "prefix" "orig,vex")
9769 (set_attr "mode" "TI")])
;; AES last encrypt round (UNSPEC_AESENCLAST).  Alternative 0 ("noavx") is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 ("avx") is the three-operand VEX form.
9771 (define_insn "aesenclast"
9772 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9773 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9774 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9775 UNSPEC_AESENCLAST))]
9778 aesenclast\t{%2, %0|%0, %2}
9779 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9780 [(set_attr "isa" "noavx,avx")
9781 (set_attr "type" "sselog1")
9782 (set_attr "prefix_extra" "1")
9783 (set_attr "prefix" "orig,vex")
9784 (set_attr "mode" "TI")])
;; AES decrypt round.  Alternative 0 ("noavx") is the two-operand legacy
;; form with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand VEX form.
9786 (define_insn "aesdec"
9787 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9788 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9789 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9793 aesdec\t{%2, %0|%0, %2}
9794 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9795 [(set_attr "isa" "noavx,avx")
9796 (set_attr "type" "sselog1")
9797 (set_attr "prefix_extra" "1")
9798 (set_attr "prefix" "orig,vex")
9799 (set_attr "mode" "TI")])
;; AES last decrypt round (UNSPEC_AESDECLAST).  Alternative 0 ("noavx") is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 ("avx") is the three-operand VEX form.
9801 (define_insn "aesdeclast"
9802 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9803 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9804 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9805 UNSPEC_AESDECLAST))]
9808 aesdeclast\t{%2, %0|%0, %2}
9809 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9810 [(set_attr "isa" "noavx,avx")
9811 (set_attr "type" "sselog1")
9812 (set_attr "prefix_extra" "1")
9813 (set_attr "prefix" "orig,vex")
9814 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  One template serves
;; both encodings: the "%v" marker together with prefix "maybe_vex" selects
;; the VEX spelling when applicable.
9816 (define_insn "aesimc"
9817 [(set (match_operand:V2DI 0 "register_operand" "=x")
9818 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9821 "%vaesimc\t{%1, %0|%0, %1}"
9822 [(set_attr "type" "sselog1")
9823 (set_attr "prefix_extra" "1")
9824 (set_attr "prefix" "maybe_vex")
9825 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): register-or-memory source in
;; operand 1 and an 8-bit immediate (0..255) in operand 2.  Uses the "%v" /
;; "maybe_vex" scheme so one template covers both encodings.
9827 (define_insn "aeskeygenassist"
9828 [(set (match_operand:V2DI 0 "register_operand" "=x")
9829 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9830 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9831 UNSPEC_AESKEYGENASSIST))]
9833 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9834 [(set_attr "type" "sselog1")
9835 (set_attr "prefix_extra" "1")
9836 (set_attr "length_immediate" "1")
9837 (set_attr "prefix" "maybe_vex")
9838 (set_attr "mode" "TI")])
;; pclmulqdq with an 8-bit immediate (operand 3).  Alternative 0 ("noavx")
;; is the legacy form with operand 1 tied to the destination; alternative 1
;; ("avx") is the three-source-operand VEX form.
9840 (define_insn "pclmulqdq"
9841 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9842 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9843 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9844 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9848 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9849 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9850 [(set_attr "isa" "noavx,avx")
9851 (set_attr "type" "sselog1")
9852 (set_attr "prefix_extra" "1")
9853 (set_attr "length_immediate" "1")
9854 (set_attr "prefix" "orig,vex")
9855 (set_attr "mode" "TI")])
;; Expander that builds the vzeroall PARALLEL by hand.  Element 0 is an
;; UNSPEC_VOLATILE wrapping const0_rtx; it is followed by one SET per SSE
;; register (16 when TARGET_64BIT, otherwise 8) storing the V8SImode zero
;; constant into that hard register.
9857 (define_expand "avx_vzeroall"
9858 [(match_par_dup 0 [(const_int 0)])]
9861 int nregs = TARGET_64BIT ? 16 : 8;
9864 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
9866 XVECEXP (operands[0], 0, 0)
9867 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
9870 for (regno = 0; regno < nregs; regno++)
9871 XVECEXP (operands[0], 0, regno + 1)
9872 = gen_rtx_SET (VOIDmode,
9873 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
9874 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  Attributes mark
;; it as having no modrm byte and no memory access.
9877 (define_insn "*avx_vzeroall"
9878 [(match_parallel 0 "vzeroall_operation"
9879 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9882 [(set_attr "type" "sse")
9883 (set_attr "modrm" "0")
9884 (set_attr "memory" "none")
9885 (set_attr "prefix" "vex")
9886 (set_attr "mode" "OI")])
9888 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9889 ;; if the upper 128bits are unused.
;; vzeroupper, modeled as an UNSPECV_VZEROUPPER unspec_volatile carrying a
;; constant integer operand 0.  Attributes mark it as having no modrm byte
;; and no memory access.
9890 (define_insn "avx_vzeroupper"
9891 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9892 UNSPECV_VZEROUPPER)]
9895 [(set_attr "type" "sse")
9896 (set_attr "modrm" "0")
9897 (set_attr "memory" "none")
9898 (set_attr "prefix" "vex")
9899 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of a 256-bit vector
;; (AVX256MODE24P).  Alternative 0 takes the scalar from memory and emits
;; vbroadcast; alternative 1 accepts a register source and is discouraged
;; with "?".
9901 (define_insn "vec_dup<mode>"
9902 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
9903 (vec_duplicate:AVX256MODE24P
9904 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
9907 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9909 [(set_attr "type" "ssemov")
9910 (set_attr "prefix_extra" "1")
9911 (set_attr "prefix" "vex")
9912 (set_attr "mode" "V8SF")])
;; After reload, split a register-source 256-bit vec_duplicate in two steps:
;; duplicate the scalar into a half-width vector, then vec_concat that half
;; with itself.  The half-width temporary (operands[2]) is the destination's
;; own hard register viewed in <avxhalfvecmode>.
9915 [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
9916 (vec_duplicate:AVX256MODE24P
9917 (match_operand:<avxscalarmode> 1 "register_operand" "")))]
9918 "TARGET_AVX && reload_completed"
9919 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
9920 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
9921 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));")
;; Duplicate a 128-bit half-vector into both halves of a 256-bit register.
;; Three alternatives by source location:
;;   memory                 -> vbroadcastf128
;;   tied to operand 0      -> vinsertf128 with immediate 1
;;   other register ("?x")  -> vperm2f128 with immediate 0, the source
;;                             being printed with the %t modifier
9923 (define_insn "avx_vbroadcastf128_<mode>"
9924 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
9925 (vec_concat:AVX256MODE
9926 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9930 vbroadcastf128\t{%1, %0|%0, %1}
9931 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9932 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9933 [(set_attr "type" "ssemov,sselog1,sselog1")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "length_immediate" "0,1,1")
9936 (set_attr "prefix" "vex")
9937 (set_attr "mode" "V4SF,V8SF,V8SF")])
9939 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9940 ;; If it so happens that the input is in memory, use vbroadcast.
9941 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element operands[3], recognized from a parallel
;; accepted by avx_vbroadcast_operand.  The output code either re-addresses
;; the memory source to the selected SFmode element (byte offset elt * 4)
;; and emits vbroadcastss, or emits vpermilps with immediate elt * 0x55,
;; which repeats the 2-bit element index in all four fields.
;; Per the attributes, alternatives 0 and 1 carry no immediate and
;; alternative 2 carries one.
9942 (define_insn "*avx_vperm_broadcast_v4sf"
9943 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
9945 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
9946 (match_parallel 2 "avx_vbroadcast_operand"
9947 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9950 int elt = INTVAL (operands[3]);
9951 switch (which_alternative)
9955 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
9956 return "vbroadcastss\t{%1, %0|%0, %1}";
9958 operands[2] = GEN_INT (elt * 0x55);
9959 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
9964 [(set_attr "type" "ssemov,ssemov,sselog1")
9965 (set_attr "prefix_extra" "1")
9966 (set_attr "length_immediate" "0,0,1")
9967 (set_attr "prefix" "vex")
9968 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast of element operands[3], split after reload.
;; The split code contains two strategies:
;;  - an in-register sequence: vpermil replicates the wanted element within
;;    each 128-bit lane, then vperm2f128 copies the lane holding it (index
;;    elt / (<ssescalarnum> / 2), times 0x11) into both lanes;
;;  - a memory path: operand 1 is re-addressed to the selected scalar
;;    element so the replacement vec_duplicate pattern can be used.
;; NOTE(review): presumably the first applies to a register source and the
;; second to a memory source -- confirm against the full split body.
9970 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
9971 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
9972 (vec_select:AVX256MODEF2P
9973 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
9974 (match_parallel 2 "avx_vbroadcast_operand"
9975 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9978 "&& reload_completed"
9979 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
9981 rtx op0 = operands[0], op1 = operands[1];
9982 int elt = INTVAL (operands[3]);
9988 /* Shuffle element we care about into all elements of the 128-bit lane.
9989 The other lane gets shuffled too, but we don't care. */
9990 if (<MODE>mode == V4DFmode)
9991 mask = (elt & 1 ? 15 : 0);
9993 mask = (elt & 3) * 0x55;
9994 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
9996 /* Shuffle the lane we care about into both lanes of the dest. */
9997 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
9998 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10002 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
10003 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermilpd expander (AVXMODEFDP modes): decode the immediate in operand 2
;; into an explicit vec_select PARALLEL.  Each element uses one mask bit;
;; for V4DF, bits 2 and 3 select within the upper pair, hence the "+ 2".
10006 (define_expand "avx_vpermil<mode>"
10007 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
10008 (vec_select:AVXMODEFDP
10009 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
10010 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10013 int mask = INTVAL (operands[2]);
10014 rtx perm[<ssescalarnum>];
10016 perm[0] = GEN_INT (mask & 1);
10017 perm[1] = GEN_INT ((mask >> 1) & 1);
10018 if (<MODE>mode == V4DFmode)
10020 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
10021 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
10025 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermilps expander (AVXMODEFSP modes): decode the immediate in operand 2
;; into an explicit vec_select PARALLEL.  Each of the first four elements
;; uses one 2-bit field of the mask; for V8SF the same four fields are
;; reused for elements 4-7 with 4 added, so both lanes get the same shuffle.
10028 (define_expand "avx_vpermil<mode>"
10029 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
10030 (vec_select:AVXMODEFSP
10031 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
10032 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10035 int mask = INTVAL (operands[2]);
10036 rtx perm[<ssescalarnum>];
10038 perm[0] = GEN_INT (mask & 3);
10039 perm[1] = GEN_INT ((mask >> 2) & 3);
10040 perm[2] = GEN_INT ((mask >> 4) & 3);
10041 perm[3] = GEN_INT ((mask >> 6) & 3);
10042 if (<MODE>mode == V8SFmode)
10044 perm[4] = GEN_INT ((mask & 3) + 4);
10045 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
10046 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
10047 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
10051 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate vpermilps / vpermilpd, recognized from a vec_select whose
;; parallel is accepted by avx_vpermilp_<mode>_operand.  The immediate is
;; recovered at output time as avx_vpermilp_parallel (...) - 1 and stored
;; back into operands[2] for printing.
10054 (define_insn "*avx_vpermilp<mode>"
10055 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
10056 (vec_select:AVXMODEF2P
10057 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
10058 (match_parallel 2 "avx_vpermilp_<mode>_operand"
10059 [(match_operand 3 "const_int_operand" "")])))]
10062 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10063 operands[2] = GEN_INT (mask);
10064 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10066 [(set_attr "type" "sselog")
10067 (set_attr "prefix_extra" "1")
10068 (set_attr "length_immediate" "1")
10069 (set_attr "prefix" "vex")
10070 (set_attr "mode" "<MODE>")])
;; Variable vpermilps / vpermilpd: the control is a vector in
;; <avxpermvecmode> (operand 2, register or memory) rather than an
;; immediate, so no length_immediate attribute is set.
10072 (define_insn "avx_vpermilvar<mode>3"
10073 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
10075 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
10076 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
10079 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10080 [(set_attr "type" "sselog")
10081 (set_attr "prefix_extra" "1")
10082 (set_attr "prefix" "vex")
10083 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  The default RTL is the UNSPEC_VPERMIL2F128 form.
;; When neither bit 3 nor bit 7 of the immediate is set (mask & 0x88 == 0),
;; the preparation code instead builds an explicit
;; vec_select-of-vec_concat SET: the low two bits pick the half-vector for
;; the low half of the result and bits 4-5 pick the one for the high half,
;; each expanded to nelt / 2 consecutive element indices.
10085 (define_expand "avx_vperm2f128<mode>3"
10086 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
10087 (unspec:AVX256MODE2P
10088 [(match_operand:AVX256MODE2P 1 "register_operand" "")
10089 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
10090 (match_operand:SI 3 "const_0_to_255_operand" "")]
10091 UNSPEC_VPERMIL2F128))]
10094 int mask = INTVAL (operands[3]);
10095 if ((mask & 0x88) == 0)
10097 rtx perm[<ssescalarnum>], t1, t2;
10098 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10100 base = (mask & 3) * nelt2;
10101 for (i = 0; i < nelt2; ++i)
10102 perm[i] = GEN_INT (base + i);
10104 base = ((mask >> 4) & 3) * nelt2;
10105 for (i = 0; i < nelt2; ++i)
10106 perm[i + nelt2] = GEN_INT (base + i);
10108 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
10109 operands[1], operands[2]);
10110 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10111 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10112 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10118 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10119 ;; means that in order to represent this properly in rtl we'd have to
10120 ;; nest *another* vec_concat with a zero operand and do the select from
10121 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: accepts any 8-bit immediate and
;; prints it unchanged as operand 3.
10122 (define_insn "*avx_vperm2f128<mode>_full"
10123 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10124 (unspec:AVX256MODE2P
10125 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10126 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10127 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10128 UNSPEC_VPERMIL2F128))]
10130 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10131 [(set_attr "type" "sselog")
10132 (set_attr "prefix_extra" "1")
10133 (set_attr "length_immediate" "1")
10134 (set_attr "prefix" "vex")
10135 (set_attr "mode" "V8SF")])
;; vec_select-of-vec_concat form of vperm2f128.  The parallel must be
;; accepted by avx_vperm2f128_<mode>_operand; the immediate is recovered at
;; output time as avx_vperm2f128_parallel (...) - 1 and stored back into
;; operands[3] for printing.
10137 (define_insn "*avx_vperm2f128<mode>_nozero"
10138 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10139 (vec_select:AVX256MODE2P
10140 (vec_concat:<ssedoublesizemode>
10141 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10142 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10143 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
10144 [(match_operand 4 "const_int_operand" "")])))]
10147 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10148 operands[3] = GEN_INT (mask);
10149 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10151 [(set_attr "type" "sselog")
10152 (set_attr "prefix_extra" "1")
10153 (set_attr "length_immediate" "1")
10154 (set_attr "prefix" "vex")
10155 (set_attr "mode" "V8SF")])
;; vinsertf128 expander: operand 3 (restricted to 0 or 1 by its predicate)
;; selects between the vec_set_lo_<mode> and vec_set_hi_<mode> generators,
;; which is then called with operands 0-2.  Any other value reaches
;; gcc_unreachable.
;; NOTE(review): presumably 0 selects the low half and 1 the high half,
;; matching the immediates in the vec_set_lo/hi templates -- confirm.
10157 (define_expand "avx_vinsertf128<mode>"
10158 [(match_operand:AVX256MODE 0 "register_operand" "")
10159 (match_operand:AVX256MODE 1 "register_operand" "")
10160 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
10161 (match_operand:SI 3 "const_0_to_1_operand" "")]
10164 rtx (*insn)(rtx, rtx, rtx);
10166 switch (INTVAL (operands[3]))
10169 insn = gen_vec_set_lo_<mode>;
10172 insn = gen_vec_set_hi_<mode>;
10175 gcc_unreachable ();
10178 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vec_set_lo for the AVX256MODE4P modes.  The result is the vec_concat of
;; operand 2 (a <avxhalfvecmode> value) with elements 2 and 3 selected
;; from operand 1, so operand 2 supplies the low-numbered elements.
;; Emitted as vinsertf128 with immediate 0.
10182 (define_insn "vec_set_lo_<mode>"
10183 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
10184 (vec_concat:AVX256MODE4P
10185 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
10186 (vec_select:<avxhalfvecmode>
10187 (match_operand:AVX256MODE4P 1 "register_operand" "x")
10188 (parallel [(const_int 2) (const_int 3)]))))]
10190 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10191 [(set_attr "type" "sselog")
10192 (set_attr "prefix_extra" "1")
10193 (set_attr "length_immediate" "1")
10194 (set_attr "prefix" "vex")
10195 (set_attr "mode" "V8SF")])
;; vec_set_hi for the AVX256MODE4P modes.  The result is the vec_concat of
;; elements 0 and 1 selected from operand 1 with operand 2 (a
;; <avxhalfvecmode> value), so operand 2 supplies the high-numbered
;; elements.  Emitted as vinsertf128 with immediate 1.
10197 (define_insn "vec_set_hi_<mode>"
10198 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
10199 (vec_concat:AVX256MODE4P
10200 (vec_select:<avxhalfvecmode>
10201 (match_operand:AVX256MODE4P 1 "register_operand" "x")
10202 (parallel [(const_int 0) (const_int 1)]))
10203 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
10205 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10206 [(set_attr "type" "sselog")
10207 (set_attr "prefix_extra" "1")
10208 (set_attr "length_immediate" "1")
10209 (set_attr "prefix" "vex")
10210 (set_attr "mode" "V8SF")])
;; vec_set_lo for the AVX256MODE8P modes.  The result is the vec_concat of
;; operand 2 (a <avxhalfvecmode> value) with elements 4-7 selected from
;; operand 1, so operand 2 supplies the low-numbered elements.  Emitted as
;; vinsertf128 with immediate 0.
10212 (define_insn "vec_set_lo_<mode>"
10213 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
10214 (vec_concat:AVX256MODE8P
10215 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
10216 (vec_select:<avxhalfvecmode>
10217 (match_operand:AVX256MODE8P 1 "register_operand" "x")
10218 (parallel [(const_int 4) (const_int 5)
10219 (const_int 6) (const_int 7)]))))]
10221 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10222 [(set_attr "type" "sselog")
10223 (set_attr "prefix_extra" "1")
10224 (set_attr "length_immediate" "1")
10225 (set_attr "prefix" "vex")
10226 (set_attr "mode" "V8SF")])
;; vec_set_hi for the AVX256MODE8P modes.  The result is the vec_concat of
;; elements 0-3 selected from operand 1 with operand 2 (a <avxhalfvecmode>
;; value), so operand 2 supplies the high-numbered elements.  Emitted as
;; vinsertf128 with immediate 1.
10228 (define_insn "vec_set_hi_<mode>"
10229 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
10230 (vec_concat:AVX256MODE8P
10231 (vec_select:<avxhalfvecmode>
10232 (match_operand:AVX256MODE8P 1 "register_operand" "x")
10233 (parallel [(const_int 0) (const_int 1)
10234 (const_int 2) (const_int 3)]))
10235 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
10237 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10238 [(set_attr "type" "sselog")
10239 (set_attr "prefix_extra" "1")
10240 (set_attr "length_immediate" "1")
10241 (set_attr "prefix" "vex")
10242 (set_attr "mode" "V8SF")])
;; V16HI variant of vec_set_lo.  Operand 2 is a V8HI value and the
;; parallel names elements 8-15 of the V16HI operand 1.  Emitted as
;; vinsertf128 with immediate 0.
;; NOTE(review): the rtx codes wrapping operands 2 and 1 are not visible
;; in this listing; presumably vec_concat:V16HI and vec_select:V8HI, as in
;; the vec_set_lo_<mode> patterns -- confirm against the pristine source.
10244 (define_insn "vec_set_lo_v16hi"
10245 [(set (match_operand:V16HI 0 "register_operand" "=x")
10247 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10249 (match_operand:V16HI 1 "register_operand" "x")
10250 (parallel [(const_int 8) (const_int 9)
10251 (const_int 10) (const_int 11)
10252 (const_int 12) (const_int 13)
10253 (const_int 14) (const_int 15)]))))]
10255 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10256 [(set_attr "type" "sselog")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "prefix" "vex")
10260 (set_attr "mode" "V8SF")])
;; V16HI variant of vec_set_hi.  The parallel names elements 0-7 of the
;; V16HI operand 1 and operand 2 is a V8HI value that follows them.
;; Emitted as vinsertf128 with immediate 1.
;; NOTE(review): the rtx codes wrapping operands 1 and 2 are not visible
;; in this listing; presumably vec_concat:V16HI and vec_select:V8HI, as in
;; the vec_set_hi_<mode> patterns -- confirm against the pristine source.
10262 (define_insn "vec_set_hi_v16hi"
10263 [(set (match_operand:V16HI 0 "register_operand" "=x")
10266 (match_operand:V16HI 1 "register_operand" "x")
10267 (parallel [(const_int 0) (const_int 1)
10268 (const_int 2) (const_int 3)
10269 (const_int 4) (const_int 5)
10270 (const_int 6) (const_int 7)]))
10271 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10273 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10274 [(set_attr "type" "sselog")
10275 (set_attr "prefix_extra" "1")
10276 (set_attr "length_immediate" "1")
10277 (set_attr "prefix" "vex")
10278 (set_attr "mode" "V8SF")])
;; V32QI variant of vec_set_lo.  Operand 2 is a V16QI value and the
;; parallel names elements 16-31 of the V32QI operand 1.  Emitted as
;; vinsertf128 with immediate 0.
;; NOTE(review): the rtx codes wrapping operands 2 and 1 are not visible
;; in this listing; presumably vec_concat:V32QI and vec_select:V16QI, as
;; in the vec_set_lo_<mode> patterns -- confirm against the pristine source.
10280 (define_insn "vec_set_lo_v32qi"
10281 [(set (match_operand:V32QI 0 "register_operand" "=x")
10283 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10285 (match_operand:V32QI 1 "register_operand" "x")
10286 (parallel [(const_int 16) (const_int 17)
10287 (const_int 18) (const_int 19)
10288 (const_int 20) (const_int 21)
10289 (const_int 22) (const_int 23)
10290 (const_int 24) (const_int 25)
10291 (const_int 26) (const_int 27)
10292 (const_int 28) (const_int 29)
10293 (const_int 30) (const_int 31)]))))]
10295 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10296 [(set_attr "type" "sselog")
10297 (set_attr "prefix_extra" "1")
10298 (set_attr "length_immediate" "1")
10299 (set_attr "prefix" "vex")
10300 (set_attr "mode" "V8SF")])
;; V32QI variant of vec_set_hi.  The parallel names elements 0-15 of the
;; V32QI operand 1 and operand 2 is a V16QI value that follows them.
;; Emitted as vinsertf128 with immediate 1.
;; NOTE(review): the rtx codes wrapping operands 1 and 2 are not visible
;; in this listing; presumably vec_concat:V32QI and vec_select:V16QI, as
;; in the vec_set_hi_<mode> patterns -- confirm against the pristine source.
10302 (define_insn "vec_set_hi_v32qi"
10303 [(set (match_operand:V32QI 0 "register_operand" "=x")
10306 (match_operand:V32QI 1 "register_operand" "x")
10307 (parallel [(const_int 0) (const_int 1)
10308 (const_int 2) (const_int 3)
10309 (const_int 4) (const_int 5)
10310 (const_int 6) (const_int 7)
10311 (const_int 8) (const_int 9)
10312 (const_int 10) (const_int 11)
10313 (const_int 12) (const_int 13)
10314 (const_int 14) (const_int 15)]))
10315 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10317 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10318 [(set_attr "type" "sselog")
10319 (set_attr "prefix_extra" "1")
10320 (set_attr "length_immediate" "1")
10321 (set_attr "prefix" "vex")
10322 (set_attr "mode" "V8SF")])
;; Expander avx_maskload for the VF modes.  Operand 0 is the destination
;; register, operand 1 the memory source, and operand 2 an
;; <avxpermvecmode> register (presumably the mask -- confirm).
;; NOTE(review): the rtx code combining operands 2 and 1 and the expander
;; condition are not visible in this listing -- confirm against the
;; pristine source.
10324 (define_expand "avx_maskload<ssemodesuffix><avxmodesuffix>"
10325 [(set (match_operand:VF 0 "register_operand" "")
10327 [(match_operand:<avxpermvecmode> 2 "register_operand" "")
10328 (match_operand:VF 1 "memory_operand" "")
;; Expander avx_maskstore for the VF modes.  Operand 0 is the memory
;; destination, operand 2 the register source, and operand 1 an
;; <avxpermvecmode> register (presumably the mask -- confirm).
;; NOTE(review): the rtx code combining operands 1 and 2 and the expander
;; condition are not visible in this listing -- confirm against the
;; pristine source.
10333 (define_expand "avx_maskstore<ssemodesuffix><avxmodesuffix>"
10334 [(set (match_operand:VF 0 "memory_operand" "")
10336 [(match_operand:<avxpermvecmode> 1 "register_operand" "")
10337 (match_operand:VF 2 "register_operand" "")
;; Single insn covering both directions of vmaskmov<ssemodesuffix>.
;; Alternative 0 has a register operand 0 and a memory operand 2;
;; alternative 1 has a memory operand 0 and a register operand 2.
;; Operand 1 is an <avxpermvecmode> register in both.  The visible part
;; of the condition, REG_P (operands[0]) == MEM_P (operands[2]), rejects
;; register/register and memory/memory combinations.
10342 (define_insn "*avx_maskmov<ssemodesuffix><avxmodesuffix>"
10343 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10345 [(match_operand:<avxpermvecmode> 1 "register_operand" "x,x")
10346 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10350 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10351 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10352 [(set_attr "type" "sselog1")
10353 (set_attr "prefix_extra" "1")
10354 (set_attr "prefix" "vex")
10355 (set_attr "mode" "<MODE>")])
;; Wraps a <avxhalfvecmode> operand 1 in an unspec of the full
;; AVX256MODE2P mode.  Once reload has completed the insn is split into a
;; plain move: operand 1 is re-expressed in <MODE>mode, either with
;; gen_rtx_REG on its register number or with gen_lowpart, and then moved
;; into operand 0.
;; NOTE(review): the test choosing between gen_rtx_REG and gen_lowpart is
;; not visible in this listing; presumably REG_P (op1) -- confirm against
;; the pristine source.
10357 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
10358 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
10359 (unspec:AVX256MODE2P
10360 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
10364 "&& reload_completed"
10367 rtx op1 = operands[1];
10369 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
10371 op1 = gen_lowpart (<MODE>mode, op1);
10372 emit_move_insn (operands[0], op1);
;; vec_init expander for the AVX256MODE modes.  All the work is delegated
;; to ix86_expand_vector_init, called with false as its first argument,
;; the destination register (operand 0) and the initializer (operand 1,
;; which has no mode or predicate restriction here).
10376 (define_expand "vec_init<mode>"
10377 [(match_operand:AVX256MODE 0 "register_operand" "")
10378 (match_operand 1 "" "")]
10381 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_concat of two <avxhalfvecmode> values into an AVX256MODE register.
;; Alternative 0 (operand 2 in a register or memory; type sselog, one
;; immediate byte) emits vinsertf128 with immediate 1, printing operand 1
;; with the %t modifier.  Alternative 1 (operand 2 matching constraint
;; "C"; type ssemov) emits vmovaps, vmovapd or vmovdqa of operand 1 into
;; operand 0 printed with the %x modifier, chosen from the insn's mode
;; attribute.
;; NOTE(review): the case labels of both switches are not visible in this
;; listing -- confirm the mode-to-mnemonic mapping against the pristine
;; source.
10385 (define_insn "*vec_concat<mode>_avx"
10386 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10387 (vec_concat:AVX256MODE
10388 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
10389 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
10392 switch (which_alternative)
10395 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
10397 switch (get_attr_mode (insn))
10400 return "vmovaps\t{%1, %x0|%x0, %1}";
10402 return "vmovapd\t{%1, %x0|%x0, %1}";
10404 return "vmovdqa\t{%1, %x0|%x0, %1}";
10407 gcc_unreachable ();
10410 [(set_attr "type" "sselog,ssemov")
10411 (set_attr "prefix_extra" "1,*")
10412 (set_attr "length_immediate" "1,*")
10413 (set_attr "prefix" "vex")
10414 (set_attr "mode" "<avxvecmode>")])
;; Register form of vcvtph2ps producing a V4SF result.  The conversion is
;; modelled as an unspec:V8SF of the V8HI operand 1, from which four
;; elements are selected.  The instruction converts the four low
;; half-precision elements, so the selector must name elements 0, 1, 2
;; and 3 exactly once each; a selector that repeats an element would
;; describe a different value from the one the hardware produces.
10416 (define_insn "vcvtph2ps"
10417 [(set (match_operand:V4SF 0 "register_operand" "=x")
10419 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10421 (parallel [(const_int 0) (const_int 1)
10422 (const_int 2) (const_int 3)])))]
10424 "vcvtph2ps\t{%1, %0|%0, %1}"
10425 [(set_attr "type" "ssecvt")
10426 (set_attr "prefix" "vex")
10427 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps producing a V4SF result.  Operand 1 is a V4HI
;; memory operand and the conversion is an UNSPEC_VCVTPH2PS directly in
;; V4SF mode, with no element selection.
10429 (define_insn "*vcvtph2ps_load"
10430 [(set (match_operand:V4SF 0 "register_operand" "=x")
10431 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10432 UNSPEC_VCVTPH2PS))]
10434 "vcvtph2ps\t{%1, %0|%0, %1}"
10435 [(set_attr "type" "ssecvt")
10436 (set_attr "prefix" "vex")
10437 (set_attr "mode" "V8SF")])
;; vcvtph2ps producing a V8SF result.  Operand 1 is a V8HI register or
;; memory operand and the conversion is an UNSPEC_VCVTPH2PS in V8SF mode.
10439 (define_insn "vcvtph2ps256"
10440 [(set (match_operand:V8SF 0 "register_operand" "=x")
10441 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10442 UNSPEC_VCVTPH2PS))]
10444 "vcvtph2ps\t{%1, %0|%0, %1}"
10445 [(set_attr "type" "ssecvt")
10446 (set_attr "prefix" "vex")
10447 (set_attr "mode" "V8SF")])
;; Expander for vcvtps2ph with a V8HI register destination.  The
;; conversion is an unspec:V4HI of the V4SF operand 1 and the SImode
;; immediate operand 2.  The preparation statement sets operands[3] to
;; the V4HImode zero constant.
;; NOTE(review): the place where operand 3 is used is not visible in this
;; listing; presumably it fills the other half of the V8HI result, as in
;; *vcvtps2ph -- confirm against the pristine source.
10449 (define_expand "vcvtps2ph"
10450 [(set (match_operand:V8HI 0 "register_operand" "")
10452 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10453 (match_operand:SI 2 "immediate_operand" "")]
10457 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph with a V8HI destination.  The pattern pairs
;; an unspec:V4HI of operand 1 (V4SF register) and operand 2 (SImode
;; immediate, constraint "N") with operand 3, which must satisfy
;; const0_operand in V4HI mode.  Only operands 0-2 appear in the output
;; template.
10459 (define_insn "*vcvtps2ph"
10460 [(set (match_operand:V8HI 0 "register_operand" "=x")
10462 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10463 (match_operand:SI 2 "immediate_operand" "N")]
10465 (match_operand:V4HI 3 "const0_operand" "")))]
10467 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10468 [(set_attr "type" "ssecvt")
10469 (set_attr "prefix" "vex")
10470 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph.  The destination is a V4HI memory operand, so
;; the UNSPEC_VCVTPS2PH of operand 1 (V4SF register) and operand 2 (SImode
;; immediate, constraint "N") is set directly with no zero half attached.
10472 (define_insn "*vcvtps2ph_store"
10473 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10474 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10475 (match_operand:SI 2 "immediate_operand" "N")]
10476 UNSPEC_VCVTPS2PH))]
10478 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10479 [(set_attr "type" "ssecvt")
10480 (set_attr "prefix" "vex")
10481 (set_attr "mode" "V4SF")])
;; vcvtps2ph from a V8SF register source.  The V8HI destination may be a
;; register or memory (constraint "=xm"); operand 2 is an SImode immediate
;; with constraint "N".  The conversion is an UNSPEC_VCVTPS2PH in V8HI
;; mode.
10483 (define_insn "vcvtps2ph256"
10484 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10485 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10486 (match_operand:SI 2 "immediate_operand" "N")]
10487 UNSPEC_VCVTPS2PH))]
10489 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10490 [(set_attr "type" "ssecvt")
10491 (set_attr "prefix" "vex")
10492 (set_attr "mode" "V8SF")])