1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; V16: QI/HI/SI/DI integer and SF/DF float vector modes.  Every 256-bit
;; mode in the list is conditional on TARGET_AVX; the 128-bit modes are
;; unconditional.  Iterated over by the mov<mode>, *mov<mode>_internal,
;; push<mode>1 and movmisalign<mode> patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; V: QI/HI/SI/DI integer and SF/DF float vector modes.  The 256-bit modes
;; are conditional on TARGET_AVX and V2DF on TARGET_SSE2; the remaining
;; 128-bit modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V_128: only V2DF carries a condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; V_256: no per-mode conditions are attached here.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; VF: V8SF and V4DF are conditional on TARGET_AVX, V2DF on TARGET_SSE2;
;; V4SF is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; VF1: V8SF is conditional on TARGET_AVX.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; VF2: V4DF is conditional on TARGET_AVX.  Unlike in VF, V2DF has no
;; TARGET_SSE2 condition attached here.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; VF_128: iterated over by the scalar "vm" patterns such as
;; <sse>_vmmul<mode>3 and <sse>_vmdiv<mode>3.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; VI: the 256-bit modes are conditional on TARGET_AVX.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; VI_AVX2: the same integer modes as VI, but the 256-bit ones are
;; conditional on TARGET_AVX2 instead of TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; VI1: V32QI is conditional on TARGET_AVX.  Iterated over by the movdqu
;; and lddqu patterns.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; VI8: V4DI is conditional on TARGET_AVX.  Iterated over by the integer
;; <sse2>_movnt<mode> pattern.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size integer iterators for AVX2.  In each one the 256-bit
;; mode is conditional on TARGET_AVX2 and the 128-bit mode is unconditional.
;; The digit in the name is the element size in bytes (1=QI, 2=HI, 4=SI,
;; 8=DI).
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; VIMAX_AVX2: V4DI (conditional on TARGET_AVX2) and V1TI.
102 (define_mode_iterator VIMAX_AVX2
103 [(V4DI "TARGET_AVX2") V1TI])
;; SSESCALARMODE: V4DI (conditional on TARGET_AVX2) and TI.
105 (define_mode_iterator SSESCALARMODE
106 [(V4DI "TARGET_AVX2") TI])
;; Multi-element-size integer iterators for AVX2.  The digits in each name
;; list the element sizes in bytes that the iterator covers (1=QI, 2=HI,
;; 4=SI, 8=DI).  Except in VI48_AVX2, every 256-bit mode is conditional on
;; TARGET_AVX2 and every 128-bit mode is unconditional.
108 (define_mode_iterator VI12_AVX2
109 [(V32QI "TARGET_AVX2") V16QI
110 (V16HI "TARGET_AVX2") V8HI])
112 (define_mode_iterator VI24_AVX2
113 [(V16HI "TARGET_AVX2") V8HI
114 (V8SI "TARGET_AVX2") V4SI])
116 (define_mode_iterator VI124_AVX2
117 [(V32QI "TARGET_AVX2") V16QI
118 (V16HI "TARGET_AVX2") V8HI
119 (V8SI "TARGET_AVX2") V4SI])
121 (define_mode_iterator VI248_AVX2
122 [(V16HI "TARGET_AVX2") V8HI
123 (V8SI "TARGET_AVX2") V4SI
124 (V4DI "TARGET_AVX2") V2DI])
;; VI48_AVX2: SI and DI element vectors of both widths; no per-mode
;; conditions are attached in this list.
126 (define_mode_iterator VI48_AVX2
127 [V8SI V4SI V4DI V2DI])
129 (define_mode_iterator VI4SD_AVX2
132 (define_mode_iterator V48_AVX2
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name attributes for integer vector modes.  In each of the three
;; attributes below the 128-bit mode maps to the SSE-level name ("sse2",
;; "ssse3" or "sse4_1") and the 256-bit mode maps to "avx2".
;; NOTE(review): presumably spliced into pattern names the same way <sse>
;; and <sse2> are used further down -- confirm at the use sites.
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
;; avx_avx2: the float vector modes (SF and DF elements, both widths) map
;; to "avx"; the SI and DI element integer vector modes map to "avx2".
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 ;; Mapping of logic-shift operators
;; lshift (iterator): the RTL codes lshiftrt and ashift.
165 (define_code_iterator lshift [lshiftrt ashift])
;; NOTE(review): "srl"/"sll" look like x86 mnemonic fragments while
;; "lshr"/"lshl" look like pattern-name fragments, so the two "Base name"
;; comments below may be attached to the wrong attribute -- confirm against
;; the patterns that use <lshift_insn> and <lshift>.
167 ;; Base name for define_insn
168 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
170 ;; Base name for insn mnemonic
171 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; ssedoublemode: for HI element vectors, the SI element vector mode with
;; the same number of elements (so twice the total size).
173 (define_mode_attr ssedoublemode
174 [(V16HI "V16SI") (V8HI "V8SI")])
;; ssebytemode: for DI element vectors, the QI element vector mode with the
;; same total size.
176 (define_mode_attr ssebytemode
177 [(V4DI "V32QI") (V2DI "V16QI")])
179 ;; All 128bit vector integer modes
;; VI_128: QI, HI, SI and DI element vectors; no per-mode conditions.
180 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
182 ;; All 256bit vector integer modes
;; VI_256: QI, HI, SI and DI element vectors; no per-mode conditions.
183 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
185 ;; Random 128bit vector integer mode combinations
;; The digits in each name are the element sizes in bytes that the iterator
;; covers (1=QI, 2=HI, 4=SI, 8=DI).
186 (define_mode_iterator VI12_128 [V16QI V8HI])
187 (define_mode_iterator VI14_128 [V16QI V4SI])
188 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
189 (define_mode_iterator VI24_128 [V8HI V4SI])
190 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
192 ;; Random 256bit vector integer mode combinations
;; Same naming scheme as the 128-bit combinations.  VI1248_256 lists the
;; same four modes as VI_256.
193 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
194 (define_mode_iterator VI1248_256 [V32QI V16HI V8SI V4DI])
195 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
197 ;; Int-float size matches
;; Each iterator pairs the integer and the float vector mode that share an
;; element size (4 or 8 bytes) and a total width (128 or 256 bits).
198 (define_mode_iterator VI4F_128 [V4SI V4SF])
199 (define_mode_iterator VI8F_128 [V2DI V2DF])
200 (define_mode_iterator VI4F_256 [V8SI V8SF])
201 (define_mode_iterator VI8F_256 [V4DI V4DF])
203 ;; Mapping from float mode to required SSE level
;; sse: ISA name for each float mode.  Used as the name prefix of patterns
;; such as "<sse>_movu<ssemodesuffix><avxsizesuffix>" and "<sse>_div<mode>3".
204 (define_mode_attr sse
205 [(SF "sse") (DF "sse2")
206 (V4SF "sse") (V2DF "sse2")
207 (V8SF "avx") (V4DF "avx")])
;; sse2: "sse2" for the 128-bit QI/DI element vectors, "avx" for the 256-bit
;; ones.  Used as the name prefix of "<sse2>_movdqu<avxsizesuffix>" and
;; "<sse2>_movnt<mode>".
209 (define_mode_attr sse2
210 [(V16QI "sse2") (V32QI "avx")
211 (V2DI "sse2") (V4DI "avx")])
;; sse3: "sse3" for V16QI, "avx" for V32QI.  Used as the name prefix of
;; "<sse3>_lddqu<avxsizesuffix>".
213 (define_mode_attr sse3
214 [(V16QI "sse3") (V32QI "avx")])
;; sse4_1: "sse4_1" for the 128-bit float vectors, "avx" for the 256-bit
;; ones.
216 (define_mode_attr sse4_1
217 [(V4SF "sse4_1") (V2DF "sse4_1")
218 (V8SF "avx") (V4DF "avx")])
;; avxsizesuffix: "256" for every 256-bit mode and the empty string for
;; every 128-bit mode.  Appended to pattern names, e.g.
;; "<sse2>_movdqu<avxsizesuffix>".
220 (define_mode_attr avxsizesuffix
221 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
222 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
223 (V8SF "256") (V4DF "256")
224 (V4SF "") (V2DF "")])
226 ;; SSE instruction mode
;; sseinsnmode: value for the "mode" insn attribute of a pattern iterated
;; over the given mode (see e.g. the movdqu and lddqu patterns).  Integer
;; vector modes map to OI (256-bit) or TI (128-bit); float vector modes and
;; TI map to themselves.  Each key is listed exactly once: the former
;; second set of V32QI/V16HI/V8SI/V4DI entries repeated the first line and
;; has been dropped.
227 (define_mode_attr sseinsnmode
228 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
229 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
230 (V8SF "V8SF") (V4DF "V4DF")
231 (V4SF "V4SF") (V2DF "V2DF")
232 (TI "TI")])
234 ;; Mapping of vector float modes to an integer mode of the same size
;; sseintvecmode: integer vector mode with the same element size and
;; element count as the key mode.  Float vector modes map to their integer
;; counterpart; SI and DI element integer vector modes map to themselves.
;; Each key is listed exactly once: a repeated "(V4DF ...) (V8SF ...)" line
;; that duplicated the first entries has been dropped.
235 (define_mode_attr sseintvecmode
236 [(V8SF "V8SI") (V4DF "V4DI")
237 (V4SF "V4SI") (V2DF "V2DI")
239 (V8SI "V8SI") (V4DI "V4DI")
240 (V4SI "V4SI") (V2DI "V2DI")])
242 ;; Mapping of vector modes to a vector mode of double size
;; ssedoublevecmode: same element mode, twice the number of elements.
243 (define_mode_attr ssedoublevecmode
244 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
245 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
246 (V8SF "V16SF") (V4DF "V8DF")
247 (V4SF "V8SF") (V2DF "V4DF")])
249 ;; Mapping of vector modes to a vector mode of half size
;; ssehalfvecmode: same element mode, half the number of elements.
250 (define_mode_attr ssehalfvecmode
251 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
252 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
253 (V8SF "V4SF") (V4DF "V2DF")
256 ;; Mapping of vector modes back to the scalar modes
;; ssescalarmode: the element mode of each vector mode.  Also supplies the
;; "mode" insn attribute of the scalar "vm" patterns.
257 (define_mode_attr ssescalarmode
258 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
259 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
260 (V8SF "SF") (V4DF "DF")
261 (V4SF "SF") (V2DF "DF")])
263 ;; Number of scalar elements in each vector type
264 (define_mode_attr ssescalarnum
265 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
266 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
267 (V8SF "8") (V4DF "4")
268 (V4SF "4") (V2DF "2")])
270 ;; SSE scalar suffix for vector modes
;; The entries below give "ss" for SF/SI element vectors and "sd" for DF/DI
;; element vectors.  The suffix is appended to the mnemonic in the scalar
;; "vm" patterns, e.g. mul<ssescalarmodesuffix>.
271 (define_mode_attr ssescalarmodesuffix
273 (V8SF "ss") (V4DF "sd")
274 (V4SF "ss") (V2DF "sd")
275 (V8SI "ss") (V4DI "sd")
278 ;; Pack/unpack vector modes
;; sseunpackmode: integer vector mode of the same total size whose elements
;; are twice as wide (and therefore half as many).
279 (define_mode_attr sseunpackmode
280 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
281 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; ssepackmode: the inverse mapping -- same total size, elements half as
;; wide and twice as many.
283 (define_mode_attr ssepackmode
284 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
285 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
287 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
288 (define_mode_attr sserotatemax
289 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
291 ;; Mapping of mode to cast intrinsic name
292 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
294 ;; Instruction suffix for sign and zero extensions.
295 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; AVX256MODE2P: the three 256-bit modes for which castmode has entries.
298 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; AVXMODE48P_DI (iterator): the 128- and 256-bit DI/DF element vectors
;; plus the 128-bit SI/SF element vectors.
300 (define_mode_iterator AVXMODE48P_DI
301 [V2DI V2DF V4DI V4DF V4SF V4SI])
;; AVXMODE48P_DI (attribute): a DI element integer vector mode for each key.
;; DI/DF element keys map to the DI vector with the same element count;
;; V4SI/V4SF map to V2DI and V8SI/V8SF to V4DI, i.e. half as many elements.
;; NOTE(review): V8SI and V8SF have entries here but are not members of the
;; iterator of the same name -- confirm whether that is intentional.
302 (define_mode_attr AVXMODE48P_DI
303 [(V2DI "V2DI") (V2DF "V2DI")
304 (V4DI "V4DI") (V4DF "V4DI")
305 (V4SI "V2DI") (V4SF "V2DI")
306 (V8SI "V4DI") (V8SF "V4DI")])
;; gthrfirstp / gthrlastp: string fragments keyed by mode.  In the entries
;; below integer modes get "p" followed by "q" or "d", and float modes get
;; the empty string followed by "pd" or "ps".
;; NOTE(review): presumably the two parts of a gather mnemonic -- confirm
;; at the use sites.
307 (define_mode_attr gthrfirstp
308 [(V2DI "p") (V2DF "")
311 (V8SI "p") (V8SF "")])
312 (define_mode_attr gthrlastp
313 [(V2DI "q") (V2DF "pd")
314 (V4DI "q") (V4DF "pd")
315 (V4SI "d") (V4SF "ps")
316 (V8SI "d") (V8SF "ps")])
;; FMAMODE: scalar SF/DF plus the 128- and 256-bit float vector modes.
318 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
320 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 where N is the number of elements in the mode
;; (8, 4, 4 and 2 respectively), i.e. an immediate with one bit set per
;; element.
321 (define_mode_attr blendbits
322 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
324 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 ;; All of these patterns are enabled for SSE1 as well as SSE2.
333 ;; This is essential for maintaining stable calling conventions.
;; Vector move expander for every V16 mode.  Both operands are
;; nonimmediate_operand; the preparation code passes the mode and the
;; operand array to ix86_expand_vector_move.
335 (define_expand "mov<mode>"
336 [(set (match_operand:V16 0 "nonimmediate_operand" "")
337 (match_operand:V16 1 "nonimmediate_operand" ""))]
340 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn for every V16 mode.  Three alternatives:
;;   0: SSE constant ("C") -> xmm register; the mnemonic comes from
;;      standard_sse_constant_opcode.
;;   1: xmm register or memory -> xmm register.
;;   2: xmm register -> memory.
;; The insn condition requires at least one of the two operands to be a
;; register.  For the plain moves the output code switches on
;; get_attr_mode (insn) and returns a packed-single (movups/movaps),
;; packed-double (movupd/movapd) or integer (movdqu/movdqa) mnemonic.  The
;; unaligned spellings are tied to misaligned_operand being true for either
;; operand, and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL selects %vmovaps in
;; place of %vmovapd / %vmovdqa.
;; Attributes: "type" is sselog1 for alternative 0 and ssemov otherwise.
;; The "mode" cond yields <sseinsnmode> when TARGET_AVX is set; otherwise
;; it can yield V4SF or V2DF, with TI as the final default.
344 (define_insn "*mov<mode>_internal"
345 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
346 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
348 && (register_operand (operands[0], <MODE>mode)
349 || register_operand (operands[1], <MODE>mode))"
351 switch (which_alternative)
354 return standard_sse_constant_opcode (insn, operands[1]);
357 switch (get_attr_mode (insn))
362 && (misaligned_operand (operands[0], <MODE>mode)
363 || misaligned_operand (operands[1], <MODE>mode)))
364 return "vmovups\t{%1, %0|%0, %1}";
366 return "%vmovaps\t{%1, %0|%0, %1}";
371 && (misaligned_operand (operands[0], <MODE>mode)
372 || misaligned_operand (operands[1], <MODE>mode)))
373 return "vmovupd\t{%1, %0|%0, %1}";
374 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
375 return "%vmovaps\t{%1, %0|%0, %1}";
377 return "%vmovapd\t{%1, %0|%0, %1}";
382 && (misaligned_operand (operands[0], <MODE>mode)
383 || misaligned_operand (operands[1], <MODE>mode)))
384 return "vmovdqu\t{%1, %0|%0, %1}";
385 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
386 return "%vmovaps\t{%1, %0|%0, %1}";
388 return "%vmovdqa\t{%1, %0|%0, %1}";
397 [(set_attr "type" "sselog1,ssemov,ssemov")
398 (set_attr "prefix" "maybe_vex")
400 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
401 (const_string "<sseinsnmode>")
403 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
405 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
406 (and (eq_attr "alternative" "2")
407 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
409 (const_string "V4SF")
410 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
411 (const_string "V4SF")
412 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
413 (const_string "V2DF")
415 (const_string "TI")))])
;; Emits %vmovq from an xmm register or memory (operand 1, V2DI) into an
;; xmm register (operand 0).  The RTL shown selects element 0 of operand 1.
417 (define_insn "sse2_movq128"
418 [(set (match_operand:V2DI 0 "register_operand" "=x")
421 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
422 (parallel [(const_int 0)]))
425 "%vmovq\t{%1, %0|%0, %1}"
426 [(set_attr "type" "ssemov")
427 (set_attr "prefix" "maybe_vex")
428 (set_attr "mode" "TI")])
430 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
431 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
432 ;; from memory, we'd prefer to load the memory directly into the %xmm
433 ;; register. To facilitate this happy circumstance, this pattern won't
434 ;; split until after register allocation. If the 64-bit value didn't
435 ;; come from memory, this is the best we can do. This is much better
436 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
;; Enabled only for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
;; and split once reload has completed.
;;   Alternative 0 (DI in integer registers, "r"): two sse2_loadld insns
;;     load the SImode subregs at byte offsets 0 and 4 of operand 1 into
;;     operand 0 and into the V4SI scratch (operand 2) respectively, then
;;     vec_interleave_lowv4si writes the result to operand 0.
;;   Alternative 1 (DI in memory, "m"): a single vec_concatv2di of the
;;     memory operand with const0_rtx, written to the V2DI lowpart of
;;     operand 0.  The scratch is not needed ("X").
439 (define_insn_and_split "movdi_to_sse"
441 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
442 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
443 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
444 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
446 "&& reload_completed"
449 if (register_operand (operands[1], DImode))
451 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
452 Assemble the 64-bit DImode value in an xmm register. */
453 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
454 gen_rtx_SUBREG (SImode, operands[1], 0)));
455 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
456 gen_rtx_SUBREG (SImode, operands[1], 4)));
457 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
460 else if (memory_operand (operands[1], DImode))
461 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
462 operands[1], const0_rtx));
;; After reload (TARGET_SSE): a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand is rewritten in terms of the scalar.
;; Operand 1 is narrowed to SFmode with simplify_gen_subreg and used in a
;; vec_duplicate; operand 2 is set to the V4SF zero vector.
468 [(set (match_operand:V4SF 0 "register_operand" "")
469 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
470 "TARGET_SSE && reload_completed"
473 (vec_duplicate:V4SF (match_dup 1))
477 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
478 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF counterpart (TARGET_SSE2, after reload): the replacement is a
;; vec_concat of the DFmode value (operand 1 narrowed with
;; simplify_gen_subreg) with a DFmode zero (operand 2).
482 [(set (match_operand:V2DF 0 "register_operand" "")
483 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
484 "TARGET_SSE2 && reload_completed"
485 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
487 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
488 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every V16 mode: passes the mode and the register
;; operand to ix86_expand_push.
491 (define_expand "push<mode>1"
492 [(match_operand:V16 0 "register_operand" "")]
495 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every V16 mode: passes the mode and the
;; operand array to ix86_expand_vector_move_misalign.
499 (define_expand "movmisalign<mode>"
500 [(set (match_operand:V16 0 "nonimmediate_operand" "")
501 (match_operand:V16 1 "nonimmediate_operand" ""))]
504 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float vector move (VF modes).  If both
;; operands are in memory, operand 1 is first forced into a register.
508 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
509 [(set (match_operand:VF 0 "nonimmediate_operand" "")
511 [(match_operand:VF 1 "nonimmediate_operand" "")]
515 if (MEM_P (operands[0]) && MEM_P (operands[1]))
516 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn.  Alternatives are "x <- xm" and "m <- x"; the condition
;; requires TARGET_SSE and rejects memory-to-memory.  Emits
;; %vmovu<ssemodesuffix> and sets the "movu" attribute to 1.
519 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
520 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
522 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
524 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
525 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
526 [(set_attr "type" "ssemov")
527 (set_attr "movu" "1")
528 (set_attr "prefix" "maybe_vex")
529 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned integer vector move (VI1 modes, wrapped in an
;; unspec).  If both operands are in memory, operand 1 is first forced into
;; a register.
531 (define_expand "<sse2>_movdqu<avxsizesuffix>"
532 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
533 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
537 if (MEM_P (operands[0]) && MEM_P (operands[1]))
538 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn.  Alternatives are "x <- xm" and "m <- x"; the condition
;; requires TARGET_SSE2 and rejects memory-to-memory.  Emits %vmovdqu, sets
;; "movu" to 1, and computes "prefix_data16" from an expression that tests
;; TARGET_AVX.
541 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
542 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
543 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
545 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
546 "%vmovdqu\t{%1, %0|%0, %1}"
547 [(set_attr "type" "ssemov")
548 (set_attr "movu" "1")
549 (set (attr "prefix_data16")
551 (ne (symbol_ref "TARGET_AVX") (const_int 0))
554 (set_attr "prefix" "maybe_vex")
555 (set_attr "mode" "<sseinsnmode>")])
;; Load-only pattern for VI1 modes: the source must be memory
;; (memory_operand, "m") and the destination an xmm register.  Emits
;; %vlddqu and sets "movu" to 1.  Both "prefix_data16" and "prefix_rep" are
;; computed from expressions that test TARGET_AVX.
557 (define_insn "<sse3>_lddqu<avxsizesuffix>"
558 [(set (match_operand:VI1 0 "register_operand" "=x")
559 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
562 "%vlddqu\t{%1, %0|%0, %1}"
563 [(set_attr "type" "ssemov")
564 (set_attr "movu" "1")
565 (set (attr "prefix_data16")
567 (ne (symbol_ref "TARGET_AVX") (const_int 0))
570 (set (attr "prefix_rep")
572 (ne (symbol_ref "TARGET_AVX") (const_int 0))
575 (set_attr "prefix" "maybe_vex")
576 (set_attr "mode" "<sseinsnmode>")])
;; Stores a general register (operand 1, "r", SImode) to memory (operand 0)
;; with the movnti mnemonic.  "prefix_data16" is forced to 0.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm that this is intended.
578 (define_insn "sse2_movntsi"
579 [(set (match_operand:SI 0 "memory_operand" "=m")
580 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
583 "movnti\t{%1, %0|%0, %1}"
584 [(set_attr "type" "ssemov")
585 (set_attr "prefix_data16" "0")
586 (set_attr "mode" "V2DF")])
;; Float variant (VF modes): stores an xmm register (operand 1) to memory
;; (operand 0) with %vmovnt<ssemodesuffix>.
588 (define_insn "<sse>_movnt<mode>"
589 [(set (match_operand:VF 0 "memory_operand" "=m")
590 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
593 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
594 [(set_attr "type" "ssemov")
595 (set_attr "prefix" "maybe_vex")
596 (set_attr "mode" "<MODE>")])
;; Integer variant (VI8 modes): stores an xmm register to memory with
;; %vmovntdq.  "prefix_data16" is computed from an expression that tests
;; TARGET_AVX.
;; NOTE(review): "type" is ssecvt here while the float variant directly
;; above uses ssemov -- confirm that the difference is intended.
598 (define_insn "<sse2>_movnt<mode>"
599 [(set (match_operand:VI8 0 "memory_operand" "=m")
600 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
603 "%vmovntdq\t{%1, %0|%0, %1}"
604 [(set_attr "type" "ssecvt")
605 (set (attr "prefix_data16")
607 (ne (symbol_ref "TARGET_AVX") (const_int 0))
610 (set_attr "prefix" "maybe_vex")
611 (set_attr "mode" "<sseinsnmode>")])
613 ; Expand patterns for non-temporal stores. At the moment, only those
614 ; that directly map to insns are defined; it would be possible to
615 ; define patterns for other modes that would expand to several insns.
617 ;; Modes handled by storent patterns.
;; Per-mode conditions in the entries below: SI needs TARGET_SSE2, SF and
;; DF need TARGET_SSE4A, V8SF and V4DF need TARGET_AVX, V2DF needs
;; TARGET_SSE2, and V4SF is unconditional.
618 (define_mode_iterator STORENT_MODE
619 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
621 (V8SF "TARGET_AVX") V4SF
622 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent<mode>: expander over STORENT_MODE with a memory destination
;; (operand 0) and a register source (operand 1).
624 (define_expand "storent<mode>"
625 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
627 [(match_operand:STORENT_MODE 1 "register_operand" "")]
631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
633 ;; Parallel floating point arithmetic
635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over the VF modes.  The preparation statement passes
;; <CODE>, the mode and the operand array to
;; ix86_expand_fp_absneg_operator and then finishes with DONE.
637 (define_expand "<code><mode>2"
638 [(set (match_operand:VF 0 "register_operand" "")
640 (match_operand:VF 1 "register_operand" "")))]
642 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Insn-and-split matching an absneg_operator (operand 3) applied to
;; operand 1, with operand 2 carried in a `use'.  The split code builds a
;; two-operand RTL expression and a SET of operand 0 to it:
;;   - the RTL code is XOR when operand 3 is NEG, otherwise AND;
;;   - if operand 1 is in memory the two inputs are swapped so that the
;;     memory operand comes second.
;; NOTE(review): operand 2 is presumably the sign-bit mask prepared by the
;; expander -- confirm in ix86_expand_fp_absneg_operator.
;; Alternatives 0-1 are for non-AVX and 2-3 for AVX (see the "isa" attr).
644 (define_insn_and_split "*absneg<mode>2"
645 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
646 (match_operator:VF 3 "absneg_operator"
647 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
648 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
654 enum rtx_code absneg_op;
660 if (MEM_P (operands[1]))
661 op1 = operands[2], op2 = operands[1];
663 op1 = operands[1], op2 = operands[2];
668 if (rtx_equal_p (operands[0], operands[1]))
674 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
675 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
676 t = gen_rtx_SET (VOIDmode, operands[0], t);
680 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander over the VF modes.  Operands 1 and 2 may be
;; registers or memory; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on them.
682 (define_expand "<plusminus_insn><mode>3"
683 [(set (match_operand:VF 0 "register_operand" "")
685 (match_operand:VF 1 "nonimmediate_operand" "")
686 (match_operand:VF 2 "nonimmediate_operand" "")))]
688 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn, gated on TARGET_SSE and ix86_binary_operator_ok.
;;   Alternative 0 (noavx): two-operand form, destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand v-prefixed form.
690 (define_insn "*<plusminus_insn><mode>3"
691 [(set (match_operand:VF 0 "register_operand" "=x,x")
693 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
694 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
695 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
697 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
698 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
699 [(set_attr "isa" "noavx,avx")
700 (set_attr "type" "sseadd")
701 (set_attr "prefix" "orig,vex")
702 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract on the 128-bit float modes.  Operand 1 must
;; be a register.  The mnemonic takes the scalar suffix
;; (<ssescalarmodesuffix>) and the "mode" attribute is the element mode.
704 (define_insn "<sse>_vm<plusminus_insn><mode>3"
705 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
708 (match_operand:VF_128 1 "register_operand" "0,x")
709 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
714 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
715 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
716 [(set_attr "isa" "noavx,avx")
717 (set_attr "type" "sseadd")
718 (set_attr "prefix" "orig,vex")
719 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander over the VF modes.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with MULT.
721 (define_expand "mul<mode>3"
722 [(set (match_operand:VF 0 "register_operand" "")
724 (match_operand:VF 1 "nonimmediate_operand" "")
725 (match_operand:VF 2 "nonimmediate_operand" "")))]
727 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn, gated on TARGET_SSE and ix86_binary_operator_ok.  The "%"
;; in operand 1's constraint marks operands 1 and 2 as commutative.
;;   Alternative 0 (noavx): two-operand mul, destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand vmul.
729 (define_insn "*mul<mode>3"
730 [(set (match_operand:VF 0 "register_operand" "=x,x")
732 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
733 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
734 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
736 mul<ssemodesuffix>\t{%2, %0|%0, %2}
737 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
738 [(set_attr "isa" "noavx,avx")
739 (set_attr "type" "ssemul")
740 (set_attr "prefix" "orig,vex")
741 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply on the 128-bit float modes.  Operand 1 must be a
;; register; the mnemonic takes the scalar suffix and the "mode" attribute
;; is the element mode.
743 (define_insn "<sse>_vmmul<mode>3"
744 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
747 (match_operand:VF_128 1 "register_operand" "0,x")
748 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
753 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
754 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
755 [(set_attr "isa" "noavx,avx")
756 (set_attr "type" "ssemul")
757 (set_attr "prefix" "orig,vex")
758 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DF element vector modes (VF2).  The dividend
;; must be a register; the preparation statement only runs
;; ix86_fixup_binary_operands_no_copy.
760 (define_expand "div<mode>3"
761 [(set (match_operand:VF2 0 "register_operand" "")
762 (div:VF2 (match_operand:VF2 1 "register_operand" "")
763 (match_operand:VF2 2 "nonimmediate_operand" "")))]
765 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SF element vector modes (VF1).  After the
;; operand fixup, ix86_emit_swdivsf is called when all of the following
;; hold: TARGET_SSE_MATH, TARGET_RECIP, !optimize_insn_for_size_p (),
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
767 (define_expand "div<mode>3"
768 [(set (match_operand:VF1 0 "register_operand" "")
769 (div:VF1 (match_operand:VF1 1 "register_operand" "")
770 (match_operand:VF1 2 "nonimmediate_operand" "")))]
773 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
775 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
776 && flag_finite_math_only && !flag_trapping_math
777 && flag_unsafe_math_optimizations)
779 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed divide insn over the VF modes.
;;   Alternative 0 (noavx): two-operand div, destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand vdiv.
784 (define_insn "<sse>_div<mode>3"
785 [(set (match_operand:VF 0 "register_operand" "=x,x")
787 (match_operand:VF 1 "register_operand" "0,x")
788 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
791 div<ssemodesuffix>\t{%2, %0|%0, %2}
792 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
793 [(set_attr "isa" "noavx,avx")
794 (set_attr "type" "ssediv")
795 (set_attr "prefix" "orig,vex")
796 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") divide on the 128-bit float modes; the mnemonic takes the
;; scalar suffix and the "mode" attribute is the element mode.
798 (define_insn "<sse>_vmdiv<mode>3"
799 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
802 (match_operand:VF_128 1 "register_operand" "0,x")
803 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
808 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
809 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
810 [(set_attr "isa" "noavx,avx")
811 (set_attr "type" "ssediv")
812 (set_attr "prefix" "orig,vex")
813 (set_attr "mode" "<ssescalarmode>")])
;; Packed reciprocal over the SF element vector modes (VF1), expressed as
;; UNSPEC_RCP of operand 1.  Emits %vrcpps.
815 (define_insn "<sse>_rcp<mode>2"
816 [(set (match_operand:VF1 0 "register_operand" "=x")
818 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
820 "%vrcpps\t{%1, %0|%0, %1}"
821 [(set_attr "type" "sse")
822 (set_attr "atom_sse_attr" "rcp")
823 (set_attr "prefix" "maybe_vex")
824 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF.  Operand 1 (register or memory) is the input
;; of the unspec; operand 2 is a register that is tied to the destination
;; in the non-AVX alternative and is the middle operand of vrcpss in the
;; AVX alternative.
826 (define_insn "sse_vmrcpv4sf2"
827 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
829 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
831 (match_operand:V4SF 2 "register_operand" "0,x")
835 rcpss\t{%1, %0|%0, %1}
836 vrcpss\t{%1, %2, %0|%0, %2, %1}"
837 [(set_attr "isa" "noavx,avx")
838 (set_attr "type" "sse")
839 (set_attr "atom_sse_attr" "rcp")
840 (set_attr "prefix" "orig,vex")
841 (set_attr "mode" "SF")])
;; Square-root expander for the DF element vector modes (VF2).
843 (define_expand "sqrt<mode>2"
844 [(set (match_operand:VF2 0 "register_operand" "")
845 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SF element vector modes (VF1).
;; ix86_emit_swsqrtsf is called (with `false' as its last argument) when
;; all of the following hold: TARGET_SSE_MATH, TARGET_RECIP,
;; !optimize_insn_for_size_p (), flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
848 (define_expand "sqrt<mode>2"
849 [(set (match_operand:VF1 0 "register_operand" "")
850 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
853 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
854 && flag_finite_math_only && !flag_trapping_math
855 && flag_unsafe_math_optimizations)
857 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square root over the VF modes; emits %vsqrt<ssemodesuffix>.
862 (define_insn "<sse>_sqrt<mode>2"
863 [(set (match_operand:VF 0 "register_operand" "=x")
864 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
866 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
867 [(set_attr "type" "sse")
868 (set_attr "atom_sse_attr" "sqrt")
869 (set_attr "prefix" "maybe_vex")
870 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") square root on the 128-bit float modes.  Operand 1 is the
;; input; operand 2 is a register that is tied to the destination in the
;; non-AVX alternative and is the middle operand of the AVX mnemonic.
872 (define_insn "<sse>_vmsqrt<mode>2"
873 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
876 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
877 (match_operand:VF_128 2 "register_operand" "0,x")
881 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
882 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
883 [(set_attr "isa" "noavx,avx")
884 (set_attr "type" "sse")
885 (set_attr "atom_sse_attr" "sqrt")
886 (set_attr "prefix" "orig,vex")
887 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SF element vector modes (VF1),
;; expressed as UNSPEC_RSQRT.  The preparation code calls
;; ix86_emit_swsqrtsf with `true' as its last argument.
889 (define_expand "rsqrt<mode>2"
890 [(set (match_operand:VF1 0 "register_operand" "")
892 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
895 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root insn; emits %vrsqrtps.
899 (define_insn "<sse>_rsqrt<mode>2"
900 [(set (match_operand:VF1 0 "register_operand" "=x")
902 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
904 "%vrsqrtps\t{%1, %0|%0, %1}"
905 [(set_attr "type" "sse")
906 (set_attr "prefix" "maybe_vex")
907 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF.  Operand 1 is the input of the
;; unspec; operand 2 is a register that is tied to the destination in the
;; non-AVX alternative and is the middle operand of vrsqrtss.
909 (define_insn "sse_vmrsqrtv4sf2"
910 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
912 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
914 (match_operand:V4SF 2 "register_operand" "0,x")
918 rsqrtss\t{%1, %0|%0, %1}
919 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
920 [(set_attr "isa" "noavx,avx")
921 (set_attr "type" "sse")
922 (set_attr "prefix" "orig,vex")
923 (set_attr "mode" "SF")])
925 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
926 ;; isn't really correct, as those rtl operators aren't defined when
927 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander over the VF modes.  When !flag_finite_math_only,
;; operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy runs.
929 (define_expand "<code><mode>3"
930 [(set (match_operand:VF 0 "register_operand" "")
932 (match_operand:VF 1 "nonimmediate_operand" "")
933 (match_operand:VF 2 "nonimmediate_operand" "")))]
936 if (!flag_finite_math_only)
937 operands[1] = force_reg (<MODE>mode, operands[1]);
938 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Finite-math variant: requires flag_finite_math_only.  Operand 1 may be
;; memory and the "%" in its constraint marks operands 1 and 2 as
;; commutative.
941 (define_insn "*<code><mode>3_finite"
942 [(set (match_operand:VF 0 "register_operand" "=x,x")
944 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
945 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
946 "TARGET_SSE && flag_finite_math_only
947 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
949 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
950 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
951 [(set_attr "isa" "noavx,avx")
952 (set_attr "type" "sseadd")
953 (set_attr "prefix" "orig,vex")
954 (set_attr "mode" "<MODE>")])
;; Non-finite-math variant: requires !flag_finite_math_only.  Operand 1
;; must be a register and carries no commutative marker.
956 (define_insn "*<code><mode>3"
957 [(set (match_operand:VF 0 "register_operand" "=x,x")
959 (match_operand:VF 1 "register_operand" "0,x")
960 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
961 "TARGET_SSE && !flag_finite_math_only"
963 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
964 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
965 [(set_attr "isa" "noavx,avx")
966 (set_attr "type" "sseadd")
967 (set_attr "prefix" "orig,vex")
968 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") min/max on the 128-bit float modes; the mnemonic takes the
;; scalar suffix and the "mode" attribute is the element mode.
970 (define_insn "<sse>_vm<code><mode>3"
971 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
974 (match_operand:VF_128 1 "register_operand" "0,x")
975 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
980 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
981 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
982 [(set_attr "isa" "noavx,avx")
983 (set_attr "type" "sse")
984 (set_attr "prefix" "orig,vex")
985 (set_attr "mode" "<ssescalarmode>")])
987 ;; These versions of the min/max patterns implement exactly the operations
988 ;; min = (op1 < op2 ? op1 : op2)
989 ;; max = (!(op1 < op2) ? op1 : op2)
990 ;; Their operands are not commutative, and thus they may be used in the
991 ;; presence of -0.0 and NaN.
;; Order-preserving minimum over the VF modes.  Operand 1 must be a
;; register and there is no commutative marker, so the operand order given
;; in the RTL is the order emitted: min / vmin with operand 2 as the source.
993 (define_insn "*ieee_smin<mode>3"
994 [(set (match_operand:VF 0 "register_operand" "=x,x")
996 [(match_operand:VF 1 "register_operand" "0,x")
997 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1001 min<ssemodesuffix>\t{%2, %0|%0, %2}
1002 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1003 [(set_attr "isa" "noavx,avx")
1004 (set_attr "type" "sseadd")
1005 (set_attr "prefix" "orig,vex")
1006 (set_attr "mode" "<MODE>")])
;; Order-preserving maximum; same operand layout as *ieee_smin<mode>3 with
;; the max / vmax mnemonics.
1008 (define_insn "*ieee_smax<mode>3"
1009 [(set (match_operand:VF 0 "register_operand" "=x,x")
1011 [(match_operand:VF 1 "register_operand" "0,x")
1012 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1016 max<ssemodesuffix>\t{%2, %0|%0, %2}
1017 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1018 [(set_attr "isa" "noavx,avx")
1019 (set_attr "type" "sseadd")
1020 (set_attr "prefix" "orig,vex")
1021 (set_attr "mode" "<MODE>")])
;; 256-bit double-precision add/sub: a single VEX-only alternative emitting
;; vaddsubpd.  The RTL shown reuses operands 1 and 2 (match_dup) in a minus.
1023 (define_insn "avx_addsubv4df3"
1024 [(set (match_operand:V4DF 0 "register_operand" "=x")
1027 (match_operand:V4DF 1 "register_operand" "x")
1028 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1029 (minus:V4DF (match_dup 1) (match_dup 2))
1032 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1033 [(set_attr "type" "sseadd")
1034 (set_attr "prefix" "vex")
1035 (set_attr "mode" "V4DF")])
;; 128-bit double-precision add/sub.
;;   Alternative 0 (noavx): addsubpd, destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand vaddsubpd.
1037 (define_insn "sse3_addsubv2df3"
1038 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1041 (match_operand:V2DF 1 "register_operand" "0,x")
1042 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1043 (minus:V2DF (match_dup 1) (match_dup 2))
1047 addsubpd\t{%2, %0|%0, %2}
1048 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1049 [(set_attr "isa" "noavx,avx")
1050 (set_attr "type" "sseadd")
1051 (set_attr "atom_unit" "complex")
1052 (set_attr "prefix" "orig,vex")
1053 (set_attr "mode" "V2DF")])
;; 256-bit single-precision add/sub: a single VEX-only alternative emitting
;; vaddsubps.
1055 (define_insn "avx_addsubv8sf3"
1056 [(set (match_operand:V8SF 0 "register_operand" "=x")
1059 (match_operand:V8SF 1 "register_operand" "x")
1060 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1061 (minus:V8SF (match_dup 1) (match_dup 2))
1064 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sseadd")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "V8SF")])
;; 128-bit single-precision add/sub.
;;   Alternative 0 (noavx): addsubps, destination tied to operand 1;
;;                          "prefix_rep" is 1 for this alternative.
;;   Alternative 1 (avx):   three-operand vaddsubps.
1069 (define_insn "sse3_addsubv4sf3"
1070 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1073 (match_operand:V4SF 1 "register_operand" "0,x")
1074 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1075 (minus:V4SF (match_dup 1) (match_dup 2))
1079 addsubps\t{%2, %0|%0, %2}
1080 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "isa" "noavx,avx")
1082 (set_attr "type" "sseadd")
1083 (set_attr "prefix" "orig,vex")
1084 (set_attr "prefix_rep" "1,*")
1085 (set_attr "mode" "V4SF")])
;; VEX-only horizontal add/subtract (vh<plusminus_mnemonic>pd) on V4DF.
;; The RTL selects DF elements 0..3 of operand 1 and elements 0..3 of
;; operand 2 individually with vec_select.
1087 (define_insn "avx_h<plusminus_insn>v4df3"
1088 [(set (match_operand:V4DF 0 "register_operand" "=x")
1093 (match_operand:V4DF 1 "register_operand" "x")
1094 (parallel [(const_int 0)]))
1095 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1097 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1098 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1102 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1103 (parallel [(const_int 0)]))
1104 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1106 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1107 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1109 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1110 [(set_attr "type" "sseadd")
1111 (set_attr "prefix" "vex")
1112 (set_attr "mode" "V4DF")])
;; Horizontal add/subtract (h<plusminus_mnemonic>pd) on V2DF, combining
;; elements 0 and 1 of operand 1 and elements 0 and 1 of operand 2.
;; Alternative 0 is the legacy encoding with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX encoding.
1114 (define_insn "sse3_h<plusminus_insn>v2df3"
1115 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1119 (match_operand:V2DF 1 "register_operand" "0,x")
1120 (parallel [(const_int 0)]))
1121 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1124 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1125 (parallel [(const_int 0)]))
1126 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1129 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1130 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "isa" "noavx,avx")
1132 (set_attr "type" "sseadd")
1133 (set_attr "prefix" "orig,vex")
1134 (set_attr "mode" "V2DF")])
;; VEX-only horizontal add/subtract (vh<plusminus_mnemonic>ps) on V8SF.
;; The RTL first selects SF elements 0..3 of operand 1 and of operand 2,
;; then elements 4..7 of operand 1 and of operand 2.
1136 (define_insn "avx_h<plusminus_insn>v8sf3"
1137 [(set (match_operand:V8SF 0 "register_operand" "=x")
1143 (match_operand:V8SF 1 "register_operand" "x")
1144 (parallel [(const_int 0)]))
1145 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1147 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1148 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1152 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1153 (parallel [(const_int 0)]))
1154 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1156 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1157 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1162 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1165 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1168 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1169 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1171 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1174 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1175 [(set_attr "type" "sseadd")
1176 (set_attr "prefix" "vex")
1177 (set_attr "mode" "V8SF")])
;; Horizontal add/subtract (h<plusminus_mnemonic>ps) on V4SF, combining
;; SF elements 0..3 of operand 1 and elements 0..3 of operand 2.
;; Alternative 0 is the legacy encoding (operand 1 tied to the destination,
;; prefix_rep 1); alternative 1 is the three-operand VEX encoding.
1179 (define_insn "sse3_h<plusminus_insn>v4sf3"
1180 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1185 (match_operand:V4SF 1 "register_operand" "0,x")
1186 (parallel [(const_int 0)]))
1187 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1189 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1190 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1194 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1195 (parallel [(const_int 0)]))
1196 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1198 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1199 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1202 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1203 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1204 [(set_attr "isa" "noavx,avx")
1205 (set_attr "type" "sseadd")
1206 (set_attr "atom_unit" "complex")
1207 (set_attr "prefix" "orig,vex")
1208 (set_attr "prefix_rep" "1,*")
1209 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector.  Emits three insns: a horizontal add of
;; operand 1 with itself into tmp, a vperm2f128 of tmp with itself using
;; immediate 1 into tmp2, and a vector add of tmp and tmp2 into operand 0.
1211 (define_expand "reduc_splus_v4df"
1212 [(match_operand:V4DF 0 "register_operand" "")
1213 (match_operand:V4DF 1 "register_operand" "")]
1216 rtx tmp = gen_reg_rtx (V4DFmode);
1217 rtx tmp2 = gen_reg_rtx (V4DFmode);
1218 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1219 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1220 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1224 (define_expand "reduc_splus_v2df"
1225 [(match_operand:V2DF 0 "register_operand" "")
1226 (match_operand:V2DF 1 "register_operand" "")]
1229 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector.  Two chained horizontal adds
;; (operand 1 -> tmp -> tmp2), then a vperm2f128 of tmp2 with itself using
;; immediate 1 (reusing tmp as the destination), then a vector add of tmp
;; and tmp2 into operand 0.
1233 (define_expand "reduc_splus_v8sf"
1234 [(match_operand:V8SF 0 "register_operand" "")
1235 (match_operand:V8SF 1 "register_operand" "")]
1238 rtx tmp = gen_reg_rtx (V8SFmode);
1239 rtx tmp2 = gen_reg_rtx (V8SFmode);
1240 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1241 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1242 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1243 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Expands either as two chained
;; sse3 haddv4sf3 insns through a temporary, or by calling
;; ix86_expand_reduc_v4sf with gen_addv4sf3 as the combining operation.
1247 (define_expand "reduc_splus_v4sf"
1248 [(match_operand:V4SF 0 "register_operand" "")
1249 (match_operand:V4SF 1 "register_operand" "")]
1254 rtx tmp = gen_reg_rtx (V4SFmode);
1255 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1256 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1259 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
1264 (define_expand "reduc_smax_v4sf"
1265 [(match_operand:V4SF 0 "register_operand" "")
1266 (match_operand:V4SF 1 "register_operand" "")]
1269 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
1273 (define_expand "reduc_smin_v4sf"
1274 [(match_operand:V4SF 0 "register_operand" "")
1275 (match_operand:V4SF 1 "register_operand" "")]
1278 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1282 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1284 ;; Parallel floating point comparisons
1286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare (vcmp<ssemodesuffix>) over the VF modes.  Operand 3
;; is an SImode immediate accepted by const_0_to_31_operand; the insn
;; accounts for one immediate byte via length_immediate.
1288 (define_insn "avx_cmp<mode>3"
1289 [(set (match_operand:VF 0 "register_operand" "=x")
1291 [(match_operand:VF 1 "register_operand" "x")
1292 (match_operand:VF 2 "nonimmediate_operand" "xm")
1293 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1296 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1297 [(set_attr "type" "ssecmp")
1298 (set_attr "length_immediate" "1")
1299 (set_attr "prefix" "vex")
1300 (set_attr "mode" "<MODE>")])
;; VEX scalar compare (vcmp<ssescalarmodesuffix>) on VF_128 operands, with
;; an SImode immediate (const_0_to_31_operand) as operand 3.  The insn mode
;; attribute is the scalar mode <ssescalarmode>.
1302 (define_insn "avx_vmcmp<mode>3"
1303 [(set (match_operand:VF_128 0 "register_operand" "=x")
1306 [(match_operand:VF_128 1 "register_operand" "x")
1307 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1308 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1313 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1314 [(set_attr "type" "ssecmp")
1315 (set_attr "length_immediate" "1")
1316 (set_attr "prefix" "vex")
1317 (set_attr "mode" "<ssescalarmode>")])
;; Packed mask compare for comparison operators whose RTX class is
;; RTX_COMM_COMPARE.  Operand 1 carries the "%" commutative marker, so
;; operands 1 and 2 may be exchanged.  The comparison predicate is printed
;; from operand 3 with the %D3 output modifier.
1319 (define_insn "*<sse>_maskcmp<mode>3_comm"
1320 [(set (match_operand:VF 0 "register_operand" "=x,x")
1321 (match_operator:VF 3 "sse_comparison_operator"
1322 [(match_operand:VF 1 "register_operand" "%0,x")
1323 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1325 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1327 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1328 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1329 [(set_attr "isa" "noavx,avx")
1330 (set_attr "type" "ssecmp")
1331 (set_attr "length_immediate" "1")
1332 (set_attr "prefix" "orig,vex")
1333 (set_attr "mode" "<MODE>")])
;; Packed mask compare over the VF modes for any operator accepted by
;; sse_comparison_operator.  Alternative 0 is the legacy encoding with
;; operand 1 tied to the destination; alternative 1 is the VEX encoding.
;; The comparison predicate is printed from operand 3 via %D3.
1335 (define_insn "<sse>_maskcmp<mode>3"
1336 [(set (match_operand:VF 0 "register_operand" "=x,x")
1337 (match_operator:VF 3 "sse_comparison_operator"
1338 [(match_operand:VF 1 "register_operand" "0,x")
1339 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1342 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1343 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1344 [(set_attr "isa" "noavx,avx")
1345 (set_attr "type" "ssecmp")
1346 (set_attr "length_immediate" "1")
1347 (set_attr "prefix" "orig,vex")
1348 (set_attr "mode" "<MODE>")])
;; Scalar mask compare on VF_128 operands (cmp%D3<ssescalarmodesuffix>),
;; with a legacy alternative (operand 1 tied to the destination) and a VEX
;; alternative.
;; NOTE(review): length_immediate is "1,*" here, whereas the packed
;; <sse>_maskcmp<mode>3 pattern uses "1" for both alternatives -- confirm
;; whether the VEX alternative should also count the predicate byte.
1350 (define_insn "<sse>_vmmaskcmp<mode>3"
1351 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1353 (match_operator:VF_128 3 "sse_comparison_operator"
1354 [(match_operand:VF_128 1 "register_operand" "0,x")
1355 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1360 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1361 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1362 [(set_attr "isa" "noavx,avx")
1363 (set_attr "type" "ssecmp")
1364 (set_attr "length_immediate" "1,*")
1365 (set_attr "prefix" "orig,vex")
1366 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of the two vector operands,
;; emitting %vcomi<ssemodesuffix>.  Enabled when SSE_FLOAT_MODE_P holds for
;; the scalar mode.  prefix_data16 is computed from the insn mode (DF vs.
;; other); prefix_rep is forced to 0.
1368 (define_insn "<sse>_comi"
1369 [(set (reg:CCFP FLAGS_REG)
1372 (match_operand:<ssevecmode> 0 "register_operand" "x")
1373 (parallel [(const_int 0)]))
1375 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1376 (parallel [(const_int 0)]))))]
1377 "SSE_FLOAT_MODE_P (<MODE>mode)"
1378 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1379 [(set_attr "type" "ssecomi")
1380 (set_attr "prefix" "maybe_vex")
1381 (set_attr "prefix_rep" "0")
1382 (set (attr "prefix_data16")
1383 (if_then_else (eq_attr "mode" "DF")
1385 (const_string "0")))
1386 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but sets FLAGS_REG in CCFPU mode and
;; emits %vucomi<ssemodesuffix>.  Compares element 0 of the two vector
;; operands; enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
1388 (define_insn "<sse>_ucomi"
1389 [(set (reg:CCFPU FLAGS_REG)
1392 (match_operand:<ssevecmode> 0 "register_operand" "x")
1393 (parallel [(const_int 0)]))
1395 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1396 (parallel [(const_int 0)]))))]
1397 "SSE_FLOAT_MODE_P (<MODE>mode)"
1398 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1399 [(set_attr "type" "ssecomi")
1400 (set_attr "prefix" "maybe_vex")
1401 (set_attr "prefix_rep" "0")
1402 (set (attr "prefix_data16")
1403 (if_then_else (eq_attr "mode" "DF")
1405 (const_string "0")))
1406 (set_attr "mode" "<MODE>")])
;; Vector conditional select over the VF modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two values selected between.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose result is captured in `ok'.
1408 (define_expand "vcond<mode>"
1409 [(set (match_operand:VF 0 "register_operand" "")
1411 (match_operator 3 ""
1412 [(match_operand:VF 4 "nonimmediate_operand" "")
1413 (match_operand:VF 5 "nonimmediate_operand" "")])
1414 (match_operand:VF 1 "general_operand" "")
1415 (match_operand:VF 2 "general_operand" "")))]
1418 bool ok = ix86_expand_fp_vcond (operands);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point logical operations
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output template is formatted
;; into a static buffer: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and which_alternative chooses between the legacy andn%s form
;; and the VEX vandn%s form.
1429 (define_insn "<sse>_andnot<mode>3"
1430 [(set (match_operand:VF 0 "register_operand" "=x,x")
1433 (match_operand:VF 1 "register_operand" "0,x"))
1434 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1437 static char buf[32];
1440 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1442 switch (which_alternative)
1445 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1448 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1454 snprintf (buf, sizeof (buf), insn, suffix);
1457 [(set_attr "isa" "noavx,avx")
1458 (set_attr "type" "sselog")
1459 (set_attr "prefix" "orig,vex")
1460 (set_attr "mode" "<MODE>")])
;; Expander for the <code> logical operation over the VF modes.  Both
;; inputs may be memory at expand time; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
1462 (define_expand "<code><mode>3"
1463 [(set (match_operand:VF 0 "register_operand" "")
1465 (match_operand:VF 1 "nonimmediate_operand" "")
1466 (match_operand:VF 2 "nonimmediate_operand" "")))]
1468 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed <logic> operation over the VF modes; operand 1 is marked
;; commutative ("%").  Requires TARGET_SSE and ix86_binary_operator_ok.
;; The template is formatted into a static buffer with suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, <ssemodesuffix> otherwise;
;; which_alternative selects the legacy or the VEX form.
1470 (define_insn "*<code><mode>3"
1471 [(set (match_operand:VF 0 "register_operand" "=x,x")
1473 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1474 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1475 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1477 static char buf[32];
1480 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1482 switch (which_alternative)
1485 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1488 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1494 snprintf (buf, sizeof (buf), insn, suffix);
1497 [(set_attr "isa" "noavx,avx")
1498 (set_attr "type" "sselog")
1499 (set_attr "prefix" "orig,vex")
1500 (set_attr "mode" "<MODE>")])
;; copysign over the VF modes, built from logical operations on a mask.
;; operands[3] is the mask returned by ix86_build_signbit_mask; the RTL
;; combines (not mask) with operand 1 and (and mask operand 2), and the
;; final result is the ior of temporaries operands[4] and operands[5],
;; which are fresh pseudo registers.
1502 (define_expand "copysign<mode>3"
1505 (not:VF (match_dup 3))
1506 (match_operand:VF 1 "nonimmediate_operand" "")))
1508 (and:VF (match_dup 3)
1509 (match_operand:VF 2 "nonimmediate_operand" "")))
1510 (set (match_operand:VF 0 "register_operand" "")
1511 (ior:VF (match_dup 4) (match_dup 5)))]
1514 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1516 operands[4] = gen_reg_rtx (<MODE>mode);
1517 operands[5] = gen_reg_rtx (<MODE>mode);
1520 ;; Also define scalar versions. These are used for abs, neg, and
1521 ;; conditional move. Using subregs into vector modes causes register
1522 ;; allocation lossage. These patterns do not allow memory operands
1523 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not, register operands only.  The packed mnemonic is
;; built in a static buffer: suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, <ssevecmodesuffix>
;; otherwise; which_alternative selects the legacy or the VEX form.  The
;; insn mode attribute is the vector mode <ssevecmode>.
1525 (define_insn "*andnot<mode>3"
1526 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1529 (match_operand:MODEF 1 "register_operand" "0,x"))
1530 (match_operand:MODEF 2 "register_operand" "x,x")))]
1531 "SSE_FLOAT_MODE_P (<MODE>mode)"
1533 static char buf[32];
1536 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1538 switch (which_alternative)
1541 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1544 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1550 snprintf (buf, sizeof (buf), insn, suffix);
1553 [(set_attr "isa" "noavx,avx")
1554 (set_attr "type" "sselog")
1555 (set_attr "prefix" "orig,vex")
1556 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) <logic> operation, register operands only; operand 1 is
;; marked commutative ("%").  The packed mnemonic is built in a static
;; buffer with suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, <ssevecmodesuffix> otherwise.  The insn mode attribute is the
;; vector mode <ssevecmode>.
1558 (define_insn "*<code><mode>3"
1559 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1561 (match_operand:MODEF 1 "register_operand" "%0,x")
1562 (match_operand:MODEF 2 "register_operand" "x,x")))]
1563 "SSE_FLOAT_MODE_P (<MODE>mode)"
1565 static char buf[32];
1568 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1570 switch (which_alternative)
1573 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1576 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1582 snprintf (buf, sizeof (buf), insn, suffix);
1585 [(set_attr "isa" "noavx,avx")
1586 (set_attr "type" "sselog")
1587 (set_attr "prefix" "orig,vex")
1588 (set_attr "mode" "<ssevecmode>")])
1590 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1592 ;; FMA4 floating point multiply/accumulate instructions. This
1593 ;; includes the scalar version of the instructions as well as the
1596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1598 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1599 ;; combine to generate a multiply/add with two memory references. We then
1600 ;; split this insn, into loading up the destination register with one of the
1601 ;; memory operations. If we don't manage to split the insn, reload will
1602 ;; generate the appropriate moves. The reason this is needed is that combine
1603 ;; has already folded one of the memory references into both the multiply and
1604 ;; add insns, and it can't generate a new pseudo. I.e.:
1605 ;; (set (reg1) (mem (addr1)))
1606 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1607 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1609 ;; ??? This is historic, pre-dating the gimple fma transformation.
1610 ;; We could now properly represent that only one memory operand is
1611 ;; allowed and not be penalized during optimization.
1613 ;; Intrinsic FMA operations.
1615 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-named fused multiply-add expander over FMAMODE with operands
;; 1, 2 and 3 unmodified.  Enabled only when (TARGET_FMA || TARGET_FMA4)
;; and TARGET_SSE_MATH.
1616 (define_expand "fma<mode>4"
1617 [(set (match_operand:FMAMODE 0 "register_operand")
1619 (match_operand:FMAMODE 1 "nonimmediate_operand")
1620 (match_operand:FMAMODE 2 "nonimmediate_operand")
1621 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1622 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named expander over FMAMODE with operand 3 negated.  Enabled
;; only when (TARGET_FMA || TARGET_FMA4) and TARGET_SSE_MATH.
1624 (define_expand "fms<mode>4"
1625 [(set (match_operand:FMAMODE 0 "register_operand")
1627 (match_operand:FMAMODE 1 "nonimmediate_operand")
1628 (match_operand:FMAMODE 2 "nonimmediate_operand")
1629 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1630 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named expander over FMAMODE with operand 1 negated.  Enabled
;; only when (TARGET_FMA || TARGET_FMA4) and TARGET_SSE_MATH.
1632 (define_expand "fnma<mode>4"
1633 [(set (match_operand:FMAMODE 0 "register_operand")
1635 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1636 (match_operand:FMAMODE 2 "nonimmediate_operand")
1637 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1638 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named expander over FMAMODE with both operand 1 and operand 3
;; negated.  Enabled only when (TARGET_FMA || TARGET_FMA4) and
;; TARGET_SSE_MATH.
1640 (define_expand "fnms<mode>4"
1641 [(set (match_operand:FMAMODE 0 "register_operand")
1643 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1644 (match_operand:FMAMODE 2 "nonimmediate_operand")
1645 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1646 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1648 ;; The builtin for fma4intrin.h does not require SSE math to be enabled.
;; Intrinsic multiply-add expander over FMAMODE.  Its condition is only
;; TARGET_FMA || TARGET_FMA4, without the TARGET_SSE_MATH requirement of
;; the standard-named fma<mode>4 expander.
1649 (define_expand "fma4i_fmadd_<mode>"
1650 [(set (match_operand:FMAMODE 0 "register_operand")
1652 (match_operand:FMAMODE 1 "nonimmediate_operand")
1653 (match_operand:FMAMODE 2 "nonimmediate_operand")
1654 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1655 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> over FMAMODE.  Operand 1 is marked
;; commutative ("%").  The two alternatives allow memory in operand 3
;; (alternative 0) or in operand 2 (alternative 1), never in both.
1657 (define_insn "*fma4i_fmadd_<mode>"
1658 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1660 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1661 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1662 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1664 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1665 [(set_attr "type" "ssemuladd")
1666 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> over FMAMODE.  Operand 1 is marked
;; commutative ("%").  The two alternatives allow memory in operand 3
;; (alternative 0) or in operand 2 (alternative 1), never in both.
1668 (define_insn "*fma4i_fmsub_<mode>"
1669 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1671 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1672 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1674 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1676 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1677 [(set_attr "type" "ssemuladd")
1678 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> over FMAMODE.  Operand 1 is marked
;; commutative ("%").  The two alternatives allow memory in operand 3
;; (alternative 0) or in operand 2 (alternative 1), never in both.
1680 (define_insn "*fma4i_fnmadd_<mode>"
1681 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1684 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1685 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1686 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1688 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1689 [(set_attr "type" "ssemuladd")
1690 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> over FMAMODE.  Operand 1 is marked
;; commutative ("%").  The two alternatives allow memory in operand 3
;; (alternative 0) or in operand 2 (alternative 1), never in both.
1692 (define_insn "*fma4i_fnmsub_<mode>"
1693 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1696 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1697 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1699 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1701 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1702 [(set_attr "type" "ssemuladd")
1703 (set_attr "mode" "<MODE>")])
1705 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1706 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar (vm) multiply-add intrinsic on VF_128 operands.
;; The preparation code sets operands[4] to the zero constant of the
;; vector mode, CONST0_RTX (<MODE>mode).
1708 (define_expand "fma4i_vmfmadd_<mode>"
1709 [(set (match_operand:VF_128 0 "register_operand")
1712 (match_operand:VF_128 1 "nonimmediate_operand")
1713 (match_operand:VF_128 2 "nonimmediate_operand")
1714 (match_operand:VF_128 3 "nonimmediate_operand"))
1719 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar four-operand vfmadd<ssescalarmodesuffix> on VF_128 operands.
;; Operand 4 must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1722 (define_insn "*fma4i_vmfmadd_<mode>"
1723 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1726 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1727 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1728 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1729 (match_operand:VF_128 4 "const0_operand" "")
1732 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1733 [(set_attr "type" "ssemuladd")
1734 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix> on VF_128 operands.
;; Operand 4 must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1736 (define_insn "*fma4i_vmfmsub_<mode>"
1737 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1740 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1741 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1743 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1744 (match_operand:VF_128 4 "const0_operand" "")
1747 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1748 [(set_attr "type" "ssemuladd")
1749 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix> on VF_128 operands.
;; Operand 4 must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1751 (define_insn "*fma4i_vmfnmadd_<mode>"
1752 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1756 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1757 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1758 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1759 (match_operand:VF_128 4 "const0_operand" "")
1762 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1763 [(set_attr "type" "ssemuladd")
1764 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix> on VF_128 operands.
;; Operand 4 must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1766 (define_insn "*fma4i_vmfnmsub_<mode>"
1767 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1771 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1772 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1774 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1775 (match_operand:VF_128 4 "const0_operand" "")
1778 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1779 [(set_attr "type" "ssemuladd")
1780 (set_attr "mode" "<MODE>")])
1782 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1784 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1786 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1788 ;; It would be possible to represent these without the UNSPEC as
1791 ;; (fma op1 op2 op3)
1792 ;; (fma op1 op2 (neg op3))
1795 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation over the VF modes, taking
;; three nonimmediate inputs.  Enabled when TARGET_FMA || TARGET_FMA4.
1797 (define_expand "fmaddsub_<mode>"
1798 [(set (match_operand:VF 0 "register_operand")
1800 [(match_operand:VF 1 "nonimmediate_operand")
1801 (match_operand:VF 2 "nonimmediate_operand")
1802 (match_operand:VF 3 "nonimmediate_operand")]
1804 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> over the VF modes.  Operand 1 is
;; marked commutative ("%").  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1806 (define_insn "*fma4_fmaddsub_<mode>"
1807 [(set (match_operand:VF 0 "register_operand" "=x,x")
1809 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1810 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1811 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1814 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1815 [(set_attr "type" "ssemuladd")
1816 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> over the VF modes.  Operand 1 is
;; marked commutative ("%").  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1818 (define_insn "*fma4_fmsubadd_<mode>"
1819 [(set (match_operand:VF 0 "register_operand" "=x,x")
1821 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1822 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1824 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1827 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1828 [(set_attr "type" "ssemuladd")
1829 (set_attr "mode" "<MODE>")])
1831 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1833 ;; FMA3 floating point multiply/accumulate instructions.
1835 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand FMA3 multiply-add over FMAMODE.  The destination is
;; always tied to one input: operand 1 in alternatives 0 and 1 (132 and
;; 213 forms), operand 3 in alternative 2 (231 form).  Each alternative
;; prints the two remaining inputs in the order its mnemonic expects.
1837 (define_insn "*fma_fmadd_<mode>"
1838 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1840 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1841 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1842 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1845 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1846 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1847 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1848 [(set_attr "type" "ssemuladd")
1849 (set_attr "mode" "<MODE>")])
;; Three-operand FMA3 multiply-subtract over FMAMODE.  The destination is
;; tied to operand 1 in alternatives 0 and 1 (132 and 213 forms) and to
;; operand 3 in alternative 2 (231 form).
1851 (define_insn "*fma_fmsub_<mode>"
1852 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1854 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1855 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1857 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1860 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1861 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1862 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1863 [(set_attr "type" "ssemuladd")
1864 (set_attr "mode" "<MODE>")])
;; Three-operand FMA3 negated multiply-add over FMAMODE.  The destination
;; is tied to operand 1 in alternatives 0 and 1 (132 and 213 forms) and to
;; operand 3 in alternative 2 (231 form).
1866 (define_insn "*fma_fnmadd_<mode>"
1867 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1870 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1871 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1872 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1875 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1876 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1877 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1878 [(set_attr "type" "ssemuladd")
1879 (set_attr "mode" "<MODE>")])
;; Three-operand FMA3 negated multiply-subtract over FMAMODE.  The
;; destination is tied to operand 1 in alternatives 0 and 1 (132 and 213
;; forms) and to operand 3 in alternative 2 (231 form).
;; Alternative 1 prints its operands in the 213 order (%3, %2, %0), so it
;; must use the vfnmsub213 mnemonic, as the sibling fmadd/fmsub/fnmadd
;; patterns do; emitting vfnmsub231 there selects the wrong operand roles.
1881 (define_insn "*fma_fnmsub_<mode>"
1882 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1885 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1886 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1888 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1891 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1892 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1893 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1894 [(set_attr "type" "ssemuladd")
1895 (set_attr "mode" "<MODE>")])
;; Three-operand FMA3 multiply add/sub over the VF modes.  The destination
;; is tied to operand 1 in alternatives 0 and 1 (132 and 213 forms) and to
;; operand 3 in alternative 2 (231 form).
1897 (define_insn "*fma_fmaddsub_<mode>"
1898 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1900 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1901 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1902 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1906 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1907 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1908 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1909 [(set_attr "type" "ssemuladd")
1910 (set_attr "mode" "<MODE>")])
;; Three-operand FMA3 multiply sub/add over the VF modes.  The destination
;; is tied to operand 1 in alternatives 0 and 1 (132 and 213 forms) and to
;; operand 3 in alternative 2 (231 form).
1912 (define_insn "*fma_fmsubadd_<mode>"
1913 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1915 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1916 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1918 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1922 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1923 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1924 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1925 [(set_attr "type" "ssemuladd")
1926 (set_attr "mode" "<MODE>")])
1928 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1930 ;; Parallel single-precision floating point conversion operations
1932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("ym" constraint) to V2SF.
;; Operand 1 is a V4SF register input tied to the V4SF destination by the
;; "0" constraint.
1934 (define_insn "sse_cvtpi2ps"
1935 [(set (match_operand:V4SF 0 "register_operand" "=x")
1938 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1939 (match_operand:V4SF 1 "register_operand" "0")
1942 "cvtpi2ps\t{%2, %0|%0, %2}"
1943 [(set_attr "type" "ssecvt")
1944 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec conversion of the V4SF operand 1 to V4SI, of which
;; elements 0 and 1 form the V2SI result.  The destination uses the "y"
;; constraint and the insn is tagged with unit "mmx".
1946 (define_insn "sse_cvtps2pi"
1947 [(set (match_operand:V2SI 0 "register_operand" "=y")
1949 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1951 (parallel [(const_int 0) (const_int 1)])))]
1953 "cvtps2pi\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "ssecvt")
1955 (set_attr "unit" "mmx")
1956 (set_attr "mode" "DI")])
;; cvttps2pi: a (fix:V4SI ...) conversion of the V4SF operand 1, of which
;; elements 0 and 1 form the V2SI result.  The destination uses the "y"
;; constraint; the insn is tagged with unit "mmx" and prefix_rep 0.
1958 (define_insn "sse_cvttps2pi"
1959 [(set (match_operand:V2SI 0 "register_operand" "=y")
1961 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1962 (parallel [(const_int 0) (const_int 1)])))]
1964 "cvttps2pi\t{%1, %0|%0, %1}"
1965 [(set_attr "type" "ssecvt")
1966 (set_attr "unit" "mmx")
1967 (set_attr "prefix_rep" "0")
1968 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF; operand 1 is a V4SF
;; register input.  Alternatives 0 and 1 (register and memory source) use
;; the legacy encoding with operand 1 tied to the destination and differ
;; only in their decode attributes; alternative 2 is the VEX encoding.
1970 (define_insn "sse_cvtsi2ss"
1971 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1974 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1975 (match_operand:V4SF 1 "register_operand" "0,0,x")
1979 cvtsi2ss\t{%2, %0|%0, %2}
1980 cvtsi2ss\t{%2, %0|%0, %2}
1981 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1982 [(set_attr "isa" "noavx,noavx,avx")
1983 (set_attr "type" "sseicvt")
1984 (set_attr "athlon_decode" "vector,double,*")
1985 (set_attr "amdfam10_decode" "vector,double,*")
1986 (set_attr "bdver1_decode" "double,direct,*")
1987 (set_attr "prefix" "orig,orig,vex")
1988 (set_attr "mode" "SF")])
;; 64-bit variant of cvtsi2ss: converts the DImode operand 2 to SF.
;; Requires TARGET_SSE && TARGET_64BIT.  The legacy alternatives set
;; prefix_rex to 1; the VEX alternative sets length_vex to 4.
1990 (define_insn "sse_cvtsi2ssq"
1991 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1994 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1995 (match_operand:V4SF 1 "register_operand" "0,0,x")
1997 "TARGET_SSE && TARGET_64BIT"
1999 cvtsi2ssq\t{%2, %0|%0, %2}
2000 cvtsi2ssq\t{%2, %0|%0, %2}
2001 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2002 [(set_attr "isa" "noavx,noavx,avx")
2003 (set_attr "type" "sseicvt")
2004 (set_attr "athlon_decode" "vector,double,*")
2005 (set_attr "amdfam10_decode" "vector,double,*")
2006 (set_attr "bdver1_decode" "double,direct,*")
2007 (set_attr "length_vex" "*,*,4")
2008 (set_attr "prefix_rex" "1,1,*")
2009 (set_attr "prefix" "orig,orig,vex")
2010 (set_attr "mode" "SF")])
;; %vcvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand 1 to an SImode general register.  The two alternatives take the
;; source from a register or from memory.
2012 (define_insn "sse_cvtss2si"
2013 [(set (match_operand:SI 0 "register_operand" "=r,r")
2016 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2017 (parallel [(const_int 0)]))]
2018 UNSPEC_FIX_NOTRUNC))]
2020 "%vcvtss2si\t{%1, %0|%0, %1}"
2021 [(set_attr "type" "sseicvt")
2022 (set_attr "athlon_decode" "double,vector")
2023 (set_attr "bdver1_decode" "double,double")
2024 (set_attr "prefix_rep" "1")
2025 (set_attr "prefix" "maybe_vex")
2026 (set_attr "mode" "SI")])
;; %vcvtss2si variant whose source is a scalar SFmode operand rather than
;; element 0 of a vector: UNSPEC_FIX_NOTRUNC conversion to SImode.
2028 (define_insn "sse_cvtss2si_2"
2029 [(set (match_operand:SI 0 "register_operand" "=r,r")
2030 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2031 UNSPEC_FIX_NOTRUNC))]
2033 "%vcvtss2si\t{%1, %0|%0, %1}"
2034 [(set_attr "type" "sseicvt")
2035 (set_attr "athlon_decode" "double,vector")
2036 (set_attr "amdfam10_decode" "double,double")
2037 (set_attr "bdver1_decode" "double,double")
2038 (set_attr "prefix_rep" "1")
2039 (set_attr "prefix" "maybe_vex")
2040 (set_attr "mode" "SI")])
;; 64-bit %vcvtss2si{q}: UNSPEC_FIX_NOTRUNC conversion of element 0 of the
;; V4SF operand 1 to a DImode general register.  Requires
;; TARGET_SSE && TARGET_64BIT.
2042 (define_insn "sse_cvtss2siq"
2043 [(set (match_operand:DI 0 "register_operand" "=r,r")
2046 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2047 (parallel [(const_int 0)]))]
2048 UNSPEC_FIX_NOTRUNC))]
2049 "TARGET_SSE && TARGET_64BIT"
2050 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2051 [(set_attr "type" "sseicvt")
2052 (set_attr "athlon_decode" "double,vector")
2053 (set_attr "bdver1_decode" "double,double")
2054 (set_attr "prefix_rep" "1")
2055 (set_attr "prefix" "maybe_vex")
2056 (set_attr "mode" "DI")])
;; 64-bit %vcvtss2si{q} variant whose source is a scalar SFmode operand:
;; UNSPEC_FIX_NOTRUNC conversion to DImode.  Requires
;; TARGET_SSE && TARGET_64BIT.
2058 (define_insn "sse_cvtss2siq_2"
2059 [(set (match_operand:DI 0 "register_operand" "=r,r")
2060 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2061 UNSPEC_FIX_NOTRUNC))]
2062 "TARGET_SSE && TARGET_64BIT"
2063 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "sseicvt")
2065 (set_attr "athlon_decode" "double,vector")
2066 (set_attr "amdfam10_decode" "double,double")
2067 (set_attr "bdver1_decode" "double,double")
2068 (set_attr "prefix_rep" "1")
2069 (set_attr "prefix" "maybe_vex")
2070 (set_attr "mode" "DI")])
;; %vcvttss2si: converts element 0 of the V4SF operand 1 to an SImode
;; general register.  The two alternatives take the source from a register
;; or from memory.
2072 (define_insn "sse_cvttss2si"
2073 [(set (match_operand:SI 0 "register_operand" "=r,r")
2076 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2077 (parallel [(const_int 0)]))))]
2079 "%vcvttss2si\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "sseicvt")
2081 (set_attr "athlon_decode" "double,vector")
2082 (set_attr "amdfam10_decode" "double,double")
2083 (set_attr "bdver1_decode" "double,double")
2084 (set_attr "prefix_rep" "1")
2085 (set_attr "prefix" "maybe_vex")
2086 (set_attr "mode" "SI")])
;; 64-bit %vcvttss2si{q}: converts element 0 of the V4SF operand 1 to a
;; DImode general register.  Requires TARGET_SSE && TARGET_64BIT.
2088 (define_insn "sse_cvttss2siq"
2089 [(set (match_operand:DI 0 "register_operand" "=r,r")
2092 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2093 (parallel [(const_int 0)]))))]
2094 "TARGET_SSE && TARGET_64BIT"
2095 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2096 [(set_attr "type" "sseicvt")
2097 (set_attr "athlon_decode" "double,vector")
2098 (set_attr "amdfam10_decode" "double,double")
2099 (set_attr "bdver1_decode" "double,double")
2100 (set_attr "prefix_rep" "1")
2101 (set_attr "prefix" "maybe_vex")
2102 (set_attr "mode" "DI")])
;; VEX-only vcvtdq2ps: (float:V8SF ...) conversion of a V8SI
;; register-or-memory operand into a V8SF register.
2104 (define_insn "avx_cvtdq2ps256"
2105 [(set (match_operand:V8SF 0 "register_operand" "=x")
2106 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2108 "vcvtdq2ps\t{%1, %0|%0, %1}"
2109 [(set_attr "type" "ssecvt")
2110 (set_attr "prefix" "vex")
2111 (set_attr "mode" "V8SF")])
;; %vcvtdq2ps: (float:V4SF ...) conversion of a V4SI register-or-memory
;; operand into a V4SF register; the prefix attribute is maybe_vex.
2113 (define_insn "sse2_cvtdq2ps"
2114 [(set (match_operand:V4SF 0 "register_operand" "=x")
2115 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2117 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2118 [(set_attr "type" "ssecvt")
2119 (set_attr "prefix" "maybe_vex")
2120 (set_attr "mode" "V4SF")])
;; Expander converting V4SI to V4SF in several steps: a (float:V4SF ...)
;; conversion of operand 1, an lt comparison of operands[5] against
;; operands[3], an and of operands[6] with operands[4], and a final plus
;; of operands[5] and operands[7] into operand 0.
;; operands[3] is a zero V4SF vector forced into a register, operands[4]
;; is a V4SF constant vector built from 2**32 (real_ldexp of dconst1 by
;; 32), and operands[5..7] are fresh V4SF pseudo registers.
2122 (define_expand "sse2_cvtudq2ps"
2124 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2126 (lt:V4SF (match_dup 5) (match_dup 3)))
2128 (and:V4SF (match_dup 6) (match_dup 4)))
2129 (set (match_operand:V4SF 0 "register_operand" "")
2130 (plus:V4SF (match_dup 5) (match_dup 7)))]
2133 REAL_VALUE_TYPE TWO32r;
2137 real_ldexp (&TWO32r, &dconst1, 32);
2138 x = const_double_from_real_value (TWO32r, SFmode);
2140 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2141 operands[4] = force_reg (V4SFmode,
2142 ix86_build_const_vector (V4SFmode, 1, x));
2144 for (i = 5; i < 8; i++)
2145 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit single-precision -> integer conversion (vcvtps2dq): V8SF
;; operand 1 to V8SI operand 0.  Modelled as UNSPEC_FIX_NOTRUNC rather
;; than the plain `fix' used by the truncating cvtt* patterns.
2148 (define_insn "avx_cvtps2dq256"
2149 [(set (match_operand:V8SI 0 "register_operand" "=x")
2150 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2151 UNSPEC_FIX_NOTRUNC))]
2153 "vcvtps2dq\t{%1, %0|%0, %1}"
2154 [(set_attr "type" "ssecvt")
2155 (set_attr "prefix" "vex")
2156 (set_attr "mode" "OI")])
;; 128-bit single-precision -> integer conversion (cvtps2dq): V4SF
;; operand 1 to V4SI operand 0, modelled as UNSPEC_FIX_NOTRUNC.
;; prefix_data16 is chosen by a TARGET_AVX test; the non-AVX value is
;; "1" (the AVX-branch value is on a line not shown in this view).
2158 (define_insn "sse2_cvtps2dq"
2159 [(set (match_operand:V4SI 0 "register_operand" "=x")
2160 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2161 UNSPEC_FIX_NOTRUNC))]
2163 "%vcvtps2dq\t{%1, %0|%0, %1}"
2164 [(set_attr "type" "ssecvt")
2165 (set (attr "prefix_data16")
2167 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2169 (const_string "1")))
2170 (set_attr "prefix" "maybe_vex")
2171 (set_attr "mode" "TI")])
;; 256-bit truncating single-precision -> integer conversion
;; (vcvttps2dq): `fix' of V8SF operand 1 into V8SI operand 0.
2173 (define_insn "avx_cvttps2dq256"
2174 [(set (match_operand:V8SI 0 "register_operand" "=x")
2175 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2177 "vcvttps2dq\t{%1, %0|%0, %1}"
2178 [(set_attr "type" "ssecvt")
2179 (set_attr "prefix" "vex")
2180 (set_attr "mode" "OI")])
;; 128-bit truncating single-precision -> integer conversion
;; (cvttps2dq): `fix' of V4SF operand 1 into V4SI operand 0.
;; prefix_rep and prefix_data16 are each selected by a TARGET_AVX test;
;; the non-AVX values are "1" and "0" respectively.  prefix_data16 must
;; be set only once here: a second, unconditional setting would shadow
;; the TARGET_AVX-conditional one.
2182 (define_insn "sse2_cvttps2dq"
2183 [(set (match_operand:V4SI 0 "register_operand" "=x")
2184 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2186 "%vcvttps2dq\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "ssecvt")
2188 (set (attr "prefix_rep")
2190 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2192 (const_string "1")))
2193 (set (attr "prefix_data16")
2195 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2197 (const_string "0")))
2199 (set_attr "prefix" "maybe_vex")
2200 (set_attr "mode" "TI")])
2202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2204 ;; Parallel double-precision floating point conversion operations
2206 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI operand (MMX register "y" or memory) to a
;; V2DF SSE register.  The "unit" and "prefix_data16" attributes are
;; only set for the MMX-register alternative.
2208 (define_insn "sse2_cvtpi2pd"
2209 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2210 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2212 "cvtpi2pd\t{%1, %0|%0, %1}"
2213 [(set_attr "type" "ssecvt")
2214 (set_attr "unit" "mmx,*")
2215 (set_attr "prefix_data16" "1,*")
2216 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert V2DF operand 1 (SSE register or memory) to a V2SI
;; value in an MMX register ("y"), modelled as UNSPEC_FIX_NOTRUNC.
2218 (define_insn "sse2_cvtpd2pi"
2219 [(set (match_operand:V2SI 0 "register_operand" "=y")
2220 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2221 UNSPEC_FIX_NOTRUNC))]
2223 "cvtpd2pi\t{%1, %0|%0, %1}"
2224 [(set_attr "type" "ssecvt")
2225 (set_attr "unit" "mmx")
2226 (set_attr "bdver1_decode" "double")
2227 (set_attr "prefix_data16" "1")
2228 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating conversion (`fix') of V2DF operand 1 to a V2SI
;; value in an MMX register ("y").
;; NOTE(review): the "mode" attribute is TI here but DI on the otherwise
;; parallel sse2_cvtpd2pi -- confirm whether the difference is intended.
2230 (define_insn "sse2_cvttpd2pi"
2231 [(set (match_operand:V2SI 0 "register_operand" "=y")
2232 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2234 "cvttpd2pi\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "ssecvt")
2236 (set_attr "unit" "mmx")
2237 (set_attr "bdver1_decode" "double")
2238 (set_attr "prefix_data16" "1")
2239 (set_attr "mode" "TI")])
;; Scalar SImode integer -> DF conversion (cvtsi2sd).  Operand 2 is the
;; integer source (register or memory), operand 1 a V2DF input and
;; operand 0 the V2DF result.  Alternatives 0 and 1 are the two-operand
;; non-AVX forms with operand 1 tied to operand 0; alternative 2 is the
;; three-operand vcvtsi2sd, restricted to AVX by the "isa" attribute.
2241 (define_insn "sse2_cvtsi2sd"
2242 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2245 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2246 (match_operand:V2DF 1 "register_operand" "0,0,x")
2250 cvtsi2sd\t{%2, %0|%0, %2}
2251 cvtsi2sd\t{%2, %0|%0, %2}
2252 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "isa" "noavx,noavx,avx")
2254 (set_attr "type" "sseicvt")
2255 (set_attr "athlon_decode" "double,direct,*")
2256 (set_attr "amdfam10_decode" "vector,double,*")
2257 (set_attr "bdver1_decode" "double,direct,*")
2258 (set_attr "prefix" "orig,orig,vex")
2259 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: DImode integer operand 2 -> DF
;; (cvtsi2sdq / vcvtsi2sdq).  Enabled only for TARGET_SSE2 &&
;; TARGET_64BIT.  The non-AVX alternatives set prefix_rex to 1; the AVX
;; alternative sets length_vex to 4.
2261 (define_insn "sse2_cvtsi2sdq"
2262 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2265 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2266 (match_operand:V2DF 1 "register_operand" "0,0,x")
2268 "TARGET_SSE2 && TARGET_64BIT"
2270 cvtsi2sdq\t{%2, %0|%0, %2}
2271 cvtsi2sdq\t{%2, %0|%0, %2}
2272 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2273 [(set_attr "isa" "noavx,noavx,avx")
2274 (set_attr "type" "sseicvt")
2275 (set_attr "athlon_decode" "double,direct,*")
2276 (set_attr "amdfam10_decode" "vector,double,*")
2277 (set_attr "bdver1_decode" "double,direct,*")
2278 (set_attr "length_vex" "*,*,4")
2279 (set_attr "prefix_rex" "1,1,*")
2280 (set_attr "prefix" "orig,orig,vex")
2281 (set_attr "mode" "DF")])
;; cvtsd2si: convert element 0 of V2DF operand 1 to an SImode general
;; register, modelled as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 and sse2_cvttsd2si, no
;; "amdfam10_decode" attribute is set here -- confirm whether intended.
2283 (define_insn "sse2_cvtsd2si"
2284 [(set (match_operand:SI 0 "register_operand" "=r,r")
2287 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2288 (parallel [(const_int 0)]))]
2289 UNSPEC_FIX_NOTRUNC))]
2291 "%vcvtsd2si\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "sseicvt")
2293 (set_attr "athlon_decode" "double,vector")
2294 (set_attr "bdver1_decode" "double,double")
2295 (set_attr "prefix_rep" "1")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "SI")])
;; Same instruction as sse2_cvtsd2si, but matching a scalar DFmode
;; operand 1 directly instead of element 0 of a V2DF vector.
2299 (define_insn "sse2_cvtsd2si_2"
2300 [(set (match_operand:SI 0 "register_operand" "=r,r")
2301 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2302 UNSPEC_FIX_NOTRUNC))]
2304 "%vcvtsd2si\t{%1, %0|%0, %1}"
2305 [(set_attr "type" "sseicvt")
2306 (set_attr "athlon_decode" "double,vector")
2307 (set_attr "amdfam10_decode" "double,double")
2308 (set_attr "bdver1_decode" "double,double")
2309 (set_attr "prefix_rep" "1")
2310 (set_attr "prefix" "maybe_vex")
2311 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: element 0 of V2DF operand 1 to a DImode general
;; register (UNSPEC_FIX_NOTRUNC).  Enabled only for TARGET_SSE2 &&
;; TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse2_cvtsd2siq_2 -- confirm whether intended.
2313 (define_insn "sse2_cvtsd2siq"
2314 [(set (match_operand:DI 0 "register_operand" "=r,r")
2317 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2318 (parallel [(const_int 0)]))]
2319 UNSPEC_FIX_NOTRUNC))]
2320 "TARGET_SSE2 && TARGET_64BIT"
2321 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2322 [(set_attr "type" "sseicvt")
2323 (set_attr "athlon_decode" "double,vector")
2324 (set_attr "bdver1_decode" "double,double")
2325 (set_attr "prefix_rep" "1")
2326 (set_attr "prefix" "maybe_vex")
2327 (set_attr "mode" "DI")])
;; Same instruction as sse2_cvtsd2siq, but matching a scalar DFmode
;; operand 1 directly.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2329 (define_insn "sse2_cvtsd2siq_2"
2330 [(set (match_operand:DI 0 "register_operand" "=r,r")
2331 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2332 UNSPEC_FIX_NOTRUNC))]
2333 "TARGET_SSE2 && TARGET_64BIT"
2334 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2335 [(set_attr "type" "sseicvt")
2336 (set_attr "athlon_decode" "double,vector")
2337 (set_attr "amdfam10_decode" "double,double")
2338 (set_attr "bdver1_decode" "double,double")
2339 (set_attr "prefix_rep" "1")
2340 (set_attr "prefix" "maybe_vex")
2341 (set_attr "mode" "DI")])
;; cvttsd2si: takes element 0 of V2DF operand 1 (SSE register or
;; memory) and writes an SImode general register.
2343 (define_insn "sse2_cvttsd2si"
2344 [(set (match_operand:SI 0 "register_operand" "=r,r")
2347 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2348 (parallel [(const_int 0)]))))]
2350 "%vcvttsd2si\t{%1, %0|%0, %1}"
2351 [(set_attr "type" "sseicvt")
2352 (set_attr "athlon_decode" "double,vector")
2353 (set_attr "amdfam10_decode" "double,double")
2354 (set_attr "bdver1_decode" "double,double")
2355 (set_attr "prefix_rep" "1")
2356 (set_attr "prefix" "maybe_vex")
2357 (set_attr "mode" "SI")])
;; 64-bit cvttsd2si: takes element 0 of V2DF operand 1 and writes a
;; DImode general register.  Enabled only for TARGET_SSE2 &&
;; TARGET_64BIT.
2359 (define_insn "sse2_cvttsd2siq"
2360 [(set (match_operand:DI 0 "register_operand" "=r,r")
2363 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2364 (parallel [(const_int 0)]))))]
2365 "TARGET_SSE2 && TARGET_64BIT"
2366 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2367 [(set_attr "type" "sseicvt")
2368 (set_attr "athlon_decode" "double,vector")
2369 (set_attr "amdfam10_decode" "double,double")
2370 (set_attr "bdver1_decode" "double,double")
2371 (set_attr "prefix_rep" "1")
2372 (set_attr "prefix" "maybe_vex")
2373 (set_attr "mode" "DI")])
;; vcvtdq2pd, 256-bit result: signed V4SI operand 1 (register or
;; memory) converted to V4DF operand 0.
2375 (define_insn "avx_cvtdq2pd256"
2376 [(set (match_operand:V4DF 0 "register_operand" "=x")
2377 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2379 "vcvtdq2pd\t{%1, %0|%0, %1}"
2380 [(set_attr "type" "ssecvt")
2381 (set_attr "prefix" "vex")
2382 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtdq2pd256 whose source is elements 0..3 of a V8SI
;; operand.  The template prints operand 1 with the %x modifier
;; (presumably its 128-bit xmm form, so the same vcvtdq2pd encoding is
;; emitted -- confirm against the operand-printing code).
2384 (define_insn "*avx_cvtdq2pd256_2"
2385 [(set (match_operand:V4DF 0 "register_operand" "=x")
2388 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2389 (parallel [(const_int 0) (const_int 1)
2390 (const_int 2) (const_int 3)]))))]
2392 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2393 [(set_attr "type" "ssecvt")
2394 (set_attr "prefix" "vex")
2395 (set_attr "mode" "V4DF")])
;; cvtdq2pd: takes elements 0 and 1 of V4SI operand 1 (register or
;; memory) and produces V2DF operand 0.
2397 (define_insn "sse2_cvtdq2pd"
2398 [(set (match_operand:V2DF 0 "register_operand" "=x")
2401 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2402 (parallel [(const_int 0) (const_int 1)]))))]
2404 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2405 [(set_attr "type" "ssecvt")
2406 (set_attr "prefix" "maybe_vex")
2407 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand 1 converted to V4SI
;; operand 0 (UNSPEC_FIX_NOTRUNC).  The AT&T mnemonic carries the {y}
;; suffix.
2409 (define_insn "avx_cvtpd2dq256"
2410 [(set (match_operand:V4SI 0 "register_operand" "=x")
2411 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2412 UNSPEC_FIX_NOTRUNC))]
2414 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2415 [(set_attr "type" "ssecvt")
2416 (set_attr "prefix" "vex")
2417 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted to a V2SI value
;; (UNSPEC) placed in V4SI operand 0.  operands[2] is set to a V2SI
;; zero, which the matching insn *sse2_cvtpd2dq accepts through its
;; const0_operand (presumably the upper half of the result).
2419 (define_expand "sse2_cvtpd2dq"
2420 [(set (match_operand:V4SI 0 "register_operand" "")
2422 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2426 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander: V2DF operand 1 converted
;; to V2SI, combined with a V2SI zero (operand 2) into V4SI operand 0.
;; Emits either vcvtpd2dq{x} or cvtpd2dq; the selecting test is on a
;; line not shown in this view (presumably TARGET_AVX).
2428 (define_insn "*sse2_cvtpd2dq"
2429 [(set (match_operand:V4SI 0 "register_operand" "=x")
2431 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2433 (match_operand:V2SI 2 "const0_operand" "")))]
2437 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2439 return "cvtpd2dq\t{%1, %0|%0, %1}";
2441 [(set_attr "type" "ssecvt")
2442 (set_attr "prefix_rep" "1")
2443 (set_attr "prefix_data16" "0")
2444 (set_attr "prefix" "maybe_vex")
2445 (set_attr "mode" "TI")
2446 (set_attr "amdfam10_decode" "double")
2447 (set_attr "athlon_decode" "vector")
2448 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq with a 256-bit source: truncating conversion (`fix') of
;; V4DF operand 1 to V4SI operand 0.  AT&T mnemonic carries {y}.
2450 (define_insn "avx_cvttpd2dq256"
2451 [(set (match_operand:V4SI 0 "register_operand" "=x")
2452 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2454 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "prefix" "vex")
2457 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: truncating conversion of V2DF operand 1 to
;; V2SI, placed in V4SI operand 0.  operands[2] is set to a V2SI zero,
;; which the matching insn *sse2_cvttpd2dq accepts through its
;; const0_operand (presumably the upper half of the result).
2459 (define_expand "sse2_cvttpd2dq"
2460 [(set (match_operand:V4SI 0 "register_operand" "")
2462 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2465 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander: `fix' of V2DF operand 1
;; to V2SI, combined with a V2SI zero (operand 2) into V4SI operand 0.
;; Emits either vcvttpd2dq{x} or cvttpd2dq; the selecting test is on a
;; line not shown in this view (presumably TARGET_AVX).
2467 (define_insn "*sse2_cvttpd2dq"
2468 [(set (match_operand:V4SI 0 "register_operand" "=x")
2470 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2471 (match_operand:V2SI 2 "const0_operand" "")))]
2475 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2477 return "cvttpd2dq\t{%1, %0|%0, %1}";
2479 [(set_attr "type" "ssecvt")
2480 (set_attr "amdfam10_decode" "double")
2481 (set_attr "athlon_decode" "vector")
2482 (set_attr "bdver1_decode" "double")
2483 (set_attr "prefix" "maybe_vex")
2484 (set_attr "mode" "TI")])
;; cvtsd2ss: double -> single narrowing of V2DF operand 2 (modelled
;; with float_truncate:V2SF), combined with V4SF operand 1 into V4SF
;; operand 0.  Alternatives 0 and 1 are the two-operand non-AVX forms
;; (register and memory source, operand 1 tied to operand 0);
;; alternative 2 is the three-operand vcvtsd2ss for AVX.
2486 (define_insn "sse2_cvtsd2ss"
2487 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2490 (float_truncate:V2SF
2491 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2492 (match_operand:V4SF 1 "register_operand" "0,0,x")
2496 cvtsd2ss\t{%2, %0|%0, %2}
2497 cvtsd2ss\t{%2, %0|%0, %2}
2498 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2499 [(set_attr "isa" "noavx,noavx,avx")
2500 (set_attr "type" "ssecvt")
2501 (set_attr "athlon_decode" "vector,double,*")
2502 (set_attr "amdfam10_decode" "vector,double,*")
2503 (set_attr "bdver1_decode" "direct,direct,*")
2504 (set_attr "prefix" "orig,orig,vex")
2505 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 as the source,
;; combined with V2DF operand 1 into V2DF operand 0.  Alternatives 0
;; and 1 are the two-operand non-AVX forms (register and memory
;; source); alternative 2 is the three-operand vcvtss2sd for AVX.
2507 (define_insn "sse2_cvtss2sd"
2508 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2512 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2513 (parallel [(const_int 0) (const_int 1)])))
2514 (match_operand:V2DF 1 "register_operand" "0,0,x")
2518 cvtss2sd\t{%2, %0|%0, %2}
2519 cvtss2sd\t{%2, %0|%0, %2}
2520 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2521 [(set_attr "isa" "noavx,noavx,avx")
2522 (set_attr "type" "ssecvt")
2523 (set_attr "amdfam10_decode" "vector,double,*")
2524 (set_attr "athlon_decode" "direct,direct,*")
2525 (set_attr "bdver1_decode" "direct,direct,*")
2526 (set_attr "prefix" "orig,orig,vex")
2527 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of V4DF operand 1 to
;; V4SF operand 0.  AT&T mnemonic carries the {y} suffix.
2529 (define_insn "avx_cvtpd2ps256"
2530 [(set (match_operand:V4SF 0 "register_operand" "=x")
2531 (float_truncate:V4SF
2532 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2534 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2535 [(set_attr "type" "ssecvt")
2536 (set_attr "prefix" "vex")
2537 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate of V2DF operand 1 to V2SF,
;; placed in V4SF operand 0.  operands[2] is set to a V2SF zero, which
;; the matching insn *sse2_cvtpd2ps accepts through its const0_operand
;; (presumably the upper half of the result).
2539 (define_expand "sse2_cvtpd2ps"
2540 [(set (match_operand:V4SF 0 "register_operand" "")
2542 (float_truncate:V2SF
2543 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2546 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander: float_truncate of V2DF
;; operand 1 to V2SF, combined with a V2SF zero (operand 2) into V4SF
;; operand 0.  Emits either vcvtpd2ps{x} or cvtpd2ps; the selecting
;; test is on a line not shown in this view (presumably TARGET_AVX).
2548 (define_insn "*sse2_cvtpd2ps"
2549 [(set (match_operand:V4SF 0 "register_operand" "=x")
2551 (float_truncate:V2SF
2552 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2553 (match_operand:V2SF 2 "const0_operand" "")))]
2557 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2559 return "cvtpd2ps\t{%1, %0|%0, %1}";
2561 [(set_attr "type" "ssecvt")
2562 (set_attr "amdfam10_decode" "double")
2563 (set_attr "athlon_decode" "vector")
2564 (set_attr "bdver1_decode" "double")
2565 (set_attr "prefix_data16" "1")
2566 (set_attr "prefix" "maybe_vex")
2567 (set_attr "mode" "V4SF")])
;; vcvtps2pd, 256-bit result: V4SF operand 1 (register or memory)
;; converted to V4DF operand 0.
2569 (define_insn "avx_cvtps2pd256"
2570 [(set (match_operand:V4DF 0 "register_operand" "=x")
2572 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2574 "vcvtps2pd\t{%1, %0|%0, %1}"
2575 [(set_attr "type" "ssecvt")
2576 (set_attr "prefix" "vex")
2577 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtps2pd256 whose source is elements 0..3 of a V8SF
;; operand.  The template prints operand 1 with the %x modifier
;; (presumably its 128-bit xmm form -- confirm against the
;; operand-printing code).
2579 (define_insn "*avx_cvtps2pd256_2"
2580 [(set (match_operand:V4DF 0 "register_operand" "=x")
2583 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2584 (parallel [(const_int 0) (const_int 1)
2585 (const_int 2) (const_int 3)]))))]
2587 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2588 [(set_attr "type" "ssecvt")
2589 (set_attr "prefix" "vex")
2590 (set_attr "mode" "V4DF")])
;; cvtps2pd: takes elements 0 and 1 of V4SF operand 1 (register or
;; memory) and produces V2DF operand 0.  prefix_data16 is forced to 0.
2592 (define_insn "sse2_cvtps2pd"
2593 [(set (match_operand:V2DF 0 "register_operand" "=x")
2596 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2597 (parallel [(const_int 0) (const_int 1)]))))]
2599 "%vcvtps2pd\t{%1, %0|%0, %1}"
2600 [(set_attr "type" "ssecvt")
2601 (set_attr "amdfam10_decode" "direct")
2602 (set_attr "athlon_decode" "double")
2603 (set_attr "bdver1_decode" "double")
2604 (set_attr "prefix_data16" "0")
2605 (set_attr "prefix" "maybe_vex")
2606 (set_attr "mode" "V2DF")])
;; Widen the high half of V4SF operand 1 into V2DF operand 0 in two
;; steps: a shuffle selecting indices 6,7,2,3 (operands[2] is a fresh
;; V4SF pseudo, presumably the destination of that shuffle), followed
;; by a conversion that reads elements 0 and 1.
2608 (define_expand "vec_unpacks_hi_v4sf"
2613 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2614 (parallel [(const_int 6) (const_int 7)
2615 (const_int 2) (const_int 3)])))
2616 (set (match_operand:V2DF 0 "register_operand" "")
2620 (parallel [(const_int 0) (const_int 1)]))))]
2622 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of V8SF operand 1 into V4DF operand 0: elements
;; 4..7 are selected first (operands[2] is a fresh V4SF pseudo,
;; presumably holding that selection), then converted to V4DF.
2624 (define_expand "vec_unpacks_hi_v8sf"
2627 (match_operand:V8SF 1 "nonimmediate_operand" "")
2628 (parallel [(const_int 4) (const_int 5)
2629 (const_int 6) (const_int 7)])))
2630 (set (match_operand:V4DF 0 "register_operand" "")
2634 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of V4SF operand 1: elements 0 and 1 are selected
;; and the result is stored in V2DF operand 0.
2636 (define_expand "vec_unpacks_lo_v4sf"
2637 [(set (match_operand:V2DF 0 "register_operand" "")
2640 (match_operand:V4SF 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of V8SF operand 1: elements 0..3 are selected and
;; the result is stored in V4DF operand 0.
2644 (define_expand "vec_unpacks_lo_v8sf"
2645 [(set (match_operand:V4DF 0 "register_operand" "")
2648 (match_operand:V8SF 1 "nonimmediate_operand" "")
2649 (parallel [(const_int 0) (const_int 1)
2650 (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF, high half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2653 (define_expand "vec_unpacks_float_hi_v8hi"
2654 [(match_operand:V4SF 0 "register_operand" "")
2655 (match_operand:V8HI 1 "register_operand" "")]
2658 rtx tmp = gen_reg_rtx (V4SImode);
2660 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2661 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2665 (define_expand "vec_unpacks_float_lo_v8hi"
2666 [(match_operand:V4SF 0 "register_operand" "")
2667 (match_operand:V8HI 1 "register_operand" "")]
2670 rtx tmp = gen_reg_rtx (V4SImode);
2672 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2673 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, high half, unsigned: unpack into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2677 (define_expand "vec_unpacku_float_hi_v8hi"
2678 [(match_operand:V4SF 0 "register_operand" "")
2679 (match_operand:V8HI 1 "register_operand" "")]
2682 rtx tmp = gen_reg_rtx (V4SImode);
2684 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2685 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, unsigned: unpack into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2689 (define_expand "vec_unpacku_float_lo_v8hi"
2690 [(match_operand:V4SF 0 "register_operand" "")
2691 (match_operand:V8HI 1 "register_operand" "")]
2694 rtx tmp = gen_reg_rtx (V4SImode);
2696 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2697 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; High half of V4SI operand 1 to V2DF operand 0, in two steps: a
;; shuffle selecting indices 2,3,2,3 (operands[2] is a fresh V4SI
;; pseudo, presumably the shuffle destination), then a conversion that
;; reads elements 0 and 1.
2701 (define_expand "vec_unpacks_float_hi_v4si"
2704 (match_operand:V4SI 1 "nonimmediate_operand" "")
2705 (parallel [(const_int 2) (const_int 3)
2706 (const_int 2) (const_int 3)])))
2707 (set (match_operand:V2DF 0 "register_operand" "")
2711 (parallel [(const_int 0) (const_int 1)]))))]
2713 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of V4SI operand 1 to V2DF operand 0: elements 0 and 1 are
;; selected as the source.
2715 (define_expand "vec_unpacks_float_lo_v4si"
2716 [(set (match_operand:V2DF 0 "register_operand" "")
2719 (match_operand:V4SI 1 "nonimmediate_operand" "")
2720 (parallel [(const_int 0) (const_int 1)]))))]
;; High half of V8SI operand 1 to V4DF operand 0: elements 4..7 are
;; selected first (operands[2] is a fresh V4SI pseudo, presumably
;; holding that selection), then converted to V4DF.
2723 (define_expand "vec_unpacks_float_hi_v8si"
2726 (match_operand:V8SI 1 "nonimmediate_operand" "")
2727 (parallel [(const_int 4) (const_int 5)
2728 (const_int 6) (const_int 7)])))
2729 (set (match_operand:V4DF 0 "register_operand" "")
2733 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of V8SI operand 1 to V4DF operand 0: elements 0..3 are
;; selected as the source.
2735 (define_expand "vec_unpacks_float_lo_v8si"
2736 [(set (match_operand:V4DF 0 "register_operand" "")
2739 (match_operand:V8SI 1 "nonimmediate_operand" "")
2740 (parallel [(const_int 0) (const_int 1)
2741 (const_int 2) (const_int 3)]))))]
;; Unsigned conversion of the high half of V4SI operand 1 to V2DF
;; operand 0.  Visible steps:
;;   1. shuffle of operand 1 selecting indices 2,3,2,3 (operands[5] is
;;      a fresh V4SI pseudo, presumably its destination);
;;   2. a conversion reading elements 0 and 1;
;;   3. lt compare of (match_dup 6) against operands[3], a zero vector;
;;   4. and of (match_dup 7) with operands[4], a vector of 2**32
;;      (built with real_ldexp (1.0, 32));
;;   5. operand 0 = (match_dup 6) + (match_dup 8).
;; operands[6..8] are fresh V2DF pseudos.  The destinations of steps
;; 2-4 are on lines not shown in this view.
2744 (define_expand "vec_unpacku_float_hi_v4si"
2747 (match_operand:V4SI 1 "nonimmediate_operand" "")
2748 (parallel [(const_int 2) (const_int 3)
2749 (const_int 2) (const_int 3)])))
2754 (parallel [(const_int 0) (const_int 1)]))))
2756 (lt:V2DF (match_dup 6) (match_dup 3)))
2758 (and:V2DF (match_dup 7) (match_dup 4)))
2759 (set (match_operand:V2DF 0 "register_operand" "")
2760 (plus:V2DF (match_dup 6) (match_dup 8)))]
2763 REAL_VALUE_TYPE TWO32r;
2767 real_ldexp (&TWO32r, &dconst1, 32);
2768 x = const_double_from_real_value (TWO32r, DFmode);
2770 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2771 operands[4] = force_reg (V2DFmode,
2772 ix86_build_const_vector (V2DFmode, 1, x));
2774 operands[5] = gen_reg_rtx (V4SImode);
2776 for (i = 6; i < 9; i++)
2777 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of V4SI operand 1 to V2DF
;; operand 0.  Visible steps:
;;   1. a conversion reading elements 0 and 1 of operand 1;
;;   2. lt compare of (match_dup 5) against operands[3], a zero vector;
;;   3. and of (match_dup 6) with operands[4], a vector of 2**32
;;      (built with real_ldexp (1.0, 32));
;;   4. operand 0 = (match_dup 5) + (match_dup 7).
;; operands[5..7] are fresh V2DF pseudos.  The destinations of steps
;; 1-3 are on lines not shown in this view.
2780 (define_expand "vec_unpacku_float_lo_v4si"
2784 (match_operand:V4SI 1 "nonimmediate_operand" "")
2785 (parallel [(const_int 0) (const_int 1)]))))
2787 (lt:V2DF (match_dup 5) (match_dup 3)))
2789 (and:V2DF (match_dup 6) (match_dup 4)))
2790 (set (match_operand:V2DF 0 "register_operand" "")
2791 (plus:V2DF (match_dup 5) (match_dup 7)))]
2794 REAL_VALUE_TYPE TWO32r;
2798 real_ldexp (&TWO32r, &dconst1, 32);
2799 x = const_double_from_real_value (TWO32r, DFmode);
2801 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2802 operands[4] = force_reg (V2DFmode,
2803 ix86_build_const_vector (V2DFmode, 1, x));
2805 for (i = 5; i < 8; i++)
2806 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF operands into one V8SF result: operands 1 and 2 are
;; each narrowed with float_truncate:V4SF, and operand 0 is then set
;; from the two halves.  operands[3] and operands[4] are fresh V4SF
;; pseudos (presumably the two intermediate results; the combining
;; expression is on lines not shown in this view).
2809 (define_expand "vec_pack_trunc_v4df"
2811 (float_truncate:V4SF
2812 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2814 (float_truncate:V4SF
2815 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2816 (set (match_operand:V8SF 0 "register_operand" "")
2822 operands[3] = gen_reg_rtx (V4SFmode);
2823 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into one V4SF result: each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the
;; two temporaries are combined into operand 0 with gen_sse_movlhps.
2826 (define_expand "vec_pack_trunc_v2df"
2827 [(match_operand:V4SF 0 "register_operand" "")
2828 (match_operand:V2DF 1 "nonimmediate_operand" "")
2829 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2834 r1 = gen_reg_rtx (V4SFmode);
2835 r2 = gen_reg_rtx (V4SFmode);
2837 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2838 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2839 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result using the truncating
;; conversion: each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary (r1, r2); the temporaries are then combined by
;; gen_vec_interleave_lowv2di, with all three registers viewed as
;; V2DImode via gen_lowpart.
2843 (define_expand "vec_pack_sfix_trunc_v2df"
2844 [(match_operand:V4SI 0 "register_operand" "")
2845 (match_operand:V2DF 1 "nonimmediate_operand" "")
2846 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2851 r1 = gen_reg_rtx (V4SImode);
2852 r2 = gen_reg_rtx (V4SImode);
2854 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2855 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2856 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2857 gen_lowpart (V2DImode, r1),
2858 gen_lowpart (V2DImode, r2)));
;; Same structure as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with gen_sse2_cvtpd2dq (the non-truncating pattern) before
;; the two V4SI temporaries are combined by gen_vec_interleave_lowv2di
;; on their V2DImode views.
2862 (define_expand "vec_pack_sfix_v2df"
2863 [(match_operand:V4SI 0 "register_operand" "")
2864 (match_operand:V2DF 1 "nonimmediate_operand" "")
2865 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2870 r1 = gen_reg_rtx (V4SImode);
2871 r2 = gen_reg_rtx (V4SImode);
2873 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2874 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2875 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2876 gen_lowpart (V2DImode, r1),
2877 gen_lowpart (V2DImode, r2)));
2881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2883 ;; Parallel single-precision floating point element swizzling
2885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts any
;; nonimmediate_operand.  ix86_fixup_binary_operands returns the
;; destination to use (dst); the insn is emitted into dst, and dst is
;; copied to operands[0] afterwards when the two differ.
2887 (define_expand "sse_movhlps_exp"
2888 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2891 (match_operand:V4SF 1 "nonimmediate_operand" "")
2892 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2893 (parallel [(const_int 6)
2899 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2901 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2903 /* Fix up the destination if needed. */
2904 if (dst != operands[0])
2905 emit_move_insn (operands[0], dst);
;; movhlps-style selection from two V4SF operands (the selector list
;; starts at index 6).  Five alternatives:
;;   0/1  register-register movhlps / vmovhlps;
;;   2/3  movlps / vmovlps loading from %H2, with operand 2 an
;;        offsettable memory operand ("o");
;;   4    movhps store when operand 0 is memory.
;; The insn condition rejects operands 1 and 2 both being memory.
2910 (define_insn "sse_movhlps"
2911 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2914 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2915 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2916 (parallel [(const_int 6)
2920 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2922 movhlps\t{%2, %0|%0, %2}
2923 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2924 movlps\t{%H2, %0|%0, %H2}
2925 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2926 %vmovhps\t{%2, %0|%0, %2}"
2927 [(set_attr "isa" "noavx,avx,noavx,avx,*")
2928 (set_attr "type" "ssemov")
2929 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2930 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts any
;; nonimmediate_operand.  ix86_fixup_binary_operands returns the
;; destination to use (dst); the insn is emitted into dst, and dst is
;; copied to operands[0] afterwards when the two differ.
2932 (define_expand "sse_movlhps_exp"
2933 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2936 (match_operand:V4SF 1 "nonimmediate_operand" "")
2937 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2938 (parallel [(const_int 0)
2944 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2946 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2948 /* Fix up the destination if needed. */
2949 if (dst != operands[0])
2950 emit_move_insn (operands[0], dst);
;; movlhps-style selection from two V4SF operands (the selector list
;; starts at index 0).  Five alternatives:
;;   0/1  register-register movlhps / vmovlhps;
;;   2/3  movhps / vmovhps loading from memory operand 2;
;;   4    movlps store to %H0 when operand 0 is offsettable memory.
;; Alternative 3 must take a memory operand 2: vmovhps has no
;; register-source form, and with an "x" constraint the alternative
;; would merely duplicate alternative 1 and never be selected.  This
;; matches sse_loadhps, whose movhps/vmovhps alternatives both use "m".
2955 (define_insn "sse_movlhps"
2956 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2959 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2960 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2961 (parallel [(const_int 0)
2965 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2967 movlhps\t{%2, %0|%0, %2}
2968 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2969 movhps\t{%2, %0|%0, %2}
2970 vmovhps\t{%2, %1, %0|%0, %1, %2}
2971 %vmovlps\t{%2, %H0|%H0, %2}"
2972 [(set_attr "isa" "noavx,avx,noavx,avx,*")
2973 (set_attr "type" "ssemov")
2974 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2975 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2977 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: from the concatenation of operands 1 and 2 the
;; selector picks 2,10,3,11 (high pair of the low 128-bit lane of each
;; input) and 6,14,7,15 (high pair of the high lane).
2978 (define_insn "avx_unpckhps256"
2979 [(set (match_operand:V8SF 0 "register_operand" "=x")
2982 (match_operand:V8SF 1 "register_operand" "x")
2983 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2984 (parallel [(const_int 2) (const_int 10)
2985 (const_int 3) (const_int 11)
2986 (const_int 6) (const_int 14)
2987 (const_int 7) (const_int 15)])))]
2989 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2990 [(set_attr "type" "sselog")
2991 (set_attr "prefix" "vex")
2992 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF operands, built from three
;; selections because the 256-bit unpck insns work per lane:
;;   1. selector 0,8,1,9,4,12,5,13 on operands 1 and 2 (the
;;      avx_unpcklps256 selector);
;;   2. selector 2,10,3,11,6,14,7,15 (the avx_unpckhps256 selector);
;;   3. selector 4..7,12..15 producing operand 0.
;; operands[3] and operands[4] are fresh V8SF pseudos (presumably the
;; results of steps 1 and 2 and the inputs of step 3; those lines are
;; not shown in this view).
2994 (define_expand "vec_interleave_highv8sf"
2998 (match_operand:V8SF 1 "register_operand" "x")
2999 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3000 (parallel [(const_int 0) (const_int 8)
3001 (const_int 1) (const_int 9)
3002 (const_int 4) (const_int 12)
3003 (const_int 5) (const_int 13)])))
3009 (parallel [(const_int 2) (const_int 10)
3010 (const_int 3) (const_int 11)
3011 (const_int 6) (const_int 14)
3012 (const_int 7) (const_int 15)])))
3013 (set (match_operand:V8SF 0 "register_operand" "")
3018 (parallel [(const_int 4) (const_int 5)
3019 (const_int 6) (const_int 7)
3020 (const_int 12) (const_int 13)
3021 (const_int 14) (const_int 15)])))]
3024 operands[3] = gen_reg_rtx (V8SFmode);
3025 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps on V4SF: selector 2,6,3,7 over the concatenation of
;; operands 1 and 2.  Alternative 0 is the two-operand non-AVX form
;; (operand 1 tied to operand 0); alternative 1 is vunpckhps for AVX.
3028 (define_insn "vec_interleave_highv4sf"
3029 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3032 (match_operand:V4SF 1 "register_operand" "0,x")
3033 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3034 (parallel [(const_int 2) (const_int 6)
3035 (const_int 3) (const_int 7)])))]
3038 unpckhps\t{%2, %0|%0, %2}
3039 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3040 [(set_attr "isa" "noavx,avx")
3041 (set_attr "type" "sselog")
3042 (set_attr "prefix" "orig,vex")
3043 (set_attr "mode" "V4SF")])
3045 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: from the concatenation of operands 1 and 2 the
;; selector picks 0,8,1,9 (low pair of the low 128-bit lane of each
;; input) and 4,12,5,13 (low pair of the high lane).
3046 (define_insn "avx_unpcklps256"
3047 [(set (match_operand:V8SF 0 "register_operand" "=x")
3050 (match_operand:V8SF 1 "register_operand" "x")
3051 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3052 (parallel [(const_int 0) (const_int 8)
3053 (const_int 1) (const_int 9)
3054 (const_int 4) (const_int 12)
3055 (const_int 5) (const_int 13)])))]
3057 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3058 [(set_attr "type" "sselog")
3059 (set_attr "prefix" "vex")
3060 (set_attr "mode" "V8SF")])
;; Full-width low interleave of two V8SF operands, built from three
;; selections because the 256-bit unpck insns work per lane:
;;   1. selector 0,8,1,9,4,12,5,13 on operands 1 and 2 (the
;;      avx_unpcklps256 selector);
;;   2. selector 2,10,3,11,6,14,7,15 (the avx_unpckhps256 selector);
;;   3. selector 0..3,8..11 producing operand 0.
;; operands[3] and operands[4] are fresh V8SF pseudos (presumably the
;; results of steps 1 and 2 and the inputs of step 3; those lines are
;; not shown in this view).
3062 (define_expand "vec_interleave_lowv8sf"
3066 (match_operand:V8SF 1 "register_operand" "x")
3067 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3068 (parallel [(const_int 0) (const_int 8)
3069 (const_int 1) (const_int 9)
3070 (const_int 4) (const_int 12)
3071 (const_int 5) (const_int 13)])))
3077 (parallel [(const_int 2) (const_int 10)
3078 (const_int 3) (const_int 11)
3079 (const_int 6) (const_int 14)
3080 (const_int 7) (const_int 15)])))
3081 (set (match_operand:V8SF 0 "register_operand" "")
3086 (parallel [(const_int 0) (const_int 1)
3087 (const_int 2) (const_int 3)
3088 (const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)])))]
3092 operands[3] = gen_reg_rtx (V8SFmode);
3093 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps on V4SF: selector 0,4,1,5 over the concatenation of
;; operands 1 and 2.  Alternative 0 is the two-operand non-AVX form
;; (operand 1 tied to operand 0); alternative 1 is vunpcklps for AVX.
3096 (define_insn "vec_interleave_lowv4sf"
3097 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3100 (match_operand:V4SF 1 "register_operand" "0,x")
3101 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3102 (parallel [(const_int 0) (const_int 4)
3103 (const_int 1) (const_int 5)])))]
3106 unpcklps\t{%2, %0|%0, %2}
3107 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3108 [(set_attr "isa" "noavx,avx")
3109 (set_attr "type" "sselog")
3110 (set_attr "prefix" "orig,vex")
3111 (set_attr "mode" "V4SF")])
3113 ;; These are modeled with the same vec_concat as the others so that we
3114 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selector 1,1,3,3,5,5,7,7 duplicates each
;; odd-indexed element of operand 1.
3115 (define_insn "avx_movshdup256"
3116 [(set (match_operand:V8SF 0 "register_operand" "=x")
3119 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3121 (parallel [(const_int 1) (const_int 1)
3122 (const_int 3) (const_int 3)
3123 (const_int 5) (const_int 5)
3124 (const_int 7) (const_int 7)])))]
3126 "vmovshdup\t{%1, %0|%0, %1}"
3127 [(set_attr "type" "sse")
3128 (set_attr "prefix" "vex")
3129 (set_attr "mode" "V8SF")])
;; movshdup on V4SF operand 1 (register or memory) into operand 0.  The
;; selector list starts at index 1; its remaining entries are on lines
;; not shown in this view (presumably the 128-bit counterpart of the
;; avx_movshdup256 selector).
3131 (define_insn "sse3_movshdup"
3132 [(set (match_operand:V4SF 0 "register_operand" "=x")
3135 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3137 (parallel [(const_int 1)
3142 "%vmovshdup\t{%1, %0|%0, %1}"
3143 [(set_attr "type" "sse")
3144 (set_attr "prefix_rep" "1")
3145 (set_attr "prefix" "maybe_vex")
3146 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selector 0,0,2,2,4,4,6,6 duplicates each
;; even-indexed element of operand 1.
3148 (define_insn "avx_movsldup256"
3149 [(set (match_operand:V8SF 0 "register_operand" "=x")
3152 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3154 (parallel [(const_int 0) (const_int 0)
3155 (const_int 2) (const_int 2)
3156 (const_int 4) (const_int 4)
3157 (const_int 6) (const_int 6)])))]
3159 "vmovsldup\t{%1, %0|%0, %1}"
3160 [(set_attr "type" "sse")
3161 (set_attr "prefix" "vex")
3162 (set_attr "mode" "V8SF")])
;; movsldup on V4SF operand 1 (register or memory) into operand 0.  The
;; selector list starts at index 0; its remaining entries are on lines
;; not shown in this view (presumably the 128-bit counterpart of the
;; avx_movsldup256 selector).
3164 (define_insn "sse3_movsldup"
3165 [(set (match_operand:V4SF 0 "register_operand" "=x")
3168 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3170 (parallel [(const_int 0)
3175 "%vmovsldup\t{%1, %0|%0, %1}"
3176 [(set_attr "type" "sse")
3177 (set_attr "prefix_rep" "1")
3178 (set_attr "prefix" "maybe_vex")
3179 (set_attr "mode" "V4SF")])
;; Expander for the 256-bit shufps with an immediate mask (operand 3).
;; The 8-bit mask is split into four 2-bit fields and turned into the
;; eight explicit selector indices expected by avx_shufps256_1:
;; fields 0 and 1 as-is, fields 2 and 3 offset by 8, then the same four
;; values again offset by a further 4 (giving +4, +4, +12, +12).
3181 (define_expand "avx_shufps256"
3182 [(match_operand:V8SF 0 "register_operand" "")
3183 (match_operand:V8SF 1 "register_operand" "")
3184 (match_operand:V8SF 2 "nonimmediate_operand" "")
3185 (match_operand:SI 3 "const_int_operand" "")]
3188 int mask = INTVAL (operands[3]);
3189 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3190 GEN_INT ((mask >> 0) & 3),
3191 GEN_INT ((mask >> 2) & 3),
3192 GEN_INT (((mask >> 4) & 3) + 8),
3193 GEN_INT (((mask >> 6) & 3) + 8),
3194 GEN_INT (((mask >> 0) & 3) + 4),
3195 GEN_INT (((mask >> 2) & 3) + 4),
3196 GEN_INT (((mask >> 4) & 3) + 12),
3197 GEN_INT (((mask >> 6) & 3) + 12)));
3201 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the selection spelled out as eight constant
;; operands (3..10) with ranges 0-3, 0-3, 8-11, 8-11, 4-7, 4-7, 12-15,
;; 12-15.  The insn condition additionally requires operands 7..10 to
;; equal operands 3..6 plus 4.  The output code rebuilds the 8-bit
;; immediate from operands 3..6 (operands 5 and 6 with 8 subtracted)
;; and stores it back into operands[3] for printing.
3202 (define_insn "avx_shufps256_1"
3203 [(set (match_operand:V8SF 0 "register_operand" "=x")
3206 (match_operand:V8SF 1 "register_operand" "x")
3207 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3208 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3209 (match_operand 4 "const_0_to_3_operand" "")
3210 (match_operand 5 "const_8_to_11_operand" "")
3211 (match_operand 6 "const_8_to_11_operand" "")
3212 (match_operand 7 "const_4_to_7_operand" "")
3213 (match_operand 8 "const_4_to_7_operand" "")
3214 (match_operand 9 "const_12_to_15_operand" "")
3215 (match_operand 10 "const_12_to_15_operand" "")])))]
3217 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3218 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3219 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3220 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3223 mask = INTVAL (operands[3]);
3224 mask |= INTVAL (operands[4]) << 2;
3225 mask |= (INTVAL (operands[5]) - 8) << 4;
3226 mask |= (INTVAL (operands[6]) - 8) << 6;
3227 operands[3] = GEN_INT (mask);
3229 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3231 [(set_attr "type" "sselog")
3232 (set_attr "length_immediate" "1")
3233 (set_attr "prefix" "vex")
3234 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps with an immediate mask (operand 3).
;; The 8-bit mask is split into four 2-bit fields and passed to
;; gen_sse_shufps_v4sf as explicit selector indices: fields 0 and 1
;; as-is, fields 2 and 3 offset by 4.
3236 (define_expand "sse_shufps"
3237 [(match_operand:V4SF 0 "register_operand" "")
3238 (match_operand:V4SF 1 "register_operand" "")
3239 (match_operand:V4SF 2 "nonimmediate_operand" "")
3240 (match_operand:SI 3 "const_int_operand" "")]
3243 int mask = INTVAL (operands[3]);
3244 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3245 GEN_INT ((mask >> 0) & 3),
3246 GEN_INT ((mask >> 2) & 3),
3247 GEN_INT (((mask >> 4) & 3) + 4),
3248 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps over the VI4F_128 mode iterator, with the selection
;; given as four constant operands (3..6) in the ranges 0-3, 0-3, 4-7,
;; 4-7 over the concatenation of operands 1 and 2.  The output code
;; rebuilds the 8-bit immediate from them (operands 5 and 6 with 4
;; subtracted), stores it in operands[3], and picks the two-operand
;; shufps or three-operand vshufps template by which_alternative.
3252 (define_insn "sse_shufps_<mode>"
3253 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3254 (vec_select:VI4F_128
3255 (vec_concat:<ssedoublevecmode>
3256 (match_operand:VI4F_128 1 "register_operand" "0,x")
3257 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3258 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3259 (match_operand 4 "const_0_to_3_operand" "")
3260 (match_operand 5 "const_4_to_7_operand" "")
3261 (match_operand 6 "const_4_to_7_operand" "")])))]
3265 mask |= INTVAL (operands[3]) << 0;
3266 mask |= INTVAL (operands[4]) << 2;
3267 mask |= (INTVAL (operands[5]) - 4) << 4;
3268 mask |= (INTVAL (operands[6]) - 4) << 6;
3269 operands[3] = GEN_INT (mask);
3271 switch (which_alternative)
3274 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3276 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3281 [(set_attr "isa" "noavx,avx")
3282 (set_attr "type" "sselog")
3283 (set_attr "length_immediate" "1")
3284 (set_attr "prefix" "orig,vex")
3285 (set_attr "mode" "V4SF")])
;; Selects elements 2 and 3 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives (destination <- source):
;;   memory   <- register : movhps
;;   register <- register : movhlps (written to %d0)
;;   register <- "o" memory : movlps reading %H1
3287 (define_insn "sse_storehps"
3288 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3290 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3291 (parallel [(const_int 2) (const_int 3)])))]
3294 %vmovhps\t{%1, %0|%0, %1}
3295 %vmovhlps\t{%1, %d0|%d0, %1}
3296 %vmovlps\t{%H1, %d0|%d0, %H1}"
3297 [(set_attr "type" "ssemov")
3298 (set_attr "prefix" "maybe_vex")
3299 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  ix86_fixup_binary_operands is run on
;; the operands and returns the destination used for the emitted insn;
;; when that is not operands[0], the result is copied to operands[0]
;; afterwards.
3301 (define_expand "sse_loadhps_exp"
3302 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3305 (match_operand:V4SF 1 "nonimmediate_operand" "")
3306 (parallel [(const_int 0) (const_int 1)]))
3307 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3310 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3312 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3314 /* Fix up the destination if needed. */
3315 if (dst != operands[0])
3316 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 selected from the V4SF operand 1 with the
;; V2SF operand 2.
;;   alt 0/1: operand 2 in memory     -> movhps / vmovhps
;;   alt 2/3: operand 2 in a register -> movlhps / vmovlhps
;;   alt 4:   "o" memory destination with operand 1 tied to it; operand 2
;;            is written with movlps to %H0
3321 (define_insn "sse_loadhps"
3322 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3325 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3326 (parallel [(const_int 0) (const_int 1)]))
3327 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3330 movhps\t{%2, %0|%0, %2}
3331 vmovhps\t{%2, %1, %0|%0, %1, %2}
3332 movlhps\t{%2, %0|%0, %2}
3333 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3334 %vmovlps\t{%2, %H0|%H0, %2}"
3335 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3336 (set_attr "type" "ssemov")
3337 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3338 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Selects elements 0 and 1 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives (destination <- source):
;;   memory   <- register : movlps
;;   register <- register : movaps
;;   register <- memory   : movlps (written to %d0)
3340 (define_insn "sse_storelps"
3341 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3343 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3344 (parallel [(const_int 0) (const_int 1)])))]
3347 %vmovlps\t{%1, %0|%0, %1}
3348 %vmovaps\t{%1, %0|%0, %1}
3349 %vmovlps\t{%1, %d0|%d0, %1}"
3350 [(set_attr "type" "ssemov")
3351 (set_attr "prefix" "maybe_vex")
3352 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  ix86_fixup_binary_operands is run on
;; the operands and returns the destination used for the emitted insn;
;; when that is not operands[0], the result is copied to operands[0]
;; afterwards.
3354 (define_expand "sse_loadlps_exp"
3355 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3357 (match_operand:V2SF 2 "nonimmediate_operand" "")
3359 (match_operand:V4SF 1 "nonimmediate_operand" "")
3360 (parallel [(const_int 2) (const_int 3)]))))]
3363 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3365 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3367 /* Fix up the destination if needed. */
3368 if (dst != operands[0])
3369 emit_move_insn (operands[0], dst);
;; Combines the V2SF operand 2 with elements 2 and 3 selected from the
;; V4SF operand 1.
;;   alt 0/1: operand 2 in a register -> shufps / vshufps with 0xe4
;;   alt 2/3: operand 2 in memory     -> movlps / vmovlps
;;   alt 4:   memory destination with operand 1 tied to it; operand 2 is
;;            stored with movlps
;; Alternative 3 takes operand 2 from memory: the three-operand vmovlps
;; template loads its low half from memory, and a register constraint
;; there would only duplicate the constraints of alternative 1.
3374 (define_insn "sse_loadlps"
3375 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3377 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3379 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3380 (parallel [(const_int 2) (const_int 3)]))))]
3383 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3384 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3385 movlps\t{%2, %0|%0, %2}
3386 vmovlps\t{%2, %1, %0|%0, %1, %2}
3387 %vmovlps\t{%2, %0|%0, %2}"
3388 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3389 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3390 (set_attr "length_immediate" "1,1,*,*,*")
3391 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3392 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only V4SF pattern on operands 2 and 1.  The noavx alternative
;; ties operand 1 to the destination and prints movss %2 -> %0; the avx
;; alternative prints the three-operand vmovss.
3394 (define_insn "sse_movss"
3395 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3397 (match_operand:V4SF 2 "register_operand" " x,x")
3398 (match_operand:V4SF 1 "register_operand" " 0,x")
3402 movss\t{%2, %0|%0, %2}
3403 vmovss\t{%2, %1, %0|%0, %1, %2}"
3404 [(set_attr "isa" "noavx,avx")
3405 (set_attr "type" "ssemov")
3406 (set_attr "prefix" "orig,vex")
3407 (set_attr "mode" "SF")])
;; Named expander producing the V4SF operand 0 from the SF operand 1.
;; The preparation code can replace operand 1 by an SFmode register
;; (force_reg) before the pattern is generated.
3409 (define_expand "vec_dupv4sf"
3410 [(set (match_operand:V4SF 0 "register_operand" "")
3412 (match_operand:SF 1 "nonimmediate_operand" "")))]
3416 operands[1] = force_reg (SFmode, operands[1]);
;; V4SF result derived from element 0 of the V4SF register operand 1;
;; emitted as vbroadcastss with a register source.
3419 (define_insn "avx2_vec_dupv4sf"
3420 [(set (match_operand:V4SF 0 "register_operand" "=x")
3423 (match_operand:V4SF 1 "register_operand" "x")
3424 (parallel [(const_int 0)]))))]
3426 "vbroadcastss\t{%1, %0|%0, %1}"
3427 [(set_attr "type" "sselog1")
3428 (set_attr "prefix" "vex")
3429 (set_attr "mode" "V4SF")])
;; VEX-encoded V4SF pattern on the SF operand 1: with operand 1 in a
;; register it prints vshufps with immediate 0 on %1,%1; with operand 1 in
;; memory it prints vbroadcastss.
3431 (define_insn "*vec_dupv4sf_avx"
3432 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3434 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3437 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3438 vbroadcastss\t{%1, %0|%0, %1}"
3439 [(set_attr "type" "sselog1,ssemov")
3440 (set_attr "length_immediate" "1,0")
3441 (set_attr "prefix_extra" "0,1")
3442 (set_attr "prefix" "vex")
3443 (set_attr "mode" "V4SF")])
;; V8SF result derived from element 0 of the V4SF register operand 1;
;; emitted as vbroadcastss with a register source.
3445 (define_insn "avx2_vec_dupv8sf"
3446 [(set (match_operand:V8SF 0 "register_operand" "=x")
3449 (match_operand:V4SF 1 "register_operand" "x")
3450 (parallel [(const_int 0)]))))]
3452 "vbroadcastss\t{%1, %0|%0, %1}"
3453 [(set_attr "type" "sselog1")
3454 (set_attr "prefix" "vex")
3455 (set_attr "mode" "V8SF")])
;; Two-operand form: the SF operand 1 is tied to the destination ("0") and
;; the result is produced in place by shufps with immediate 0 on %0,%0.
3457 (define_insn "*vec_dupv4sf"
3458 [(set (match_operand:V4SF 0 "register_operand" "=x")
3460 (match_operand:SF 1 "register_operand" "0")))]
3462 "shufps\t{$0, %0, %0|%0, %0, 0}"
3463 [(set_attr "type" "sselog1")
3464 (set_attr "length_immediate" "1")
3465 (set_attr "mode" "V4SF")])
3467 ;; Although insertps takes register source, we prefer
3468 ;; unpcklps with register source since it is shorter.
;; Forms the V2SF operand 0 from the SF operands 1 and 2.
;;   alt 0/1: operand 2 in an SSE register -> unpcklps / vunpcklps
;;   alt 2/3: operand 2 in memory          -> insertps / vinsertps, 0x10
;;   alt 4:   operand 1 in memory, operand 2 matching "C" -> movss
;;   alt 5:   MMX register destination ("*y") -> punpckldq
;;   alt 6:   MMX register destination, operand 2 matching "C" -> movd
3469 (define_insn "*vec_concatv2sf_sse4_1"
3470 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3472 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3473 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3476 unpcklps\t{%2, %0|%0, %2}
3477 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3478 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3479 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3480 %vmovss\t{%1, %0|%0, %1}
3481 punpckldq\t{%2, %0|%0, %2}
3482 movd\t{%1, %0|%0, %1}"
3483 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3484 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3485 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3486 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3487 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3488 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3489 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3491 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3492 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3493 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms the V2SF operand 0 from the SF operands 1 and 2 (operand 2 is a
;; register or matches "C").
;;   alt 0: SSE registers, operand 1 tied to the destination -> unpcklps
;;   alt 1: operand 1 in memory, operand 2 "C"               -> movss
;;   alt 2: MMX registers ("*y"), operand 1 tied             -> punpckldq
;;   alt 3: MMX destination, operand 1 in memory, operand 2 "C" -> movd
3494 (define_insn "*vec_concatv2sf_sse"
3495 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3497 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3498 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3501 unpcklps\t{%2, %0|%0, %2}
3502 movss\t{%1, %0|%0, %1}
3503 punpckldq\t{%2, %0|%0, %2}
3504 movd\t{%1, %0|%0, %1}"
3505 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3506 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms the V4SF operand 0 from the V2SF operands 1 and 2.
;;   alt 0/1: operand 2 in a register -> movlhps / vmovlhps
;;   alt 2/3: operand 2 in memory     -> movhps / vmovhps
3508 (define_insn "*vec_concatv4sf"
3509 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3511 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3512 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3515 movlhps\t{%2, %0|%0, %2}
3516 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3517 movhps\t{%2, %0|%0, %2}
3518 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3519 [(set_attr "isa" "noavx,avx,noavx,avx")
3520 (set_attr "type" "ssemov")
3521 (set_attr "prefix" "orig,vex,orig,vex")
3522 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Named expander for the V_128 modes.  All work is delegated to
;; ix86_expand_vector_init, called with false, the destination register
;; (operand 0) and operand 1.
3524 (define_expand "vec_init<mode>"
3525 [(match_operand:V_128 0 "register_operand" "")
3526 (match_operand 1 "" "")]
3529 ix86_expand_vector_init (false, operands[0], operands[1]);
3533 ;; Avoid combining registers from different units in a single alternative,
3534 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven-alternative pattern over VI4F_128 built from a vec_duplicate of
;; the scalar operand 2 and the vector operand 1.
;;   alt 0-3:  operand 1 matches "C"; operand 2 comes from an SSE register,
;;             memory or a general register (vinsertps 0xe, vmov<suffix>,
;;             vmovd, movss)
;;   alt 4-5:  operand 2 in an SSE register -> movss / vmovss
;;   alt 6-7:  operand 2 in a general register or memory -> pinsrd /
;;             vpinsrd with immediate 0
;;   alt 8-10: memory destination with operand 1 tied to it; operand 2 is
;;             "x", "fF" or "*r" and the type attribute is ssemov, fmov,
;;             imov respectively
3535 (define_insn "vec_set<mode>_0"
3536 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3537 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3539 (vec_duplicate:VI4F_128
3540 (match_operand:<ssescalarmode> 2 "general_operand"
3541 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3542 (match_operand:VI4F_128 1 "vector_move_operand"
3543 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3547 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3548 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3549 %vmovd\t{%2, %0|%0, %2}
3550 movss\t{%2, %0|%0, %2}
3551 movss\t{%2, %0|%0, %2}
3552 vmovss\t{%2, %1, %0|%0, %1, %2}
3553 pinsrd\t{$0, %2, %0|%0, %2, 0}
3554 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3558 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3560 (cond [(eq_attr "alternative" "0,6,7")
3561 (const_string "sselog")
3562 (eq_attr "alternative" "9")
3563 (const_string "fmov")
3564 (eq_attr "alternative" "10")
3565 (const_string "imov")
3567 (const_string "ssemov")))
3568 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3569 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3570 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3571 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3573 ;; A subset is vec_setv4sf.
;; insertps/vinsertps of the SF operand 2 (register or memory) into the
;; V4SF register operand 1.  Operand 3 is a constant whose exact_log2 must
;; be a valid V4SF element index (insn condition); the output code turns
;; it into the instruction immediate by shifting that index left by 4 and
;; stores the result back in operands[3].
3574 (define_insn "*vec_setv4sf_sse4_1"
3575 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3578 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3579 (match_operand:V4SF 1 "register_operand" "0,x")
3580 (match_operand:SI 3 "const_int_operand" "")))]
3582 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3583 < GET_MODE_NUNITS (V4SFmode))"
3585 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3586 switch (which_alternative)
3589 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3591 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3596 [(set_attr "isa" "noavx,avx")
3597 (set_attr "type" "sselog")
3598 (set_attr "prefix_data16" "1,*")
3599 (set_attr "prefix_extra" "1")
3600 (set_attr "length_immediate" "1")
3601 (set_attr "prefix" "orig,vex")
3602 (set_attr "mode" "V4SF")])
;; insertps/vinsertps expressed as an unspec of operands 2, 1 and the
;; immediate operand 3 (0..255).  When operand 2 is in memory, the output
;; code takes the immediate shifted right by 6 as an element count,
;; re-addresses operand 2 as SFmode at count * 4 bytes, and keeps only the
;; low six bits (0x3f) of the immediate.
3604 (define_insn "sse4_1_insertps"
3605 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3606 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3607 (match_operand:V4SF 1 "register_operand" "0,x")
3608 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3612 if (MEM_P (operands[2]))
3614 unsigned count_s = INTVAL (operands[3]) >> 6;
3616 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3617 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3619 switch (which_alternative)
3622 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3624 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3629 [(set_attr "isa" "noavx,avx")
3630 (set_attr "type" "sselog")
3631 (set_attr "prefix_data16" "1,*")
3632 (set_attr "prefix_extra" "1")
3633 (set_attr "length_immediate" "1")
3634 (set_attr "prefix" "orig,vex")
3635 (set_attr "mode" "V4SF")])
;; Applies after reload with TARGET_SSE to a VI4F_128 memory destination
;; and a vec_duplicate of the scalar non-memory operand 1.  The emitted
;; move targets operand 0 re-addressed in the scalar mode at offset 0.
3638 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3640 (vec_duplicate:VI4F_128
3641 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3644 "TARGET_SSE && reload_completed"
3647 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Named expander for the V_128 modes.  All work is delegated to
;; ix86_expand_vector_set, called with false, the vector register
;; (operand 0), the scalar register (operand 1) and the integer value of
;; the constant operand 2.
3652 (define_expand "vec_set<mode>"
3653 [(match_operand:V_128 0 "register_operand" "")
3654 (match_operand:<ssescalarmode> 1 "register_operand" "")
3655 (match_operand 2 "const_int_operand" "")]
3658 ix86_expand_vector_set (false, operands[0], operands[1],
3659 INTVAL (operands[2]));
;; Element 0 of the V4SF operand 1 as the SF operand 0 (SSE register,
;; memory, x87 register or general register).  Memory-to-memory is
;; rejected by the insn condition.  After reload the insn is split into a
;; plain move from an SFmode view of operand 1, obtained with gen_rtx_REG
;; on the same register number or with gen_lowpart.
3663 (define_insn_and_split "*vec_extractv4sf_0"
3664 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3666 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3667 (parallel [(const_int 0)])))]
3668 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3670 "&& reload_completed"
3673 rtx op1 = operands[1];
3675 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3677 op1 = gen_lowpart (SFmode, op1);
3678 emit_move_insn (operands[0], op1);
;; Expander over the V_256 modes.  The value of operand 2 (0 or 1, per its
;; predicate) chooses between gen_vec_extract_lo_<mode> and
;; gen_vec_extract_hi_<mode>; the chosen generator is then emitted with
;; operands 0 and 1.
3682 (define_expand "avx_vextractf128<mode>"
3683 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3684 (match_operand:V_256 1 "register_operand" "")
3685 (match_operand:SI 2 "const_0_to_1_operand" "")]
3688 rtx (*insn)(rtx, rtx);
3690 switch (INTVAL (operands[2]))
3693 insn = gen_vec_extract_lo_<mode>;
3696 insn = gen_vec_extract_hi_<mode>;
3702 emit_insn (insn (operands[0], operands[1]));
;; Selects elements 0 and 1 of a VI8F_256 operand into the half-width
;; vector operand 0.  After reload it is split into a plain move from a
;; <ssehalfvecmode> view of operand 1, obtained with gen_rtx_REG on the
;; same register number or with gen_lowpart.
3706 (define_insn_and_split "vec_extract_lo_<mode>"
3707 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3708 (vec_select:<ssehalfvecmode>
3709 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3710 (parallel [(const_int 0) (const_int 1)])))]
3713 "&& reload_completed"
3716 rtx op1 = operands[1];
3718 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3720 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3721 emit_move_insn (operands[0], op1);
;; Selects elements 2 and 3 of a VI8F_256 register into the half-width
;; operand 0 (register or memory), emitted as vextractf128 with
;; immediate 1.
3725 (define_insn "vec_extract_hi_<mode>"
3726 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3727 (vec_select:<ssehalfvecmode>
3728 (match_operand:VI8F_256 1 "register_operand" "x,x")
3729 (parallel [(const_int 2) (const_int 3)])))]
3731 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3732 [(set_attr "type" "sselog")
3733 (set_attr "prefix_extra" "1")
3734 (set_attr "length_immediate" "1")
3735 (set_attr "memory" "none,store")
3736 (set_attr "prefix" "vex")
3737 (set_attr "mode" "V8SF")])
;; Selects elements 0-3 of a VI4F_256 operand into the half-width vector
;; operand 0.  After reload it is split into a plain move from a
;; <ssehalfvecmode> view of operand 1, obtained with gen_rtx_REG on the
;; same register number or with gen_lowpart.
3739 (define_insn_and_split "vec_extract_lo_<mode>"
3740 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3741 (vec_select:<ssehalfvecmode>
3742 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3743 (parallel [(const_int 0) (const_int 1)
3744 (const_int 2) (const_int 3)])))]
3747 "&& reload_completed"
3750 rtx op1 = operands[1];
3752 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3754 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3755 emit_move_insn (operands[0], op1);
;; Selects elements 4-7 of a VI4F_256 register into the half-width
;; operand 0 (register or memory), emitted as vextractf128 with
;; immediate 1.
3759 (define_insn "vec_extract_hi_<mode>"
3760 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3761 (vec_select:<ssehalfvecmode>
3762 (match_operand:VI4F_256 1 "register_operand" "x,x")
3763 (parallel [(const_int 4) (const_int 5)
3764 (const_int 6) (const_int 7)])))]
3766 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3767 [(set_attr "type" "sselog")
3768 (set_attr "prefix_extra" "1")
3769 (set_attr "length_immediate" "1")
3770 (set_attr "memory" "none,store")
3771 (set_attr "prefix" "vex")
3772 (set_attr "mode" "V8SF")])
;; Selects elements 0-7 of the V16HI operand 1 into the V8HI operand 0.
;; After reload it is split into a plain move from a V8HImode view of
;; operand 1, obtained with gen_rtx_REG on the same register number or
;; with gen_lowpart.
3774 (define_insn_and_split "vec_extract_lo_v16hi"
3775 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3777 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3778 (parallel [(const_int 0) (const_int 1)
3779 (const_int 2) (const_int 3)
3780 (const_int 4) (const_int 5)
3781 (const_int 6) (const_int 7)])))]
3784 "&& reload_completed"
3787 rtx op1 = operands[1];
3789 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3791 op1 = gen_lowpart (V8HImode, op1);
3792 emit_move_insn (operands[0], op1);
;; Selects elements 8-15 of the V16HI register operand 1 into the V8HI
;; operand 0 (register or memory), emitted as vextractf128 with
;; immediate 1.
3796 (define_insn "vec_extract_hi_v16hi"
3797 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3799 (match_operand:V16HI 1 "register_operand" "x,x")
3800 (parallel [(const_int 8) (const_int 9)
3801 (const_int 10) (const_int 11)
3802 (const_int 12) (const_int 13)
3803 (const_int 14) (const_int 15)])))]
3805 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3806 [(set_attr "type" "sselog")
3807 (set_attr "prefix_extra" "1")
3808 (set_attr "length_immediate" "1")
3809 (set_attr "memory" "none,store")
3810 (set_attr "prefix" "vex")
3811 (set_attr "mode" "V8SF")])
;; Selects elements 0-15 of the V32QI operand 1 into the V16QI operand 0.
;; After reload it is split into a plain move from a V16QImode view of
;; operand 1, obtained with gen_rtx_REG on the same register number or
;; with gen_lowpart.
3813 (define_insn_and_split "vec_extract_lo_v32qi"
3814 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3816 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3817 (parallel [(const_int 0) (const_int 1)
3818 (const_int 2) (const_int 3)
3819 (const_int 4) (const_int 5)
3820 (const_int 6) (const_int 7)
3821 (const_int 8) (const_int 9)
3822 (const_int 10) (const_int 11)
3823 (const_int 12) (const_int 13)
3824 (const_int 14) (const_int 15)])))]
3827 "&& reload_completed"
3830 rtx op1 = operands[1];
3832 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3834 op1 = gen_lowpart (V16QImode, op1);
3835 emit_move_insn (operands[0], op1);
;; Selects elements 16-31 of the V32QI register operand 1 into the V16QI
;; operand 0 (register or memory), emitted as vextractf128 with
;; immediate 1.
3839 (define_insn "vec_extract_hi_v32qi"
3840 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3842 (match_operand:V32QI 1 "register_operand" "x,x")
3843 (parallel [(const_int 16) (const_int 17)
3844 (const_int 18) (const_int 19)
3845 (const_int 20) (const_int 21)
3846 (const_int 22) (const_int 23)
3847 (const_int 24) (const_int 25)
3848 (const_int 26) (const_int 27)
3849 (const_int 28) (const_int 29)
3850 (const_int 30) (const_int 31)])))]
3852 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3853 [(set_attr "type" "sselog")
3854 (set_attr "prefix_extra" "1")
3855 (set_attr "length_immediate" "1")
3856 (set_attr "memory" "none,store")
3857 (set_attr "prefix" "vex")
3858 (set_attr "mode" "V8SF")])
;; Extracts the element of the V4SF register operand 1 chosen by the
;; constant operand 2 (0..3) into the SF operand 0, which may be a general
;; register or memory ("=rm"); emitted as extractps.
3860 (define_insn "*sse4_1_extractps"
3861 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3863 (match_operand:V4SF 1 "register_operand" "x")
3864 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3866 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3867 [(set_attr "type" "sselog")
3868 (set_attr "prefix_data16" "1")
3869 (set_attr "prefix_extra" "1")
3870 (set_attr "length_immediate" "1")
3871 (set_attr "prefix" "maybe_vex")
3872 (set_attr "mode" "V4SF")])
;; Element extraction from a V4SF in "o" memory, indexed by the constant
;; operand 2 (0..3).  The replacement code loads the element directly: a
;; move into operand 0 from operand 1 re-addressed as SFmode at byte
;; offset i*4.
3874 (define_insn_and_split "*vec_extract_v4sf_mem"
3875 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3877 (match_operand:V4SF 1 "memory_operand" "o")
3878 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3884 int i = INTVAL (operands[2]);
3886 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3890 ;; Modes handled by vec_extract patterns.
;; The 256-bit entries V8SF and V4DF are enabled only under TARGET_AVX;
;; the remaining entries carry no condition.
3891 (define_mode_iterator VEC_EXTRACT_MODE
3892 [V16QI V8HI V4SI V2DI
3893 (V8SF "TARGET_AVX") V4SF
3894 (V4DF "TARGET_AVX") V2DF])
;; Named expander over VEC_EXTRACT_MODE.  All work is delegated to
;; ix86_expand_vector_extract, called with false, the scalar destination
;; (operand 0), the vector register (operand 1) and the integer value of
;; the constant operand 2.
3896 (define_expand "vec_extract<mode>"
3897 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3898 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3899 (match_operand 2 "const_int_operand" "")]
3902 ix86_expand_vector_extract (false, operands[0], operands[1],
3903 INTVAL (operands[2]));
3907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3909 ;; Parallel double-precision floating point element swizzling
3911 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3913 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements 1, 5, 3, 7 from the pair of V4DF operands 1 and 2 into
;; the V4DF operand 0; emitted as vunpckhpd.
3914 (define_insn "avx_unpckhpd256"
3915 [(set (match_operand:V4DF 0 "register_operand" "=x")
3918 (match_operand:V4DF 1 "register_operand" "x")
3919 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3920 (parallel [(const_int 1) (const_int 5)
3921 (const_int 3) (const_int 7)])))]
3923 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3924 [(set_attr "type" "sselog")
3925 (set_attr "prefix" "vex")
3926 (set_attr "mode" "V4DF")])
;; Three-step expansion on V4DF.  Two intermediate selections of operands
;; 1 and 2 use indices {0,4,2,6} and {1,5,3,7}; the final set of operand 0
;; selects indices {2,3,6,7}.  Operands 3 and 4 are fresh V4DF pseudo
;; registers created in the preparation code.
3928 (define_expand "vec_interleave_highv4df"
3932 (match_operand:V4DF 1 "register_operand" "x")
3933 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3934 (parallel [(const_int 0) (const_int 4)
3935 (const_int 2) (const_int 6)])))
3941 (parallel [(const_int 1) (const_int 5)
3942 (const_int 3) (const_int 7)])))
3943 (set (match_operand:V4DF 0 "register_operand" "")
3948 (parallel [(const_int 2) (const_int 3)
3949 (const_int 6) (const_int 7)])))]
3952 operands[3] = gen_reg_rtx (V4DFmode);
3953 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander on V2DF.  When ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) rejects the operand combination, operand 2 is forced into
;; a V2DFmode register.
3957 (define_expand "vec_interleave_highv2df"
3958 [(set (match_operand:V2DF 0 "register_operand" "")
3961 (match_operand:V2DF 1 "nonimmediate_operand" "")
3962 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3963 (parallel [(const_int 1)
3967 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3968 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave, valid when TARGET_SSE2 holds and
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;;   alt 0/1: register operands          -> unpckhpd / vunpckhpd
;;   alt 2:   operand 1 in "o" memory, operand 2 tied to it -> movddup of
;;            %H1 (isa sse3)
;;   alt 3/4: operand 1 in "o" memory    -> movlpd / vmovlpd reading %H1
;;   alt 5:   memory destination tied to operand 2 -> movhpd store of %1
3971 (define_insn "*vec_interleave_highv2df"
3972 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3975 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3976 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3977 (parallel [(const_int 1)
3979 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3981 unpckhpd\t{%2, %0|%0, %2}
3982 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3983 %vmovddup\t{%H1, %0|%0, %H1}
3984 movlpd\t{%H1, %0|%0, %H1}
3985 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3986 %vmovhpd\t{%1, %0|%0, %1}"
3987 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
3988 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3989 (set_attr "prefix_data16" "*,*,*,1,*,1")
3990 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3991 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
3993 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander whose pattern selects indices {0,4,2,6}; only the V4DF
;; operand 1 (register or memory) appears as an input in the visible
;; pattern.
3994 (define_expand "avx_movddup256"
3995 [(set (match_operand:V4DF 0 "register_operand" "")
3998 (match_operand:V4DF 1 "nonimmediate_operand" "")
4000 (parallel [(const_int 0) (const_int 4)
4001 (const_int 2) (const_int 6)])))]
;; Expander whose pattern selects indices {0,4,2,6} from the pair of V4DF
;; operands 1 (register) and 2 (register or memory).
4004 (define_expand "avx_unpcklpd256"
4005 [(set (match_operand:V4DF 0 "register_operand" "")
4008 (match_operand:V4DF 1 "register_operand" "")
4009 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4010 (parallel [(const_int 0) (const_int 4)
4011 (const_int 2) (const_int 6)])))]
;; Selects indices {0,4,2,6} from the pair of V4DF operands 1 and 2.
;;   alt 0: operand 2 tied to operand 1 ("1") -> vmovddup of %1
;;   alt 1: distinct operands                 -> vunpcklpd
;; The insn condition allows operand 1 to be in memory only when it is
;; rtx_equal_p to operand 2.
4014 (define_insn "*avx_unpcklpd256"
4015 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4018 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4019 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4020 (parallel [(const_int 0) (const_int 4)
4021 (const_int 2) (const_int 6)])))]
4023 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4025 vmovddup\t{%1, %0|%0, %1}
4026 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4027 [(set_attr "type" "sselog")
4028 (set_attr "prefix" "vex")
4029 (set_attr "mode" "V4DF")])
;; Three-step expansion on V4DF.  Two intermediate selections of operands
;; 1 and 2 use indices {0,4,2,6} and {1,5,3,7}; the final set of operand 0
;; selects indices {0,1,4,5}.  Operands 3 and 4 are fresh V4DF pseudo
;; registers created in the preparation code.
4031 (define_expand "vec_interleave_lowv4df"
4035 (match_operand:V4DF 1 "register_operand" "x")
4036 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4037 (parallel [(const_int 0) (const_int 4)
4038 (const_int 2) (const_int 6)])))
4044 (parallel [(const_int 1) (const_int 5)
4045 (const_int 3) (const_int 7)])))
4046 (set (match_operand:V4DF 0 "register_operand" "")
4051 (parallel [(const_int 0) (const_int 1)
4052 (const_int 4) (const_int 5)])))]
4055 operands[3] = gen_reg_rtx (V4DFmode);
4056 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander on V2DF.  When ix86_vec_interleave_v2df_operator_ok
;; (operands, 0) rejects the operand combination, operand 1 is forced into
;; a V2DFmode register.
4059 (define_expand "vec_interleave_lowv2df"
4060 [(set (match_operand:V2DF 0 "register_operand" "")
4063 (match_operand:V2DF 1 "nonimmediate_operand" "")
4064 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4065 (parallel [(const_int 0)
4069 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4070 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave, valid when TARGET_SSE2 holds and
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;;   alt 0/1: register operands       -> unpcklpd / vunpcklpd
;;   alt 2:   operand 1 in memory, operand 2 tied to it -> movddup of %1
;;            (isa sse3)
;;   alt 3/4: operand 2 in memory     -> movhpd / vmovhpd
;;   alt 5:   "o" memory destination tied to operand 1 -> movlpd store of
;;            %2 to %H0
4073 (define_insn "*vec_interleave_lowv2df"
4074 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4077 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4078 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4079 (parallel [(const_int 0)
4081 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4083 unpcklpd\t{%2, %0|%0, %2}
4084 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4085 %vmovddup\t{%1, %0|%0, %1}
4086 movhpd\t{%2, %0|%0, %2}
4087 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4088 %vmovlpd\t{%2, %H0|%H0, %2}"
4089 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4090 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4091 (set_attr "prefix_data16" "*,*,*,1,*,1")
4092 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4093 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Applies after reload with TARGET_SSE3 to a V2DF memory destination and
;; a V2DF register source.  The DFmode view of the source register is
;; stored twice, at byte offsets 0 and 8 of the destination.
4096 [(set (match_operand:V2DF 0 "memory_operand" "")
4099 (match_operand:V2DF 1 "register_operand" "")
4101 (parallel [(const_int 0)
4103 "TARGET_SSE3 && reload_completed"
4106 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4107 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4108 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Applies with TARGET_SSE3 when the V2DF source is in memory and the two
;; selector constants satisfy operands[2] + 2 == operands[3].  The insn
;; becomes a vec_duplicate of the DF at byte offset operands[2] * 8 within
;; operand 1.
4113 [(set (match_operand:V2DF 0 "register_operand" "")
4116 (match_operand:V2DF 1 "memory_operand" "")
4118 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4119 (match_operand:SI 3 "const_int_operand" "")])))]
4120 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4121 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4123 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for the V4DF shuffle that takes its selector as one immediate
;; (operand 3).  Mask bits are decoded into element indices and passed to
;; gen_avx_shufpd256_1: bit 1 chooses 5 or 4, bit 2 chooses 3 or 2, and
;; bit 3 chooses 7 or 6.
4126 (define_expand "avx_shufpd256"
4127 [(match_operand:V4DF 0 "register_operand" "")
4128 (match_operand:V4DF 1 "register_operand" "")
4129 (match_operand:V4DF 2 "nonimmediate_operand" "")
4130 (match_operand:SI 3 "const_int_operand" "")]
4133 int mask = INTVAL (operands[3]);
4134 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4136 GEN_INT (mask & 2 ? 5 : 4),
4137 GEN_INT (mask & 4 ? 3 : 2),
4138 GEN_INT (mask & 8 ? 7 : 6)));
;; V4DF shuffle of operands 1 and 2 with one index operand per result
;; element: operand 3 in 0..1, operand 4 in 4..5, operand 5 in 2..3 and
;; operand 6 in 6..7 (per their predicates).  The output code rebuilds the
;; 4-bit immediate, one bit per index after subtracting 4, 2 and 6 from
;; operands 4, 5 and 6, stores it in operands[3] and prints vshufpd.
4142 (define_insn "avx_shufpd256_1"
4143 [(set (match_operand:V4DF 0 "register_operand" "=x")
4146 (match_operand:V4DF 1 "register_operand" "x")
4147 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4148 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4149 (match_operand 4 "const_4_to_5_operand" "")
4150 (match_operand 5 "const_2_to_3_operand" "")
4151 (match_operand 6 "const_6_to_7_operand" "")])))]
4155 mask = INTVAL (operands[3]);
4156 mask |= (INTVAL (operands[4]) - 4) << 1;
4157 mask |= (INTVAL (operands[5]) - 2) << 2;
4158 mask |= (INTVAL (operands[6]) - 6) << 3;
4159 operands[3] = GEN_INT (mask);
4161 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4163 [(set_attr "type" "sselog")
4164 (set_attr "length_immediate" "1")
4165 (set_attr "prefix" "vex")
4166 (set_attr "mode" "V4DF")])
;; Expander for the V2DF shuffle that takes its selector as one immediate
;; (operand 3).  Mask bits are decoded into element indices for
;; gen_sse2_shufpd_v2df; bit 1 chooses index 3 or 2 for the last argument.
4168 (define_expand "sse2_shufpd"
4169 [(match_operand:V2DF 0 "register_operand" "")
4170 (match_operand:V2DF 1 "register_operand" "")
4171 (match_operand:V2DF 2 "nonimmediate_operand" "")
4172 (match_operand:SI 3 "const_int_operand" "")]
4175 int mask = INTVAL (operands[3]);
4176 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4178 GEN_INT (mask & 2 ? 3 : 2)));
4182 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is the only unconditional entry.  The integer modes and V2DF
;; require TARGET_SSE2; the 256-bit modes V8SF and V4DF require TARGET_AVX.
4183 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4184 [(V16QI "TARGET_SSE2")
4185 (V8HI "TARGET_SSE2")
4186 (V4SI "TARGET_SSE2")
4187 (V2DI "TARGET_SSE2")
4188 (V8SF "TARGET_AVX") V4SF
4189 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named expander over VEC_EXTRACT_EVENODD_MODE with three register
;; operands.  Delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
4191 (define_expand "vec_extract_even<mode>"
4192 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4193 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4194 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4197 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Named expander over VEC_EXTRACT_EVENODD_MODE with three register
;; operands.  Delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 1.
4201 (define_expand "vec_extract_odd<mode>"
4202 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4203 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4204 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4207 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4211 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high interleave of operand 1 (register) and operand 2 (register or
;; memory); the selection starts at index 1 and is emitted as vpunpckhqdq.
4212 (define_insn "avx2_interleave_highv4di"
4213 [(set (match_operand:V4DI 0 "register_operand" "=x")
4216 (match_operand:V4DI 1 "register_operand" "x")
4217 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4218 (parallel [(const_int 1)
4223 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4224 [(set_attr "type" "sselog")
4225 (set_attr "prefix" "vex")
4226 (set_attr "mode" "OI")])
;; V2DI high interleave of operand 1 (register) and operand 2 (register or
;; memory); the selection starts at index 1.  The noavx alternative ties
;; operand 1 to the destination and prints punpckhqdq; the avx alternative
;; prints the three-operand vpunpckhqdq.
4228 (define_insn "vec_interleave_highv2di"
4229 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4232 (match_operand:V2DI 1 "register_operand" "0,x")
4233 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4234 (parallel [(const_int 1)
4238 punpckhqdq\t{%2, %0|%0, %2}
4239 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4240 [(set_attr "isa" "noavx,avx")
4241 (set_attr "type" "sselog")
4242 (set_attr "prefix_data16" "1,*")
4243 (set_attr "prefix" "orig,vex")
4244 (set_attr "mode" "TI")])
;; V4DI low interleave of operand 1 (register) and operand 2 (register or
;; memory); the selection starts at index 0 and is emitted as vpunpcklqdq.
4246 (define_insn "avx2_interleave_lowv4di"
4247 [(set (match_operand:V4DI 0 "register_operand" "=x")
4250 (match_operand:V4DI 1 "register_operand" "x")
4251 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4252 (parallel [(const_int 0)
4257 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4258 [(set_attr "type" "sselog")
4259 (set_attr "prefix" "vex")
4260 (set_attr "mode" "OI")])
;; V2DI low interleave of operand 1 (register) and operand 2 (register or
;; memory); the selection starts at index 0.  The noavx alternative ties
;; operand 1 to the destination and prints punpcklqdq; the avx alternative
;; prints the three-operand vpunpcklqdq.
4262 (define_insn "vec_interleave_lowv2di"
4263 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4266 (match_operand:V2DI 1 "register_operand" "0,x")
4267 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4268 (parallel [(const_int 0)
4272 punpcklqdq\t{%2, %0|%0, %2}
4273 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4274 [(set_attr "isa" "noavx,avx")
4275 (set_attr "type" "sselog")
4276 (set_attr "prefix_data16" "1,*")
4277 (set_attr "prefix" "orig,vex")
4278 (set_attr "mode" "TI")])
;; Shuffle insn for the VI8F_128 modes: a vec_select out of the vec_concat
;; of operands 1 and 2.  Operand 3 is in 0..1 and operand 4 in 2..3 (per
;; their predicates).  The output code packs them into a 2-bit immediate
;; (operand 4 minus 2 goes to bit 1), stores it in operands[3], and
;; returns the shufpd or the vshufpd template depending on
;; which_alternative (isa: noavx,avx).
4280 (define_insn "sse2_shufpd_<mode>"
4281 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4282 (vec_select:VI8F_128
4283 (vec_concat:<ssedoublevecmode>
4284 (match_operand:VI8F_128 1 "register_operand" "0,x")
4285 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4286 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4287 (match_operand 4 "const_2_to_3_operand" "")])))]
4291 mask = INTVAL (operands[3]);
4292 mask |= (INTVAL (operands[4]) - 2) << 1;
4293 operands[3] = GEN_INT (mask);
4295 switch (which_alternative)
4298 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4300 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4305 [(set_attr "isa" "noavx,avx")
4306 (set_attr "type" "sselog")
4307 (set_attr "length_immediate" "1")
4308 (set_attr "prefix" "orig,vex")
4309 (set_attr "mode" "V2DF")])
4311 ;; Avoid combining registers from different units in a single alternative,
4312 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 1 of the V2DF operand 1 as the DF operand 0; memory-to-memory
;; is rejected by the insn condition.  Six alternatives: a store to memory
;; (movhpd), register-to-register forms typed sselog1 (the avx one prints
;; vunpckhpd on %d1), and three alternatives reading "o" memory into an
;; SSE, x87 or general register (types ssemov, fmov, imov).
;; prefix_data16 is computed: the test involves alternative 0 together
;; with TARGET_AVX being zero.
4313 (define_insn "sse2_storehpd"
4314 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4316 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4317 (parallel [(const_int 1)])))]
4318 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4320 %vmovhpd\t{%1, %0|%0, %1}
4322 vunpckhpd\t{%d1, %0|%0, %d1}
4326 [(set_attr "isa" "*,noavx,avx,*,*,*")
4327 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4328 (set (attr "prefix_data16")
4330 (and (eq_attr "alternative" "0")
4331 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4333 (const_string "*")))
4334 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4335 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Applies after reload with TARGET_SSE2 when element 1 of a V2DF in
;; memory is extracted into a DF register.  The insn becomes a plain move
;; from operand 1 re-addressed as DFmode at byte offset 8.
4338 [(set (match_operand:DF 0 "register_operand" "")
4340 (match_operand:V2DF 1 "memory_operand" "")
4341 (parallel [(const_int 1)])))]
4342 "TARGET_SSE2 && reload_completed"
4343 [(set (match_dup 0) (match_dup 1))]
4344 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element 1 of the V2DF operand 1 as the DF operand 0, for TARGET_SSE
;; without TARGET_SSE2; memory-to-memory is rejected.
;;   memory   <- register   : movhps
;;   register <- register   : movhlps
;;   register <- "o" memory : movlps reading %H1
4346 (define_insn "*vec_extractv2df_1_sse"
4347 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4349 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4350 (parallel [(const_int 1)])))]
4351 "!TARGET_SSE2 && TARGET_SSE
4352 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4354 movhps\t{%1, %0|%0, %1}
4355 movhlps\t{%1, %0|%0, %1}
4356 movlps\t{%H1, %0|%0, %H1}"
4357 [(set_attr "type" "ssemov")
4358 (set_attr "mode" "V2SF,V4SF,V2SF")])
4360 ;; Avoid combining registers from different units in a single alternative,
4361 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 0 of the V2DF operand 1 as the DF operand 0; memory-to-memory
;; is rejected by the insn condition.  Five alternatives: a store to
;; memory (movlpd, the only one with prefix_data16 set), two with an SSE
;; register destination, and two loading from memory into an x87 or a
;; general register (types fmov, imov).
4362 (define_insn "sse2_storelpd"
4363 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4365 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4366 (parallel [(const_int 0)])))]
4367 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4369 %vmovlpd\t{%1, %0|%0, %1}
4374 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4375 (set_attr "prefix_data16" "1,*,*,*,*")
4376 (set_attr "prefix" "maybe_vex")
4377 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Applies after reload with TARGET_SSE2 when element 0 of the V2DF
;; operand 1 is extracted into a DF register.  The replacement code moves
;; a DFmode view of operand 1, obtained with gen_rtx_REG on the same
;; register number or with gen_lowpart, into operand 0.
4380 [(set (match_operand:DF 0 "register_operand" "")
4382 (match_operand:V2DF 1 "nonimmediate_operand" "")
4383 (parallel [(const_int 0)])))]
4384 "TARGET_SSE2 && reload_completed"
4387 rtx op1 = operands[1];
4389 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4391 op1 = gen_lowpart (DFmode, op1);
4392 emit_move_insn (operands[0], op1);
;; Element 0 of the V2DF operand 1 as the DF operand 0, for TARGET_SSE
;; without TARGET_SSE2; memory-to-memory is rejected.
;;   memory   <- register : movlps
;;   register <- register : movaps
;;   register <- memory   : movlps
4396 (define_insn "*vec_extractv2df_0_sse"
4397 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4399 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4400 (parallel [(const_int 0)])))]
4401 "!TARGET_SSE2 && TARGET_SSE
4402 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4404 movlps\t{%1, %0|%0, %1}
4405 movaps\t{%1, %0|%0, %1}
4406 movlps\t{%1, %0|%0, %1}"
4407 [(set_attr "type" "ssemov")
4408 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The operands are first passed
;; through ix86_fixup_binary_operands; sse2_loadhpd is emitted into the
;; destination that helper returns, and the result is copied to operand 0
;; when the helper picked a different destination.
4410 (define_expand "sse2_loadhpd_exp"
4411 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4414 (match_operand:V2DF 1 "nonimmediate_operand" "")
4415 (parallel [(const_int 0)]))
4416 (match_operand:DF 2 "nonimmediate_operand" "")))]
4419 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4421 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4423 /* Fix up the destination if needed. */
4424 if (dst != operands[0])
4425 emit_move_insn (operands[0], dst);
4430 ;; Avoid combining registers from different units in a single alternative,
4431 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from element 0 of operand 1 and the DFmode operand 2.
;; The "isa" attribute pairs legacy (noavx) and VEX (avx) encodings for the
;; movhpd and unpcklpd forms; the remaining three alternatives take
;; operand 2 from a register and are typed ssemov, fmov and imov.
;; Operands 1 and 2 may not both be memory (see the condition).
4432 (define_insn "sse2_loadhpd"
4433 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4437 (match_operand:V2DF 1 "nonimmediate_operand"
4439 (parallel [(const_int 0)]))
4440 (match_operand:DF 2 "nonimmediate_operand"
4441 " m,m,x,x,x,*f,r")))]
4442 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4444 movhpd\t{%2, %0|%0, %2}
4445 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4446 unpcklpd\t{%2, %0|%0, %2}
4447 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4451 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4452 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4453 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4454 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4455 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): the V2DF destination is memory and keeps
;; its own element 0, so only the DFmode register operand has to be written.
;; It is stored as a DFmode value at byte offset 8 of the destination.
4458 [(set (match_operand:V2DF 0 "memory_operand" "")
4460 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4461 (match_operand:DF 1 "register_operand" "")))]
4462 "TARGET_SSE2 && reload_completed"
4463 [(set (match_dup 0) (match_dup 1))]
4464 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The operands are first passed
;; through ix86_fixup_binary_operands; sse2_loadlpd is emitted into the
;; destination that helper returns, and the result is copied to operand 0
;; when the helper picked a different destination.
4466 (define_expand "sse2_loadlpd_exp"
4467 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4469 (match_operand:DF 2 "nonimmediate_operand" "")
4471 (match_operand:V2DF 1 "nonimmediate_operand" "")
4472 (parallel [(const_int 1)]))))]
4475 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4477 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4479 /* Fix up the destination if needed. */
4480 if (dst != operands[0])
4481 emit_move_insn (operands[0], dst);
4486 ;; Avoid combining registers from different units in a single alternative,
4487 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from the DFmode operand 2 and element 1 of operand 1.
;; Eleven alternatives; "isa" marks which are legacy-only (noavx), VEX-only
;; (avx) or common (*).  Alternative 5 (shufpd, one immediate byte) is typed
;; sselog, alternatives 9 and 10 are fmov and imov, everything else ssemov.
;; Alternatives 8-10 have a memory destination with operand 1 tied to it.
;; Operands 1 and 2 may not both be memory (see the condition).
4488 (define_insn "sse2_loadlpd"
4489 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4490 "=x,x,x,x,x,x,x,x,m,m ,m")
4492 (match_operand:DF 2 "nonimmediate_operand"
4493 " m,m,m,x,x,0,0,x,x,*f,r")
4495 (match_operand:V2DF 1 "vector_move_operand"
4496 " C,0,x,0,x,x,o,o,0,0 ,0")
4497 (parallel [(const_int 1)]))))]
4498 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4500 %vmovsd\t{%2, %0|%0, %2}
4501 movlpd\t{%2, %0|%0, %2}
4502 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4503 movsd\t{%2, %0|%0, %2}
4504 vmovsd\t{%2, %1, %0|%0, %1, %2}
4505 shufpd\t{$2, %1, %0|%0, %1, 2}
4506 movhpd\t{%H1, %0|%0, %H1}
4507 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4511 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4513 (cond [(eq_attr "alternative" "5")
4514 (const_string "sselog")
4515 (eq_attr "alternative" "9")
4516 (const_string "fmov")
4517 (eq_attr "alternative" "10")
4518 (const_string "imov")
4520 (const_string "ssemov")))
4521 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4522 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4523 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4524 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): the V2DF destination is memory and keeps
;; its own element 1, so only the DFmode register operand has to be written.
;; That register supplies element 0 (it precedes the retained element-1
;; vec_select), so it is stored at byte offset 0 of the destination.
;; Storing at offset 8 would overwrite the element that must be preserved
;; and leave element 0 unchanged.
4527 [(set (match_operand:V2DF 0 "memory_operand" "")
4529 (match_operand:DF 1 "register_operand" "")
4530 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4531 "TARGET_SSE2 && reload_completed"
4532 [(set (match_dup 0) (match_dup 1))]
4533 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; sse2_movsd: combines two V2DF operands into a V2DF result.
;; Nine alternatives covering register and memory forms: movsd/vmovsd,
;; movlpd/vmovlpd, shufpd with immediate 2, and movhps/vmovhps on the %H
;; form of a memory operand.  Alternative 5 (shufpd) is typed sselog and
;; carries a one-byte immediate; all other alternatives are ssemov.
;; prefix_data16 is computed for alternatives 2 and 4 from TARGET_AVX.
4535 (define_insn "sse2_movsd"
4536 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4538 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4539 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4543 movsd\t{%2, %0|%0, %2}
4544 vmovsd\t{%2, %1, %0|%0, %1, %2}
4545 movlpd\t{%2, %0|%0, %2}
4546 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4547 %vmovlpd\t{%2, %0|%0, %2}
4548 shufpd\t{$2, %1, %0|%0, %1, 2}
4549 movhps\t{%H1, %0|%0, %H1}
4550 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4551 %vmovhps\t{%1, %H0|%H0, %1}"
4552 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4555 (eq_attr "alternative" "5")
4556 (const_string "sselog")
4557 (const_string "ssemov")))
4558 (set (attr "prefix_data16")
4560 (and (eq_attr "alternative" "2,4")
4561 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4563 (const_string "*")))
4564 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4565 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4566 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander producing a V2DF register result from the DFmode operand 1.
;; The preparation code can force operand 1 into a DFmode register.
4568 (define_expand "vec_dupv2df"
4569 [(set (match_operand:V2DF 0 "register_operand" "")
4571 (match_operand:DF 1 "nonimmediate_operand" "")))]
4575 operands[1] = force_reg (DFmode, operands[1]);
;; V2DF result from a DFmode operand held in an SSE register or in memory
;; ("xm"), emitted as movddup (vmovddup when VEX-encoded, via the %v
;; prefix).
4578 (define_insn "*vec_dupv2df_sse3"
4579 [(set (match_operand:V2DF 0 "register_operand" "=x")
4581 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4583 "%vmovddup\t{%1, %0|%0, %1}"
4584 [(set_attr "type" "sselog1")
4585 (set_attr "prefix" "maybe_vex")
4586 (set_attr "mode" "DF")])
;; Register-only variant: the DFmode source operand is tied to the
;; destination register (constraint "0").
4588 (define_insn "*vec_dupv2df"
4589 [(set (match_operand:V2DF 0 "register_operand" "=x")
4591 (match_operand:DF 1 "register_operand" "0")))]
4594 [(set_attr "type" "sselog1")
4595 (set_attr "mode" "V2DF")])
;; V2DF concatenation form that is emitted as a single movddup (vmovddup
;; when VEX-encoded) of the DFmode operand 1, which may be an SSE register
;; or memory.
4597 (define_insn "*vec_concatv2df_sse3"
4598 [(set (match_operand:V2DF 0 "register_operand" "=x")
4600 (match_operand:DF 1 "nonimmediate_operand" "xm")
4603 "%vmovddup\t{%1, %0|%0, %1}"
4604 [(set_attr "type" "sselog1")
4605 (set_attr "prefix" "maybe_vex")
4606 (set_attr "mode" "DF")])
;; General V2DF concatenation of two DFmode operands.  Seven alternatives:
;;   0,1: unpcklpd / vunpcklpd (type sselog)
;;   2,3: movhpd / vmovhpd with operand 2 in memory
;;   4:   [v]movsd of operand 1 from memory, operand 2 matching "C"
;;   5,6: movlhps / movhps, marked noavx with no SSE2 requirement in "isa"
;; The "isa" attribute restricts each alternative to the listed ISA level.
4608 (define_insn "*vec_concatv2df"
4609 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4611 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4612 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4615 unpcklpd\t{%2, %0|%0, %2}
4616 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4617 movhpd\t{%2, %0|%0, %2}
4618 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4619 %vmovsd\t{%1, %0|%0, %1}
4620 movlhps\t{%2, %0|%0, %2}
4621 movhps\t{%2, %0|%0, %2}"
4622 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4625 (eq_attr "alternative" "0,1")
4626 (const_string "sselog")
4627 (const_string "ssemov")))
4628 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4629 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4630 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4634 ;; Parallel integral arithmetic
4636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_128 integer vector modes.  The preparation
;; statement creates operand 2 as a register holding the all-zero vector of
;; <MODE>mode (presumably so the negation is expressed as 0 - operand 1).
4638 (define_expand "neg<mode>2"
4639 [(set (match_operand:VI_128 0 "register_operand" "")
4642 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4644 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the VI_AVX2 integer vector modes.  Operands are
;; legitimized in place with ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
4646 (define_expand "<plusminus_insn><mode>3"
4647 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4649 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4650 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4652 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for vector integer add/subtract.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination, noavx);
;; alternative 1 is the three-operand VEX form (avx).  <comm> supplies the
;; commutative marker for operand 1 where the operation allows it.
4654 (define_insn "*<plusminus_insn><mode>3"
4655 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4657 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4658 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4659 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4661 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4662 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4663 [(set_attr "isa" "noavx,avx")
4664 (set_attr "type" "sseiadd")
4665 (set_attr "prefix_data16" "1,*")
4666 (set_attr "prefix" "orig,vex")
4667 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus) for the VI12_AVX2
;; modes.  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
4669 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4670 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4671 (sat_plusminus:VI12_AVX2
4672 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4673 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4675 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for saturating vector add/subtract.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination, noavx);
;; alternative 1 is the three-operand VEX form (avx).
4677 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4678 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4679 (sat_plusminus:VI12_AVX2
4680 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4681 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4682 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4684 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4685 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4686 [(set_attr "isa" "noavx,avx")
4687 (set_attr "type" "sseiadd")
4688 (set_attr "prefix_data16" "1,*")
4689 (set_attr "prefix" "orig,vex")
4690 (set_attr "mode" "TI")])
;; V16QI multiply, split while new pseudos can still be created.  Each input
;; is interleaved with itself (high and low halves) so that every 16-bit
;; word carries a source byte in its low byte; the words are multiplied with
;; mulv8hi3 and the even bytes of the two products are gathered into
;; operand 0 by ix86_expand_vec_extract_even_odd.
4692 (define_insn_and_split "mulv16qi3"
4693 [(set (match_operand:V16QI 0 "register_operand" "")
4694 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4695 (match_operand:V16QI 2 "register_operand" "")))]
4697 && can_create_pseudo_p ()"
4705 for (i = 0; i < 6; ++i)
4706 t[i] = gen_reg_rtx (V16QImode);
4708 /* Unpack data such that we've got a source byte in each low byte of
4709 each word. We don't care what goes into the high byte of each word.
4710 Rather than trying to get zero in there, most convenient is to let
4711 it be a copy of the low byte. */
4712 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4713 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4714 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4715 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4717 /* Multiply words. The end-of-line annotations here give a picture of what
4718 the output of that instruction looks like. Dot means don't care; the
4719 letters are the bytes of the result with A being the most significant. */
4720 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4721 gen_lowpart (V8HImode, t[0]),
4722 gen_lowpart (V8HImode, t[1])));
4723 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4724 gen_lowpart (V8HImode, t[2]),
4725 gen_lowpart (V8HImode, t[3])));
4727 /* Extract the even bytes and merge them back together. */
4728 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Multiply expander for the VI2_AVX2 (16-bit element) vector modes.
;; Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
4732 (define_expand "mul<mode>3"
4733 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4734 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4735 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4737 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the 16-bit element multiply: pmullw (legacy,
;; operand 1 tied to the destination) or vpmullw (VEX, three operands).
;; Operand 1 is marked commutative with "%".
4739 (define_insn "*mul<mode>3"
4740 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4741 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4742 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4743 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4745 pmullw\t{%2, %0|%0, %2}
4746 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4747 [(set_attr "isa" "noavx,avx")
4748 (set_attr "type" "sseimul")
4749 (set_attr "prefix_data16" "1,*")
4750 (set_attr "prefix" "orig,vex")
4751 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes: both operands are
;; extended (any_extend) to <ssedoublemode>, multiplied, and the product is
;; logically shifted right.  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4753 (define_expand "<s>mul<mode>3_highpart"
4754 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4756 (lshiftrt:<ssedoublemode>
4757 (mult:<ssedoublemode>
4758 (any_extend:<ssedoublemode>
4759 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4760 (any_extend:<ssedoublemode>
4761 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4764 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply: pmulh<u>w (legacy, operand 1
;; tied to the destination) or vpmulh<u>w (VEX, three operands).  <u>
;; selects the unsigned mnemonic for the zero-extending variant.
4766 (define_insn "*<s>mul<mode>3_highpart"
4767 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4769 (lshiftrt:<ssedoublemode>
4770 (mult:<ssedoublemode>
4771 (any_extend:<ssedoublemode>
4772 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4773 (any_extend:<ssedoublemode>
4774 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4776 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4778 pmulh<u>w\t{%2, %0|%0, %2}
4779 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4780 [(set_attr "isa" "noavx,avx")
4781 (set_attr "type" "sseimul")
4782 (set_attr "prefix_data16" "1,*")
4783 (set_attr "prefix" "orig,vex")
4784 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a widening multiply of the even-numbered elements
;; (0, 2, 4, 6) of two V8SI operands into a V4DI result.  Operands are
;; legitimized in place with ix86_fixup_binary_operands_no_copy.
4786 (define_expand "avx2_umulv4siv4di3"
4787 [(set (match_operand:V4DI 0 "register_operand" "")
4791 (match_operand:V8SI 1 "nonimmediate_operand" "")
4792 (parallel [(const_int 0) (const_int 2)
4793 (const_int 4) (const_int 6)])))
4796 (match_operand:V8SI 2 "nonimmediate_operand" "")
4797 (parallel [(const_int 0) (const_int 2)
4798 (const_int 4) (const_int 6)])))))]
4800 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the widening multiply of even V8SI elements, emitted
;; as vpmuludq.  Operand 1 is marked commutative with "%".
;; NOTE(review): the name uses an "avx_" prefix although the condition is
;; TARGET_AVX2 and the corresponding expander is avx2_umulv4siv4di3.
4802 (define_insn "*avx_umulv4siv4di3"
4803 [(set (match_operand:V4DI 0 "register_operand" "=x")
4807 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4808 (parallel [(const_int 0) (const_int 2)
4809 (const_int 4) (const_int 6)])))
4812 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4813 (parallel [(const_int 0) (const_int 2)
4814 (const_int 4) (const_int 6)])))))]
4815 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4816 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4817 [(set_attr "type" "sseimul")
4818 (set_attr "prefix" "vex")
4819 (set_attr "mode" "OI")])
;; Expander for a widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result.  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4821 (define_expand "sse2_umulv2siv2di3"
4822 [(set (match_operand:V2DI 0 "register_operand" "")
4826 (match_operand:V4SI 1 "nonimmediate_operand" "")
4827 (parallel [(const_int 0) (const_int 2)])))
4830 (match_operand:V4SI 2 "nonimmediate_operand" "")
4831 (parallel [(const_int 0) (const_int 2)])))))]
4833 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the widening multiply of V4SI elements 0 and 2:
;; pmuludq (legacy, operand 1 tied to the destination) or vpmuludq (VEX).
;; Operand 1 is marked commutative with "%".
4835 (define_insn "*sse2_umulv2siv2di3"
4836 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4840 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4841 (parallel [(const_int 0) (const_int 2)])))
4844 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4845 (parallel [(const_int 0) (const_int 2)])))))]
4846 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4848 pmuludq\t{%2, %0|%0, %2}
4849 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4850 [(set_attr "isa" "noavx,avx")
4851 (set_attr "type" "sseimul")
4852 (set_attr "prefix_data16" "1,*")
4853 (set_attr "prefix" "orig,vex")
4854 (set_attr "mode" "TI")])
;; Expander for a widening multiply of the even-numbered elements
;; (0, 2, 4, 6) of two V8SI operands into a V4DI result; the matching insn
;; emits vpmuldq.  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4856 (define_expand "avx2_mulv4siv4di3"
4857 [(set (match_operand:V4DI 0 "register_operand" "")
4861 (match_operand:V8SI 1 "nonimmediate_operand" "")
4862 (parallel [(const_int 0) (const_int 2)
4863 (const_int 4) (const_int 6)])))
4866 (match_operand:V8SI 2 "nonimmediate_operand" "")
4867 (parallel [(const_int 0) (const_int 2)
4868 (const_int 4) (const_int 6)])))))]
4870 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the widening multiply of even V8SI elements, emitted
;; as vpmuldq.  The multiply is commutative, so operand 1 carries the "%"
;; modifier (as the other widening-multiply insns in this file do); this
;; lets the register allocator swap operands 1 and 2 when operand 1 is the
;; one in memory instead of forcing an extra load.
4872 (define_insn "*avx2_mulv4siv4di3"
4873 [(set (match_operand:V4DI 0 "register_operand" "=x")
4877 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4878 (parallel [(const_int 0) (const_int 2)
4879 (const_int 4) (const_int 6)])))
4882 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4883 (parallel [(const_int 0) (const_int 2)
4884 (const_int 4) (const_int 6)])))))]
4885 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4886 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4887 [(set_attr "isa" "avx")
4888 (set_attr "type" "sseimul")
4889 (set_attr "prefix_extra" "1")
4890 (set_attr "prefix" "vex")
4891 (set_attr "mode" "OI")])
;; Expander for a widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result; the matching insn emits pmuldq.  Operands
;; are legitimized in place with ix86_fixup_binary_operands_no_copy.
4893 (define_expand "sse4_1_mulv2siv2di3"
4894 [(set (match_operand:V2DI 0 "register_operand" "")
4898 (match_operand:V4SI 1 "nonimmediate_operand" "")
4899 (parallel [(const_int 0) (const_int 2)])))
4902 (match_operand:V4SI 2 "nonimmediate_operand" "")
4903 (parallel [(const_int 0) (const_int 2)])))))]
4905 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn (requires TARGET_SSE4_1) for the widening multiply of V4SI
;; elements 0 and 2: pmuldq (legacy, operand 1 tied to the destination) or
;; vpmuldq (VEX).  Operand 1 is marked commutative with "%".
4907 (define_insn "*sse4_1_mulv2siv2di3"
4908 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4912 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4913 (parallel [(const_int 0) (const_int 2)])))
4916 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4917 (parallel [(const_int 0) (const_int 2)])))))]
4918 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4920 pmuldq\t{%2, %0|%0, %2}
4921 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4922 [(set_attr "isa" "noavx,avx")
4923 (set_attr "type" "sseimul")
4924 (set_attr "prefix_data16" "1,*")
4925 (set_attr "prefix_extra" "1")
4926 (set_attr "prefix" "orig,vex")
4927 (set_attr "mode" "TI")])
;; Expander for the 256-bit multiply-add of 16-bit elements: V16HI inputs,
;; V8SI result.  The pattern selects two V8HI element subsets of each input
;; (one list starting at index 0, one starting at index 1 and ending at
;; 15).  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4929 (define_expand "avx2_pmaddwd"
4930 [(set (match_operand:V8SI 0 "register_operand" "")
4935 (match_operand:V16HI 1 "nonimmediate_operand" "")
4936 (parallel [(const_int 0)
4946 (match_operand:V16HI 2 "nonimmediate_operand" "")
4947 (parallel [(const_int 0)
4957 (vec_select:V8HI (match_dup 1)
4958 (parallel [(const_int 1)
4967 (vec_select:V8HI (match_dup 2)
4968 (parallel [(const_int 1)
4975 (const_int 15)]))))))]
4977 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the 128-bit multiply-add of 16-bit elements: V8HI inputs,
;; V4SI result.  The pattern selects two V4HI element subsets of each input
;; (one list starting at index 0, one starting at index 1 and ending at 7).
;; Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4979 (define_expand "sse2_pmaddwd"
4980 [(set (match_operand:V4SI 0 "register_operand" "")
4985 (match_operand:V8HI 1 "nonimmediate_operand" "")
4986 (parallel [(const_int 0)
4992 (match_operand:V8HI 2 "nonimmediate_operand" "")
4993 (parallel [(const_int 0)
4999 (vec_select:V4HI (match_dup 1)
5000 (parallel [(const_int 1)
5005 (vec_select:V4HI (match_dup 2)
5006 (parallel [(const_int 1)
5009 (const_int 7)]))))))]
5011 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the 256-bit multiply-add (requires TARGET_AVX2),
;; emitted as the three-operand vpmaddwd.  Operand 1 is marked commutative
;; with "%".
5013 (define_insn "*avx2_pmaddwd"
5014 [(set (match_operand:V8SI 0 "register_operand" "=x")
5019 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5020 (parallel [(const_int 0)
5030 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5031 (parallel [(const_int 0)
5041 (vec_select:V8HI (match_dup 1)
5042 (parallel [(const_int 1)
5051 (vec_select:V8HI (match_dup 2)
5052 (parallel [(const_int 1)
5059 (const_int 15)]))))))]
5060 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5061 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5062 [(set_attr "type" "sseiadd")
5063 (set_attr "prefix" "vex")
5064 (set_attr "mode" "OI")])
;; Matching insn for the 128-bit multiply-add: pmaddwd (legacy, operand 1
;; tied to the destination) or vpmaddwd (VEX, three operands).  Operand 1
;; is marked commutative with "%".
5066 (define_insn "*sse2_pmaddwd"
5067 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5072 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5073 (parallel [(const_int 0)
5079 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5080 (parallel [(const_int 0)
5086 (vec_select:V4HI (match_dup 1)
5087 (parallel [(const_int 1)
5092 (vec_select:V4HI (match_dup 2)
5093 (parallel [(const_int 1)
5096 (const_int 7)]))))))]
5097 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5099 pmaddwd\t{%2, %0|%0, %2}
5100 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5101 [(set_attr "isa" "noavx,avx")
5102 (set_attr "type" "sseiadd")
5103 (set_attr "atom_unit" "simul")
5104 (set_attr "prefix_data16" "1,*")
5105 (set_attr "prefix" "orig,vex")
5106 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX2 (32-bit element) vector modes.  Both
;; inputs must be registers.  When SSE4.1 or AVX is available the operands
;; are legitimized with ix86_fixup_binary_operands_no_copy.
5108 (define_expand "mul<mode>3"
5109 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5110 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5111 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5114 if (TARGET_SSE4_1 || TARGET_AVX)
5115 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for the 32-bit element multiply (requires TARGET_SSE4_1):
;; pmulld (legacy, operand 1 tied to the destination) or vpmulld (VEX).
;; Operand 1 is marked commutative with "%".
5118 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5119 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5120 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5121 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5122 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5124 pmulld\t{%2, %0|%0, %2}
5125 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5126 [(set_attr "isa" "noavx,avx")
5127 (set_attr "type" "sseimul")
5128 (set_attr "prefix_extra" "1")
5129 (set_attr "prefix" "orig,vex")
5130 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for targets with SSE2 but neither SSE4.1 nor AVX (see the
;; condition), split while new pseudos can still be created.  The product
;; is assembled from two sse2_umulv2siv2di3 multiplies: one on the original
;; inputs (elements 0 and 2) and one on copies shifted with
;; sse2_lshrv1ti3 (elements 1 and 3).  The results are compacted with
;; pshufd and merged with vec_interleave_lowv4si.
5132 (define_insn_and_split "*sse2_mulv4si3"
5133 [(set (match_operand:V4SI 0 "register_operand" "")
5134 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5135 (match_operand:V4SI 2 "register_operand" "")))]
5136 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5137 && can_create_pseudo_p ()"
5142 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5148 t1 = gen_reg_rtx (V4SImode);
5149 t2 = gen_reg_rtx (V4SImode);
5150 t3 = gen_reg_rtx (V4SImode);
5151 t4 = gen_reg_rtx (V4SImode);
5152 t5 = gen_reg_rtx (V4SImode);
5153 t6 = gen_reg_rtx (V4SImode);
5154 thirtytwo = GEN_INT (32);
5156 /* Multiply elements 2 and 0. */
5157 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5160 /* Shift both input vectors down one element, so that elements 3
5161 and 1 are now in the slots for elements 2 and 0. For K8, at
5162 least, this is faster than using a shuffle. */
5163 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5164 gen_lowpart (V1TImode, op1),
5166 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5167 gen_lowpart (V1TImode, op2),
5169 /* Multiply elements 3 and 1. */
5170 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5173 /* Move the results in element 2 down to element 1; we don't care
5174 what goes in elements 2 and 3. */
5175 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5176 const0_rtx, const0_rtx));
5177 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5178 const0_rtx, const0_rtx));
5180 /* Merge the parts back together. */
5181 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split while new pseudos can still be created.  Two
;; instruction sequences appear below:
;;  - an XOP sequence: pshufd, mulv4si3, xop_phadddq, a left shift by 32
;;    and xop_pmacsdql;
;;  - a generic sequence built from three sse2_umulv2siv2di3 products:
;;    low*low plus the two low*high cross products shifted left by 32.
5185 (define_insn_and_split "mulv2di3"
5186 [(set (match_operand:V2DI 0 "register_operand" "")
5187 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5188 (match_operand:V2DI 2 "register_operand" "")))]
5190 && can_create_pseudo_p ()"
5195 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5204 /* op1: A,B,C,D, op2: E,F,G,H */
5205 op1 = gen_lowpart (V4SImode, op1);
5206 op2 = gen_lowpart (V4SImode, op2);
5208 t1 = gen_reg_rtx (V4SImode);
5209 t2 = gen_reg_rtx (V4SImode);
5210 t3 = gen_reg_rtx (V2DImode);
5211 t4 = gen_reg_rtx (V2DImode);
5214 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5220 /* t2: (B*E),(A*F),(D*G),(C*H) */
5221 emit_insn (gen_mulv4si3 (t2, t1, op2));
5223 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5224 emit_insn (gen_xop_phadddq (t3, t2));
5226 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5227 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5229 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5230 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5234 t1 = gen_reg_rtx (V2DImode);
5235 t2 = gen_reg_rtx (V2DImode);
5236 t3 = gen_reg_rtx (V2DImode);
5237 t4 = gen_reg_rtx (V2DImode);
5238 t5 = gen_reg_rtx (V2DImode);
5239 t6 = gen_reg_rtx (V2DImode);
5240 thirtytwo = GEN_INT (32);
5242 /* Multiply low parts. */
5243 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5244 gen_lowpart (V4SImode, op2)));
5246 /* Shift input vectors right 32 bits so we can multiply high parts. */
5247 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5248 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5250 /* Multiply high parts by low parts. */
5251 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5252 gen_lowpart (V4SImode, t3)));
5253 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5254 gen_lowpart (V4SImode, t2)));
5256 /* Shift them back. */
5257 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5258 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5260 /* Add the three parts together. */
5261 emit_insn (gen_addv2di3 (t6, t1, t4));
5262 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply V8HI x V8HI -> V4SI, "hi" variant.  mulv8hi3
;; and smulv8hi3_highpart each produce one V8HI temporary from the same
;; inputs; vec_interleave_highv8hi combines the two temporaries into a
;; V8HI view of operand 0.
5267 (define_expand "vec_widen_smult_hi_v8hi"
5268 [(match_operand:V4SI 0 "register_operand" "")
5269 (match_operand:V8HI 1 "register_operand" "")
5270 (match_operand:V8HI 2 "register_operand" "")]
5273 rtx op1, op2, t1, t2, dest;
5277 t1 = gen_reg_rtx (V8HImode);
5278 t2 = gen_reg_rtx (V8HImode);
5279 dest = gen_lowpart (V8HImode, operands[0]);
5281 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5282 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5283 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same scheme
;; as the "hi" variant, except that the two temporaries are combined with
;; vec_interleave_lowv8hi.
5287 (define_expand "vec_widen_smult_lo_v8hi"
5288 [(match_operand:V4SI 0 "register_operand" "")
5289 (match_operand:V8HI 1 "register_operand" "")
5290 (match_operand:V8HI 2 "register_operand" "")]
5293 rtx op1, op2, t1, t2, dest;
5297 t1 = gen_reg_rtx (V8HImode);
5298 t2 = gen_reg_rtx (V8HImode);
5299 dest = gen_lowpart (V8HImode, operands[0]);
5301 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5302 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5303 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "hi" variant.  mulv8hi3
;; and umulv8hi3_highpart each produce one V8HI temporary from the same
;; inputs; vec_interleave_highv8hi combines the two temporaries into a
;; V8HI view of operand 0.
5307 (define_expand "vec_widen_umult_hi_v8hi"
5308 [(match_operand:V4SI 0 "register_operand" "")
5309 (match_operand:V8HI 1 "register_operand" "")
5310 (match_operand:V8HI 2 "register_operand" "")]
5313 rtx op1, op2, t1, t2, dest;
5317 t1 = gen_reg_rtx (V8HImode);
5318 t2 = gen_reg_rtx (V8HImode);
5319 dest = gen_lowpart (V8HImode, operands[0]);
5321 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5322 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5323 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same
;; scheme as the "hi" variant, except that the two temporaries are combined
;; with vec_interleave_lowv8hi.
5327 (define_expand "vec_widen_umult_lo_v8hi"
5328 [(match_operand:V4SI 0 "register_operand" "")
5329 (match_operand:V8HI 1 "register_operand" "")
5330 (match_operand:V8HI 2 "register_operand" "")]
5333 rtx op1, op2, t1, t2, dest;
5337 t1 = gen_reg_rtx (V8HImode);
5338 t2 = gen_reg_rtx (V8HImode);
5339 dest = gen_lowpart (V8HImode, operands[0]);
5341 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5342 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5343 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Both inputs
;; are first shuffled with pshufd into fresh V4SI temporaries, which are
;; then multiplied by the XOP pattern xop_mulv2div2di3_high.
5347 (define_expand "vec_widen_smult_hi_v4si"
5348 [(match_operand:V2DI 0 "register_operand" "")
5349 (match_operand:V4SI 1 "register_operand" "")
5350 (match_operand:V4SI 2 "register_operand" "")]
5355 t1 = gen_reg_rtx (V4SImode);
5356 t2 = gen_reg_rtx (V4SImode);
5358 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5363 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5368 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Both inputs
;; are first shuffled with pshufd into fresh V4SI temporaries, which are
;; then multiplied by the XOP pattern xop_mulv2div2di3_low.
5372 (define_expand "vec_widen_smult_lo_v4si"
5373 [(match_operand:V2DI 0 "register_operand" "")
5374 (match_operand:V4SI 1 "register_operand" "")
5375 (match_operand:V4SI 2 "register_operand" "")]
5380 t1 = gen_reg_rtx (V4SImode);
5381 t2 = gen_reg_rtx (V4SImode);
5383 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5388 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5393 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Each
;; input is interleaved with itself using vec_interleave_highv4si, and the
;; two interleaved vectors are multiplied with sse2_umulv2siv2di3, which
;; multiplies elements 0 and 2 of its inputs.
5397 (define_expand "vec_widen_umult_hi_v4si"
5398 [(match_operand:V2DI 0 "register_operand" "")
5399 (match_operand:V4SI 1 "register_operand" "")
5400 (match_operand:V4SI 2 "register_operand" "")]
5403 rtx op1, op2, t1, t2;
5407 t1 = gen_reg_rtx (V4SImode);
5408 t2 = gen_reg_rtx (V4SImode);
5410 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5411 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5412 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Each
;; input is interleaved with itself using vec_interleave_lowv4si, and the
;; two interleaved vectors are multiplied with sse2_umulv2siv2di3, which
;; multiplies elements 0 and 2 of its inputs.
5416 (define_expand "vec_widen_umult_lo_v4si"
5417 [(match_operand:V2DI 0 "register_operand" "")
5418 (match_operand:V4SI 1 "register_operand" "")
5419 (match_operand:V4SI 2 "register_operand" "")]
5422 rtx op1, op2, t1, t2;
5426 t1 = gen_reg_rtx (V4SImode);
5427 t2 = gen_reg_rtx (V4SImode);
5429 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5430 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5431 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The sse2_pmaddwd result goes to a fresh V4SI temporary that
;; is then added to the V4SI accumulator operand 3.
5435 (define_expand "sdot_prodv8hi"
5436 [(match_operand:V4SI 0 "register_operand" "")
5437 (match_operand:V8HI 1 "register_operand" "")
5438 (match_operand:V8HI 2 "register_operand" "")
5439 (match_operand:V4SI 3 "register_operand" "")]
5442 rtx t = gen_reg_rtx (V4SImode);
5443 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5444 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step with V4SI inputs and a V2DI accumulator
;; (operand 3).  The first sse2_umulv2siv2di3 multiplies the inputs as
;; given and its result is added to the accumulator.  Both inputs are then
;; shifted with sse2_lshrv1ti3 and multiplied a second time, and that
;; product is added in to form operand 0.
5448 (define_expand "udot_prodv4si"
5449 [(match_operand:V2DI 0 "register_operand" "")
5450 (match_operand:V4SI 1 "register_operand" "")
5451 (match_operand:V4SI 2 "register_operand" "")
5452 (match_operand:V2DI 3 "register_operand" "")]
5457 t1 = gen_reg_rtx (V2DImode);
5458 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5459 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5461 t2 = gen_reg_rtx (V4SImode);
5462 t3 = gen_reg_rtx (V4SImode);
5463 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5464 gen_lowpart (V1TImode, operands[1]),
5466 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5467 gen_lowpart (V1TImode, operands[2]),
5470 t4 = gen_reg_rtx (V2DImode);
5471 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5473 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Arithmetic right shift for the VI24_AVX2 modes: psra<suffix> (legacy,
;; operand 1 tied to the destination) or vpsra<suffix> (VEX).  The SImode
;; count is a non-memory operand (constraint "xN"); length_immediate is
;; chosen according to whether the count is a const_int.
5477 (define_insn "ashr<mode>3"
5478 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5480 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5481 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5484 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5485 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5486 [(set_attr "isa" "noavx,avx")
5487 (set_attr "type" "sseishft")
5488 (set (attr "length_immediate")
5489 (if_then_else (match_operand 2 "const_int_operand" "")
5491 (const_string "0")))
5492 (set_attr "prefix_data16" "1,*")
5493 (set_attr "prefix" "orig,vex")
5494 (set_attr "mode" "<sseinsnmode>")])
;; V4DI shift emitted as vpsrldq.  The RTL count must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 to obtain
;; the instruction's immediate.
5496 (define_insn "avx2_lshrqv4di3"
5497 [(set (match_operand:V4DI 0 "register_operand" "=x")
5499 (match_operand:V4DI 1 "register_operand" "x")
5500 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5503 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5504 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5506 [(set_attr "type" "sseishft")
5507 (set_attr "prefix" "vex")
5508 (set_attr "length_immediate" "1")
5509 (set_attr "mode" "OI")])
;; Logical right shift for the VI248_AVX2 modes: psrl<suffix> (legacy,
;; operand 1 tied to the destination) or vpsrl<suffix> (VEX).  The SImode
;; count is a non-memory operand (constraint "xN"); length_immediate is
;; chosen according to whether the count is a const_int.
5511 (define_insn "lshr<mode>3"
5512 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5513 (lshiftrt:VI248_AVX2
5514 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5515 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5518 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5519 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5520 [(set_attr "isa" "noavx,avx")
5521 (set_attr "type" "sseishft")
5522 (set (attr "length_immediate")
5523 (if_then_else (match_operand 2 "const_int_operand" "")
5525 (const_string "0")))
5526 (set_attr "prefix_data16" "1,*")
5527 (set_attr "prefix" "orig,vex")
5528 (set_attr "mode" "<sseinsnmode>")])
;; V4DI left shift (ashift) emitted as vpslldq.  The RTL count must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 to obtain
;; the instruction's immediate.
5530 (define_insn "avx2_lshlqv4di3"
5531 [(set (match_operand:V4DI 0 "register_operand" "=x")
5532 (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x")
5533 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5536 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5537 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5539 [(set_attr "type" "sseishft")
5540 (set_attr "prefix" "vex")
5541 (set_attr "length_immediate" "1")
5542 (set_attr "mode" "OI")])
;; Left shift for the 256-bit VI248_256 modes, emitted as the three-operand
;; vpsll<suffix>.  The SImode count is a non-memory operand (constraint
;; "xN"); length_immediate is chosen according to whether the count is a
;; const_int.
5544 (define_insn "avx2_lshl<mode>3"
5545 [(set (match_operand:VI248_256 0 "register_operand" "=x")
5547 (match_operand:VI248_256 1 "register_operand" "x")
5548 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5550 "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5551 [(set_attr "type" "sseishft")
5552 (set_attr "prefix" "vex")
5553 (set (attr "length_immediate")
5554 (if_then_else (match_operand 2 "const_int_operand" "")
5556 (const_string "0")))
5557 (set_attr "mode" "OI")])
;; Left shift for the 128-bit VI248_128 modes: psll<suffix> (legacy,
;; operand 1 tied to the destination) or vpsll<suffix> (VEX).  The SImode
;; count is a non-memory operand (constraint "xN"); length_immediate is
;; chosen according to whether the count is a const_int.
5559 (define_insn "ashl<mode>3"
5560 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5562 (match_operand:VI248_128 1 "register_operand" "0,x")
5563 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5566 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5567 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5568 [(set_attr "isa" "noavx,avx")
5569 (set_attr "type" "sseishft")
5570 (set (attr "length_immediate")
5571 (if_then_else (match_operand 2 "const_int_operand" "")
5573 (const_string "0")))
5574 (set_attr "prefix_data16" "1,*")
5575 (set_attr "prefix" "orig,vex")
5576 (set_attr "mode" "TI")])
;; Expander over the VI_128 modes whose count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands 0
;; and 1 with their V1TImode lowparts before the pattern is emitted.
5578 (define_expand "vec_shl_<mode>"
5579 [(set (match_operand:VI_128 0 "register_operand" "")
5581 (match_operand:VI_128 1 "register_operand" "")
5582 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5585 operands[0] = gen_lowpart (V1TImode, operands[0]);
5586 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Shift pattern for the VIMAX_AVX2 modes printed as pslldq (alternative 0,
;; operand 1 tied to operand 0) or vpslldq (alternative 1).  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand and is divided by 8 before printing.
5589 (define_insn "<sse2_avx2>_ashl<mode>3"
5590 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5592 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5593 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5596 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5598 switch (which_alternative)
5601 return "pslldq\t{%2, %0|%0, %2}";
5603 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5608 [(set_attr "isa" "noavx,avx")
5609 (set_attr "type" "sseishft")
5610 (set_attr "length_immediate" "1")
5611 (set_attr "prefix_data16" "1,*")
5612 (set_attr "prefix" "orig,vex")
5613 (set_attr "mode" "<sseinsnmode>")])
;; Expander over the VI_128 modes whose count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands 0
;; and 1 with their V1TImode lowparts before the pattern is emitted.
5615 (define_expand "vec_shr_<mode>"
5616 [(set (match_operand:VI_128 0 "register_operand" "")
5618 (match_operand:VI_128 1 "register_operand" "")
5619 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5622 operands[0] = gen_lowpart (V1TImode, operands[0]);
5623 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Expander for a <code>-parameterised binary operation on the VI124_256
;; modes.  Both inputs may be registers or memory; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): a second expander with the same name and operand shape
;; exists further down in this file; presumably the two iterate over
;; different code iterators -- confirm.
5626 (define_expand "avx2_<code><mode>3"
5627 [(set (match_operand:VI124_256 0 "register_operand" "")
5629 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5630 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5632 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min on the VI124_256 modes, printed as
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative ("%");
;; the insn is valid only when TARGET_AVX2 holds and
;; ix86_binary_operator_ok accepts the operands.
5634 (define_insn "*avx2_<code><mode>3"
5635 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5637 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5638 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5639 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5640 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5641 [(set_attr "type" "sseiadd")
5642 (set_attr "prefix_extra" "1")
5643 (set_attr "prefix" "vex")
5644 (set_attr "mode" "OI")])
;; V1TI shift printed as psrldq (alternative 0, operand 1 tied to
;; operand 0) or vpsrldq (alternative 1).  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
5646 (define_insn "sse2_lshrv1ti3"
5647 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5649 (match_operand:V1TI 1 "register_operand" "0,x")
5650 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5653 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5655 switch (which_alternative)
5658 return "psrldq\t{%2, %0|%0, %2}";
5660 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5665 [(set_attr "isa" "noavx,avx")
5666 (set_attr "type" "sseishft")
5667 (set_attr "length_immediate" "1")
5668 (set_attr "atom_unit" "sishuf")
5669 (set_attr "prefix_data16" "1,*")
5670 (set_attr "prefix" "orig,vex")
5671 (set_attr "mode" "TI")])
;; Expander for a <code>-parameterised binary operation on the VI124_256
;; modes.  Both inputs may be registers or memory; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): an expander with the same name and operand shape exists
;; earlier in this file; presumably the two iterate over different code
;; iterators -- confirm.
5673 (define_expand "avx2_<code><mode>3"
5674 [(set (match_operand:VI124_256 0 "register_operand" "")
5676 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5677 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5679 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min on the VI124_256 modes, printed as
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative ("%");
;; the insn is valid only when TARGET_AVX2 holds and
;; ix86_binary_operator_ok accepts the operands.
5681 (define_insn "*avx2_<code><mode>3"
5682 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5684 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5685 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5686 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5687 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5688 [(set_attr "type" "sseiadd")
5689 (set_attr "prefix_extra" "1")
5690 (set_attr "prefix" "vex")
5691 (set_attr "mode" "OI")])
;; SSE4.1 integer max/min on the VI14_128 modes.  Alternative 0 prints the
;; two-operand p<maxmin_int><ssemodesuffix> form (operand 1 tied to
;; operand 0); alternative 1 prints the VEX three-operand form.  Requires
;; TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
5693 (define_insn "*sse4_1_<code><mode>3"
5694 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5696 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5697 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5698 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5700 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5701 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5702 [(set_attr "isa" "noavx,avx")
5703 (set_attr "type" "sseiadd")
5704 (set_attr "prefix_extra" "1,*")
5705 (set_attr "prefix" "orig,vex")
5706 (set_attr "mode" "TI")])
;; SSE2 V8HI integer max/min, printed as p<maxmin_int>w (alternative 0,
;; operand 1 tied to operand 0) or vp<maxmin_int>w (alternative 1).
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
5708 (define_insn "*<code>v8hi3"
5709 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5711 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5712 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5713 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5715 p<maxmin_int>w\t{%2, %0|%0, %2}
5716 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5717 [(set_attr "isa" "noavx,avx")
5718 (set_attr "type" "sseiadd")
5719 (set_attr "prefix_data16" "1,*")
5720 (set_attr "prefix_extra" "*,1")
5721 (set_attr "prefix" "orig,vex")
5722 (set_attr "mode" "TI")])
;; Signed maximum for the VI14_128 modes.  Two strategies are visible in
;; the preparation code: canonicalising the operands with
;; ix86_fixup_binary_operands_no_copy (SMAX, ...), or building a
;; six-element operand array for ix86_expand_int_vcond that selects
;; operand 1 when (operand 1 > operand 2) and operand 2 otherwise.
5724 (define_expand "smax<mode>3"
5725 [(set (match_operand:VI14_128 0 "register_operand" "")
5726 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5727 (match_operand:VI14_128 2 "register_operand" "")))]
5731 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5737 xops[0] = operands[0];
5738 xops[1] = operands[1];
5739 xops[2] = operands[2];
5740 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5741 xops[4] = operands[1];
5742 xops[5] = operands[2];
5743 ok = ix86_expand_int_vcond (xops);
;; Signed minimum for the VI14_128 modes.  Two strategies are visible in
;; the preparation code: canonicalising the operands with
;; ix86_fixup_binary_operands_no_copy (SMIN, ...), or building a
;; six-element operand array for ix86_expand_int_vcond.  The value slots
;; are swapped relative to the comparison so that operand 2 is selected
;; when (operand 1 > operand 2) and operand 1 otherwise.
5749 (define_expand "smin<mode>3"
5750 [(set (match_operand:VI14_128 0 "register_operand" "")
5751 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5752 (match_operand:VI14_128 2 "register_operand" "")))]
5756 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5762 xops[0] = operands[0];
5763 xops[1] = operands[2];
5764 xops[2] = operands[1];
5765 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5766 xops[4] = operands[1];
5767 xops[5] = operands[2];
5768 ok = ix86_expand_int_vcond (xops);
;; Expander for a <code>-parameterised V8HI binary operation.  Inputs may
;; be registers or memory; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
5774 (define_expand "<code>v8hi3"
5775 [(set (match_operand:V8HI 0 "register_operand" "")
5777 (match_operand:V8HI 1 "nonimmediate_operand" "")
5778 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5780 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Signed V2DI maximum, expanded through ix86_expand_int_vcond: the result
;; is operand 1 when (operand 1 > operand 2) and operand 2 otherwise.
5782 (define_expand "smaxv2di3"
5783 [(set (match_operand:V2DI 0 "register_operand" "")
5784 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5785 (match_operand:V2DI 2 "register_operand" "")))]
5791 xops[0] = operands[0];
5792 xops[1] = operands[1];
5793 xops[2] = operands[2];
5794 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5795 xops[4] = operands[1];
5796 xops[5] = operands[2];
5797 ok = ix86_expand_int_vcond (xops);
;; Signed V2DI minimum, expanded through ix86_expand_int_vcond.  The value
;; slots are swapped relative to the comparison: the result is operand 2
;; when (operand 1 > operand 2) and operand 1 otherwise.
5802 (define_expand "sminv2di3"
5803 [(set (match_operand:V2DI 0 "register_operand" "")
5804 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5805 (match_operand:V2DI 2 "register_operand" "")))]
5811 xops[0] = operands[0];
5812 xops[1] = operands[2];
5813 xops[2] = operands[1];
5814 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5815 xops[4] = operands[1];
5816 xops[5] = operands[2];
5817 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min on the VI24_128 modes.  Alternative 0 prints the
;; two-operand p<maxmin_int><ssemodesuffix> form (operand 1 tied to
;; operand 0); alternative 1 prints the VEX three-operand form.  Requires
;; TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
5822 (define_insn "*sse4_1_<code><mode>3"
5823 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5825 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5826 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5827 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5829 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5830 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5831 [(set_attr "isa" "noavx,avx")
5832 (set_attr "type" "sseiadd")
5833 (set_attr "prefix_extra" "1,*")
5834 (set_attr "prefix" "orig,vex")
5835 (set_attr "mode" "TI")])
;; SSE2 V16QI integer max/min, printed as p<maxmin_int>b (alternative 0,
;; operand 1 tied to operand 0) or vp<maxmin_int>b (alternative 1).
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
5837 (define_insn "*<code>v16qi3"
5838 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5840 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5841 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5842 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5844 p<maxmin_int>b\t{%2, %0|%0, %2}
5845 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5846 [(set_attr "isa" "noavx,avx")
5847 (set_attr "type" "sseiadd")
5848 (set_attr "prefix_data16" "1,*")
5849 (set_attr "prefix_extra" "*,1")
5850 (set_attr "prefix" "orig,vex")
5851 (set_attr "mode" "TI")])
;; Expander for a <code>-parameterised V16QI binary operation.  Inputs may
;; be registers or memory; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
5853 (define_expand "<code>v16qi3"
5854 [(set (match_operand:V16QI 0 "register_operand" "")
5856 (match_operand:V16QI 1 "nonimmediate_operand" "")
5857 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5859 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Unsigned V8HI maximum.  Two strategies are visible in the preparation
;; code: canonicalising the operands with
;; ix86_fixup_binary_operands_no_copy (UMAX, ...), or a two-insn sequence
;; that emits gen_sse2_ussubv8hi3 (operand 1, operand 2) into a temporary
;; and then adds operand 2 back with gen_addv8hi3.  The temporary is
;; operand 0 itself unless operand 0 is rtx_equal_p to operand 2, in which
;; case a fresh V8HI pseudo is used so operand 2 is still intact for the
;; add.
5861 (define_expand "umaxv8hi3"
5862 [(set (match_operand:V8HI 0 "register_operand" "")
5863 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5864 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5868 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5871 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5872 if (rtx_equal_p (op3, op2))
5873 op3 = gen_reg_rtx (V8HImode);
5874 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5875 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Unsigned V4SI maximum.  Two strategies are visible in the preparation
;; code: canonicalising the operands with
;; ix86_fixup_binary_operands_no_copy (UMAX, ...), or calling
;; ix86_expand_int_vcond with an unsigned GTU comparison so that operand 1
;; is selected when (operand 1 >u operand 2) and operand 2 otherwise.
5880 (define_expand "umaxv4si3"
5881 [(set (match_operand:V4SI 0 "register_operand" "")
5882 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5883 (match_operand:V4SI 2 "register_operand" "")))]
5887 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5893 xops[0] = operands[0];
5894 xops[1] = operands[1];
5895 xops[2] = operands[2];
5896 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5897 xops[4] = operands[1];
5898 xops[5] = operands[2];
5899 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the VI24_128 modes.  Two strategies are visible in
;; the preparation code: canonicalising the operands with
;; ix86_fixup_binary_operands_no_copy (UMIN, ...), or calling
;; ix86_expand_int_vcond with a GTU comparison and swapped value slots so
;; that operand 2 is selected when (operand 1 >u operand 2) and operand 1
;; otherwise.
5905 (define_expand "umin<mode>3"
5906 [(set (match_operand:VI24_128 0 "register_operand" "")
5907 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5908 (match_operand:VI24_128 2 "register_operand" "")))]
5912 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5918 xops[0] = operands[0];
5919 xops[1] = operands[2];
5920 xops[2] = operands[1];
5921 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5922 xops[4] = operands[1];
5923 xops[5] = operands[2];
5924 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI maximum, expanded through ix86_expand_int_vcond with a GTU
;; comparison: the result is operand 1 when (operand 1 >u operand 2) and
;; operand 2 otherwise.
5930 (define_expand "umaxv2di3"
5931 [(set (match_operand:V2DI 0 "register_operand" "")
5932 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5933 (match_operand:V2DI 2 "register_operand" "")))]
5939 xops[0] = operands[0];
5940 xops[1] = operands[1];
5941 xops[2] = operands[2];
5942 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5943 xops[4] = operands[1];
5944 xops[5] = operands[2];
5945 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI minimum, expanded through ix86_expand_int_vcond with a GTU
;; comparison and swapped value slots: the result is operand 2 when
;; (operand 1 >u operand 2) and operand 1 otherwise.
5950 (define_expand "uminv2di3"
5951 [(set (match_operand:V2DI 0 "register_operand" "")
5952 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5953 (match_operand:V2DI 2 "register_operand" "")))]
5959 xops[0] = operands[0];
5960 xops[1] = operands[2];
5961 xops[2] = operands[1];
5962 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5963 xops[4] = operands[1];
5964 xops[5] = operands[2];
5965 ok = ix86_expand_int_vcond (xops);
5970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5972 ;; Parallel integral comparisons
5974 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the equality comparison on the VI1248_256 modes.  Inputs
;; may be registers or memory; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy with code EQ.
5976 (define_expand "avx2_eq<mode>3"
5977 [(set (match_operand:VI1248_256 0 "register_operand" "")
5979 (match_operand:VI1248_256 1 "nonimmediate_operand" "")
5980 (match_operand:VI1248_256 2 "nonimmediate_operand" "")))]
5982 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality comparison on the VI1248_256 modes, printed as
;; vpcmpeq<ssemodesuffix>.  Operand 1 is marked commutative ("%").
;; Requires TARGET_AVX2 and operands accepted by ix86_binary_operator_ok.
5984 (define_insn "*avx2_eq<mode>3"
5985 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
5987 (match_operand:VI1248_256 1 "nonimmediate_operand" "%x")
5988 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
5989 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5990 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5991 [(set_attr "type" "ssecmp")
5992 (set_attr "prefix_extra" "1")
5993 (set_attr "prefix" "vex")
5994 (set_attr "mode" "OI")])
;; SSE4.1 V2DI equality comparison, printed as pcmpeqq (alternative 0,
;; operand 1 tied to operand 0) or vpcmpeqq (alternative 1).  Requires
;; TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
5996 (define_insn "*sse4_1_eqv2di3"
5997 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5999 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6000 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6001 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6003 pcmpeqq\t{%2, %0|%0, %2}
6004 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6005 [(set_attr "isa" "noavx,avx")
6006 (set_attr "type" "ssecmp")
6007 (set_attr "prefix_extra" "1")
6008 (set_attr "prefix" "orig,vex")
6009 (set_attr "mode" "TI")])
;; SSE2 equality comparison on the VI124_128 modes, printed as
;; pcmpeq<ssemodesuffix> (alternative 0, operand 1 tied to operand 0) or
;; vpcmpeq<ssemodesuffix> (alternative 1).  Enabled only for TARGET_SSE2
;; without TARGET_XOP, and only for operands accepted by
;; ix86_binary_operator_ok.
6011 (define_insn "*sse2_eq<mode>3"
6012 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6014 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6015 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6016 "TARGET_SSE2 && !TARGET_XOP
6017 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6019 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6020 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6021 [(set_attr "isa" "noavx,avx")
6022 (set_attr "type" "ssecmp")
6023 (set_attr "prefix_data16" "1,*")
6024 (set_attr "prefix" "orig,vex")
6025 (set_attr "mode" "TI")])
;; Expander for the equality comparison on the VI124_128 modes, enabled
;; for TARGET_SSE2 without TARGET_XOP.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy with code EQ.
6027 (define_expand "sse2_eq<mode>3"
6028 [(set (match_operand:VI124_128 0 "register_operand" "")
6030 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6031 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6032 "TARGET_SSE2 && !TARGET_XOP "
6033 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the V2DI equality comparison.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy with code EQ.
6035 (define_expand "sse4_1_eqv2di3"
6036 [(set (match_operand:V2DI 0 "register_operand" "")
6038 (match_operand:V2DI 1 "nonimmediate_operand" "")
6039 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6041 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than comparison, printed as pcmpgtq (alternative 0,
;; operand 1 tied to operand 0) or vpcmpgtq (alternative 1).  Operand 1
;; must be a register and carries no commutative marker; operand 2 may be
;; memory.
6043 (define_insn "sse4_2_gtv2di3"
6044 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6046 (match_operand:V2DI 1 "register_operand" "0,x")
6047 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6050 pcmpgtq\t{%2, %0|%0, %2}
6051 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6052 [(set_attr "isa" "noavx,avx")
6053 (set_attr "type" "ssecmp")
6054 (set_attr "prefix_extra" "1")
6055 (set_attr "prefix" "orig,vex")
6056 (set_attr "mode" "TI")])
;; Greater-than comparison on the VI1248_256 modes, printed as the VEX
;; three-operand vpcmpgt<ssemodesuffix>.  Operand 1 must be a register;
;; operand 2 may be memory.
6058 (define_insn "avx2_gt<mode>3"
6059 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
6061 (match_operand:VI1248_256 1 "register_operand" "x")
6062 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
6064 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6065 [(set_attr "type" "ssecmp")
6066 (set_attr "prefix_extra" "1")
6067 (set_attr "prefix" "vex")
6068 (set_attr "mode" "OI")])
;; Greater-than comparison on the VI124_128 modes, printed as
;; pcmpgt<ssemodesuffix> (alternative 0, operand 1 tied to operand 0) or
;; vpcmpgt<ssemodesuffix> (alternative 1).  Enabled only for TARGET_SSE2
;; without TARGET_XOP.
6070 (define_insn "sse2_gt<mode>3"
6071 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6073 (match_operand:VI124_128 1 "register_operand" "0,x")
6074 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6075 "TARGET_SSE2 && !TARGET_XOP"
6077 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6078 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6079 [(set_attr "isa" "noavx,avx")
6080 (set_attr "type" "ssecmp")
6081 (set_attr "prefix_data16" "1,*")
6082 (set_attr "prefix" "orig,vex")
6083 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   operand 0 = (operand 3 applied to operands 4 and 5)
;;               ? operand 1 : operand 2.
;; The comparison operator (operand 3) has an empty predicate; all of the
;; work is delegated to ix86_expand_int_vcond.
6085 (define_expand "vcond<mode>"
6086 [(set (match_operand:VI124_128 0 "register_operand" "")
6087 (if_then_else:VI124_128
6088 (match_operator 3 ""
6089 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6090 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6091 (match_operand:VI124_128 1 "general_operand" "")
6092 (match_operand:VI124_128 2 "general_operand" "")))]
6095 bool ok = ix86_expand_int_vcond (operands);
;; V2DI vector conditional select driven by comparison operator 3 on
;; operands 4 and 5, with operands 1 and 2 as the two candidate values.
;; Expansion is delegated to ix86_expand_int_vcond.
6100 (define_expand "vcondv2di"
6101 [(set (match_operand:V2DI 0 "register_operand" "")
6103 (match_operator 3 ""
6104 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6105 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6106 (match_operand:V2DI 1 "general_operand" "")
6107 (match_operand:V2DI 2 "general_operand" "")))]
6110 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select ("vcondu" naming) for the VI124_128 modes:
;;   operand 0 = (operand 3 applied to operands 4 and 5)
;;               ? operand 1 : operand 2.
;; Expansion is delegated to ix86_expand_int_vcond.
6115 (define_expand "vcondu<mode>"
6116 [(set (match_operand:VI124_128 0 "register_operand" "")
6117 (if_then_else:VI124_128
6118 (match_operator 3 ""
6119 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6120 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6121 (match_operand:VI124_128 1 "general_operand" "")
6122 (match_operand:VI124_128 2 "general_operand" "")))]
6125 bool ok = ix86_expand_int_vcond (operands);
;; V2DI vector conditional select ("vcondu" naming) driven by comparison
;; operator 3 on operands 4 and 5, with operands 1 and 2 as the two
;; candidate values.  Expansion is delegated to ix86_expand_int_vcond.
6130 (define_expand "vconduv2di"
6131 [(set (match_operand:V2DI 0 "register_operand" "")
6133 (match_operator 3 ""
6134 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6135 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6136 (match_operand:V2DI 1 "general_operand" "")
6137 (match_operand:V2DI 2 "general_operand" "")))]
6140 bool ok = ix86_expand_int_vcond (operands);
6145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6147 ;; Parallel bitwise logical operations
6149 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an xor of operand 1.
;; The preparation code builds a CONST_VECTOR with every one of the
;; mode's GET_MODE_NUNITS elements set to constm1_rtx, forces it into a
;; register, and stores it as operand 2.
6151 (define_expand "one_cmpl<mode>2"
6152 [(set (match_operand:VI 0 "register_operand" "")
6153 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6157 int i, n = GET_MODE_NUNITS (<MODE>mode);
6158 rtvec v = rtvec_alloc (n);
6160 for (i = 0; i < n; ++i)
6161 RTVEC_ELT (v, i) = constm1_rtx;
6163 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on the VI_AVX2 modes: the
;; complement (not) of register operand 1 is combined with operand 2,
;; which may be a register or memory.
6166 (define_expand "<sse2_avx2>_andnot<mode>3"
6167 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6169 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6170 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not on the VI modes: (not operand 1) combined with operand 2.
;; The output code picks a mnemonic from the insn's "mode" attribute
;; (asserting the matching TARGET_* flag) and formats it into a static
;; buffer, using the two-operand template for alternative 0 and the
;; "v"-prefixed three-operand template for alternative 1.
;;
;; The "mode" attribute is computed by the cond at the end.  Its size test
;; uses GET_MODE_SIZE, which is measured in bytes, so the threshold that
;; separates 128-bit from 256-bit vectors is 16.  The previous threshold
;; of 128 could never be exceeded by a vector mode, which left the V8SF
;; arm unreachable for 256-bit modes when TARGET_AVX2 is off.
6173 (define_insn "*andnot<mode>3"
6174 [(set (match_operand:VI 0 "register_operand" "=x,x")
6176 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6177 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6180 static char buf[32];
6184 switch (get_attr_mode (insn))
6187 gcc_assert (TARGET_AVX2);
6189 gcc_assert (TARGET_SSE2);
6195 gcc_assert (TARGET_AVX);
6197 gcc_assert (TARGET_SSE);
6206 switch (which_alternative)
6209 ops = "%s\t{%%2, %%0|%%0, %%2}";
6212 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6218 snprintf (buf, sizeof (buf), ops, tmp);
6221 [(set_attr "isa" "noavx,avx")
6222 (set_attr "type" "sselog")
6223 (set (attr "prefix_data16")
6225 (and (eq_attr "alternative" "0")
6226 (eq_attr "mode" "TI"))
6228 (const_string "*")))
6229 (set_attr "prefix" "orig,vex")
6231 (cond [(ne (symbol_ref "TARGET_AVX2") (const_int 0))
6233 (ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
6234 (const_string "V8SF")
6235 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
6238 (const_string "V4SF")))])
;; Expander for a <code>-parameterised binary operation on the VI modes.
;; Inputs may be registers or memory; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
6240 (define_expand "<code><mode>3"
6241 [(set (match_operand:VI 0 "register_operand" "")
6243 (match_operand:VI 1 "nonimmediate_operand" "")
6244 (match_operand:VI 2 "nonimmediate_operand" "")))]
6246 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; <code>-parameterised commutative binary operation on the VI modes
;; (operand 1 carries the "%" marker), valid only for operands accepted by
;; ix86_binary_operator_ok.  The output code picks a mnemonic from the
;; insn's "mode" attribute (asserting the matching TARGET_* flag) and
;; formats it into a static buffer, using the two-operand template for
;; alternative 0 and the "v"-prefixed three-operand template for
;; alternative 1.
;;
;; The "mode" attribute is computed by the cond at the end.  Its size test
;; uses GET_MODE_SIZE, which is measured in bytes, so the threshold that
;; separates 128-bit from 256-bit vectors is 16.  The previous threshold
;; of 128 could never be exceeded by a vector mode, which left the V8SF
;; arm unreachable for 256-bit modes when TARGET_AVX2 is off.
6248 (define_insn "*<code><mode>3"
6249 [(set (match_operand:VI 0 "register_operand" "=x,x")
6251 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6252 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6254 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6256 static char buf[32];
6260 switch (get_attr_mode (insn))
6263 gcc_assert (TARGET_AVX2);
6265 gcc_assert (TARGET_SSE2);
6271 gcc_assert (TARGET_AVX);
6273 gcc_assert (TARGET_SSE);
6282 switch (which_alternative)
6285 ops = "%s\t{%%2, %%0|%%0, %%2}";
6288 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6294 snprintf (buf, sizeof (buf), ops, tmp);
6297 [(set_attr "isa" "noavx,avx")
6298 (set_attr "type" "sselog")
6299 (set (attr "prefix_data16")
6301 (and (eq_attr "alternative" "0")
6302 (eq_attr "mode" "TI"))
6304 (const_string "*")))
6305 (set_attr "prefix" "orig,vex")
6307 (cond [(ne (symbol_ref "TARGET_AVX2") (const_int 0))
6309 (ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
6310 (const_string "V8SF")
6311 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
6314 (const_string "V4SF")))])
;; And-not on TFmode values held in SSE registers: (not operand 1)
;; combined with operand 2.  Printed as pandn (alternative 0, operand 1
;; tied to operand 0) or vpandn (alternative 1).
6316 (define_insn "*andnottf3"
6317 [(set (match_operand:TF 0 "register_operand" "=x,x")
6319 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6320 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6323 pandn\t{%2, %0|%0, %2}
6324 vpandn\t{%2, %1, %0|%0, %1, %2}"
6325 [(set_attr "isa" "noavx,avx")
6326 (set_attr "type" "sselog")
6327 (set_attr "prefix_data16" "1,*")
6328 (set_attr "prefix" "orig,vex")
6329 (set_attr "mode" "TI")])
;; Expander for a <code>-parameterised binary operation on TFmode.  Inputs
;; may be registers or memory; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
6331 (define_expand "<code>tf3"
6332 [(set (match_operand:TF 0 "register_operand" "")
6334 (match_operand:TF 1 "nonimmediate_operand" "")
6335 (match_operand:TF 2 "nonimmediate_operand" "")))]
6337 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code>-parameterised commutative logical operation on TFmode values
;; held in SSE registers.  Printed as p<logic> (alternative 0, operand 1
;; tied to operand 0) or vp<logic> (alternative 1); valid only for
;; operands accepted by ix86_binary_operator_ok.
6339 (define_insn "*<code>tf3"
6340 [(set (match_operand:TF 0 "register_operand" "=x,x")
6342 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6343 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6345 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6347 p<logic>\t{%2, %0|%0, %2}
6348 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6349 [(set_attr "isa" "noavx,avx")
6350 (set_attr "type" "sselog")
6351 (set_attr "prefix_data16" "1,*")
6352 (set_attr "prefix" "orig,vex")
6353 (set_attr "mode" "TI")])
6355 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6357 ;; Parallel integral element swizzling
6359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_128 vectors into one <ssepackmode> result.
;; Both inputs are re-viewed in <ssepackmode> with gen_lowpart and then
;; passed to ix86_expand_vec_extract_even_odd with a final argument of 0.
6361 (define_expand "vec_pack_trunc_<mode>"
6362 [(match_operand:<ssepackmode> 0 "register_operand" "")
6363 (match_operand:VI248_128 1 "register_operand" "")
6364 (match_operand:VI248_128 2 "register_operand" "")]
6367 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6368 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6369 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation into the VI1_AVX2 modes: the result is the
;; vec_concat of the ss_truncate of operand 1 and the ss_truncate of
;; operand 2, each in <sseunpackmode>.  Printed as packsswb (alternative 0,
;; operand 1 tied to operand 0) or vpacksswb (alternative 1).
6373 (define_insn "<sse2_avx2>_packsswb"
6374 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6375 (vec_concat:VI1_AVX2
6376 (ss_truncate:<ssehalfvecmode>
6377 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6378 (ss_truncate:<ssehalfvecmode>
6379 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6382 packsswb\t{%2, %0|%0, %2}
6383 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6384 [(set_attr "isa" "noavx,avx")
6385 (set_attr "type" "sselog")
6386 (set_attr "prefix_data16" "1,*")
6387 (set_attr "prefix" "orig,vex")
6388 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation into the VI2_AVX2 modes: the result is the
;; vec_concat of the ss_truncate of operand 1 and the ss_truncate of
;; operand 2, each in <sseunpackmode>.  Printed as packssdw (alternative 0,
;; operand 1 tied to operand 0) or vpackssdw (alternative 1).
6390 (define_insn "<sse2_avx2>_packssdw"
6391 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6392 (vec_concat:VI2_AVX2
6393 (ss_truncate:<ssehalfvecmode>
6394 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6395 (ss_truncate:<ssehalfvecmode>
6396 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6399 packssdw\t{%2, %0|%0, %2}
6400 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6401 [(set_attr "isa" "noavx,avx")
6402 (set_attr "type" "sselog")
6403 (set_attr "prefix_data16" "1,*")
6404 (set_attr "prefix" "orig,vex")
6405 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation into the VI1_AVX2 modes: the result is
;; the vec_concat of the us_truncate of operand 1 and the us_truncate of
;; operand 2, each in <sseunpackmode>.  Printed as packuswb (alternative 0,
;; operand 1 tied to operand 0) or vpackuswb (alternative 1).
6407 (define_insn "<sse2_avx2>_packuswb"
6408 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6409 (vec_concat:VI1_AVX2
6410 (us_truncate:<ssehalfvecmode>
6411 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6412 (us_truncate:<ssehalfvecmode>
6413 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6416 packuswb\t{%2, %0|%0, %2}
6417 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6418 [(set_attr "isa" "noavx,avx")
6419 (set_attr "type" "sselog")
6420 (set_attr "prefix_data16" "1,*")
6421 (set_attr "prefix" "orig,vex")
6422 (set_attr "mode" "<sseinsnmode>")])
;; V32QI high-byte interleave, printed as vpunpckhbw.  The selector pairs
;; element i of operand 1 with element i + 32 (i.e. element i of
;; operand 2, assuming the usual vec_select over the concatenation of
;; operands 1 and 2) for i = 8..15 and i = 24..31.
;;
;; A V32QI result needs exactly 32 selector entries (16 pairs), all below
;; 64.  A stray 17th pair, (const_int 32) (const_int 64), used to follow
;; the list; it made the selector 34 entries long and referenced the
;; out-of-range index 64, so it has been removed.
6424 (define_insn "avx2_interleave_highv32qi"
6425 [(set (match_operand:V32QI 0 "register_operand" "=x")
6428 (match_operand:V32QI 1 "register_operand" "x")
6429 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6430 (parallel [(const_int 8) (const_int 40)
6431 (const_int 9) (const_int 41)
6432 (const_int 10) (const_int 42)
6433 (const_int 11) (const_int 43)
6434 (const_int 12) (const_int 44)
6435 (const_int 13) (const_int 45)
6436 (const_int 14) (const_int 46)
6437 (const_int 15) (const_int 47)
6438 (const_int 24) (const_int 56)
6439 (const_int 25) (const_int 57)
6440 (const_int 26) (const_int 58)
6441 (const_int 27) (const_int 59)
6442 (const_int 28) (const_int 60)
6443 (const_int 29) (const_int 61)
6444 (const_int 30) (const_int 62)
6445 (const_int 31) (const_int 63)])))]
6448 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6449 [(set_attr "type" "sselog")
6450 (set_attr "prefix" "vex")
6451 (set_attr "mode" "OI")])
;; V16QI high-byte interleave, printed as punpckhbw (alternative 0,
;; operand 1 tied to operand 0) or vpunpckhbw (alternative 1).  The
;; selector pairs index i with index i + 16 for i = 8..15.
6453 (define_insn "vec_interleave_highv16qi"
6454 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6457 (match_operand:V16QI 1 "register_operand" "0,x")
6458 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6459 (parallel [(const_int 8) (const_int 24)
6460 (const_int 9) (const_int 25)
6461 (const_int 10) (const_int 26)
6462 (const_int 11) (const_int 27)
6463 (const_int 12) (const_int 28)
6464 (const_int 13) (const_int 29)
6465 (const_int 14) (const_int 30)
6466 (const_int 15) (const_int 31)])))]
6469 punpckhbw\t{%2, %0|%0, %2}
6470 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6471 [(set_attr "isa" "noavx,avx")
6472 (set_attr "type" "sselog")
6473 (set_attr "prefix_data16" "1,*")
6474 (set_attr "prefix" "orig,vex")
6475 (set_attr "mode" "TI")])
;; V32QI low-byte interleave, printed as vpunpcklbw.  The selector pairs
;; element i of operand 1 with element i + 32 (i.e. element i of
;; operand 2, assuming the usual vec_select over the concatenation of
;; operands 1 and 2) for i = 0..7 and i = 16..23.
;;
;; A V32QI result needs exactly 32 selector entries (16 pairs).  A stray
;; pair, (const_int 15) (const_int 47), used to sit between the two
;; groups; byte 15 is not one of the low eight bytes of either group and
;; the extra pair made the selector 34 entries long, so it has been
;; removed.
6477 (define_insn "avx2_interleave_lowv32qi"
6478 [(set (match_operand:V32QI 0 "register_operand" "=x")
6481 (match_operand:V32QI 1 "register_operand" "x")
6482 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6483 (parallel [(const_int 0) (const_int 32)
6484 (const_int 1) (const_int 33)
6485 (const_int 2) (const_int 34)
6486 (const_int 3) (const_int 35)
6487 (const_int 4) (const_int 36)
6488 (const_int 5) (const_int 37)
6489 (const_int 6) (const_int 38)
6490 (const_int 7) (const_int 39)
6492 (const_int 16) (const_int 48)
6493 (const_int 17) (const_int 49)
6494 (const_int 18) (const_int 50)
6495 (const_int 19) (const_int 51)
6496 (const_int 20) (const_int 52)
6497 (const_int 21) (const_int 53)
6498 (const_int 22) (const_int 54)
6499 (const_int 23) (const_int 55)])))]
6501 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6502 [(set_attr "type" "sselog")
6503 (set_attr "prefix" "vex")
6504 (set_attr "mode" "OI")])
;; V16QI low-byte interleave, printed as punpcklbw (alternative 0,
;; operand 1 tied to operand 0) or vpunpcklbw (alternative 1).  The
;; selector pairs index i with index i + 16 for i = 0..7.
6506 (define_insn "vec_interleave_lowv16qi"
6507 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6510 (match_operand:V16QI 1 "register_operand" "0,x")
6511 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6512 (parallel [(const_int 0) (const_int 16)
6513 (const_int 1) (const_int 17)
6514 (const_int 2) (const_int 18)
6515 (const_int 3) (const_int 19)
6516 (const_int 4) (const_int 20)
6517 (const_int 5) (const_int 21)
6518 (const_int 6) (const_int 22)
6519 (const_int 7) (const_int 23)])))]
6522 punpcklbw\t{%2, %0|%0, %2}
6523 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6524 [(set_attr "isa" "noavx,avx")
6525 (set_attr "type" "sselog")
6526 (set_attr "prefix_data16" "1,*")
6527 (set_attr "prefix" "orig,vex")
6528 (set_attr "mode" "TI")])
;; V16HI high-word interleave, printed as vpunpckhwd.  The selector pairs
;; index i with index i + 16 for i = 4..7 and i = 12..15.
6530 (define_insn "avx2_interleave_highv16hi"
6531 [(set (match_operand:V16HI 0 "register_operand" "=x")
6534 (match_operand:V16HI 1 "register_operand" "x")
6535 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6536 (parallel [(const_int 4) (const_int 20)
6537 (const_int 5) (const_int 21)
6538 (const_int 6) (const_int 22)
6539 (const_int 7) (const_int 23)
6540 (const_int 12) (const_int 28)
6541 (const_int 13) (const_int 29)
6542 (const_int 14) (const_int 30)
6543 (const_int 15) (const_int 31)])))]
6545 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6546 [(set_attr "type" "sselog")
6547 (set_attr "prefix" "vex")
6548 (set_attr "mode" "OI")])
;; V8HI high-word interleave, printed as punpckhwd (alternative 0,
;; operand 1 tied to operand 0) or vpunpckhwd (alternative 1).  The
;; selector pairs index i with index i + 8 for i = 4..7.
6550 (define_insn "vec_interleave_highv8hi"
6551 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6554 (match_operand:V8HI 1 "register_operand" "0,x")
6555 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6556 (parallel [(const_int 4) (const_int 12)
6557 (const_int 5) (const_int 13)
6558 (const_int 6) (const_int 14)
6559 (const_int 7) (const_int 15)])))]
6562 punpckhwd\t{%2, %0|%0, %2}
6563 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6564 [(set_attr "isa" "noavx,avx")
6565 (set_attr "type" "sselog")
6566 (set_attr "prefix_data16" "1,*")
6567 (set_attr "prefix" "orig,vex")
6568 (set_attr "mode" "TI")])
;; V16HI low-word interleave, printed as vpunpcklwd.  The selector pairs
;; index i with index i + 16 for i = 0..3 and i = 8..11.
6570 (define_insn "avx2_interleave_lowv16hi"
6571 [(set (match_operand:V16HI 0 "register_operand" "=x")
6574 (match_operand:V16HI 1 "register_operand" "x")
6575 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6576 (parallel [(const_int 0) (const_int 16)
6577 (const_int 1) (const_int 17)
6578 (const_int 2) (const_int 18)
6579 (const_int 3) (const_int 19)
6580 (const_int 8) (const_int 24)
6581 (const_int 9) (const_int 25)
6582 (const_int 10) (const_int 26)
6583 (const_int 11) (const_int 27)])))]
6585 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6586 [(set_attr "type" "sselog")
6587 (set_attr "prefix" "vex")
6588 (set_attr "mode" "OI")])
;; V8HI low-word interleave, printed as punpcklwd (alternative 0,
;; operand 1 tied to operand 0) or vpunpcklwd (alternative 1).  The
;; selector pairs index i with index i + 8 for i = 0..3.
6590 (define_insn "vec_interleave_lowv8hi"
6591 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6594 (match_operand:V8HI 1 "register_operand" "0,x")
6595 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6596 (parallel [(const_int 0) (const_int 8)
6597 (const_int 1) (const_int 9)
6598 (const_int 2) (const_int 10)
6599 (const_int 3) (const_int 11)])))]
6602 punpcklwd\t{%2, %0|%0, %2}
6603 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6604 [(set_attr "isa" "noavx,avx")
6605 (set_attr "type" "sselog")
6606 (set_attr "prefix_data16" "1,*")
6607 (set_attr "prefix" "orig,vex")
6608 (set_attr "mode" "TI")])
;; V8SI high-dword interleave, printed as vpunpckhdq.  The selector pairs
;; index i with index i + 8 for i = 2, 3, 6 and 7.
6610 (define_insn "avx2_interleave_highv8si"
6611 [(set (match_operand:V8SI 0 "register_operand" "=x")
6614 (match_operand:V8SI 1 "register_operand" "x")
6615 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6616 (parallel [(const_int 2) (const_int 10)
6617 (const_int 3) (const_int 11)
6618 (const_int 6) (const_int 14)
6619 (const_int 7) (const_int 15)])))]
6621 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6622 [(set_attr "type" "sselog")
6623 (set_attr "prefix" "vex")
6624 (set_attr "mode" "OI")])
;; V4SI high-dword interleave, printed as punpckhdq (alternative 0,
;; operand 1 tied to operand 0) or vpunpckhdq (alternative 1).  The
;; selector pairs index i with index i + 4 for i = 2 and 3.
6626 (define_insn "vec_interleave_highv4si"
6627 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6630 (match_operand:V4SI 1 "register_operand" "0,x")
6631 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6632 (parallel [(const_int 2) (const_int 6)
6633 (const_int 3) (const_int 7)])))]
6636 punpckhdq\t{%2, %0|%0, %2}
6637 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6638 [(set_attr "isa" "noavx,avx")
6639 (set_attr "type" "sselog")
6640 (set_attr "prefix_data16" "1,*")
6641 (set_attr "prefix" "orig,vex")
6642 (set_attr "mode" "TI")])
;; V8SI low-dword interleave, printed as vpunpckldq.  The selector pairs
;; index i with index i + 8 for i = 0, 1, 4 and 5.
6644 (define_insn "avx2_interleave_lowv8si"
6645 [(set (match_operand:V8SI 0 "register_operand" "=x")
6648 (match_operand:V8SI 1 "register_operand" "x")
6649 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6650 (parallel [(const_int 0) (const_int 8)
6651 (const_int 1) (const_int 9)
6652 (const_int 4) (const_int 12)
6653 (const_int 5) (const_int 13)])))]
6655 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6656 [(set_attr "type" "sselog")
6657 (set_attr "prefix" "vex")
6658 (set_attr "mode" "OI")])
;; V4SI low-dword interleave, printed as punpckldq (alternative 0,
;; operand 1 tied to operand 0) or vpunpckldq (alternative 1).  The
;; selector pairs index i with index i + 4 for i = 0 and 1.
6660 (define_insn "vec_interleave_lowv4si"
6661 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6664 (match_operand:V4SI 1 "register_operand" "0,x")
6665 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6666 (parallel [(const_int 0) (const_int 4)
6667 (const_int 1) (const_int 5)])))]
6670 punpckldq\t{%2, %0|%0, %2}
6671 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6672 [(set_attr "isa" "noavx,avx")
6673 (set_attr "type" "sselog")
6674 (set_attr "prefix_data16" "1,*")
6675 (set_attr "prefix" "orig,vex")
6676 (set_attr "mode" "TI")])
;; V8HI is listed without a per-mode condition; V16QI and V4SI are enabled
;; under TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
6678 ;; Modes handled by pinsr patterns.
6679 (define_mode_iterator PINSR_MODE
6680 [(V16QI "TARGET_SSE4_1") V8HI
6681 (V4SI "TARGET_SSE4_1")
6682 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per mode: "sse2" for V8HI and "sse4_1" for V16QI,
;; V4SI and V2DI.
6684 (define_mode_attr sse2p4_1
6685 [(V16QI "sse4_1") (V8HI "sse2")
6686 (V4SI "sse4_1") (V2DI "sse4_1")])
6688 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (GPR or memory) into vector operand 1 through a
;; vec_merge whose mask is operand 3.  The insn condition requires
;; exact_log2 of that mask to be a valid element number (checked as
;; unsigned, so a non-power-of-two mask is rejected), and the output code
;; replaces operands[3] with that element number before printing.
;; Alternatives 0-1 (isa noavx) tie operand 1 to the output and print pinsr;
;; alternatives 2-3 (isa avx) print the extra-operand vpinsr form.  The
;; register source is printed with %k2 when the scalar mode is narrower
;; than SImode.
6689 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6690 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6691 (vec_merge:PINSR_MODE
6692 (vec_duplicate:PINSR_MODE
6693 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6694 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6695 (match_operand:SI 3 "const_int_operand" "")))]
6697 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6698 < GET_MODE_NUNITS (<MODE>mode))"
6700 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6702 switch (which_alternative)
6705 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6706 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6709 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6711 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6712 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6715 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6720 [(set_attr "isa" "noavx,noavx,avx,avx")
6721 (set_attr "type" "sselog")
6722 (set (attr "prefix_rex")
6724 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6725 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6727 (const_string "*")))
6728 (set (attr "prefix_data16")
6730 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6731 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6733 (const_string "*")))
6734 (set (attr "prefix_extra")
6736 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6737 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6739 (const_string "1")))
6740 (set_attr "length_immediate" "1")
6741 (set_attr "prefix" "orig,orig,vex,vex")
6742 (set_attr "mode" "TI")])
;; Emits %vpextrb from a V16QI register (operand 1) at a constant index in
;; 0..15 (operand 2) into an SWI48 register, which is printed through %k0.
6744 (define_insn "*sse4_1_pextrb_<mode>"
6745 [(set (match_operand:SWI48 0 "register_operand" "=r")
6748 (match_operand:V16QI 1 "register_operand" "x")
6749 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6751 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6752 [(set_attr "type" "sselog")
6753 (set_attr "prefix_extra" "1")
6754 (set_attr "length_immediate" "1")
6755 (set_attr "prefix" "maybe_vex")
6756 (set_attr "mode" "TI")])
;; Memory-destination form: emits %vpextrb from a V16QI register at a
;; constant index in 0..15 directly into a QImode memory operand.
6758 (define_insn "*sse4_1_pextrb_memory"
6759 [(set (match_operand:QI 0 "memory_operand" "=m")
6761 (match_operand:V16QI 1 "register_operand" "x")
6762 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6764 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6765 [(set_attr "type" "sselog")
6766 (set_attr "prefix_extra" "1")
6767 (set_attr "length_immediate" "1")
6768 (set_attr "prefix" "maybe_vex")
6769 (set_attr "mode" "TI")])
;; Emits %vpextrw from a V8HI register (operand 1) at a constant index in
;; 0..7 (operand 2) into an SWI48 register, which is printed through %k0.
6771 (define_insn "*sse2_pextrw_<mode>"
6772 [(set (match_operand:SWI48 0 "register_operand" "=r")
6775 (match_operand:V8HI 1 "register_operand" "x")
6776 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6778 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6779 [(set_attr "type" "sselog")
6780 (set_attr "prefix_data16" "1")
6781 (set_attr "length_immediate" "1")
6782 (set_attr "prefix" "maybe_vex")
6783 (set_attr "mode" "TI")])
;; Memory-destination form: emits %vpextrw from a V8HI register at a
;; constant index in 0..7 directly into an HImode memory operand.
6785 (define_insn "*sse4_1_pextrw_memory"
6786 [(set (match_operand:HI 0 "memory_operand" "=m")
6788 (match_operand:V8HI 1 "register_operand" "x")
6789 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6791 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6792 [(set_attr "type" "sselog")
6793 (set_attr "prefix_extra" "1")
6794 (set_attr "length_immediate" "1")
6795 (set_attr "prefix" "maybe_vex")
6796 (set_attr "mode" "TI")])
;; Emits %vpextrd from a V4SI register (operand 1) at a constant index in
;; 0..3 (operand 2) into an SImode register or memory destination.
6798 (define_insn "*sse4_1_pextrd"
6799 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6801 (match_operand:V4SI 1 "register_operand" "x")
6802 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6804 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6805 [(set_attr "type" "sselog")
6806 (set_attr "prefix_extra" "1")
6807 (set_attr "length_immediate" "1")
6808 (set_attr "prefix" "maybe_vex")
6809 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd with a DImode register destination; the
;; destination is printed through %k0.
;; NOTE(review): the name suggests the DImode result is the zero-extended
;; element -- confirm against the full RTL of this pattern.
6811 (define_insn "*sse4_1_pextrd_zext"
6812 [(set (match_operand:DI 0 "register_operand" "=r")
6815 (match_operand:V4SI 1 "register_operand" "x")
6816 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6817 "TARGET_64BIT && TARGET_SSE4_1"
6818 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6819 [(set_attr "type" "sselog")
6820 (set_attr "prefix_extra" "1")
6821 (set_attr "length_immediate" "1")
6822 (set_attr "prefix" "maybe_vex")
6823 (set_attr "mode" "TI")])
6825 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Emits %vpextrq from a V2DI register (operand 1) at a constant index in
;; 0..1 (operand 2) into a DImode register or memory destination.  Enabled
;; only for TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is forced to 1.
6826 (define_insn "*sse4_1_pextrq"
6827 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6829 (match_operand:V2DI 1 "register_operand" "x")
6830 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6831 "TARGET_SSE4_1 && TARGET_64BIT"
6832 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6833 [(set_attr "type" "sselog")
6834 (set_attr "prefix_rex" "1")
6835 (set_attr "prefix_extra" "1")
6836 (set_attr "length_immediate" "1")
6837 (set_attr "prefix" "maybe_vex")
6838 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, range 0..255).
;; It splits the immediate into four 2-bit selectors (bits 0-1, 2-3, 4-5,
;; 6-7) and hands them to avx2_pshufd_1 as separate constants.
6840 (define_expand "avx2_pshufdv3"
6841 [(match_operand:V8SI 0 "register_operand" "")
6842 (match_operand:V8SI 1 "nonimmediate_operand" "")
6843 (match_operand:SI 2 "const_0_to_255_operand" "")]
6846 int mask = INTVAL (operands[2]);
6847 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
6848 GEN_INT ((mask >> 0) & 3),
6849 GEN_INT ((mask >> 2) & 3),
6850 GEN_INT ((mask >> 4) & 3),
6851 GEN_INT ((mask >> 6) & 3)));
;; V8SI shuffle with the selectors as separate operands 2..5, each in 0..3.
;; The output code packs them back into one immediate (2 bits per
;; selector, operand 2 in the low bits), stores it in operands[2], and
;; prints vpshufd.
6855 (define_insn "avx2_pshufd_1"
6856 [(set (match_operand:V8SI 0 "register_operand" "=x")
6858 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
6859 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6860 (match_operand 3 "const_0_to_3_operand" "")
6861 (match_operand 4 "const_0_to_3_operand" "")
6862 (match_operand 5 "const_0_to_3_operand" "")
6870 mask |= INTVAL (operands[2]) << 0;
6871 mask |= INTVAL (operands[3]) << 2;
6872 mask |= INTVAL (operands[4]) << 4;
6873 mask |= INTVAL (operands[5]) << 6;
6874 operands[2] = GEN_INT (mask);
6876 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
6878 [(set_attr "type" "sselog1")
6879 (set_attr "prefix" "vex")
6880 (set_attr "length_immediate" "1")
6881 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle mask (operand 2).  It splits the mask
;; into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and hands them to
;; sse2_pshufd_1 as separate constants.  Operand 2 is only required to be a
;; const_int here; bits above bit 7 are ignored by the masking.
6883 (define_expand "sse2_pshufd"
6884 [(match_operand:V4SI 0 "register_operand" "")
6885 (match_operand:V4SI 1 "nonimmediate_operand" "")
6886 (match_operand:SI 2 "const_int_operand" "")]
6889 int mask = INTVAL (operands[2]);
6890 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6891 GEN_INT ((mask >> 0) & 3),
6892 GEN_INT ((mask >> 2) & 3),
6893 GEN_INT ((mask >> 4) & 3),
6894 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the four selectors as operands 2..5, each in 0..3.
;; The output code packs them into one immediate (2 bits per selector,
;; operand 2 in the low bits), stores it in operands[2], and prints
;; %vpshufd.
6898 (define_insn "sse2_pshufd_1"
6899 [(set (match_operand:V4SI 0 "register_operand" "=x")
6901 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6902 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6903 (match_operand 3 "const_0_to_3_operand" "")
6904 (match_operand 4 "const_0_to_3_operand" "")
6905 (match_operand 5 "const_0_to_3_operand" "")])))]
6909 mask |= INTVAL (operands[2]) << 0;
6910 mask |= INTVAL (operands[3]) << 2;
6911 mask |= INTVAL (operands[4]) << 4;
6912 mask |= INTVAL (operands[5]) << 6;
6913 operands[2] = GEN_INT (mask);
6915 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6917 [(set_attr "type" "sselog1")
6918 (set_attr "prefix_data16" "1")
6919 (set_attr "prefix" "maybe_vex")
6920 (set_attr "length_immediate" "1")
6921 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, range 0..255).
;; It splits the immediate into four 2-bit selectors and hands them to
;; avx2_pshuflw_1 as separate constants.
6923 (define_expand "avx2_pshuflwv3"
6924 [(match_operand:V16HI 0 "register_operand" "")
6925 (match_operand:V16HI 1 "nonimmediate_operand" "")
6926 (match_operand:SI 2 "const_0_to_255_operand" "")]
6929 int mask = INTVAL (operands[2]);
6930 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
6931 GEN_INT ((mask >> 0) & 3),
6932 GEN_INT ((mask >> 2) & 3),
6933 GEN_INT ((mask >> 4) & 3),
6934 GEN_INT ((mask >> 6) & 3)));
;; V16HI shuffle with selectors as operands 2..5, each in 0..3.  The output
;; code packs them into one immediate (2 bits per selector, operand 2 in
;; the low bits), stores it in operands[2], and prints vpshuflw.
6938 (define_insn "avx2_pshuflw_1"
6939 [(set (match_operand:V16HI 0 "register_operand" "=x")
6941 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
6942 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6943 (match_operand 3 "const_0_to_3_operand" "")
6944 (match_operand 4 "const_0_to_3_operand" "")
6945 (match_operand 5 "const_0_to_3_operand" "")
6961 mask |= INTVAL (operands[2]) << 0;
6962 mask |= INTVAL (operands[3]) << 2;
6963 mask |= INTVAL (operands[4]) << 4;
6964 mask |= INTVAL (operands[5]) << 6;
6965 operands[2] = GEN_INT (mask);
6967 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6969 [(set_attr "type" "sselog")
6970 (set_attr "prefix" "vex")
6971 (set_attr "length_immediate" "1")
6972 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle mask (operand 2).  It splits the mask
;; into four 2-bit selectors and hands them to sse2_pshuflw_1 as separate
;; constants.
6974 (define_expand "sse2_pshuflw"
6975 [(match_operand:V8HI 0 "register_operand" "")
6976 (match_operand:V8HI 1 "nonimmediate_operand" "")
6977 (match_operand:SI 2 "const_int_operand" "")]
6980 int mask = INTVAL (operands[2]);
6981 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6982 GEN_INT ((mask >> 0) & 3),
6983 GEN_INT ((mask >> 2) & 3),
6984 GEN_INT ((mask >> 4) & 3),
6985 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle with selectors as operands 2..5, each in 0..3.  The output
;; code packs them into one immediate (2 bits per selector, operand 2 in
;; the low bits), stores it in operands[2], and prints %vpshuflw.  The
;; attributes set prefix_rep to 1 and prefix_data16 to 0.
6989 (define_insn "sse2_pshuflw_1"
6990 [(set (match_operand:V8HI 0 "register_operand" "=x")
6992 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6993 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6994 (match_operand 3 "const_0_to_3_operand" "")
6995 (match_operand 4 "const_0_to_3_operand" "")
6996 (match_operand 5 "const_0_to_3_operand" "")
7004 mask |= INTVAL (operands[2]) << 0;
7005 mask |= INTVAL (operands[3]) << 2;
7006 mask |= INTVAL (operands[4]) << 4;
7007 mask |= INTVAL (operands[5]) << 6;
7008 operands[2] = GEN_INT (mask);
7010 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7012 [(set_attr "type" "sselog")
7013 (set_attr "prefix_data16" "0")
7014 (set_attr "prefix_rep" "1")
7015 (set_attr "prefix" "maybe_vex")
7016 (set_attr "length_immediate" "1")
7017 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, range 0..255).
;; Each 2-bit selector is biased by +4 before being passed to
;; avx2_pshufhw_1, so the constants it receives lie in 4..7.
7019 (define_expand "avx2_pshufhwv3"
7020 [(match_operand:V16HI 0 "register_operand" "")
7021 (match_operand:V16HI 1 "nonimmediate_operand" "")
7022 (match_operand:SI 2 "const_0_to_255_operand" "")]
7025 int mask = INTVAL (operands[2]);
7026 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7027 GEN_INT (((mask >> 0) & 3) + 4),
7028 GEN_INT (((mask >> 2) & 3) + 4),
7029 GEN_INT (((mask >> 4) & 3) + 4),
7030 GEN_INT (((mask >> 6) & 3) + 4)));
;; V16HI shuffle with selectors as operands 2..5, each in 4..7.  The output
;; code removes the +4 bias, packs the results into one immediate (2 bits
;; per selector, operand 2 in the low bits), stores it in operands[2], and
;; prints vpshufhw.
7034 (define_insn "avx2_pshufhw_1"
7035 [(set (match_operand:V16HI 0 "register_operand" "=x")
7037 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7038 (parallel [(const_int 0)
7042 (match_operand 2 "const_4_to_7_operand" "")
7043 (match_operand 3 "const_4_to_7_operand" "")
7044 (match_operand 4 "const_4_to_7_operand" "")
7045 (match_operand 5 "const_4_to_7_operand" "")
7057 mask |= (INTVAL (operands[2]) - 4) << 0;
7058 mask |= (INTVAL (operands[3]) - 4) << 2;
7059 mask |= (INTVAL (operands[4]) - 4) << 4;
7060 mask |= (INTVAL (operands[5]) - 4) << 6;
7061 operands[2] = GEN_INT (mask);
7063 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7065 [(set_attr "type" "sselog")
7066 (set_attr "prefix" "vex")
7067 (set_attr "length_immediate" "1")
7068 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle mask (operand 2).  Each 2-bit
;; selector is biased by +4 before being passed to sse2_pshufhw_1, so the
;; constants it receives lie in 4..7.
7070 (define_expand "sse2_pshufhw"
7071 [(match_operand:V8HI 0 "register_operand" "")
7072 (match_operand:V8HI 1 "nonimmediate_operand" "")
7073 (match_operand:SI 2 "const_int_operand" "")]
7076 int mask = INTVAL (operands[2]);
7077 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7078 GEN_INT (((mask >> 0) & 3) + 4),
7079 GEN_INT (((mask >> 2) & 3) + 4),
7080 GEN_INT (((mask >> 4) & 3) + 4),
7081 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle with selectors as operands 2..5, each in 4..7.  The output
;; code removes the +4 bias, packs the results into one immediate (2 bits
;; per selector, operand 2 in the low bits), stores it in operands[2], and
;; prints %vpshufhw.  The attributes set prefix_rep to 1 and prefix_data16
;; to 0.
7085 (define_insn "sse2_pshufhw_1"
7086 [(set (match_operand:V8HI 0 "register_operand" "=x")
7088 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7089 (parallel [(const_int 0)
7093 (match_operand 2 "const_4_to_7_operand" "")
7094 (match_operand 3 "const_4_to_7_operand" "")
7095 (match_operand 4 "const_4_to_7_operand" "")
7096 (match_operand 5 "const_4_to_7_operand" "")])))]
7100 mask |= (INTVAL (operands[2]) - 4) << 0;
7101 mask |= (INTVAL (operands[3]) - 4) << 2;
7102 mask |= (INTVAL (operands[4]) - 4) << 4;
7103 mask |= (INTVAL (operands[5]) - 4) << 6;
7104 operands[2] = GEN_INT (mask);
7106 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7108 [(set_attr "type" "sselog")
7109 (set_attr "prefix_rep" "1")
7110 (set_attr "prefix_data16" "0")
7111 (set_attr "prefix" "maybe_vex")
7112 (set_attr "length_immediate" "1")
7113 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination and an SImode source
;; (operand 1).  The preparation statement supplies operand 2 as the V4SI
;; zero vector.
7115 (define_expand "sse2_loadd"
7116 [(set (match_operand:V4SI 0 "register_operand" "")
7119 (match_operand:SI 1 "nonimmediate_operand" ""))
7123 "operands[2] = CONST0_RTX (V4SImode);")
;; Five alternatives combining SImode operand 2 with V4SI operand 1:
;;   0-1: operand 1 is zero (C); %vmovd from memory or from a general
;;        register (destination class Yi for the latter).
;;   2:   operand 1 is zero; movss from memory (isa noavx).
;;   3:   operand 1 tied to the output; movss from an SSE register (noavx).
;;   4:   three-operand vmovss (isa avx).
7125 (define_insn "sse2_loadld"
7126 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7129 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7130 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7134 %vmovd\t{%2, %0|%0, %2}
7135 %vmovd\t{%2, %0|%0, %2}
7136 movss\t{%2, %0|%0, %2}
7137 movss\t{%2, %0|%0, %2}
7138 vmovss\t{%2, %1, %0|%0, %1, %2}"
7139 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7140 (set_attr "type" "ssemov")
7141 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7142 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Stores element 0 of a V4SI register to an SImode destination.  After
;; reload it is split into a plain move when inter-unit moves are allowed,
;; the destination is memory, or the destination is not a general
;; register.  For the split, operand 1 is rewritten as an SImode REG with
;; the same register number.
7144 (define_insn_and_split "sse2_stored"
7145 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7147 (match_operand:V4SI 1 "register_operand" "x,Yi")
7148 (parallel [(const_int 0)])))]
7151 "&& reload_completed
7152 && (TARGET_INTER_UNIT_MOVES
7153 || MEM_P (operands [0])
7154 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7155 [(set (match_dup 0) (match_dup 1))]
7156 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element read from a V4SI in offsettable memory (constraint "o") with a
;; constant index i in 0..3.  The split code replaces it with an SImode
;; move from the same address adjusted by i*4 bytes.
7158 (define_insn_and_split "*vec_ext_v4si_mem"
7159 [(set (match_operand:SI 0 "register_operand" "=r")
7161 (match_operand:V4SI 1 "memory_operand" "o")
7162 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7168 int i = INTVAL (operands[2]);
7170 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register into a DImode
;; register-or-memory destination.
7174 (define_expand "sse_storeq"
7175 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7177 (match_operand:V2DI 1 "register_operand" "")
7178 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from V2DI.  Memory-to-memory is
;; rejected by the condition.  Alternative 2 (offsettable memory source to
;; general register) is an ordinary mov{q}, typed imov with mode DI; the
;; first two alternatives leave their type and mode as "*".
7181 (define_insn "*sse2_storeq_rex64"
7182 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7184 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7185 (parallel [(const_int 0)])))]
7186 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7190 mov{q}\t{%1, %0|%0, %1}"
7191 [(set_attr "type" "*,*,imov")
7192 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI register to an SSE register or memory,
;; followed by a split body for the same RTL shape.  The split condition
;; requires inter-unit moves to be allowed, a memory destination, or a
;; destination that is not a general register; it then emits a plain move
;; with operand 1 rewritten as a DImode REG with the same register number.
7194 (define_insn "*sse2_storeq"
7195 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7197 (match_operand:V2DI 1 "register_operand" "x")
7198 (parallel [(const_int 0)])))]
7203 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7205 (match_operand:V2DI 1 "register_operand" "")
7206 (parallel [(const_int 0)])))]
7209 && (TARGET_INTER_UNIT_MOVES
7210 || MEM_P (operands [0])
7211 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7212 [(set (match_dup 0) (match_dup 1))]
7213 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 of a V2DI; memory-to-memory is rejected.
;; Alternatives:
;;   0: SSE register to memory via %vmovhps.
;;   1: in place, psrldq by 8 bytes (isa noavx, operand 1 tied to output).
;;   2: vpsrldq by 8 bytes (isa avx).
;;   3: offsettable memory to SSE register, %vmovq from %H1.
;;   4: offsettable memory to general register, mov{q} from %H1.
7215 (define_insn "*vec_extractv2di_1_rex64"
7216 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7218 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7219 (parallel [(const_int 1)])))]
7220 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7222 %vmovhps\t{%1, %0|%0, %1}
7223 psrldq\t{$8, %0|%0, 8}
7224 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7225 %vmovq\t{%H1, %0|%0, %H1}
7226 mov{q}\t{%H1, %0|%0, %H1}"
7227 [(set_attr "isa" "*,noavx,avx,*,*")
7228 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7229 (set_attr "length_immediate" "*,1,1,*,*")
7230 (set_attr "memory" "*,none,none,*,*")
7231 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7232 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (non-TARGET_64BIT) extraction of element 1 of a V2DI with only
;; SSE register or memory destinations; memory-to-memory is rejected.
;; Alternatives:
;;   0: SSE register to memory via %vmovhps.
;;   1: in place, psrldq by 8 bytes (isa sse2_noavx).
;;   2: vpsrldq by 8 bytes (isa avx).
;;   3: offsettable memory to register, %vmovq from %H1 (isa sse2).
;;   4: register to register via movhlps (isa noavx).
;;   5: offsettable memory to register, movlps from %H1 (isa noavx).
7234 (define_insn "*vec_extractv2di_1"
7235 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7237 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7238 (parallel [(const_int 1)])))]
7239 "!TARGET_64BIT && TARGET_SSE
7240 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7242 %vmovhps\t{%1, %0|%0, %1}
7243 psrldq\t{$8, %0|%0, 8}
7244 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7245 %vmovq\t{%H1, %0|%0, %H1}
7246 movhlps\t{%1, %0|%0, %1}
7247 movlps\t{%H1, %0|%0, %H1}"
7248 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7249 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7250 (set_attr "length_immediate" "*,1,1,*,*,*")
7251 (set_attr "memory" "*,none,none,*,*,*")
7252 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7253 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI result built from an SImode operand: vpshufd with
;; immediate 0 when the source is an SSE register, vbroadcastss when the
;; source is memory.
7255 (define_insn "*vec_dupv4si_avx"
7256 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7258 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7261 vpshufd\t{$0, %1, %0|%0, %1, 0}
7262 vbroadcastss\t{%1, %0|%0, %1}"
7263 [(set_attr "type" "sselog1,ssemov")
7264 (set_attr "length_immediate" "1,0")
7265 (set_attr "prefix_extra" "0,1")
7266 (set_attr "prefix" "vex")
7267 (set_attr "mode" "TI,V4SF")])
;; V4SI result built from an SImode register: pshufd with immediate 0
;; (isa sse2), or shufps with immediate 0 applied to the destination itself
;; when operand 1 is tied to the output.
7269 (define_insn "*vec_dupv4si"
7270 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7272 (match_operand:SI 1 "register_operand" " x,0")))]
7275 pshufd\t{$0, %1, %0|%0, %1, 0}
7276 shufps\t{$0, %0, %0|%0, %0, 0}"
7277 [(set_attr "isa" "sse2,*")
7278 (set_attr "type" "sselog1")
7279 (set_attr "length_immediate" "1")
7280 (set_attr "mode" "TI,V4SF")])
;; V2DI result built from a DImode operand, three alternatives (isa noavx,
;; avx, any).  Alternative 1 prints vpunpcklqdq using %d1; alternative 2
;; (memory source) prints %vmovddup and is given mode DF.
7282 (define_insn "*vec_dupv2di_sse3"
7283 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7285 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7289 vpunpcklqdq\t{%d1, %0|%0, %d1}
7290 %vmovddup\t{%1, %0|%0, %1}"
7291 [(set_attr "isa" "noavx,avx,*")
7292 (set_attr "type" "sselog1")
7293 (set_attr "prefix" "orig,vex,maybe_vex")
7294 (set_attr "mode" "TI,TI,DF")])
;; V2DI result built from a DImode register that is tied to the output in
;; both alternatives.  Alternative 0 requires isa sse2 (type sselog1, mode
;; TI); alternative 1 has no isa restriction (type ssemov, mode V4SF).
7296 (define_insn "*vec_dupv2di"
7297 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7299 (match_operand:DI 1 "register_operand" " 0,0")))]
7304 [(set_attr "isa" "sse2,*")
7305 (set_attr "type" "sselog1,ssemov")
7306 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI from two SImode operands.  Alternatives:
;;   0-1: pinsrd / vpinsrd inserting operand 2 (register or memory) at
;;        index 1.
;;   2-3: punpckldq / vpunpckldq with operand 2 in an SSE register.
;;   4:   operand 2 is zero (C); %vmovd of operand 1.
;;   5:   MMX punpckldq (destination class *y).
;;   6:   MMX movd with operand 2 zero.
7308 (define_insn "*vec_concatv2si_sse4_1"
7309 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7311 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7312 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7315 pinsrd\t{$1, %2, %0|%0, %2, 1}
7316 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7317 punpckldq\t{%2, %0|%0, %2}
7318 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7319 %vmovd\t{%1, %0|%0, %1}
7320 punpckldq\t{%2, %0|%0, %2}
7321 movd\t{%1, %0|%0, %1}"
7322 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7323 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7324 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7325 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7326 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7327 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7329 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7330 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7331 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SImode operands; operand 2 is a register or zero.
;; Alternatives: 0 SSE punpckldq, 1 SSE movd (operand 2 zero), 2 MMX
;; punpckldq, 3 MMX movd (operand 2 zero).
7332 (define_insn "*vec_concatv2si_sse2"
7333 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7335 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7336 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7339 punpckldq\t{%2, %0|%0, %2}
7340 movd\t{%1, %0|%0, %1}
7341 punpckldq\t{%2, %0|%0, %2}
7342 movd\t{%1, %0|%0, %1}"
7343 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7344 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI from two SImode operands using single-precision SSE
;; instructions for the SSE alternatives.  Alternatives: 0 unpcklps,
;; 1 movss from memory (operand 2 zero), 2 MMX punpckldq, 3 MMX movd
;; (operand 2 zero).
7346 (define_insn "*vec_concatv2si_sse"
7347 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7349 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7350 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7353 unpcklps\t{%2, %0|%0, %2}
7354 movss\t{%1, %0|%0, %1}
7355 punpckldq\t{%2, %0|%0, %2}
7356 movd\t{%1, %0|%0, %1}"
7357 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7358 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI from two V2SI operands.  Alternatives:
;;   0: punpcklqdq (isa sse2_noavx).      1: vpunpcklqdq (isa avx).
;;   2: movlhps, register operand 2.      3: movhps, memory operand 2.
;;   4: vmovhps, memory operand 2 (isa avx).
7360 (define_insn "*vec_concatv4si"
7361 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7363 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7364 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7367 punpcklqdq\t{%2, %0|%0, %2}
7368 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7369 movlhps\t{%2, %0|%0, %2}
7370 movhps\t{%2, %0|%0, %2}
7371 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7372 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7373 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7374 (set_attr "prefix" "orig,vex,orig,orig,vex")
7375 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7377 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI from two DImode operands.  Alternatives:
;;   0-1: pinsrq / vpinsrq inserting operand 2 at index 1.
;;   2:   operand 2 zero; %vmovq from an SSE register or memory.
;;   3:   operand 2 zero; %vmovd from a general register (see note above).
;;   4:   operand 2 zero; movq2dq from an MMX register.
;;   5-6: punpcklqdq / vpunpcklqdq with operand 2 in an SSE register.
;;   7-8: movhps / vmovhps with operand 2 in memory.
;; Alternatives 0, 1, 5 and 6 are typed sselog, the rest ssemov.
7378 (define_insn "*vec_concatv2di_rex64"
7379 [(set (match_operand:V2DI 0 "register_operand"
7380 "=x,x ,x ,Yi,!x,x,x,x,x")
7382 (match_operand:DI 1 "nonimmediate_operand"
7383 " 0,x ,xm,r ,*y,0,x,0,x")
7384 (match_operand:DI 2 "vector_move_operand"
7385 "rm,rm,C ,C ,C ,x,x,m,m")))]
7388 pinsrq\t{$1, %2, %0|%0, %2, 1}
7389 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7390 %vmovq\t{%1, %0|%0, %1}
7391 %vmovd\t{%1, %0|%0, %1}
7392 movq2dq\t{%1, %0|%0, %1}
7393 punpcklqdq\t{%2, %0|%0, %2}
7394 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7395 movhps\t{%2, %0|%0, %2}
7396 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7397 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7400 (eq_attr "alternative" "0,1,5,6")
7401 (const_string "sselog")
7402 (const_string "ssemov")))
7403 (set (attr "prefix_rex")
7405 (and (eq_attr "alternative" "0,3")
7406 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
7408 (const_string "*")))
7409 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7410 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7411 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7412 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (non-TARGET_64BIT) construction of a V2DI from two DImode
;; operands.  Alternatives:
;;   0: operand 2 zero; %vmovq from an SSE register or memory.
;;   1: operand 2 zero; movq2dq from an MMX register.
;;   2-3: punpcklqdq / vpunpcklqdq.
;;   4: movlhps.     5: movhps from memory.     6: vmovhps from memory.
7414 (define_insn "vec_concatv2di"
7415 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7417 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7418 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7419 "!TARGET_64BIT && TARGET_SSE"
7421 %vmovq\t{%1, %0|%0, %1}
7422 movq2dq\t{%1, %0|%0, %1}
7423 punpcklqdq\t{%2, %0|%0, %2}
7424 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7425 movlhps\t{%2, %0|%0, %2}
7426 movhps\t{%2, %0|%0, %2}
7427 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7428 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7429 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7430 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7431 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander delegating entirely to ix86_expand_sse_unpack (operands, false,
;; false).  Going by the pattern name this is the signed, low-half unpack;
;; NOTE(review): confirm the meaning of the two flags against the helper.
7433 (define_expand "vec_unpacks_lo_<mode>"
7434 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7435 (match_operand:VI124_128 1 "register_operand" "")]
7437 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander delegating entirely to ix86_expand_sse_unpack (operands, false,
;; true).  Going by the pattern name this is the signed, high-half unpack;
;; NOTE(review): confirm the meaning of the two flags against the helper.
7439 (define_expand "vec_unpacks_hi_<mode>"
7440 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7441 (match_operand:VI124_128 1 "register_operand" "")]
7443 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander delegating entirely to ix86_expand_sse_unpack (operands, true,
;; false).  Going by the pattern name this is the unsigned, low-half unpack;
;; NOTE(review): confirm the meaning of the two flags against the helper.
7445 (define_expand "vec_unpacku_lo_<mode>"
7446 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7447 (match_operand:VI124_128 1 "register_operand" "")]
7449 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander delegating entirely to ix86_expand_sse_unpack (operands, true,
;; true).  Going by the pattern name this is the unsigned, high-half unpack;
;; NOTE(review): confirm the meaning of the two flags against the helper.
7451 (define_expand "vec_unpacku_hi_<mode>"
7452 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7453 (match_operand:VI124_128 1 "register_operand" "")]
7455 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI average pattern.  The RTL includes a V32QI
;; constant vector of 32 ones; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS on the operands.
7463 (define_expand "avx2_uavgv32qi3"
7464 [(set (match_operand:V32QI 0 "register_operand" "")
7470 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7472 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7473 (const_vector:V32QI [(const_int 1) (const_int 1)
7474 (const_int 1) (const_int 1)
7475 (const_int 1) (const_int 1)
7476 (const_int 1) (const_int 1)
7477 (const_int 1) (const_int 1)
7478 (const_int 1) (const_int 1)
7479 (const_int 1) (const_int 1)
7480 (const_int 1) (const_int 1)
7481 (const_int 1) (const_int 1)
7482 (const_int 1) (const_int 1)
7483 (const_int 1) (const_int 1)
7484 (const_int 1) (const_int 1)
7485 (const_int 1) (const_int 1)
7486 (const_int 1) (const_int 1)
7487 (const_int 1) (const_int 1)
7488 (const_int 1) (const_int 1)]))
7491 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI average pattern.  The RTL includes a V16QI
;; constant vector of 16 ones; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS on the operands.
7493 (define_expand "sse2_uavgv16qi3"
7494 [(set (match_operand:V16QI 0 "register_operand" "")
7500 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7502 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7503 (const_vector:V16QI [(const_int 1) (const_int 1)
7504 (const_int 1) (const_int 1)
7505 (const_int 1) (const_int 1)
7506 (const_int 1) (const_int 1)
7507 (const_int 1) (const_int 1)
7508 (const_int 1) (const_int 1)
7509 (const_int 1) (const_int 1)
7510 (const_int 1) (const_int 1)]))
7513 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI average pattern.  Operands 1 and 2 are
;; commutative ("%"); requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  Emits vpavgb.
7515 (define_insn "*avx2_uavgv32qi3"
7516 [(set (match_operand:V32QI 0 "register_operand" "=x")
7522 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7524 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7525 (const_vector:V32QI [(const_int 1) (const_int 1)
7526 (const_int 1) (const_int 1)
7527 (const_int 1) (const_int 1)
7528 (const_int 1) (const_int 1)
7529 (const_int 1) (const_int 1)
7530 (const_int 1) (const_int 1)
7531 (const_int 1) (const_int 1)
7532 (const_int 1) (const_int 1)
7533 (const_int 1) (const_int 1)
7534 (const_int 1) (const_int 1)
7535 (const_int 1) (const_int 1)
7536 (const_int 1) (const_int 1)
7537 (const_int 1) (const_int 1)
7538 (const_int 1) (const_int 1)
7539 (const_int 1) (const_int 1)
7540 (const_int 1) (const_int 1)]))
7542 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7543 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7544 [(set_attr "type" "sseiadd")
7545 (set_attr "prefix" "vex")
7546 (set_attr "mode" "OI")])
;; Matching insn for the V16QI average pattern.  Operands 1 and 2 are
;; commutative ("%"); requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  Alternative 0 (isa noavx) emits the
;; two-operand pavgb, alternative 1 (isa avx) emits vpavgb.
7548 (define_insn "*sse2_uavgv16qi3"
7549 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7555 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7557 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7558 (const_vector:V16QI [(const_int 1) (const_int 1)
7559 (const_int 1) (const_int 1)
7560 (const_int 1) (const_int 1)
7561 (const_int 1) (const_int 1)
7562 (const_int 1) (const_int 1)
7563 (const_int 1) (const_int 1)
7564 (const_int 1) (const_int 1)
7565 (const_int 1) (const_int 1)]))
7567 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7569 pavgb\t{%2, %0|%0, %2}
7570 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7571 [(set_attr "isa" "noavx,avx")
7572 (set_attr "type" "sseiadd")
7573 (set_attr "prefix_data16" "1,*")
7574 (set_attr "prefix" "orig,vex")
7575 (set_attr "mode" "TI")])
;; Expander for the V16HI average pattern.  The RTL includes a V16HI
;; constant vector of 16 ones; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS on the operands.
7577 (define_expand "avx2_uavgv16hi3"
7578 [(set (match_operand:V16HI 0 "register_operand" "")
7584 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7586 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7587 (const_vector:V16HI [(const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)
7589 (const_int 1) (const_int 1)
7590 (const_int 1) (const_int 1)
7591 (const_int 1) (const_int 1)
7592 (const_int 1) (const_int 1)
7593 (const_int 1) (const_int 1)
7594 (const_int 1) (const_int 1)]))
7597 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI average pattern.  The RTL includes a V8HI constant
;; vector of 8 ones; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS on the operands.
7599 (define_expand "sse2_uavgv8hi3"
7600 [(set (match_operand:V8HI 0 "register_operand" "")
7606 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7608 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7609 (const_vector:V8HI [(const_int 1) (const_int 1)
7610 (const_int 1) (const_int 1)
7611 (const_int 1) (const_int 1)
7612 (const_int 1) (const_int 1)]))
7615 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI average pattern.  Operands 1 and 2 are
;; commutative ("%"); requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  Emits vpavgw.
7617 (define_insn "*avx2_uavgv16hi3"
7618 [(set (match_operand:V16HI 0 "register_operand" "=x")
7624 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7626 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7627 (const_vector:V16HI [(const_int 1) (const_int 1)
7628 (const_int 1) (const_int 1)
7629 (const_int 1) (const_int 1)
7630 (const_int 1) (const_int 1)
7631 (const_int 1) (const_int 1)
7632 (const_int 1) (const_int 1)
7633 (const_int 1) (const_int 1)
7634 (const_int 1) (const_int 1)]))
7636 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7637 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7638 [(set_attr "type" "sseiadd")
7639 (set_attr "prefix" "vex")
7640 (set_attr "mode" "OI")])
;; Matching insn for the V8HI average pattern.  Operands 1 and 2 are
;; commutative ("%"); requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  Alternative 0 (isa noavx) emits the
;; two-operand pavgw, alternative 1 (isa avx) emits vpavgw.
7642 (define_insn "*sse2_uavgv8hi3"
7643 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7649 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7651 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7652 (const_vector:V8HI [(const_int 1) (const_int 1)
7653 (const_int 1) (const_int 1)
7654 (const_int 1) (const_int 1)
7655 (const_int 1) (const_int 1)]))
7657 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7659 pavgw\t{%2, %0|%0, %2}
7660 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7661 [(set_attr "isa" "noavx,avx")
7662 (set_attr "type" "sseiadd")
7663 (set_attr "prefix_data16" "1,*")
7664 (set_attr "prefix" "orig,vex")
7665 (set_attr "mode" "TI")])
7667 ;; The correct representation for this is absolutely enormous, and
7668 ;; surely not generally useful.
;; Hence the operation is modelled as an unspec: the result has a VI8_AVX2
;; mode while both inputs use the corresponding <ssebytemode>.
;; Alternative 0 (isa noavx) ties operand 1 to the output and emits psadbw;
;; alternative 1 (isa avx) emits vpsadbw.
7669 (define_insn "<sse2_avx2>_psadbw"
7670 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7671 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7672 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7676 psadbw\t{%2, %0|%0, %2}
7677 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7678 [(set_attr "isa" "noavx,avx")
7679 (set_attr "type" "sseiadd")
7680 (set_attr "atom_unit" "simul")
7681 (set_attr "prefix_data16" "1,*")
7682 (set_attr "prefix" "orig,vex")
7683 (set_attr "mode" "<sseinsnmode>")])
;; Emits %vmovmsk<ssemodesuffix> from a vector-float register (any VF
;; mode) into an SImode general register.
7685 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7686 [(set (match_operand:SI 0 "register_operand" "=r")
7688 [(match_operand:VF 1 "register_operand" "x")]
7691 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7692 [(set_attr "type" "ssemov")
7693 (set_attr "prefix" "maybe_vex")
7694 (set_attr "mode" "<MODE>")])
;; Emits vpmovmskb from a V32QI register into an SImode general register;
;; modelled as an unspec.  The insn "mode" attribute is DI.
7696 (define_insn "avx2_pmovmskb"
7697 [(set (match_operand:SI 0 "register_operand" "=r")
7698 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7701 "vpmovmskb\t{%1, %0|%0, %1}"
7702 [(set_attr "type" "ssemov")
7703 (set_attr "prefix" "vex")
7704 (set_attr "mode" "DI")])
;; Emits %vpmovmskb from a V16QI register into an SImode general register;
;; modelled as an unspec.  The insn "mode" attribute is SI.
7706 (define_insn "sse2_pmovmskb"
7707 [(set (match_operand:SI 0 "register_operand" "=r")
7708 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7711 "%vpmovmskb\t{%1, %0|%0, %1}"
7712 [(set_attr "type" "ssemov")
7713 (set_attr "prefix_data16" "1")
7714 (set_attr "prefix" "maybe_vex")
7715 (set_attr "mode" "SI")])
;; Expander for the masked byte store: V16QI memory destination (operand
;; 0) and two V16QI register inputs (operands 1 and 2) inside an unspec.
7717 (define_expand "sse2_maskmovdqu"
7718 [(set (match_operand:V16QI 0 "memory_operand" "")
7719 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7720 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for the masked byte store.  The store address is pointer
;; operand 0, constrained to class "D"; it is not printed in the template,
;; which only names operands 1 and 2.  The previous memory contents also
;; appear as an input of the unspec (mem of match_dup 0).
7725 (define_insn "*sse2_maskmovdqu"
7726 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7727 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7728 (match_operand:V16QI 2 "register_operand" "x")
7729 (mem:V16QI (match_dup 0))]
7732 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7733 [(set_attr "type" "ssemov")
7734 (set_attr "prefix_data16" "1")
7735 ;; The implicit %rdi operand confuses default length_vex computation.
7736 (set (attr "length_vex")
7737 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7738 (set_attr "prefix" "maybe_vex")
7739 (set_attr "mode" "TI")])
;; unspec_volatile taking an SImode memory operand; marked as a memory
;; load with atom_sse_attr "mxcsr".
;; NOTE(review): by its name this loads MXCSR from memory -- the template
;; is not shown here; confirm.
7741 (define_insn "sse_ldmxcsr"
7742 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7746 [(set_attr "type" "sse")
7747 (set_attr "atom_sse_attr" "mxcsr")
7748 (set_attr "prefix" "maybe_vex")
7749 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the UNSPECV_STMXCSR unspec_volatile;
;; marked as a memory store with atom_sse_attr "mxcsr".
7751 (define_insn "sse_stmxcsr"
7752 [(set (match_operand:SI 0 "memory_operand" "=m")
7753 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7756 [(set_attr "type" "sse")
7757 (set_attr "atom_sse_attr" "mxcsr")
7758 (set_attr "prefix" "maybe_vex")
7759 (set_attr "memory" "store")])
;; Expander for UNSPEC_SFENCE, available with TARGET_SSE or
;; TARGET_3DNOW_A.  Operand 0 is created here as a volatile BLKmode MEM
;; whose address is a Pmode SCRATCH, and the unspec reads that same MEM.
7761 (define_expand "sse_sfence"
7763 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7764 "TARGET_SSE || TARGET_3DNOW_A"
7766 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7767 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL built by the sse_sfence expander: a BLKmode
;; operand (no predicate, no constraint) set from UNSPEC_SFENCE of itself.
;; length_address is forced to 0 and the memory attribute is unknown.
7770 (define_insn "*sse_sfence"
7771 [(set (match_operand:BLK 0 "" "")
7772 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7773 "TARGET_SSE || TARGET_3DNOW_A"
7775 [(set_attr "type" "sse")
7776 (set_attr "length_address" "0")
7777 (set_attr "atom_sse_attr" "fence")
7778 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile taking a modeless address operand
;; (address_operand predicate, p constraint).  Classified as a fence for
;; the Atom scheduling attribute, with memory attribute unknown.
7780 (define_insn "sse2_clflush"
7781 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7785 [(set_attr "type" "sse")
7786 (set_attr "atom_sse_attr" "fence")
7787 (set_attr "memory" "unknown")])
;; Expander for sse2_mfence.  As with the other fence expanders in this
;; file, operand 0 is created in the preparation code as a volatile
;; BLKmode MEM addressed by a Pmode SCRATCH and fed to UNSPEC_MFENCE.
7789 (define_expand "sse2_mfence"
7791 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7794 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7795 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sse2_mfence RTL: BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Enabled when either TARGET_64BIT or
;; TARGET_SSE2 holds.
7798 (define_insn "*sse2_mfence"
7799 [(set (match_operand:BLK 0 "" "")
7800 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7801 "TARGET_64BIT || TARGET_SSE2"
7803 [(set_attr "type" "sse")
7804 (set_attr "length_address" "0")
7805 (set_attr "atom_sse_attr" "fence")
7806 (set_attr "memory" "unknown")])
;; Expander for sse2_lfence.  Operand 0 is created in the preparation
;; code as a volatile BLKmode MEM addressed by a Pmode SCRATCH and fed to
;; UNSPEC_LFENCE.
7808 (define_expand "sse2_lfence"
7810 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7813 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7814 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sse2_lfence RTL: BLKmode operand set from
;; UNSPEC_LFENCE of itself.  Unlike the sfence and mfence insns, the
;; atom_sse_attr value here is lfence rather than fence.
7817 (define_insn "*sse2_lfence"
7818 [(set (match_operand:BLK 0 "" "")
7819 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7822 [(set_attr "type" "sse")
7823 (set_attr "length_address" "0")
7824 (set_attr "atom_sse_attr" "lfence")
7825 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile over two SImode register operands pinned by
;; constraint to register classes a (operand 0) and c (operand 1).  The
;; length attribute is fixed at 3.
7827 (define_insn "sse3_mwait"
7828 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7829 (match_operand:SI 1 "register_operand" "c")]
7832 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7833 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7834 ;; we only need to set up 32bit registers.
7836 [(set_attr "length" "3")])
;; 32-bit monitor (condition requires TARGET_SSE3 and not TARGET_64BIT).
;; All three operands are SImode registers pinned to classes a, c and d,
;; and all three are printed explicitly in the template.
7838 (define_insn "sse3_monitor"
7839 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7840 (match_operand:SI 1 "register_operand" "c")
7841 (match_operand:SI 2 "register_operand" "d")]
7843 "TARGET_SSE3 && !TARGET_64BIT"
7844 "monitor\t%0, %1, %2"
7845 [(set_attr "length" "3")])
;; 64-bit monitor (condition requires TARGET_SSE3 and TARGET_64BIT).
;; Operand 0 in class a is DImode; operands 1 and 2 in classes c and d
;; stay SImode, for the reason given in the comment inside the pattern.
7847 (define_insn "sse3_monitor64"
7848 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7849 (match_operand:SI 1 "register_operand" "c")
7850 (match_operand:SI 2 "register_operand" "d")]
7852 "TARGET_SSE3 && TARGET_64BIT"
7853 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7854 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7855 ;; zero extended to 64bit, we only need to set up 32bit registers.
7857 [(set_attr "length" "3")])
7859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7861 ;; SSSE3 instructions
7863 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vphaddw on V16HI (mode attribute OI).  The RTL pairs adjacent HImode
;; elements (0,1) (2,3) ... (14,15), first from register operand 1 and
;; then from operand 2, which may be a register or memory.  Single
;; alternative, three-operand VEX form only.
7865 (define_insn "avx2_phaddwv16hi3"
7866 [(set (match_operand:V16HI 0 "register_operand" "=x")
7873 (match_operand:V16HI 1 "register_operand" "x")
7874 (parallel [(const_int 0)]))
7875 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7877 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7878 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7881 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7882 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7884 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7885 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7889 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
7890 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
7892 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
7893 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
7896 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
7897 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
7899 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
7900 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
7906 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7907 (parallel [(const_int 0)]))
7908 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7910 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7911 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7914 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7915 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7917 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7918 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
7922 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
7923 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
7925 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
7926 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
7929 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
7930 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
7932 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
7933 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
7935 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7936 [(set_attr "type" "sseiadd")
7937 (set_attr "prefix_extra" "1")
7938 (set_attr "prefix" "vex")
7939 (set_attr "mode" "OI")])
;; phaddw / vphaddw on V8HI.  Adjacent HImode pairs (0,1) ... (6,7) are
;; taken from operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phaddw, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphaddw.
7941 (define_insn "ssse3_phaddwv8hi3"
7942 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7948 (match_operand:V8HI 1 "register_operand" "0,x")
7949 (parallel [(const_int 0)]))
7950 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7952 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7953 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7956 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7957 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7959 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7960 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7965 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7966 (parallel [(const_int 0)]))
7967 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7969 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7970 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7973 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7974 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7976 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7977 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7980 phaddw\t{%2, %0|%0, %2}
7981 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7982 [(set_attr "isa" "noavx,avx")
7983 (set_attr "type" "sseiadd")
7984 (set_attr "atom_unit" "complex")
7985 (set_attr "prefix_data16" "1,*")
7986 (set_attr "prefix_extra" "1")
7987 (set_attr "prefix" "orig,vex")
7988 (set_attr "mode" "TI")])
;; phaddw on V4HI using the y (MMX) register constraint; mode attribute
;; DI.  Operand 1 is tied to the destination; operand 2 may be a register
;; or memory.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
7990 (define_insn "ssse3_phaddwv4hi3"
7991 [(set (match_operand:V4HI 0 "register_operand" "=y")
7996 (match_operand:V4HI 1 "register_operand" "0")
7997 (parallel [(const_int 0)]))
7998 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8000 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8001 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8005 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8006 (parallel [(const_int 0)]))
8007 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8009 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8010 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8012 "phaddw\t{%2, %0|%0, %2}"
8013 [(set_attr "type" "sseiadd")
8014 (set_attr "atom_unit" "complex")
8015 (set_attr "prefix_extra" "1")
8016 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8017 (set_attr "mode" "DI")])
;; vphaddd on V8SI (mode attribute OI).  Adjacent SImode pairs (0,1) ...
;; (6,7) are taken from register operand 1 and then from operand 2
;; (register or memory).  Single alternative, VEX form only.
8019 (define_insn "avx2_phadddv8si3"
8020 [(set (match_operand:V8SI 0 "register_operand" "=x")
8026 (match_operand:V8SI 1 "register_operand" "x")
8027 (parallel [(const_int 0)]))
8028 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8030 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8031 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8034 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8035 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8037 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8038 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8043 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8044 (parallel [(const_int 0)]))
8045 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8047 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8048 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8051 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8052 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8054 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8055 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8057 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8058 [(set_attr "type" "sseiadd")
8059 (set_attr "prefix_extra" "1")
8060 (set_attr "prefix" "vex")
8061 (set_attr "mode" "OI")])
;; phaddd / vphaddd on V4SI.  SImode pairs (0,1) and (2,3) are taken from
;; operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phaddd, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphaddd.
8063 (define_insn "ssse3_phadddv4si3"
8064 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8069 (match_operand:V4SI 1 "register_operand" "0,x")
8070 (parallel [(const_int 0)]))
8071 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8073 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8074 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8078 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8079 (parallel [(const_int 0)]))
8080 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8082 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8083 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8086 phaddd\t{%2, %0|%0, %2}
8087 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8088 [(set_attr "isa" "noavx,avx")
8089 (set_attr "type" "sseiadd")
8090 (set_attr "atom_unit" "complex")
8091 (set_attr "prefix_data16" "1,*")
8092 (set_attr "prefix_extra" "1")
8093 (set_attr "prefix" "orig,vex")
8094 (set_attr "mode" "TI")])
;; phaddd on V2SI using the y (MMX) register constraint; mode attribute
;; DI.  Elements 0 and 1 of operand 1 (tied to the destination) and of
;; operand 2 are selected.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
8096 (define_insn "ssse3_phadddv2si3"
8097 [(set (match_operand:V2SI 0 "register_operand" "=y")
8101 (match_operand:V2SI 1 "register_operand" "0")
8102 (parallel [(const_int 0)]))
8103 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8106 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8107 (parallel [(const_int 0)]))
8108 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8110 "phaddd\t{%2, %0|%0, %2}"
8111 [(set_attr "type" "sseiadd")
8112 (set_attr "atom_unit" "complex")
8113 (set_attr "prefix_extra" "1")
8114 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8115 (set_attr "mode" "DI")])
;; vphaddsw on V16HI (mode attribute OI).  Same operand layout as
;; avx2_phaddwv16hi3: adjacent HImode pairs (0,1) ... (14,15) from
;; operand 1, then from operand 2.  Single alternative, VEX form only.
8117 (define_insn "avx2_phaddswv16hi3"
8118 [(set (match_operand:V16HI 0 "register_operand" "=x")
8125 (match_operand:V16HI 1 "register_operand" "x")
8126 (parallel [(const_int 0)]))
8127 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8130 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8133 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8134 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8136 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8137 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8141 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8142 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8144 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8145 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8148 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8151 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8152 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8158 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8159 (parallel [(const_int 0)]))
8160 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8163 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8167 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8169 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8174 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8175 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8177 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8178 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8181 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8182 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8184 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8185 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8187 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8188 [(set_attr "type" "sseiadd")
8189 (set_attr "prefix_extra" "1")
8190 (set_attr "prefix" "vex")
8191 (set_attr "mode" "OI")])
;; phaddsw / vphaddsw on V8HI.  Adjacent HImode pairs (0,1) ... (6,7) are
;; taken from operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phaddsw, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphaddsw.
8193 (define_insn "ssse3_phaddswv8hi3"
8194 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8200 (match_operand:V8HI 1 "register_operand" "0,x")
8201 (parallel [(const_int 0)]))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8204 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8208 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8209 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8211 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8212 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8217 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8218 (parallel [(const_int 0)]))
8219 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8222 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8225 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8226 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8228 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8232 phaddsw\t{%2, %0|%0, %2}
8233 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8234 [(set_attr "isa" "noavx,avx")
8235 (set_attr "type" "sseiadd")
8236 (set_attr "atom_unit" "complex")
8237 (set_attr "prefix_data16" "1,*")
8238 (set_attr "prefix_extra" "1")
8239 (set_attr "prefix" "orig,vex")
8240 (set_attr "mode" "TI")])
;; phaddsw on V4HI using the y (MMX) register constraint; mode attribute
;; DI.  Operand 1 is tied to the destination; operand 2 may be a register
;; or memory.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
8242 (define_insn "ssse3_phaddswv4hi3"
8243 [(set (match_operand:V4HI 0 "register_operand" "=y")
8248 (match_operand:V4HI 1 "register_operand" "0")
8249 (parallel [(const_int 0)]))
8250 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8252 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8253 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8257 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8258 (parallel [(const_int 0)]))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8261 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8262 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8264 "phaddsw\t{%2, %0|%0, %2}"
8265 [(set_attr "type" "sseiadd")
8266 (set_attr "atom_unit" "complex")
8267 (set_attr "prefix_extra" "1")
8268 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8269 (set_attr "mode" "DI")])
;; vphsubw on V16HI (mode attribute OI).  Adjacent HImode pairs (0,1) ...
;; (14,15) are taken from register operand 1 and then from operand 2
;; (register or memory).  Single alternative, VEX form only.
8271 (define_insn "avx2_phsubwv16hi3"
8272 [(set (match_operand:V16HI 0 "register_operand" "=x")
8279 (match_operand:V16HI 1 "register_operand" "x")
8280 (parallel [(const_int 0)]))
8281 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8283 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8284 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8287 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8288 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8290 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8291 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8295 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8296 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8298 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8299 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8302 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8303 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8312 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8313 (parallel [(const_int 0)]))
8314 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8317 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8320 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8328 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8329 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8331 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8332 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8335 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8336 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8338 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8339 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8341 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8342 [(set_attr "type" "sseiadd")
8343 (set_attr "prefix_extra" "1")
8344 (set_attr "prefix" "vex")
8345 (set_attr "mode" "OI")])
;; phsubw / vphsubw on V8HI.  Adjacent HImode pairs (0,1) ... (6,7) are
;; taken from operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phsubw, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphsubw.
8347 (define_insn "ssse3_phsubwv8hi3"
8348 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8354 (match_operand:V8HI 1 "register_operand" "0,x")
8355 (parallel [(const_int 0)]))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8358 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8359 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8365 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8371 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8372 (parallel [(const_int 0)]))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8375 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8379 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8382 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8383 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8386 phsubw\t{%2, %0|%0, %2}
8387 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8388 [(set_attr "isa" "noavx,avx")
8389 (set_attr "type" "sseiadd")
8390 (set_attr "atom_unit" "complex")
8391 (set_attr "prefix_data16" "1,*")
8392 (set_attr "prefix_extra" "1")
8393 (set_attr "prefix" "orig,vex")
8394 (set_attr "mode" "TI")])
;; phsubw on V4HI using the y (MMX) register constraint; mode attribute
;; DI.  Operand 1 is tied to the destination; operand 2 may be a register
;; or memory.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
8396 (define_insn "ssse3_phsubwv4hi3"
8397 [(set (match_operand:V4HI 0 "register_operand" "=y")
8402 (match_operand:V4HI 1 "register_operand" "0")
8403 (parallel [(const_int 0)]))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8406 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8407 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8411 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8412 (parallel [(const_int 0)]))
8413 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8415 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8416 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8418 "phsubw\t{%2, %0|%0, %2}"
8419 [(set_attr "type" "sseiadd")
8420 (set_attr "atom_unit" "complex")
8421 (set_attr "prefix_extra" "1")
8422 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8423 (set_attr "mode" "DI")])
;; vphsubd on V8SI (mode attribute OI).  Adjacent SImode pairs (0,1) ...
;; (6,7) are taken from register operand 1 and then from operand 2
;; (register or memory).  Single alternative, VEX form only.
8425 (define_insn "avx2_phsubdv8si3"
8426 [(set (match_operand:V8SI 0 "register_operand" "=x")
8432 (match_operand:V8SI 1 "register_operand" "x")
8433 (parallel [(const_int 0)]))
8434 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8436 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8437 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8440 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8441 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8443 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8444 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8449 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8450 (parallel [(const_int 0)]))
8451 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8453 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8454 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8457 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8458 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8460 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8461 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8463 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8464 [(set_attr "type" "sseiadd")
8465 (set_attr "prefix_extra" "1")
8466 (set_attr "prefix" "vex")
8467 (set_attr "mode" "OI")])
;; phsubd / vphsubd on V4SI.  SImode pairs (0,1) and (2,3) are taken from
;; operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phsubd, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphsubd.
8469 (define_insn "ssse3_phsubdv4si3"
8470 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8475 (match_operand:V4SI 1 "register_operand" "0,x")
8476 (parallel [(const_int 0)]))
8477 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8479 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8480 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8484 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8485 (parallel [(const_int 0)]))
8486 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8488 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8489 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8492 phsubd\t{%2, %0|%0, %2}
8493 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8495 [(set_attr "isa" "noavx,avx")
8496 (set_attr "type" "sseiadd")
8497 (set_attr "atom_unit" "complex")
8498 (set_attr "prefix_data16" "1,*")
8499 (set_attr "prefix_extra" "1")
8500 (set_attr "prefix" "orig,vex")
8501 (set_attr "mode" "TI")])
;; phsubd on V2SI using the y (MMX) register constraint; mode attribute
;; DI.  Elements 0 and 1 of operand 1 (tied to the destination) and of
;; operand 2 are selected.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
8503 (define_insn "ssse3_phsubdv2si3"
8504 [(set (match_operand:V2SI 0 "register_operand" "=y")
8508 (match_operand:V2SI 1 "register_operand" "0")
8509 (parallel [(const_int 0)]))
8510 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8513 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8514 (parallel [(const_int 0)]))
8515 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8517 "phsubd\t{%2, %0|%0, %2}"
8518 [(set_attr "type" "sseiadd")
8519 (set_attr "atom_unit" "complex")
8520 (set_attr "prefix_extra" "1")
8521 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8522 (set_attr "mode" "DI")])
;; vphsubsw on V16HI (mode attribute OI).  Adjacent HImode pairs (0,1) ...
;; (14,15) are taken from register operand 1 and then from operand 2
;; (register or memory).  Single alternative, VEX form only.
8524 (define_insn "avx2_phsubswv16hi3"
8525 [(set (match_operand:V16HI 0 "register_operand" "=x")
8532 (match_operand:V16HI 1 "register_operand" "x")
8533 (parallel [(const_int 0)]))
8534 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8536 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8537 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8540 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8541 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8543 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8544 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8548 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8549 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8551 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8552 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8555 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8565 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8566 (parallel [(const_int 0)]))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8569 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8570 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8574 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8577 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8581 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8582 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8584 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8585 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8588 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8589 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8591 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8592 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8594 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8595 [(set_attr "type" "sseiadd")
8596 (set_attr "prefix_extra" "1")
8597 (set_attr "prefix" "vex")
8598 (set_attr "mode" "OI")])
;; phsubsw / vphsubsw on V8HI.  Adjacent HImode pairs (0,1) ... (6,7) are
;; taken from operand 1 and then from operand 2.  Two alternatives:
;;   0 (isa noavx): two-operand phsubsw, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand vphsubsw.
8600 (define_insn "ssse3_phsubswv8hi3"
8601 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8607 (match_operand:V8HI 1 "register_operand" "0,x")
8608 (parallel [(const_int 0)]))
8609 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8611 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8616 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8618 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8619 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8624 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8625 (parallel [(const_int 0)]))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8628 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8629 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8633 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8636 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8639 phsubsw\t{%2, %0|%0, %2}
8640 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8641 [(set_attr "isa" "noavx,avx")
8642 (set_attr "type" "sseiadd")
8643 (set_attr "atom_unit" "complex")
8644 (set_attr "prefix_data16" "1,*")
8645 (set_attr "prefix_extra" "1")
8646 (set_attr "prefix" "orig,vex")
8647 (set_attr "mode" "TI")])
;; phsubsw on V4HI using the y (MMX) register constraint; mode attribute
;; DI.  Operand 1 is tied to the destination; operand 2 may be a register
;; or memory.  prefix_rex is computed per insn with
;; x86_extended_reg_mentioned_p.
8649 (define_insn "ssse3_phsubswv4hi3"
8650 [(set (match_operand:V4HI 0 "register_operand" "=y")
8655 (match_operand:V4HI 1 "register_operand" "0")
8656 (parallel [(const_int 0)]))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8664 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8665 (parallel [(const_int 0)]))
8666 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8669 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8671 "phsubsw\t{%2, %0|%0, %2}"
8672 [(set_attr "type" "sseiadd")
8673 (set_attr "atom_unit" "complex")
8674 (set_attr "prefix_extra" "1")
8675 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8676 (set_attr "mode" "DI")])
;; vpmaddubsw: V32QI inputs (operand 1 register, operand 2 register or
;; memory) producing a V16HI result.  The RTL selects sub-vectors from each
;; input with one index list starting at 0 and another starting at 1; the
;; second pair of selects is V16QI.  Single alternative, VEX form only.
8678 (define_insn "avx2_pmaddubsw256"
8679 [(set (match_operand:V16HI 0 "register_operand" "=x")
8684 (match_operand:V32QI 1 "register_operand" "x")
8685 (parallel [(const_int 0)
8703 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8704 (parallel [(const_int 0)
8722 (vec_select:V16QI (match_dup 1)
8723 (parallel [(const_int 1)
8740 (vec_select:V16QI (match_dup 2)
8741 (parallel [(const_int 1)
8756 (const_int 31)]))))))]
8758 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8759 [(set_attr "type" "sseiadd")
8760 (set_attr "prefix_extra" "1")
8761 (set_attr "prefix" "vex")
8762 (set_attr "mode" "OI")])
;; pmaddubsw / vpmaddubsw: V16QI inputs producing a V8HI result.  The RTL
;; selects sub-vectors from each input with one index list starting at 0
;; and another starting at 1; the second pair of selects is V8QI.
;; Two alternatives:
;;   0 (isa noavx): two-operand form, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand VEX form.
8764 (define_insn "ssse3_pmaddubsw128"
8765 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8770 (match_operand:V16QI 1 "register_operand" "0,x")
8771 (parallel [(const_int 0)
8781 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8782 (parallel [(const_int 0)
8792 (vec_select:V8QI (match_dup 1)
8793 (parallel [(const_int 1)
8802 (vec_select:V8QI (match_dup 2)
8803 (parallel [(const_int 1)
8810 (const_int 15)]))))))]
8813 pmaddubsw\t{%2, %0|%0, %2}
8814 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8815 [(set_attr "isa" "noavx,avx")
8816 (set_attr "type" "sseiadd")
8817 (set_attr "atom_unit" "simul")
8818 (set_attr "prefix_data16" "1,*")
8819 (set_attr "prefix_extra" "1")
8820 (set_attr "prefix" "orig,vex")
8821 (set_attr "mode" "TI")])
;; pmaddubsw on V8QI inputs producing V4HI, using the y (MMX) register
;; constraint; mode attribute DI.  Operand 1 is tied to the destination.
;; The RTL selects sub-vectors from each input with one index list
;; starting at 0 and another starting at 1; the second pair of selects is
;; V4QI.  prefix_rex is computed with x86_extended_reg_mentioned_p.
8823 (define_insn "ssse3_pmaddubsw"
8824 [(set (match_operand:V4HI 0 "register_operand" "=y")
8829 (match_operand:V8QI 1 "register_operand" "0")
8830 (parallel [(const_int 0)
8836 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8837 (parallel [(const_int 0)
8843 (vec_select:V4QI (match_dup 1)
8844 (parallel [(const_int 1)
8849 (vec_select:V4QI (match_dup 2)
8850 (parallel [(const_int 1)
8853 (const_int 7)]))))))]
8855 "pmaddubsw\t{%2, %0|%0, %2}"
8856 [(set_attr "type" "sseiadd")
8857 (set_attr "atom_unit" "simul")
8858 (set_attr "prefix_extra" "1")
8859 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8860 (set_attr "mode" "DI")])
;; Expander for the V16HI variant.  Both inputs may be register or memory;
;; the pattern contains an all-ones V16HI const_vector, and the only
;; preparation step is ix86_fixup_binary_operands_no_copy for MULT in
;; V16HImode.
;; NOTE(review): the name says umulhrsw while the matching insn
;; *avx2_umulhrswv16hi3 emits vpmulhrsw and the 128-bit and 64-bit
;; siblings are named pmulhrsw -- confirm whether the name is intentional
;; before relying on it.
8862 (define_expand "avx2_umulhrswv16hi3"
8863 [(set (match_operand:V16HI 0 "register_operand" "")
8870 (match_operand:V16HI 1 "nonimmediate_operand" ""))
8872 (match_operand:V16HI 2 "nonimmediate_operand" "")))
8874 (const_vector:V16HI [(const_int 1) (const_int 1)
8875 (const_int 1) (const_int 1)
8876 (const_int 1) (const_int 1)
8877 (const_int 1) (const_int 1)
8878 (const_int 1) (const_int 1)
8879 (const_int 1) (const_int 1)
8880 (const_int 1) (const_int 1)
8881 (const_int 1) (const_int 1)]))
8884 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Insn emitting vpmulhrsw on V16HI (mode attribute OI).  Requires
;; TARGET_AVX2 and ix86_binary_operator_ok for MULT.  Operand 1 carries
;; the % constraint modifier, marking operands 1 and 2 as commutative.
8886 (define_insn "*avx2_umulhrswv16hi3"
8887 [(set (match_operand:V16HI 0 "register_operand" "=x")
8894 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
8896 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
8898 (const_vector:V16HI [(const_int 1) (const_int 1)
8899 (const_int 1) (const_int 1)
8900 (const_int 1) (const_int 1)
8901 (const_int 1) (const_int 1)
8902 (const_int 1) (const_int 1)
8903 (const_int 1) (const_int 1)
8904 (const_int 1) (const_int 1)
8905 (const_int 1) (const_int 1)]))
8907 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
8908 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8909 [(set_attr "type" "sseimul")
8910 (set_attr "prefix_extra" "1")
8911 (set_attr "prefix" "vex")
8912 (set_attr "mode" "OI")])
;; Expander for the V8HI variant.  Both inputs may be register or memory;
;; the pattern contains an all-ones V8HI const_vector, and the only
;; preparation step is ix86_fixup_binary_operands_no_copy for MULT in
;; V8HImode.
8914 (define_expand "ssse3_pmulhrswv8hi3"
8915 [(set (match_operand:V8HI 0 "register_operand" "")
8922 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8924 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8926 (const_vector:V8HI [(const_int 1) (const_int 1)
8927 (const_int 1) (const_int 1)
8928 (const_int 1) (const_int 1)
8929 (const_int 1) (const_int 1)]))
8932 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn emitting pmulhrsw / vpmulhrsw on V8HI.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT.  Operand 1 carries the % modifier
;; (commutative with operand 2).  Two alternatives:
;;   0 (isa noavx): two-operand form, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand VEX form.
8934 (define_insn "*ssse3_pmulhrswv8hi3"
8935 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8942 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
8944 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
8946 (const_vector:V8HI [(const_int 1) (const_int 1)
8947 (const_int 1) (const_int 1)
8948 (const_int 1) (const_int 1)
8949 (const_int 1) (const_int 1)]))
8951 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8953 pmulhrsw\t{%2, %0|%0, %2}
8954 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8955 [(set_attr "isa" "noavx,avx")
8956 (set_attr "type" "sseimul")
8957 (set_attr "prefix_data16" "1,*")
8958 (set_attr "prefix_extra" "1")
8959 (set_attr "prefix" "orig,vex")
8960 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  Its preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy
;; (MULT, V4HImode, operands).
8962 (define_expand "ssse3_pmulhrswv4hi3"
8963 [(set (match_operand:V4HI 0 "register_operand" "")
8970 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8972 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8974 (const_vector:V4HI [(const_int 1) (const_int 1)
8975 (const_int 1) (const_int 1)]))
8978 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher for the V4HI form on "y"-constraint registers (presumably the
;; MMX register class -- confirm in constraints.md).  Two-operand pmulhrsw
;; with the destination tied to operand 1.  The prefix_rex attribute is
;; computed per insn by x86_extended_reg_mentioned_p.
8980 (define_insn "*ssse3_pmulhrswv4hi3"
8981 [(set (match_operand:V4HI 0 "register_operand" "=y")
8988 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8990 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8992 (const_vector:V4HI [(const_int 1) (const_int 1)
8993 (const_int 1) (const_int 1)]))
8995 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8996 "pmulhrsw\t{%2, %0|%0, %2}"
8997 [(set_attr "type" "sseimul")
8998 (set_attr "prefix_extra" "1")
8999 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9000 (set_attr "mode" "DI")])
;; Byte shuffle over the VI1_AVX2 modes, modelled as an unspec.
;; Alternative 0 (isa noavx) is the two-operand pshufb with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand vpshufb.
9002 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9003 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9004 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9005 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9009 pshufb\t{%2, %0|%0, %2}
9010 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9011 [(set_attr "isa" "noavx,avx")
9012 (set_attr "type" "sselog1")
9013 (set_attr "prefix_data16" "1,*")
9014 (set_attr "prefix_extra" "1")
9015 (set_attr "prefix" "orig,vex")
9016 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constraint registers, modelled as an unspec; the
;; destination is tied to operand 1.  The ';' after the output template
;; merely starts an empty md comment.
9018 (define_insn "ssse3_pshufbv8qi3"
9019 [(set (match_operand:V8QI 0 "register_operand" "=y")
9020 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9021 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9024 "pshufb\t{%2, %0|%0, %2}";
9025 [(set_attr "type" "sselog1")
9026 (set_attr "prefix_extra" "1")
9027 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9028 (set_attr "mode" "DI")])
;; psign over the VI124_AVX2 modes; the element-size suffix comes from
;; <ssemodesuffix>.  Alternative 0 (isa noavx) is the two-operand form with
;; the destination tied to operand 1; alternative 1 (isa avx) is vpsign*.
9030 (define_insn "<ssse3_avx2>_psign<mode>3"
9031 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9033 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9034 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9038 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9039 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9040 [(set_attr "isa" "noavx,avx")
9041 (set_attr "type" "sselog1")
9042 (set_attr "prefix_data16" "1,*")
9043 (set_attr "prefix_extra" "1")
9044 (set_attr "prefix" "orig,vex")
9045 (set_attr "mode" "<sseinsnmode>")])
;; psign over the MMXMODEI modes on "y"-constraint registers; the mnemonic
;; suffix comes from <mmxvecsize>.  Destination is tied to operand 1.
;; The ';' after the output template merely starts an empty md comment.
9047 (define_insn "ssse3_psign<mode>3"
9048 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9050 [(match_operand:MMXMODEI 1 "register_operand" "0")
9051 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9054 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9055 [(set_attr "type" "sselog1")
9056 (set_attr "prefix_extra" "1")
9057 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9058 (set_attr "mode" "DI")])
;; palignr over the SSESCALARMODE modes, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (i.e. the RTL carries the amount scaled
;; by 8 relative to the immediate that is emitted).  which_alternative picks
;; between the two-operand palignr and the three-operand vpalignr.
9060 (define_insn "<ssse3_avx2>_palignr<mode>"
9061 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9062 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9063 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9064 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9068 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9070 switch (which_alternative)
9073 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9075 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9080 [(set_attr "isa" "noavx,avx")
9081 (set_attr "type" "sseishft")
9082 (set_attr "atom_unit" "sishuf")
9083 (set_attr "prefix_data16" "1,*")
9084 (set_attr "prefix_extra" "1")
9085 (set_attr "length_immediate" "1")
9086 (set_attr "prefix" "orig,vex")
9087 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constraint registers, modelled as an unspec.
;; As in the vector variant, operand 3 (a multiple of 8 in 0..255 per its
;; predicate name) is divided by 8 before the immediate is printed.
9089 (define_insn "ssse3_palignrdi"
9090 [(set (match_operand:DI 0 "register_operand" "=y")
9091 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9092 (match_operand:DI 2 "nonimmediate_operand" "ym")
9093 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9097 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9098 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9100 [(set_attr "type" "sseishft")
9101 (set_attr "atom_unit" "sishuf")
9102 (set_attr "prefix_extra" "1")
9103 (set_attr "length_immediate" "1")
9104 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9105 (set_attr "mode" "DI")])
;; Vector absolute value over the VI124_AVX2 modes; a single alternative
;; using the "%v" template prefix (prefix attribute maybe_vex), with the
;; element-size suffix taken from <ssemodesuffix>.
9107 (define_insn "abs<mode>2"
9108 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9110 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9112 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9113 [(set_attr "type" "sselog1")
9114 (set_attr "prefix_data16" "1")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "prefix" "maybe_vex")
9117 (set_attr "mode" "<sseinsnmode>")])
;; Absolute value over the MMXMODEI modes on "y"-constraint registers.
;; Shares the "abs<mode>2" name template with the VI124_AVX2 pattern; the
;; two are instantiated over different mode iterators.
;; The ';' after the output template merely starts an empty md comment.
9119 (define_insn "abs<mode>2"
9120 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9122 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9124 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9125 [(set_attr "type" "sselog1")
9126 (set_attr "prefix_rep" "0")
9127 (set_attr "prefix_extra" "1")
9128 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9129 (set_attr "mode" "DI")])
9131 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9133 ;; AMD SSE4A instructions
9135 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movnt<ssemodesuffix>: writes the MODEF register
;; operand 1 to memory operand 0.  Modelled as an unspec (its name is on a
;; line not visible here).
9137 (define_insn "sse4a_movnt<mode>"
9138 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9140 [(match_operand:MODEF 1 "register_operand" "x")]
9143 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9144 [(set_attr "type" "ssemov")
9145 (set_attr "mode" "<MODE>")])
;; Variant of the movnt store that takes a VF_128 vector register and
;; stores only element 0 (selected with vec_select ... (const_int 0)) to
;; a memory operand of the scalar mode.
9147 (define_insn "sse4a_vmmovnt<mode>"
9148 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9149 (unspec:<ssescalarmode>
9150 [(vec_select:<ssescalarmode>
9151 (match_operand:VF_128 1 "register_operand" "x")
9152 (parallel [(const_int 0)]))]
9155 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9156 [(set_attr "type" "ssemov")
9157 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: operands 2 and 3 are constants in 0..255 (per
;; their predicate), so length_immediate is 2.  Destination is tied to
;; operand 1.
9159 (define_insn "sse4a_extrqi"
9160 [(set (match_operand:V2DI 0 "register_operand" "=x")
9161 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9162 (match_operand 2 "const_0_to_255_operand" "")
9163 (match_operand 3 "const_0_to_255_operand" "")]
9166 "extrq\t{%3, %2, %0|%0, %2, %3}"
9167 [(set_attr "type" "sse")
9168 (set_attr "prefix_data16" "1")
9169 (set_attr "length_immediate" "2")
9170 (set_attr "mode" "TI")])
;; extrq, register form: the control value is supplied in the V16QI
;; register operand 2 instead of immediates.  Destination is tied to
;; operand 1.
9172 (define_insn "sse4a_extrq"
9173 [(set (match_operand:V2DI 0 "register_operand" "=x")
9174 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9175 (match_operand:V16QI 2 "register_operand" "x")]
9178 "extrq\t{%2, %0|%0, %2}"
9179 [(set_attr "type" "sse")
9180 (set_attr "prefix_data16" "1")
9181 (set_attr "mode" "TI")])
;; insertq, immediate form: operands 3 and 4 are constants in 0..255 (per
;; their predicate), so length_immediate is 2.  Destination is tied to
;; operand 1; operand 2 is the second source register.
9183 (define_insn "sse4a_insertqi"
9184 [(set (match_operand:V2DI 0 "register_operand" "=x")
9185 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9186 (match_operand:V2DI 2 "register_operand" "x")
9187 (match_operand 3 "const_0_to_255_operand" "")
9188 (match_operand 4 "const_0_to_255_operand" "")]
9191 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9192 [(set_attr "type" "sseins")
9193 (set_attr "prefix_data16" "0")
9194 (set_attr "prefix_rep" "1")
9195 (set_attr "length_immediate" "2")
9196 (set_attr "mode" "TI")])
;; insertq, register form: no immediates; operand 2 is the only explicit
;; source besides the tied destination/operand 1.
9198 (define_insn "sse4a_insertq"
9199 [(set (match_operand:V2DI 0 "register_operand" "=x")
9200 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9201 (match_operand:V2DI 2 "register_operand" "x")]
9204 "insertq\t{%2, %0|%0, %2}"
9205 [(set_attr "type" "sseins")
9206 (set_attr "prefix_data16" "0")
9207 (set_attr "prefix_rep" "1")
9208 (set_attr "mode" "TI")])
9210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9212 ;; Intel SSE4.1 instructions
9214 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend over the VF modes.  Note the operand order
;; in the RTL: operand 2 is listed before operand 1.  Operand 3 is the
;; selector, range-checked by const_0_to_<blendbits>_operand.
;; Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is the three-source vblend* form.
9216 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9217 [(set (match_operand:VF 0 "register_operand" "=x,x")
9219 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9220 (match_operand:VF 1 "register_operand" "0,x")
9221 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9224 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9225 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9226 [(set_attr "isa" "noavx,avx")
9227 (set_attr "type" "ssemov")
9228 (set_attr "length_immediate" "1")
9229 (set_attr "prefix_data16" "1,*")
9230 (set_attr "prefix_extra" "1")
9231 (set_attr "prefix" "orig,vex")
9232 (set_attr "mode" "<MODE>")])
;; Register-controlled blend over the VF modes.  In alternative 0 (isa
;; noavx) the mask operand 3 uses the "Yz" constraint and operands 0-2 use
;; the *_not_xmm0_*_maybe_avx predicates; alternative 1 (isa avx) accepts
;; any "x" register for the mask.
;; NOTE(review): length_immediate is "1" for both alternatives here, while
;; the pblendvb pattern in this file uses "*,1" -- confirm whether the noavx
;; encoding really carries an immediate byte.
9234 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9235 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9237 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9238 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9239 (match_operand:VF 3 "register_operand" "Yz,x")]
9243 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9244 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9245 [(set_attr "isa" "noavx,avx")
9246 (set_attr "type" "ssemov")
9247 (set_attr "length_immediate" "1")
9248 (set_attr "prefix_data16" "1,*")
9249 (set_attr "prefix_extra" "1")
9250 (set_attr "prefix" "orig,vex")
9251 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> over the VF modes, modelled as an unspec with an
;; 8-bit immediate (operand 3).  Operand 1 carries the commutative marker.
;; Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is the vdp* form.
9253 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9254 [(set (match_operand:VF 0 "register_operand" "=x,x")
9256 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9257 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9258 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9262 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9263 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9264 [(set_attr "isa" "noavx,avx")
9265 (set_attr "type" "ssemul")
9266 (set_attr "length_immediate" "1")
9267 (set_attr "prefix_data16" "1,*")
9268 (set_attr "prefix_extra" "1")
9269 (set_attr "prefix" "orig,vex")
9270 (set_attr "mode" "<MODE>")])
;; movntdqa load over the VI8_AVX2 modes: memory operand 1 into register
;; operand 0, modelled as an unspec.  Uses the "%v" template prefix
;; (prefix attribute maybe_vex).
9272 (define_insn "<sse4_1_avx2>_movntdqa"
9273 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9274 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9277 "%vmovntdqa\t{%1, %0|%0, %1}"
9278 [(set_attr "type" "ssemov")
9279 (set_attr "prefix_extra" "1")
9280 (set_attr "prefix" "maybe_vex")
9281 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over the VI1_AVX2 modes, modelled as an unspec with an 8-bit
;; immediate (operand 3).  Alternative 0 (isa noavx) ties the destination
;; to operand 1; alternative 1 (isa avx) is vmpsadbw.
9283 (define_insn "<sse4_1_avx2>_mpsadbw"
9284 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9285 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9286 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9287 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9291 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9292 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9293 [(set_attr "isa" "noavx,avx")
9294 (set_attr "type" "sselog1")
9295 (set_attr "length_immediate" "1")
9296 (set_attr "prefix_extra" "1")
9297 (set_attr "prefix" "orig,vex")
9298 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs (operand 2 may be memory) produce a
;; V16HI result.  VEX-only, single alternative.
9300 (define_insn "avx2_packusdw"
9301 [(set (match_operand:V16HI 0 "register_operand" "=x")
9304 (match_operand:V8SI 1 "register_operand" "x"))
9306 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9308 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9309 [(set_attr "type" "sselog")
9310 (set_attr "prefix_extra" "1")
9311 (set_attr "prefix" "vex")
9312 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce a V8HI result.
;; Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is the three-operand vpackusdw.
9314 (define_insn "sse4_1_packusdw"
9315 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9318 (match_operand:V4SI 1 "register_operand" "0,x"))
9320 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9323 packusdw\t{%2, %0|%0, %2}
9324 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9325 [(set_attr "isa" "noavx,avx")
9326 (set_attr "type" "sselog")
9327 (set_attr "prefix_extra" "1")
9328 (set_attr "prefix" "orig,vex")
9329 (set_attr "mode" "TI")])
;; Register-controlled byte blend over the VI1_AVX2 modes, modelled as an
;; unspec.  Alternative 0 (isa noavx) is pblendvb with the mask (operand 3)
;; constrained to "Yz" and the destination tied to operand 1; alternative 1
;; (isa avx) is the four-operand vpblendvb, which carries the mask register
;; in an immediate byte (length_immediate "*,1").
;; Operand 0 uses the same reg_not_xmm0_operand_maybe_avx predicate as the
;; operand 1 it is tied to, matching the floating-point blendv pattern in
;; this file, so the destination is not restricted more tightly than the
;; source in the AVX alternative.
9331 (define_insn "<sse4_1_avx2>_pblendvb"
9332 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9334 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9335 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9336 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9340 pblendvb\t{%3, %2, %0|%0, %2, %3}
9341 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9342 [(set_attr "isa" "noavx,avx")
9343 (set_attr "type" "ssemov")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "length_immediate" "*,1")
9346 (set_attr "prefix" "orig,vex")
9347 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-controlled word blend over the VI2_AVX2 modes.  As in the
;; floating-point blend pattern, operand 2 is listed before operand 1 in
;; the RTL; operand 3 is an 8-bit immediate selector.
;; Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is vpblendw.
9349 (define_insn "<sse4_1_avx2>_pblendw"
9350 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9352 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
9353 (match_operand:VI2_AVX2 1 "register_operand" "0,x")
9354 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9357 pblendw\t{%3, %2, %0|%0, %2, %3}
9358 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9359 [(set_attr "isa" "noavx,avx")
9360 (set_attr "type" "ssemov")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "length_immediate" "1")
9363 (set_attr "prefix" "orig,vex")
9364 (set_attr "mode" "<sseinsnmode>")])
;; vpblendd over the VI4_AVX2 modes: VEX-only, single alternative, with an
;; 8-bit immediate selector (operand 3).  Operand 2 is listed before
;; operand 1 in the RTL.
9366 (define_insn "avx2_pblendd<mode>"
9367 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9369 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9370 (match_operand:VI4_AVX2 1 "register_operand" "x")
9371 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9373 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9374 [(set_attr "type" "ssemov")
9375 (set_attr "prefix_extra" "1")
9376 (set_attr "length_immediate" "1")
9377 (set_attr "prefix" "vex")
9378 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW with one source that
;; may be a register or memory.  Uses the "%v" template prefix.
9380 (define_insn "sse4_1_phminposuw"
9381 [(set (match_operand:V8HI 0 "register_operand" "=x")
9382 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9383 UNSPEC_PHMINPOSUW))]
9385 "%vphminposuw\t{%1, %0|%0, %1}"
9386 [(set_attr "type" "sselog1")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "prefix" "maybe_vex")
9389 (set_attr "mode" "TI")])
;; 256-bit byte-to-word extension: V16QI source, V16HI result, emitted as
;; vpmov<extsuffix>bw.  <code>/<extsuffix> come from a code iterator that
;; is not visible here (presumably sign/zero extension).
9391 (define_insn "avx2_<code>v16qiv16hi2"
9392 [(set (match_operand:V16HI 0 "register_operand" "=x")
9394 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9396 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9397 [(set_attr "type" "ssemov")
9398 (set_attr "prefix_extra" "1")
9399 (set_attr "prefix" "vex")
9400 (set_attr "mode" "OI")])
;; 128-bit byte-to-word extension: a subset of the V16QI source (selected
;; by a parallel beginning at element 0) is extended into a V8HI result.
;; Emitted as %vpmov<extsuffix>bw.
9402 (define_insn "sse4_1_<code>v8qiv8hi2"
9403 [(set (match_operand:V8HI 0 "register_operand" "=x")
9406 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9407 (parallel [(const_int 0)
9416 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9417 [(set_attr "type" "ssemov")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "prefix" "maybe_vex")
9420 (set_attr "mode" "TI")])
;; 256-bit byte-to-dword extension: a subset of the V16QI source (selected
;; by a parallel beginning at element 0) is extended into a V8SI result.
;; Emitted as vpmov<extsuffix>bd.
9422 (define_insn "avx2_<code>v8qiv8si2"
9423 [(set (match_operand:V8SI 0 "register_operand" "=x")
9426 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9427 (parallel [(const_int 0)
9436 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9437 [(set_attr "type" "ssemov")
9438 (set_attr "prefix_extra" "1")
9439 (set_attr "prefix" "vex")
9440 (set_attr "mode" "OI")])
;; 128-bit byte-to-dword extension: a subset of the V16QI source (selected
;; by a parallel beginning at element 0) is extended into a V4SI result.
;; Emitted as %vpmov<extsuffix>bd.
9442 (define_insn "sse4_1_<code>v4qiv4si2"
9443 [(set (match_operand:V4SI 0 "register_operand" "=x")
9446 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9447 (parallel [(const_int 0)
9452 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9453 [(set_attr "type" "ssemov")
9454 (set_attr "prefix_extra" "1")
9455 (set_attr "prefix" "maybe_vex")
9456 (set_attr "mode" "TI")])
;; 256-bit word-to-dword extension: V8HI source, V8SI result, emitted as
;; vpmov<extsuffix>wd.
9458 (define_insn "avx2_<code>v8hiv8si2"
9459 [(set (match_operand:V8SI 0 "register_operand" "=x")
9461 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9463 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9464 [(set_attr "type" "ssemov")
9465 (set_attr "prefix_extra" "1")
9466 (set_attr "prefix" "vex")
9467 (set_attr "mode" "OI")])
;; 128-bit word-to-dword extension: a subset of the V8HI source (selected
;; by a parallel beginning at element 0) is extended into a V4SI result.
;; Emitted as %vpmov<extsuffix>wd.
9469 (define_insn "sse4_1_<code>v4hiv4si2"
9470 [(set (match_operand:V4SI 0 "register_operand" "=x")
9473 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9474 (parallel [(const_int 0)
9479 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9480 [(set_attr "type" "ssemov")
9481 (set_attr "prefix_extra" "1")
9482 (set_attr "prefix" "maybe_vex")
9483 (set_attr "mode" "TI")])
;; 256-bit byte-to-qword extension: a subset of the V16QI source (selected
;; by a parallel beginning at element 0) is extended into a V4DI result.
;; Emitted as vpmov<extsuffix>bq.
9485 (define_insn "avx2_<code>v4qiv4di2"
9486 [(set (match_operand:V4DI 0 "register_operand" "=x")
9489 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9490 (parallel [(const_int 0)
9495 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9496 [(set_attr "type" "ssemov")
9497 (set_attr "prefix_extra" "1")
9498 (set_attr "prefix" "vex")
9499 (set_attr "mode" "OI")])
;; 128-bit byte-to-qword extension: a subset of the V16QI source (selected
;; by a parallel beginning at element 0) is extended into a V2DI result.
;; Emitted as %vpmov<extsuffix>bq.
9501 (define_insn "sse4_1_<code>v2qiv2di2"
9502 [(set (match_operand:V2DI 0 "register_operand" "=x")
9505 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9506 (parallel [(const_int 0)
9509 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9510 [(set_attr "type" "ssemov")
9511 (set_attr "prefix_extra" "1")
9512 (set_attr "prefix" "maybe_vex")
9513 (set_attr "mode" "TI")])
;; 256-bit word-to-qword extension: a subset of the V8HI source (selected
;; by a parallel beginning at element 0) is extended into a V4DI result.
;; Emitted as vpmov<extsuffix>wq.
9515 (define_insn "avx2_<code>v4hiv4di2"
9516 [(set (match_operand:V4DI 0 "register_operand" "=x")
9519 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9520 (parallel [(const_int 0)
9525 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9526 [(set_attr "type" "ssemov")
9527 (set_attr "prefix_extra" "1")
9528 (set_attr "prefix" "vex")
9529 (set_attr "mode" "OI")])
;; 128-bit word-to-qword extension: a subset of the V8HI source (selected
;; by a parallel beginning at element 0) is extended into a V2DI result.
;; Emitted as %vpmov<extsuffix>wq.
9531 (define_insn "sse4_1_<code>v2hiv2di2"
9532 [(set (match_operand:V2DI 0 "register_operand" "=x")
9535 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9536 (parallel [(const_int 0)
9539 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9540 [(set_attr "type" "ssemov")
9541 (set_attr "prefix_extra" "1")
9542 (set_attr "prefix" "maybe_vex")
9543 (set_attr "mode" "TI")])
;; 256-bit dword-to-qword extension: V4SI source, V4DI result, emitted as
;; vpmov<extsuffix>dq.  The "prefix" attribute is set to "vex" explicitly,
;; as every other avx2_<code>* extension pattern in this file does, rather
;; than relying on whatever default the attribute definition supplies.
9545 (define_insn "avx2_<code>v4siv4di2"
9546 [(set (match_operand:V4DI 0 "register_operand" "=x")
9548 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9550 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9551 [(set_attr "type" "ssemov")
9552 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9553 (set_attr "mode" "OI")])
;; 128-bit dword-to-qword extension: a subset of the V4SI source (selected
;; by a parallel beginning at element 0) is extended into a V2DI result.
;; Emitted as %vpmov<extsuffix>dq.
9555 (define_insn "sse4_1_<code>v2siv2di2"
9556 [(set (match_operand:V2DI 0 "register_operand" "=x")
9559 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9560 (parallel [(const_int 0)
9563 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9564 [(set_attr "type" "ssemov")
9565 (set_attr "prefix_extra" "1")
9566 (set_attr "prefix" "maybe_vex")
9567 (set_attr "mode" "TI")])
9569 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9570 ;; setting FLAGS_REG, but they are not really compare instructions.
;; vtest<ssemodesuffix> over the VF modes.  The only result is
;; (reg:CC FLAGS_REG), set from an unspec of the two inputs; there is no
;; register destination.
9571 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9572 [(set (reg:CC FLAGS_REG)
9573 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9574 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9577 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9578 [(set_attr "type" "ssecomi")
9579 (set_attr "prefix_extra" "1")
9580 (set_attr "prefix" "vex")
9581 (set_attr "mode" "<MODE>")])
9583 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9584 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI operands.  The only result is
;; (reg:CC FLAGS_REG), set from an unspec of the two inputs.
9585 (define_insn "avx_ptest256"
9586 [(set (reg:CC FLAGS_REG)
9587 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9588 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9591 "vptest\t{%1, %0|%0, %1}"
9592 [(set_attr "type" "ssecomi")
9593 (set_attr "prefix_extra" "1")
9594 (set_attr "prefix" "vex")
9595 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands, using the "%v" template prefix (prefix
;; attribute maybe_vex).  The only result is (reg:CC FLAGS_REG).
9597 (define_insn "sse4_1_ptest"
9598 [(set (reg:CC FLAGS_REG)
9599 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9600 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9603 "%vptest\t{%1, %0|%0, %1}"
9604 [(set_attr "type" "ssecomi")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "mode" "TI")])
;; Packed round over the VF modes, modelled as an unspec.  Operand 2 is an
;; immediate in 0..15 (per its predicate name).  A single alternative using
;; the "%v" template prefix; prefix_data16 is computed conditionally on
;; TARGET_AVX rather than per alternative.
9609 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9610 [(set (match_operand:VF 0 "register_operand" "=x")
9612 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9613 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9616 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9617 [(set_attr "type" "ssecvt")
9618 (set (attr "prefix_data16")
9620 (ne (symbol_ref "TARGET_AVX") (const_int 0))
9622 (const_string "1")))
9623 (set_attr "prefix_extra" "1")
9624 (set_attr "length_immediate" "1")
9625 (set_attr "prefix" "maybe_vex")
9626 (set_attr "mode" "<MODE>")])
;; Scalar round over the VF_128 modes.  Operand 2 is the value being
;; rounded, operand 3 the 0..15 immediate, and operand 1 supplies the
;; remaining part of the result (it is tied to the destination in the
;; noavx alternative).  Alternative 1 (isa avx) is the vround* form.
9628 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9629 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9632 [(match_operand:VF_128 2 "register_operand" "x,x")
9633 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9635 (match_operand:VF_128 1 "register_operand" "0,x")
9639 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9640 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9641 [(set_attr "isa" "noavx,avx")
9642 (set_attr "type" "ssecvt")
9643 (set_attr "length_immediate" "1")
9644 (set_attr "prefix_data16" "1,*")
9645 (set_attr "prefix_extra" "1")
9646 (set_attr "prefix" "orig,vex")
9647 (set_attr "mode" "<MODE>")])
9649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9651 ;; Intel SSE4.2 string/text processing instructions
9653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern that produces three results at once: an SI
;; index (operand 0, "c" constraint), a V16QI mask (operand 1, "Yz"
;; constraint) and the flags register.  It is split while pseudos can still
;; be created.  The split code looks for REG_UNUSED notes on operand 0,
;; operand 1 and FLAGS_REG and emits gen_sse4_2_pcmpestri and/or
;; gen_sse4_2_pcmpestrm (their guarding conditions are on lines not
;; visible here), or the _cconly variant when only the flags result is
;; live (flags && !(ecx || xmm0)).
9655 (define_insn_and_split "sse4_2_pcmpestr"
9656 [(set (match_operand:SI 0 "register_operand" "=c,c")
9658 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9659 (match_operand:SI 3 "register_operand" "a,a")
9660 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9661 (match_operand:SI 5 "register_operand" "d,d")
9662 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9664 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9672 (set (reg:CC FLAGS_REG)
9681 && can_create_pseudo_p ()"
9686 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9687 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9688 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9691 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9692 operands[3], operands[4],
9693 operands[5], operands[6]));
9695 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9696 operands[3], operands[4],
9697 operands[5], operands[6]));
9698 if (flags && !(ecx || xmm0))
9699 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9700 operands[2], operands[3],
9701 operands[4], operands[5],
9705 [(set_attr "type" "sselog")
9706 (set_attr "prefix_data16" "1")
9707 (set_attr "prefix_extra" "1")
9708 (set_attr "length_immediate" "1")
9709 (set_attr "memory" "none,load")
9710 (set_attr "mode" "TI")])
;; pcmpestri: index result in operand 0 ("c" constraint) plus a flags set.
;; The two length operands are constrained to "a" and "d"; operand 3 may be
;; a register or memory (the "memory" attribute is none/load accordingly).
9712 (define_insn "sse4_2_pcmpestri"
9713 [(set (match_operand:SI 0 "register_operand" "=c,c")
9715 [(match_operand:V16QI 1 "register_operand" "x,x")
9716 (match_operand:SI 2 "register_operand" "a,a")
9717 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9718 (match_operand:SI 4 "register_operand" "d,d")
9719 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9721 (set (reg:CC FLAGS_REG)
9730 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9731 [(set_attr "type" "sselog")
9732 (set_attr "prefix_data16" "1")
9733 (set_attr "prefix_extra" "1")
9734 (set_attr "prefix" "maybe_vex")
9735 (set_attr "length_immediate" "1")
9736 (set_attr "memory" "none,load")
9737 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI mask result in operand 0 ("Yz" constraint) plus a
;; flags set.  Inputs and constraints mirror the pcmpestri pattern.
9739 (define_insn "sse4_2_pcmpestrm"
9740 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9742 [(match_operand:V16QI 1 "register_operand" "x,x")
9743 (match_operand:SI 2 "register_operand" "a,a")
9744 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9745 (match_operand:SI 4 "register_operand" "d,d")
9746 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9748 (set (reg:CC FLAGS_REG)
9757 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9758 [(set_attr "type" "sselog")
9759 (set_attr "prefix_data16" "1")
9760 (set_attr "prefix_extra" "1")
9761 (set_attr "length_immediate" "1")
9762 (set_attr "prefix" "maybe_vex")
9763 (set_attr "memory" "none,load")
9764 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and merely clobbers scratch
;; operands 0 and 1.  Four alternatives: the first two clobber a "Yz"
;; V16QI scratch and emit pcmpestrm, the last two clobber a "c" SI scratch
;; and emit pcmpestri; within each pair operand 4 is a register or memory.
9766 (define_insn "sse4_2_pcmpestr_cconly"
9767 [(set (reg:CC FLAGS_REG)
9769 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9770 (match_operand:SI 3 "register_operand" "a,a,a,a")
9771 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9772 (match_operand:SI 5 "register_operand" "d,d,d,d")
9773 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9775 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9776 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9779 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9780 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9781 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9782 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9783 [(set_attr "type" "sselog")
9784 (set_attr "prefix_data16" "1")
9785 (set_attr "prefix_extra" "1")
9786 (set_attr "length_immediate" "1")
9787 (set_attr "memory" "none,load,none,load")
9788 (set_attr "prefix" "maybe_vex")
9789 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern (no explicit length operands) that produces
;; an SI index (operand 0, "c"), a V16QI mask (operand 1, "Yz") and the
;; flags register.  It is split while pseudos can still be created.  The
;; split code looks for REG_UNUSED notes on operand 0, operand 1 and
;; FLAGS_REG and emits gen_sse4_2_pcmpistri and/or gen_sse4_2_pcmpistrm
;; (their guarding conditions are on lines not visible here), or the
;; _cconly variant when only the flags result is live.
9791 (define_insn_and_split "sse4_2_pcmpistr"
9792 [(set (match_operand:SI 0 "register_operand" "=c,c")
9794 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9795 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9796 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9798 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9804 (set (reg:CC FLAGS_REG)
9811 && can_create_pseudo_p ()"
9816 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9817 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9818 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9821 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9822 operands[3], operands[4]));
9824 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9825 operands[3], operands[4]));
9826 if (flags && !(ecx || xmm0))
9827 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9828 operands[2], operands[3],
9832 [(set_attr "type" "sselog")
9833 (set_attr "prefix_data16" "1")
9834 (set_attr "prefix_extra" "1")
9835 (set_attr "length_immediate" "1")
9836 (set_attr "memory" "none,load")
9837 (set_attr "mode" "TI")])
;; pcmpistri: index result in operand 0 ("c" constraint) plus a flags set.
;; Operand 2 may be a register or memory (the "memory" attribute is
;; none/load accordingly); operand 3 is the 8-bit immediate.
9839 (define_insn "sse4_2_pcmpistri"
9840 [(set (match_operand:SI 0 "register_operand" "=c,c")
9842 [(match_operand:V16QI 1 "register_operand" "x,x")
9843 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9844 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9846 (set (reg:CC FLAGS_REG)
9853 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9854 [(set_attr "type" "sselog")
9855 (set_attr "prefix_data16" "1")
9856 (set_attr "prefix_extra" "1")
9857 (set_attr "length_immediate" "1")
9858 (set_attr "prefix" "maybe_vex")
9859 (set_attr "memory" "none,load")
9860 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI mask result in operand 0 ("Yz" constraint) plus a
;; flags set.  Inputs and constraints mirror the pcmpistri pattern.
9862 (define_insn "sse4_2_pcmpistrm"
9863 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9865 [(match_operand:V16QI 1 "register_operand" "x,x")
9866 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9867 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9869 (set (reg:CC FLAGS_REG)
9876 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9877 [(set_attr "type" "sselog")
9878 (set_attr "prefix_data16" "1")
9879 (set_attr "prefix_extra" "1")
9880 (set_attr "length_immediate" "1")
9881 (set_attr "prefix" "maybe_vex")
9882 (set_attr "memory" "none,load")
9883 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and merely clobbers scratch
;; operands 0 and 1.  Four alternatives: the first two clobber a "Yz"
;; V16QI scratch and emit pcmpistrm, the last two clobber a "c" SI scratch
;; and emit pcmpistri; within each pair operand 3 is a register or memory.
9885 (define_insn "sse4_2_pcmpistr_cconly"
9886 [(set (reg:CC FLAGS_REG)
9888 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9889 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9890 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9892 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9893 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9896 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9897 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9898 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9899 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9900 [(set_attr "type" "sselog")
9901 (set_attr "prefix_data16" "1")
9902 (set_attr "prefix_extra" "1")
9903 (set_attr "length_immediate" "1")
9904 (set_attr "memory" "none,load,none,load")
9905 (set_attr "prefix" "maybe_vex")
9906 (set_attr "mode" "TI")])
9908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9912 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9914 ;; XOP parallel integer multiply/add instructions.
9915 ;; Note the XOP multiply/add instructions
9916 ;; a[i] = b[i] * c[i] + d[i];
9917 ;; do not allow the value being added to be a memory operation.
;; vpmacsww: V8HI multiply of operands 1 and 2 with operand 3 as the
;; addend.  The addend only has an "x" (register) constraint, so its
;; predicate is register_operand: a memory addend is rejected when the
;; pattern is matched instead of being left for reload to fix up.
9918 (define_insn "xop_pmacsww"
9919 [(set (match_operand:V8HI 0 "register_operand" "=x")
9922 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9923 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9924 (match_operand:V8HI 3 "register_operand" "x")))]
9926 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9927 [(set_attr "type" "ssemuladd")
9928 (set_attr "mode" "TI")])
;; vpmacssww: V8HI multiply of operands 1 and 2 with operand 3 as the
;; addend (the outer operation is on a line not visible here).  The addend
;; only has an "x" (register) constraint, so its predicate is
;; register_operand: a memory addend is rejected when the pattern is
;; matched instead of being left for reload to fix up.
9930 (define_insn "xop_pmacssww"
9931 [(set (match_operand:V8HI 0 "register_operand" "=x")
9933 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9934 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9935 (match_operand:V8HI 3 "register_operand" "x")))]
9937 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9938 [(set_attr "type" "ssemuladd")
9939 (set_attr "mode" "TI")])
;; vpmacsdd: V4SI multiply of operands 1 and 2 with operand 3 as the
;; addend.  The addend only has an "x" (register) constraint, so its
;; predicate is register_operand: a memory addend is rejected when the
;; pattern is matched instead of being left for reload to fix up.
9941 (define_insn "xop_pmacsdd"
9942 [(set (match_operand:V4SI 0 "register_operand" "=x")
9945 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9946 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9947 (match_operand:V4SI 3 "register_operand" "x")))]
9949 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9950 [(set_attr "type" "ssemuladd")
9951 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI multiply of operands 1 and 2 with operand 3 as the
;; addend (the outer operation is on a line not visible here).  The addend
;; only has an "x" (register) constraint, so its predicate is
;; register_operand: a memory addend is rejected when the pattern is
;; matched instead of being left for reload to fix up.
9953 (define_insn "xop_pmacssdd"
9954 [(set (match_operand:V4SI 0 "register_operand" "=x")
9956 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9957 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9958 (match_operand:V4SI 3 "register_operand" "x")))]
9960 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9961 [(set_attr "type" "ssemuladd")
9962 (set_attr "mode" "TI")])
;; vpmacssdql: multiplies elements selected from the V4SI operands 1 and 2
;; (the visible selectors begin at element 1) and combines the product with
;; the V2DI addend operand 3.  The addend only has an "x" (register)
;; constraint, so its predicate is register_operand: a memory addend is
;; rejected when the pattern is matched instead of being left for reload.
9964 (define_insn "xop_pmacssdql"
9965 [(set (match_operand:V2DI 0 "register_operand" "=x")
9970 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9971 (parallel [(const_int 1)
9974 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9975 (parallel [(const_int 1)
9977 (match_operand:V2DI 3 "register_operand" "x")))]
9979 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9980 [(set_attr "type" "ssemuladd")
9981 (set_attr "mode" "TI")])
;; vpmacssdqh: multiplies elements selected from the V4SI operands 1 and 2
;; (the visible selectors begin at element 0) and combines the product with
;; the V2DI addend operand 3.  The addend only has an "x" (register)
;; constraint, so its predicate is register_operand: a memory addend is
;; rejected when the pattern is matched instead of being left for reload.
9983 (define_insn "xop_pmacssdqh"
9984 [(set (match_operand:V2DI 0 "register_operand" "=x")
9989 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9990 (parallel [(const_int 0)
9994 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9995 (parallel [(const_int 0)
9997 (match_operand:V2DI 3 "register_operand" "x")))]
9999 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10000 [(set_attr "type" "ssemuladd")
10001 (set_attr "mode" "TI")])
;; vpmacsdql: multiplies elements selected from the V4SI operands 1 and 2
;; (the visible selectors begin at element 1) and combines the product with
;; the V2DI addend operand 3.  The addend only has an "x" (register)
;; constraint, so its predicate is register_operand: a memory addend is
;; rejected when the pattern is matched instead of being left for reload.
10003 (define_insn "xop_pmacsdql"
10004 [(set (match_operand:V2DI 0 "register_operand" "=x")
10009 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10010 (parallel [(const_int 1)
10014 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10015 (parallel [(const_int 1)
10017 (match_operand:V2DI 3 "register_operand" "x")))]
10019 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10020 [(set_attr "type" "ssemuladd")
10021 (set_attr "mode" "TI")])
10023 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10024 ;; fake it with a multiply/add. In general, we expect the define_split to
10025 ;; occur before register allocation, so we have to handle the corner case where
10026 ;; the target is the same as operands 1/2
;; Multiply of elements 1 and 3 of two V4SI operands into a V2DI result,
;; implemented by splitting into a multiply/add.  The split condition is
;; "&& reload_completed", and the destination is earlyclobber ("=&x").
;; The preparation code sets operands[3] to the V2DI zero constant for use
;; by the replacement pattern (most of which is on lines not visible here).
10027 (define_insn_and_split "xop_mulv2div2di3_low"
10028 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10032 (match_operand:V4SI 1 "register_operand" "%x")
10033 (parallel [(const_int 1)
10037 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10038 (parallel [(const_int 1)
10039 (const_int 3)])))))]
10042 "&& reload_completed"
10043 [(set (match_dup 0)
10051 (parallel [(const_int 1)
10056 (parallel [(const_int 1)
10060 operands[3] = CONST0_RTX (V2DImode);
10062 [(set_attr "type" "ssemul")
10063 (set_attr "mode" "TI")])
;; vpmacsdqh: multiplies elements selected from the V4SI operands 1 and 2
;; (the visible selectors begin at element 0) and combines the product with
;; the V2DI addend operand 3.  The addend only has an "x" (register)
;; constraint, so its predicate is register_operand: a memory addend is
;; rejected when the pattern is matched instead of being left for reload.
10065 (define_insn "xop_pmacsdqh"
10066 [(set (match_operand:V2DI 0 "register_operand" "=x")
10071 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10072 (parallel [(const_int 0)
10076 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10077 (parallel [(const_int 0)
10079 (match_operand:V2DI 3 "register_operand" "x")))]
10081 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10082 [(set_attr "type" "ssemuladd")
10083 (set_attr "mode" "TI")])
10085 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10086 ;; fake it with a multiply/add. In general, we expect the define_split to
10087 ;; occur before register allocation, so we have to handle the corner case where
10088 ;; the target is the same as either operands[1] or operands[2]
10089 (define_insn_and_split "xop_mulv2div2di3_high"
10090 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10094 (match_operand:V4SI 1 "register_operand" "%x")
10095 (parallel [(const_int 0)
10099 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10100 (parallel [(const_int 0)
10101 (const_int 2)])))))]
10104 "&& reload_completed"
10105 [(set (match_dup 0)
10113 (parallel [(const_int 0)
10118 (parallel [(const_int 0)
10122 operands[3] = CONST0_RTX (V2DImode);
10124 [(set_attr "type" "ssemul")
10125 (set_attr "mode" "TI")])
10127 ;; XOP parallel integer multiply/add instructions for the intrinsics
10128 (define_insn "xop_pmacsswd"
10129 [(set (match_operand:V4SI 0 "register_operand" "=x")
10134 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10135 (parallel [(const_int 1)
10141 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10142 (parallel [(const_int 1)
10146 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10148 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10149 [(set_attr "type" "ssemuladd")
10150 (set_attr "mode" "TI")])
10152 (define_insn "xop_pmacswd"
10153 [(set (match_operand:V4SI 0 "register_operand" "=x")
10158 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10159 (parallel [(const_int 1)
10165 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10166 (parallel [(const_int 1)
10170 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10172 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10173 [(set_attr "type" "ssemuladd")
10174 (set_attr "mode" "TI")])
10176 (define_insn "xop_pmadcsswd"
10177 [(set (match_operand:V4SI 0 "register_operand" "=x")
10183 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10184 (parallel [(const_int 0)
10190 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10191 (parallel [(const_int 0)
10199 (parallel [(const_int 1)
10206 (parallel [(const_int 1)
10209 (const_int 7)])))))
10210 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10212 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10213 [(set_attr "type" "ssemuladd")
10214 (set_attr "mode" "TI")])
10216 (define_insn "xop_pmadcswd"
10217 [(set (match_operand:V4SI 0 "register_operand" "=x")
10223 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10224 (parallel [(const_int 0)
10230 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10231 (parallel [(const_int 0)
10239 (parallel [(const_int 1)
10246 (parallel [(const_int 1)
10249 (const_int 7)])))))
10250 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10252 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10253 [(set_attr "type" "ssemuladd")
10254 (set_attr "mode" "TI")])
10256 ;; XOP parallel XMM conditional moves
10257 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10258 [(set (match_operand:V 0 "register_operand" "=x,x")
10260 (match_operand:V 3 "nonimmediate_operand" "x,m")
10261 (match_operand:V 1 "vector_move_operand" "x,x")
10262 (match_operand:V 2 "vector_move_operand" "xm,x")))]
10264 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10265 [(set_attr "type" "sse4arg")])
10267 ;; XOP horizontal add/subtract instructions
10268 (define_insn "xop_phaddbw"
10269 [(set (match_operand:V8HI 0 "register_operand" "=x")
10273 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10274 (parallel [(const_int 0)
10285 (parallel [(const_int 1)
10292 (const_int 15)])))))]
10294 "vphaddbw\t{%1, %0|%0, %1}"
10295 [(set_attr "type" "sseiadd1")])
10297 (define_insn "xop_phaddbd"
10298 [(set (match_operand:V4SI 0 "register_operand" "=x")
10303 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10304 (parallel [(const_int 0)
10311 (parallel [(const_int 1)
10314 (const_int 13)]))))
10319 (parallel [(const_int 2)
10326 (parallel [(const_int 3)
10329 (const_int 15)]))))))]
10331 "vphaddbd\t{%1, %0|%0, %1}"
10332 [(set_attr "type" "sseiadd1")])
10334 (define_insn "xop_phaddbq"
10335 [(set (match_operand:V2DI 0 "register_operand" "=x")
10341 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10342 (parallel [(const_int 0)
10347 (parallel [(const_int 1)
10353 (parallel [(const_int 2)
10358 (parallel [(const_int 3)
10359 (const_int 7)])))))
10365 (parallel [(const_int 8)
10370 (parallel [(const_int 9)
10371 (const_int 13)]))))
10376 (parallel [(const_int 10)
10381 (parallel [(const_int 11)
10382 (const_int 15)])))))))]
10384 "vphaddbq\t{%1, %0|%0, %1}"
10385 [(set_attr "type" "sseiadd1")])
10387 (define_insn "xop_phaddwd"
10388 [(set (match_operand:V4SI 0 "register_operand" "=x")
10392 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10393 (parallel [(const_int 0)
10400 (parallel [(const_int 1)
10403 (const_int 7)])))))]
10405 "vphaddwd\t{%1, %0|%0, %1}"
10406 [(set_attr "type" "sseiadd1")])
10408 (define_insn "xop_phaddwq"
10409 [(set (match_operand:V2DI 0 "register_operand" "=x")
10414 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10415 (parallel [(const_int 0)
10420 (parallel [(const_int 1)
10426 (parallel [(const_int 2)
10431 (parallel [(const_int 3)
10432 (const_int 7)]))))))]
10434 "vphaddwq\t{%1, %0|%0, %1}"
10435 [(set_attr "type" "sseiadd1")])
10437 (define_insn "xop_phadddq"
10438 [(set (match_operand:V2DI 0 "register_operand" "=x")
10442 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10443 (parallel [(const_int 0)
10448 (parallel [(const_int 1)
10449 (const_int 3)])))))]
10451 "vphadddq\t{%1, %0|%0, %1}"
10452 [(set_attr "type" "sseiadd1")])
10454 (define_insn "xop_phaddubw"
10455 [(set (match_operand:V8HI 0 "register_operand" "=x")
10459 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10460 (parallel [(const_int 0)
10471 (parallel [(const_int 1)
10478 (const_int 15)])))))]
10480 "vphaddubw\t{%1, %0|%0, %1}"
10481 [(set_attr "type" "sseiadd1")])
10483 (define_insn "xop_phaddubd"
10484 [(set (match_operand:V4SI 0 "register_operand" "=x")
10489 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10490 (parallel [(const_int 0)
10497 (parallel [(const_int 1)
10500 (const_int 13)]))))
10505 (parallel [(const_int 2)
10512 (parallel [(const_int 3)
10515 (const_int 15)]))))))]
10517 "vphaddubd\t{%1, %0|%0, %1}"
10518 [(set_attr "type" "sseiadd1")])
10520 (define_insn "xop_phaddubq"
10521 [(set (match_operand:V2DI 0 "register_operand" "=x")
10527 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10528 (parallel [(const_int 0)
10533 (parallel [(const_int 1)
10539 (parallel [(const_int 2)
10544 (parallel [(const_int 3)
10545 (const_int 7)])))))
10551 (parallel [(const_int 8)
10556 (parallel [(const_int 9)
10557 (const_int 13)]))))
10562 (parallel [(const_int 10)
10567 (parallel [(const_int 11)
10568 (const_int 15)])))))))]
10570 "vphaddubq\t{%1, %0|%0, %1}"
10571 [(set_attr "type" "sseiadd1")])
10573 (define_insn "xop_phadduwd"
10574 [(set (match_operand:V4SI 0 "register_operand" "=x")
10578 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10579 (parallel [(const_int 0)
10586 (parallel [(const_int 1)
10589 (const_int 7)])))))]
10591 "vphadduwd\t{%1, %0|%0, %1}"
10592 [(set_attr "type" "sseiadd1")])
10594 (define_insn "xop_phadduwq"
10595 [(set (match_operand:V2DI 0 "register_operand" "=x")
10600 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10601 (parallel [(const_int 0)
10606 (parallel [(const_int 1)
10612 (parallel [(const_int 2)
10617 (parallel [(const_int 3)
10618 (const_int 7)]))))))]
10620 "vphadduwq\t{%1, %0|%0, %1}"
10621 [(set_attr "type" "sseiadd1")])
10623 (define_insn "xop_phaddudq"
10624 [(set (match_operand:V2DI 0 "register_operand" "=x")
10628 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10629 (parallel [(const_int 0)
10634 (parallel [(const_int 1)
10635 (const_int 3)])))))]
10637 "vphaddudq\t{%1, %0|%0, %1}"
10638 [(set_attr "type" "sseiadd1")])
10640 (define_insn "xop_phsubbw"
10641 [(set (match_operand:V8HI 0 "register_operand" "=x")
10645 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10646 (parallel [(const_int 0)
10657 (parallel [(const_int 1)
10664 (const_int 15)])))))]
10666 "vphsubbw\t{%1, %0|%0, %1}"
10667 [(set_attr "type" "sseiadd1")])
10669 (define_insn "xop_phsubwd"
10670 [(set (match_operand:V4SI 0 "register_operand" "=x")
10674 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10675 (parallel [(const_int 0)
10682 (parallel [(const_int 1)
10685 (const_int 7)])))))]
10687 "vphsubwd\t{%1, %0|%0, %1}"
10688 [(set_attr "type" "sseiadd1")])
10690 (define_insn "xop_phsubdq"
10691 [(set (match_operand:V2DI 0 "register_operand" "=x")
10695 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10696 (parallel [(const_int 0)
10701 (parallel [(const_int 1)
10702 (const_int 3)])))))]
10704 "vphsubdq\t{%1, %0|%0, %1}"
10705 [(set_attr "type" "sseiadd1")])
10707 ;; XOP permute instructions
10708 (define_insn "xop_pperm"
10709 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10711 [(match_operand:V16QI 1 "register_operand" "x,x")
10712 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10713 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10714 UNSPEC_XOP_PERMUTE))]
10715 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10716 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10717 [(set_attr "type" "sse4arg")
10718 (set_attr "mode" "TI")])
10720 ;; XOP pack instructions that combine two vectors into a smaller vector
10721 (define_insn "xop_pperm_pack_v2di_v4si"
10722 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10725 (match_operand:V2DI 1 "register_operand" "x,x"))
10727 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10728 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10729 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10730 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10731 [(set_attr "type" "sse4arg")
10732 (set_attr "mode" "TI")])
10734 (define_insn "xop_pperm_pack_v4si_v8hi"
10735 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10738 (match_operand:V4SI 1 "register_operand" "x,x"))
10740 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10741 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10742 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10743 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10744 [(set_attr "type" "sse4arg")
10745 (set_attr "mode" "TI")])
10747 (define_insn "xop_pperm_pack_v8hi_v16qi"
10748 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10751 (match_operand:V8HI 1 "register_operand" "x,x"))
10753 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10754 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10755 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10756 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10757 [(set_attr "type" "sse4arg")
10758 (set_attr "mode" "TI")])
10760 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left whose count is a scalar SImode operand.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, the
;; scalar is converted to <ssescalarmode> if needed, copied into each of the
;; <ssescalarnum> elements of a fresh <MODE> register via vec_init<mode>, and
;; the per-element variable rotate xop_vrotl<mode>3 is emitted with that
;; register as the count vector.
10761 (define_expand "rotl<mode>3"
10762 [(set (match_operand:VI_128 0 "register_operand" "")
10764 (match_operand:VI_128 1 "nonimmediate_operand" "")
10765 (match_operand:SI 2 "general_operand")))]
10768 /* If we were given a scalar, convert it to parallel */
10769 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10771 rtvec vs = rtvec_alloc (<ssescalarnum>);
10772 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10773 rtx reg = gen_reg_rtx (<MODE>mode);
10774 rtx op2 = operands[2];
10777 if (GET_MODE (op2) != <ssescalarmode>mode)
10779 op2 = gen_reg_rtx (<ssescalarmode>mode);
10780 convert_move (op2, operands[2], false);
10783 for (i = 0; i < <ssescalarnum>; i++)
10784 RTVEC_ELT (vs, i) = op2;
10786 emit_insn (gen_vec_init<mode> (reg, par));
10787 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right whose count is a scalar SImode operand.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, the
;; scalar is converted to <ssescalarmode> if needed and copied into every
;; element of a fresh <MODE> register.  That vector is then negated with
;; neg<mode>2 and passed to xop_vrotl<mode>3, i.e. the right rotate is emitted
;; as a variable left rotate by the negated per-element count.
10792 (define_expand "rotr<mode>3"
10793 [(set (match_operand:VI_128 0 "register_operand" "")
10795 (match_operand:VI_128 1 "nonimmediate_operand" "")
10796 (match_operand:SI 2 "general_operand")))]
10799 /* If we were given a scalar, convert it to parallel */
10800 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10802 rtvec vs = rtvec_alloc (<ssescalarnum>);
10803 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10804 rtx neg = gen_reg_rtx (<MODE>mode);
10805 rtx reg = gen_reg_rtx (<MODE>mode);
10806 rtx op2 = operands[2];
10809 if (GET_MODE (op2) != <ssescalarmode>mode)
10811 op2 = gen_reg_rtx (<ssescalarmode>mode);
10812 convert_move (op2, operands[2], false);
10815 for (i = 0; i < <ssescalarnum>; i++)
10816 RTVEC_ELT (vs, i) = op2;
10818 emit_insn (gen_vec_init<mode> (reg, par));
10819 emit_insn (gen_neg<mode>2 (neg, reg));
10820 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
10825 (define_insn "xop_rotl<mode>3"
10826 [(set (match_operand:VI_128 0 "register_operand" "=x")
10828 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10829 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10831 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10832 [(set_attr "type" "sseishft")
10833 (set_attr "length_immediate" "1")
10834 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The pattern is emitted with the
;; rotate-left mnemonic vprot<ssemodesuffix>, so the count is rewritten into
;; operands[3] as (element width in bits - count).  The element width must be
;; taken from the scalar mode: <ssescalarnum> is the number of vector
;; elements, and <ssescalarnum> * 8 happens to equal the element width only
;; for V4SI (it yields 128, 64 and 16 for V16QI, V8HI and V2DI).
10836 (define_insn "xop_rotr<mode>3"
10837 [(set (match_operand:VI_128 0 "register_operand" "=x")
10839 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10840 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10843 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10844 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
10846 [(set_attr "type" "sseishft")
10847 (set_attr "length_immediate" "1")
10848 (set_attr "mode" "TI")])
;; Per-element variable rotate right: operand 2 is a vector of counts.  The
;; counts are negated into a fresh register with neg<mode>2 and the rotate is
;; emitted through xop_vrotl<mode>3 using the negated counts.
10850 (define_expand "vrotr<mode>3"
10851 [(match_operand:VI_128 0 "register_operand" "")
10852 (match_operand:VI_128 1 "register_operand" "")
10853 (match_operand:VI_128 2 "register_operand" "")]
10856 rtx reg = gen_reg_rtx (<MODE>mode);
10857 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10858 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
10862 (define_expand "vrotl<mode>3"
10863 [(match_operand:VI_128 0 "register_operand" "")
10864 (match_operand:VI_128 1 "register_operand" "")
10865 (match_operand:VI_128 2 "register_operand" "")]
10868 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
10872 (define_insn "xop_vrotl<mode>3"
10873 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10874 (if_then_else:VI_128
10876 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10879 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10883 (neg:VI_128 (match_dup 2)))))]
10884 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10885 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10886 [(set_attr "type" "sseishft")
10887 (set_attr "prefix_data16" "0")
10888 (set_attr "prefix_extra" "2")
10889 (set_attr "mode" "TI")])
10891 ;; XOP packed shift instructions.
10892 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right: the count vector (operand 2) is
;; negated into a fresh register with neg<mode>2 and the shift is emitted
;; through xop_lshl<mode>3 using the negated counts.
10893 (define_expand "vlshr<mode>3"
10894 [(match_operand:VI124_128 0 "register_operand" "")
10895 (match_operand:VI124_128 1 "register_operand" "")
10896 (match_operand:VI124_128 2 "register_operand" "")]
10899 rtx neg = gen_reg_rtx (<MODE>mode);
10900 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10901 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right: the count vector (operand 2)
;; is negated into a fresh register with neg<mode>2 and the shift is emitted
;; through xop_ashl<mode>3 using the negated counts.
10905 (define_expand "vashr<mode>3"
10906 [(match_operand:VI124_128 0 "register_operand" "")
10907 (match_operand:VI124_128 1 "register_operand" "")
10908 (match_operand:VI124_128 2 "register_operand" "")]
10911 rtx neg = gen_reg_rtx (<MODE>mode);
10912 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10913 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
10917 (define_expand "vashl<mode>3"
10918 [(match_operand:VI124_128 0 "register_operand" "")
10919 (match_operand:VI124_128 1 "register_operand" "")
10920 (match_operand:VI124_128 2 "register_operand" "")]
10923 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
10927 (define_insn "xop_ashl<mode>3"
10928 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10929 (if_then_else:VI_128
10931 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10934 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10938 (neg:VI_128 (match_dup 2)))))]
10939 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10940 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10941 [(set_attr "type" "sseishft")
10942 (set_attr "prefix_data16" "0")
10943 (set_attr "prefix_extra" "2")
10944 (set_attr "mode" "TI")])
10946 (define_insn "xop_lshl<mode>3"
10947 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10948 (if_then_else:VI_128
10950 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10953 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10957 (neg:VI_128 (match_dup 2)))))]
10958 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10959 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10960 [(set_attr "type" "sseishft")
10961 (set_attr "prefix_data16" "0")
10962 (set_attr "prefix_extra" "2")
10963 (set_attr "mode" "TI")])
10965 ;; SSE2 doesn't have some shift variants, so define versions for XOP
10966 (define_expand "ashlv16qi3"
10967 [(match_operand:V16QI 0 "register_operand" "")
10968 (match_operand:V16QI 1 "register_operand" "")
10969 (match_operand:SI 2 "nonmemory_operand" "")]
10972 rtvec vs = rtvec_alloc (16);
10973 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10974 rtx reg = gen_reg_rtx (V16QImode);
10976 for (i = 0; i < 16; i++)
10977 RTVEC_ELT (vs, i) = operands[2];
10979 emit_insn (gen_vec_initv16qi (reg, par));
10980 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
10984 (define_expand "lshlv16qi3"
10985 [(match_operand:V16QI 0 "register_operand" "")
10986 (match_operand:V16QI 1 "register_operand" "")
10987 (match_operand:SI 2 "nonmemory_operand" "")]
10990 rtvec vs = rtvec_alloc (16);
10991 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10992 rtx reg = gen_reg_rtx (V16QImode);
10994 for (i = 0; i < 16; i++)
10995 RTVEC_ELT (vs, i) = operands[2];
10997 emit_insn (gen_vec_initv16qi (reg, par));
10998 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, emitted through
;; xop_ashlv16qi3 with a negated count in every byte.  A CONST_INT count is
;; negated at expand time (GEN_INT (- INTVAL ...)) before being placed in all
;; 16 elements; for a non-constant count the vector built by vec_initv16qi is
;; negated afterwards with negv16qi2 and the negated register is used.
11002 (define_expand "ashrv16qi3"
11003 [(match_operand:V16QI 0 "register_operand" "")
11004 (match_operand:V16QI 1 "register_operand" "")
11005 (match_operand:SI 2 "nonmemory_operand" "")]
11008 rtvec vs = rtvec_alloc (16);
11009 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11010 rtx reg = gen_reg_rtx (V16QImode);
11012 rtx ele = ((CONST_INT_P (operands[2]))
11013 ? GEN_INT (- INTVAL (operands[2]))
11016 for (i = 0; i < 16; i++)
11017 RTVEC_ELT (vs, i) = ele;
11019 emit_insn (gen_vec_initv16qi (reg, par));
11021 if (!CONST_INT_P (operands[2]))
11023 rtx neg = gen_reg_rtx (V16QImode);
11024 emit_insn (gen_negv16qi2 (neg, reg));
11025 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11028 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, emitted through
;; xop_ashlv2di3 with the negated count in both elements.  The negated count
;; is produced in one of three ways: a CONST_INT is negated at expand time;
;; an operand whose mode is not DImode is first converted to DImode with
;; convert_move and then negated with negdi2; otherwise negdi2 is applied to
;; the operand directly.
11033 (define_expand "ashrv2di3"
11034 [(match_operand:V2DI 0 "register_operand" "")
11035 (match_operand:V2DI 1 "register_operand" "")
11036 (match_operand:DI 2 "nonmemory_operand" "")]
11039 rtvec vs = rtvec_alloc (2);
11040 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11041 rtx reg = gen_reg_rtx (V2DImode);
11044 if (CONST_INT_P (operands[2]))
11045 ele = GEN_INT (- INTVAL (operands[2]));
11046 else if (GET_MODE (operands[2]) != DImode)
11048 rtx move = gen_reg_rtx (DImode);
11049 ele = gen_reg_rtx (DImode);
11050 convert_move (move, operands[2], false);
11051 emit_insn (gen_negdi2 (ele, move));
11055 ele = gen_reg_rtx (DImode);
11056 emit_insn (gen_negdi2 (ele, operands[2]));
11059 RTVEC_ELT (vs, 0) = ele;
11060 RTVEC_ELT (vs, 1) = ele;
11061 emit_insn (gen_vec_initv2di (reg, par));
11062 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11066 ;; XOP FRCZ support
11067 (define_insn "xop_frcz<mode>2"
11068 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11070 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11073 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11074 [(set_attr "type" "ssecvt1")
11075 (set_attr "mode" "<MODE>")])
11078 (define_expand "xop_vmfrcz<mode>2"
11079 [(set (match_operand:VF_128 0 "register_operand")
11082 [(match_operand:VF_128 1 "nonimmediate_operand")]
11088 operands[3] = CONST0_RTX (<MODE>mode);
11091 (define_insn "*xop_vmfrcz_<mode>"
11092 [(set (match_operand:VF_128 0 "register_operand" "=x")
11095 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11097 (match_operand:VF_128 2 "const0_operand")
11100 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11101 [(set_attr "type" "ssecvt1")
11102 (set_attr "mode" "<MODE>")])
11104 (define_insn "xop_maskcmp<mode>3"
11105 [(set (match_operand:VI_128 0 "register_operand" "=x")
11106 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11107 [(match_operand:VI_128 2 "register_operand" "x")
11108 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11110 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11111 [(set_attr "type" "sse4arg")
11112 (set_attr "prefix_data16" "0")
11113 (set_attr "prefix_rep" "0")
11114 (set_attr "prefix_extra" "2")
11115 (set_attr "length_immediate" "1")
11116 (set_attr "mode" "TI")])
11118 (define_insn "xop_maskcmp_uns<mode>3"
11119 [(set (match_operand:VI_128 0 "register_operand" "=x")
11120 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11121 [(match_operand:VI_128 2 "register_operand" "x")
11122 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11124 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11125 [(set_attr "type" "ssecmp")
11126 (set_attr "prefix_data16" "0")
11127 (set_attr "prefix_rep" "0")
11128 (set_attr "prefix_extra" "2")
11129 (set_attr "length_immediate" "1")
11130 (set_attr "mode" "TI")])
11132 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11133 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11134 ;; the exact instruction generated for the intrinsic.
11135 (define_insn "xop_maskcmp_uns2<mode>3"
11136 [(set (match_operand:VI_128 0 "register_operand" "=x")
11138 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11139 [(match_operand:VI_128 2 "register_operand" "x")
11140 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11141 UNSPEC_XOP_UNSIGNED_CMP))]
11143 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11144 [(set_attr "type" "ssecmp")
11145 (set_attr "prefix_data16" "0")
11146 (set_attr "prefix_extra" "2")
11147 (set_attr "length_immediate" "1")
11148 (set_attr "mode" "TI")])
11150 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11151 ;; being added here to be complete.
11152 (define_insn "xop_pcom_tf<mode>3"
11153 [(set (match_operand:VI_128 0 "register_operand" "=x")
11155 [(match_operand:VI_128 1 "register_operand" "x")
11156 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11157 (match_operand:SI 3 "const_int_operand" "n")]
11158 UNSPEC_XOP_TRUEFALSE))]
11161 return ((INTVAL (operands[3]) != 0)
11162 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11163 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11165 [(set_attr "type" "ssecmp")
11166 (set_attr "prefix_data16" "0")
11167 (set_attr "prefix_extra" "2")
11168 (set_attr "length_immediate" "1")
11169 (set_attr "mode" "TI")])
11171 (define_insn "xop_vpermil2<mode>3"
11172 [(set (match_operand:VF 0 "register_operand" "=x")
11174 [(match_operand:VF 1 "register_operand" "x")
11175 (match_operand:VF 2 "nonimmediate_operand" "%x")
11176 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11177 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11180 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11181 [(set_attr "type" "sse4arg")
11182 (set_attr "length_immediate" "1")
11183 (set_attr "mode" "<MODE>")])
11185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11187 (define_insn "aesenc"
11188 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11189 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11190 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11194 aesenc\t{%2, %0|%0, %2}
11195 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11196 [(set_attr "isa" "noavx,avx")
11197 (set_attr "type" "sselog1")
11198 (set_attr "prefix_extra" "1")
11199 (set_attr "prefix" "orig,vex")
11200 (set_attr "mode" "TI")])
11202 (define_insn "aesenclast"
11203 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11204 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11205 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11206 UNSPEC_AESENCLAST))]
11209 aesenclast\t{%2, %0|%0, %2}
11210 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11211 [(set_attr "isa" "noavx,avx")
11212 (set_attr "type" "sselog1")
11213 (set_attr "prefix_extra" "1")
11214 (set_attr "prefix" "orig,vex")
11215 (set_attr "mode" "TI")])
11217 (define_insn "aesdec"
11218 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11219 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11220 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11224 aesdec\t{%2, %0|%0, %2}
11225 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11226 [(set_attr "isa" "noavx,avx")
11227 (set_attr "type" "sselog1")
11228 (set_attr "prefix_extra" "1")
11229 (set_attr "prefix" "orig,vex")
11230 (set_attr "mode" "TI")])
11232 (define_insn "aesdeclast"
11233 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11234 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11235 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11236 UNSPEC_AESDECLAST))]
11239 aesdeclast\t{%2, %0|%0, %2}
11240 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11241 [(set_attr "isa" "noavx,avx")
11242 (set_attr "type" "sselog1")
11243 (set_attr "prefix_extra" "1")
11244 (set_attr "prefix" "orig,vex")
11245 (set_attr "mode" "TI")])
11247 (define_insn "aesimc"
11248 [(set (match_operand:V2DI 0 "register_operand" "=x")
11249 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11252 "%vaesimc\t{%1, %0|%0, %1}"
11253 [(set_attr "type" "sselog1")
11254 (set_attr "prefix_extra" "1")
11255 (set_attr "prefix" "maybe_vex")
11256 (set_attr "mode" "TI")])
11258 (define_insn "aeskeygenassist"
11259 [(set (match_operand:V2DI 0 "register_operand" "=x")
11260 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11261 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11262 UNSPEC_AESKEYGENASSIST))]
11264 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11265 [(set_attr "type" "sselog1")
11266 (set_attr "prefix_extra" "1")
11267 (set_attr "length_immediate" "1")
11268 (set_attr "prefix" "maybe_vex")
11269 (set_attr "mode" "TI")])
11271 (define_insn "pclmulqdq"
11272 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11273 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11274 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11275 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11279 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11280 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11281 [(set_attr "isa" "noavx,avx")
11282 (set_attr "type" "sselog1")
11283 (set_attr "prefix_extra" "1")
11284 (set_attr "length_immediate" "1")
11285 (set_attr "prefix" "orig,vex")
11286 (set_attr "mode" "TI")])
;; Build the PARALLEL for vzeroall at expand time.  The vector has
;; nregs + 1 entries, where nregs is 16 when TARGET_64BIT and 8 otherwise:
;; entry 0 is an UNSPEC_VOLATILE wrapping const0_rtx, and entries 1..nregs
;; each SET one SSE register, viewed as V8SImode, to the V8SImode zero
;; constant.
11288 (define_expand "avx_vzeroall"
11289 [(match_par_dup 0 [(const_int 0)])]
11292 int nregs = TARGET_64BIT ? 16 : 8;
11295 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11297 XVECEXP (operands[0], 0, 0)
11298 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11301 for (regno = 0; regno < nregs; regno++)
11302 XVECEXP (operands[0], 0, regno + 1)
11303 = gen_rtx_SET (VOIDmode,
11304 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11305 CONST0_RTX (V8SImode));
11308 (define_insn "*avx_vzeroall"
11309 [(match_parallel 0 "vzeroall_operation"
11310 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11313 [(set_attr "type" "sse")
11314 (set_attr "modrm" "0")
11315 (set_attr "memory" "none")
11316 (set_attr "prefix" "vex")
11317 (set_attr "mode" "OI")])
11319 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11320 ;; if the upper 128bits are unused.
11321 (define_insn "avx_vzeroupper"
11322 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11323 UNSPECV_VZEROUPPER)]
11326 [(set_attr "type" "sse")
11327 (set_attr "modrm" "0")
11328 (set_attr "memory" "none")
11329 (set_attr "prefix" "vex")
11330 (set_attr "mode" "OI")])
11332 (define_mode_attr AVXTOSSEMODE
11333 [(V4DI "V2DI") (V2DI "V2DI")
11334 (V8SI "V4SI") (V4SI "V4SI")
11335 (V16HI "V8HI") (V8HI "V8HI")
11336 (V32QI "V16QI") (V16QI "V16QI")])
11338 (define_insn "avx2_pbroadcast<mode>"
11339 [(set (match_operand:VI 0 "register_operand" "=x")
11341 (vec_select:<ssescalarmode>
11342 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11343 (parallel [(const_int 0)]))))]
11345 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11346 [(set_attr "type" "ssemov")
11347 (set_attr "prefix_extra" "1")
11348 (set_attr "prefix" "vex")
11349 (set_attr "mode" "<sseinsnmode>")])
11351 (define_insn "avx2_permvarv8si"
11352 [(set (match_operand:V8SI 0 "register_operand" "=x")
11354 [(match_operand:V8SI 1 "register_operand" "x")
11355 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11358 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11359 [(set_attr "type" "sselog")
11360 (set_attr "prefix" "vex")
11361 (set_attr "mode" "OI")])
;; vpermpd with an 8-bit immediate selector (operand 2).  Operand 1 uses
;; nonimmediate_operand so that its predicate agrees with the "xm"
;; constraint; with register_operand the memory alternative could never be
;; matched directly by the predicate.
11363 (define_insn "avx2_permv4df"
11364 [(set (match_operand:V4DF 0 "register_operand" "=x")
11366 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11367 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11370 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11371 [(set_attr "type" "sselog")
11372 (set_attr "prefix_extra" "1")
11373 (set_attr "prefix" "vex")
11374 (set_attr "mode" "OI")])
11376 (define_insn "avx2_permvarv8sf"
11377 [(set (match_operand:V8SF 0 "register_operand" "=x")
11379 [(match_operand:V8SF 1 "register_operand" "x")
11380 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11383 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11384 [(set_attr "type" "sselog")
11385 (set_attr "prefix" "vex")
11386 (set_attr "mode" "OI")])
;; vpermq with an 8-bit immediate selector (operand 2).  Operand 1 uses
;; nonimmediate_operand so that its predicate agrees with the "xm"
;; constraint; with register_operand the memory alternative could never be
;; matched directly by the predicate.
11388 (define_insn "avx2_permv4di"
11389 [(set (match_operand:V4DI 0 "register_operand" "=x")
11391 [(match_operand:V4DI 1 "nonimmediate_operand" "xm")
11392 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11395 "vpermq\t{%2, %1, %0|%0, %1, %2}"
11396 [(set_attr "type" "sselog")
11397 (set_attr "prefix" "vex")
11398 (set_attr "mode" "OI")])
;; vperm2i128 on two V4DI sources with an 8-bit immediate selector
;; (operand 3).  Operand 2 uses nonimmediate_operand so that its predicate
;; agrees with the "xm" constraint; with register_operand the memory
;; alternative could never be matched directly by the predicate.
11400 (define_insn "avx2_permv2ti"
11401 [(set (match_operand:V4DI 0 "register_operand" "=x")
11403 [(match_operand:V4DI 1 "register_operand" "x")
11404 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11405 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11408 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11409 [(set_attr "type" "sselog")
11410 (set_attr "prefix" "vex")
11411 (set_attr "mode" "OI")])
11413 (define_insn "avx2_vec_dupv4df"
11414 [(set (match_operand:V4DF 0 "register_operand" "=x")
11415 (vec_duplicate:V4DF
11417 (match_operand:V2DF 1 "register_operand" "x")
11418 (parallel [(const_int 0)]))))]
11420 "vbroadcastsd\t{%1, %0|%0, %1}"
11421 [(set_attr "type" "sselog1")
11422 (set_attr "prefix" "vex")
11423 (set_attr "mode" "V4DF")])
11425 ;; Modes handled by AVX vec_dup patterns.
11426 (define_mode_iterator AVX_VEC_DUP_MODE
11427 [V8SI V8SF V4DI V4DF])
11429 (define_insn "vec_dup<mode>"
11430 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11431 (vec_duplicate:AVX_VEC_DUP_MODE
11432 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11435 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11437 [(set_attr "type" "ssemov")
11438 (set_attr "prefix_extra" "1")
11439 (set_attr "prefix" "vex")
11440 (set_attr "mode" "V8SF")])
;; Integer 128-bit broadcast: operand 1 is a half-width vector that must be
;; in memory (memory_operand, "m"); emits vbroadcasti128 into the 256-bit
;; integer destination.
11442 (define_insn "avx2_vbroadcasti128_<mode>"
11443 [(set (match_operand:VI_256 0 "register_operand" "=x")
11445 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11448 "vbroadcasti128\t{%1, %0|%0, %1}"
11449 [(set_attr "type" "ssemov")
11450 (set_attr "prefix_extra" "1")
11451 (set_attr "prefix" "vex")
11452 (set_attr "mode" "OI")])
;; Split (active only with TARGET_AVX after reload) of a vec_duplicate whose
;; scalar source is a register.  Two sets are produced: first the scalar is
;; duplicated into operand 2, then operand 2 is vec_concat'ed with itself to
;; form the full-width result.  Operand 2 is created in the preparation
;; statement as the same hard register as operand 0, viewed in the
;; half-width vector mode.
11455 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11456 (vec_duplicate:AVX_VEC_DUP_MODE
11457 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11458 "TARGET_AVX && reload_completed"
11459 [(set (match_dup 2)
11460 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11462 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11463 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit broadcast into a 256-bit register, with three alternatives
;; selected by where the half-width operand 1 lives:
;;   "m"  - memory source: vbroadcastf128;
;;   "0"  - source tied to the destination register: vinsertf128 with
;;          immediate 1, using %0 as both base and inserted operand;
;;   "?x" - some other register (disparaged): vperm2f128 with immediate 0,
;;          operand 1 printed with the %t modifier.
;; Only the last two alternatives carry an immediate byte
;; (length_immediate "0,1,1").
11465 (define_insn "avx_vbroadcastf128_<mode>"
11466 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11468 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11472 vbroadcastf128\t{%1, %0|%0, %1}
11473 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11474 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11475 [(set_attr "type" "ssemov,sselog1,sselog1")
11476 (set_attr "prefix_extra" "1")
11477 (set_attr "length_immediate" "0,1,1")
11478 (set_attr "prefix" "vex")
11479 (set_attr "mode" "V4SF,V8SF,V8SF")])
11481 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11482 ;; If it so happens that the input is in memory, use vbroadcast.
11483 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;;
;; V4SF case.  Operand 3 is the first index of the selection PARALLEL and is
;; read as the element number ELT.  The output code switches on
;; which_alternative:
;;   - one path re-addresses operand 1 as an SFmode memory at byte offset
;;     ELT * 4 and emits vbroadcastss;
;;   - the other path emits vpermilps with immediate ELT * 0x55, i.e. the
;;     2-bit element index repeated in all four selector fields.
;; NOTE(review): the case labels are not visible here; presumably the two
;; memory alternatives ("m", "o") take the vbroadcastss path and the
;; register alternative takes vpermilps -- confirm against the full file.
11484 (define_insn "*avx_vperm_broadcast_v4sf"
11485 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11487 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11488 (match_parallel 2 "avx_vbroadcast_operand"
11489 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11492 int elt = INTVAL (operands[3]);
11493 switch (which_alternative)
11497 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11498 return "vbroadcastss\t{%1, %0|%0, %1}";
11500 operands[2] = GEN_INT (elt * 0x55);
11501 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11503 gcc_unreachable ();
11506 [(set_attr "type" "ssemov,ssemov,sselog1")
11507 (set_attr "prefix_extra" "1")
11508 (set_attr "length_immediate" "0,0,1")
11509 (set_attr "prefix" "vex")
11510 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float counterpart of the V4SF broadcast pattern; it is always
;; split after reload.  ELT is the element index taken from operand 3.
;; The preparation code shows two strategies:
;;   - a two-insn sequence: vpermil with a mask that replicates the wanted
;;     element inside its 128-bit lane (mask 15 or 0 for V4DF, otherwise
;;     (ELT & 3) * 0x55), followed by vperm2f128 with mask
;;     (ELT / (nelts / 2)) * 0x11 to copy that lane into both lanes;
;;   - re-addressing operand 1 as a scalar at byte offset
;;     ELT * GET_MODE_SIZE (scalar mode), so the replacement pattern
;;     (vec_duplicate of operand 1) becomes a plain broadcast from memory.
;; NOTE(review): the condition choosing between the two is not visible
;; here; presumably the first applies when operand 1 is a register --
;; confirm against the full file.
11512 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11513 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11515 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11516 (match_parallel 2 "avx_vbroadcast_operand"
11517 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11520 "&& reload_completed"
11521 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11523 rtx op0 = operands[0], op1 = operands[1];
11524 int elt = INTVAL (operands[3]);
11530 /* Shuffle element we care about into all elements of the 128-bit lane.
11531 The other lane gets shuffled too, but we don't care. */
11532 if (<MODE>mode == V4DFmode)
11533 mask = (elt & 1 ? 15 : 0);
11535 mask = (elt & 3) * 0x55;
11536 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11538 /* Shuffle the lane we care about into both lanes of the dest. */
11539 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11540 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11544 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11545 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate-form vpermil on double-precision vectors (VF2).
;; The immediate (operand 2) is decoded into an explicit PARALLEL of element
;; indices, one selector bit per element: bits 0 and 1 give indices 0/1 for
;; the first two elements; for V4DF, bits 2 and 3 give indices 2/3 for the
;; last two elements (the "+ 2" keeps each selection inside the upper pair).
11548 (define_expand "avx_vpermil<mode>"
11549 [(set (match_operand:VF2 0 "register_operand" "")
11551 (match_operand:VF2 1 "nonimmediate_operand" "")
11552 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11555 int mask = INTVAL (operands[2]);
11556 rtx perm[<ssescalarnum>];
11558 perm[0] = GEN_INT (mask & 1);
11559 perm[1] = GEN_INT ((mask >> 1) & 1);
11560 if (<MODE>mode == V4DFmode)
11562 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11563 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11567 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate-form vpermil on single-precision vectors (VF1).
;; The immediate (operand 2) holds four 2-bit selectors, decoded into an
;; explicit PARALLEL of element indices.  For V8SF the same four selectors
;; are reused for elements 4..7, offset by 4 so that each selection stays
;; within the upper group of four.
11570 (define_expand "avx_vpermil<mode>"
11571 [(set (match_operand:VF1 0 "register_operand" "")
11573 (match_operand:VF1 1 "nonimmediate_operand" "")
11574 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11577 int mask = INTVAL (operands[2]);
11578 rtx perm[<ssescalarnum>];
11580 perm[0] = GEN_INT (mask & 3);
11581 perm[1] = GEN_INT ((mask >> 2) & 3);
11582 perm[2] = GEN_INT ((mask >> 4) & 3);
11583 perm[3] = GEN_INT ((mask >> 6) & 3);
11584 if (<MODE>mode == V8SFmode)
11586 perm[4] = GEN_INT ((mask & 3) + 4);
11587 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11588 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11589 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11593 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the explicit-PARALLEL form built by the avx_vpermil
;; expanders.  The insn condition requires avx_vpermilp_parallel to return
;; nonzero for the PARALLEL (operand 2); at output time the same helper is
;; called again and its result minus one becomes the immediate printed in
;; the vpermil instruction.
11596 (define_insn "*avx_vpermilp<mode>"
11597 [(set (match_operand:VF 0 "register_operand" "=x")
11599 (match_operand:VF 1 "nonimmediate_operand" "xm")
11600 (match_parallel 2 ""
11601 [(match_operand 3 "const_int_operand" "")])))]
11603 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11605 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11606 operands[2] = GEN_INT (mask);
11607 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11609 [(set_attr "type" "sselog")
11610 (set_attr "prefix_extra" "1")
11611 (set_attr "length_immediate" "1")
11612 (set_attr "prefix" "vex")
11613 (set_attr "mode" "<MODE>")])
;; Variable-control form of vpermil: operand 1 is the float vector in a
;; register and operand 2 is an integer vector of the matching
;; <sseintvecmode>, in a register or memory.  No immediate is encoded.
11615 (define_insn "avx_vpermilvar<mode>3"
11616 [(set (match_operand:VF 0 "register_operand" "=x")
11618 [(match_operand:VF 1 "register_operand" "x")
11619 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11622 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11623 [(set_attr "type" "sselog")
11624 (set_attr "prefix_extra" "1")
11625 (set_attr "prefix" "vex")
11626 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default pattern is the opaque unspec form.
;; When neither zeroing bit of the immediate is set ((mask & 0x88) == 0),
;; the preparation code instead builds an explicit RTL description:
;;   - bits 1:0 of the mask pick which half (of the concatenation of
;;     operands 1 and 2) supplies the first nelt/2 result elements;
;;   - bits 5:4 pick the half that supplies the remaining nelt/2 elements;
;;   - the result is a vec_select, using that index PARALLEL, from the
;;     vec_concat of operands 1 and 2, wrapped in a SET of operand 0.
11628 (define_expand "avx_vperm2f128<mode>3"
11629 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11630 (unspec:AVX256MODE2P
11631 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11632 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11633 (match_operand:SI 3 "const_0_to_255_operand" "")]
11634 UNSPEC_VPERMIL2F128))]
11637 int mask = INTVAL (operands[3]);
11638 if ((mask & 0x88) == 0)
11640 rtx perm[<ssescalarnum>], t1, t2;
11641 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11643 base = (mask & 3) * nelt2;
11644 for (i = 0; i < nelt2; ++i)
11645 perm[i] = GEN_INT (base + i);
11647 base = ((mask >> 4) & 3) * nelt2;
11648 for (i = 0; i < nelt2; ++i)
11649 perm[i + nelt2] = GEN_INT (base + i);
11651 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11652 operands[1], operands[2]);
11653 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11654 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11655 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11661 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11662 ;; means that in order to represent this properly in rtl we'd have to
11663 ;; nest *another* vec_concat with a zero operand and do the select from
11664 ;; a 4x wide vector. That doesn't seem very nice.
;;
;; Opaque (unspec) form of vperm2f128: any 8-bit immediate is accepted and
;; printed unchanged as operand 3.
11665 (define_insn "*avx_vperm2f128<mode>_full"
11666 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11667 (unspec:AVX256MODE2P
11668 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11669 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11670 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11671 UNSPEC_VPERMIL2F128))]
11673 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11674 [(set_attr "type" "sselog")
11675 (set_attr "prefix_extra" "1")
11676 (set_attr "length_immediate" "1")
11677 (set_attr "prefix" "vex")
11678 (set_attr "mode" "V8SF")])
;; Explicit vec_select form of vperm2f128 (no lane zeroing): selects from
;; the vec_concat of operands 1 and 2 using the PARALLEL in operand 3.
;; The insn condition requires avx_vperm2f128_parallel to return nonzero;
;; at output time its result minus one replaces operand 3 as the immediate.
11680 (define_insn "*avx_vperm2f128<mode>_nozero"
11681 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11682 (vec_select:AVX256MODE2P
11683 (vec_concat:<ssedoublevecmode>
11684 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11685 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11686 (match_parallel 3 ""
11687 [(match_operand 4 "const_int_operand" "")])))]
11689 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11691 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11692 operands[3] = GEN_INT (mask);
11693 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11695 [(set_attr "type" "sselog")
11696 (set_attr "prefix_extra" "1")
11697 (set_attr "length_immediate" "1")
11698 (set_attr "prefix" "vex")
11699 (set_attr "mode" "V8SF")])
;; Expander for inserting a 128-bit operand 2 into 256-bit operand 1.
;; Operand 3 (restricted to 0 or 1 by its predicate) chooses the generator:
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; the chosen generator is
;; then called with operands 0, 1 and 2.
;; NOTE(review): the case labels are not visible here; presumably 0 maps to
;; the "lo" generator and 1 to the "hi" generator -- confirm.
11701 (define_expand "avx_vinsertf128<mode>"
11702 [(match_operand:V_256 0 "register_operand" "")
11703 (match_operand:V_256 1 "register_operand" "")
11704 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
11705 (match_operand:SI 3 "const_0_to_1_operand" "")]
11708 rtx (*insn)(rtx, rtx, rtx);
11710 switch (INTVAL (operands[3]))
11713 insn = gen_vec_set_lo_<mode>;
11716 insn = gen_vec_set_hi_<mode>;
11719 gcc_unreachable ();
11722 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Low-half insert for V4DI: combines V2DI operand 2 with elements 2 and 3
;; selected from operand 1, and emits vinserti128 with immediate 0.
11726 (define_insn "avx2_vec_set_lo_v4di"
11727 [(set (match_operand:V4DI 0 "register_operand" "=x")
11729 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11731 (match_operand:V4DI 1 "register_operand" "x")
11732 (parallel [(const_int 2) (const_int 3)]))))]
11734 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11735 [(set_attr "type" "sselog")
11736 (set_attr "prefix_extra" "1")
11737 (set_attr "length_immediate" "1")
11738 (set_attr "prefix" "vex")
11739 (set_attr "mode" "OI")])
;; High-half insert for V4DI: combines elements 0 and 1 selected from
;; operand 1 with V2DI operand 2, and emits vinserti128 with immediate 1.
11741 (define_insn "avx2_vec_set_hi_v4di"
11742 [(set (match_operand:V4DI 0 "register_operand" "=x")
11745 (match_operand:V4DI 1 "register_operand" "x")
11746 (parallel [(const_int 0) (const_int 1)]))
11747 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11749 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11750 [(set_attr "type" "sselog")
11751 (set_attr "prefix_extra" "1")
11752 (set_attr "length_immediate" "1")
11753 (set_attr "prefix" "vex")
11754 (set_attr "mode" "OI")])
;; Low-half insert for the VI8F_256 modes: the result is the vec_concat of
;; half-width operand 2 (new low half) and elements 2..3 of operand 1
;; (its existing high half).  Emits vinsertf128 with immediate 0.
11756 (define_insn "vec_set_lo_<mode>"
11757 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11758 (vec_concat:VI8F_256
11759 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11760 (vec_select:<ssehalfvecmode>
11761 (match_operand:VI8F_256 1 "register_operand" "x")
11762 (parallel [(const_int 2) (const_int 3)]))))]
11764 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "prefix_extra" "1")
11767 (set_attr "length_immediate" "1")
11768 (set_attr "prefix" "vex")
11769 (set_attr "mode" "V8SF")])
;; High-half insert for the VI8F_256 modes: the result is the vec_concat of
;; elements 0..1 of operand 1 (its existing low half) and half-width
;; operand 2 (new high half).  Emits vinsertf128 with immediate 1.
11771 (define_insn "vec_set_hi_<mode>"
11772 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11773 (vec_concat:VI8F_256
11774 (vec_select:<ssehalfvecmode>
11775 (match_operand:VI8F_256 1 "register_operand" "x")
11776 (parallel [(const_int 0) (const_int 1)]))
11777 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11779 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11780 [(set_attr "type" "sselog")
11781 (set_attr "prefix_extra" "1")
11782 (set_attr "length_immediate" "1")
11783 (set_attr "prefix" "vex")
11784 (set_attr "mode" "V8SF")])
;; Low-half insert for the VI4F_256 modes (eight elements): the result is
;; the vec_concat of half-width operand 2 and elements 4..7 of operand 1.
;; Emits vinsertf128 with immediate 0.
11786 (define_insn "vec_set_lo_<mode>"
11787 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11788 (vec_concat:VI4F_256
11789 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11790 (vec_select:<ssehalfvecmode>
11791 (match_operand:VI4F_256 1 "register_operand" "x")
11792 (parallel [(const_int 4) (const_int 5)
11793 (const_int 6) (const_int 7)]))))]
11795 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11796 [(set_attr "type" "sselog")
11797 (set_attr "prefix_extra" "1")
11798 (set_attr "length_immediate" "1")
11799 (set_attr "prefix" "vex")
11800 (set_attr "mode" "V8SF")])
;; High-half insert for the VI4F_256 modes (eight elements): the result is
;; the vec_concat of elements 0..3 of operand 1 and half-width operand 2.
;; Emits vinsertf128 with immediate 1.
11802 (define_insn "vec_set_hi_<mode>"
11803 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11804 (vec_concat:VI4F_256
11805 (vec_select:<ssehalfvecmode>
11806 (match_operand:VI4F_256 1 "register_operand" "x")
11807 (parallel [(const_int 0) (const_int 1)
11808 (const_int 2) (const_int 3)]))
11809 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11811 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11812 [(set_attr "type" "sselog")
11813 (set_attr "prefix_extra" "1")
11814 (set_attr "length_immediate" "1")
11815 (set_attr "prefix" "vex")
11816 (set_attr "mode" "V8SF")])
;; Low-half insert for V16HI: V8HI operand 2 is combined with elements
;; 8..15 selected from operand 1.  Emits vinsertf128 with immediate 0.
11818 (define_insn "vec_set_lo_v16hi"
11819 [(set (match_operand:V16HI 0 "register_operand" "=x")
11821 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11823 (match_operand:V16HI 1 "register_operand" "x")
11824 (parallel [(const_int 8) (const_int 9)
11825 (const_int 10) (const_int 11)
11826 (const_int 12) (const_int 13)
11827 (const_int 14) (const_int 15)]))))]
11829 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11830 [(set_attr "type" "sselog")
11831 (set_attr "prefix_extra" "1")
11832 (set_attr "length_immediate" "1")
11833 (set_attr "prefix" "vex")
11834 (set_attr "mode" "V8SF")])
;; High-half insert for V16HI: elements 0..7 selected from operand 1 are
;; combined with V8HI operand 2.  Emits vinsertf128 with immediate 1.
11836 (define_insn "vec_set_hi_v16hi"
11837 [(set (match_operand:V16HI 0 "register_operand" "=x")
11840 (match_operand:V16HI 1 "register_operand" "x")
11841 (parallel [(const_int 0) (const_int 1)
11842 (const_int 2) (const_int 3)
11843 (const_int 4) (const_int 5)
11844 (const_int 6) (const_int 7)]))
11845 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11847 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11848 [(set_attr "type" "sselog")
11849 (set_attr "prefix_extra" "1")
11850 (set_attr "length_immediate" "1")
11851 (set_attr "prefix" "vex")
11852 (set_attr "mode" "V8SF")])
;; Low-half insert for V32QI: V16QI operand 2 is combined with elements
;; 16..31 selected from operand 1.  Emits vinsertf128 with immediate 0.
11854 (define_insn "vec_set_lo_v32qi"
11855 [(set (match_operand:V32QI 0 "register_operand" "=x")
11857 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11859 (match_operand:V32QI 1 "register_operand" "x")
11860 (parallel [(const_int 16) (const_int 17)
11861 (const_int 18) (const_int 19)
11862 (const_int 20) (const_int 21)
11863 (const_int 22) (const_int 23)
11864 (const_int 24) (const_int 25)
11865 (const_int 26) (const_int 27)
11866 (const_int 28) (const_int 29)
11867 (const_int 30) (const_int 31)]))))]
11869 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11870 [(set_attr "type" "sselog")
11871 (set_attr "prefix_extra" "1")
11872 (set_attr "length_immediate" "1")
11873 (set_attr "prefix" "vex")
11874 (set_attr "mode" "V8SF")])
;; High-half insert for V32QI: elements 0..15 selected from operand 1 are
;; combined with V16QI operand 2.  Emits vinsertf128 with immediate 1.
11876 (define_insn "vec_set_hi_v32qi"
11877 [(set (match_operand:V32QI 0 "register_operand" "=x")
11880 (match_operand:V32QI 1 "register_operand" "x")
11881 (parallel [(const_int 0) (const_int 1)
11882 (const_int 2) (const_int 3)
11883 (const_int 4) (const_int 5)
11884 (const_int 6) (const_int 7)
11885 (const_int 8) (const_int 9)
11886 (const_int 10) (const_int 11)
11887 (const_int 12) (const_int 13)
11888 (const_int 14) (const_int 15)]))
11889 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11891 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11892 [(set_attr "type" "sselog")
11893 (set_attr "prefix_extra" "1")
11894 (set_attr "length_immediate" "1")
11895 (set_attr "prefix" "vex")
11896 (set_attr "mode" "V8SF")])
;; Named expander for masked loads over the V48_AVX2 modes.  Operand 0 is
;; the destination register, operand 1 the memory source, and operand 2 a
;; register holding an integer vector of the matching <sseintvecmode>.
;; NOTE(review): operand 2 is presumably the per-element mask; the unspec
;; and the enabling condition are not visible here -- confirm.
11898 (define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11899 [(set (match_operand:V48_AVX2 0 "register_operand" "")
11901 [(match_operand:<sseintvecmode> 2 "register_operand" "")
11902 (match_operand:V48_AVX2 1 "memory_operand" "")
;; Named expander for masked stores over the V48_AVX2 modes.  Operand 0 is
;; the memory destination, operand 2 the register holding the data, and
;; operand 1 a register holding an integer vector of <sseintvecmode>.
;; NOTE(review): operand 1 is presumably the per-element mask; the unspec
;; and the enabling condition are not visible here -- confirm.
11907 (define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11908 [(set (match_operand:V48_AVX2 0 "memory_operand" "")
11910 [(match_operand:<sseintvecmode> 1 "register_operand" "")
11911 (match_operand:V48_AVX2 2 "register_operand" "")
;; Integer masked move (vpmaskmov) for the VI48_AVX2 modes.  Two
;; alternatives: register destination with memory source, or memory
;; destination with register source.  The insn condition additionally
;; insists on exactly that pairing: operand 0 is a register if and only if
;; operand 2 is memory.  Operand 1 is always a register.
11916 (define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
11917 [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
11919 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
11920 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
11924 && (REG_P (operands[0]) == MEM_P (operands[2]))"
11925 "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11926 [(set_attr "type" "sselog1")
11927 (set_attr "prefix_extra" "1")
11928 (set_attr "prefix" "vex")
11929 (set_attr "mode" "<sseinsnmode>")])
;; Floating-point masked move (vmaskmov) for the VF modes.  Two
;; alternatives: register destination with memory source, or memory
;; destination with register source.  The insn condition additionally
;; insists on exactly that pairing: operand 0 is a register if and only if
;; operand 2 is memory.  Operand 1 is always a register of the matching
;; integer vector mode.
11931 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
11932 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
11934 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
11935 (match_operand:VF 2 "nonimmediate_operand" "m,x")
11939 && (REG_P (operands[0]) == MEM_P (operands[2]))"
11940 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11941 [(set_attr "type" "sselog1")
11942 (set_attr "prefix_extra" "1")
11943 (set_attr "prefix" "vex")
11944 (set_attr "mode" "<MODE>")])
;; Cast of a half-width vector (operand 1) to a 256-bit vector (operand 0),
;; modelled as an unspec and split after reload into an ordinary move.
;; The preparation code re-views one of the two operands as a hard register
;; of the other operand's width (operand 0 in the half-width mode, or
;; operand 1 in the full-width mode) and then calls emit_move_insn.
;; NOTE(review): the condition selecting which operand is re-viewed is not
;; visible here; presumably it tests whether operand 0 is a register --
;; confirm against the full file.
11946 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
11947 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11948 (unspec:AVX256MODE2P
11949 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11953 "&& reload_completed"
11956 rtx op0 = operands[0];
11957 rtx op1 = operands[1];
11959 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
11961 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11962 emit_move_insn (op0, op1);
;; Standard-name expander vec_init for the 256-bit vector modes: all work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, operand 0 as the target and operand 1 as the initializer.
11966 (define_expand "vec_init<mode>"
11967 [(match_operand:V_256 0 "register_operand" "")
11968 (match_operand 1 "" "")]
11971 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract a V2DI from V4DI operand 1 (register or memory).  Operand 2 is
;; a constant 0 or 1 and is printed as the immediate of vextracti128.
11975 (define_insn "avx2_extracti128"
11976 [(set (match_operand:V2DI 0 "register_operand" "=x")
11978 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11979 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
11981 "vextracti128\t{%2, %1, %0|%0, %1, %2}"
11982 [(set_attr "type" "ssemov")
11983 (set_attr "prefix_extra" "1")
11984 (set_attr "prefix" "vex")
11985 (set_attr "mode" "OI")])
;; Expander for inserting V2DI operand 2 into V4DI operand 1.  Operand 3
;; (restricted to 0 or 1 by its predicate) chooses between
;; gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di; the chosen
;; generator is then called with operands 0, 1 and 2.
;; NOTE(review): the case labels are not visible here; presumably 0 maps to
;; the "lo" generator and 1 to the "hi" generator -- confirm.
11987 (define_expand "avx2_inserti128"
11988 [(match_operand:V4DI 0 "register_operand" "")
11989 (match_operand:V4DI 1 "register_operand" "")
11990 (match_operand:V2DI 2 "nonimmediate_operand" "")
11991 (match_operand:SI 3 "const_0_to_1_operand" "")]
11994 rtx (*insn)(rtx, rtx, rtx);
11996 switch (INTVAL (operands[3]))
11999 insn = gen_avx2_vec_set_lo_v4di;
12002 insn = gen_avx2_vec_set_hi_v4di;
12005 gcc_unreachable ();
12008 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable arithmetic right shift of eight 32-bit elements,
;; emitted as vpsravd.  The RTL spells the operation out element by
;; element: result element i is formed from element i of operand 1 and
;; element i of operand 2.
;; The first group of four selections covers elements 0..3; the second
;; group must cover elements 4..7.  It previously repeated indices 0..3,
;; which described the upper half of the result as a copy of the lower
;; half's computation and did not match what the instruction does.
12012 (define_insn "avx2_ashrvv8si"
12013 [(set (match_operand:V8SI 0 "register_operand" "=x")
12019 (match_operand:V8SI 1 "register_operand" "x")
12020 (parallel [(const_int 0)]))
12022 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12023 (parallel [(const_int 0)])))
12027 (parallel [(const_int 1)]))
12030 (parallel [(const_int 1)]))))
12035 (parallel [(const_int 2)]))
12038 (parallel [(const_int 2)])))
12042 (parallel [(const_int 3)]))
12045 (parallel [(const_int 3)])))))
12051 (parallel [(const_int 4)]))
12054 (parallel [(const_int 4)])))
12058 (parallel [(const_int 5)]))
12061 (parallel [(const_int 5)]))))
12066 (parallel [(const_int 6)]))
12069 (parallel [(const_int 6)])))
12073 (parallel [(const_int 7)]))
12076 (parallel [(const_int 7)])))))))]
12078 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12079 [(set_attr "type" "sseishft")
12080 (set_attr "prefix" "vex")
12081 (set_attr "mode" "OI")])
;; 128-bit form of the variable arithmetic right shift, emitted as vpsravd.
;; The RTL pairs element i of operand 1 (register) with element i of
;; operand 2 (register or memory) for i = 0..3.
12083 (define_insn "avx2_ashrvv4si"
12084 [(set (match_operand:V4SI 0 "register_operand" "=x")
12089 (match_operand:V4SI 1 "register_operand" "x")
12090 (parallel [(const_int 0)]))
12092 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12093 (parallel [(const_int 0)])))
12097 (parallel [(const_int 1)]))
12100 (parallel [(const_int 1)]))))
12105 (parallel [(const_int 2)]))
12108 (parallel [(const_int 2)])))
12112 (parallel [(const_int 3)]))
12115 (parallel [(const_int 3)]))))))]
12117 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12118 [(set_attr "type" "sseishft")
12119 (set_attr "prefix" "vex")
12120 (set_attr "mode" "TI")])
;; Per-element variable logical shift (left or right, chosen by the
;; <lshift> code iterator) of eight 32-bit elements, emitted as
;; vp<lshift_insn>vd.  The RTL spells the operation out element by element:
;; result element i is formed from element i of operand 1 and element i of
;; operand 2.
;; The first group of four selections covers elements 0..3; the second
;; group must cover elements 4..7.  It previously repeated indices 0..3,
;; which did not match what the instruction does for the upper half.
12122 (define_insn "avx2_<lshift>vv8si"
12123 [(set (match_operand:V8SI 0 "register_operand" "=x")
12129 (match_operand:V8SI 1 "register_operand" "x")
12130 (parallel [(const_int 0)]))
12132 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12133 (parallel [(const_int 0)])))
12137 (parallel [(const_int 1)]))
12140 (parallel [(const_int 1)]))))
12145 (parallel [(const_int 2)]))
12148 (parallel [(const_int 2)])))
12152 (parallel [(const_int 3)]))
12155 (parallel [(const_int 3)])))))
12161 (parallel [(const_int 4)]))
12164 (parallel [(const_int 4)])))
12168 (parallel [(const_int 5)]))
12171 (parallel [(const_int 5)]))))
12176 (parallel [(const_int 6)]))
12179 (parallel [(const_int 6)])))
12183 (parallel [(const_int 7)]))
12186 (parallel [(const_int 7)])))))))]
12188 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12189 [(set_attr "type" "sseishft")
12190 (set_attr "prefix" "vex")
12191 (set_attr "mode" "OI")])
;; Per-element variable logical shift for the four-element VI4SD_AVX2
;; modes.  The result is a vec_concat of two half-width vectors, each itself
;; a concatenation of two scalar <lshift> operations; scalar i shifts
;; element i of operand 1 by element i of operand 2, for i = 0..3.
;; Emitted as vp<lshift_insn>v<ssemodesuffix>.
12193 (define_insn "avx2_<lshift>v<mode>"
12194 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12195 (vec_concat:VI4SD_AVX2
12196 (vec_concat:<ssehalfvecmode>
12197 (lshift:<ssescalarmode>
12198 (vec_select:<ssescalarmode>
12199 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12200 (parallel [(const_int 0)]))
12201 (vec_select:<ssescalarmode>
12202 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12203 (parallel [(const_int 0)])))
12204 (lshift:<ssescalarmode>
12205 (vec_select:<ssescalarmode>
12207 (parallel [(const_int 1)]))
12208 (vec_select:<ssescalarmode>
12210 (parallel [(const_int 1)]))))
12211 (vec_concat:<ssehalfvecmode>
12212 (lshift:<ssescalarmode>
12213 (vec_select:<ssescalarmode>
12215 (parallel [(const_int 2)]))
12216 (vec_select:<ssescalarmode>
12218 (parallel [(const_int 2)])))
12219 (lshift:<ssescalarmode>
12220 (vec_select:<ssescalarmode>
12222 (parallel [(const_int 3)]))
12223 (vec_select:<ssescalarmode>
12225 (parallel [(const_int 3)]))))))]
12227 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12228 [(set_attr "type" "sseishft")
12229 (set_attr "prefix" "vex")
12230 (set_attr "mode" "<sseinsnmode>")])
;; Two-element (V2DI) form of the variable logical shift, emitted as
;; vp<lshift_insn>vq.  The RTL pairs element i of operand 1 (register) with
;; element i of operand 2 (register or memory) for i = 0 and 1.
12232 (define_insn "avx2_<lshift>vv2di"
12233 [(set (match_operand:V2DI 0 "register_operand" "=x")
12237 (match_operand:V2DI 1 "register_operand" "x")
12238 (parallel [(const_int 0)]))
12240 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12241 (parallel [(const_int 0)])))
12245 (parallel [(const_int 1)]))
12248 (parallel [(const_int 1)])))))]
12250 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12251 [(set_attr "type" "sseishft")
12252 (set_attr "prefix" "vex")
12253 (set_attr "mode" "TI")])
;; Build a 256-bit vector from two half-width operands.
;;   Alternative 0 (operand 2 in a register or memory): vinsertf128 with
;;   immediate 1, with operand 1 printed via the %t modifier.
;;   Alternative 1 (operand 2 satisfies constraint "C"): a plain move of
;;   operand 1 into the destination printed via the %x modifier; the
;;   mnemonic (vmovaps, vmovapd or vmovdqa) is chosen from the insn's
;;   "mode" attribute.
;; NOTE(review): "C" is presumably the all-zero constant, so the move
;; relies on the upper half being cleared by the 128-bit VEX move --
;; confirm against the constraint definitions.
12255 (define_insn "*vec_concat<mode>_avx"
12256 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12258 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12259 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12262 switch (which_alternative)
12265 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12267 switch (get_attr_mode (insn))
12270 return "vmovaps\t{%1, %x0|%x0, %1}";
12272 return "vmovapd\t{%1, %x0|%x0, %1}";
12274 return "vmovdqa\t{%1, %x0|%x0, %1}";
12277 gcc_unreachable ();
12280 [(set_attr "type" "sselog,ssemov")
12281 (set_attr "prefix_extra" "1,*")
12282 (set_attr "length_immediate" "1,*")
12283 (set_attr "prefix" "vex")
12284 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision to single-precision conversion, 128-bit register form.
;; The conversion of V8HI operand 1 is modelled as a V8SF unspec from which
;; the V4SF result takes the low four elements, so the selection PARALLEL
;; must be 0, 1, 2, 3.  It previously read 0, 1, 1, 2, which duplicated
;; element 1 and dropped element 3, misdescribing the instruction.
12286 (define_insn "vcvtph2ps"
12287 [(set (match_operand:V4SF 0 "register_operand" "=x")
12289 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12291 (parallel [(const_int 0) (const_int 1)
12292 (const_int 2) (const_int 3)])))]
12294 "vcvtph2ps\t{%1, %0|%0, %1}"
12295 [(set_attr "type" "ssecvt")
12296 (set_attr "prefix" "vex")
12297 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit vcvtph2ps: operand 1 is a V4HI in
;; memory and the V4SF result is an UNSPEC_VCVTPH2PS of it.
;; NOTE(review): the "mode" attribute here is V8SF although the result is
;; V4SF and the register form above uses V4SF -- confirm this is intended.
12299 (define_insn "*vcvtph2ps_load"
12300 [(set (match_operand:V4SF 0 "register_operand" "=x")
12301 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12302 UNSPEC_VCVTPH2PS))]
12304 "vcvtph2ps\t{%1, %0|%0, %1}"
12305 [(set_attr "type" "ssecvt")
12306 (set_attr "prefix" "vex")
12307 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: V8HI operand 1 (register or memory)
;; is converted to a V8SF result via UNSPEC_VCVTPH2PS.
12309 (define_insn "vcvtph2ps256"
12310 [(set (match_operand:V8SF 0 "register_operand" "=x")
12311 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12312 UNSPEC_VCVTPH2PS))]
12314 "vcvtph2ps\t{%1, %0|%0, %1}"
12315 [(set_attr "type" "ssecvt")
12316 (set_attr "prefix" "vex")
12317 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single-to-half conversion.  The V4HI unspec
;; takes V4SF operand 1 and the 8-bit immediate operand 2.  The preparation
;; statement sets operand 3 to the V4HI zero constant, which the pattern
;; uses alongside the unspec to form the V8HI destination.
12319 (define_expand "vcvtps2ph"
12320 [(set (match_operand:V8HI 0 "register_operand" "")
12322 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12323 (match_operand:SI 2 "const_0_to_255_operand" "")]
12327 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of vcvtps2ph matching the expander above:
;; a V4HI unspec of V4SF operand 1 and immediate operand 2, combined with
;; operand 3, which must be the zero constant (const0_operand).
12329 (define_insn "*vcvtps2ph"
12330 [(set (match_operand:V8HI 0 "register_operand" "=x")
12332 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12333 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12335 (match_operand:V4HI 3 "const0_operand" "")))]
12337 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12338 [(set_attr "type" "ssecvt")
12339 (set_attr "prefix" "vex")
12340 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit vcvtps2ph: the V4HI result of
;; converting V4SF operand 1 (with immediate operand 2) is stored directly
;; to memory operand 0.
12342 (define_insn "*vcvtps2ph_store"
12343 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12344 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12345 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12346 UNSPEC_VCVTPS2PH))]
12348 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12349 [(set_attr "type" "ssecvt")
12350 (set_attr "prefix" "vex")
12351 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: V8SF operand 1 is converted, using
;; immediate operand 2, to a V8HI result that may be a register or memory.
12353 (define_insn "vcvtps2ph256"
12354 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12355 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12356 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12357 UNSPEC_VCVTPS2PH))]
12359 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12360 [(set_attr "type" "ssecvt")
12361 (set_attr "prefix" "vex")
12362 (set_attr "mode" "V8SF")])
12364 ;; For gather* insn patterns
;; The iterator lists the data modes the gather patterns are generated for.
;; The mode attribute of the same name maps each data mode to the integer
;; vector mode used for operand 3 of the gathersi patterns: V4SI for every
;; mode except V8SI and V8SF, which map to V8SI.
12365 (define_mode_iterator VEC_GATHER_MODE
12366 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
12367 (define_mode_attr VEC_GATHER_MODE
12368 [(V2DI "V4SI") (V2DF "V4SI")
12369 (V4DI "V4SI") (V4DF "V4SI")
12370 (V4SI "V4SI") (V4SF "V4SI")
12371 (V8SI "V8SI") (V8SF "V8SI")])
;; Named expander for gathers whose operand 3 uses the <VEC_GATHER_MODE>
;; integer vector mode.  Operands: 0 destination, 1 a register of the data
;; mode, 2 a scalar-mode memory reference, 3 the integer vector register,
;; 4 another register of the data mode, 5 a constant accepted by
;; const1248_operand.
;; The predicate name for operand 5 previously carried a trailing space
;; ("const1248_operand "), which does not name any defined predicate; it
;; now matches the spelling used by the *avx2_gathersi<mode> insn.
12373 (define_expand "avx2_gathersi<mode>"
12374 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12375 (unspec:VEC_GATHER_MODE
12376 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12377 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12378 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12379 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12380 (match_operand:SI 5 "const1248_operand" "")]
;; Gather insn for the <VEC_GATHER_MODE> index form (mnemonic
;; v...gatherd...).  Operand 1 is tied to the destination ("0").  The
;; address is printed as (%2, %3, %c5): pointer-mode register operand 2,
;; vector register operand 3, and the constant operand 5.  Operand 4 is
;; printed as the remaining register operand.
12384 (define_insn "*avx2_gathersi<mode>"
12385 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
12386 (unspec:VEC_GATHER_MODE
12387 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
12388 (mem:<ssescalarmode>
12389 (match_operand:P 2 "register_operand" "r"))
12390 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
12391 (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
12392 (match_operand:SI 5 "const1248_operand" "n")]
12395 "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12396 [(set_attr "type" "ssemov")
12397 (set_attr "prefix" "vex")
12398 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for gathers whose operand 3 uses the <AVXMODE48P_DI>
;; vector mode.  Operands: 0 destination, 1 a register of the data mode,
;; 2 a scalar-mode memory reference, 3 the vector register, 4 another
;; register of the data mode, 5 a constant accepted by const1248_operand.
;; The predicate name for operand 5 previously carried a trailing space
;; ("const1248_operand "), which does not name any defined predicate; it
;; now matches the spelling used by the *avx2_gatherdi<mode> insn.
12400 (define_expand "avx2_gatherdi<mode>"
12401 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12402 (unspec:VEC_GATHER_MODE
12403 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12404 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12405 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12406 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12407 (match_operand:SI 5 "const1248_operand" "")]
;; Gather insn for the <AVXMODE48P_DI> index form (mnemonic
;; v...gatherq...).  Operand 1 is tied to the destination ("0").  The
;; address is printed as (%2, %3, %c5): pointer-mode register operand 2,
;; vector register operand 3, and the constant operand 5.  Operand 4 is
;; printed as the remaining register operand.
12411 (define_insn "*avx2_gatherdi<mode>"
12412 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
12413 (unspec:AVXMODE48P_DI
12414 [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
12415 (mem:<ssescalarmode>
12416 (match_operand:P 2 "register_operand" "r"))
12417 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
12418 (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
12419 (match_operand:SI 5 "const1248_operand" "n")]
12422 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12423 [(set_attr "type" "ssemov")
12424 (set_attr "prefix" "vex")
12425 (set_attr "mode" "<sseinsnmode>")])
12427 ;; Special handling for VEX.256 with float arguments
12428 ;; since there're still xmms as operands
;; Named expander for the case where the data operands (0, 1 and 4) are
;; 128-bit VI4F_128 vectors while operand 3 is a 256-bit V4DI register.
;; Operand 2 is a scalar-mode memory reference and operand 5 a constant
;; accepted by const1248_operand.
;; The predicate name for operand 5 previously carried a trailing space
;; ("const1248_operand "), which does not name any defined predicate; it
;; now matches the spelling used by the *avx2_gatherdi<mode>256 insn.
12429 (define_expand "avx2_gatherdi<mode>256"
12430 [(set (match_operand:VI4F_128 0 "register_operand" "")
12432 [(match_operand:VI4F_128 1 "register_operand" "")
12433 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12434 (match_operand:V4DI 3 "register_operand" "")
12435 (match_operand:VI4F_128 4 "register_operand" "")
12436 (match_operand:SI 5 "const1248_operand" "")]
;; Gather insn with 128-bit VI4F_128 data operands and a V4DI vector
;; register as operand 3 (mnemonic v...gatherq...).  Operand 1 is tied to
;; the destination ("0").  The address is printed as (%2, %3, %c5):
;; pointer-mode register operand 2, vector register operand 3, and the
;; constant operand 5.  Operand 4 is printed as the remaining register
;; operand.
12440 (define_insn "*avx2_gatherdi<mode>256"
12441 [(set (match_operand:VI4F_128 0 "register_operand" "=x")
12443 [(match_operand:VI4F_128 1 "register_operand" "0")
12444 (mem:<ssescalarmode>
12445 (match_operand:P 2 "register_operand" "r"))
12446 (match_operand:V4DI 3 "register_operand" "x")
12447 (match_operand:VI4F_128 4 "register_operand" "x")
12448 (match_operand:SI 5 "const1248_operand" "n")]
12451 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12452 [(set_attr "type" "ssemov")
12453 (set_attr "prefix" "vex")
12454 (set_attr "mode" "<sseinsnmode>")])