1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; Every 256-bit entry is gated on TARGET_AVX; the 128-bit entries carry no
;; condition.
;; NOTE(review): the comment above this iterator mentions V1TImode, but no
;; V1TI entry is visible in the list -- confirm the list is complete.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Integer and float vector modes.  256-bit modes are gated on TARGET_AVX,
;; V2DF on TARGET_SSE2; the remaining 128-bit modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V2DF is the only entry with a condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No per-mode conditions are attached in this list.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; 256-bit modes need TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; NOTE(review): V2DF carries no TARGET_SSE2 condition here, unlike in VF and
;; VF_128 -- presumably users of VF2 test TARGET_SSE2 themselves; confirm.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): no mode list follows this header in the visible text --
;; confirm the definition is complete.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; 256-bit modes are gated on TARGET_AVX.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; The same eight integer modes, but with the 256-bit ones gated on
;; TARGET_AVX2 instead of TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size iterators: each pairs the 128-bit mode with its
;; 256-bit counterpart, the latter gated on TARGET_AVX2.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; V1TI, plus V4DI when TARGET_AVX2.
102 (define_mode_iterator VIMAX_AVX2
103 [(V4DI "TARGET_AVX2") V1TI])
;; Scalar TI, plus V4DI when TARGET_AVX2.
105 (define_mode_iterator SSESCALARMODE
106 [(V4DI "TARGET_AVX2") TI])
;; Combinations of integer element sizes; in each iterator below the 256-bit
;; modes are gated on TARGET_AVX2 and the 128-bit modes are unconditional,
;; unless noted otherwise.
108 (define_mode_iterator VI12_AVX2
109 [(V32QI "TARGET_AVX2") V16QI
110 (V16HI "TARGET_AVX2") V8HI])
112 (define_mode_iterator VI24_AVX2
113 [(V16HI "TARGET_AVX2") V8HI
114 (V8SI "TARGET_AVX2") V4SI])
116 (define_mode_iterator VI124_AVX2
117 [(V32QI "TARGET_AVX2") V16QI
118 (V16HI "TARGET_AVX2") V8HI
119 (V8SI "TARGET_AVX2") V4SI])
121 (define_mode_iterator VI248_AVX2
122 [(V16HI "TARGET_AVX2") V8HI
123 (V8SI "TARGET_AVX2") V4SI
124 (V4DI "TARGET_AVX2") V2DI])
;; NOTE(review): despite the _AVX2 suffix, no entry here carries a
;; condition -- presumably the using patterns test TARGET_AVX2; confirm.
126 (define_mode_iterator VI48_AVX2
127 [V8SI V4SI V4DI V2DI])
;; NOTE(review): the mode list of VI4SD_AVX2 is not visible here.
129 (define_mode_iterator VI4SD_AVX2
;; Every visible V48_AVX2 entry is gated on TARGET_AVX2.
;; NOTE(review): the list's opening bracket is not visible, so earlier
;; entries may exist -- confirm.
132 (define_mode_iterator V48_AVX2
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; The four attributes below map a vector mode to a lower-case ISA name
;; string: in the first three, 128-bit integer modes map to the SSE-level
;; name and 256-bit integer modes map to "avx2".
;; NOTE(review): the closing bracket of sse2_avx2 and ssse3_avx2 is not
;; visible here, so further entries may exist -- confirm.
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
;; Float vector modes map to "avx"; SI/DI integer vector modes map to
;; "avx2", for both the 128-bit and the 256-bit widths.
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 ;; Mapping of logic-shift operators
;; Iterates over logical right shift and left shift (no arithmetic right
;; shift).
165 (define_code_iterator lshift [lshiftrt ashift])
167 ;; Base name for define_insn
;; NOTE(review): "srl"/"sll" look like mnemonic fragments while
;; "lshr"/"lshl" below look like pattern-name fragments, so this comment and
;; the next one may be swapped -- confirm against the patterns that use
;; <lshift_insn> and <lshift>.
168 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
170 ;; Base name for insn mnemonic
171 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; Maps an HImode vector to the SImode vector with the same element count
;; (each element twice as wide).
173 (define_mode_attr ssedoublemode
174 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DImode vector to the QImode vector of the same total size.
176 (define_mode_attr ssebytemode
177 [(V4DI "V32QI") (V2DI "V16QI")])
;; None of the fixed-width iterators in this group attaches a per-mode
;; condition.
179 ;; All 128bit vector integer modes
180 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
182 ;; All 256bit vector integer modes
183 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
185 ;; Random 128bit vector integer mode combinations
186 (define_mode_iterator VI12_128 [V16QI V8HI])
187 (define_mode_iterator VI14_128 [V16QI V4SI])
188 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
189 (define_mode_iterator VI24_128 [V8HI V4SI])
190 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
192 ;; Random 256bit vector integer mode combinations
;; VI1248_256 lists the same four modes as VI_256.
193 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
194 (define_mode_iterator VI1248_256 [V32QI V16HI V8SI V4DI])
195 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
197 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; the same element width and element count.
198 (define_mode_iterator VI4F_128 [V4SI V4SF])
199 (define_mode_iterator VI8F_128 [V2DI V2DF])
200 (define_mode_iterator VI4F_256 [V8SI V8SF])
201 (define_mode_iterator VI8F_256 [V4DI V4DF])
203 ;; Mapping from float mode to required SSE level
;; SFmode-based modes give "sse", DFmode-based 128-bit/scalar modes give
;; "sse2", and both 256-bit float modes give "avx".
204 (define_mode_attr sse
205 [(SF "sse") (DF "sse2")
206 (V4SF "sse") (V2DF "sse2")
207 (V8SF "avx") (V4DF "avx")])
;; "sse2" for the 128-bit QI/DI integer vectors, "avx" for their 256-bit
;; counterparts.
209 (define_mode_attr sse2
210 [(V16QI "sse2") (V32QI "avx")
211 (V2DI "sse2") (V4DI "avx")])
;; "sse3" for V16QI, "avx" for V32QI.
213 (define_mode_attr sse3
214 [(V16QI "sse3") (V32QI "avx")])
;; "sse4_1" for 128-bit float vectors, "avx" for 256-bit float vectors.
216 (define_mode_attr sse4_1
217 [(V4SF "sse4_1") (V2DF "sse4_1")
218 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix: "256" for every 256-bit mode, the empty string for
;; every 128-bit mode.
220 (define_mode_attr avxsizesuffix
221 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
222 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
223 (V8SF "256") (V4DF "256")
224 (V4SF "") (V2DF "")])
226 ;; SSE instruction mode
;; Value for an insn's "mode" attribute: 256-bit integer vectors use OI,
;; 128-bit integer vectors and scalar TI use TI, and float vectors map to
;; themselves.  Each mode is listed exactly once; the former second copy of
;; the V32QI/V16HI/V8SI/V4DI entries repeated the same values and was dropped.
227 (define_mode_attr sseinsnmode
228 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
229 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
230 (V8SF "V8SF") (V4DF "V4DF")
231 (V4SF "V4SF") (V2DF "V2DF")
232 (TI "TI")])
234 ;; Mapping of vector modes to an integer vector mode of the same size
;; Float vector modes map to the integer vector with the same element width
;; and count; SI/DI integer vector modes map to themselves.  Each mode is
;; listed exactly once (a repeated V4DF/V8SF line with identical values was
;; dropped).
235 (define_mode_attr sseintvecmode
236 [(V8SF "V8SI") (V4DF "V4DI")
237 (V4SF "V4SI") (V2DF "V2DI")
239 (V8SI "V8SI") (V4DI "V4DI")
240 (V4SI "V4SI") (V2DI "V2DI")])
242 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count; 256-bit inputs therefore map
;; to 512-bit mode names (V64QI, V32HI, V16SI, V8DI, V16SF, V8DF).
243 (define_mode_attr ssedoublevecmode
244 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
245 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
246 (V8SF "V16SF") (V4DF "V8DF")
247 (V4SF "V8SF") (V2DF "V4DF")])
249 ;; Mapping of vector modes to a vector mode of half size
;; Same element type, half the element count.
;; NOTE(review): the end of this list is not visible here; no entries for
;; V2DI, V4SF or V2DF appear in the visible part -- confirm.
250 (define_mode_attr ssehalfvecmode
251 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
252 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
253 (V8SF "V4SF") (V4DF "V2DF")
256 ;; Mapping of vector modes back to the scalar modes
;; Gives the element mode of each vector mode.
257 (define_mode_attr ssescalarmode
258 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
259 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
260 (V8SF "SF") (V4DF "DF")
261 (V4SF "SF") (V2DF "DF")])
263 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
264 (define_mode_attr ssescalarnum
265 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
266 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
267 (V8SF "8") (V4DF "4")
268 (V4SF "4") (V2DF "2")])
270 ;; SSE scalar suffix for vector modes
;; "ss" for 32-bit-element modes and "sd" for 64-bit-element modes; note
;; that the integer modes V8SI and V4DI also map to "ss"/"sd".
;; NOTE(review): the first and last lines of this list are not visible
;; here, so further entries may exist -- confirm.
271 (define_mode_attr ssescalarmodesuffix
273 (V8SF "ss") (V4DF "sd")
274 (V4SF "ss") (V2DF "sd")
275 (V8SI "ss") (V4DI "sd")
278 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, elements twice as wide, half as many.
279 (define_mode_attr sseunpackmode
280 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
281 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; ssepackmode is the inverse mapping: elements half as wide, twice as many.
283 (define_mode_attr ssepackmode
284 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
285 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
287 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element bit width minus one.
288 (define_mode_attr sserotatemax
289 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
291 ;; Mapping of mode to cast intrinsic name
292 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
294 ;; Instruction suffix for sign and zero extensions.
295 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; The three 256-bit modes that castmode provides a name for.
298 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Iterator and same-named attribute.  The attribute maps each mode to a
;; DImode vector: V2DI for the 128-bit modes and V4DI for the 256-bit ones.
;; It also has V8SI/V8SF entries that the iterator itself does not list.
300 (define_mode_iterator AVXMODE48P_DI
301 [V2DI V2DF V4DI V4DF V4SF V4SI])
302 (define_mode_attr AVXMODE48P_DI
303 [(V2DI "V2DI") (V2DF "V2DI")
304 (V4DI "V4DI") (V4DF "V4DI")
305 (V4SI "V2DI") (V4SF "V2DI")
306 (V8SI "V4DI") (V8SF "V4DI")])
;; In the visible entries integer modes give "p" and float modes give "".
;; NOTE(review): the middle of this list is not visible here.
307 (define_mode_attr gthrfirstp
308 [(V2DI "p") (V2DF "")
311 (V8SI "p") (V8SF "")])
;; Integer modes give "q" (DI elements) or "d" (SI elements); float modes
;; give "pd" or "ps".
312 (define_mode_attr gthrlastp
313 [(V2DI "q") (V2DF "pd")
314 (V4DI "q") (V4DF "pd")
315 (V4SI "d") (V4SF "ps")
316 (V8SI "d") (V8SF "ps")])
;; Scalar SF/DF plus the 128-bit and 256-bit float vector modes; no per-mode
;; conditions.
318 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
320 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^n - 1 for a mode with n elements, i.e. an all-ones
;; mask with one bit per element.
321 (define_mode_attr blendbits
322 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
324 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 ;; All of these patterns are enabled for SSE1 as well as SSE2.
333 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode of the V16 iterator.  Both operands accept
;; nonimmediate_operand; the visible preparation code passes the mode and
;; the operand array to ix86_expand_vector_move.
335 (define_expand "mov<mode>"
336 [(set (match_operand:V16 0 "nonimmediate_operand" "")
337 (match_operand:V16 1 "nonimmediate_operand" ""))]
340 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the V16 modes with three alternatives:
;;   0: SSE constant (C)       -> register
;;   1: register or memory     -> register
;;   2: register               -> memory
;; The visible part of the condition requires at least one operand to be a
;; register.  The output code switches on which_alternative and then on the
;; insn's "mode" attribute: one branch returns
;; standard_sse_constant_opcode; the others return movaps/movapd/movdqa, an
;; unaligned vmovups/vmovupd/vmovdqu form guarded by a misaligned_operand
;; test on either operand, or movaps in place of movapd/movdqa when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
;; The "mode" attribute is computed as: <sseinsnmode> under TARGET_AVX;
;; V4SF when optimizing for size, without SSE2, or for the store alternative
;; with TARGET_SSE_TYPELESS_STORES; V4SF / V2DF for those two modes; TI last.
;; NOTE(review): several case labels and guards of the C body are not
;; visible here; the summary above covers only what is shown.
344 (define_insn "*mov<mode>_internal"
345 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
346 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
348 && (register_operand (operands[0], <MODE>mode)
349 || register_operand (operands[1], <MODE>mode))"
351 switch (which_alternative)
354 return standard_sse_constant_opcode (insn, operands[1]);
357 switch (get_attr_mode (insn))
362 && (misaligned_operand (operands[0], <MODE>mode)
363 || misaligned_operand (operands[1], <MODE>mode)))
364 return "vmovups\t{%1, %0|%0, %1}";
366 return "%vmovaps\t{%1, %0|%0, %1}";
371 && (misaligned_operand (operands[0], <MODE>mode)
372 || misaligned_operand (operands[1], <MODE>mode)))
373 return "vmovupd\t{%1, %0|%0, %1}";
374 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
375 return "%vmovaps\t{%1, %0|%0, %1}";
377 return "%vmovapd\t{%1, %0|%0, %1}";
382 && (misaligned_operand (operands[0], <MODE>mode)
383 || misaligned_operand (operands[1], <MODE>mode)))
384 return "vmovdqu\t{%1, %0|%0, %1}";
385 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
386 return "%vmovaps\t{%1, %0|%0, %1}";
388 return "%vmovdqa\t{%1, %0|%0, %1}";
397 [(set_attr "type" "sselog1,ssemov,ssemov")
398 (set_attr "prefix" "maybe_vex")
400 (cond [(match_test "TARGET_AVX")
401 (const_string "<sseinsnmode>")
402 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
403 (not (match_test "TARGET_SSE2")))
404 (and (eq_attr "alternative" "2")
405 (match_test "TARGET_SSE_TYPELESS_STORES")))
406 (const_string "V4SF")
407 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
408 (const_string "V4SF")
409 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
410 (const_string "V2DF")
412 (const_string "TI")))])
;; Emits %vmovq from a V2DI register-or-memory source (operand 1) into a
;; V2DI xmm register (operand 0).  The visible RTL selects element 0 of
;; operand 1.
;; NOTE(review): the RTL wrapped around that selection and the insn
;; condition are not visible here.
414 (define_insn "sse2_movq128"
415 [(set (match_operand:V2DI 0 "register_operand" "=x")
418 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
419 (parallel [(const_int 0)]))
422 "%vmovq\t{%1, %0|%0, %1}"
423 [(set_attr "type" "ssemov")
424 (set_attr "prefix" "maybe_vex")
425 (set_attr "mode" "TI")])
427 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
428 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
429 ;; from memory, we'd prefer to load the memory directly into the %xmm
430 ;; register. To facilitate this happy circumstance, this pattern won't
431 ;; split until after register allocation. If the 64-bit value didn't
432 ;; come from memory, this is the best we can do. This is much better
433 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Enabled only for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES;
;; the split runs once reload_completed holds.
;; Alternatives: DI in a general register ("r", with an early-clobber V4SI
;; scratch as operand 2) or DI in memory ("m", scratch unused: "X").
;; Split code:
;;   register source: load the SImode subreg at byte 0 into operand 0 and
;;     the subreg at byte 4 into the scratch (sse2_loadld, with zero as the
;;     other input), then combine them with vec_interleave_lowv4si;
;;   memory source: vec_concatv2di of the DI value with const0_rtx into the
;;     V2DI low part of operand 0.
436 (define_insn_and_split "movdi_to_sse"
438 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
439 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
440 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
441 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
443 "&& reload_completed"
446 if (register_operand (operands[1], DImode))
448 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
449 Assemble the 64-bit DImode value in an xmm register. */
450 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
451 gen_rtx_SUBREG (SImode, operands[1], 0)));
452 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
453 gen_rtx_SUBREG (SImode, operands[1], 4)));
454 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
457 else if (memory_operand (operands[1], DImode))
458 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
459 operands[1], const0_rtx));
;; Matches a V4SF register set from a zero_extended_scalar_load_operand,
;; under TARGET_SSE && reload_completed.  The preparation code replaces
;; operand 1 by its SFmode subreg at offset 0 and sets operand 2 to the
;; V4SF zero vector; the visible replacement RTL duplicates the scalar with
;; vec_duplicate.
;; NOTE(review): the line opening this definition is not visible here.
465 [(set (match_operand:V4SF 0 "register_operand" "")
466 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
467 "TARGET_SSE && reload_completed"
470 (vec_duplicate:V4SF (match_dup 1))
474 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
475 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF counterpart, under TARGET_SSE2 && reload_completed: operand 1
;; becomes its DFmode subreg at offset 0, operand 2 becomes DFmode zero, and
;; the result is the vec_concat of the two.
;; NOTE(review): the line opening this definition is not visible here.
479 [(set (match_operand:V2DF 0 "register_operand" "")
480 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
481 "TARGET_SSE2 && reload_completed"
482 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
484 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
485 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the V16 modes: takes one register operand and passes
;; it, with the mode, to ix86_expand_push.
488 (define_expand "push<mode>1"
489 [(match_operand:V16 0 "register_operand" "")]
492 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the V16 modes; the visible preparation code
;; passes the mode and the operand array to ix86_expand_vector_move_misalign.
496 (define_expand "movmisalign<mode>"
497 [(set (match_operand:V16 0 "nonimmediate_operand" "")
498 (match_operand:V16 1 "nonimmediate_operand" ""))]
501 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float-vector move (VF modes).  When both
;; operands are memory, operand 1 is first forced into a register.
505 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
506 [(set (match_operand:VF 0 "nonimmediate_operand" "")
508 [(match_operand:VF 1 "nonimmediate_operand" "")]
512 if (MEM_P (operands[0]) && MEM_P (operands[1]))
513 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Insn behind the expander of the same name.  Alternatives: load
;; (register/memory -> register) and store (register -> memory); a
;; memory-to-memory form is rejected by the condition.  Emits
;; %vmovu<ssemodesuffix> and sets the "movu" attribute.
516 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
517 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
519 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
521 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
522 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
523 [(set_attr "type" "ssemov")
524 (set_attr "movu" "1")
525 (set_attr "prefix" "maybe_vex")
526 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned integer-vector move (VI1 modes, wrapped in an
;; unspec).  When both operands are memory, operand 1 is first forced into
;; a register.
528 (define_expand "<sse2>_movdqu<avxsizesuffix>"
529 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
530 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
534 if (MEM_P (operands[0]) && MEM_P (operands[1]))
535 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Insn behind the expander of the same name: load or store alternative,
;; never memory-to-memory, TARGET_SSE2 only.  Emits %vmovdqu.  The
;; "prefix_data16" attribute is computed from a TARGET_AVX test.
;; NOTE(review): the two values selected by that test are not visible here.
538 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
539 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
540 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
542 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
543 "%vmovdqu\t{%1, %0|%0, %1}"
544 [(set_attr "type" "ssemov")
545 (set_attr "movu" "1")
546 (set (attr "prefix_data16")
548 (match_test "TARGET_AVX")
551 (set_attr "prefix" "maybe_vex")
552 (set_attr "mode" "<sseinsnmode>")])
;; Load-only insn: memory operand 1 into register operand 0 (VI1 modes),
;; emitted as %vlddqu.  Both "prefix_data16" and "prefix_rep" are computed
;; from a TARGET_AVX test.
;; NOTE(review): the unspec name, the insn condition and the values those
;; tests select are not visible here.
554 (define_insn "<sse3>_lddqu<avxsizesuffix>"
555 [(set (match_operand:VI1 0 "register_operand" "=x")
556 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
559 "%vlddqu\t{%1, %0|%0, %1}"
560 [(set_attr "type" "ssemov")
561 (set_attr "movu" "1")
562 (set (attr "prefix_data16")
564 (match_test "TARGET_AVX")
567 (set (attr "prefix_rep")
569 (match_test "TARGET_AVX")
572 (set_attr "prefix" "maybe_vex")
573 (set_attr "mode" "<sseinsnmode>")])
;; Stores an SImode general register (constraint "r") to memory with
;; movnti; the source is wrapped in an unspec.
;; NOTE(review): the "mode" attribute is "V2DF" although the operands are
;; SImode -- confirm this is intended.
575 (define_insn "sse2_movntsi"
576 [(set (match_operand:SI 0 "memory_operand" "=m")
577 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
580 "movnti\t{%1, %0|%0, %1}"
581 [(set_attr "type" "ssemov")
582 (set_attr "prefix_data16" "0")
583 (set_attr "mode" "V2DF")])
;; Stores a float vector register (VF modes) to memory with
;; %vmovnt<ssemodesuffix>; the source is wrapped in an unspec.
585 (define_insn "<sse>_movnt<mode>"
586 [(set (match_operand:VF 0 "memory_operand" "=m")
587 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
590 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
591 [(set_attr "type" "ssemov")
592 (set_attr "prefix" "maybe_vex")
593 (set_attr "mode" "<MODE>")])
;; Stores a DImode-element vector register (VI8 modes) to memory with
;; %vmovntdq; the source is wrapped in an unspec.
;; NOTE(review): "type" is "ssecvt" here while the other movnt insns in
;; this group use "ssemov" -- confirm this is intended.
595 (define_insn "<sse2>_movnt<mode>"
596 [(set (match_operand:VI8 0 "memory_operand" "=m")
597 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
600 "%vmovntdq\t{%1, %0|%0, %1}"
601 [(set_attr "type" "ssecvt")
602 (set (attr "prefix_data16")
604 (match_test "TARGET_AVX")
607 (set_attr "prefix" "maybe_vex")
608 (set_attr "mode" "<sseinsnmode>")])
610 ; Expand patterns for non-temporal stores. At the moment, only those
611 ; that directly map to insns are defined; it would be possible to
612 ; define patterns for other modes that would expand to several insns.
614 ;; Modes handled by storent patterns.
;; SI requires TARGET_SSE2, scalar SF/DF require TARGET_SSE4A, the 256-bit
;; float vectors require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF is
;; unconditional.
;; NOTE(review): one line of this list is not visible here, so further
;; modes may exist -- confirm.
615 (define_mode_iterator STORENT_MODE
616 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
618 (V8SF "TARGET_AVX") V4SF
619 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander "storent<mode>" for each STORENT_MODE mode: sets a memory
;; operand from a register operand.
;; NOTE(review): the wrapper around operand 1 and the expander condition
;; are not visible here.
621 (define_expand "storent<mode>"
622 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
624 [(match_operand:STORENT_MODE 1 "register_operand" "")]
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
630 ;; Parallel floating point arithmetic
632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander named "<code><mode>2" (VF modes).  It calls
;; ix86_expand_fp_absneg_operator with the RTL code, the mode and the
;; operands, and then finishes with DONE.
634 (define_expand "<code><mode>2"
635 [(set (match_operand:VF 0 "register_operand" "")
637 (match_operand:VF 1 "register_operand" "")))]
639 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an absneg_operator applied to VF operand 1, together with a
;; (use) of VF operand 2.  After reload it is split into a single SET of
;; operand 0 to a binary operation on the two inputs: XOR when the matched
;; operator is NEG, AND otherwise.  If operand 1 is memory the two inputs
;; are swapped (op1 = operand 2, op2 = operand 1).
;; Alternatives 0-1 are the non-AVX forms (one input tied to the output),
;; 2-3 the AVX forms.
;; NOTE(review): parts of the split body are not visible here.
641 (define_insn_and_split "*absneg<mode>2"
642 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
643 (match_operator:VF 3 "absneg_operator"
644 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
645 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
648 "&& reload_completed"
651 enum rtx_code absneg_op;
657 if (MEM_P (operands[1]))
658 op1 = operands[2], op2 = operands[1];
660 op1 = operands[1], op2 = operands[2];
665 if (rtx_equal_p (operands[0], operands[1]))
671 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
672 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
673 t = gen_rtx_SET (VOIDmode, operands[0], t);
677 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Float-vector add/subtract expander (VF modes); the preparation statement
;; calls ix86_fixup_binary_operands_no_copy on the operands.
679 (define_expand "<plusminus_insn><mode>3"
680 [(set (match_operand:VF 0 "register_operand" "")
682 (match_operand:VF 1 "nonimmediate_operand" "")
683 (match_operand:VF 2 "nonimmediate_operand" "")))]
685 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Float-vector add/subtract insn.  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is
;; the three-operand v-prefixed AVX form.  Operand 1's constraint carries
;; the <comm> modifier.  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.
687 (define_insn "*<plusminus_insn><mode>3"
688 [(set (match_operand:VF 0 "register_operand" "=x,x")
690 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
691 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
692 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
694 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
695 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
696 [(set_attr "isa" "noavx,avx")
697 (set_attr "type" "sseadd")
698 (set_attr "prefix" "orig,vex")
699 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/subtract on 128-bit float vectors (VF_128): uses
;; <ssescalarmodesuffix> in the mnemonic and <ssescalarmode> as the insn
;; mode.  Non-AVX (operand 1 tied to the destination) and AVX alternatives.
;; NOTE(review): the RTL around the arithmetic and the insn condition are
;; not visible here.
701 (define_insn "<sse>_vm<plusminus_insn><mode>3"
702 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
705 (match_operand:VF_128 1 "register_operand" "0,x")
706 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
711 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
712 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
713 [(set_attr "isa" "noavx,avx")
714 (set_attr "type" "sseadd")
715 (set_attr "prefix" "orig,vex")
716 (set_attr "mode" "<ssescalarmode>")])
;; Float-vector multiply expander (VF modes); the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with MULT.
718 (define_expand "mul<mode>3"
719 [(set (match_operand:VF 0 "register_operand" "")
721 (match_operand:VF 1 "nonimmediate_operand" "")
722 (match_operand:VF 2 "nonimmediate_operand" "")))]
724 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Float-vector multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 emits the two-operand mul form with operand 1 tied to the
;; destination; alternative 1 emits the three-operand vmul form.  Enabled
;; under TARGET_SSE when ix86_binary_operator_ok accepts the operands.
726 (define_insn "*mul<mode>3"
727 [(set (match_operand:VF 0 "register_operand" "=x,x")
729 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
730 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
731 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
733 mul<ssemodesuffix>\t{%2, %0|%0, %2}
734 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
735 [(set_attr "isa" "noavx,avx")
736 (set_attr "type" "ssemul")
737 (set_attr "prefix" "orig,vex")
738 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply on 128-bit float vectors (VF_128): emits
;; mul/vmul with <ssescalarmodesuffix>; the insn mode is <ssescalarmode>.
;; NOTE(review): the RTL around the multiplication and the insn condition
;; are not visible here.
740 (define_insn "<sse>_vmmul<mode>3"
741 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
744 (match_operand:VF_128 1 "register_operand" "0,x")
745 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
750 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
751 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
752 [(set_attr "isa" "noavx,avx")
753 (set_attr "type" "ssemul")
754 (set_attr "prefix" "orig,vex")
755 (set_attr "mode" "<ssescalarmode>")])
;; Divide expander for the DFmode vector modes (VF2).  Operand 1 must be a
;; register; the preparation statement only fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.
757 (define_expand "div<mode>3"
758 [(set (match_operand:VF2 0 "register_operand" "")
759 (div:VF2 (match_operand:VF2 1 "register_operand" "")
760 (match_operand:VF2 2 "nonimmediate_operand" "")))]
762 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Divide expander for the SFmode vector modes (VF1).  After the operand
;; fix-up it calls ix86_emit_swdivsf when all of these hold: TARGET_SSE_MATH,
;; TARGET_RECIP, not optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
;; NOTE(review): the statements following that call are not visible here.
764 (define_expand "div<mode>3"
765 [(set (match_operand:VF1 0 "register_operand" "")
766 (div:VF1 (match_operand:VF1 1 "register_operand" "")
767 (match_operand:VF1 2 "nonimmediate_operand" "")))]
770 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
772 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
773 && flag_finite_math_only && !flag_trapping_math
774 && flag_unsafe_math_optimizations)
776 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Float-vector divide insn (VF modes).  Operand 1 must be a register:
;; tied to the destination in the non-AVX alternative, free in the AVX one.
;; Emits div/vdiv with <ssemodesuffix>.
;; NOTE(review): the insn condition is not visible here.
781 (define_insn "<sse>_div<mode>3"
782 [(set (match_operand:VF 0 "register_operand" "=x,x")
784 (match_operand:VF 1 "register_operand" "0,x")
785 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
788 div<ssemodesuffix>\t{%2, %0|%0, %2}
789 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
790 [(set_attr "isa" "noavx,avx")
791 (set_attr "type" "ssediv")
792 (set_attr "prefix" "orig,vex")
793 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed divide on 128-bit float vectors (VF_128): emits div/vdiv
;; with <ssescalarmodesuffix>; the insn mode is <ssescalarmode>.
;; NOTE(review): the RTL around the division and the insn condition are
;; not visible here.
795 (define_insn "<sse>_vmdiv<mode>3"
796 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
799 (match_operand:VF_128 1 "register_operand" "0,x")
800 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
805 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
806 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
807 [(set_attr "isa" "noavx,avx")
808 (set_attr "type" "ssediv")
809 (set_attr "prefix" "orig,vex")
810 (set_attr "mode" "<ssescalarmode>")])
;; UNSPEC_RCP on an SFmode vector (VF1 modes), emitted as %vrcpps with a
;; register or memory source.  The "atom_sse_attr" attribute is "rcp".
812 (define_insn "<sse>_rcp<mode>2"
813 [(set (match_operand:VF1 0 "register_operand" "=x")
815 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
817 "%vrcpps\t{%1, %0|%0, %1}"
818 [(set_attr "type" "sse")
819 (set_attr "atom_sse_attr" "rcp")
820 (set_attr "prefix" "maybe_vex")
821 (set_attr "mode" "<MODE>")])
;; V4SF insn emitting rcpss / vrcpss.  Operand 1 is the register-or-memory
;; source; operand 2 is a register that is tied to the destination in the
;; non-AVX alternative and appears as the extra source in the vrcpss
;; template.  The insn mode is SF.
;; NOTE(review): the RTL that combines the unspec with operand 2 is not
;; visible here.
823 (define_insn "sse_vmrcpv4sf2"
824 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
826 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
828 (match_operand:V4SF 2 "register_operand" "0,x")
832 rcpss\t{%1, %0|%0, %1}
833 vrcpss\t{%1, %2, %0|%0, %2, %1}"
834 [(set_attr "isa" "noavx,avx")
835 (set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "prefix" "orig,vex")
838 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vector modes (VF2); no preparation
;; code is visible for it.
840 (define_expand "sqrt<mode>2"
841 [(set (match_operand:VF2 0 "register_operand" "")
842 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vector modes (VF1).  It calls
;; ix86_emit_swsqrtsf with a final argument of false when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the statements following that call are not visible here.
845 (define_expand "sqrt<mode>2"
846 [(set (match_operand:VF1 0 "register_operand" "")
847 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
850 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
851 && flag_finite_math_only && !flag_trapping_math
852 && flag_unsafe_math_optimizations)
854 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Float-vector square-root insn (VF modes): register or memory source,
;; emitted as %vsqrt<ssemodesuffix>.  "atom_sse_attr" is "sqrt".
859 (define_insn "<sse>_sqrt<mode>2"
860 [(set (match_operand:VF 0 "register_operand" "=x")
861 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
863 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
864 [(set_attr "type" "sse")
865 (set_attr "atom_sse_attr" "sqrt")
866 (set_attr "prefix" "maybe_vex")
867 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on 128-bit float vectors (VF_128).  Operand
;; 1 is the register-or-memory source; operand 2 is a register tied to the
;; destination in the non-AVX alternative and used as the extra source in
;; the vsqrt template.  The insn mode is <ssescalarmode>.
;; NOTE(review): the RTL that combines the two operands is not visible here.
869 (define_insn "<sse>_vmsqrt<mode>2"
870 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
873 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
874 (match_operand:VF_128 2 "register_operand" "0,x")
878 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
879 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
880 [(set_attr "isa" "noavx,avx")
881 (set_attr "type" "sse")
882 (set_attr "atom_sse_attr" "sqrt")
883 (set_attr "prefix" "orig,vex")
884 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander (VF1 modes, UNSPEC_RSQRT).  The visible
;; preparation code calls ix86_emit_swsqrtsf with a final argument of true.
;; NOTE(review): the expander condition is not visible here.
886 (define_expand "rsqrt<mode>2"
887 [(set (match_operand:VF1 0 "register_operand" "")
889 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
892 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; UNSPEC_RSQRT on an SFmode vector (VF1 modes), emitted as %vrsqrtps with
;; a register or memory source.
896 (define_insn "<sse>_rsqrt<mode>2"
897 [(set (match_operand:VF1 0 "register_operand" "=x")
899 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
901 "%vrsqrtps\t{%1, %0|%0, %1}"
902 [(set_attr "type" "sse")
903 (set_attr "prefix" "maybe_vex")
904 (set_attr "mode" "<MODE>")])
;; V4SF insn emitting rsqrtss / vrsqrtss.  Operand 1 is the
;; register-or-memory source; operand 2 is a register tied to the
;; destination in the non-AVX alternative and used as the extra source in
;; the vrsqrtss template.  The insn mode is SF.
;; NOTE(review): the RTL that combines the unspec with operand 2 is not
;; visible here.
906 (define_insn "sse_vmrsqrtv4sf2"
907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
911 (match_operand:V4SF 2 "register_operand" "0,x")
915 rsqrtss\t{%1, %0|%0, %1}
916 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sse")
919 (set_attr "prefix" "orig,vex")
920 (set_attr "mode" "SF")])
922 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
923 ;; isn't really correct, as those rtl operators aren't defined when
924 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Float-vector min/max expander named "<code><mode>3" (VF modes).  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register first;
;; the operands are then fixed up with ix86_fixup_binary_operands_no_copy.
926 (define_expand "<code><mode>3"
927 [(set (match_operand:VF 0 "register_operand" "")
929 (match_operand:VF 1 "nonimmediate_operand" "")
930 (match_operand:VF 2 "nonimmediate_operand" "")))]
933 if (!flag_finite_math_only)
934 operands[1] = force_reg (<MODE>mode, operands[1]);
935 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only when flag_finite_math_only is set.  Operand 1 is
;; marked commutative ("%") and may be memory.  Also requires TARGET_SSE
;; and ix86_binary_operator_ok.
938 (define_insn "*<code><mode>3_finite"
939 [(set (match_operand:VF 0 "register_operand" "=x,x")
941 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
942 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
943 "TARGET_SSE && flag_finite_math_only
944 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
946 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
947 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
948 [(set_attr "isa" "noavx,avx")
949 (set_attr "type" "sseadd")
950 (set_attr "prefix" "orig,vex")
951 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is not set.  Unlike the
;; "_finite" variant, operand 1 must be a register and carries no
;; commutative marker.
953 (define_insn "*<code><mode>3"
954 [(set (match_operand:VF 0 "register_operand" "=x,x")
956 (match_operand:VF 1 "register_operand" "0,x")
957 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
958 "TARGET_SSE && !flag_finite_math_only"
960 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
961 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
962 [(set_attr "isa" "noavx,avx")
963 (set_attr "type" "sseadd")
964 (set_attr "prefix" "orig,vex")
965 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on 128-bit float vectors (VF_128): uses
;; <ssescalarmodesuffix> in the mnemonic and <ssescalarmode> as the insn
;; mode.
;; NOTE(review): "type" is "sse" here while the full-vector min/max insns
;; use "sseadd" -- confirm this is intended.  The RTL around the operation
;; and the insn condition are not visible here.
967 (define_insn "<sse>_vm<code><mode>3"
968 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
971 (match_operand:VF_128 1 "register_operand" "0,x")
972 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
977 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
978 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
979 [(set_attr "isa" "noavx,avx")
980 (set_attr "type" "sse")
981 (set_attr "prefix" "orig,vex")
982 (set_attr "mode" "<ssescalarmode>")])
984 ;; These versions of the min/max patterns implement exactly the operations
985 ;; min = (op1 < op2 ? op1 : op2)
986 ;; max = (!(op1 < op2) ? op1 : op2)
987 ;; Their operands are not commutative, and thus they may be used in the
988 ;; presence of -0.0 and NaN.
;; Non-commutative float-vector minimum (VF modes): operand 1 must be a
;; register and carries no "%" marker.  Emits min/vmin with
;; <ssemodesuffix>.
;; NOTE(review): the unspec name and the insn condition are not visible
;; here.
990 (define_insn "*ieee_smin<mode>3"
991 [(set (match_operand:VF 0 "register_operand" "=x,x")
993 [(match_operand:VF 1 "register_operand" "0,x")
994 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
998 min<ssemodesuffix>\t{%2, %0|%0, %2}
999 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1000 [(set_attr "isa" "noavx,avx")
1001 (set_attr "type" "sseadd")
1002 (set_attr "prefix" "orig,vex")
1003 (set_attr "mode" "<MODE>")])
;; Non-commutative float-vector maximum (VF modes): operand 1 must be a
;; register and carries no "%" marker.  Emits max/vmax with
;; <ssemodesuffix>.
;; NOTE(review): the unspec name and the insn condition are not visible
;; here.
1005 (define_insn "*ieee_smax<mode>3"
1006 [(set (match_operand:VF 0 "register_operand" "=x,x")
1008 [(match_operand:VF 1 "register_operand" "0,x")
1009 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1013 max<ssemodesuffix>\t{%2, %0|%0, %2}
1014 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1015 [(set_attr "isa" "noavx,avx")
1016 (set_attr "type" "sseadd")
1017 (set_attr "prefix" "orig,vex")
1018 (set_attr "mode" "<MODE>")])
;; V4DF insn emitting vaddsubpd (three-operand VEX form only).  The visible
;; RTL contains a (minus op1 op2) on the same operands as the preceding
;; binary operation.
;; NOTE(review): the operator combining the two results, its selector and
;; the insn condition are not visible here.
1020 (define_insn "avx_addsubv4df3"
1021 [(set (match_operand:V4DF 0 "register_operand" "=x")
1024 (match_operand:V4DF 1 "register_operand" "x")
1025 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1026 (minus:V4DF (match_dup 1) (match_dup 2))
1029 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1030 [(set_attr "type" "sseadd")
1031 (set_attr "prefix" "vex")
1032 (set_attr "mode" "V4DF")])
;; V2DF insn emitting addsubpd (non-AVX, operand 1 tied to the destination)
;; or vaddsubpd (AVX).  The "atom_unit" attribute is "complex".
;; NOTE(review): the operator combining the add and subtract results and
;; the insn condition are not visible here.
1034 (define_insn "sse3_addsubv2df3"
1035 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1038 (match_operand:V2DF 1 "register_operand" "0,x")
1039 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1040 (minus:V2DF (match_dup 1) (match_dup 2))
1044 addsubpd\t{%2, %0|%0, %2}
1045 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1046 [(set_attr "isa" "noavx,avx")
1047 (set_attr "type" "sseadd")
1048 (set_attr "atom_unit" "complex")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "mode" "V2DF")])
;; V8SF insn emitting vaddsubps (three-operand VEX form only).
;; NOTE(review): the operator combining the add and subtract results and
;; the insn condition are not visible here.
1052 (define_insn "avx_addsubv8sf3"
1053 [(set (match_operand:V8SF 0 "register_operand" "=x")
1056 (match_operand:V8SF 1 "register_operand" "x")
1057 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1058 (minus:V8SF (match_dup 1) (match_dup 2))
1061 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1062 [(set_attr "type" "sseadd")
1063 (set_attr "prefix" "vex")
1064 (set_attr "mode" "V8SF")])
;; V4SF insn emitting addsubps (non-AVX, operand 1 tied to the destination)
;; or vaddsubps (AVX).  "prefix_rep" is "1" for the non-AVX alternative and
;; "*" for the AVX one.
;; NOTE(review): the operator combining the add and subtract results and
;; the insn condition are not visible here.
1066 (define_insn "sse3_addsubv4sf3"
1067 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1070 (match_operand:V4SF 1 "register_operand" "0,x")
1071 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1072 (minus:V4SF (match_dup 1) (match_dup 2))
1076 addsubps\t{%2, %0|%0, %2}
1077 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1078 [(set_attr "isa" "noavx,avx")
1079 (set_attr "type" "sseadd")
1080 (set_attr "prefix" "orig,vex")
1081 (set_attr "prefix_rep" "1,*")
1082 (set_attr "mode" "V4SF")])
;; V4DF horizontal add/subtract, emitted as vh<plusminus_mnemonic>pd.  The
;; visible RTL selects the DF element pairs (0,1) and (2,3) of operand 1
;; and the same pairs of operand 2.
;; NOTE(review): the operators applied to each pair, the order in which the
;; four results are concatenated and the insn condition are not visible
;; here.
1084 (define_insn "avx_h<plusminus_insn>v4df3"
1085 [(set (match_operand:V4DF 0 "register_operand" "=x")
1090 (match_operand:V4DF 1 "register_operand" "x")
1091 (parallel [(const_int 0)]))
1092 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1094 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1095 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1099 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1100 (parallel [(const_int 0)]))
1101 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1103 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1104 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1106 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1107 [(set_attr "type" "sseadd")
1108 (set_attr "prefix" "vex")
1109 (set_attr "mode" "V4DF")])
;; V2DF horizontal add/subtract.  The RTL selects DF elements 0 and 1 of
;; each source operand.  Alternative 0 (noavx) ties operand 1 to the
;; destination and emits h<plusminus_mnemonic>pd; alternative 1 (avx) emits
;; the three-operand vh<plusminus_mnemonic>pd.
1111 (define_insn "sse3_h<plusminus_insn>v2df3"
1112 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1116 (match_operand:V2DF 1 "register_operand" "0,x")
1117 (parallel [(const_int 0)]))
1118 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1121 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1122 (parallel [(const_int 0)]))
1123 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1126 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1127 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1128 [(set_attr "isa" "noavx,avx")
1129 (set_attr "type" "sseadd")
1130 (set_attr "prefix" "orig,vex")
1131 (set_attr "mode" "V2DF")])
;; V8SF horizontal add/subtract.  The RTL first selects SF elements 0..3 of
;; operand 1 and operand 2, then elements 4..7 of operand 1 and operand 2.
;; Emits vh<plusminus_mnemonic>ps in VEX form.
1133 (define_insn "avx_h<plusminus_insn>v8sf3"
1134 [(set (match_operand:V8SF 0 "register_operand" "=x")
1140 (match_operand:V8SF 1 "register_operand" "x")
1141 (parallel [(const_int 0)]))
1142 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1144 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1145 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1149 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1150 (parallel [(const_int 0)]))
1151 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1153 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1154 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1158 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1159 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1162 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1166 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1168 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1169 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1171 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract.  The RTL selects SF elements 0..3 of each
;; source operand.  Alternative 0 (noavx) ties operand 1 to the destination
;; and emits h<plusminus_mnemonic>ps; alternative 1 (avx) emits the
;; three-operand vh<plusminus_mnemonic>ps.
1176 (define_insn "sse3_h<plusminus_insn>v4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1182 (match_operand:V4SF 1 "register_operand" "0,x")
1183 (parallel [(const_int 0)]))
1184 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1186 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1187 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1191 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1192 (parallel [(const_int 0)]))
1193 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1195 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1196 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1199 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1200 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1201 [(set_attr "isa" "noavx,avx")
1202 (set_attr "type" "sseadd")
1203 (set_attr "atom_unit" "complex")
1204 (set_attr "prefix" "orig,vex")
1205 (set_attr "prefix_rep" "1,*")
1206 (set_attr "mode" "V4SF")])
;; Expander for the V4DF sum reduction.  Emits three insns: a horizontal add
;; of operand 1 with itself into tmp, a vperm2f128 of tmp with itself using
;; immediate 1 into tmp2, and a vector add of tmp and tmp2 into operand 0.
1208 (define_expand "reduc_splus_v4df"
1209 [(match_operand:V4DF 0 "register_operand" "")
1210 (match_operand:V4DF 1 "register_operand" "")]
1213 rtx tmp = gen_reg_rtx (V4DFmode);
1214 rtx tmp2 = gen_reg_rtx (V4DFmode);
1215 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1216 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1217 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for the V2DF sum reduction: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1221 (define_expand "reduc_splus_v2df"
1222 [(match_operand:V2DF 0 "register_operand" "")
1223 (match_operand:V2DF 1 "register_operand" "")]
1226 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V8SF sum reduction.  Emits two successive horizontal adds
;; (operand 1 with itself, then that result with itself), a vperm2f128 of
;; the second result with immediate 1 (reusing tmp), and a final vector add
;; of tmp and tmp2 into operand 0.
1230 (define_expand "reduc_splus_v8sf"
1231 [(match_operand:V8SF 0 "register_operand" "")
1232 (match_operand:V8SF 1 "register_operand" "")]
1235 rtx tmp = gen_reg_rtx (V8SFmode);
1236 rtx tmp2 = gen_reg_rtx (V8SFmode);
1237 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1238 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1239 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1240 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for the V4SF sum reduction.  Two code paths are visible: a pair
;; of horizontal adds (operand 1 with itself, then the result with itself),
;; and a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full source.
1244 (define_expand "reduc_splus_v4sf"
1245 [(match_operand:V4SF 0 "register_operand" "")
1246 (match_operand:V4SF 1 "register_operand" "")]
1251 rtx tmp = gen_reg_rtx (V4SFmode);
1252 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1253 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1256 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for the V4SF signed-max reduction; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
1261 (define_expand "reduc_smax_v4sf"
1262 [(match_operand:V4SF 0 "register_operand" "")
1263 (match_operand:V4SF 1 "register_operand" "")]
1266 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for the V4SF signed-min reduction; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
1270 (define_expand "reduc_smin_v4sf"
1271 [(match_operand:V4SF 0 "register_operand" "")
1272 (match_operand:V4SF 1 "register_operand" "")]
1275 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1281 ;; Parallel floating point comparisons
1283 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare over every VF mode.  Operand 3 is an SImode constant
;; accepted by const_0_to_31_operand and is emitted as the first AT&T
;; operand of vcmp<ssemodesuffix>; length_immediate accounts for it.
1285 (define_insn "avx_cmp<mode>3"
1286 [(set (match_operand:VF 0 "register_operand" "=x")
1288 [(match_operand:VF 1 "register_operand" "x")
1289 (match_operand:VF 2 "nonimmediate_operand" "xm")
1290 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1293 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1294 [(set_attr "type" "ssecmp")
1295 (set_attr "length_immediate" "1")
1296 (set_attr "prefix" "vex")
1297 (set_attr "mode" "<MODE>")])
;; Compare over the 128-bit float modes (VF_128) that emits the scalar
;; mnemonic vcmp<ssescalarmodesuffix>; the insn's mode attribute is the
;; scalar mode <ssescalarmode>.  Operand 3 is the 0..31 immediate.
1299 (define_insn "avx_vmcmp<mode>3"
1300 [(set (match_operand:VF_128 0 "register_operand" "=x")
1303 [(match_operand:VF_128 1 "register_operand" "x")
1304 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1305 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1310 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1311 [(set_attr "type" "ssecmp")
1312 (set_attr "length_immediate" "1")
1313 (set_attr "prefix" "vex")
1314 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing compare for comparison codes of class RTX_COMM_COMPARE.
;; Operand 1 carries the "%" commutativity marker.  The comparison operator
;; (operand 3) is spliced into the mnemonic through %D3.  Alternative 0 is
;; the two-operand non-AVX form, alternative 1 the three-operand AVX form.
1316 (define_insn "*<sse>_maskcmp<mode>3_comm"
1317 [(set (match_operand:VF 0 "register_operand" "=x,x")
1318 (match_operator:VF 3 "sse_comparison_operator"
1319 [(match_operand:VF 1 "register_operand" "%0,x")
1320 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1322 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1324 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1325 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1326 [(set_attr "isa" "noavx,avx")
1327 (set_attr "type" "ssecmp")
1328 (set_attr "length_immediate" "1")
1329 (set_attr "prefix" "orig,vex")
1330 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any sse_comparison_operator (operand 3).
;; Unlike the _comm variant, operand 1 has no commutativity marker.
;; Alternative 0 ties operand 1 to the destination (non-AVX); alternative 1
;; is the three-operand AVX form.
1332 (define_insn "<sse>_maskcmp<mode>3"
1333 [(set (match_operand:VF 0 "register_operand" "=x,x")
1334 (match_operator:VF 3 "sse_comparison_operator"
1335 [(match_operand:VF 1 "register_operand" "0,x")
1336 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1339 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1340 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1341 [(set_attr "isa" "noavx,avx")
1342 (set_attr "type" "ssecmp")
1343 (set_attr "length_immediate" "1")
1344 (set_attr "prefix" "orig,vex")
1345 (set_attr "mode" "<MODE>")])
;; Mask-producing compare over VF_128 that emits the scalar mnemonic
;; (cmp%D3<ssescalarmodesuffix> / vcmp%D3<ssescalarmodesuffix>).
;; NOTE(review): length_immediate is "1,*" here but "1" for both
;; alternatives of the packed maskcmp patterns -- confirm this is intended.
1347 (define_insn "<sse>_vmmaskcmp<mode>3"
1348 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1350 (match_operator:VF_128 3 "sse_comparison_operator"
1351 [(match_operand:VF_128 1 "register_operand" "0,x")
1352 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1357 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1358 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1359 [(set_attr "isa" "noavx,avx")
1360 (set_attr "type" "ssecmp")
1361 (set_attr "length_immediate" "1,*")
1362 (set_attr "prefix" "orig,vex")
1363 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG (mode CCFP) from element 0 of operand 0 and element 0 of
;; operand 1; enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
;; Emits comi<ssemodesuffix>, with the VEX form chosen via %v / maybe_vex.
;; prefix_data16 is computed from whether the insn mode is DF; the value used
;; in the DF case is on a line not visible in this excerpt.
1365 (define_insn "<sse>_comi"
1366 [(set (reg:CCFP FLAGS_REG)
1369 (match_operand:<ssevecmode> 0 "register_operand" "x")
1370 (parallel [(const_int 0)]))
1372 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))))]
1374 "SSE_FLOAT_MODE_P (<MODE>mode)"
1375 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1376 [(set_attr "type" "ssecomi")
1377 (set_attr "prefix" "maybe_vex")
1378 (set_attr "prefix_rep" "0")
1379 (set (attr "prefix_data16")
1380 (if_then_else (eq_attr "mode" "DF")
1382 (const_string "0")))
1383 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern but sets FLAGS_REG in mode CCFPU and emits
;; ucomi<ssemodesuffix>.  Compares element 0 of operand 0 with element 0 of
;; operand 1.  The DF-case value of prefix_data16 is on a line not visible
;; in this excerpt.
1385 (define_insn "<sse>_ucomi"
1386 [(set (reg:CCFPU FLAGS_REG)
1389 (match_operand:<ssevecmode> 0 "register_operand" "x")
1390 (parallel [(const_int 0)]))
1392 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1393 (parallel [(const_int 0)]))))]
1394 "SSE_FLOAT_MODE_P (<MODE>mode)"
1395 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1396 [(set_attr "type" "ssecomi")
1397 (set_attr "prefix" "maybe_vex")
1398 (set_attr "prefix_rep" "0")
1399 (set (attr "prefix_data16")
1400 (if_then_else (eq_attr "mode" "DF")
1402 (const_string "0")))
1403 (set_attr "mode" "<MODE>")])
;; 256-bit vector conditional select driven by a floating-point comparison.
;; Operands 4 and 5 (VF_256) are the compared values, operator 3 the
;; comparison, operands 1 and 2 (V_256) the values selected between.  The
;; condition requires the data mode and the comparison mode to have the same
;; number of units.  Expansion is delegated to ix86_expand_fp_vcond.
1405 (define_expand "vcond<V_256:mode><VF_256:mode>"
1406 [(set (match_operand:V_256 0 "register_operand" "")
1408 (match_operator 3 ""
1409 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1410 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1411 (match_operand:V_256 1 "general_operand" "")
1412 (match_operand:V_256 2 "general_operand" "")))]
1414 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1415 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1417 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the 256-bit vcond expander: operands 4 and 5
;; (VF_128) are compared by operator 3 and operands 1 and 2 (V_128) are
;; selected between.  Requires equal unit counts in both modes and delegates
;; to ix86_expand_fp_vcond.
1422 (define_expand "vcond<V_128:mode><VF_128:mode>"
1423 [(set (match_operand:V_128 0 "register_operand" "")
1425 (match_operator 3 ""
1426 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1427 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1428 (match_operand:V_128 1 "general_operand" "")
1429 (match_operand:V_128 2 "general_operand" "")))]
1431 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1432 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1434 bool ok = ix86_expand_fp_vcond (operands);
1439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1441 ;; Parallel floating point logical operations
1443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not over every VF mode.  The C output code
;; formats the mnemonic into a static buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise; which_alternative picks the two-operand "andn" template or the
;; three-operand "vandn" template.
1445 (define_insn "<sse>_andnot<mode>3"
1446 [(set (match_operand:VF 0 "register_operand" "=x,x")
1449 (match_operand:VF 1 "register_operand" "0,x"))
1450 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1453 static char buf[32];
1456 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1458 switch (which_alternative)
1461 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1464 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1470 snprintf (buf, sizeof (buf), insn, suffix);
1473 [(set_attr "isa" "noavx,avx")
1474 (set_attr "type" "sselog")
1475 (set_attr "prefix" "orig,vex")
1476 (set_attr "mode" "<MODE>")])
;; Expander for the packed floating-point logical operations named by
;; <code> over every VF mode.  Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operands.
1478 (define_expand "<code><mode>3"
1479 [(set (match_operand:VF 0 "register_operand" "")
1481 (match_operand:VF 1 "nonimmediate_operand" "")
1482 (match_operand:VF 2 "nonimmediate_operand" "")))]
1484 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed floating-point logical insn for <code> over every VF mode, valid
;; when TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is marked commutative ("%").  The C output code formats
;; "<logic>" (two-operand) or "v<logic>" (three-operand) with suffix "ps"
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssemodesuffix>.
1486 (define_insn "*<code><mode>3"
1487 [(set (match_operand:VF 0 "register_operand" "=x,x")
1489 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1490 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1491 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1493 static char buf[32];
1496 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1498 switch (which_alternative)
1501 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1504 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1510 snprintf (buf, sizeof (buf), insn, suffix);
1513 [(set_attr "isa" "noavx,avx")
1514 (set_attr "type" "sselog")
1515 (set_attr "prefix" "orig,vex")
1516 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is set to the mask returned by
;; ix86_build_signbit_mask (<MODE>mode, 1, 0); operands 4 and 5 are fresh
;; pseudos.  Operand 1 is combined with the complement of the mask and
;; operand 2 is ANDed with the mask; the result is the IOR of operands 4
;; and 5.
;; NOTE(review): the lines that set operands 4 and 5 are not visible in this
;; excerpt -- presumably 4 receives the operand-1 term and 5 the operand-2
;; term; confirm against the full source.
1518 (define_expand "copysign<mode>3"
1521 (not:VF (match_dup 3))
1522 (match_operand:VF 1 "nonimmediate_operand" "")))
1524 (and:VF (match_dup 3)
1525 (match_operand:VF 2 "nonimmediate_operand" "")))
1526 (set (match_operand:VF 0 "register_operand" "")
1527 (ior:VF (match_dup 4) (match_dup 5)))]
1530 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1532 operands[4] = gen_reg_rtx (<MODE>mode);
1533 operands[5] = gen_reg_rtx (<MODE>mode);
1536 ;; Also define scalar versions. These are used for abs, neg, and
1537 ;; conditional move. Using subregs into vector modes causes register
1538 ;; allocation lossage. These patterns do not allow memory operands
1539 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All operands are registers; no memory
;; alternative is offered.  The emitted mnemonic is the packed "andn" /
;; "vandn" with suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, else <ssevecmodesuffix>; the insn mode attribute is the vector mode
;; <ssevecmode>.
1541 (define_insn "*andnot<mode>3"
1542 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1545 (match_operand:MODEF 1 "register_operand" "0,x"))
1546 (match_operand:MODEF 2 "register_operand" "x,x")))]
1547 "SSE_FLOAT_MODE_P (<MODE>mode)"
1549 static char buf[32];
1552 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1554 switch (which_alternative)
1557 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1560 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1566 snprintf (buf, sizeof (buf), insn, suffix);
1569 [(set_attr "isa" "noavx,avx")
1570 (set_attr "type" "sselog")
1571 (set_attr "prefix" "orig,vex")
1572 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for <code>.  All operands are registers
;; and operand 1 is marked commutative ("%").  Emits the packed "<logic>" /
;; "v<logic>" mnemonic with suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssevecmodesuffix>.
1574 (define_insn "*<code><mode>3"
1575 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1577 (match_operand:MODEF 1 "register_operand" "%0,x")
1578 (match_operand:MODEF 2 "register_operand" "x,x")))]
1579 "SSE_FLOAT_MODE_P (<MODE>mode)"
1581 static char buf[32];
1584 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1586 switch (which_alternative)
1589 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1592 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1598 snprintf (buf, sizeof (buf), insn, suffix);
1601 [(set_attr "isa" "noavx,avx")
1602 (set_attr "type" "sselog")
1603 (set_attr "prefix" "orig,vex")
1604 (set_attr "mode" "<ssevecmode>")])
1606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1608 ;; FMA4 floating point multiply/accumulate instructions. This
1609 ;; includes the scalar version of the instructions as well as the
1612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1614 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1615 ;; combine to generate a multiply/add with two memory references. We then
1616 ;; split this insn, into loading up the destination register with one of the
1617 ;; memory operations. If we don't manage to split the insn, reload will
1618 ;; generate the appropriate moves. The reason this is needed, is that combine
1619 ;; has already folded one of the memory references into both the multiply and
1620 ;; add insns, and it can't generate a new pseudo. I.e.:
1621 ;; (set (reg1) (mem (addr1)))
1622 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1623 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1625 ;; ??? This is historic, pre-dating the gimple fma transformation.
1626 ;; We could now properly represent that only one memory operand is
1627 ;; allowed and not be penalized during optimization.
1629 ;; Intrinsic FMA operations.
1631 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE: operands 1, 2
;; and 3 are used unnegated.  Enabled when either TARGET_FMA or TARGET_FMA4
;; is set and TARGET_SSE_MATH is set.
1632 (define_expand "fma<mode>4"
1633 [(set (match_operand:FMAMODE 0 "register_operand")
1635 (match_operand:FMAMODE 1 "nonimmediate_operand")
1636 (match_operand:FMAMODE 2 "nonimmediate_operand")
1637 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1638 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander: same as fma<mode>4 except
;; that operand 3 is wrapped in neg.  Same enabling condition.
1640 (define_expand "fms<mode>4"
1641 [(set (match_operand:FMAMODE 0 "register_operand")
1643 (match_operand:FMAMODE 1 "nonimmediate_operand")
1644 (match_operand:FMAMODE 2 "nonimmediate_operand")
1645 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1646 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander: operand 1 is wrapped
;; in neg, operands 2 and 3 are used as-is.  Same enabling condition as
;; fma<mode>4.
1648 (define_expand "fnma<mode>4"
1649 [(set (match_operand:FMAMODE 0 "register_operand")
1651 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1652 (match_operand:FMAMODE 2 "nonimmediate_operand")
1653 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1654 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander: both operand 1
;; and operand 3 are wrapped in neg.  Same enabling condition as fma<mode>4.
1656 (define_expand "fnms<mode>4"
1657 [(set (match_operand:FMAMODE 0 "register_operand")
1659 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1660 (match_operand:FMAMODE 2 "nonimmediate_operand")
1661 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1662 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1664 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Named expander for the intrinsic multiply-add over FMAMODE.  Unlike the
;; standard-name fma<mode>4 expander, its condition does not require
;; TARGET_SSE_MATH: TARGET_FMA or TARGET_FMA4 alone is sufficient.
1665 (define_expand "fma4i_fmadd_<mode>"
1666 [(set (match_operand:FMAMODE 0 "register_operand")
1668 (match_operand:FMAMODE 1 "nonimmediate_operand")
1669 (match_operand:FMAMODE 2 "nonimmediate_operand")
1670 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1671 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> over FMAMODE.  The two alternatives
;; allow a memory operand in operand 3 (alternative 0) or in operand 2
;; (alternative 1), never in both.  Operand 1 is marked commutative ("%").
1673 (define_insn "*fma4i_fmadd_<mode>"
1674 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1676 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1677 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1678 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1680 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1681 [(set_attr "type" "ssemuladd")
1682 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> over FMAMODE.  Memory is allowed in
;; operand 3 (alternative 0) or operand 2 (alternative 1), never in both.
1684 (define_insn "*fma4i_fmsub_<mode>"
1685 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1687 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1688 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1690 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1692 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1693 [(set_attr "type" "ssemuladd")
1694 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> over FMAMODE.  Memory is allowed in
;; operand 3 (alternative 0) or operand 2 (alternative 1), never in both.
1696 (define_insn "*fma4i_fnmadd_<mode>"
1697 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1700 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1701 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1702 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1704 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1705 [(set_attr "type" "ssemuladd")
1706 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> over FMAMODE.  Memory is allowed in
;; operand 3 (alternative 0) or operand 2 (alternative 1), never in both.
1708 (define_insn "*fma4i_fnmsub_<mode>"
1709 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1712 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1713 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1715 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1717 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1721 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1722 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar multiply-add over VF_128.  Its preparation
;; statement sets operand 4 to the zero constant of the vector mode
;; (CONST0_RTX).
1724 (define_expand "fma4i_vmfmadd_<mode>"
1725 [(set (match_operand:VF_128 0 "register_operand")
1728 (match_operand:VF_128 1 "nonimmediate_operand")
1729 (match_operand:VF_128 2 "nonimmediate_operand")
1730 (match_operand:VF_128 3 "nonimmediate_operand"))
1735 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar multiply-add over VF_128 with three
;; nonimmediate_operand inputs.
;; NOTE(review): the remainder of the pattern and its condition are not
;; visible in this excerpt.
1738 (define_expand "fmai_vmfmadd_<mode>"
1739 [(set (match_operand:VF_128 0 "register_operand")
1742 (match_operand:VF_128 1 "nonimmediate_operand")
1743 (match_operand:VF_128 2 "nonimmediate_operand")
1744 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand multiply-add over VF_128.  The three alternatives
;; map to the 132, 213 and 231 encodings: in the first two operand 1 is tied
;; to the destination, in the third operand 3 is.
1749 (define_insn "*fmai_fmadd_<mode>"
1750 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1753 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1754 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1755 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1760 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1761 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1762 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1763 [(set_attr "type" "ssemuladd")
1764 (set_attr "mode" "<MODE>")])
;; Scalar three-operand multiply-subtract over VF_128, with alternatives
;; for the 132, 213 and 231 encodings (destination tied to operand 1,
;; operand 1, and operand 3 respectively).
1766 (define_insn "*fmai_fmsub_<mode>"
1767 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1770 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1771 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1773 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1778 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1781 [(set_attr "type" "ssemuladd")
1782 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated multiply-add over VF_128, with alternatives
;; for the 132, 213 and 231 encodings (destination tied to operand 1,
;; operand 1, and operand 3 respectively).
1784 (define_insn "*fmai_fnmadd_<mode>"
1785 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1789 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1790 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1791 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1796 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1797 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1798 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1799 [(set_attr "type" "ssemuladd")
1800 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated multiply-subtract over VF_128, with
;; alternatives for the 132, 213 and 231 encodings (destination tied to
;; operand 1, operand 1, and operand 3 respectively).
1802 (define_insn "*fmai_fnmsub_<mode>"
1803 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1807 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1808 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1810 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1815 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1816 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1817 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd<ssescalarmodesuffix> over VF_128.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1821 (define_insn "*fma4i_vmfmadd_<mode>"
1822 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1825 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1826 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1827 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1828 (match_operand:VF_128 4 "const0_operand" "")
1831 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1832 [(set_attr "type" "ssemuladd")
1833 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix> over VF_128.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1835 (define_insn "*fma4i_vmfmsub_<mode>"
1836 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1839 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1840 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1842 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1843 (match_operand:VF_128 4 "const0_operand" "")
1846 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix> over VF_128.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1850 (define_insn "*fma4i_vmfnmadd_<mode>"
1851 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1855 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1856 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1857 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1858 (match_operand:VF_128 4 "const0_operand" "")
1861 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1862 [(set_attr "type" "ssemuladd")
1863 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix> over VF_128.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1), never in both.
1865 (define_insn "*fma4i_vmfnmsub_<mode>"
1866 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1870 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1871 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1873 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1874 (match_operand:VF_128 4 "const0_operand" "")
1877 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1878 [(set_attr "type" "ssemuladd")
1879 (set_attr "mode" "<MODE>")])
1881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1883 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1887 ;; It would be possible to represent these without the UNSPEC as
1890 ;; (fma op1 op2 op3)
1891 ;; (fma op1 op2 (neg op3))
1894 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply add/sub operation over every VF mode,
;; taking three nonimmediate_operand inputs.  Enabled when TARGET_FMA or
;; TARGET_FMA4 is set.
1896 (define_expand "fmaddsub_<mode>"
1897 [(set (match_operand:VF 0 "register_operand")
1899 [(match_operand:VF 1 "nonimmediate_operand")
1900 (match_operand:VF 2 "nonimmediate_operand")
1901 (match_operand:VF 3 "nonimmediate_operand")]
1903 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> over every VF mode.  Memory is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1), never
;; in both.
1905 (define_insn "*fma4_fmaddsub_<mode>"
1906 [(set (match_operand:VF 0 "register_operand" "=x,x")
1908 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1909 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1910 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1913 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1914 [(set_attr "type" "ssemuladd")
1915 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> over every VF mode.  Memory is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1), never
;; in both.
1917 (define_insn "*fma4_fmsubadd_<mode>"
1918 [(set (match_operand:VF 0 "register_operand" "=x,x")
1920 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1921 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1923 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1926 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1927 [(set_attr "type" "ssemuladd")
1928 (set_attr "mode" "<MODE>")])
1930 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1932 ;; FMA3 floating point multiply/accumulate instructions.
1934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand multiply-add over FMAMODE.  Alternatives 0 and 1 tie
;; operand 1 to the destination and emit the 132 and 213 encodings;
;; alternative 2 ties operand 3 to the destination and emits 231.
1936 (define_insn "*fma_fmadd_<mode>"
1937 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1939 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1940 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1941 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1944 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1945 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1946 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1947 [(set_attr "type" "ssemuladd")
1948 (set_attr "mode" "<MODE>")])
;; Three-operand multiply-subtract over FMAMODE.  Alternatives 0 and 1 tie
;; operand 1 to the destination and emit the 132 and 213 encodings;
;; alternative 2 ties operand 3 to the destination and emits 231.
1950 (define_insn "*fma_fmsub_<mode>"
1951 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1953 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1954 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1956 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1959 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1960 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1961 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1962 [(set_attr "type" "ssemuladd")
1963 (set_attr "mode" "<MODE>")])
;; Three-operand negated multiply-add over FMAMODE.  Alternatives 0 and 1
;; tie operand 1 to the destination and emit the 132 and 213 encodings;
;; alternative 2 ties operand 3 to the destination and emits 231.
1965 (define_insn "*fma_fnmadd_<mode>"
1966 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1969 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1970 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1971 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1974 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1975 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1976 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1977 [(set_attr "type" "ssemuladd")
1978 (set_attr "mode" "<MODE>")])
;; Three-operand negated multiply-subtract over FMAMODE.  Alternatives 0
;; and 1 tie operand 1 to the destination and emit the 132 and 213
;; encodings; alternative 2 ties operand 3 to the destination and emits 231.
;; The middle alternative (operand 1 in the destination, operand 2 a
;; register, operand 3 register-or-memory, printed as %3, %2, %0) must use
;; the 213 encoding, exactly as the sibling fmadd/fmsub/fnmadd patterns do;
;; emitting 231 there would multiply and subtract the wrong operands.
1980 (define_insn "*fma_fnmsub_<mode>"
1981 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1984 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1985 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1987 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1990 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1991 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1992 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1993 [(set_attr "type" "ssemuladd")
1994 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply add/sub over every VF mode.  Alternatives
;; 0 and 1 tie operand 1 to the destination and emit the 132 and 213
;; encodings; alternative 2 ties operand 3 to the destination and emits 231.
1996 (define_insn "*fma_fmaddsub_<mode>"
1997 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1999 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2000 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2001 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2005 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2006 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2007 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2008 [(set_attr "type" "ssemuladd")
2009 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply sub/add over every VF mode.  Alternatives
;; 0 and 1 tie operand 1 to the destination and emit the 132 and 213
;; encodings; alternative 2 ties operand 3 to the destination and emits 231.
2011 (define_insn "*fma_fmsubadd_<mode>"
2012 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2014 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2015 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2017 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2021 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2022 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2023 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
2027 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2029 ;; Parallel single-precision floating point conversion operations
2031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; float and combines it with the V4SF operand 1, which is tied to the
;; destination register.
2033 (define_insn "sse_cvtpi2ps"
2034 [(set (match_operand:V4SF 0 "register_operand" "=x")
2037 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2038 (match_operand:V4SF 1 "register_operand" "0")
2041 "cvtpi2ps\t{%2, %0|%0, %2}"
2042 [(set_attr "type" "ssecvt")
2043 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF operand 1 is wrapped in a V4SI unspec and elements 0
;; and 1 of the result are selected into the V2SI destination (constraint
;; "y").  Marked as unit "mmx".
2045 (define_insn "sse_cvtps2pi"
2046 [(set (match_operand:V2SI 0 "register_operand" "=y")
2048 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2050 (parallel [(const_int 0) (const_int 1)])))]
2052 "cvtps2pi\t{%1, %0|%0, %1}"
2053 [(set_attr "type" "ssecvt")
2054 (set_attr "unit" "mmx")
2055 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix to the V4SF operand 1 (giving V4SI) and selects
;; elements 0 and 1 into the V2SI destination (constraint "y").  Marked as
;; unit "mmx" with prefix_rep 0.
2057 (define_insn "sse_cvttps2pi"
2058 [(set (match_operand:V2SI 0 "register_operand" "=y")
2060 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2061 (parallel [(const_int 0) (const_int 1)])))]
2063 "cvttps2pi\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "ssecvt")
2065 (set_attr "unit" "mmx")
2066 (set_attr "prefix_rep" "0")
2067 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF and combines it with the
;; V4SF operand 1.  Alternatives 0 and 1 are the non-AVX register-source and
;; memory-source forms (operand 1 tied to the destination; they differ only
;; in the decode attributes); alternative 2 is the three-operand AVX form.
2069 (define_insn "sse_cvtsi2ss"
2070 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2073 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2074 (match_operand:V4SF 1 "register_operand" "0,0,x")
2078 cvtsi2ss\t{%2, %0|%0, %2}
2079 cvtsi2ss\t{%2, %0|%0, %2}
2080 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2081 [(set_attr "isa" "noavx,noavx,avx")
2082 (set_attr "type" "sseicvt")
2083 (set_attr "athlon_decode" "vector,double,*")
2084 (set_attr "amdfam10_decode" "vector,double,*")
2085 (set_attr "bdver1_decode" "double,direct,*")
2086 (set_attr "prefix" "orig,orig,vex")
2087 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: operand 2 is DImode and the pattern
;; requires TARGET_SSE && TARGET_64BIT.  Alternatives 0 and 1 are the
;; non-AVX forms (prefix_rex 1); alternative 2 is the AVX form with
;; length_vex 4.
2089 (define_insn "sse_cvtsi2ssq"
2090 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2093 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2094 (match_operand:V4SF 1 "register_operand" "0,0,x")
2096 "TARGET_SSE && TARGET_64BIT"
2098 cvtsi2ssq\t{%2, %0|%0, %2}
2099 cvtsi2ssq\t{%2, %0|%0, %2}
2100 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2101 [(set_attr "isa" "noavx,noavx,avx")
2102 (set_attr "type" "sseicvt")
2103 (set_attr "athlon_decode" "vector,double,*")
2104 (set_attr "amdfam10_decode" "vector,double,*")
2105 (set_attr "bdver1_decode" "double,direct,*")
2106 (set_attr "length_vex" "*,*,4")
2107 (set_attr "prefix_rex" "1,1,*")
2108 (set_attr "prefix" "orig,orig,vex")
2109 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of the V4SF operand 1 to SImode through
;; UNSPEC_FIX_NOTRUNC.  The two alternatives take the source from a register
;; or from memory.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether that is intentional.
2111 (define_insn "sse_cvtss2si"
2112 [(set (match_operand:SI 0 "register_operand" "=r,r")
2115 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2116 (parallel [(const_int 0)]))]
2117 UNSPEC_FIX_NOTRUNC))]
2119 "%vcvtss2si\t{%1, %0|%0, %1}"
2120 [(set_attr "type" "sseicvt")
2121 (set_attr "athlon_decode" "double,vector")
2122 (set_attr "bdver1_decode" "double,double")
2123 (set_attr "prefix_rep" "1")
2124 (set_attr "prefix" "maybe_vex")
2125 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SFmode operand rather
;; than element 0 of a V4SF vector; converted to SImode through
;; UNSPEC_FIX_NOTRUNC and emitted as the same cvtss2si mnemonic.
2127 (define_insn "sse_cvtss2si_2"
2128 [(set (match_operand:SI 0 "register_operand" "=r,r")
2129 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2130 UNSPEC_FIX_NOTRUNC))]
2132 "%vcvtss2si\t{%1, %0|%0, %1}"
2133 [(set_attr "type" "sseicvt")
2134 (set_attr "athlon_decode" "double,vector")
2135 (set_attr "amdfam10_decode" "double,double")
2136 (set_attr "bdver1_decode" "double,double")
2137 (set_attr "prefix_rep" "1")
2138 (set_attr "prefix" "maybe_vex")
2139 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: converts element 0 of the V4SF operand 1
;; to DImode through UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE &&
;; TARGET_64BIT; the {q} suffix is part of the output template.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2siq_2 -- confirm whether that is intentional.
2141 (define_insn "sse_cvtss2siq"
2142 [(set (match_operand:DI 0 "register_operand" "=r,r")
2145 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2146 (parallel [(const_int 0)]))]
2147 UNSPEC_FIX_NOTRUNC))]
2148 "TARGET_SSE && TARGET_64BIT"
2149 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2150 [(set_attr "type" "sseicvt")
2151 (set_attr "athlon_decode" "double,vector")
2152 (set_attr "bdver1_decode" "double,double")
2153 (set_attr "prefix_rep" "1")
2154 (set_attr "prefix" "maybe_vex")
2155 (set_attr "mode" "DI")])
;; 64-bit cvtss2si{q} matching a scalar SFmode operand 1 (register or
;; memory) and producing a DImode general register under
;; UNSPEC_FIX_NOTRUNC.  Enabled only for TARGET_SSE && TARGET_64BIT.
2157 (define_insn "sse_cvtss2siq_2"
2158 [(set (match_operand:DI 0 "register_operand" "=r,r")
2159 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2160 UNSPEC_FIX_NOTRUNC))]
2161 "TARGET_SSE && TARGET_64BIT"
2162 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "sseicvt")
2164 (set_attr "athlon_decode" "double,vector")
2165 (set_attr "amdfam10_decode" "double,double")
2166 (set_attr "bdver1_decode" "double,double")
2167 (set_attr "prefix_rep" "1")
2168 (set_attr "prefix" "maybe_vex")
2169 (set_attr "mode" "DI")])
;; cvttss2si: SImode general-register result computed from index 0 of the
;; V4SF operand 1 (SSE register or memory).  No UNSPEC is involved here,
;; in contrast to sse_cvtss2si.
;; NOTE(review): presumably the truncating (round-toward-zero) conversion,
;; going by the cvtt mnemonic -- confirm.
2171 (define_insn "sse_cvttss2si"
2172 [(set (match_operand:SI 0 "register_operand" "=r,r")
2175 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2176 (parallel [(const_int 0)]))))]
2178 "%vcvttss2si\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "sseicvt")
2180 (set_attr "athlon_decode" "double,vector")
2181 (set_attr "amdfam10_decode" "double,double")
2182 (set_attr "bdver1_decode" "double,double")
2183 (set_attr "prefix_rep" "1")
2184 (set_attr "prefix" "maybe_vex")
2185 (set_attr "mode" "SI")])
;; 64-bit form of cvttss2si: DImode general-register result from index 0 of
;; the V4SF operand 1; the template appends the {q} suffix.
;; Enabled only for TARGET_SSE && TARGET_64BIT.
2187 (define_insn "sse_cvttss2siq"
2188 [(set (match_operand:DI 0 "register_operand" "=r,r")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2192 (parallel [(const_int 0)]))))]
2193 "TARGET_SSE && TARGET_64BIT"
2194 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2195 [(set_attr "type" "sseicvt")
2196 (set_attr "athlon_decode" "double,vector")
2197 (set_attr "amdfam10_decode" "double,double")
2198 (set_attr "bdver1_decode" "double,double")
2199 (set_attr "prefix_rep" "1")
2200 (set_attr "prefix" "maybe_vex")
2201 (set_attr "mode" "DI")])
;; vcvtdq2ps on 256-bit operands: signed integer to float conversion
;; (float:V8SF) of the V8SI operand 1, which may be a register or memory.
;; Always VEX-encoded (prefix "vex").
2203 (define_insn "avx_cvtdq2ps256"
2204 [(set (match_operand:V8SF 0 "register_operand" "=x")
2205 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2207 "vcvtdq2ps\t{%1, %0|%0, %1}"
2208 [(set_attr "type" "ssecvt")
2209 (set_attr "prefix" "vex")
2210 (set_attr "mode" "V8SF")])
;; cvtdq2ps: signed integer to float conversion (float:V4SF) of the V4SI
;; operand 1 (register or memory) into a V4SF register.
2212 (define_insn "sse2_cvtdq2ps"
2213 [(set (match_operand:V4SF 0 "register_operand" "=x")
2214 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2216 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2217 [(set_attr "type" "ssecvt")
2218 (set_attr "prefix" "maybe_vex")
2219 (set_attr "mode" "V4SF")])
;; Expander for a V4SI -> V4SF conversion built on the signed float:V4SF
;; conversion plus a correction.  The preparation code sets up:
;;   operand 3    = V4SF zero vector, forced into a register
;;   operand 4    = V4SF constant vector built from 2**32 (real_ldexp of
;;                  dconst1 by 32), forced into a register
;;   operands 5-7 = fresh V4SF temporaries
;; The pattern then uses an lt compare of (5) against the zero vector, an
;; and of (6) with the 2**32 vector, and finally sets operand 0 to
;; (5) plus (7).
;; NOTE(review): presumably this implements the unsigned conversion (add
;; 2**32 to lanes whose signed conversion came out negative) -- confirm.
2221 (define_expand "sse2_cvtudq2ps"
2223 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2225 (lt:V4SF (match_dup 5) (match_dup 3)))
2227 (and:V4SF (match_dup 6) (match_dup 4)))
2228 (set (match_operand:V4SF 0 "register_operand" "")
2229 (plus:V4SF (match_dup 5) (match_dup 7)))]
2232 REAL_VALUE_TYPE TWO32r;
2236 real_ldexp (&TWO32r, &dconst1, 32);
2237 x = const_double_from_real_value (TWO32r, SFmode);
2239 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2240 operands[4] = force_reg (V4SFmode,
2241 ix86_build_const_vector (V4SFmode, 1, x));
2243 for (i = 5; i < 8; i++)
2244 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq on 256-bit operands: V8SF operand 1 (register or memory) to a
;; V8SI register, expressed as UNSPEC_FIX_NOTRUNC.  Insn mode is OI.
2247 (define_insn "avx_cvtps2dq256"
2248 [(set (match_operand:V8SI 0 "register_operand" "=x")
2249 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2250 UNSPEC_FIX_NOTRUNC))]
2252 "vcvtps2dq\t{%1, %0|%0, %1}"
2253 [(set_attr "type" "ssecvt")
2254 (set_attr "prefix" "vex")
2255 (set_attr "mode" "OI")])
;; cvtps2dq: V4SF operand 1 (register or memory) to a V4SI register,
;; expressed as UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute is
;; computed from a TARGET_AVX test, with "1" as one of its outcomes.
2257 (define_insn "sse2_cvtps2dq"
2258 [(set (match_operand:V4SI 0 "register_operand" "=x")
2259 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2260 UNSPEC_FIX_NOTRUNC))]
2262 "%vcvtps2dq\t{%1, %0|%0, %1}"
2263 [(set_attr "type" "ssecvt")
2264 (set (attr "prefix_data16")
2266 (match_test "TARGET_AVX")
2268 (const_string "1")))
2269 (set_attr "prefix" "maybe_vex")
2270 (set_attr "mode" "TI")])
;; vcvttps2dq on 256-bit operands: float to signed integer conversion
;; (fix:V8SI) of the V8SF operand 1 (register or memory).  Insn mode is OI.
2272 (define_insn "avx_cvttps2dq256"
2273 [(set (match_operand:V8SI 0 "register_operand" "=x")
2274 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2276 "vcvttps2dq\t{%1, %0|%0, %1}"
2277 [(set_attr "type" "ssecvt")
2278 (set_attr "prefix" "vex")
2279 (set_attr "mode" "OI")])
;; cvttps2dq: float to signed integer conversion (fix:V4SI) of the V4SF
;; operand 1 (register or memory).  Both prefix_rep and prefix_data16 are
;; computed from a TARGET_AVX test.
2281 (define_insn "sse2_cvttps2dq"
2282 [(set (match_operand:V4SI 0 "register_operand" "=x")
2283 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2285 "%vcvttps2dq\t{%1, %0|%0, %1}"
2286 [(set_attr "type" "ssecvt")
2287 (set (attr "prefix_rep")
2289 (match_test "TARGET_AVX")
2291 (const_string "1")))
2292 (set (attr "prefix_data16")
2294 (match_test "TARGET_AVX")
2296 (const_string "0")))
;; NOTE(review): prefix_data16 is assigned a second time on the next line,
;; unconditionally, right after the TARGET_AVX-dependent setting above.
;; One of the two looks redundant -- confirm which is intended and drop
;; the other.
2297 (set_attr "prefix_data16" "0")
2298 (set_attr "prefix" "maybe_vex")
2299 (set_attr "mode" "TI")])
2301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2303 ;; Parallel double-precision floating point conversion operations
2305 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: signed integer to float conversion (float:V2DF) of the V2SI
;; operand 1, taken from a "y" register or from memory, into a V2DF SSE
;; register.  Only the register alternative is tagged unit=mmx and
;; prefix_data16=1; the memory alternative leaves both at "*".
2307 (define_insn "sse2_cvtpi2pd"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2309 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2311 "cvtpi2pd\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "ssecvt")
2313 (set_attr "unit" "mmx,*")
2314 (set_attr "prefix_data16" "1,*")
2315 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (SSE register or memory) to a V2SI value in a
;; "y" register, expressed as UNSPEC_FIX_NOTRUNC.  Tagged unit=mmx.
2317 (define_insn "sse2_cvtpd2pi"
2318 [(set (match_operand:V2SI 0 "register_operand" "=y")
2319 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2320 UNSPEC_FIX_NOTRUNC))]
2322 "cvtpd2pi\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set_attr "unit" "mmx")
2325 (set_attr "bdver1_decode" "double")
2326 (set_attr "prefix_data16" "1")
2327 (set_attr "mode" "DI")])
;; cvttpd2pi: float to signed integer conversion (fix:V2SI) of the V2DF
;; operand 1 (SSE register or memory) into a "y" register.  Tagged unit=mmx.
;; NOTE(review): the mode attribute is "TI" here while the otherwise
;; parallel sse2_cvtpd2pi uses "DI" -- confirm the difference is intended.
2329 (define_insn "sse2_cvttpd2pi"
2330 [(set (match_operand:V2SI 0 "register_operand" "=y")
2331 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2333 "cvttpd2pi\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "ssecvt")
2335 (set_attr "unit" "mmx")
2336 (set_attr "bdver1_decode" "double")
2337 (set_attr "prefix_data16" "1")
2338 (set_attr "mode" "TI")])
;; cvtsi2sd / vcvtsi2sd: convert the SImode integer operand 2 (general
;; register or memory) to DFmode via float:DF.  Operands 0 and 1 are V2DF
;; registers; the non-AVX alternatives tie operand 1 to the destination,
;; the AVX alternative takes it as a separate source.
2340 (define_insn "sse2_cvtsi2sd"
2341 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2344 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2345 (match_operand:V2DF 1 "register_operand" "0,0,x")
2349 cvtsi2sd\t{%2, %0|%0, %2}
2350 cvtsi2sd\t{%2, %0|%0, %2}
2351 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2352 [(set_attr "isa" "noavx,noavx,avx")
2353 (set_attr "type" "sseicvt")
2354 (set_attr "athlon_decode" "double,direct,*")
2355 (set_attr "amdfam10_decode" "vector,double,*")
2356 (set_attr "bdver1_decode" "double,direct,*")
2357 (set_attr "prefix" "orig,orig,vex")
2358 (set_attr "mode" "DF")])
;; cvtsi2sdq / vcvtsi2sdq: convert the DImode integer operand 2 (general
;; register or memory) to DFmode via float:DF.  Operands 0 and 1 are V2DF
;; registers.  Enabled only for TARGET_SSE2 && TARGET_64BIT.  The non-AVX
;; alternatives set prefix_rex=1; the AVX alternative sets length_vex=4.
2360 (define_insn "sse2_cvtsi2sdq"
2361 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2364 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2365 (match_operand:V2DF 1 "register_operand" "0,0,x")
2367 "TARGET_SSE2 && TARGET_64BIT"
2369 cvtsi2sdq\t{%2, %0|%0, %2}
2370 cvtsi2sdq\t{%2, %0|%0, %2}
2371 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2372 [(set_attr "isa" "noavx,noavx,avx")
2373 (set_attr "type" "sseicvt")
2374 (set_attr "athlon_decode" "double,direct,*")
2375 (set_attr "amdfam10_decode" "vector,double,*")
2376 (set_attr "bdver1_decode" "double,direct,*")
2377 (set_attr "length_vex" "*,*,4")
2378 (set_attr "prefix_rex" "1,1,*")
2379 (set_attr "prefix" "orig,orig,vex")
2380 (set_attr "mode" "DF")])
;; cvtsd2si: SImode general-register result computed from index 0 of the
;; V2DF operand 1 (SSE register or memory), wrapped in UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2si_2 -- confirm that is intentional.
2382 (define_insn "sse2_cvtsd2si"
2383 [(set (match_operand:SI 0 "register_operand" "=r,r")
2386 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2387 (parallel [(const_int 0)]))]
2388 UNSPEC_FIX_NOTRUNC))]
2390 "%vcvtsd2si\t{%1, %0|%0, %1}"
2391 [(set_attr "type" "sseicvt")
2392 (set_attr "athlon_decode" "double,vector")
2393 (set_attr "bdver1_decode" "double,double")
2394 (set_attr "prefix_rep" "1")
2395 (set_attr "prefix" "maybe_vex")
2396 (set_attr "mode" "SI")])
;; Same cvtsd2si instruction as sse2_cvtsd2si, but matching a scalar DFmode
;; operand 1 directly (register or memory) instead of a V2DF vector.
2398 (define_insn "sse2_cvtsd2si_2"
2399 [(set (match_operand:SI 0 "register_operand" "=r,r")
2400 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2401 UNSPEC_FIX_NOTRUNC))]
2403 "%vcvtsd2si\t{%1, %0|%0, %1}"
2404 [(set_attr "type" "sseicvt")
2405 (set_attr "athlon_decode" "double,vector")
2406 (set_attr "amdfam10_decode" "double,double")
2407 (set_attr "bdver1_decode" "double,double")
2408 (set_attr "prefix_rep" "1")
2409 (set_attr "prefix" "maybe_vex")
2410 (set_attr "mode" "SI")])
;; 64-bit form of cvtsd2si: DImode general-register result from index 0 of
;; the V2DF operand 1, wrapped in UNSPEC_FIX_NOTRUNC; the template appends
;; the {q} suffix.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2siq_2 -- confirm that is intentional.
2412 (define_insn "sse2_cvtsd2siq"
2413 [(set (match_operand:DI 0 "register_operand" "=r,r")
2416 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2417 (parallel [(const_int 0)]))]
2418 UNSPEC_FIX_NOTRUNC))]
2419 "TARGET_SSE2 && TARGET_64BIT"
2420 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2421 [(set_attr "type" "sseicvt")
2422 (set_attr "athlon_decode" "double,vector")
2423 (set_attr "bdver1_decode" "double,double")
2424 (set_attr "prefix_rep" "1")
2425 (set_attr "prefix" "maybe_vex")
2426 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si{q} matching a scalar DFmode operand 1 (register or
;; memory) and producing a DImode general register under
;; UNSPEC_FIX_NOTRUNC.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2428 (define_insn "sse2_cvtsd2siq_2"
2429 [(set (match_operand:DI 0 "register_operand" "=r,r")
2430 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2431 UNSPEC_FIX_NOTRUNC))]
2432 "TARGET_SSE2 && TARGET_64BIT"
2433 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2434 [(set_attr "type" "sseicvt")
2435 (set_attr "athlon_decode" "double,vector")
2436 (set_attr "amdfam10_decode" "double,double")
2437 (set_attr "bdver1_decode" "double,double")
2438 (set_attr "prefix_rep" "1")
2439 (set_attr "prefix" "maybe_vex")
2440 (set_attr "mode" "DI")])
;; cvttsd2si: SImode general-register result computed from index 0 of the
;; V2DF operand 1 (SSE register or memory).  No UNSPEC is involved here,
;; in contrast to sse2_cvtsd2si.
;; NOTE(review): presumably the truncating (round-toward-zero) conversion,
;; going by the cvtt mnemonic -- confirm.
2442 (define_insn "sse2_cvttsd2si"
2443 [(set (match_operand:SI 0 "register_operand" "=r,r")
2446 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2447 (parallel [(const_int 0)]))))]
2449 "%vcvttsd2si\t{%1, %0|%0, %1}"
2450 [(set_attr "type" "sseicvt")
2451 (set_attr "athlon_decode" "double,vector")
2452 (set_attr "amdfam10_decode" "double,double")
2453 (set_attr "bdver1_decode" "double,double")
2454 (set_attr "prefix_rep" "1")
2455 (set_attr "prefix" "maybe_vex")
2456 (set_attr "mode" "SI")])
;; 64-bit form of cvttsd2si: DImode general-register result from index 0 of
;; the V2DF operand 1; the template appends the {q} suffix.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2458 (define_insn "sse2_cvttsd2siq"
2459 [(set (match_operand:DI 0 "register_operand" "=r,r")
2462 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2463 (parallel [(const_int 0)]))))]
2464 "TARGET_SSE2 && TARGET_64BIT"
2465 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2466 [(set_attr "type" "sseicvt")
2467 (set_attr "athlon_decode" "double,vector")
2468 (set_attr "amdfam10_decode" "double,double")
2469 (set_attr "bdver1_decode" "double,double")
2470 (set_attr "prefix_rep" "1")
2471 (set_attr "prefix" "maybe_vex")
2472 (set_attr "mode" "DI")])
;; vcvtdq2pd producing a 256-bit result: signed integer to float conversion
;; (float:V4DF) of the V4SI operand 1 (register or memory).
2474 (define_insn "avx_cvtdq2pd256"
2475 [(set (match_operand:V4DF 0 "register_operand" "=x")
2476 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2478 "vcvtdq2pd\t{%1, %0|%0, %1}"
2479 [(set_attr "type" "ssecvt")
2480 (set_attr "prefix" "vex")
2481 (set_attr "mode" "V4DF")])
;; Variant of vcvtdq2pd whose source is a V8SI operand from which indices
;; 0..3 are selected; the result is a V4DF register.
;; NOTE(review): the template prints operand 1 with %x1, presumably to name
;; its 128-bit form -- confirm against the operand-printing code.
2483 (define_insn "*avx_cvtdq2pd256_2"
2484 [(set (match_operand:V4DF 0 "register_operand" "=x")
2487 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2488 (parallel [(const_int 0) (const_int 1)
2489 (const_int 2) (const_int 3)]))))]
2491 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2492 [(set_attr "type" "ssecvt")
2493 (set_attr "prefix" "vex")
2494 (set_attr "mode" "V4DF")])
;; cvtdq2pd: V2DF result computed from indices 0 and 1 of the V4SI
;; operand 1 (register or memory).
2496 (define_insn "sse2_cvtdq2pd"
2497 [(set (match_operand:V2DF 0 "register_operand" "=x")
2500 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2501 (parallel [(const_int 0) (const_int 1)]))))]
2503 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2504 [(set_attr "type" "ssecvt")
2505 (set_attr "prefix" "maybe_vex")
2506 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand 1 (register or memory) to
;; a V4SI register, expressed as UNSPEC_FIX_NOTRUNC.  The template carries
;; the {y} suffix.
2508 (define_insn "avx_cvtpd2dq256"
2509 [(set (match_operand:V4SI 0 "register_operand" "=x")
2510 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2511 UNSPEC_FIX_NOTRUNC))]
2513 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2514 [(set_attr "type" "ssecvt")
2515 (set_attr "prefix" "vex")
2516 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V4SI destination built from an unspec:V2SI
;; conversion of the V2DF operand 1.  The preparation statement sets
;; operand 2 to the V2SI zero constant.
;; NOTE(review): operand 2 presumably supplies the zeroed upper half of the
;; V4SI result -- confirm.
2518 (define_expand "sse2_cvtpd2dq"
2519 [(set (match_operand:V4SI 0 "register_operand" "")
2521 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2525 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: unspec:V2SI conversion of the V2DF operand 1
;; (register or memory) together with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX template
;; vcvtpd2dq{x} or the legacy template cvtpd2dq.
2527 (define_insn "*sse2_cvtpd2dq"
2528 [(set (match_operand:V4SI 0 "register_operand" "=x")
2530 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2532 (match_operand:V2SI 2 "const0_operand" "")))]
2536 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2538 return "cvtpd2dq\t{%1, %0|%0, %1}";
2540 [(set_attr "type" "ssecvt")
2541 (set_attr "prefix_rep" "1")
2542 (set_attr "prefix_data16" "0")
2543 (set_attr "prefix" "maybe_vex")
2544 (set_attr "mode" "TI")
2545 (set_attr "amdfam10_decode" "double")
2546 (set_attr "athlon_decode" "vector")
2547 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq with a 256-bit source: float to signed integer conversion
;; (fix:V4SI) of the V4DF operand 1 (register or memory).  The template
;; carries the {y} suffix.
2549 (define_insn "avx_cvttpd2dq256"
2550 [(set (match_operand:V4SI 0 "register_operand" "=x")
2551 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2553 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2554 [(set_attr "type" "ssecvt")
2555 (set_attr "prefix" "vex")
2556 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: V4SI destination built from a fix:V2SI
;; conversion of the V2DF operand 1.  The preparation statement sets
;; operand 2 to the V2SI zero constant.
;; NOTE(review): operand 2 presumably supplies the zeroed upper half of the
;; V4SI result -- confirm.
2558 (define_expand "sse2_cvttpd2dq"
2559 [(set (match_operand:V4SI 0 "register_operand" "")
2561 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2564 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: fix:V2SI conversion of the V2DF operand 1
;; (register or memory) together with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX template
;; vcvttpd2dq{x} or the legacy template cvttpd2dq.
2566 (define_insn "*sse2_cvttpd2dq"
2567 [(set (match_operand:V4SI 0 "register_operand" "=x")
2569 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2570 (match_operand:V2SI 2 "const0_operand" "")))]
2574 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2576 return "cvttpd2dq\t{%1, %0|%0, %1}";
2578 [(set_attr "type" "ssecvt")
2579 (set_attr "amdfam10_decode" "double")
2580 (set_attr "athlon_decode" "vector")
2581 (set_attr "bdver1_decode" "double")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "TI")])
;; cvtsd2ss / vcvtsd2ss: narrowing conversion expressed as
;; float_truncate:V2SF of the V2DF operand 2 (register or memory).
;; Operands 0 and 1 are V4SF registers; the non-AVX alternatives tie
;; operand 1 to the destination, the AVX alternative takes it separately.
2585 (define_insn "sse2_cvtsd2ss"
2586 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2589 (float_truncate:V2SF
2590 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2591 (match_operand:V4SF 1 "register_operand" "0,0,x")
2595 cvtsd2ss\t{%2, %0|%0, %2}
2596 cvtsd2ss\t{%2, %0|%0, %2}
2597 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2598 [(set_attr "isa" "noavx,noavx,avx")
2599 (set_attr "type" "ssecvt")
2600 (set_attr "athlon_decode" "vector,double,*")
2601 (set_attr "amdfam10_decode" "vector,double,*")
2602 (set_attr "bdver1_decode" "direct,direct,*")
2603 (set_attr "prefix" "orig,orig,vex")
2604 (set_attr "mode" "SF")])
;; cvtss2sd / vcvtss2sd: the source is the V4SF operand 2 (register or
;; memory) with indices 0 and 1 selected.  Operands 0 and 1 are V2DF
;; registers; the non-AVX alternatives tie operand 1 to the destination,
;; the AVX alternative takes it separately.
2606 (define_insn "sse2_cvtss2sd"
2607 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2611 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2612 (parallel [(const_int 0) (const_int 1)])))
2613 (match_operand:V2DF 1 "register_operand" "0,0,x")
2617 cvtss2sd\t{%2, %0|%0, %2}
2618 cvtss2sd\t{%2, %0|%0, %2}
2619 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2620 [(set_attr "isa" "noavx,noavx,avx")
2621 (set_attr "type" "ssecvt")
2622 (set_attr "amdfam10_decode" "vector,double,*")
2623 (set_attr "athlon_decode" "direct,direct,*")
2624 (set_attr "bdver1_decode" "direct,direct,*")
2625 (set_attr "prefix" "orig,orig,vex")
2626 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate:V4SF of the V4DF
;; operand 1 (register or memory).  The template carries the {y} suffix.
2628 (define_insn "avx_cvtpd2ps256"
2629 [(set (match_operand:V4SF 0 "register_operand" "=x")
2630 (float_truncate:V4SF
2631 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2633 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2634 [(set_attr "type" "ssecvt")
2635 (set_attr "prefix" "vex")
2636 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: V4SF destination built from float_truncate:V2SF
;; of the V2DF operand 1.  The preparation statement sets operand 2 to the
;; V2SF zero constant.
;; NOTE(review): operand 2 presumably supplies the zeroed upper half of the
;; V4SF result -- confirm.
2638 (define_expand "sse2_cvtpd2ps"
2639 [(set (match_operand:V4SF 0 "register_operand" "")
2641 (float_truncate:V2SF
2642 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2645 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1
;; (register or memory) together with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX template
;; vcvtpd2ps{x} or the legacy template cvtpd2ps.
2647 (define_insn "*sse2_cvtpd2ps"
2648 [(set (match_operand:V4SF 0 "register_operand" "=x")
2650 (float_truncate:V2SF
2651 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2652 (match_operand:V2SF 2 "const0_operand" "")))]
2656 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2658 return "cvtpd2ps\t{%1, %0|%0, %1}";
2660 [(set_attr "type" "ssecvt")
2661 (set_attr "amdfam10_decode" "double")
2662 (set_attr "athlon_decode" "vector")
2663 (set_attr "bdver1_decode" "double")
2664 (set_attr "prefix_data16" "1")
2665 (set_attr "prefix" "maybe_vex")
2666 (set_attr "mode" "V4SF")])
;; vcvtps2pd producing a 256-bit result: V4SF operand 1 (register or
;; memory) converted into a V4DF register.
2668 (define_insn "avx_cvtps2pd256"
2669 [(set (match_operand:V4DF 0 "register_operand" "=x")
2671 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2673 "vcvtps2pd\t{%1, %0|%0, %1}"
2674 [(set_attr "type" "ssecvt")
2675 (set_attr "prefix" "vex")
2676 (set_attr "mode" "V4DF")])
;; Variant of vcvtps2pd whose source is a V8SF operand from which indices
;; 0..3 are selected; the result is a V4DF register.
;; NOTE(review): the template prints operand 1 with %x1, presumably to name
;; its 128-bit form -- confirm against the operand-printing code.
2678 (define_insn "*avx_cvtps2pd256_2"
2679 [(set (match_operand:V4DF 0 "register_operand" "=x")
2682 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2683 (parallel [(const_int 0) (const_int 1)
2684 (const_int 2) (const_int 3)]))))]
2686 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2687 [(set_attr "type" "ssecvt")
2688 (set_attr "prefix" "vex")
2689 (set_attr "mode" "V4DF")])
;; cvtps2pd: V2DF result computed from indices 0 and 1 of the V4SF
;; operand 1 (register or memory).
2691 (define_insn "sse2_cvtps2pd"
2692 [(set (match_operand:V2DF 0 "register_operand" "=x")
2695 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2696 (parallel [(const_int 0) (const_int 1)]))))]
2698 "%vcvtps2pd\t{%1, %0|%0, %1}"
2699 [(set_attr "type" "ssecvt")
2700 (set_attr "amdfam10_decode" "direct")
2701 (set_attr "athlon_decode" "double")
2702 (set_attr "bdver1_decode" "double")
2703 (set_attr "prefix_data16" "0")
2704 (set_attr "prefix" "maybe_vex")
2705 (set_attr "mode" "V2DF")])
;; Expander widening part of the V4SF operand 1 into the V2DF operand 0 in
;; two steps: a shuffle with selection indices 6,7,2,3, followed by a
;; conversion that reads indices 0 and 1.  Operand 2 is a fresh V4SF
;; temporary created by the preparation statement.
;; NOTE(review): per the name this handles the high half of operand 1;
;; the temporary presumably carries the shuffled vector -- confirm.
2707 (define_expand "vec_unpacks_hi_v4sf"
2712 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2713 (parallel [(const_int 6) (const_int 7)
2714 (const_int 2) (const_int 3)])))
2715 (set (match_operand:V2DF 0 "register_operand" "")
2719 (parallel [(const_int 0) (const_int 1)]))))]
2721 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander widening part of the V8SF operand 1 into the V4DF operand 0:
;; indices 4..7 of operand 1 are selected first, then operand 0 is set.
;; Operand 2 is a fresh V4SF temporary created by the preparation statement.
2723 (define_expand "vec_unpacks_hi_v8sf"
2726 (match_operand:V8SF 1 "nonimmediate_operand" "")
2727 (parallel [(const_int 4) (const_int 5)
2728 (const_int 6) (const_int 7)])))
2729 (set (match_operand:V4DF 0 "register_operand" "")
2733 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander setting the V2DF operand 0 from indices 0 and 1 of the V4SF
;; operand 1 (the same index selection used by sse2_cvtps2pd).
2735 (define_expand "vec_unpacks_lo_v4sf"
2736 [(set (match_operand:V2DF 0 "register_operand" "")
2739 (match_operand:V4SF 1 "nonimmediate_operand" "")
2740 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting the V4DF operand 0 from indices 0..3 of the V8SF
;; operand 1 (the same index selection used by *avx_cvtps2pd256_2).
2743 (define_expand "vec_unpacks_lo_v8sf"
2744 [(set (match_operand:V4DF 0 "register_operand" "")
2747 (match_operand:V8SF 1 "nonimmediate_operand" "")
2748 (parallel [(const_int 0) (const_int 1)
2749 (const_int 2) (const_int 3)]))))]
;; Expander: V8HI operand 1 -> V4SF operand 0 in two emitted insns.
;; First gen_vec_unpacks_hi_v8hi fills a fresh V4SI temporary from
;; operand 1, then gen_sse2_cvtdq2ps converts that temporary into operand 0.
2752 (define_expand "vec_unpacks_float_hi_v8hi"
2753 [(match_operand:V4SF 0 "register_operand" "")
2754 (match_operand:V8HI 1 "register_operand" "")]
2757 rtx tmp = gen_reg_rtx (V4SImode);
2759 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2760 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two emitted insns.
;; First gen_vec_unpacks_lo_v8hi fills a fresh V4SI temporary from
;; operand 1, then gen_sse2_cvtdq2ps converts that temporary into operand 0.
2764 (define_expand "vec_unpacks_float_lo_v8hi"
2765 [(match_operand:V4SF 0 "register_operand" "")
2766 (match_operand:V8HI 1 "register_operand" "")]
2769 rtx tmp = gen_reg_rtx (V4SImode);
2771 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2772 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two emitted insns.
;; First gen_vec_unpacku_hi_v8hi fills a fresh V4SI temporary from
;; operand 1, then gen_sse2_cvtdq2ps converts that temporary into operand 0.
2776 (define_expand "vec_unpacku_float_hi_v8hi"
2777 [(match_operand:V4SF 0 "register_operand" "")
2778 (match_operand:V8HI 1 "register_operand" "")]
2781 rtx tmp = gen_reg_rtx (V4SImode);
2783 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2784 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two emitted insns.
;; First gen_vec_unpacku_lo_v8hi fills a fresh V4SI temporary from
;; operand 1, then gen_sse2_cvtdq2ps converts that temporary into operand 0.
2788 (define_expand "vec_unpacku_float_lo_v8hi"
2789 [(match_operand:V4SF 0 "register_operand" "")
2790 (match_operand:V8HI 1 "register_operand" "")]
2793 rtx tmp = gen_reg_rtx (V4SImode);
2795 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2796 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander converting part of the V4SI operand 1 into the V2DF operand 0
;; in two steps: a shuffle of operand 1 with selection indices 2,3,2,3,
;; followed by a conversion that reads indices 0 and 1.  Operand 2 is a
;; fresh V4SI temporary created by the preparation statement.
2800 (define_expand "vec_unpacks_float_hi_v4si"
2803 (match_operand:V4SI 1 "nonimmediate_operand" "")
2804 (parallel [(const_int 2) (const_int 3)
2805 (const_int 2) (const_int 3)])))
2806 (set (match_operand:V2DF 0 "register_operand" "")
2810 (parallel [(const_int 0) (const_int 1)]))))]
2812 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting the V2DF operand 0 from indices 0 and 1 of the V4SI
;; operand 1 (the same index selection used by sse2_cvtdq2pd).
2814 (define_expand "vec_unpacks_float_lo_v4si"
2815 [(set (match_operand:V2DF 0 "register_operand" "")
2818 (match_operand:V4SI 1 "nonimmediate_operand" "")
2819 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander converting part of the V8SI operand 1 into the V4DF operand 0:
;; indices 4..7 of operand 1 are selected first, then operand 0 is set.
;; Operand 2 is a fresh V4SI temporary created by the preparation statement.
2822 (define_expand "vec_unpacks_float_hi_v8si"
2825 (match_operand:V8SI 1 "nonimmediate_operand" "")
2826 (parallel [(const_int 4) (const_int 5)
2827 (const_int 6) (const_int 7)])))
2828 (set (match_operand:V4DF 0 "register_operand" "")
2832 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting the V4DF operand 0 from indices 0..3 of the V8SI
;; operand 1 (the same index selection used by *avx_cvtdq2pd256_2).
2834 (define_expand "vec_unpacks_float_lo_v8si"
2835 [(set (match_operand:V4DF 0 "register_operand" "")
2838 (match_operand:V8SI 1 "nonimmediate_operand" "")
2839 (parallel [(const_int 0) (const_int 1)
2840 (const_int 2) (const_int 3)]))))]
;; Expander converting part of the V4SI operand 1 into the V2DF operand 0
;; with a correction step.  The preparation code sets up:
;;   operand 3    = V2DF zero vector, forced into a register
;;   operand 4    = V2DF constant vector built from 2**32 (real_ldexp of
;;                  dconst1 by 32), forced into a register
;;   operand 5    = fresh V4SI temporary
;;   operands 6-8 = fresh V2DF temporaries
;; The pattern shuffles operand 1 with indices 2,3,2,3, converts reading
;; indices 0 and 1, then uses an lt compare of (6) against the zero vector,
;; an and of (7) with the 2**32 vector, and sets operand 0 to (6) plus (8).
;; NOTE(review): presumably this implements the unsigned conversion (add
;; 2**32 to lanes whose signed conversion came out negative) -- confirm.
2843 (define_expand "vec_unpacku_float_hi_v4si"
2846 (match_operand:V4SI 1 "nonimmediate_operand" "")
2847 (parallel [(const_int 2) (const_int 3)
2848 (const_int 2) (const_int 3)])))
2853 (parallel [(const_int 0) (const_int 1)]))))
2855 (lt:V2DF (match_dup 6) (match_dup 3)))
2857 (and:V2DF (match_dup 7) (match_dup 4)))
2858 (set (match_operand:V2DF 0 "register_operand" "")
2859 (plus:V2DF (match_dup 6) (match_dup 8)))]
2862 REAL_VALUE_TYPE TWO32r;
2866 real_ldexp (&TWO32r, &dconst1, 32);
2867 x = const_double_from_real_value (TWO32r, DFmode);
2869 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2870 operands[4] = force_reg (V2DFmode,
2871 ix86_build_const_vector (V2DFmode, 1, x));
2873 operands[5] = gen_reg_rtx (V4SImode);
2875 for (i = 6; i < 9; i++)
2876 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander converting indices 0 and 1 of the V4SI operand 1 into the V2DF
;; operand 0 with a correction step.  The preparation code sets up:
;;   operand 3    = V2DF zero vector, forced into a register
;;   operand 4    = V2DF constant vector built from 2**32 (real_ldexp of
;;                  dconst1 by 32), forced into a register
;;   operands 5-7 = fresh V2DF temporaries
;; The pattern then uses an lt compare of (5) against the zero vector, an
;; and of (6) with the 2**32 vector, and sets operand 0 to (5) plus (7).
;; NOTE(review): presumably this implements the unsigned conversion (add
;; 2**32 to lanes whose signed conversion came out negative) -- confirm.
2879 (define_expand "vec_unpacku_float_lo_v4si"
2883 (match_operand:V4SI 1 "nonimmediate_operand" "")
2884 (parallel [(const_int 0) (const_int 1)]))))
2886 (lt:V2DF (match_dup 5) (match_dup 3)))
2888 (and:V2DF (match_dup 6) (match_dup 4)))
2889 (set (match_operand:V2DF 0 "register_operand" "")
2890 (plus:V2DF (match_dup 5) (match_dup 7)))]
2893 REAL_VALUE_TYPE TWO32r;
2897 real_ldexp (&TWO32r, &dconst1, 32);
2898 x = const_double_from_real_value (TWO32r, DFmode);
2900 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2901 operands[4] = force_reg (V2DFmode,
2902 ix86_build_const_vector (V2DFmode, 1, x));
2904 for (i = 5; i < 8; i++)
2905 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander narrowing two V4DF operands (1 and 2) with
;; float_truncate:V4SF each, then setting the V8SF operand 0.  Operands 3
;; and 4 are fresh V4SF temporaries created by the preparation code.
;; NOTE(review): the temporaries presumably hold the two truncated halves
;; that are combined into operand 0 -- confirm.
2908 (define_expand "vec_pack_trunc_v4df"
2910 (float_truncate:V4SF
2911 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2913 (float_truncate:V4SF
2914 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2915 (set (match_operand:V8SF 0 "register_operand" "")
2921 operands[3] = gen_reg_rtx (V4SFmode);
2922 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander packing two V2DF operands (1 and 2) into the V4SF operand 0.
;; Each input is converted with gen_sse2_cvtpd2ps into its own fresh V4SF
;; temporary (r1, r2); gen_sse_movlhps then combines r1 and r2 into
;; operand 0.
2925 (define_expand "vec_pack_trunc_v2df"
2926 [(match_operand:V4SF 0 "register_operand" "")
2927 (match_operand:V2DF 1 "nonimmediate_operand" "")
2928 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2933 r1 = gen_reg_rtx (V4SFmode);
2934 r2 = gen_reg_rtx (V4SFmode);
2936 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2937 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2938 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF operands (1 and 2) into the V4SI operand 0.
;; Each input is converted with gen_sse2_cvttpd2dq into its own fresh V4SI
;; temporary (r1, r2); gen_vec_interleave_lowv2di then combines them,
;; with all three registers viewed as V2DI through gen_lowpart.
2942 (define_expand "vec_pack_sfix_trunc_v2df"
2943 [(match_operand:V4SI 0 "register_operand" "")
2944 (match_operand:V2DF 1 "nonimmediate_operand" "")
2945 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2950 r1 = gen_reg_rtx (V4SImode);
2951 r2 = gen_reg_rtx (V4SImode);
2953 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2954 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2955 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2956 gen_lowpart (V2DImode, r1),
2957 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF operands (1 and 2) into the V4SI operand 0.
;; Same shape as vec_pack_sfix_trunc_v2df, but each input is converted
;; with gen_sse2_cvtpd2dq (the UNSPEC_FIX_NOTRUNC form) before
;; gen_vec_interleave_lowv2di combines the two V4SI temporaries, viewed as
;; V2DI through gen_lowpart.
2961 (define_expand "vec_pack_sfix_v2df"
2962 [(match_operand:V4SI 0 "register_operand" "")
2963 (match_operand:V2DF 1 "nonimmediate_operand" "")
2964 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2969 r1 = gen_reg_rtx (V4SImode);
2970 r2 = gen_reg_rtx (V4SImode);
2972 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2973 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2974 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2975 gen_lowpart (V2DImode, r1),
2976 gen_lowpart (V2DImode, r2)));
2980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2982 ;; Parallel single-precision floating point element swizzling
2984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn that accepts
;; nonimmediate_operand for all three V4SF operands.  It calls
;; ix86_fixup_binary_operands to obtain a destination `dst', emits
;; gen_sse_movhlps into it, and copies dst to operand 0 if the helper
;; returned something other than operand 0.
2986 (define_expand "sse_movhlps_exp"
2987 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2990 (match_operand:V4SF 1 "nonimmediate_operand" "")
2991 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2992 (parallel [(const_int 6)
2998 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3000 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3002 /* Fix up the destination if needed. */
3003 if (dst != operands[0])
3004 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms, five alternatives:
;;   0/1: register sources         -> movhlps / vmovhlps
;;   2/3: offsettable memory src 2 -> movlps / vmovlps reading %H2
;;   4:   memory destination       -> %vmovhps storing operand 2
;; The condition rejects the case where operands 1 and 2 are both memory.
;; NOTE(review): %H2 presumably addresses the upper half of the memory
;; operand -- confirm against the operand-printing code.
3009 (define_insn "sse_movhlps"
3010 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3013 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3014 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3015 (parallel [(const_int 6)
3019 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3021 movhlps\t{%2, %0|%0, %2}
3022 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3023 movlps\t{%H2, %0|%0, %H2}
3024 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3025 %vmovhps\t{%2, %0|%0, %2}"
3026 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3027 (set_attr "type" "ssemov")
3028 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3029 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn that accepts
;; nonimmediate_operand for all three V4SF operands.  It calls
;; ix86_fixup_binary_operands to obtain a destination `dst', emits
;; gen_sse_movlhps into it, and copies dst to operand 0 if the helper
;; returned something other than operand 0.
3031 (define_expand "sse_movlhps_exp"
3032 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3035 (match_operand:V4SF 1 "nonimmediate_operand" "")
3036 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3037 (parallel [(const_int 0)
3043 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3045 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3047 /* Fix up the destination if needed. */
3048 if (dst != operands[0])
3049 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms, five alternatives:
;;   0/1: register sources            -> movlhps / vmovlhps
;;   2/3: movhps / vmovhps from operand 2 (constraints "m" and "x")
;;   4:   offsettable memory destination -> %vmovlps storing to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
;; NOTE(review): alternative 3 pairs the vmovhps template with an "x"
;; (register) constraint on operand 2, whereas alternative 2 uses "m" --
;; confirm whether "m" was intended for alternative 3 as well.
3054 (define_insn "sse_movlhps"
3055 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3058 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3059 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3060 (parallel [(const_int 0)
3064 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3066 movlhps\t{%2, %0|%0, %2}
3067 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3068 movhps\t{%2, %0|%0, %2}
3069 vmovhps\t{%2, %1, %0|%0, %1, %2}
3070 %vmovlps\t{%2, %H0|%H0, %2}"
3071 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3072 (set_attr "type" "ssemov")
3073 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3074 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3076 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit operands.  The result takes indices
;; 2,10,3,11,6,14,7,15 from the pair (operand 1, operand 2); operand 1
;; must be a register, operand 2 may be a register or memory.
3077 (define_insn "avx_unpckhps256"
3078 [(set (match_operand:V8SF 0 "register_operand" "=x")
3081 (match_operand:V8SF 1 "register_operand" "x")
3082 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3083 (parallel [(const_int 2) (const_int 10)
3084 (const_int 3) (const_int 11)
3085 (const_int 6) (const_int 14)
3086 (const_int 7) (const_int 15)])))]
3088 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3089 [(set_attr "type" "sselog")
3090 (set_attr "prefix" "vex")
3091 (set_attr "mode" "V8SF")])
;; Expander for interleaving V8SF operands 1 and 2 in three steps:
;;   1. a selection with indices 0,8,1,9,4,12,5,13 (the same indices as
;;      avx_unpcklps256),
;;   2. a selection with indices 2,10,3,11,6,14,7,15 (the same indices as
;;      avx_unpckhps256),
;;   3. operand 0 is set from a selection with indices 4..7 and 12..15.
;; Operands 3 and 4 are fresh V8SF temporaries.
;; NOTE(review): operands 1 and 2 carry "x"/"xm" constraint strings even
;; though this is a define_expand, while other expanders in this file use
;; "" -- presumably harmless, confirm.
3093 (define_expand "vec_interleave_highv8sf"
3097 (match_operand:V8SF 1 "register_operand" "x")
3098 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3099 (parallel [(const_int 0) (const_int 8)
3100 (const_int 1) (const_int 9)
3101 (const_int 4) (const_int 12)
3102 (const_int 5) (const_int 13)])))
3108 (parallel [(const_int 2) (const_int 10)
3109 (const_int 3) (const_int 11)
3110 (const_int 6) (const_int 14)
3111 (const_int 7) (const_int 15)])))
3112 (set (match_operand:V8SF 0 "register_operand" "")
3117 (parallel [(const_int 4) (const_int 5)
3118 (const_int 6) (const_int 7)
3119 (const_int 12) (const_int 13)
3120 (const_int 14) (const_int 15)])))]
3123 operands[3] = gen_reg_rtx (V8SFmode);
3124 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF: the result takes indices 2,6,3,7 from the
;; pair (operand 1, operand 2).  The non-AVX alternative ties operand 1 to
;; the destination; operand 2 may be a register or memory.
3127 (define_insn "vec_interleave_highv4sf"
3128 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3131 (match_operand:V4SF 1 "register_operand" "0,x")
3132 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3133 (parallel [(const_int 2) (const_int 6)
3134 (const_int 3) (const_int 7)])))]
3137 unpckhps\t{%2, %0|%0, %2}
3138 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3139 [(set_attr "isa" "noavx,avx")
3140 (set_attr "type" "sselog")
3141 (set_attr "prefix" "orig,vex")
3142 (set_attr "mode" "V4SF")])
3144 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on 256-bit operands.  The result takes indices
;; 0,8,1,9,4,12,5,13 from the pair (operand 1, operand 2); operand 1
;; must be a register, operand 2 may be a register or memory.
3145 (define_insn "avx_unpcklps256"
3146 [(set (match_operand:V8SF 0 "register_operand" "=x")
3149 (match_operand:V8SF 1 "register_operand" "x")
3150 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3151 (parallel [(const_int 0) (const_int 8)
3152 (const_int 1) (const_int 9)
3153 (const_int 4) (const_int 12)
3154 (const_int 5) (const_int 13)])))]
3156 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3157 [(set_attr "type" "sselog")
3158 (set_attr "prefix" "vex")
3159 (set_attr "mode" "V8SF")])
;; Expander for interleaving V8SF operands 1 and 2 in three steps:
;;   1. a selection with indices 0,8,1,9,4,12,5,13 (the same indices as
;;      avx_unpcklps256),
;;   2. a selection with indices 2,10,3,11,6,14,7,15 (the same indices as
;;      avx_unpckhps256),
;;   3. operand 0 is set from a selection with indices 0..3 and 8..11.
;; Operands 3 and 4 are fresh V8SF temporaries.
;; NOTE(review): operands 1 and 2 carry "x"/"xm" constraint strings even
;; though this is a define_expand, while other expanders in this file use
;; "" -- presumably harmless, confirm.
3161 (define_expand "vec_interleave_lowv8sf"
3165 (match_operand:V8SF 1 "register_operand" "x")
3166 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3167 (parallel [(const_int 0) (const_int 8)
3168 (const_int 1) (const_int 9)
3169 (const_int 4) (const_int 12)
3170 (const_int 5) (const_int 13)])))
3176 (parallel [(const_int 2) (const_int 10)
3177 (const_int 3) (const_int 11)
3178 (const_int 6) (const_int 14)
3179 (const_int 7) (const_int 15)])))
3180 (set (match_operand:V8SF 0 "register_operand" "")
3185 (parallel [(const_int 0) (const_int 1)
3186 (const_int 2) (const_int 3)
3187 (const_int 8) (const_int 9)
3188 (const_int 10) (const_int 11)])))]
3191 operands[3] = gen_reg_rtx (V8SFmode);
3192 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF: the result takes indices 0,4,1,5 from the
;; pair (operand 1, operand 2).  The non-AVX alternative ties operand 1 to
;; the destination; operand 2 may be a register or memory.
3195 (define_insn "vec_interleave_lowv4sf"
3196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3199 (match_operand:V4SF 1 "register_operand" "0,x")
3200 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3201 (parallel [(const_int 0) (const_int 4)
3202 (const_int 1) (const_int 5)])))]
3205 unpcklps\t{%2, %0|%0, %2}
3206 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3207 [(set_attr "isa" "noavx,avx")
3208 (set_attr "type" "sselog")
3209 (set_attr "prefix" "orig,vex")
3210 (set_attr "mode" "V4SF")])
3212 ;; These are modeled with the same vec_concat as the others so that we
3213 ;; capture users of shufps that can use the new instructions
;; vmovshdup on 256-bit operands: the result uses selection indices
;; 1,1,3,3,5,5,7,7, i.e. each odd-indexed element of operand 1 appears
;; twice.  Operand 1 may be a register or memory.
3214 (define_insn "avx_movshdup256"
3215 [(set (match_operand:V8SF 0 "register_operand" "=x")
3218 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3220 (parallel [(const_int 1) (const_int 1)
3221 (const_int 3) (const_int 3)
3222 (const_int 5) (const_int 5)
3223 (const_int 7) (const_int 7)])))]
3225 "vmovshdup\t{%1, %0|%0, %1}"
3226 [(set_attr "type" "sse")
3227 (set_attr "prefix" "vex")
3228 (set_attr "mode" "V8SF")])
;; movshdup on V4SF: single-source shuffle of operand 1 (register or
;; memory) whose selection list begins with index 1.
;; NOTE(review): presumably the 128-bit counterpart of avx_movshdup256
;; (odd-indexed elements duplicated) -- confirm.
3230 (define_insn "sse3_movshdup"
3231 [(set (match_operand:V4SF 0 "register_operand" "=x")
3234 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3236 (parallel [(const_int 1)
3241 "%vmovshdup\t{%1, %0|%0, %1}"
3242 [(set_attr "type" "sse")
3243 (set_attr "prefix_rep" "1")
3244 (set_attr "prefix" "maybe_vex")
3245 (set_attr "mode" "V4SF")])
;; vmovsldup on 256-bit operands: the result uses selection indices
;; 0,0,2,2,4,4,6,6, i.e. each even-indexed element of operand 1 appears
;; twice.  Operand 1 may be a register or memory.
3247 (define_insn "avx_movsldup256"
3248 [(set (match_operand:V8SF 0 "register_operand" "=x")
3251 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3253 (parallel [(const_int 0) (const_int 0)
3254 (const_int 2) (const_int 2)
3255 (const_int 4) (const_int 4)
3256 (const_int 6) (const_int 6)])))]
3258 "vmovsldup\t{%1, %0|%0, %1}"
3259 [(set_attr "type" "sse")
3260 (set_attr "prefix" "vex")
3261 (set_attr "mode" "V8SF")])
;; movsldup on V4SF: single-source shuffle of operand 1 (register or
;; memory) whose selection list begins with index 0.
;; NOTE(review): presumably the 128-bit counterpart of avx_movsldup256
;; (even-indexed elements duplicated) -- confirm.
3263 (define_insn "sse3_movsldup"
3264 [(set (match_operand:V4SF 0 "register_operand" "=x")
3267 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3269 (parallel [(const_int 0)
3274 "%vmovsldup\t{%1, %0|%0, %1}"
3275 [(set_attr "type" "sse")
3276 (set_attr "prefix_rep" "1")
3277 (set_attr "prefix" "maybe_vex")
3278 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as a single const_int (operand 3)
;; and decoding it into the eight explicit selectors expected by
;; avx_shufps256_1.  The immediate is read as four 2-bit fields:
;;   fields 0 and 1 are passed through unchanged (range 0..3),
;;   fields 2 and 3 are offset by 8 (range 8..11),
;; and the same four values are passed again offset by a further 4
;; (ranges 4..7 and 12..15).
3280 (define_expand "avx_shufps256"
3281 [(match_operand:V8SF 0 "register_operand" "")
3282 (match_operand:V8SF 1 "register_operand" "")
3283 (match_operand:V8SF 2 "nonimmediate_operand" "")
3284 (match_operand:SI 3 "const_int_operand" "")]
3287 int mask = INTVAL (operands[3]);
3288 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3289 GEN_INT ((mask >> 0) & 3),
3290 GEN_INT ((mask >> 2) & 3),
3291 GEN_INT (((mask >> 4) & 3) + 8),
3292 GEN_INT (((mask >> 6) & 3) + 8),
3293 GEN_INT (((mask >> 0) & 3) + 4),
3294 GEN_INT (((mask >> 2) & 3) + 4),
3295 GEN_INT (((mask >> 4) & 3) + 12),
3296 GEN_INT (((mask >> 6) & 3) + 12)));
3300 ;; Each 2-bit field of the mask selects 2 elements, one per 128-bit lane.
;; avx_shufps256_1: vshufps on V8SF with the selection spelled out as eight
;; constant operands (3..10), each restricted by its predicate to a range of
;; four indices.  The insn condition additionally requires operands 7..10 to
;; equal operands 3..6 plus 4.  The output code rebuilds the 8-bit immediate
;; from operands 3..6 (subtracting 8 from operands 5 and 6) and stores it back
;; into operands[3] before printing the template.
3301 (define_insn "avx_shufps256_1"
3302 [(set (match_operand:V8SF 0 "register_operand" "=x")
3305 (match_operand:V8SF 1 "register_operand" "x")
3306 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3307 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3308 (match_operand 4 "const_0_to_3_operand" "")
3309 (match_operand 5 "const_8_to_11_operand" "")
3310 (match_operand 6 "const_8_to_11_operand" "")
3311 (match_operand 7 "const_4_to_7_operand" "")
3312 (match_operand 8 "const_4_to_7_operand" "")
3313 (match_operand 9 "const_12_to_15_operand" "")
3314 (match_operand 10 "const_12_to_15_operand" "")])))]
3316 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3317 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3318 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3319 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3322 mask = INTVAL (operands[3]);
3323 mask |= INTVAL (operands[4]) << 2;
3324 mask |= (INTVAL (operands[5]) - 8) << 4;
3325 mask |= (INTVAL (operands[6]) - 8) << 6;
3326 operands[3] = GEN_INT (mask);
3328 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3330 [(set_attr "type" "sselog")
3331 (set_attr "length_immediate" "1")
3332 (set_attr "prefix" "vex")
3333 (set_attr "mode" "V8SF")])
;; sse_shufps: expander taking a V4SF destination, two V4SF sources and an
;; SImode constant mask.  The mask is cut into four 2-bit fields; the two low
;; fields are passed unchanged and the two high fields are offset by 4 when
;; calling gen_sse_shufps_v4sf.
3335 (define_expand "sse_shufps"
3336 [(match_operand:V4SF 0 "register_operand" "")
3337 (match_operand:V4SF 1 "register_operand" "")
3338 (match_operand:V4SF 2 "nonimmediate_operand" "")
3339 (match_operand:SI 3 "const_int_operand" "")]
3342 int mask = INTVAL (operands[3]);
3343 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3344 GEN_INT ((mask >> 0) & 3),
3345 GEN_INT ((mask >> 2) & 3),
3346 GEN_INT (((mask >> 4) & 3) + 4),
3347 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_<mode>: shufps over the VI4F_128 modes, written as a vec_select
;; from the vec_concat of operands 1 and 2.  Operands 3 and 4 pick indices
;; 0..3, operands 5 and 6 pick indices 4..7.  The output code packs the four
;; selectors into one immediate (subtracting 4 from operands 5 and 6) and
;; stores it in operands[3].  Alternative 0 ties operand 1 to the destination
;; and is tagged noavx; alternative 1 is tagged avx.
3351 (define_insn "sse_shufps_<mode>"
3352 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3353 (vec_select:VI4F_128
3354 (vec_concat:<ssedoublevecmode>
3355 (match_operand:VI4F_128 1 "register_operand" "0,x")
3356 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3357 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3358 (match_operand 4 "const_0_to_3_operand" "")
3359 (match_operand 5 "const_4_to_7_operand" "")
3360 (match_operand 6 "const_4_to_7_operand" "")])))]
3364 mask |= INTVAL (operands[3]) << 0;
3365 mask |= INTVAL (operands[4]) << 2;
3366 mask |= (INTVAL (operands[5]) - 4) << 4;
3367 mask |= (INTVAL (operands[6]) - 4) << 6;
3368 operands[3] = GEN_INT (mask);
3370 switch (which_alternative)
3373 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3375 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3380 [(set_attr "isa" "noavx,avx")
3381 (set_attr "type" "sselog")
3382 (set_attr "length_immediate" "1")
3383 (set_attr "prefix" "orig,vex")
3384 (set_attr "mode" "V4SF")])
;; sse_storehps: produce a V2SF from elements 2 and 3 of V4SF operand 1.
;; Alternatives: register source to memory destination (movhps), register to
;; register (movhlps), and offsettable-memory source to register (movlps on
;; %H1).
3386 (define_insn "sse_storehps"
3387 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3389 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3390 (parallel [(const_int 2) (const_int 3)])))]
3393 %vmovhps\t{%1, %0|%0, %1}
3394 %vmovhlps\t{%1, %d0|%d0, %1}
3395 %vmovlps\t{%H1, %d0|%d0, %H1}"
3396 [(set_attr "type" "ssemov")
3397 (set_attr "prefix" "maybe_vex")
3398 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps_exp: expander front end for sse_loadhps.  It obtains a
;; destination from ix86_fixup_binary_operands, emits gen_sse_loadhps into it,
;; and copies the result to operands[0] when the two differ.
3400 (define_expand "sse_loadhps_exp"
3401 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3404 (match_operand:V4SF 1 "nonimmediate_operand" "")
3405 (parallel [(const_int 0) (const_int 1)]))
3406 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3409 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3411 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3413 /* Fix up the destination if needed. */
3414 if (dst != operands[0])
3415 emit_move_insn (operands[0], dst);
;; sse_loadhps: combine elements 0 and 1 of V4SF operand 1 with V2SF
;; operand 2.  Alternatives 0/1 take operand 2 from memory ((v)movhps),
;; alternatives 2/3 take it from a register ((v)movlhps), and alternative 4
;; has an offsettable-memory destination tied to operand 1 and stores
;; operand 2 with %vmovlps to %H0.
3420 (define_insn "sse_loadhps"
3421 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3424 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3425 (parallel [(const_int 0) (const_int 1)]))
3426 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3429 movhps\t{%2, %0|%0, %2}
3430 vmovhps\t{%2, %1, %0|%0, %1, %2}
3431 movlhps\t{%2, %0|%0, %2}
3432 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3433 %vmovlps\t{%2, %H0|%H0, %2}"
3434 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3435 (set_attr "type" "ssemov")
3436 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3437 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; sse_storelps: produce a V2SF from elements 0 and 1 of V4SF operand 1.
;; Alternatives: register to memory (movlps), register to register (movaps),
;; and memory to register (movlps).
3439 (define_insn "sse_storelps"
3440 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3442 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3443 (parallel [(const_int 0) (const_int 1)])))]
3446 %vmovlps\t{%1, %0|%0, %1}
3447 %vmovaps\t{%1, %0|%0, %1}
3448 %vmovlps\t{%1, %d0|%d0, %1}"
3449 [(set_attr "type" "ssemov")
3450 (set_attr "prefix" "maybe_vex")
3451 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps_exp: expander front end for sse_loadlps.  It obtains a
;; destination from ix86_fixup_binary_operands, emits gen_sse_loadlps into it,
;; and copies the result to operands[0] when the two differ.
3453 (define_expand "sse_loadlps_exp"
3454 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3456 (match_operand:V2SF 2 "nonimmediate_operand" "")
3458 (match_operand:V4SF 1 "nonimmediate_operand" "")
3459 (parallel [(const_int 2) (const_int 3)]))))]
3462 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3464 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3466 /* Fix up the destination if needed. */
3467 if (dst != operands[0])
3468 emit_move_insn (operands[0], dst);
;; sse_loadlps: combine V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.
;;   alt 0/1: operand 2 in a register -> (v)shufps with immediate 0xe4
;;   alt 2/3: operand 2 in memory     -> (v)movlps load
;;   alt 4:   memory destination tied to operand 1 -> %vmovlps store
;; Operand 2 of alternative 3 must be "m": the three-operand vmovlps load
;; form takes a 64-bit memory source, not a register.  A register constraint
;; there would merely repeat alternative 1 and leave AVX with no alternative
;; that accepts a memory operand 2 directly.
3473 (define_insn "sse_loadlps"
3474 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3476 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3478 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3479 (parallel [(const_int 2) (const_int 3)]))))]
3482 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3483 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3484 movlps\t{%2, %0|%0, %2}
3485 vmovlps\t{%2, %1, %0|%0, %1, %2}
3486 %vmovlps\t{%2, %0|%0, %2}"
3487 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3488 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3489 (set_attr "length_immediate" "1,1,*,*,*")
3490 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3491 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movss: register-only V4SF pattern on operands 1 and 2 with two
;; alternatives: movss with operand 1 tied to the destination (noavx) and
;; three-operand vmovss (avx).  The mode attribute is SF.
3493 (define_insn "sse_movss"
3494 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3496 (match_operand:V4SF 2 "register_operand" " x,x")
3497 (match_operand:V4SF 1 "register_operand" " 0,x")
3501 movss\t{%2, %0|%0, %2}
3502 vmovss\t{%2, %1, %0|%0, %1, %2}"
3503 [(set_attr "isa" "noavx,avx")
3504 (set_attr "type" "ssemov")
3505 (set_attr "prefix" "orig,vex")
3506 (set_attr "mode" "SF")])
;; vec_dupv4sf: expander setting a V4SF register (operand 0) from an SF
;; register-or-memory operand 1.  The preparation statement shown copies
;; operand 1 into an SFmode register with force_reg.
3508 (define_expand "vec_dupv4sf"
3509 [(set (match_operand:V4SF 0 "register_operand" "")
3511 (match_operand:SF 1 "nonimmediate_operand" "")))]
3515 operands[1] = force_reg (SFmode, operands[1]);
;; avx2_vec_dupv4sf: V4SF destination built from element 0 of V4SF register
;; operand 1; emitted as vbroadcastss.
3518 (define_insn "avx2_vec_dupv4sf"
3519 [(set (match_operand:V4SF 0 "register_operand" "=x")
3522 (match_operand:V4SF 1 "register_operand" "x")
3523 (parallel [(const_int 0)]))))]
3525 "vbroadcastss\t{%1, %0|%0, %1}"
3526 [(set_attr "type" "sselog1")
3527 (set_attr "prefix" "vex")
3528 (set_attr "mode" "V4SF")])
;; *vec_dupv4sf_avx: V4SF destination from an SF operand 1.  A register source
;; uses vshufps with immediate 0 on %1,%1; a memory source uses vbroadcastss.
3530 (define_insn "*vec_dupv4sf_avx"
3531 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3533 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3536 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3537 vbroadcastss\t{%1, %0|%0, %1}"
3538 [(set_attr "type" "sselog1,ssemov")
3539 (set_attr "length_immediate" "1,0")
3540 (set_attr "prefix_extra" "0,1")
3541 (set_attr "prefix" "vex")
3542 (set_attr "mode" "V4SF")])
;; avx2_vec_dupv8sf: V8SF destination built from element 0 of V4SF register
;; operand 1; emitted as vbroadcastss.
3544 (define_insn "avx2_vec_dupv8sf"
3545 [(set (match_operand:V8SF 0 "register_operand" "=x")
3548 (match_operand:V4SF 1 "register_operand" "x")
3549 (parallel [(const_int 0)]))))]
3551 "vbroadcastss\t{%1, %0|%0, %1}"
3552 [(set_attr "type" "sselog1")
3553 (set_attr "prefix" "vex")
3554 (set_attr "mode" "V8SF")])
;; *vec_dupv4sf: V4SF destination from an SF register operand 1 that is tied
;; to the destination ("0"); emitted as shufps with immediate 0 on %0,%0.
3556 (define_insn "*vec_dupv4sf"
3557 [(set (match_operand:V4SF 0 "register_operand" "=x")
3559 (match_operand:SF 1 "register_operand" "0")))]
3561 "shufps\t{$0, %0, %0|%0, %0, 0}"
3562 [(set_attr "type" "sselog1")
3563 (set_attr "length_immediate" "1")
3564 (set_attr "mode" "V4SF")])
3566 ;; Although insertps takes register source, we prefer
3567 ;; unpcklps with register source since it is shorter.
;; *vec_concatv2sf_sse4_1: V2SF from two SF operands, seven alternatives:
;;   0/1  both in SSE registers              -> (v)unpcklps
;;   2/3  operand 2 in memory                -> (v)insertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 "C" -> %vmovss
;;   5    "*y" destination                   -> punpckldq
;;   6    "*y" destination, operand 2 "C"    -> movd
3568 (define_insn "*vec_concatv2sf_sse4_1"
3569 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3571 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3572 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3575 unpcklps\t{%2, %0|%0, %2}
3576 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3577 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3578 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3579 %vmovss\t{%1, %0|%0, %1}
3580 punpckldq\t{%2, %0|%0, %2}
3581 movd\t{%1, %0|%0, %1}"
3582 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3583 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3584 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3585 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3586 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3587 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3588 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3590 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3591 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3592 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *vec_concatv2sf_sse: V2SF from two SF operands, four alternatives:
;; unpcklps (both in SSE registers, operand 1 tied), movss (operand 1 in
;; memory, operand 2 "C"), punpckldq and movd for the "*y" destinations.
3593 (define_insn "*vec_concatv2sf_sse"
3594 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3596 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3597 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3600 unpcklps\t{%2, %0|%0, %2}
3601 movss\t{%1, %0|%0, %1}
3602 punpckldq\t{%2, %0|%0, %2}
3603 movd\t{%1, %0|%0, %1}"
3604 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3605 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: V4SF from two V2SF operands.  A register operand 2 uses
;; (v)movlhps; a memory operand 2 uses (v)movhps.  The non-VEX alternatives
;; tie operand 1 to the destination.
3607 (define_insn "*vec_concatv4sf"
3608 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3610 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3611 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3614 movlhps\t{%2, %0|%0, %2}
3615 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3616 movhps\t{%2, %0|%0, %2}
3617 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3618 [(set_attr "isa" "noavx,avx,noavx,avx")
3619 (set_attr "type" "ssemov")
3620 (set_attr "prefix" "orig,vex,orig,vex")
3621 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander over the V_128 modes.  It forwards the destination
;; register and the unconstrained operand 1 to ix86_expand_vector_init, with
;; false as the first argument.
3623 (define_expand "vec_init<mode>"
3624 [(match_operand:V_128 0 "register_operand" "")
3625 (match_operand 1 "" "")]
3628 ix86_expand_vector_init (false, operands[0], operands[1]);
3632 ;; Avoid combining registers from different units in a single alternative,
3633 ;; see comment above inline_secondary_memory_needed function in i386.c
;; vec_set<mode>_0: eleven-alternative pattern over VI4F_128 that combines a
;; vec_duplicate of scalar operand 2 with vector operand 1.
;;   0-3   operand 1 is "C": insertps 0xe, scalar move, movd, movss
;;   4-5   operand 1 in a register: movss / vmovss
;;   6-7   operand 2 in a general register or memory: pinsrd / vpinsrd
;;   8-10  memory destination tied to operand 1
;; The type attribute is sselog for alternatives 0, 6 and 7, fmov for 9,
;; imov for 10, and ssemov otherwise.
3634 (define_insn "vec_set<mode>_0"
3635 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3636 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3638 (vec_duplicate:VI4F_128
3639 (match_operand:<ssescalarmode> 2 "general_operand"
3640 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3641 (match_operand:VI4F_128 1 "vector_move_operand"
3642 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3646 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3647 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3648 %vmovd\t{%2, %0|%0, %2}
3649 movss\t{%2, %0|%0, %2}
3650 movss\t{%2, %0|%0, %2}
3651 vmovss\t{%2, %1, %0|%0, %1, %2}
3652 pinsrd\t{$0, %2, %0|%0, %2, 0}
3653 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3657 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3659 (cond [(eq_attr "alternative" "0,6,7")
3660 (const_string "sselog")
3661 (eq_attr "alternative" "9")
3662 (const_string "fmov")
3663 (eq_attr "alternative" "10")
3664 (const_string "imov")
3666 (const_string "ssemov")))
3667 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3668 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3669 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3670 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3672 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: insert SF operand 2 into V4SF operand 1 at the
;; position encoded by constant operand 3.  The condition accepts operand 3
;; only when exact_log2 of it is below the V4SF element count.  The output
;; code replaces operand 3 with that log2 shifted left by 4, which becomes the
;; (v)insertps immediate.
3673 (define_insn "*vec_setv4sf_sse4_1"
3674 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3677 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3678 (match_operand:V4SF 1 "register_operand" "0,x")
3679 (match_operand:SI 3 "const_int_operand" "")))]
3681 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3682 < GET_MODE_NUNITS (V4SFmode))"
3684 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3685 switch (which_alternative)
3688 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3690 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3695 [(set_attr "isa" "noavx,avx")
3696 (set_attr "type" "sselog")
3697 (set_attr "prefix_data16" "1,*")
3698 (set_attr "prefix_extra" "1")
3699 (set_attr "length_immediate" "1")
3700 (set_attr "prefix" "orig,vex")
3701 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec form of (v)insertps with an immediate in 0..255.
;; When operand 2 is a memory reference the output code takes the top two
;; immediate bits as count_s, masks the immediate down to its low six bits,
;; and re-addresses the memory operand as SFmode at byte offset count_s * 4.
3703 (define_insn "sse4_1_insertps"
3704 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3705 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3706 (match_operand:V4SF 1 "register_operand" "0,x")
3707 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3711 if (MEM_P (operands[2]))
3713 unsigned count_s = INTVAL (operands[3]) >> 6;
3715 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3716 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3718 switch (which_alternative)
3721 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3723 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3728 [(set_attr "isa" "noavx,avx")
3729 (set_attr "type" "sselog")
3730 (set_attr "prefix_data16" "1,*")
3731 (set_attr "prefix_extra" "1")
3732 (set_attr "length_immediate" "1")
3733 (set_attr "prefix" "orig,vex")
3734 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a VI4F_128 memory
;; destination receiving a vec_duplicate of a non-memory scalar: it emits a
;; move whose destination is the memory re-addressed in the scalar mode at
;; offset 0.
3737 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3739 (vec_duplicate:VI4F_128
3740 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3743 "TARGET_SSE && reload_completed"
3746 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set<mode>: expander over the V_128 modes.  It forwards the vector
;; register (operand 0), the scalar register (operand 1) and the integer value
;; of constant operand 2 to ix86_expand_vector_set, with false as the first
;; argument.
3751 (define_expand "vec_set<mode>"
3752 [(match_operand:V_128 0 "register_operand" "")
3753 (match_operand:<ssescalarmode> 1 "register_operand" "")
3754 (match_operand 2 "const_int_operand" "")]
3757 ix86_expand_vector_set (false, operands[0], operands[1],
3758 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extract element 0 of a V4SF into an SF destination;
;; rejected when both operands are memory.  After reload it is split into a
;; plain move: operand 1 is re-expressed in SFmode (gen_rtx_REG on its
;; register number, or gen_lowpart) and moved to operand 0.
3762 (define_insn_and_split "*vec_extractv4sf_0"
3763 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3765 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3766 (parallel [(const_int 0)])))]
3767 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3769 "&& reload_completed"
3772 rtx op1 = operands[1];
3774 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3776 op1 = gen_lowpart (SFmode, op1);
3777 emit_move_insn (operands[0], op1);
;; avx_vextractf128<mode>: expander over the V_256 modes.  Constant operand 2
;; (0 or 1) picks, through a switch, either gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>; the chosen generator is then called with
;; operands 0 and 1.
3781 (define_expand "avx_vextractf128<mode>"
3782 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3783 (match_operand:V_256 1 "register_operand" "")
3784 (match_operand:SI 2 "const_0_to_1_operand" "")]
3787 rtx (*insn)(rtx, rtx);
3789 switch (INTVAL (operands[2]))
3792 insn = gen_vec_extract_lo_<mode>;
3795 insn = gen_vec_extract_hi_<mode>;
3801 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (VI8F_256): select elements 0 and 1 of a 256-bit
;; vector into the half-width mode; rejected when both operands are memory.
;; After reload it is split into a plain move of operand 1 re-expressed in the
;; half-width mode (gen_rtx_REG on its register number, or gen_lowpart).
3805 (define_insn_and_split "vec_extract_lo_<mode>"
3806 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3807 (vec_select:<ssehalfvecmode>
3808 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3809 (parallel [(const_int 0) (const_int 1)])))]
3810 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3812 "&& reload_completed"
3815 rtx op1 = operands[1];
3817 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3819 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3820 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI8F_256): select elements 2 and 3 of a 256-bit
;; register into a half-width register or memory destination, emitted as
;; vextractf128 with immediate 1.  The memory attribute is none for the
;; register alternative and store for the memory one.
3824 (define_insn "vec_extract_hi_<mode>"
3825 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3826 (vec_select:<ssehalfvecmode>
3827 (match_operand:VI8F_256 1 "register_operand" "x,x")
3828 (parallel [(const_int 2) (const_int 3)])))]
3830 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3831 [(set_attr "type" "sselog")
3832 (set_attr "prefix_extra" "1")
3833 (set_attr "length_immediate" "1")
3834 (set_attr "memory" "none,store")
3835 (set_attr "prefix" "vex")
3836 (set_attr "mode" "V8SF")])
;; vec_extract_lo_<mode> (VI4F_256): select elements 0..3 of a 256-bit vector
;; into the half-width mode; rejected when both operands are memory.  After
;; reload it is split into a plain move of operand 1 re-expressed in the
;; half-width mode (gen_rtx_REG on its register number, or gen_lowpart).
3838 (define_insn_and_split "vec_extract_lo_<mode>"
3839 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3840 (vec_select:<ssehalfvecmode>
3841 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3842 (parallel [(const_int 0) (const_int 1)
3843 (const_int 2) (const_int 3)])))]
3844 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3846 "&& reload_completed"
3849 rtx op1 = operands[1];
3851 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3853 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3854 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI4F_256): select elements 4..7 of a 256-bit
;; register into a half-width register or memory destination, emitted as
;; vextractf128 with immediate 1.
3858 (define_insn "vec_extract_hi_<mode>"
3859 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3860 (vec_select:<ssehalfvecmode>
3861 (match_operand:VI4F_256 1 "register_operand" "x,x")
3862 (parallel [(const_int 4) (const_int 5)
3863 (const_int 6) (const_int 7)])))]
3865 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3866 [(set_attr "type" "sselog")
3867 (set_attr "prefix_extra" "1")
3868 (set_attr "length_immediate" "1")
3869 (set_attr "memory" "none,store")
3870 (set_attr "prefix" "vex")
3871 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v16hi: select elements 0..7 of a V16HI into a V8HI;
;; rejected when both operands are memory.  After reload it is split into a
;; plain move of operand 1 re-expressed as V8HImode (gen_rtx_REG on its
;; register number, or gen_lowpart).
3873 (define_insn_and_split "vec_extract_lo_v16hi"
3874 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3876 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3877 (parallel [(const_int 0) (const_int 1)
3878 (const_int 2) (const_int 3)
3879 (const_int 4) (const_int 5)
3880 (const_int 6) (const_int 7)])))]
3881 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3883 "&& reload_completed"
3886 rtx op1 = operands[1];
3888 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3890 op1 = gen_lowpart (V8HImode, op1);
3891 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: select elements 8..15 of a V16HI register into a
;; V8HI register or memory destination, emitted as vextractf128 with
;; immediate 1.
3895 (define_insn "vec_extract_hi_v16hi"
3896 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3898 (match_operand:V16HI 1 "register_operand" "x,x")
3899 (parallel [(const_int 8) (const_int 9)
3900 (const_int 10) (const_int 11)
3901 (const_int 12) (const_int 13)
3902 (const_int 14) (const_int 15)])))]
3904 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3905 [(set_attr "type" "sselog")
3906 (set_attr "prefix_extra" "1")
3907 (set_attr "length_immediate" "1")
3908 (set_attr "memory" "none,store")
3909 (set_attr "prefix" "vex")
3910 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v32qi: select elements 0..15 of a V32QI into a V16QI;
;; rejected when both operands are memory.  After reload it is split into a
;; plain move of operand 1 re-expressed as V16QImode (gen_rtx_REG on its
;; register number, or gen_lowpart).
3912 (define_insn_and_split "vec_extract_lo_v32qi"
3913 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3915 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3916 (parallel [(const_int 0) (const_int 1)
3917 (const_int 2) (const_int 3)
3918 (const_int 4) (const_int 5)
3919 (const_int 6) (const_int 7)
3920 (const_int 8) (const_int 9)
3921 (const_int 10) (const_int 11)
3922 (const_int 12) (const_int 13)
3923 (const_int 14) (const_int 15)])))]
3924 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3926 "&& reload_completed"
3929 rtx op1 = operands[1];
3931 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3933 op1 = gen_lowpart (V16QImode, op1);
3934 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: select elements 16..31 of a V32QI register into a
;; V16QI register or memory destination, emitted as vextractf128 with
;; immediate 1.
3938 (define_insn "vec_extract_hi_v32qi"
3939 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3941 (match_operand:V32QI 1 "register_operand" "x,x")
3942 (parallel [(const_int 16) (const_int 17)
3943 (const_int 18) (const_int 19)
3944 (const_int 20) (const_int 21)
3945 (const_int 22) (const_int 23)
3946 (const_int 24) (const_int 25)
3947 (const_int 26) (const_int 27)
3948 (const_int 28) (const_int 29)
3949 (const_int 30) (const_int 31)])))]
3951 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3952 [(set_attr "type" "sselog")
3953 (set_attr "prefix_extra" "1")
3954 (set_attr "length_immediate" "1")
3955 (set_attr "memory" "none,store")
3956 (set_attr "prefix" "vex")
3957 (set_attr "mode" "V8SF")])
;; *sse4_1_extractps: extract the element of V4SF register operand 1 named by
;; constant operand 2 (0..3) into an SF destination with constraint "rm";
;; emitted as %vextractps.
3959 (define_insn "*sse4_1_extractps"
3960 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3962 (match_operand:V4SF 1 "register_operand" "x")
3963 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3965 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3966 [(set_attr "type" "sselog")
3967 (set_attr "prefix_data16" "1")
3968 (set_attr "prefix_extra" "1")
3969 (set_attr "length_immediate" "1")
3970 (set_attr "prefix" "maybe_vex")
3971 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: extract element i (constant operand 2, 0..3) of a
;; V4SF held in offsettable memory.  After reload it is split into an SFmode
;; move from the same address plus i*4 bytes.
3973 (define_insn_and_split "*vec_extract_v4sf_mem"
3974 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3976 (match_operand:V4SF 1 "memory_operand" "o")
3977 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3980 "&& reload_completed"
3983 int i = INTVAL (operands[2]);
3985 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3989 ;; Modes handled by vec_extract patterns.
;; The 128-bit modes are listed unconditionally; the 256-bit float modes V8SF
;; and V4DF are included only under TARGET_AVX.
3990 (define_mode_iterator VEC_EXTRACT_MODE
3991 [V16QI V8HI V4SI V2DI
3992 (V8SF "TARGET_AVX") V4SF
3993 (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander over VEC_EXTRACT_MODE.  It forwards the scalar
;; destination, the vector register and the integer value of constant
;; operand 2 to ix86_expand_vector_extract, with false as the first argument.
3995 (define_expand "vec_extract<mode>"
3996 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3997 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3998 (match_operand 2 "const_int_operand" "")]
4001 ix86_expand_vector_extract (false, operands[0], operands[1],
4002 INTVAL (operands[2]));
4006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4008 ;; Parallel double-precision floating point element swizzling
4010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4012 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: V4DF vunpckhpd.  With operands 1 and 2 as the two inputs,
;; the selector list is 1,5,3,7.
4013 (define_insn "avx_unpckhpd256"
4014 [(set (match_operand:V4DF 0 "register_operand" "=x")
4017 (match_operand:V4DF 1 "register_operand" "x")
4018 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4019 (parallel [(const_int 1) (const_int 5)
4020 (const_int 3) (const_int 7)])))]
4022 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4023 [(set_attr "type" "sselog")
4024 (set_attr "prefix" "vex")
4025 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: three-step expander.  The visible selector lists
;; are 0,4,2,6 and 1,5,3,7 for the two intermediate steps and 2,3,6,7 for the
;; final set of operand 0.  Operands 3 and 4 are fresh V4DF pseudo registers
;; created in the preparation code.
4027 (define_expand "vec_interleave_highv4df"
4031 (match_operand:V4DF 1 "register_operand" "x")
4032 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4033 (parallel [(const_int 0) (const_int 4)
4034 (const_int 2) (const_int 6)])))
4040 (parallel [(const_int 1) (const_int 5)
4041 (const_int 3) (const_int 7)])))
4042 (set (match_operand:V4DF 0 "register_operand" "")
4047 (parallel [(const_int 2) (const_int 3)
4048 (const_int 6) (const_int 7)])))]
4051 operands[3] = gen_reg_rtx (V4DFmode);
4052 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander for the V2DF high interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operands,
;; operand 2 is forced into a V2DF register.
4056 (define_expand "vec_interleave_highv2df"
4057 [(set (match_operand:V2DF 0 "register_operand" "")
4060 (match_operand:V2DF 1 "nonimmediate_operand" "")
4061 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4062 (parallel [(const_int 1)
4066 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4067 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: six alternatives, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds:
;;   0/1  register operands                          -> (v)unpckhpd
;;   2    operand 1 in memory, operand 2 same as it  -> %vmovddup from %H1
;;   3/4  operand 1 in offsettable memory            -> (v)movlpd from %H1
;;   5    memory destination tied to operand 2       -> %vmovhpd store
4070 (define_insn "*vec_interleave_highv2df"
4071 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4074 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4075 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4076 (parallel [(const_int 1)
4078 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4080 unpckhpd\t{%2, %0|%0, %2}
4081 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4082 %vmovddup\t{%H1, %0|%0, %H1}
4083 movlpd\t{%H1, %0|%0, %H1}
4084 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4085 %vmovhpd\t{%1, %0|%0, %1}"
4086 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4087 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4088 (set_attr "prefix_data16" "*,*,*,1,*,1")
4089 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4090 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4092 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_movddup256: expander with a V4DF register destination and a single
;; V4DF register-or-memory source (operand 1); the selector list is 0,4,2,6.
4093 (define_expand "avx_movddup256"
4094 [(set (match_operand:V4DF 0 "register_operand" "")
4097 (match_operand:V4DF 1 "nonimmediate_operand" "")
4099 (parallel [(const_int 0) (const_int 4)
4100 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander with a V4DF register destination, a V4DF register
;; operand 1 and a V4DF register-or-memory operand 2; the selector list is
;; 0,4,2,6.
4103 (define_expand "avx_unpcklpd256"
4104 [(set (match_operand:V4DF 0 "register_operand" "")
4107 (match_operand:V4DF 1 "register_operand" "")
4108 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4109 (parallel [(const_int 0) (const_int 4)
4110 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: insn with selector list 0,4,2,6.  Alternative 0 requires
;; operand 2 to match operand 1 and emits vmovddup; alternative 1 emits
;; vunpcklpd.  The condition allows operand 1 to be memory only when it is
;; rtx_equal_p to operand 2.
4113 (define_insn "*avx_unpcklpd256"
4114 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4117 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4118 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4119 (parallel [(const_int 0) (const_int 4)
4120 (const_int 2) (const_int 6)])))]
4122 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4124 vmovddup\t{%1, %0|%0, %1}
4125 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4126 [(set_attr "type" "sselog")
4127 (set_attr "prefix" "vex")
4128 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: three-step expander.  The visible selector lists
;; are 0,4,2,6 and 1,5,3,7 for the two intermediate steps and 0,1,4,5 for the
;; final set of operand 0.  Operands 3 and 4 are fresh V4DF pseudo registers
;; created in the preparation code.
4130 (define_expand "vec_interleave_lowv4df"
4134 (match_operand:V4DF 1 "register_operand" "x")
4135 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4136 (parallel [(const_int 0) (const_int 4)
4137 (const_int 2) (const_int 6)])))
4143 (parallel [(const_int 1) (const_int 5)
4144 (const_int 3) (const_int 7)])))
4145 (set (match_operand:V4DF 0 "register_operand" "")
4150 (parallel [(const_int 0) (const_int 1)
4151 (const_int 4) (const_int 5)])))]
4154 operands[3] = gen_reg_rtx (V4DFmode);
4155 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander for the V2DF low interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operands,
;; operand 1 is forced into a V2DF register.
4158 (define_expand "vec_interleave_lowv2df"
4159 [(set (match_operand:V2DF 0 "register_operand" "")
4162 (match_operand:V2DF 1 "nonimmediate_operand" "")
4163 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4164 (parallel [(const_int 0)
4168 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4169 operands[1] = force_reg (V2DFmode, operands[1]);
;; *vec_interleave_lowv2df: six alternatives, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds:
;;   0/1  register operands                          -> (v)unpcklpd
;;   2    operand 1 in memory, operand 2 same as it  -> %vmovddup
;;   3/4  operand 2 in memory                        -> (v)movhpd
;;   5    offsettable-memory destination tied to operand 1
;;                                                   -> %vmovlpd store to %H0
4172 (define_insn "*vec_interleave_lowv2df"
4173 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4176 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4177 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4178 (parallel [(const_int 0)
4180 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4182 unpcklpd\t{%2, %0|%0, %2}
4183 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4184 %vmovddup\t{%1, %0|%0, %1}
4185 movhpd\t{%2, %0|%0, %2}
4186 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4187 %vmovlpd\t{%2, %H0|%H0, %2}"
4188 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4189 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4190 (set_attr "prefix_data16" "*,*,*,1,*,1")
4191 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4192 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from V2DF register operand 1: the register is viewed as
;; DFmode and stored twice, at byte offsets 0 and 8 of the destination.
4195 [(set (match_operand:V2DF 0 "memory_operand" "")
4198 (match_operand:V2DF 1 "register_operand" "")
4200 (parallel [(const_int 0)
4202 "TARGET_SSE3 && reload_completed"
4205 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4206 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4207 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split for a V2DF register destination fed from V2DF memory operand 1 with
;; two constant selectors.  It applies under TARGET_SSE3 when operand 3 equals
;; operand 2 plus 2, and rewrites the insn as a vec_duplicate of the DFmode
;; value at byte offset operand 2 * 8 of the memory operand.
4212 [(set (match_operand:V2DF 0 "register_operand" "")
4215 (match_operand:V2DF 1 "memory_operand" "")
4217 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4218 (match_operand:SI 3 "const_int_operand" "")])))]
4219 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4220 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4222 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; avx_shufpd256: expander taking a V4DF destination, two V4DF sources and an
;; SImode constant mask, forwarding explicit selectors to
;; gen_avx_shufpd256_1.  In the selector arguments shown, mask bit 1 chooses
;; 5 or 4, bit 2 chooses 3 or 2, and bit 3 chooses 7 or 6.
4225 (define_expand "avx_shufpd256"
4226 [(match_operand:V4DF 0 "register_operand" "")
4227 (match_operand:V4DF 1 "register_operand" "")
4228 (match_operand:V4DF 2 "nonimmediate_operand" "")
4229 (match_operand:SI 3 "const_int_operand" "")]
4232 int mask = INTVAL (operands[3]);
4233 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4235 GEN_INT (mask & 2 ? 5 : 4),
4236 GEN_INT (mask & 4 ? 3 : 2),
4237 GEN_INT (mask & 8 ? 7 : 6)));
;; avx_shufpd256_1: vshufpd on V4DF with four explicit selectors limited by
;; their predicates to 0..1, 4..5, 2..3 and 6..7.  The output code packs them
;; into a 4-bit immediate (bit 0 from operand 3, bits 1..3 from operands 4..6
;; after subtracting 4, 2 and 6) and stores it in operands[3].
4241 (define_insn "avx_shufpd256_1"
4242 [(set (match_operand:V4DF 0 "register_operand" "=x")
4245 (match_operand:V4DF 1 "register_operand" "x")
4246 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4247 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4248 (match_operand 4 "const_4_to_5_operand" "")
4249 (match_operand 5 "const_2_to_3_operand" "")
4250 (match_operand 6 "const_6_to_7_operand" "")])))]
4254 mask = INTVAL (operands[3]);
4255 mask |= (INTVAL (operands[4]) - 4) << 1;
4256 mask |= (INTVAL (operands[5]) - 2) << 2;
4257 mask |= (INTVAL (operands[6]) - 6) << 3;
4258 operands[3] = GEN_INT (mask);
4260 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4262 [(set_attr "type" "sselog")
4263 (set_attr "length_immediate" "1")
4264 (set_attr "prefix" "vex")
4265 (set_attr "mode" "V4DF")])
;; sse2_shufpd: expander taking a V2DF destination, two V2DF sources and an
;; SImode constant mask, forwarding explicit selectors to
;; gen_sse2_shufpd_v2df.  In the selector argument shown, mask bit 1 chooses
;; 3 or 2.
4267 (define_expand "sse2_shufpd"
4268 [(match_operand:V2DF 0 "register_operand" "")
4269 (match_operand:V2DF 1 "register_operand" "")
4270 (match_operand:V2DF 2 "nonimmediate_operand" "")
4271 (match_operand:SI 3 "const_int_operand" "")]
4274 int mask = INTVAL (operands[3]);
4275 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4277 GEN_INT (mask & 2 ? 3 : 2)));
4281 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is the only unconditional entry.  The integer modes and V2DF require
;; TARGET_SSE2; V8SF and V4DF require TARGET_AVX.
4282 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4283 [(V16QI "TARGET_SSE2")
4284 (V8HI "TARGET_SSE2")
4285 (V4SI "TARGET_SSE2")
4286 (V2DI "TARGET_SSE2")
4287 (V8SF "TARGET_AVX") V4SF
4288 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even<mode>: expander over VEC_EXTRACT_EVENODD_MODE with three
;; register operands; it calls ix86_expand_vec_extract_even_odd with 0 as the
;; last argument.
4290 (define_expand "vec_extract_even<mode>"
4291 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4292 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4293 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4296 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd<mode>: expander over VEC_EXTRACT_EVENODD_MODE with three
;; register operands; it calls ix86_expand_vec_extract_even_odd with 1 as the
;; last argument.
4300 (define_expand "vec_extract_odd<mode>"
4301 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4302 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4303 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4306 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4310 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; avx2_interleave_highv4di: V4DI pattern on register operand 1 and
;; register-or-memory operand 2, emitted as vpunpckhqdq; the mode attribute
;; is OI.
4311 (define_insn "avx2_interleave_highv4di"
4312 [(set (match_operand:V4DI 0 "register_operand" "=x")
4315 (match_operand:V4DI 1 "register_operand" "x")
4316 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4317 (parallel [(const_int 1)
4322 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4323 [(set_attr "type" "sselog")
4324 (set_attr "prefix" "vex")
4325 (set_attr "mode" "OI")])
;; vec_interleave_highv2di: V2DI pattern with two alternatives: punpckhqdq
;; with operand 1 tied to the destination (noavx) and three-operand
;; vpunpckhqdq (avx).  The mode attribute is TI.
4327 (define_insn "vec_interleave_highv2di"
4328 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4331 (match_operand:V2DI 1 "register_operand" "0,x")
4332 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4333 (parallel [(const_int 1)
4337 punpckhqdq\t{%2, %0|%0, %2}
4338 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4339 [(set_attr "isa" "noavx,avx")
4340 (set_attr "type" "sselog")
4341 (set_attr "prefix_data16" "1,*")
4342 (set_attr "prefix" "orig,vex")
4343 (set_attr "mode" "TI")])
;; avx2_interleave_lowv4di: V4DI pattern on register operand 1 and
;; register-or-memory operand 2, emitted as vpunpcklqdq; the mode attribute
;; is OI.
4345 (define_insn "avx2_interleave_lowv4di"
4346 [(set (match_operand:V4DI 0 "register_operand" "=x")
4349 (match_operand:V4DI 1 "register_operand" "x")
4350 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4351 (parallel [(const_int 0)
4356 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4357 [(set_attr "type" "sselog")
4358 (set_attr "prefix" "vex")
4359 (set_attr "mode" "OI")])
;; vec_interleave_lowv2di: V2DI pattern with two alternatives: punpcklqdq
;; with operand 1 tied to the destination (noavx) and three-operand
;; vpunpcklqdq (avx).  The mode attribute is TI.
4361 (define_insn "vec_interleave_lowv2di"
4362 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4365 (match_operand:V2DI 1 "register_operand" "0,x")
4366 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4367 (parallel [(const_int 0)
4371 punpcklqdq\t{%2, %0|%0, %2}
4372 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4373 [(set_attr "isa" "noavx,avx")
4374 (set_attr "type" "sselog")
4375 (set_attr "prefix_data16" "1,*")
4376 (set_attr "prefix" "orig,vex")
4377 (set_attr "mode" "TI")])
;; sse2_shufpd_<mode>: shufpd over the VI8F_128 modes, written as a vec_select
;; from the vec_concat of operands 1 and 2.  Operand 3 picks index 0..1 and
;; operand 4 picks index 2..3.  The output code packs them into a 2-bit
;; immediate (operand 4 minus 2 in bit 1) and stores it in operands[3].
;; Alternative 0 ties operand 1 to the destination (noavx); alternative 1 is
;; the three-operand form (avx).
4379 (define_insn "sse2_shufpd_<mode>"
4380 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4381 (vec_select:VI8F_128
4382 (vec_concat:<ssedoublevecmode>
4383 (match_operand:VI8F_128 1 "register_operand" "0,x")
4384 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4385 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4386 (match_operand 4 "const_2_to_3_operand" "")])))]
4390 mask = INTVAL (operands[3]);
4391 mask |= (INTVAL (operands[4]) - 2) << 1;
4392 operands[3] = GEN_INT (mask);
4394 switch (which_alternative)
4397 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4399 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4404 [(set_attr "isa" "noavx,avx")
4405 (set_attr "type" "sselog")
4406 (set_attr "length_immediate" "1")
4407 (set_attr "prefix" "orig,vex")
4408 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF operand into a DF destination.  Requires
;; TARGET_SSE2 and rejects the case where both operands are memory.  Six
;; alternatives are provided; the visible templates use %vmovhpd and
;; vunpckhpd.  prefix_data16 is computed specially for alternative 0 when
;; TARGET_AVX is off.
4410 ;; Avoid combining registers from different units in a single alternative,
4411 ;; see comment above inline_secondary_memory_needed function in i386.c
4412 (define_insn "sse2_storehpd"
4413 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4415 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4416 (parallel [(const_int 1)])))]
4417 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4419 %vmovhpd\t{%1, %0|%0, %1}
4421 vunpckhpd\t{%d1, %0|%0, %d1}
4425 [(set_attr "isa" "*,noavx,avx,*,*,*")
4426 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4427 (set (attr "prefix_data16")
4429 (and (eq_attr "alternative" "0")
4430 (not (match_test "TARGET_AVX")))
4432 (const_string "*")))
4433 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4434 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF memory operand into a DF
;; register is rewritten as a plain DF move from the same address plus 8
;; bytes (adjust_address on operand 1).
;; NOTE(review): the opening line of this definition is not visible here; its
;; shape (pattern, condition, replacement, preparation code) looks like a
;; define_split -- confirm.
4437 [(set (match_operand:DF 0 "register_operand" "")
4439 (match_operand:V2DF 1 "memory_operand" "")
4440 (parallel [(const_int 1)])))]
4441 "TARGET_SSE2 && reload_completed"
4442 [(set (match_dup 0) (match_dup 1))]
4443 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extraction of element 1 of a V2DF for targets with SSE but without SSE2.
;; The three alternatives use movhps (store to memory), movhlps (register to
;; register) and movlps from the high half of an offsettable memory source
;; (%H1).  Memory-to-memory is rejected by the condition.
4445 (define_insn "*vec_extractv2df_1_sse"
4446 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4448 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4449 (parallel [(const_int 1)])))]
4450 "!TARGET_SSE2 && TARGET_SSE
4451 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4453 movhps\t{%1, %0|%0, %1}
4454 movhlps\t{%1, %0|%0, %1}
4455 movlps\t{%H1, %0|%0, %H1}"
4456 [(set_attr "type" "ssemov")
4457 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF operand into a DF destination.  Requires
;; TARGET_SSE2 and rejects the case where both operands are memory.  Five
;; alternatives are provided; the visible template uses %vmovlpd, and
;; prefix_data16 is set only for alternative 0.
4459 ;; Avoid combining registers from different units in a single alternative,
4460 ;; see comment above inline_secondary_memory_needed function in i386.c
4461 (define_insn "sse2_storelpd"
4462 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4464 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4465 (parallel [(const_int 0)])))]
4466 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4468 %vmovlpd\t{%1, %0|%0, %1}
4473 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4474 (set_attr "prefix_data16" "1,*,*,*,*")
4475 (set_attr "prefix" "maybe_vex")
4476 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF operand into a DF register
;; is rewritten as an ordinary DF move.  The preparation code re-expresses
;; operand 1 in DFmode, via gen_rtx_REG on its register number or via
;; gen_lowpart, and emits the move with emit_move_insn.
;; NOTE(review): the opening line of this definition and the test that picks
;; between the two DFmode conversions are not visible here -- confirm.
4479 [(set (match_operand:DF 0 "register_operand" "")
4481 (match_operand:V2DF 1 "nonimmediate_operand" "")
4482 (parallel [(const_int 0)])))]
4483 "TARGET_SSE2 && reload_completed"
4486 rtx op1 = operands[1];
4488 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4490 op1 = gen_lowpart (DFmode, op1);
4491 emit_move_insn (operands[0], op1);
;; Extraction of element 0 of a V2DF for targets with SSE but without SSE2.
;; The three alternatives use movlps (store to memory), movaps (register to
;; register) and movlps (load from memory).  Memory-to-memory is rejected by
;; the condition.
4495 (define_insn "*vec_extractv2df_0_sse"
4496 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4498 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4499 (parallel [(const_int 0)])))]
4500 "!TARGET_SSE2 && TARGET_SSE
4501 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4503 movlps\t{%1, %0|%0, %1}
4504 movaps\t{%1, %0|%0, %1}
4505 movlps\t{%1, %0|%0, %1}"
4506 [(set_attr "type" "ssemov")
4507 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse2_loadhpd.  It takes element 0 of V2DF operand 1
;; together with DF operand 2.  The operands are first legitimized by
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; sse2_loadhpd insn is emitted on that destination and, if it differs from
;; operand 0, the result is copied back with a move.
4509 (define_expand "sse2_loadhpd_exp"
4510 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4513 (match_operand:V2DF 1 "nonimmediate_operand" "")
4514 (parallel [(const_int 0)]))
4515 (match_operand:DF 2 "nonimmediate_operand" "")))]
4518 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4520 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4522 /* Fix up the destination if needed. */
4523 if (dst != operands[0])
4524 emit_move_insn (operands[0], dst);
;; Combine element 0 of V2DF operand 1 with DF operand 2 into a V2DF result.
;; Requires TARGET_SSE2 and rejects the case where operands 1 and 2 are both
;; memory.  Seven alternatives are provided; the visible templates are
;; movhpd/vmovhpd and unpcklpd/vunpcklpd.
4529 ;; Avoid combining registers from different units in a single alternative,
4530 ;; see comment above inline_secondary_memory_needed function in i386.c
4531 (define_insn "sse2_loadhpd"
4532 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4536 (match_operand:V2DF 1 "nonimmediate_operand"
4538 (parallel [(const_int 0)]))
4539 (match_operand:DF 2 "nonimmediate_operand"
4540 " m,m,x,x,x,*f,r")))]
4541 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4543 movhpd\t{%2, %0|%0, %2}
4544 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4545 unpcklpd\t{%2, %0|%0, %2}
4546 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4550 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4551 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4552 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4553 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4554 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, when the V2DF destination is memory and the element-0 source
;; is that same memory (match_dup 0), combining it with DF register operand 1
;; is rewritten as a plain DF store to the destination address plus 8 bytes.
;; NOTE(review): the opening line of this definition is not visible here; its
;; shape looks like a define_split -- confirm.
4557 [(set (match_operand:V2DF 0 "memory_operand" "")
4559 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4560 (match_operand:DF 1 "register_operand" "")))]
4561 "TARGET_SSE2 && reload_completed"
4562 [(set (match_dup 0) (match_dup 1))]
4563 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapping sse2_loadlpd.  It takes DF operand 2 together with
;; element 1 of V2DF operand 1.  The operands are first legitimized by
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; sse2_loadlpd insn is emitted on that destination and, if it differs from
;; operand 0, the result is copied back with a move.
4565 (define_expand "sse2_loadlpd_exp"
4566 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4568 (match_operand:DF 2 "nonimmediate_operand" "")
4570 (match_operand:V2DF 1 "nonimmediate_operand" "")
4571 (parallel [(const_int 1)]))))]
4574 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4576 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4578 /* Fix up the destination if needed. */
4579 if (dst != operands[0])
4580 emit_move_insn (operands[0], dst);
;; Combine DF operand 2 with element 1 of V2DF operand 1 into a V2DF result.
;; Requires TARGET_SSE2 and rejects the case where operands 1 and 2 are both
;; memory.  Eleven alternatives are provided; the visible templates use
;; movsd, movlpd, shufpd with immediate 2, and movhpd from the high half of
;; operand 1 (%H1), plus VEX forms.  Alternative 5 is typed sselog,
;; alternatives 9 and 10 fmov and imov, and the rest ssemov.
4585 ;; Avoid combining registers from different units in a single alternative,
4586 ;; see comment above inline_secondary_memory_needed function in i386.c
4587 (define_insn "sse2_loadlpd"
4588 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4589 "=x,x,x,x,x,x,x,x,m,m ,m")
4591 (match_operand:DF 2 "nonimmediate_operand"
4592 " m,m,m,x,x,0,0,x,x,*f,r")
4594 (match_operand:V2DF 1 "vector_move_operand"
4595 " C,0,x,0,x,x,o,o,0,0 ,0")
4596 (parallel [(const_int 1)]))))]
4597 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4599 %vmovsd\t{%2, %0|%0, %2}
4600 movlpd\t{%2, %0|%0, %2}
4601 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4602 movsd\t{%2, %0|%0, %2}
4603 vmovsd\t{%2, %1, %0|%0, %1, %2}
4604 shufpd\t{$2, %1, %0|%0, %1, 2}
4605 movhpd\t{%H1, %0|%0, %H1}
4606 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4610 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4612 (cond [(eq_attr "alternative" "5")
4613 (const_string "sselog")
4614 (eq_attr "alternative" "9")
4615 (const_string "fmov")
4616 (eq_attr "alternative" "10")
4617 (const_string "imov")
4619 (const_string "ssemov")))
4620 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4621 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4622 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4623 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; After reload, when the V2DF destination is memory and the element-1 source
;; is that same memory (match_dup 0), only element 0 changes: it receives DF
;; register operand 1.  The operation is therefore a plain DF store to the
;; start of the vector, i.e. byte offset 0.  An offset of 8 would overwrite
;; the element that must be preserved and leave element 0 stale.
;; NOTE(review): the opening line of this definition is not visible here; its
;; shape looks like a define_split -- confirm.
4626 [(set (match_operand:V2DF 0 "memory_operand" "")
4628 (match_operand:DF 1 "register_operand" "")
4629 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4630 "TARGET_SSE2 && reload_completed"
4631 [(set (match_dup 0) (match_dup 1))]
4632 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Merge of two V2DF operands with nine alternatives.  Depending on which
;; operands are registers, memory, or tied to the destination, the insn is
;; emitted as movsd, movlpd, shufpd with immediate 2, or movhps on the high
;; half of a memory operand (%H0/%H1), with VEX forms where marked.
;; Alternative 5 is typed sselog and carries a one-byte immediate; the others
;; are ssemov.
4634 (define_insn "sse2_movsd"
4635 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4637 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4638 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4642 movsd\t{%2, %0|%0, %2}
4643 vmovsd\t{%2, %1, %0|%0, %1, %2}
4644 movlpd\t{%2, %0|%0, %2}
4645 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4646 %vmovlpd\t{%2, %0|%0, %2}
4647 shufpd\t{$2, %1, %0|%0, %1, 2}
4648 movhps\t{%H1, %0|%0, %H1}
4649 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4650 %vmovhps\t{%1, %H0|%H0, %1}"
4651 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4654 (eq_attr "alternative" "5")
4655 (const_string "sselog")
4656 (const_string "ssemov")))
4657 (set (attr "prefix_data16")
4659 (and (eq_attr "alternative" "2,4")
4660 (not (match_test "TARGET_AVX")))
4662 (const_string "*")))
4663 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4664 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4665 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander producing a V2DF register result from DF operand 1, which may
;; initially be a register or memory.  The preparation code can force operand
;; 1 into a DFmode register.
;; NOTE(review): whatever condition guards the force_reg call is not visible
;; here -- confirm.
4667 (define_expand "vec_dupv2df"
4668 [(set (match_operand:V2DF 0 "register_operand" "")
4670 (match_operand:DF 1 "nonimmediate_operand" "")))]
4674 operands[1] = force_reg (DFmode, operands[1]);
;; V2DF result built from DF operand 1 (register or memory) and emitted as
;; %vmovddup, i.e. movddup with an optional VEX prefix (prefix "maybe_vex").
4677 (define_insn "*vec_dupv2df_sse3"
4678 [(set (match_operand:V2DF 0 "register_operand" "=x")
4680 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4682 "%vmovddup\t{%1, %0|%0, %1}"
4683 [(set_attr "type" "sselog1")
4684 (set_attr "prefix" "maybe_vex")
4685 (set_attr "mode" "DF")])
;; V2DF result built from DF register operand 1, which must already live in
;; the destination register (constraint "0").
;; NOTE(review): the insn condition and output template are not visible here.
4687 (define_insn "*vec_dupv2df"
4688 [(set (match_operand:V2DF 0 "register_operand" "=x")
4690 (match_operand:DF 1 "register_operand" "0")))]
4693 [(set_attr "type" "sselog1")
4694 (set_attr "mode" "V2DF")])
;; V2DF concatenation form whose only visible input is DF operand 1 (register
;; or memory); it is emitted as %vmovddup of that operand.
4696 (define_insn "*vec_concatv2df_sse3"
4697 [(set (match_operand:V2DF 0 "register_operand" "=x")
4699 (match_operand:DF 1 "nonimmediate_operand" "xm")
4702 "%vmovddup\t{%1, %0|%0, %1}"
4703 [(set_attr "type" "sselog1")
4704 (set_attr "prefix" "maybe_vex")
4705 (set_attr "mode" "DF")])
;; Build a V2DF register from DF operands 1 and 2, with seven alternatives:
;; unpcklpd/vunpcklpd for two registers, movhpd/vmovhpd when operand 2 is in
;; memory, %vmovsd of operand 1 when operand 2 matches the "C" constraint,
;; and movlhps/movhps as the noavx fallbacks.  Alternatives 0 and 1 are typed
;; sselog; the others are ssemov.
4707 (define_insn "*vec_concatv2df"
4708 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4710 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4711 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4714 unpcklpd\t{%2, %0|%0, %2}
4715 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4716 movhpd\t{%2, %0|%0, %2}
4717 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4718 %vmovsd\t{%1, %0|%0, %1}
4719 movlhps\t{%2, %0|%0, %2}
4720 movhps\t{%2, %0|%0, %2}"
4721 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4724 (eq_attr "alternative" "0,1")
4725 (const_string "sselog")
4726 (const_string "ssemov")))
4727 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4728 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4729 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4733 ;; Parallel integral arithmetic
4735 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_128 modes.  The preparation code sets
;; operand 2 to a register holding the all-zero vector of <MODE>mode.
;; NOTE(review): presumably the template subtracts operand 1 from that zero
;; register; the RTL operator line is not visible here -- confirm.
4737 (define_expand "neg<mode>2"
4738 [(set (match_operand:VI_128 0 "register_operand" "")
4741 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4743 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the VI_AVX2 modes.  Both inputs may be registers
;; or memory; ix86_fixup_binary_operands_no_copy legitimizes the operand
;; combination for <CODE> before the insn is matched.
4745 (define_expand "<plusminus_insn><mode>3"
4746 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4748 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4749 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4751 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Add/subtract insn for the VI_AVX2 modes.  Requires TARGET_SSE2 and an
;; operand combination accepted by ix86_binary_operator_ok.  The noavx
;; alternative is the two-operand p<op><suffix> with operand 1 tied to the
;; destination; the avx alternative is the three-operand vp<op><suffix>.
4753 (define_insn "*<plusminus_insn><mode>3"
4754 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4756 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4757 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4758 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4760 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4761 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4762 [(set_attr "isa" "noavx,avx")
4763 (set_attr "type" "sseiadd")
4764 (set_attr "prefix_data16" "1,*")
4765 (set_attr "prefix" "orig,vex")
4766 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus operations on the VI12_AVX2 modes.  Both
;; inputs may be registers or memory; ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for <CODE>.
4768 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4769 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4770 (sat_plusminus:VI12_AVX2
4771 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4772 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4774 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus operations on the VI12_AVX2 modes.  Requires
;; TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.  The noavx alternative is the two-operand form
;; with operand 1 tied to the destination; the avx alternative is the
;; three-operand VEX form.
4776 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4777 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4778 (sat_plusminus:VI12_AVX2
4779 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4780 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4781 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4783 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4784 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4785 [(set_attr "isa" "noavx,avx")
4786 (set_attr "type" "sseiadd")
4787 (set_attr "prefix_data16" "1,*")
4788 (set_attr "prefix" "orig,vex")
4789 (set_attr "mode" "TI")])
;; V16QI multiply, split while new pseudos can still be created.  Six V16QI
;; temporaries are allocated.  Each input is byte-interleaved with itself
;; (high half and low half), the resulting vectors are multiplied as V8HI
;; words with mulv8hi3, and the even bytes of the two products are merged
;; into operand 0.  A REG_EQUAL note carrying the original MULT is attached
;; to the last emitted insn.
4791 (define_insn_and_split "mulv16qi3"
4792 [(set (match_operand:V16QI 0 "register_operand" "")
4793 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4794 (match_operand:V16QI 2 "register_operand" "")))]
4796 && can_create_pseudo_p ()"
4804 for (i = 0; i < 6; ++i)
4805 t[i] = gen_reg_rtx (V16QImode);
4807 /* Unpack data such that we've got a source byte in each low byte of
4808 each word. We don't care what goes into the high byte of each word.
4809 Rather than trying to get zero in there, most convenient is to let
4810 it be a copy of the low byte. */
4811 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4812 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4813 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4814 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4816 /* Multiply words. The end-of-line annotations here give a picture of what
4817 the output of that instruction looks like. Dot means don't care; the
4818 letters are the bytes of the result with A being the most significant. */
4819 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4820 gen_lowpart (V8HImode, t[0]),
4821 gen_lowpart (V8HImode, t[1])));
4822 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4823 gen_lowpart (V8HImode, t[2]),
4824 gen_lowpart (V8HImode, t[3])));
4826 /* Extract the even bytes and merge them back together. */
4827 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4829 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4830 gen_rtx_MULT (V16QImode, operands[1], operands[2]));
;; Multiply expander for the VI2_AVX2 modes.  Both inputs may be registers or
;; memory; ix86_fixup_binary_operands_no_copy legitimizes the operand
;; combination for MULT.
4834 (define_expand "mul<mode>3"
4835 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4836 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4837 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4839 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for the VI2_AVX2 modes, emitted as pmullw (noavx, operand 1
;; tied to the destination) or the three-operand vpmullw (avx).  Operand 1
;; is marked commutative with "%".  Requires TARGET_SSE2 and an operand
;; combination accepted by ix86_binary_operator_ok.
4841 (define_insn "*mul<mode>3"
4842 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4843 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4844 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4845 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4847 pmullw\t{%2, %0|%0, %2}
4848 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4849 [(set_attr "isa" "noavx,avx")
4850 (set_attr "type" "sseimul")
4851 (set_attr "prefix_data16" "1,*")
4852 (set_attr "prefix" "orig,vex")
4853 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes.  The template extends
;; both inputs to <ssedoublemode> (any_extend), multiplies them there and
;; applies a logical right shift.  ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for MULT.
4855 (define_expand "<s>mul<mode>3_highpart"
4856 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4858 (lshiftrt:<ssedoublemode>
4859 (mult:<ssedoublemode>
4860 (any_extend:<ssedoublemode>
4861 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4862 (any_extend:<ssedoublemode>
4863 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4866 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for the VI2_AVX2 modes, emitted as pmulh<u>w
;; (noavx, operand 1 tied to the destination) or the three-operand
;; vpmulh<u>w (avx).  Operand 1 is marked commutative with "%".  Requires
;; TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.
4868 (define_insn "*<s>mul<mode>3_highpart"
4869 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4871 (lshiftrt:<ssedoublemode>
4872 (mult:<ssedoublemode>
4873 (any_extend:<ssedoublemode>
4874 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4875 (any_extend:<ssedoublemode>
4876 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4878 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4880 pmulh<u>w\t{%2, %0|%0, %2}
4881 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4882 [(set_attr "isa" "noavx,avx")
4883 (set_attr "type" "sseimul")
4884 (set_attr "prefix_data16" "1,*")
4885 (set_attr "prefix" "orig,vex")
4886 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a V4DI product of two V8SI operands in which elements 0, 2,
;; 4 and 6 of each input are selected.  ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for MULT in V8SImode.
4888 (define_expand "avx2_umulv4siv4di3"
4889 [(set (match_operand:V4DI 0 "register_operand" "")
4893 (match_operand:V8SI 1 "nonimmediate_operand" "")
4894 (parallel [(const_int 0) (const_int 2)
4895 (const_int 4) (const_int 6)])))
4898 (match_operand:V8SI 2 "nonimmediate_operand" "")
4899 (parallel [(const_int 0) (const_int 2)
4900 (const_int 4) (const_int 6)])))))]
4902 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the V4DI product of elements 0, 2, 4 and 6 of two V8SI operands,
;; emitted as the three-operand vpmuludq.  Operand 1 is marked commutative
;; with "%".  Requires TARGET_AVX2 and an operand combination accepted by
;; ix86_binary_operator_ok.
;; NOTE(review): the name uses the prefix "avx" although the condition is
;; TARGET_AVX2 and the expander above is named "avx2_umulv4siv4di3" --
;; consider renaming for consistency.
4904 (define_insn "*avx_umulv4siv4di3"
4905 [(set (match_operand:V4DI 0 "register_operand" "=x")
4909 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4910 (parallel [(const_int 0) (const_int 2)
4911 (const_int 4) (const_int 6)])))
4914 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4915 (parallel [(const_int 0) (const_int 2)
4916 (const_int 4) (const_int 6)])))))]
4917 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4918 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4919 [(set_attr "type" "sseimul")
4920 (set_attr "prefix" "vex")
4921 (set_attr "mode" "OI")])
;; Expander for a V2DI product of two V4SI operands in which elements 0 and
;; 2 of each input are selected.  ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for MULT in V4SImode.
4923 (define_expand "sse2_umulv2siv2di3"
4924 [(set (match_operand:V2DI 0 "register_operand" "")
4928 (match_operand:V4SI 1 "nonimmediate_operand" "")
4929 (parallel [(const_int 0) (const_int 2)])))
4932 (match_operand:V4SI 2 "nonimmediate_operand" "")
4933 (parallel [(const_int 0) (const_int 2)])))))]
4935 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the V2DI product of elements 0 and 2 of two V4SI operands,
;; emitted as pmuludq (noavx, operand 1 tied to the destination) or the
;; three-operand vpmuludq (avx).  Operand 1 is marked commutative with "%".
;; Requires TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.
4937 (define_insn "*sse2_umulv2siv2di3"
4938 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4942 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4943 (parallel [(const_int 0) (const_int 2)])))
4946 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4947 (parallel [(const_int 0) (const_int 2)])))))]
4948 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4950 pmuludq\t{%2, %0|%0, %2}
4951 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4952 [(set_attr "isa" "noavx,avx")
4953 (set_attr "type" "sseimul")
4954 (set_attr "prefix_data16" "1,*")
4955 (set_attr "prefix" "orig,vex")
4956 (set_attr "mode" "TI")])
;; Expander for a V4DI product of two V8SI operands in which elements 0, 2,
;; 4 and 6 of each input are selected.  ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for MULT in V8SImode.
4958 (define_expand "avx2_mulv4siv4di3"
4959 [(set (match_operand:V4DI 0 "register_operand" "")
4963 (match_operand:V8SI 1 "nonimmediate_operand" "")
4964 (parallel [(const_int 0) (const_int 2)
4965 (const_int 4) (const_int 6)])))
4968 (match_operand:V8SI 2 "nonimmediate_operand" "")
4969 (parallel [(const_int 0) (const_int 2)
4970 (const_int 4) (const_int 6)])))))]
4972 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the V4DI product of elements 0, 2, 4 and 6 of two V8SI operands,
;; emitted as the three-operand vpmuldq.  Requires TARGET_AVX2 and an
;; operand combination accepted by ix86_binary_operator_ok.
;; NOTE(review): unlike the vpmuludq sibling, operand 1 carries no "%"
;; commutativity marker -- confirm whether that is intentional.
4974 (define_insn "*avx2_mulv4siv4di3"
4975 [(set (match_operand:V4DI 0 "register_operand" "=x")
4979 (match_operand:V8SI 1 "nonimmediate_operand" "x")
4980 (parallel [(const_int 0) (const_int 2)
4981 (const_int 4) (const_int 6)])))
4984 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4985 (parallel [(const_int 0) (const_int 2)
4986 (const_int 4) (const_int 6)])))))]
4987 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4988 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4989 [(set_attr "isa" "avx")
4990 (set_attr "type" "sseimul")
4991 (set_attr "prefix_extra" "1")
4992 (set_attr "prefix" "vex")
4993 (set_attr "mode" "OI")])
;; Expander for a V2DI product of two V4SI operands in which elements 0 and
;; 2 of each input are selected.  ix86_fixup_binary_operands_no_copy
;; legitimizes the operand combination for MULT in V4SImode.
4995 (define_expand "sse4_1_mulv2siv2di3"
4996 [(set (match_operand:V2DI 0 "register_operand" "")
5000 (match_operand:V4SI 1 "nonimmediate_operand" "")
5001 (parallel [(const_int 0) (const_int 2)])))
5004 (match_operand:V4SI 2 "nonimmediate_operand" "")
5005 (parallel [(const_int 0) (const_int 2)])))))]
5007 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the V2DI product of elements 0 and 2 of two V4SI operands,
;; emitted as pmuldq (noavx, operand 1 tied to the destination) or the
;; three-operand vpmuldq (avx).  Operand 1 is marked commutative with "%".
;; Requires TARGET_SSE4_1 and an operand combination accepted by
;; ix86_binary_operator_ok.
5009 (define_insn "*sse4_1_mulv2siv2di3"
5010 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5014 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5015 (parallel [(const_int 0) (const_int 2)])))
5018 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5019 (parallel [(const_int 0) (const_int 2)])))))]
5020 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5022 pmuldq\t{%2, %0|%0, %2}
5023 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5024 [(set_attr "isa" "noavx,avx")
5025 (set_attr "type" "sseimul")
5026 (set_attr "prefix_data16" "1,*")
5027 (set_attr "prefix_extra" "1")
5028 (set_attr "prefix" "orig,vex")
5029 (set_attr "mode" "TI")])
;; Expander for the AVX2 pmaddwd operation: a V8SI result computed from two
;; V16HI operands.  The template uses two element selections of each input,
;; one starting at index 0 and one (V8HI, via match_dup) starting at index 1
;; and ending at index 15.  ix86_fixup_binary_operands_no_copy legitimizes
;; the operand combination for MULT in V16HImode.
5031 (define_expand "avx2_pmaddwd"
5032 [(set (match_operand:V8SI 0 "register_operand" "")
5037 (match_operand:V16HI 1 "nonimmediate_operand" "")
5038 (parallel [(const_int 0)
5048 (match_operand:V16HI 2 "nonimmediate_operand" "")
5049 (parallel [(const_int 0)
5059 (vec_select:V8HI (match_dup 1)
5060 (parallel [(const_int 1)
5069 (vec_select:V8HI (match_dup 2)
5070 (parallel [(const_int 1)
5077 (const_int 15)]))))))]
5079 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the SSE2 pmaddwd operation: a V4SI result computed from two
;; V8HI operands.  The template uses two element selections of each input,
;; one starting at index 0 and one (V4HI, via match_dup) starting at index 1
;; and ending at index 7.  ix86_fixup_binary_operands_no_copy legitimizes
;; the operand combination for MULT in V8HImode.
5081 (define_expand "sse2_pmaddwd"
5082 [(set (match_operand:V4SI 0 "register_operand" "")
5087 (match_operand:V8HI 1 "nonimmediate_operand" "")
5088 (parallel [(const_int 0)
5094 (match_operand:V8HI 2 "nonimmediate_operand" "")
5095 (parallel [(const_int 0)
5101 (vec_select:V4HI (match_dup 1)
5102 (parallel [(const_int 1)
5107 (vec_select:V4HI (match_dup 2)
5108 (parallel [(const_int 1)
5111 (const_int 7)]))))))]
5113 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn matching the AVX2 pmaddwd template on V16HI operands and emitting
;; the three-operand vpmaddwd.  Operand 1 is marked commutative with "%".
;; Requires TARGET_AVX2 and an operand combination accepted by
;; ix86_binary_operator_ok.
5115 (define_insn "*avx2_pmaddwd"
5116 [(set (match_operand:V8SI 0 "register_operand" "=x")
5121 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5122 (parallel [(const_int 0)
5132 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5133 (parallel [(const_int 0)
5143 (vec_select:V8HI (match_dup 1)
5144 (parallel [(const_int 1)
5153 (vec_select:V8HI (match_dup 2)
5154 (parallel [(const_int 1)
5161 (const_int 15)]))))))]
5162 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5163 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5164 [(set_attr "type" "sseiadd")
5165 (set_attr "prefix" "vex")
5166 (set_attr "mode" "OI")])
;; Insn matching the SSE2 pmaddwd template on V8HI operands, emitted as
;; pmaddwd (noavx, operand 1 tied to the destination) or the three-operand
;; vpmaddwd (avx).  Operand 1 is marked commutative with "%".  Requires
;; TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.
5168 (define_insn "*sse2_pmaddwd"
5169 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5174 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5175 (parallel [(const_int 0)
5181 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5182 (parallel [(const_int 0)
5188 (vec_select:V4HI (match_dup 1)
5189 (parallel [(const_int 1)
5194 (vec_select:V4HI (match_dup 2)
5195 (parallel [(const_int 1)
5198 (const_int 7)]))))))]
5199 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5201 pmaddwd\t{%2, %0|%0, %2}
5202 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5203 [(set_attr "isa" "noavx,avx")
5204 (set_attr "type" "sseiadd")
5205 (set_attr "atom_unit" "simul")
5206 (set_attr "prefix_data16" "1,*")
5207 (set_attr "prefix" "orig,vex")
5208 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX2 modes, with register-only inputs.
;; When SSE4.1 or AVX is available, the operands are passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
5210 (define_expand "mul<mode>3"
5211 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5212 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5213 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5216 if (TARGET_SSE4_1 || TARGET_AVX)
5217 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Multiply insn for the VI4_AVX2 modes, emitted as pmulld (noavx, operand 1
;; tied to the destination) or the three-operand vpmulld (avx).  Operand 1
;; is marked commutative with "%".  Requires TARGET_SSE4_1 and an operand
;; combination accepted by ix86_binary_operator_ok.
5220 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5221 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5222 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5223 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5224 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5226 pmulld\t{%2, %0|%0, %2}
5227 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5228 [(set_attr "isa" "noavx,avx")
5229 (set_attr "type" "sseimul")
5230 (set_attr "prefix_extra" "1")
5231 (set_attr "prefix" "orig,vex")
5232 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, split while new
;; pseudos can still be created.  Two sse2_umulv2siv2di3 multiplies are
;; used: one on the inputs as given (elements 2 and 0) and one on copies
;; shifted down with sse2_lshrv1ti3 so that elements 3 and 1 occupy those
;; slots.  The two products are then shuffled with sse2_pshufd_1 and merged
;; with vec_interleave_lowv4si.  A REG_EQUAL note carrying the original MULT
;; is attached to the last emitted insn.
5234 (define_insn_and_split "*sse2_mulv4si3"
5235 [(set (match_operand:V4SI 0 "register_operand" "")
5236 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5237 (match_operand:V4SI 2 "register_operand" "")))]
5238 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5239 && can_create_pseudo_p ()"
5244 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5250 t1 = gen_reg_rtx (V4SImode);
5251 t2 = gen_reg_rtx (V4SImode);
5252 t3 = gen_reg_rtx (V4SImode);
5253 t4 = gen_reg_rtx (V4SImode);
5254 t5 = gen_reg_rtx (V4SImode);
5255 t6 = gen_reg_rtx (V4SImode);
5256 thirtytwo = GEN_INT (32);
5258 /* Multiply elements 2 and 0. */
5259 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5262 /* Shift both input vectors down one element, so that elements 3
5263 and 1 are now in the slots for elements 2 and 0. For K8, at
5264 least, this is faster than using a shuffle. */
5265 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5266 gen_lowpart (V1TImode, op1),
5268 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5269 gen_lowpart (V1TImode, op2),
5271 /* Multiply elements 3 and 1. */
5272 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5275 /* Move the results in element 2 down to element 1; we don't care
5276 what goes in elements 2 and 3. */
5277 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5278 const0_rtx, const0_rtx));
5279 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5280 const0_rtx, const0_rtx));
5282 /* Merge the parts back together. */
5283 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5285 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5286 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; V2DI multiply, split while new pseudos can still be created, into 32-bit
;; multiplies.  Two code paths are visible.  One uses the XOP helpers: a
;; pshufd of op1, a V4SI multiply, xop_phadddq to add adjacent products, a
;; left shift by 32 and xop_pmacsdql to add the low product.  The other
;; computes low*low plus the two low*high cross products (each shifted left
;; by 32) with sse2_umulv2siv2di3 and sums them with addv2di3.  A REG_EQUAL
;; note carrying the original MULT is attached to the last emitted insn.
5290 (define_insn_and_split "mulv2di3"
5291 [(set (match_operand:V2DI 0 "register_operand" "")
5292 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5293 (match_operand:V2DI 2 "register_operand" "")))]
5295 && can_create_pseudo_p ()"
5300 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5309 /* op1: A,B,C,D, op2: E,F,G,H */
5310 op1 = gen_lowpart (V4SImode, op1);
5311 op2 = gen_lowpart (V4SImode, op2);
5313 t1 = gen_reg_rtx (V4SImode);
5314 t2 = gen_reg_rtx (V4SImode);
5315 t3 = gen_reg_rtx (V2DImode);
5316 t4 = gen_reg_rtx (V2DImode);
5319 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5325 /* t2: (B*E),(A*F),(D*G),(C*H) */
5326 emit_insn (gen_mulv4si3 (t2, t1, op2));
5328 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5329 emit_insn (gen_xop_phadddq (t3, t2));
5331 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5332 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5334 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5335 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5339 t1 = gen_reg_rtx (V2DImode);
5340 t2 = gen_reg_rtx (V2DImode);
5341 t3 = gen_reg_rtx (V2DImode);
5342 t4 = gen_reg_rtx (V2DImode);
5343 t5 = gen_reg_rtx (V2DImode);
5344 t6 = gen_reg_rtx (V2DImode);
5345 thirtytwo = GEN_INT (32);
5347 /* Multiply low parts. */
5348 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5349 gen_lowpart (V4SImode, op2)));
5351 /* Shift input vectors right 32 bits so we can multiply high parts. */
5352 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5353 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5355 /* Multiply high parts by low parts. */
5356 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5357 gen_lowpart (V4SImode, t3)));
5358 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5359 gen_lowpart (V4SImode, t2)));
5361 /* Shift them back. */
5362 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5363 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5365 /* Add the three parts together. */
5366 emit_insn (gen_addv2di3 (t6, t1, t4));
5367 emit_insn (gen_addv2di3 (op0, t6, t5));
5370 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5371 gen_rtx_MULT (V2DImode, operands[1], operands[2]));
;; Widening signed multiply of V8HI operands into V4SI, high half.  The
;; 16-bit low products (mulv8hi3) and 16-bit high products
;; (smulv8hi3_highpart) are computed into temporaries, and their high halves
;; are interleaved into operand 0 viewed as V8HI.
5375 (define_expand "vec_widen_smult_hi_v8hi"
5376 [(match_operand:V4SI 0 "register_operand" "")
5377 (match_operand:V8HI 1 "register_operand" "")
5378 (match_operand:V8HI 2 "register_operand" "")]
5381 rtx op1, op2, t1, t2, dest;
5385 t1 = gen_reg_rtx (V8HImode);
5386 t2 = gen_reg_rtx (V8HImode);
5387 dest = gen_lowpart (V8HImode, operands[0]);
5389 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5390 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5391 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply of V8HI operands into V4SI, low half.  The
;; 16-bit low products (mulv8hi3) and 16-bit high products
;; (smulv8hi3_highpart) are computed into temporaries, and their low halves
;; are interleaved into operand 0 viewed as V8HI.
5395 (define_expand "vec_widen_smult_lo_v8hi"
5396 [(match_operand:V4SI 0 "register_operand" "")
5397 (match_operand:V8HI 1 "register_operand" "")
5398 (match_operand:V8HI 2 "register_operand" "")]
5401 rtx op1, op2, t1, t2, dest;
5405 t1 = gen_reg_rtx (V8HImode);
5406 t2 = gen_reg_rtx (V8HImode);
5407 dest = gen_lowpart (V8HImode, operands[0]);
5409 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5410 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5411 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply of V8HI operands into V4SI, high half.  The
;; 16-bit low products (mulv8hi3) and 16-bit high products
;; (umulv8hi3_highpart) are computed into temporaries, and their high halves
;; are interleaved into operand 0 viewed as V8HI.
5415 (define_expand "vec_widen_umult_hi_v8hi"
5416 [(match_operand:V4SI 0 "register_operand" "")
5417 (match_operand:V8HI 1 "register_operand" "")
5418 (match_operand:V8HI 2 "register_operand" "")]
5421 rtx op1, op2, t1, t2, dest;
5425 t1 = gen_reg_rtx (V8HImode);
5426 t2 = gen_reg_rtx (V8HImode);
5427 dest = gen_lowpart (V8HImode, operands[0]);
5429 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5430 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5431 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply of V8HI operands into V4SI, low half.  The
;; 16-bit low products (mulv8hi3) and 16-bit high products
;; (umulv8hi3_highpart) are computed into temporaries, and their low halves
;; are interleaved into operand 0 viewed as V8HI.
5435 (define_expand "vec_widen_umult_lo_v8hi"
5436 [(match_operand:V4SI 0 "register_operand" "")
5437 (match_operand:V8HI 1 "register_operand" "")
5438 (match_operand:V8HI 2 "register_operand" "")]
5441 rtx op1, op2, t1, t2, dest;
5445 t1 = gen_reg_rtx (V8HImode);
5446 t2 = gen_reg_rtx (V8HImode);
5447 dest = gen_lowpart (V8HImode, operands[0]);
5449 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5450 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5451 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply of V4SI operands into V2DI, high half.  Each
;; input is first shuffled into a V4SI temporary with sse2_pshufd_1, and the
;; temporaries are multiplied with xop_mulv2div2di3_high.
;; NOTE(review): the shuffle selectors and the expander condition are not
;; visible here.
5455 (define_expand "vec_widen_smult_hi_v4si"
5456 [(match_operand:V2DI 0 "register_operand" "")
5457 (match_operand:V4SI 1 "register_operand" "")
5458 (match_operand:V4SI 2 "register_operand" "")]
5463 t1 = gen_reg_rtx (V4SImode);
5464 t2 = gen_reg_rtx (V4SImode);
5466 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5471 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5476 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply of V4SI operands into V2DI, low half.  Each
;; input is first shuffled into a V4SI temporary with sse2_pshufd_1, and the
;; temporaries are multiplied with xop_mulv2div2di3_low.
;; NOTE(review): the shuffle selectors and the expander condition are not
;; visible here.
5480 (define_expand "vec_widen_smult_lo_v4si"
5481 [(match_operand:V2DI 0 "register_operand" "")
5482 (match_operand:V4SI 1 "register_operand" "")
5483 (match_operand:V4SI 2 "register_operand" "")]
5488 t1 = gen_reg_rtx (V4SImode);
5489 t2 = gen_reg_rtx (V4SImode);
5491 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5496 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5501 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply of V4SI operands into V2DI, high half.  Each
;; input is interleaved with itself (high half) into a V4SI temporary, and
;; the temporaries are multiplied with sse2_umulv2siv2di3.
5505 (define_expand "vec_widen_umult_hi_v4si"
5506 [(match_operand:V2DI 0 "register_operand" "")
5507 (match_operand:V4SI 1 "register_operand" "")
5508 (match_operand:V4SI 2 "register_operand" "")]
5511 rtx op1, op2, t1, t2;
5515 t1 = gen_reg_rtx (V4SImode);
5516 t2 = gen_reg_rtx (V4SImode);
5518 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5519 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5520 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply of V4SI operands into V2DI, low half.  Each
;; input is interleaved with itself (low half) into a V4SI temporary, and
;; the temporaries are multiplied with sse2_umulv2siv2di3.
5524 (define_expand "vec_widen_umult_lo_v4si"
5525 [(match_operand:V2DI 0 "register_operand" "")
5526 (match_operand:V4SI 1 "register_operand" "")
5527 (match_operand:V4SI 2 "register_operand" "")]
5530 rtx op1, op2, t1, t2;
5534 t1 = gen_reg_rtx (V4SImode);
5535 t2 = gen_reg_rtx (V4SImode);
5537 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5538 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5539 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander: operand 0 (V4SI) = operand 3 (V4SI) plus the
;; sse2_pmaddwd of V8HI operands 1 and 2, computed through a fresh V4SI
;; temporary.
5543 (define_expand "sdot_prodv8hi"
5544 [(match_operand:V4SI 0 "register_operand" "")
5545 (match_operand:V8HI 1 "register_operand" "")
5546 (match_operand:V8HI 2 "register_operand" "")
5547 (match_operand:V4SI 3 "register_operand" "")]
5550 rtx t = gen_reg_rtx (V4SImode);
5551 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5552 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product expander on V4SI inputs with a V2DI accumulator.
;; t1 = sse2_umulv2siv2di3 (op1, op2) + op3.  t2 and t3 are op1 and op2
;; shifted right as V1TI values (sse2_lshrv1ti3), and t4 is the
;; sse2_umulv2siv2di3 product of t2 and t3.  Operand 0 = t1 + t4.
;; NOTE(review): the shift amount passed to sse2_lshrv1ti3 is not visible
;; here.
5556 (define_expand "udot_prodv4si"
5557 [(match_operand:V2DI 0 "register_operand" "")
5558 (match_operand:V4SI 1 "register_operand" "")
5559 (match_operand:V4SI 2 "register_operand" "")
5560 (match_operand:V2DI 3 "register_operand" "")]
5565 t1 = gen_reg_rtx (V2DImode);
5566 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5567 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5569 t2 = gen_reg_rtx (V4SImode);
5570 t3 = gen_reg_rtx (V4SImode);
5571 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5572 gen_lowpart (V1TImode, operands[1]),
5574 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5575 gen_lowpart (V1TImode, operands[2]),
5578 t4 = gen_reg_rtx (V2DImode);
5579 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5581 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Insn ashr<mode>3 over the VI24_AVX2 modes.  Operand 1 is the vector being
;; shifted, operand 2 an SImode count (register or immediate).
;; Alternative 0 (isa noavx) prints the two-operand psra form with operand 1
;; tied to the destination; alternative 1 (isa avx) prints three-operand
;; vpsra.  length_immediate depends on whether operand 2 is a const_int.
5585 (define_insn "ashr<mode>3"
5586 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5588 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5589 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5592 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5593 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5594 [(set_attr "isa" "noavx,avx")
5595 (set_attr "type" "sseishft")
5596 (set (attr "length_immediate")
5597 (if_then_else (match_operand 2 "const_int_operand" "")
5599 (const_string "0")))
5600 (set_attr "prefix_data16" "1,*")
5601 (set_attr "prefix" "orig,vex")
5602 (set_attr "mode" "<sseinsnmode>")])
;; Insn avx2_lshrqv4di3: V4DI shift printed as vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before it is printed as the instruction's immediate.
5604 (define_insn "avx2_lshrqv4di3"
5605 [(set (match_operand:V4DI 0 "register_operand" "=x")
5607 (match_operand:V4DI 1 "register_operand" "x")
5608 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5611 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5612 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5614 [(set_attr "type" "sseishft")
5615 (set_attr "prefix" "vex")
5616 (set_attr "length_immediate" "1")
5617 (set_attr "mode" "OI")])
;; Insn lshr<mode>3: element-wise logical right shift (lshiftrt) over the
;; VI248_AVX2 modes.  Operand 2 is an SImode count (register or immediate).
;; Alternative 0 (isa noavx) prints two-operand psrl with operand 1 tied to
;; the destination; alternative 1 (isa avx) prints three-operand vpsrl.
;; length_immediate depends on whether operand 2 is a const_int.
5619 (define_insn "lshr<mode>3"
5620 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5621 (lshiftrt:VI248_AVX2
5622 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5623 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5626 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5627 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5628 [(set_attr "isa" "noavx,avx")
5629 (set_attr "type" "sseishft")
5630 (set (attr "length_immediate")
5631 (if_then_else (match_operand 2 "const_int_operand" "")
5633 (const_string "0")))
5634 (set_attr "prefix_data16" "1,*")
5635 (set_attr "prefix" "orig,vex")
5636 (set_attr "mode" "<sseinsnmode>")])
;; Insn avx2_lshlqv4di3: V4DI left shift (ashift) printed as vpslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before it is printed as the instruction's immediate.
5638 (define_insn "avx2_lshlqv4di3"
5639 [(set (match_operand:V4DI 0 "register_operand" "=x")
5640 (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x")
5641 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5644 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5645 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5647 [(set_attr "type" "sseishft")
5648 (set_attr "prefix" "vex")
5649 (set_attr "length_immediate" "1")
5650 (set_attr "mode" "OI")])
;; Insn avx2_lshl<mode>3 over the VI248_256 modes: a single VEX-encoded
;; alternative printing three-operand vpsll.  Operand 2 is an SImode count
;; (register or immediate); length_immediate depends on whether it is a
;; const_int.
5652 (define_insn "avx2_lshl<mode>3"
5653 [(set (match_operand:VI248_256 0 "register_operand" "=x")
5655 (match_operand:VI248_256 1 "register_operand" "x")
5656 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5658 "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5659 [(set_attr "type" "sseishft")
5660 (set_attr "prefix" "vex")
5661 (set (attr "length_immediate")
5662 (if_then_else (match_operand 2 "const_int_operand" "")
5664 (const_string "0")))
5665 (set_attr "mode" "OI")])
;; Insn ashl<mode>3 over the VI248_128 modes.  Operand 2 is an SImode count
;; (register or immediate).  Alternative 0 (isa noavx) prints two-operand
;; psll with operand 1 tied to the destination; alternative 1 (isa avx)
;; prints three-operand vpsll.  length_immediate depends on whether
;; operand 2 is a const_int.
5667 (define_insn "ashl<mode>3"
5668 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5670 (match_operand:VI248_128 1 "register_operand" "0,x")
5671 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5674 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5675 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5676 [(set_attr "isa" "noavx,avx")
5677 (set_attr "type" "sseishft")
5678 (set (attr "length_immediate")
5679 (if_then_else (match_operand 2 "const_int_operand" "")
5681 (const_string "0")))
5682 (set_attr "prefix_data16" "1,*")
5683 (set_attr "prefix" "orig,vex")
5684 (set_attr "mode" "TI")])
;; Expander vec_shl_<mode> over the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands 0
;; and 1 with their V1TImode lowparts, so the shift is applied to the
;; register as a single 128-bit value.
5686 (define_expand "vec_shl_<mode>"
5687 [(set (match_operand:VI_128 0 "register_operand" "")
5689 (match_operand:VI_128 1 "register_operand" "")
5690 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5693 operands[0] = gen_lowpart (V1TImode, operands[0]);
5694 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn <sse2_avx2>_ashl<mode>3 over the VIMAX_AVX2 modes.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand and is divided by 8 before being
;; printed as the immediate.  Alternative 0 (isa noavx) prints two-operand
;; pslldq; alternative 1 (isa avx) prints three-operand vpslldq.
5697 (define_insn "<sse2_avx2>_ashl<mode>3"
5698 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5700 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5701 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5704 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5706 switch (which_alternative)
5709 return "pslldq\t{%2, %0|%0, %2}";
5711 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5716 [(set_attr "isa" "noavx,avx")
5717 (set_attr "type" "sseishft")
5718 (set_attr "length_immediate" "1")
5719 (set_attr "prefix_data16" "1,*")
5720 (set_attr "prefix" "orig,vex")
5721 (set_attr "mode" "<sseinsnmode>")])
;; Expander vec_shr_<mode> over the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands 0
;; and 1 with their V1TImode lowparts, so the shift is applied to the
;; register as a single 128-bit value.
5723 (define_expand "vec_shr_<mode>"
5724 [(set (match_operand:VI_128 0 "register_operand" "")
5726 (match_operand:VI_128 1 "register_operand" "")
5727 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5730 operands[0] = gen_lowpart (V1TImode, operands[0]);
5731 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Expander avx2_<code><mode>3 over the VI124_256 modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the line naming the code iterator is not visible in this
;; listing; the matching insn prints vp<maxmin_int>, so this is presumably
;; one of the integer max/min families -- confirm which.
5734 (define_expand "avx2_<code><mode>3"
5735 [(set (match_operand:VI124_256 0 "register_operand" "")
5737 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5738 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5740 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn *avx2_<code><mode>3 over the VI124_256 modes, enabled under
;; TARGET_AVX2 when ix86_binary_operator_ok accepts the operands.  The "%"
;; on operand 1 marks the operation commutative; operand 2 may be memory.
;; Prints three-operand vp<maxmin_int><ssemodesuffix>.
5742 (define_insn "*avx2_<code><mode>3"
5743 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5745 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5746 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5747 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5748 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5749 [(set_attr "type" "sseiadd")
5750 (set_attr "prefix_extra" "1")
5751 (set_attr "prefix" "vex")
5752 (set_attr "mode" "OI")])
;; Insn sse2_lshrv1ti3: shift of a V1TI value.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before being printed as
;; the immediate.  Alternative 0 (isa noavx) prints two-operand psrldq;
;; alternative 1 (isa avx) prints three-operand vpsrldq.
5754 (define_insn "sse2_lshrv1ti3"
5755 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5757 (match_operand:V1TI 1 "register_operand" "0,x")
5758 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5761 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5763 switch (which_alternative)
5766 return "psrldq\t{%2, %0|%0, %2}";
5768 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5773 [(set_attr "isa" "noavx,avx")
5774 (set_attr "type" "sseishft")
5775 (set_attr "length_immediate" "1")
5776 (set_attr "atom_unit" "sishuf")
5777 (set_attr "prefix_data16" "1,*")
5778 (set_attr "prefix" "orig,vex")
5779 (set_attr "mode" "TI")])
;; Expander avx2_<code><mode>3 over the VI124_256 modes (second occurrence
;; of this name template in the file).  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the line naming the code iterator is not visible in this
;; listing; presumably it iterates a different max/min family than the
;; earlier expander of the same name -- confirm.
5781 (define_expand "avx2_<code><mode>3"
5782 [(set (match_operand:VI124_256 0 "register_operand" "")
5784 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5785 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5787 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn *avx2_<code><mode>3 over the VI124_256 modes (second occurrence of
;; this name template), enabled under TARGET_AVX2 when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Prints three-operand
;; vp<maxmin_int><ssemodesuffix>.
5789 (define_insn "*avx2_<code><mode>3"
5790 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5792 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5793 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5794 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5795 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5796 [(set_attr "type" "sseiadd")
5797 (set_attr "prefix_extra" "1")
5798 (set_attr "prefix" "vex")
5799 (set_attr "mode" "OI")])
;; Insn *sse4_1_<code><mode>3 over the VI14_128 modes, enabled under
;; TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 (isa noavx) prints two-operand p<maxmin_int>; alternative 1
;; (isa avx) prints three-operand vp<maxmin_int>.  Operand 1 is marked
;; commutative ("%").
5801 (define_insn "*sse4_1_<code><mode>3"
5802 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5804 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5805 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5806 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5808 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5809 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5810 [(set_attr "isa" "noavx,avx")
5811 (set_attr "type" "sseiadd")
5812 (set_attr "prefix_extra" "1,*")
5813 (set_attr "prefix" "orig,vex")
5814 (set_attr "mode" "TI")])
;; Insn *<code>v8hi3: V8HI max/min available from TARGET_SSE2, gated by
;; ix86_binary_operator_ok.  Alternative 0 (isa noavx) prints two-operand
;; p<maxmin_int>w; alternative 1 (isa avx) prints three-operand
;; vp<maxmin_int>w.  Operand 1 is marked commutative ("%").
5816 (define_insn "*<code>v8hi3"
5817 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5819 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5820 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5821 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5823 p<maxmin_int>w\t{%2, %0|%0, %2}
5824 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5825 [(set_attr "isa" "noavx,avx")
5826 (set_attr "type" "sseiadd")
5827 (set_attr "prefix_data16" "1,*")
5828 (set_attr "prefix_extra" "*,1")
5829 (set_attr "prefix" "orig,vex")
5830 (set_attr "mode" "TI")])
;; Expander smax<mode>3 over the VI14_128 modes.  Two strategies are visible:
;;   * ix86_fixup_binary_operands_no_copy (SMAX, ...), which keeps the smax
;;     rtx for a matching insn; and
;;   * a vcond-style fallback through ix86_expand_int_vcond with
;;     xops = { dest, op1, op2, (op1 > op2), op1, op2 },
;;     i.e. dest = (op1 > op2) ? op1 : op2 using a signed GT.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing; presumably it checks SSE4.1 availability -- confirm.
5832 (define_expand "smax<mode>3"
5833 [(set (match_operand:VI14_128 0 "register_operand" "")
5834 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5835 (match_operand:VI14_128 2 "register_operand" "")))]
5839 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5845 xops[0] = operands[0];
5846 xops[1] = operands[1];
5847 xops[2] = operands[2];
5848 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5849 xops[4] = operands[1];
5850 xops[5] = operands[2];
5851 ok = ix86_expand_int_vcond (xops);
;; Expander smin<mode>3 over the VI14_128 modes.  Two strategies are visible:
;;   * ix86_fixup_binary_operands_no_copy (SMIN, ...), which keeps the smin
;;     rtx for a matching insn; and
;;   * a vcond-style fallback through ix86_expand_int_vcond with
;;     xops = { dest, op2, op1, (op1 > op2), op1, op2 },
;;     i.e. dest = (op1 > op2) ? op2 : op1 using a signed GT.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing; presumably it checks SSE4.1 availability -- confirm.
5857 (define_expand "smin<mode>3"
5858 [(set (match_operand:VI14_128 0 "register_operand" "")
5859 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5860 (match_operand:VI14_128 2 "register_operand" "")))]
5864 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5870 xops[0] = operands[0];
5871 xops[1] = operands[2];
5872 xops[2] = operands[1];
5873 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5874 xops[4] = operands[1];
5875 xops[5] = operands[2];
5876 ok = ix86_expand_int_vcond (xops);
;; Expander <code>v8hi3: V8HI binary operation whose only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.  The code iterator and the
;; enabling condition are on lines not visible in this listing.
5882 (define_expand "<code>v8hi3"
5883 [(set (match_operand:V8HI 0 "register_operand" "")
5885 (match_operand:V8HI 1 "nonimmediate_operand" "")
5886 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5888 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Expander smaxv2di3: V2DI signed maximum, always lowered through
;; ix86_expand_int_vcond with
;;   xops = { dest, op1, op2, (op1 > op2), op1, op2 },
;; i.e. dest = (op1 > op2) ? op1 : op2 using a signed GT.
5890 (define_expand "smaxv2di3"
5891 [(set (match_operand:V2DI 0 "register_operand" "")
5892 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5893 (match_operand:V2DI 2 "register_operand" "")))]
5899 xops[0] = operands[0];
5900 xops[1] = operands[1];
5901 xops[2] = operands[2];
5902 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5903 xops[4] = operands[1];
5904 xops[5] = operands[2];
5905 ok = ix86_expand_int_vcond (xops);
;; Expander sminv2di3: V2DI signed minimum, always lowered through
;; ix86_expand_int_vcond with
;;   xops = { dest, op2, op1, (op1 > op2), op1, op2 },
;; i.e. dest = (op1 > op2) ? op2 : op1 using a signed GT.
5910 (define_expand "sminv2di3"
5911 [(set (match_operand:V2DI 0 "register_operand" "")
5912 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5913 (match_operand:V2DI 2 "register_operand" "")))]
5919 xops[0] = operands[0];
5920 xops[1] = operands[2];
5921 xops[2] = operands[1];
5922 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5923 xops[4] = operands[1];
5924 xops[5] = operands[2];
5925 ok = ix86_expand_int_vcond (xops);
;; Insn *sse4_1_<code><mode>3 over the VI24_128 modes, enabled under
;; TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 (isa noavx) prints two-operand p<maxmin_int>; alternative 1
;; (isa avx) prints three-operand vp<maxmin_int>.  Operand 1 is marked
;; commutative ("%").
5930 (define_insn "*sse4_1_<code><mode>3"
5931 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5933 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5934 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5935 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5937 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5938 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5939 [(set_attr "isa" "noavx,avx")
5940 (set_attr "type" "sseiadd")
5941 (set_attr "prefix_extra" "1,*")
5942 (set_attr "prefix" "orig,vex")
5943 (set_attr "mode" "TI")])
;; Insn *<code>v16qi3: V16QI max/min available from TARGET_SSE2, gated by
;; ix86_binary_operator_ok.  Alternative 0 (isa noavx) prints two-operand
;; p<maxmin_int>b; alternative 1 (isa avx) prints three-operand
;; vp<maxmin_int>b.  Operand 1 is marked commutative ("%").
5945 (define_insn "*<code>v16qi3"
5946 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5948 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5949 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5950 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5952 p<maxmin_int>b\t{%2, %0|%0, %2}
5953 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5954 [(set_attr "isa" "noavx,avx")
5955 (set_attr "type" "sseiadd")
5956 (set_attr "prefix_data16" "1,*")
5957 (set_attr "prefix_extra" "*,1")
5958 (set_attr "prefix" "orig,vex")
5959 (set_attr "mode" "TI")])
;; Expander <code>v16qi3: V16QI binary operation whose only preparation step
;; is ix86_fixup_binary_operands_no_copy for <CODE>.  The code iterator and
;; the enabling condition are on lines not visible in this listing.
5961 (define_expand "<code>v16qi3"
5962 [(set (match_operand:V16QI 0 "register_operand" "")
5964 (match_operand:V16QI 1 "nonimmediate_operand" "")
5965 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5967 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Expander umaxv8hi3: V8HI unsigned maximum.  Two strategies are visible:
;;   * ix86_fixup_binary_operands_no_copy (UMAX, ...), keeping the umax rtx;
;;   * a two-insn fallback: op3 = sse2_ussubv8hi3 (op1, op2), then
;;     op0 = op3 + op2.  Assuming ussub is an unsigned saturating subtract,
;;     this yields max (op1, op2).
;; op3 normally aliases op0, but a fresh register is used when op0 is the
;; same rtx as op2 so that op2 is not overwritten before the final add.
;; NOTE(review): the test choosing between the two strategies is not visible
;; in this listing; presumably SSE4.1 availability -- confirm.
5969 (define_expand "umaxv8hi3"
5970 [(set (match_operand:V8HI 0 "register_operand" "")
5971 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5972 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5976 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5979 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5980 if (rtx_equal_p (op3, op2))
5981 op3 = gen_reg_rtx (V8HImode);
5982 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5983 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander umaxv4si3: V4SI unsigned maximum.  Two strategies are visible:
;;   * ix86_fixup_binary_operands_no_copy (UMAX, ...), keeping the umax rtx;
;;   * a vcond-style fallback through ix86_expand_int_vcond with
;;     xops = { dest, op1, op2, (op1 >u op2), op1, op2 },
;;     i.e. dest = (op1 >u op2) ? op1 : op2 using an unsigned GTU.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing; presumably SSE4.1 availability -- confirm.
5988 (define_expand "umaxv4si3"
5989 [(set (match_operand:V4SI 0 "register_operand" "")
5990 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5991 (match_operand:V4SI 2 "register_operand" "")))]
5995 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6001 xops[0] = operands[0];
6002 xops[1] = operands[1];
6003 xops[2] = operands[2];
6004 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6005 xops[4] = operands[1];
6006 xops[5] = operands[2];
6007 ok = ix86_expand_int_vcond (xops);
;; Expander umin<mode>3 over the VI24_128 modes.  Two strategies are visible:
;;   * ix86_fixup_binary_operands_no_copy (UMIN, ...), keeping the umin rtx;
;;   * a vcond-style fallback through ix86_expand_int_vcond with
;;     xops = { dest, op2, op1, (op1 >u op2), op1, op2 },
;;     i.e. dest = (op1 >u op2) ? op2 : op1 using an unsigned GTU.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing; presumably SSE4.1 availability -- confirm.
6013 (define_expand "umin<mode>3"
6014 [(set (match_operand:VI24_128 0 "register_operand" "")
6015 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
6016 (match_operand:VI24_128 2 "register_operand" "")))]
6020 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6026 xops[0] = operands[0];
6027 xops[1] = operands[2];
6028 xops[2] = operands[1];
6029 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6030 xops[4] = operands[1];
6031 xops[5] = operands[2];
6032 ok = ix86_expand_int_vcond (xops);
;; Expander umaxv2di3: V2DI unsigned maximum, always lowered through
;; ix86_expand_int_vcond with
;;   xops = { dest, op1, op2, (op1 >u op2), op1, op2 },
;; i.e. dest = (op1 >u op2) ? op1 : op2 using an unsigned GTU.
6038 (define_expand "umaxv2di3"
6039 [(set (match_operand:V2DI 0 "register_operand" "")
6040 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6041 (match_operand:V2DI 2 "register_operand" "")))]
6047 xops[0] = operands[0];
6048 xops[1] = operands[1];
6049 xops[2] = operands[2];
6050 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6051 xops[4] = operands[1];
6052 xops[5] = operands[2];
6053 ok = ix86_expand_int_vcond (xops);
;; Expander uminv2di3: V2DI unsigned minimum, always lowered through
;; ix86_expand_int_vcond with
;;   xops = { dest, op2, op1, (op1 >u op2), op1, op2 },
;; i.e. dest = (op1 >u op2) ? op2 : op1 using an unsigned GTU.
6058 (define_expand "uminv2di3"
6059 [(set (match_operand:V2DI 0 "register_operand" "")
6060 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6061 (match_operand:V2DI 2 "register_operand" "")))]
6067 xops[0] = operands[0];
6068 xops[1] = operands[2];
6069 xops[2] = operands[1];
6070 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6071 xops[4] = operands[1];
6072 xops[5] = operands[2];
6073 ok = ix86_expand_int_vcond (xops);
6078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6080 ;; Parallel integral comparisons
6082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander avx2_eq<mode>3 over the VI1248_256 modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy for EQ.
6084 (define_expand "avx2_eq<mode>3"
6085 [(set (match_operand:VI1248_256 0 "register_operand" "")
6087 (match_operand:VI1248_256 1 "nonimmediate_operand" "")
6088 (match_operand:VI1248_256 2 "nonimmediate_operand" "")))]
6090 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn *avx2_eq<mode>3 over the VI1248_256 modes, enabled under TARGET_AVX2
;; when ix86_binary_operator_ok accepts the operands.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Prints three-operand
;; vpcmpeq<ssemodesuffix>.
6092 (define_insn "*avx2_eq<mode>3"
6093 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
6095 (match_operand:VI1248_256 1 "nonimmediate_operand" "%x")
6096 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
6097 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6098 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6099 [(set_attr "type" "ssecmp")
6100 (set_attr "prefix_extra" "1")
6101 (set_attr "prefix" "vex")
6102 (set_attr "mode" "OI")])
;; Insn *sse4_1_eqv2di3: V2DI equality compare, enabled under TARGET_SSE4_1
;; when ix86_binary_operator_ok accepts the operands.  Alternative 0 (isa
;; noavx) prints two-operand pcmpeqq; alternative 1 (isa avx) prints
;; three-operand vpcmpeqq.  Operand 1 is marked commutative ("%").
6104 (define_insn "*sse4_1_eqv2di3"
6105 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6107 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6108 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6109 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6111 pcmpeqq\t{%2, %0|%0, %2}
6112 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6113 [(set_attr "isa" "noavx,avx")
6114 (set_attr "type" "ssecmp")
6115 (set_attr "prefix_extra" "1")
6116 (set_attr "prefix" "orig,vex")
6117 (set_attr "mode" "TI")])
;; Insn *sse2_eq<mode>3 over the VI124_128 modes.  Enabled for TARGET_SSE2
;; only when TARGET_XOP is off, and when ix86_binary_operator_ok accepts the
;; operands.  Alternative 0 (isa noavx) prints two-operand pcmpeq;
;; alternative 1 (isa avx) prints three-operand vpcmpeq.  Operand 1 is marked
;; commutative ("%").
6119 (define_insn "*sse2_eq<mode>3"
6120 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6122 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6123 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6124 "TARGET_SSE2 && !TARGET_XOP
6125 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6127 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6128 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6129 [(set_attr "isa" "noavx,avx")
6130 (set_attr "type" "ssecmp")
6131 (set_attr "prefix_data16" "1,*")
6132 (set_attr "prefix" "orig,vex")
6133 (set_attr "mode" "TI")])
;; Expander sse2_eq<mode>3 over the VI124_128 modes, enabled for TARGET_SSE2
;; when TARGET_XOP is off.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for EQ.
6135 (define_expand "sse2_eq<mode>3"
6136 [(set (match_operand:VI124_128 0 "register_operand" "")
6138 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6139 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6140 "TARGET_SSE2 && !TARGET_XOP "
6141 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander sse4_1_eqv2di3: V2DI equality compare whose only preparation step
;; is ix86_fixup_binary_operands_no_copy for EQ.  The enabling condition is
;; on a line not visible in this listing.
6143 (define_expand "sse4_1_eqv2di3"
6144 [(set (match_operand:V2DI 0 "register_operand" "")
6146 (match_operand:V2DI 1 "nonimmediate_operand" "")
6147 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6149 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn sse4_2_gtv2di3: V2DI greater-than compare.  Operand 1 must be a
;; register and carries no "%" marker (the comparison is not commutative);
;; operand 2 may be memory.  Alternative 0 (isa noavx) prints two-operand
;; pcmpgtq; alternative 1 (isa avx) prints three-operand vpcmpgtq.
6151 (define_insn "sse4_2_gtv2di3"
6152 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6154 (match_operand:V2DI 1 "register_operand" "0,x")
6155 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6158 pcmpgtq\t{%2, %0|%0, %2}
6159 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6160 [(set_attr "isa" "noavx,avx")
6161 (set_attr "type" "ssecmp")
6162 (set_attr "prefix_extra" "1")
6163 (set_attr "prefix" "orig,vex")
6164 (set_attr "mode" "TI")])
;; Insn avx2_gt<mode>3 over the VI1248_256 modes: single VEX-encoded
;; alternative printing three-operand vpcmpgt<ssemodesuffix>.  Operand 1 must
;; be a register; operand 2 may be memory.
6166 (define_insn "avx2_gt<mode>3"
6167 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
6169 (match_operand:VI1248_256 1 "register_operand" "x")
6170 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
6172 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6173 [(set_attr "type" "ssecmp")
6174 (set_attr "prefix_extra" "1")
6175 (set_attr "prefix" "vex")
6176 (set_attr "mode" "OI")])
;; Insn sse2_gt<mode>3 over the VI124_128 modes, enabled for TARGET_SSE2 when
;; TARGET_XOP is off.  Operand 1 must be a register; operand 2 may be memory.
;; Alternative 0 (isa noavx) prints two-operand pcmpgt; alternative 1 (isa
;; avx) prints three-operand vpcmpgt.
6178 (define_insn "sse2_gt<mode>3"
6179 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6181 (match_operand:VI124_128 1 "register_operand" "0,x")
6182 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6183 "TARGET_SSE2 && !TARGET_XOP"
6185 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6186 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6187 [(set_attr "isa" "noavx,avx")
6188 (set_attr "type" "ssecmp")
6189 (set_attr "prefix_data16" "1,*")
6190 (set_attr "prefix" "orig,vex")
6191 (set_attr "mode" "TI")])
;; Expander vcond<V_128:mode><VI124_128:mode>: vector conditional select.
;; Operand 3 is a comparison operator applied to operands 4 and 5 (VI124_128
;; modes); operands 1 and 2 are the V_128 values chosen between, and
;; operand 0 receives the result.  The visible part of the condition requires
;; the data mode and the comparison mode to have the same number of elements.
;; All work is delegated to ix86_expand_int_vcond.
6193 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6194 [(set (match_operand:V_128 0 "register_operand" "")
6196 (match_operator 3 ""
6197 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6198 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6199 (match_operand:V_128 1 "general_operand" "")
6200 (match_operand:V_128 2 "general_operand" "")))]
6202 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6203 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6205 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcond<VI8F_128:mode>v2di: vector conditional select whose
;; comparison (operator 3 on operands 4 and 5) is done in V2DI and whose
;; selected values (operands 1 and 2) and result (operand 0) are in a
;; VI8F_128 mode.  All work is delegated to ix86_expand_int_vcond.  The
;; enabling condition is on a line not visible in this listing.
6210 (define_expand "vcond<VI8F_128:mode>v2di"
6211 [(set (match_operand:VI8F_128 0 "register_operand" "")
6212 (if_then_else:VI8F_128
6213 (match_operator 3 ""
6214 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6215 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6216 (match_operand:VI8F_128 1 "general_operand" "")
6217 (match_operand:VI8F_128 2 "general_operand" "")))]
6220 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcondu<V_128:mode><VI124_128:mode>: same operand layout as the
;; vcond expander of matching modes (operator 3 on operands 4 and 5 selects
;; between operands 1 and 2 into operand 0).  The visible part of the
;; condition requires equal element counts for the two modes.  All work is
;; delegated to ix86_expand_int_vcond.
6225 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6226 [(set (match_operand:V_128 0 "register_operand" "")
6228 (match_operator 3 ""
6229 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6230 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6231 (match_operand:V_128 1 "general_operand" "")
6232 (match_operand:V_128 2 "general_operand" "")))]
6234 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6235 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6237 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcondu<VI8F_128:mode>v2di: same operand layout as
;; vcond<VI8F_128:mode>v2di (V2DI comparison via operator 3 on operands 4 and
;; 5, VI8F_128 values in operands 1 and 2, result in operand 0).  All work is
;; delegated to ix86_expand_int_vcond.  The enabling condition is on a line
;; not visible in this listing.
6242 (define_expand "vcondu<VI8F_128:mode>v2di"
6243 [(set (match_operand:VI8F_128 0 "register_operand" "")
6244 (if_then_else:VI8F_128
6245 (match_operator 3 ""
6246 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6247 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6248 (match_operand:VI8F_128 1 "general_operand" "")
6249 (match_operand:VI8F_128 2 "general_operand" "")))]
6252 bool ok = ix86_expand_int_vcond (operands);
6257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6259 ;; Parallel bitwise logical operations
6261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander one_cmpl<mode>2 over the VI modes: implemented as an XOR of
;; operand 1 with a constant.  The preparation code builds an rtvec with one
;; constm1_rtx per element of <MODE>, wraps it in a CONST_VECTOR, forces it
;; into a register and stores that register as operands[2].
6263 (define_expand "one_cmpl<mode>2"
6264 [(set (match_operand:VI 0 "register_operand" "")
6265 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6269 int i, n = GET_MODE_NUNITS (<MODE>mode);
6270 rtvec v = rtvec_alloc (n);
6272 for (i = 0; i < n; ++i)
6273 RTVEC_ELT (v, i) = constm1_rtx;
6275 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander <sse2_avx2>_andnot<mode>3 over the VI_AVX2 modes: combines the
;; bitwise NOT of operand 1 (register) with operand 2 (register or memory).
;; NOTE(review): the rtx code wrapping the two inputs and the enabling
;; condition are on lines not visible in this listing; the pattern name
;; implies an AND -- confirm.
6278 (define_expand "<sse2_avx2>_andnot<mode>3"
6279 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6281 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6282 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn *andnot<mode>3 over the VI modes: (NOT operand 1) combined with
;; operand 2.  The output code picks a mnemonic from the insn's "mode"
;; attribute (asserting the ISA each attribute value needs), then formats it
;; into a static buffer: two-operand form for alternative 0 (isa noavx) and
;; "v"-prefixed three-operand form for alternative 1 (isa avx).
;;
;; The "mode" attribute's size test is expressed in bytes: GET_MODE_SIZE
;; returns a byte count, so a 256-bit vector is 32 and a 128-bit vector is
;; 16.  The test therefore has to be "> 16"; the earlier "> 128" could never
;; be true, which made the V8SF arm unreachable and let 256-bit integer
;; modes on AVX-without-AVX2 fall through to the TI arm.
6285 (define_insn "*andnot<mode>3"
6286 [(set (match_operand:VI 0 "register_operand" "=x,x")
6288 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6289 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6292 static char buf[32];
6296 switch (get_attr_mode (insn))
6299 gcc_assert (TARGET_AVX2);
6301 gcc_assert (TARGET_SSE2);
6307 gcc_assert (TARGET_AVX);
6309 gcc_assert (TARGET_SSE);
6318 switch (which_alternative)
6321 ops = "%s\t{%%2, %%0|%%0, %%2}";
6324 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6330 snprintf (buf, sizeof (buf), ops, tmp);
6333 [(set_attr "isa" "noavx,avx")
6334 (set_attr "type" "sselog")
6335 (set (attr "prefix_data16")
6337 (and (eq_attr "alternative" "0")
6338 (eq_attr "mode" "TI"))
6340 (const_string "*")))
6341 (set_attr "prefix" "orig,vex")
6343 (cond [(match_test "TARGET_AVX2")
6345 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6346 (const_string "V8SF")
6347 (match_test "TARGET_SSE2")
6350 (const_string "V4SF")))])
;; Expander <code><mode>3 over the VI modes.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.  The code iterator and the
;; enabling condition are on lines not visible in this listing.
6352 (define_expand "<code><mode>3"
6353 [(set (match_operand:VI 0 "register_operand" "")
6355 (match_operand:VI 1 "nonimmediate_operand" "")
6356 (match_operand:VI 2 "nonimmediate_operand" "")))]
6358 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn *<code><mode>3 over the VI modes: commutative ("%") binary operation
;; gated by ix86_binary_operator_ok.  The output code picks a mnemonic from
;; the insn's "mode" attribute (asserting the ISA each attribute value
;; needs), then formats it into a static buffer: two-operand form for
;; alternative 0 (isa noavx) and "v"-prefixed three-operand form for
;; alternative 1 (isa avx).
;;
;; The "mode" attribute's size test is expressed in bytes: GET_MODE_SIZE
;; returns a byte count, so a 256-bit vector is 32 and a 128-bit vector is
;; 16.  The test therefore has to be "> 16"; the earlier "> 128" could never
;; be true, which made the V8SF arm unreachable and let 256-bit integer
;; modes on AVX-without-AVX2 fall through to the TI arm.
6360 (define_insn "*<code><mode>3"
6361 [(set (match_operand:VI 0 "register_operand" "=x,x")
6363 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6364 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6366 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6368 static char buf[32];
6372 switch (get_attr_mode (insn))
6375 gcc_assert (TARGET_AVX2);
6377 gcc_assert (TARGET_SSE2);
6383 gcc_assert (TARGET_AVX);
6385 gcc_assert (TARGET_SSE);
6394 switch (which_alternative)
6397 ops = "%s\t{%%2, %%0|%%0, %%2}";
6400 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6406 snprintf (buf, sizeof (buf), ops, tmp);
6409 [(set_attr "isa" "noavx,avx")
6410 (set_attr "type" "sselog")
6411 (set (attr "prefix_data16")
6413 (and (eq_attr "alternative" "0")
6414 (eq_attr "mode" "TI"))
6416 (const_string "*")))
6417 (set_attr "prefix" "orig,vex")
6419 (cond [(match_test "TARGET_AVX2")
6421 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6422 (const_string "V8SF")
6423 (match_test "TARGET_SSE2")
6426 (const_string "V4SF")))])
;; Insn *andnottf3: TFmode and-not, i.e. (NOT operand 1) combined with
;; operand 2 and printed as pandn (alternative 0, isa noavx, two-operand) or
;; vpandn (alternative 1, isa avx, three-operand).
6428 (define_insn "*andnottf3"
6429 [(set (match_operand:TF 0 "register_operand" "=x,x")
6431 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6432 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6435 pandn\t{%2, %0|%0, %2}
6436 vpandn\t{%2, %1, %0|%0, %1, %2}"
6437 [(set_attr "isa" "noavx,avx")
6438 (set_attr "type" "sselog")
6439 (set_attr "prefix_data16" "1,*")
6440 (set_attr "prefix" "orig,vex")
6441 (set_attr "mode" "TI")])
;; Expander <code>tf3: TFmode binary operation whose only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.  The code iterator and the
;; enabling condition are on lines not visible in this listing.
6443 (define_expand "<code>tf3"
6444 [(set (match_operand:TF 0 "register_operand" "")
6446 (match_operand:TF 1 "nonimmediate_operand" "")
6447 (match_operand:TF 2 "nonimmediate_operand" "")))]
6449 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn *<code>tf3: TFmode commutative ("%") logical operation gated by
;; ix86_binary_operator_ok.  Alternative 0 (isa noavx) prints two-operand
;; p<logic>; alternative 1 (isa avx) prints three-operand vp<logic>.
6451 (define_insn "*<code>tf3"
6452 [(set (match_operand:TF 0 "register_operand" "=x,x")
6454 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6455 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6457 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6459 p<logic>\t{%2, %0|%0, %2}
6460 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6461 [(set_attr "isa" "noavx,avx")
6462 (set_attr "type" "sselog")
6463 (set_attr "prefix_data16" "1,*")
6464 (set_attr "prefix" "orig,vex")
6465 (set_attr "mode" "TI")])
6467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6469 ;; Parallel integral element swizzling
6471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander vec_pack_trunc_<mode> over the VI248_128 modes.  Operands 1 and 2
;; are reinterpreted (gen_lowpart) in <ssepackmode>, the mode of result
;; operand 0, and passed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
6473 (define_expand "vec_pack_trunc_<mode>"
6474 [(match_operand:<ssepackmode> 0 "register_operand" "")
6475 (match_operand:VI248_128 1 "register_operand" "")
6476 (match_operand:VI248_128 2 "register_operand" "")]
6479 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6480 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6481 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn <sse2_avx2>_packsswb over the VI1_AVX2 modes: the result is the
;; concatenation of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, each taken in <sseunpackmode> and narrowed to
;; <ssehalfvecmode>.  Alternative 0 (isa noavx) prints two-operand packsswb;
;; alternative 1 (isa avx) prints three-operand vpacksswb.
6485 (define_insn "<sse2_avx2>_packsswb"
6486 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6487 (vec_concat:VI1_AVX2
6488 (ss_truncate:<ssehalfvecmode>
6489 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6490 (ss_truncate:<ssehalfvecmode>
6491 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6494 packsswb\t{%2, %0|%0, %2}
6495 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6496 [(set_attr "isa" "noavx,avx")
6497 (set_attr "type" "sselog")
6498 (set_attr "prefix_data16" "1,*")
6499 (set_attr "prefix" "orig,vex")
6500 (set_attr "mode" "<sseinsnmode>")])
;; Insn <sse2_avx2>_packssdw over the VI2_AVX2 modes: the result is the
;; concatenation of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, each taken in <sseunpackmode> and narrowed to
;; <ssehalfvecmode>.  Alternative 0 (isa noavx) prints two-operand packssdw;
;; alternative 1 (isa avx) prints three-operand vpackssdw.
6502 (define_insn "<sse2_avx2>_packssdw"
6503 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6504 (vec_concat:VI2_AVX2
6505 (ss_truncate:<ssehalfvecmode>
6506 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6507 (ss_truncate:<ssehalfvecmode>
6508 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6511 packssdw\t{%2, %0|%0, %2}
6512 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6513 [(set_attr "isa" "noavx,avx")
6514 (set_attr "type" "sselog")
6515 (set_attr "prefix_data16" "1,*")
6516 (set_attr "prefix" "orig,vex")
6517 (set_attr "mode" "<sseinsnmode>")])
;; Insn <sse2_avx2>_packuswb over the VI1_AVX2 modes: the result is the
;; concatenation of the unsigned-saturating truncations (us_truncate) of
;; operands 1 and 2, each taken in <sseunpackmode> and narrowed to
;; <ssehalfvecmode>.  Alternative 0 (isa noavx) prints two-operand packuswb;
;; alternative 1 (isa avx) prints three-operand vpackuswb.
6519 (define_insn "<sse2_avx2>_packuswb"
6520 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6521 (vec_concat:VI1_AVX2
6522 (us_truncate:<ssehalfvecmode>
6523 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6524 (us_truncate:<ssehalfvecmode>
6525 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6528 packuswb\t{%2, %0|%0, %2}
6529 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6530 [(set_attr "isa" "noavx,avx")
6531 (set_attr "type" "sselog")
6532 (set_attr "prefix_data16" "1,*")
6533 (set_attr "prefix" "orig,vex")
6534 (set_attr "mode" "<sseinsnmode>")])
;; Insn avx2_interleave_highv32qi: V32QI high interleave printed as
;; three-operand vpunpckhbw.  The selector interleaves bytes 8-15 of
;; operand 1 with bytes 8-15 of operand 2 (indices 40-47), then bytes 24-31
;; of operand 1 with bytes 24-31 of operand 2 (indices 56-63), i.e. the upper
;; eight bytes of each 128-bit half.
;;
;; The selector must name exactly 32 elements, all within 0..63 (the two
;; 32-element inputs taken together).  A stray trailing pair "(32) (64)" used
;; to follow the (31, 63) entry; it made the list 34 entries long and
;; referenced the out-of-range index 64, so it has been removed.
6536 (define_insn "avx2_interleave_highv32qi"
6537 [(set (match_operand:V32QI 0 "register_operand" "=x")
6540 (match_operand:V32QI 1 "register_operand" "x")
6541 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6542 (parallel [(const_int 8) (const_int 40)
6543 (const_int 9) (const_int 41)
6544 (const_int 10) (const_int 42)
6545 (const_int 11) (const_int 43)
6546 (const_int 12) (const_int 44)
6547 (const_int 13) (const_int 45)
6548 (const_int 14) (const_int 46)
6549 (const_int 15) (const_int 47)
6550 (const_int 24) (const_int 56)
6551 (const_int 25) (const_int 57)
6552 (const_int 26) (const_int 58)
6553 (const_int 27) (const_int 59)
6554 (const_int 28) (const_int 60)
6555 (const_int 29) (const_int 61)
6556 (const_int 30) (const_int 62)
6557 (const_int 31) (const_int 63)])))]
6560 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6561 [(set_attr "type" "sselog")
6562 (set_attr "prefix" "vex")
6563 (set_attr "mode" "OI")])
;; Insn vec_interleave_highv16qi: V16QI high interleave.  The selector
;; alternates elements 8-15 with elements 24-31; indices 16 and above
;; address operand 2, presumably through a vec_concat of the two inputs (the
;; wrapping rtx lines are not visible in this listing).
;; Alternative 0 (isa noavx) prints two-operand punpckhbw; alternative 1 (isa
;; avx) prints three-operand vpunpckhbw.
6565 (define_insn "vec_interleave_highv16qi"
6566 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6569 (match_operand:V16QI 1 "register_operand" "0,x")
6570 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6571 (parallel [(const_int 8) (const_int 24)
6572 (const_int 9) (const_int 25)
6573 (const_int 10) (const_int 26)
6574 (const_int 11) (const_int 27)
6575 (const_int 12) (const_int 28)
6576 (const_int 13) (const_int 29)
6577 (const_int 14) (const_int 30)
6578 (const_int 15) (const_int 31)])))]
6581 punpckhbw\t{%2, %0|%0, %2}
6582 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6583 [(set_attr "isa" "noavx,avx")
6584 (set_attr "type" "sselog")
6585 (set_attr "prefix_data16" "1,*")
6586 (set_attr "prefix" "orig,vex")
6587 (set_attr "mode" "TI")])
;; Insn avx2_interleave_lowv32qi: V32QI low interleave printed as
;; three-operand vpunpcklbw.  The selector interleaves bytes 0-7 of operand 1
;; with bytes 0-7 of operand 2 (indices 32-39), then bytes 16-23 of operand 1
;; with bytes 16-23 of operand 2 (indices 48-55), i.e. the lower eight bytes
;; of each 128-bit half.
;;
;; The selector must name exactly 32 elements.  A stray pair "(15) (47)" used
;; to sit between the (7, 39) and (16, 48) entries; it made the list 34
;; entries long and selected a byte from the upper part of the first half,
;; so it has been removed.
6589 (define_insn "avx2_interleave_lowv32qi"
6590 [(set (match_operand:V32QI 0 "register_operand" "=x")
6593 (match_operand:V32QI 1 "register_operand" "x")
6594 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6595 (parallel [(const_int 0) (const_int 32)
6596 (const_int 1) (const_int 33)
6597 (const_int 2) (const_int 34)
6598 (const_int 3) (const_int 35)
6599 (const_int 4) (const_int 36)
6600 (const_int 5) (const_int 37)
6601 (const_int 6) (const_int 38)
6602 (const_int 7) (const_int 39)
6604 (const_int 16) (const_int 48)
6605 (const_int 17) (const_int 49)
6606 (const_int 18) (const_int 50)
6607 (const_int 19) (const_int 51)
6608 (const_int 20) (const_int 52)
6609 (const_int 21) (const_int 53)
6610 (const_int 22) (const_int 54)
6611 (const_int 23) (const_int 55)])))]
6613 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6614 [(set_attr "type" "sselog")
6615 (set_attr "prefix" "vex")
6616 (set_attr "mode" "OI")])
;; Insn vec_interleave_lowv16qi: V16QI low interleave.  The selector
;; alternates elements 0-7 with elements 16-23; indices 16 and above address
;; operand 2, presumably through a vec_concat of the two inputs (the wrapping
;; rtx lines are not visible in this listing).
;; Alternative 0 (isa noavx) prints two-operand punpcklbw; alternative 1 (isa
;; avx) prints three-operand vpunpcklbw.
6618 (define_insn "vec_interleave_lowv16qi"
6619 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6622 (match_operand:V16QI 1 "register_operand" "0,x")
6623 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6624 (parallel [(const_int 0) (const_int 16)
6625 (const_int 1) (const_int 17)
6626 (const_int 2) (const_int 18)
6627 (const_int 3) (const_int 19)
6628 (const_int 4) (const_int 20)
6629 (const_int 5) (const_int 21)
6630 (const_int 6) (const_int 22)
6631 (const_int 7) (const_int 23)])))]
6634 punpcklbw\t{%2, %0|%0, %2}
6635 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6636 [(set_attr "isa" "noavx,avx")
6637 (set_attr "type" "sselog")
6638 (set_attr "prefix_data16" "1,*")
6639 (set_attr "prefix" "orig,vex")
6640 (set_attr "mode" "TI")])
;; Insn avx2_interleave_highv16hi: V16HI high interleave printed as
;; three-operand vpunpckhwd.  The selector alternates elements 4-7 with
;; 20-23, then 12-15 with 28-31; indices 16 and above address operand 2,
;; presumably through a vec_concat of the two inputs (the wrapping rtx lines
;; are not visible in this listing).
6642 (define_insn "avx2_interleave_highv16hi"
6643 [(set (match_operand:V16HI 0 "register_operand" "=x")
6646 (match_operand:V16HI 1 "register_operand" "x")
6647 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6648 (parallel [(const_int 4) (const_int 20)
6649 (const_int 5) (const_int 21)
6650 (const_int 6) (const_int 22)
6651 (const_int 7) (const_int 23)
6652 (const_int 12) (const_int 28)
6653 (const_int 13) (const_int 29)
6654 (const_int 14) (const_int 30)
6655 (const_int 15) (const_int 31)])))]
6657 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6658 [(set_attr "type" "sselog")
6659 (set_attr "prefix" "vex")
6660 (set_attr "mode" "OI")])
;; Insn vec_interleave_highv8hi: V8HI high interleave.  The selector
;; alternates elements 4-7 with elements 12-15; indices 8 and above address
;; operand 2, presumably through a vec_concat of the two inputs (the wrapping
;; rtx lines are not visible in this listing).
;; Alternative 0 (isa noavx) prints two-operand punpckhwd; alternative 1 (isa
;; avx) prints three-operand vpunpckhwd.
6662 (define_insn "vec_interleave_highv8hi"
6663 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6666 (match_operand:V8HI 1 "register_operand" "0,x")
6667 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6668 (parallel [(const_int 4) (const_int 12)
6669 (const_int 5) (const_int 13)
6670 (const_int 6) (const_int 14)
6671 (const_int 7) (const_int 15)])))]
6674 punpckhwd\t{%2, %0|%0, %2}
6675 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "isa" "noavx,avx")
6677 (set_attr "type" "sselog")
6678 (set_attr "prefix_data16" "1,*")
6679 (set_attr "prefix" "orig,vex")
6680 (set_attr "mode" "TI")])
;; Word interleave-low for V16HI: the selector takes elements 0..3 and
;; 8..11, each paired with the element 16 positions later.  Single
;; VEX-encoded alternative emitting three-operand vpunpcklwd.
6682 (define_insn "avx2_interleave_lowv16hi"
6683 [(set (match_operand:V16HI 0 "register_operand" "=x")
6686 (match_operand:V16HI 1 "register_operand" "x")
6687 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6688 (parallel [(const_int 0) (const_int 16)
6689 (const_int 1) (const_int 17)
6690 (const_int 2) (const_int 18)
6691 (const_int 3) (const_int 19)
6692 (const_int 8) (const_int 24)
6693 (const_int 9) (const_int 25)
6694 (const_int 10) (const_int 26)
6695 (const_int 11) (const_int 27)])))]
6697 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6698 [(set_attr "type" "sselog")
6699 (set_attr "prefix" "vex")
6700 (set_attr "mode" "OI")])
;; Word interleave-low for V8HI: the selector pairs element i with element
;; 8+i for i = 0..3.  Alternative 0 is legacy punpcklwd (operand 1 tied to
;; the destination); alternative 1 is three-operand vpunpcklwd.
6702 (define_insn "vec_interleave_lowv8hi"
6703 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6706 (match_operand:V8HI 1 "register_operand" "0,x")
6707 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6708 (parallel [(const_int 0) (const_int 8)
6709 (const_int 1) (const_int 9)
6710 (const_int 2) (const_int 10)
6711 (const_int 3) (const_int 11)])))]
6714 punpcklwd\t{%2, %0|%0, %2}
6715 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6716 [(set_attr "isa" "noavx,avx")
6717 (set_attr "type" "sselog")
6718 (set_attr "prefix_data16" "1,*")
6719 (set_attr "prefix" "orig,vex")
6720 (set_attr "mode" "TI")])
;; Doubleword interleave-high for V8SI: the selector takes elements 2,3 and
;; 6,7, each paired with the element 8 positions later.  Single VEX-encoded
;; alternative emitting three-operand vpunpckhdq.
6722 (define_insn "avx2_interleave_highv8si"
6723 [(set (match_operand:V8SI 0 "register_operand" "=x")
6726 (match_operand:V8SI 1 "register_operand" "x")
6727 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6728 (parallel [(const_int 2) (const_int 10)
6729 (const_int 3) (const_int 11)
6730 (const_int 6) (const_int 14)
6731 (const_int 7) (const_int 15)])))]
6733 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix" "vex")
6736 (set_attr "mode" "OI")])
;; Doubleword interleave-high for V4SI: selector is 2,6,3,7.  Alternative 0
;; is legacy punpckhdq (operand 1 tied to the destination); alternative 1
;; is three-operand vpunpckhdq.
6738 (define_insn "vec_interleave_highv4si"
6739 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6742 (match_operand:V4SI 1 "register_operand" "0,x")
6743 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6744 (parallel [(const_int 2) (const_int 6)
6745 (const_int 3) (const_int 7)])))]
6748 punpckhdq\t{%2, %0|%0, %2}
6749 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6750 [(set_attr "isa" "noavx,avx")
6751 (set_attr "type" "sselog")
6752 (set_attr "prefix_data16" "1,*")
6753 (set_attr "prefix" "orig,vex")
6754 (set_attr "mode" "TI")])
;; Doubleword interleave-low for V8SI: the selector takes elements 0,1 and
;; 4,5, each paired with the element 8 positions later.  Single VEX-encoded
;; alternative emitting three-operand vpunpckldq.
6756 (define_insn "avx2_interleave_lowv8si"
6757 [(set (match_operand:V8SI 0 "register_operand" "=x")
6760 (match_operand:V8SI 1 "register_operand" "x")
6761 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6762 (parallel [(const_int 0) (const_int 8)
6763 (const_int 1) (const_int 9)
6764 (const_int 4) (const_int 12)
6765 (const_int 5) (const_int 13)])))]
6767 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6768 [(set_attr "type" "sselog")
6769 (set_attr "prefix" "vex")
6770 (set_attr "mode" "OI")])
;; Doubleword interleave-low for V4SI: selector is 0,4,1,5.  Alternative 0
;; is legacy punpckldq (operand 1 tied to the destination); alternative 1
;; is three-operand vpunpckldq.
6772 (define_insn "vec_interleave_lowv4si"
6773 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6776 (match_operand:V4SI 1 "register_operand" "0,x")
6777 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6778 (parallel [(const_int 0) (const_int 4)
6779 (const_int 1) (const_int 5)])))]
6782 punpckldq\t{%2, %0|%0, %2}
6783 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6784 [(set_attr "isa" "noavx,avx")
6785 (set_attr "type" "sselog")
6786 (set_attr "prefix_data16" "1,*")
6787 (set_attr "prefix" "orig,vex")
6788 (set_attr "mode" "TI")])
6790 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI are gated on
;; TARGET_SSE4_1, and V2DI additionally on TARGET_64BIT.
6791 (define_mode_iterator PINSR_MODE
6792 [(V16QI "TARGET_SSE4_1") V8HI
6793 (V4SI "TARGET_SSE4_1")
6794 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix: "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
;; Substituted into the "<sse2p4_1>_pinsr<ssemodesuffix>" pattern name.
6796 (define_mode_attr sse2p4_1
6797 [(V16QI "sse4_1") (V8HI "sse2")
6798 (V4SI "sse4_1") (V2DI "sse4_1")])
6800 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (general register or memory) into one element of
;; vector operand 1, expressed as a vec_merge of a vec_duplicate.
;; Operand 3 is the vec_merge mask: the insn condition requires
;; exact_log2 (mask) to be a valid element index (< GET_MODE_NUNITS), and
;; the output code replaces operands[3] with that index so it can be
;; printed as the instruction immediate.
;; Alternatives 0-1 are the legacy form (operand 1 tied to the destination),
;; alternatives 2-3 the VEX form with a separate source.  The %k2 operand
;; modifier is used when the scalar mode is narrower than SImode.
;; prefix_rex is conditioned on non-AVX V2DImode; prefix_data16 and
;; prefix_extra are conditioned on non-AVX V8HImode.
6801 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6802 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6803 (vec_merge:PINSR_MODE
6804 (vec_duplicate:PINSR_MODE
6805 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6806 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6807 (match_operand:SI 3 "const_int_operand" "")))]
6809 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6810 < GET_MODE_NUNITS (<MODE>mode))"
6812 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6814 switch (which_alternative)
6817 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6818 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6821 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6823 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6824 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6827 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6832 [(set_attr "isa" "noavx,noavx,avx,avx")
6833 (set_attr "type" "sselog")
6834 (set (attr "prefix_rex")
6836 (and (not (match_test "TARGET_AVX"))
6837 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6839 (const_string "*")))
6840 (set (attr "prefix_data16")
6842 (and (not (match_test "TARGET_AVX"))
6843 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6845 (const_string "*")))
6846 (set (attr "prefix_extra")
6848 (and (not (match_test "TARGET_AVX"))
6849 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6851 (const_string "1")))
6852 (set_attr "length_immediate" "1")
6853 (set_attr "prefix" "orig,orig,vex,vex")
6854 (set_attr "mode" "TI")])
;; Extract the byte of V16QI operand 1 selected by operand 2 (0..15) into a
;; general register.  The destination is iterated over SWI48, but the
;; instruction is always printed with the 32-bit view of operand 0 (%k0).
6856 (define_insn "*sse4_1_pextrb_<mode>"
6857 [(set (match_operand:SWI48 0 "register_operand" "=r")
6860 (match_operand:V16QI 1 "register_operand" "x")
6861 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6863 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6864 [(set_attr "type" "sselog")
6865 (set_attr "prefix_extra" "1")
6866 (set_attr "length_immediate" "1")
6867 (set_attr "prefix" "maybe_vex")
6868 (set_attr "mode" "TI")])
;; Store the byte of V16QI operand 1 selected by operand 2 (0..15) directly
;; to a QImode memory destination with pextrb.
6870 (define_insn "*sse4_1_pextrb_memory"
6871 [(set (match_operand:QI 0 "memory_operand" "=m")
6873 (match_operand:V16QI 1 "register_operand" "x")
6874 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6876 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6877 [(set_attr "type" "sselog")
6878 (set_attr "prefix_extra" "1")
6879 (set_attr "length_immediate" "1")
6880 (set_attr "prefix" "maybe_vex")
6881 (set_attr "mode" "TI")])
;; Extract the word of V8HI operand 1 selected by operand 2 (0..7) into a
;; general register.  The destination is iterated over SWI48, but the
;; instruction is always printed with the 32-bit view of operand 0 (%k0).
6883 (define_insn "*sse2_pextrw_<mode>"
6884 [(set (match_operand:SWI48 0 "register_operand" "=r")
6887 (match_operand:V8HI 1 "register_operand" "x")
6888 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6890 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6891 [(set_attr "type" "sselog")
6892 (set_attr "prefix_data16" "1")
6893 (set_attr "length_immediate" "1")
6894 (set_attr "prefix" "maybe_vex")
6895 (set_attr "mode" "TI")])
;; Store the word of V8HI operand 1 selected by operand 2 (0..7) directly to
;; an HImode memory destination with pextrw.
6897 (define_insn "*sse4_1_pextrw_memory"
6898 [(set (match_operand:HI 0 "memory_operand" "=m")
6900 (match_operand:V8HI 1 "register_operand" "x")
6901 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6903 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6904 [(set_attr "type" "sselog")
6905 (set_attr "prefix_extra" "1")
6906 (set_attr "length_immediate" "1")
6907 (set_attr "prefix" "maybe_vex")
6908 (set_attr "mode" "TI")])
;; Extract the doubleword of V4SI operand 1 selected by operand 2 (0..3)
;; into an SImode general register or memory destination ("rm").
6910 (define_insn "*sse4_1_pextrd"
6911 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6913 (match_operand:V4SI 1 "register_operand" "x")
6914 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6916 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6917 [(set_attr "type" "sselog")
6918 (set_attr "prefix_extra" "1")
6919 (set_attr "length_immediate" "1")
6920 (set_attr "prefix" "maybe_vex")
6921 (set_attr "mode" "TI")])
;; Variant of pextrd with a DImode register destination, enabled only for
;; TARGET_64BIT && TARGET_SSE4_1.  The instruction is printed with the
;; 32-bit view of operand 0 (%k0).
6923 (define_insn "*sse4_1_pextrd_zext"
6924 [(set (match_operand:DI 0 "register_operand" "=r")
6927 (match_operand:V4SI 1 "register_operand" "x")
6928 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6929 "TARGET_64BIT && TARGET_SSE4_1"
6930 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6931 [(set_attr "type" "sselog")
6932 (set_attr "prefix_extra" "1")
6933 (set_attr "length_immediate" "1")
6934 (set_attr "prefix" "maybe_vex")
6935 (set_attr "mode" "TI")])
6937 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract the quadword of V2DI operand 1 selected by operand 2 (0..1) into
;; a DImode general register or memory destination.  Requires
;; TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is set to 1.
6938 (define_insn "*sse4_1_pextrq"
6939 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6941 (match_operand:V2DI 1 "register_operand" "x")
6942 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6943 "TARGET_SSE4_1 && TARGET_64BIT"
6944 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6945 [(set_attr "type" "sselog")
6946 (set_attr "prefix_rex" "1")
6947 (set_attr "prefix_extra" "1")
6948 (set_attr "length_immediate" "1")
6949 (set_attr "prefix" "maybe_vex")
6950 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one 8-bit immediate (operand 2,
;; 0..255).  It splits the immediate into four 2-bit selectors (bits 1:0,
;; 3:2, 5:4, 7:6) and passes them to gen_avx2_pshufd_1.
6952 (define_expand "avx2_pshufdv3"
6953 [(match_operand:V8SI 0 "register_operand" "")
6954 (match_operand:V8SI 1 "nonimmediate_operand" "")
6955 (match_operand:SI 2 "const_0_to_255_operand" "")]
6958 int mask = INTVAL (operands[2]);
6959 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
6960 GEN_INT ((mask >> 0) & 3),
6961 GEN_INT ((mask >> 2) & 3),
6962 GEN_INT ((mask >> 4) & 3),
6963 GEN_INT ((mask >> 6) & 3)));
;; V8SI shuffle with the control split into operands 2..5, each a selector
;; in 0..3.  The output code packs them back into an 8-bit immediate
;; (two bits per selector, operand 2 in the lowest bits), stores it in
;; operands[2], and emits vpshufd.
6967 (define_insn "avx2_pshufd_1"
6968 [(set (match_operand:V8SI 0 "register_operand" "=x")
6970 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
6971 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6972 (match_operand 3 "const_0_to_3_operand" "")
6973 (match_operand 4 "const_0_to_3_operand" "")
6974 (match_operand 5 "const_0_to_3_operand" "")
6982 mask |= INTVAL (operands[2]) << 0;
6983 mask |= INTVAL (operands[3]) << 2;
6984 mask |= INTVAL (operands[4]) << 4;
6985 mask |= INTVAL (operands[5]) << 6;
6986 operands[2] = GEN_INT (mask);
6988 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
6990 [(set_attr "type" "sselog1")
6991 (set_attr "prefix" "vex")
6992 (set_attr "length_immediate" "1")
6993 (set_attr "mode" "OI")])
;; Expander taking the shuffle control as one integer immediate
;; (operand 2).  It splits the low eight bits into four 2-bit selectors and
;; passes them to gen_sse2_pshufd_1.
6995 (define_expand "sse2_pshufd"
6996 [(match_operand:V4SI 0 "register_operand" "")
6997 (match_operand:V4SI 1 "nonimmediate_operand" "")
6998 (match_operand:SI 2 "const_int_operand" "")]
7001 int mask = INTVAL (operands[2]);
7002 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7003 GEN_INT ((mask >> 0) & 3),
7004 GEN_INT ((mask >> 2) & 3),
7005 GEN_INT ((mask >> 4) & 3),
7006 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the control split into operands 2..5, each a selector
;; in 0..3.  The output code packs them into an 8-bit immediate (two bits
;; per selector, operand 2 in the lowest bits), stores it in operands[2],
;; and emits pshufd (VEX-prefixed when applicable, via %v).
7010 (define_insn "sse2_pshufd_1"
7011 [(set (match_operand:V4SI 0 "register_operand" "=x")
7013 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7014 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7015 (match_operand 3 "const_0_to_3_operand" "")
7016 (match_operand 4 "const_0_to_3_operand" "")
7017 (match_operand 5 "const_0_to_3_operand" "")])))]
7021 mask |= INTVAL (operands[2]) << 0;
7022 mask |= INTVAL (operands[3]) << 2;
7023 mask |= INTVAL (operands[4]) << 4;
7024 mask |= INTVAL (operands[5]) << 6;
7025 operands[2] = GEN_INT (mask);
7027 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7029 [(set_attr "type" "sselog1")
7030 (set_attr "prefix_data16" "1")
7031 (set_attr "prefix" "maybe_vex")
7032 (set_attr "length_immediate" "1")
7033 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one 8-bit immediate (operand 2,
;; 0..255).  It splits the immediate into four 2-bit selectors and passes
;; them to gen_avx2_pshuflw_1.
7035 (define_expand "avx2_pshuflwv3"
7036 [(match_operand:V16HI 0 "register_operand" "")
7037 (match_operand:V16HI 1 "nonimmediate_operand" "")
7038 (match_operand:SI 2 "const_0_to_255_operand" "")]
7041 int mask = INTVAL (operands[2]);
7042 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7043 GEN_INT ((mask >> 0) & 3),
7044 GEN_INT ((mask >> 2) & 3),
7045 GEN_INT ((mask >> 4) & 3),
7046 GEN_INT ((mask >> 6) & 3)));
;; V16HI low-word shuffle with the control split into operands 2..5, each a
;; selector in 0..3.  The output code packs them into an 8-bit immediate
;; (two bits per selector, operand 2 in the lowest bits), stores it in
;; operands[2], and emits vpshuflw.
7050 (define_insn "avx2_pshuflw_1"
7051 [(set (match_operand:V16HI 0 "register_operand" "=x")
7053 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7054 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7055 (match_operand 3 "const_0_to_3_operand" "")
7056 (match_operand 4 "const_0_to_3_operand" "")
7057 (match_operand 5 "const_0_to_3_operand" "")
7073 mask |= INTVAL (operands[2]) << 0;
7074 mask |= INTVAL (operands[3]) << 2;
7075 mask |= INTVAL (operands[4]) << 4;
7076 mask |= INTVAL (operands[5]) << 6;
7077 operands[2] = GEN_INT (mask);
7079 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7081 [(set_attr "type" "sselog")
7082 (set_attr "prefix" "vex")
7083 (set_attr "length_immediate" "1")
7084 (set_attr "mode" "OI")])
;; Expander taking the shuffle control as one integer immediate
;; (operand 2).  It splits the low eight bits into four 2-bit selectors and
;; passes them to gen_sse2_pshuflw_1.
7086 (define_expand "sse2_pshuflw"
7087 [(match_operand:V8HI 0 "register_operand" "")
7088 (match_operand:V8HI 1 "nonimmediate_operand" "")
7089 (match_operand:SI 2 "const_int_operand" "")]
7092 int mask = INTVAL (operands[2]);
7093 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7094 GEN_INT ((mask >> 0) & 3),
7095 GEN_INT ((mask >> 2) & 3),
7096 GEN_INT ((mask >> 4) & 3),
7097 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word shuffle with the control split into operands 2..5, each a
;; selector in 0..3.  The output code packs them into an 8-bit immediate
;; (two bits per selector, operand 2 in the lowest bits), stores it in
;; operands[2], and emits pshuflw (VEX-prefixed when applicable, via %v).
;; The attributes mark a rep prefix and no data16 prefix.
7101 (define_insn "sse2_pshuflw_1"
7102 [(set (match_operand:V8HI 0 "register_operand" "=x")
7104 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7105 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7106 (match_operand 3 "const_0_to_3_operand" "")
7107 (match_operand 4 "const_0_to_3_operand" "")
7108 (match_operand 5 "const_0_to_3_operand" "")
7116 mask |= INTVAL (operands[2]) << 0;
7117 mask |= INTVAL (operands[3]) << 2;
7118 mask |= INTVAL (operands[4]) << 4;
7119 mask |= INTVAL (operands[5]) << 6;
7120 operands[2] = GEN_INT (mask);
7122 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7124 [(set_attr "type" "sselog")
7125 (set_attr "prefix_data16" "0")
7126 (set_attr "prefix_rep" "1")
7127 (set_attr "prefix" "maybe_vex")
7128 (set_attr "length_immediate" "1")
7129 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one 8-bit immediate (operand 2,
;; 0..255).  It splits the immediate into four 2-bit selectors, biases each
;; by 4 (giving values in 4..7), and passes them to gen_avx2_pshufhw_1.
7131 (define_expand "avx2_pshufhwv3"
7132 [(match_operand:V16HI 0 "register_operand" "")
7133 (match_operand:V16HI 1 "nonimmediate_operand" "")
7134 (match_operand:SI 2 "const_0_to_255_operand" "")]
7137 int mask = INTVAL (operands[2]);
7138 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7139 GEN_INT (((mask >> 0) & 3) + 4),
7140 GEN_INT (((mask >> 2) & 3) + 4),
7141 GEN_INT (((mask >> 4) & 3) + 4),
7142 GEN_INT (((mask >> 6) & 3) + 4)));
;; V16HI high-word shuffle.  The selector starts with a fixed (const_int 0)
;; entry and contains operands 2..5, each constrained to 4..7.  The output
;; code subtracts the bias of 4 from each, packs the results into an 8-bit
;; immediate (two bits per selector, operand 2 in the lowest bits), stores
;; it in operands[2], and emits vpshufhw.
7146 (define_insn "avx2_pshufhw_1"
7147 [(set (match_operand:V16HI 0 "register_operand" "=x")
7149 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7150 (parallel [(const_int 0)
7154 (match_operand 2 "const_4_to_7_operand" "")
7155 (match_operand 3 "const_4_to_7_operand" "")
7156 (match_operand 4 "const_4_to_7_operand" "")
7157 (match_operand 5 "const_4_to_7_operand" "")
7169 mask |= (INTVAL (operands[2]) - 4) << 0;
7170 mask |= (INTVAL (operands[3]) - 4) << 2;
7171 mask |= (INTVAL (operands[4]) - 4) << 4;
7172 mask |= (INTVAL (operands[5]) - 4) << 6;
7173 operands[2] = GEN_INT (mask);
7175 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7177 [(set_attr "type" "sselog")
7178 (set_attr "prefix" "vex")
7179 (set_attr "length_immediate" "1")
7180 (set_attr "mode" "OI")])
;; Expander taking the shuffle control as one integer immediate
;; (operand 2).  It splits the low eight bits into four 2-bit selectors,
;; biases each by 4 (giving values in 4..7), and passes them to
;; gen_sse2_pshufhw_1.
7182 (define_expand "sse2_pshufhw"
7183 [(match_operand:V8HI 0 "register_operand" "")
7184 (match_operand:V8HI 1 "nonimmediate_operand" "")
7185 (match_operand:SI 2 "const_int_operand" "")]
7188 int mask = INTVAL (operands[2]);
7189 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7190 GEN_INT (((mask >> 0) & 3) + 4),
7191 GEN_INT (((mask >> 2) & 3) + 4),
7192 GEN_INT (((mask >> 4) & 3) + 4),
7193 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word shuffle.  The selector starts with a fixed (const_int 0)
;; entry and ends with operands 2..5, each constrained to 4..7.  The output
;; code subtracts the bias of 4 from each, packs the results into an 8-bit
;; immediate (two bits per selector, operand 2 in the lowest bits), stores
;; it in operands[2], and emits pshufhw (VEX-prefixed when applicable).
;; The attributes mark a rep prefix and no data16 prefix.
7197 (define_insn "sse2_pshufhw_1"
7198 [(set (match_operand:V8HI 0 "register_operand" "=x")
7200 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7201 (parallel [(const_int 0)
7205 (match_operand 2 "const_4_to_7_operand" "")
7206 (match_operand 3 "const_4_to_7_operand" "")
7207 (match_operand 4 "const_4_to_7_operand" "")
7208 (match_operand 5 "const_4_to_7_operand" "")])))]
7212 mask |= (INTVAL (operands[2]) - 4) << 0;
7213 mask |= (INTVAL (operands[3]) - 4) << 2;
7214 mask |= (INTVAL (operands[4]) - 4) << 4;
7215 mask |= (INTVAL (operands[5]) - 4) << 6;
7216 operands[2] = GEN_INT (mask);
7218 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7220 [(set_attr "type" "sselog")
7221 (set_attr "prefix_rep" "1")
7222 (set_attr "prefix_data16" "0")
7223 (set_attr "prefix" "maybe_vex")
7224 (set_attr "length_immediate" "1")
7225 (set_attr "mode" "TI")])
;; Expander that places SImode operand 1 into a V4SI register.  The
;; preparation statement sets operands[2] to the V4SImode zero vector
;; (CONST0_RTX) for use by the pattern.
7227 (define_expand "sse2_loadd"
7228 [(set (match_operand:V4SI 0 "register_operand" "")
7231 (match_operand:SI 1 "nonimmediate_operand" ""))
7235 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI register, combined with vector
;; operand 1.  Alternatives, in order:
;;   0: movd from memory, operand 1 is zero ("C"), requires sse2
;;   1: movd from a general register into a "Yi" register, operand 1 zero
;;   2: movss from memory, operand 1 zero (non-AVX)
;;   3: movss from an SSE register, operand 1 tied to the destination
;;   4: three-operand vmovss with operand 1 in a separate register
7237 (define_insn "sse2_loadld"
7238 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7241 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7242 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7246 %vmovd\t{%2, %0|%0, %2}
7247 %vmovd\t{%2, %0|%0, %2}
7248 movss\t{%2, %0|%0, %2}
7249 movss\t{%2, %0|%0, %2}
7250 vmovss\t{%2, %1, %0|%0, %1, %2}"
7251 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7252 (set_attr "type" "ssemov")
7253 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7254 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element 0 of V4SI operand 1 into an SImode destination (SSE
;; register or memory, or a general register from a "Yi" source).
;; After reload it is split into a plain move when inter-unit moves are
;; enabled, or when the destination is memory or not a general register;
;; the split re-expresses operand 1 as an SImode register with the same
;; register number.
7256 (define_insn_and_split "sse2_stored"
7257 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7259 (match_operand:V4SI 1 "register_operand" "x,Yi")
7260 (parallel [(const_int 0)])))]
7263 "&& reload_completed
7264 && (TARGET_INTER_UNIT_MOVES
7265 || MEM_P (operands [0])
7266 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7267 [(set (match_dup 0) (match_dup 1))]
7268 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI value held in offsettable
;; memory ("o") into a general register.  The split code emits a plain
;; SImode load from operand 1 at byte offset i*4, where i is the element
;; index.
7270 (define_insn_and_split "*vec_ext_v4si_mem"
7271 [(set (match_operand:SI 0 "register_operand" "=r")
7273 (match_operand:V4SI 1 "memory_operand" "o")
7274 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7280 int i = INTVAL (operands[2]);
7282 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for extracting element 0 of V2DI register operand 1 into a
;; DImode register or memory destination.
7286 (define_expand "sse_storeq"
7287 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7289 (match_operand:V2DI 1 "register_operand" "")
7290 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 extraction from V2DI operand 1.  The
;; condition rejects the memory-to-memory combination.  The third
;; alternative loads from offsettable memory into a general register with
;; mov{q} (type imov, mode DI).
7293 (define_insn "*sse2_storeq_rex64"
7294 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7296 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7297 (parallel [(const_int 0)])))]
7298 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7302 mov{q}\t{%1, %0|%0, %1}"
7303 [(set_attr "type" "*,*,imov")
7304 (set_attr "mode" "*,*,DI")])
;; Extract element 0 of V2DI register operand 1 into a DImode SSE register
;; or memory destination ("xm").
7306 (define_insn "*sse2_storeq"
7307 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7309 (match_operand:V2DI 1 "register_operand" "x")
7310 (parallel [(const_int 0)])))]
;; Split for the element-0 extraction from V2DI operand 1: when inter-unit
;; moves are enabled, or the destination is memory or not a general
;; register, rewrite it as a plain DImode move.  Operand 1 is re-expressed
;; as a DImode register with the same register number.
7315 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7317 (match_operand:V2DI 1 "register_operand" "")
7318 (parallel [(const_int 0)])))]
7321 && (TARGET_INTER_UNIT_MOVES
7322 || MEM_P (operands [0])
7323 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7324 [(set (match_dup 0) (match_dup 1))]
7325 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of V2DI operand 1 into a DImode destination on 64-bit
;; targets; the memory-to-memory combination is rejected.  Alternatives:
;;   0: movhps store to memory
;;   1: psrldq by 8 in place (operand 1 tied to the destination, non-AVX)
;;   2: three-operand vpsrldq by 8
;;   3: movq load into an SSE register from offsettable memory via %H1
;;   4: mov{q} load into a general register from offsettable memory via %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
7327 (define_insn "*vec_extractv2di_1_rex64"
7328 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7330 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7331 (parallel [(const_int 1)])))]
7332 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7334 %vmovhps\t{%1, %0|%0, %1}
7335 psrldq\t{$8, %0|%0, 8}
7336 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7337 %vmovq\t{%H1, %0|%0, %H1}
7338 mov{q}\t{%H1, %0|%0, %H1}"
7339 [(set_attr "isa" "*,noavx,avx,*,*")
7340 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7341 (set_attr "length_immediate" "*,1,1,*,*")
7342 (set_attr "memory" "*,none,none,*,*")
7343 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7344 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of V2DI operand 1 into a DImode destination on 32-bit
;; SSE targets; the memory-to-memory combination is rejected.
;; Alternatives:
;;   0: movhps store to memory
;;   1: psrldq by 8 in place (operand 1 tied to the destination, sse2 non-AVX)
;;   2: three-operand vpsrldq by 8
;;   3: movq load from offsettable memory via %H1 (sse2)
;;   4: movhlps register to register (non-AVX)
;;   5: movlps load from offsettable memory via %H1 (non-AVX)
7346 (define_insn "*vec_extractv2di_1"
7347 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7349 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7350 (parallel [(const_int 1)])))]
7351 "!TARGET_64BIT && TARGET_SSE
7352 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7354 %vmovhps\t{%1, %0|%0, %1}
7355 psrldq\t{$8, %0|%0, 8}
7356 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7357 %vmovq\t{%H1, %0|%0, %H1}
7358 movhlps\t{%1, %0|%0, %1}
7359 movlps\t{%H1, %0|%0, %H1}"
7360 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7361 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7362 (set_attr "length_immediate" "*,1,1,*,*,*")
7363 (set_attr "memory" "*,none,none,*,*,*")
7364 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7365 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI duplicate of SImode operand 1: vpshufd with immediate 0
;; when the source is an SSE register, vbroadcastss when it is in memory.
7367 (define_insn "*vec_dupv4si_avx"
7368 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7370 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7373 vpshufd\t{$0, %1, %0|%0, %1, 0}
7374 vbroadcastss\t{%1, %0|%0, %1}"
7375 [(set_attr "type" "sselog1,ssemov")
7376 (set_attr "length_immediate" "1,0")
7377 (set_attr "prefix_extra" "0,1")
7378 (set_attr "prefix" "vex")
7379 (set_attr "mode" "TI,V4SF")])
;; V4SI duplicate of SImode register operand 1: pshufd with immediate 0
;; (sse2 alternative), or shufps with immediate 0 on the destination itself
;; when operand 1 is tied to it.
7381 (define_insn "*vec_dupv4si"
7382 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7384 (match_operand:SI 1 "register_operand" " x,0")))]
7387 pshufd\t{$0, %1, %0|%0, %1, 0}
7388 shufps\t{$0, %0, %0|%0, %0, 0}"
7389 [(set_attr "isa" "sse2,*")
7390 (set_attr "type" "sselog1")
7391 (set_attr "length_immediate" "1")
7392 (set_attr "mode" "TI,V4SF")])
;; V2DI duplicate of DImode operand 1 with three alternatives: operand 1
;; tied to the destination (non-AVX), operand 1 in a separate SSE register
;; (AVX), or operand 1 in memory (any ISA, mode DF).  The isa/prefix
;; attributes pair vpunpcklqdq with the AVX alternative and %vmovddup with
;; the memory alternative.
7394 (define_insn "*vec_dupv2di_sse3"
7395 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7397 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7401 vpunpcklqdq\t{%d1, %0|%0, %d1}
7402 %vmovddup\t{%1, %0|%0, %1}"
7403 [(set_attr "isa" "noavx,avx,*")
7404 (set_attr "type" "sselog1")
7405 (set_attr "prefix" "orig,vex,maybe_vex")
7406 (set_attr "mode" "TI,TI,DF")])
;; V2DI duplicate of DImode register operand 1, which is tied to the
;; destination in both alternatives.  The first alternative requires sse2
;; (type sselog1, mode TI); the second is typed ssemov in V4SF mode.
7408 (define_insn "*vec_dupv2di"
7409 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7411 (match_operand:DI 1 "register_operand" " 0,0")))]
7416 [(set_attr "isa" "sse2,*")
7417 (set_attr "type" "sselog1,ssemov")
7418 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from SImode operands 1 and 2.  Alternatives:
;;   0: pinsrd $1 with operand 1 tied to the destination (non-AVX)
;;   1: three-operand vpinsrd $1
;;   2: punpckldq with operand 1 tied to the destination (non-AVX)
;;   3: three-operand vpunpckldq
;;   4: movd of operand 1 when operand 2 is zero ("C")
;;   5: MMX-register punpckldq with operand 1 tied to the destination
;;   6: MMX-register movd of operand 1 when operand 2 is zero
7420 (define_insn "*vec_concatv2si_sse4_1"
7421 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7423 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7424 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7427 pinsrd\t{$1, %2, %0|%0, %2, 1}
7428 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7429 punpckldq\t{%2, %0|%0, %2}
7430 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7431 %vmovd\t{%1, %0|%0, %1}
7432 punpckldq\t{%2, %0|%0, %2}
7433 movd\t{%1, %0|%0, %1}"
7434 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7435 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7436 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7437 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7438 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7439 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7441 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7442 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7443 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Alternatives: 0 is SSE punpckldq with operand 1 tied to the destination;
;; 1 is SSE movd of operand 1 when operand 2 is zero ("C"); 2 and 3 are the
;; same two shapes using MMX ("*y") registers.
7444 (define_insn "*vec_concatv2si_sse2"
7445 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7447 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7448 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7451 punpckldq\t{%2, %0|%0, %2}
7452 movd\t{%1, %0|%0, %1}
7453 punpckldq\t{%2, %0|%0, %2}
7454 movd\t{%1, %0|%0, %1}"
7455 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7456 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from SImode operands 1 and 2 using single-precision
;; SSE instructions for the SSE-register alternatives: unpcklps with
;; operand 1 tied to the destination, or movss from memory when operand 2
;; is zero ("C").  Alternatives 2 and 3 use MMX registers (punpckldq, movd).
7458 (define_insn "*vec_concatv2si_sse"
7459 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7461 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7462 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7465 unpcklps\t{%2, %0|%0, %2}
7466 movss\t{%1, %0|%0, %1}
7467 punpckldq\t{%2, %0|%0, %2}
7468 movd\t{%1, %0|%0, %1}"
7469 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7470 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from V2SI operands 1 and 2.  Alternatives:
;;   0: punpcklqdq, operand 1 tied to the destination (sse2, non-AVX)
;;   1: three-operand vpunpcklqdq
;;   2: movlhps register to register, operand 1 tied (non-AVX)
;;   3: movhps from memory, operand 1 tied (non-AVX)
;;   4: three-operand vmovhps from memory
7472 (define_insn "*vec_concatv4si"
7473 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7475 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7476 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7479 punpcklqdq\t{%2, %0|%0, %2}
7480 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7481 movlhps\t{%2, %0|%0, %2}
7482 movhps\t{%2, %0|%0, %2}
7483 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7484 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7485 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7486 (set_attr "prefix" "orig,vex,orig,orig,vex")
7487 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7489 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from DImode operands 1 and 2.  Alternatives:
;;   0: pinsrq $1, operand 1 tied to the destination (sse4, non-AVX)
;;   1: three-operand vpinsrq $1
;;   2: movq of operand 1 when operand 2 is zero ("C")
;;   3: movd of general-register operand 1 into a "Yi" register, operand 2 zero
;;   4: movq2dq from an MMX register, operand 2 zero
;;   5: punpcklqdq, operand 1 tied (non-AVX)
;;   6: three-operand vpunpcklqdq
;;   7: movhps from memory, operand 1 tied (non-AVX)
;;   8: three-operand vmovhps from memory
;; Alternatives 0, 1, 5 and 6 are typed sselog, the rest ssemov.
7490 (define_insn "*vec_concatv2di_rex64"
7491 [(set (match_operand:V2DI 0 "register_operand"
7492 "=x,x ,x ,Yi,!x,x,x,x,x")
7494 (match_operand:DI 1 "nonimmediate_operand"
7495 " 0,x ,xm,r ,*y,0,x,0,x")
7496 (match_operand:DI 2 "vector_move_operand"
7497 "rm,rm,C ,C ,C ,x,x,m,m")))]
7500 pinsrq\t{$1, %2, %0|%0, %2, 1}
7501 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7502 %vmovq\t{%1, %0|%0, %1}
7503 %vmovd\t{%1, %0|%0, %1}
7504 movq2dq\t{%1, %0|%0, %1}
7505 punpcklqdq\t{%2, %0|%0, %2}
7506 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7507 movhps\t{%2, %0|%0, %2}
7508 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7509 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7512 (eq_attr "alternative" "0,1,5,6")
7513 (const_string "sselog")
7514 (const_string "ssemov")))
7515 (set (attr "prefix_rex")
7517 (and (eq_attr "alternative" "0,3")
7518 (not (match_test "TARGET_AVX")))
7520 (const_string "*")))
7521 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7522 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7523 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7524 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Build a V2DI value from DImode operands 1 and 2 on 32-bit SSE targets.
;; Alternatives:
;;   0: movq of operand 1 when operand 2 is zero ("C") (sse2)
;;   1: movq2dq from an MMX register, operand 2 zero (sse2)
;;   2: punpcklqdq, operand 1 tied to the destination (sse2, non-AVX)
;;   3: three-operand vpunpcklqdq
;;   4: movlhps register to register, operand 1 tied (non-AVX)
;;   5: movhps from memory, operand 1 tied (non-AVX)
;;   6: three-operand vmovhps from memory
7526 (define_insn "vec_concatv2di"
7527 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7529 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7530 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7531 "!TARGET_64BIT && TARGET_SSE"
7533 %vmovq\t{%1, %0|%0, %1}
7534 movq2dq\t{%1, %0|%0, %1}
7535 punpcklqdq\t{%2, %0|%0, %2}
7536 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7537 movlhps\t{%2, %0|%0, %2}
7538 movhps\t{%2, %0|%0, %2}
7539 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7540 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7541 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7542 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7543 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander over the VI124_128 modes producing a <sseunpackmode> result.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): judging by the sibling expanders' names, the second
;; argument presumably selects unsigned and the third the high half --
;; confirm against the helper's definition.
7545 (define_expand "vec_unpacks_lo_<mode>"
7546 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7547 (match_operand:VI124_128 1 "register_operand" "")]
7549 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander over the VI124_128 modes producing a <sseunpackmode> result.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): judging by the sibling expanders' names, the second
;; argument presumably selects unsigned and the third the high half --
;; confirm against the helper's definition.
7551 (define_expand "vec_unpacks_hi_<mode>"
7552 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7553 (match_operand:VI124_128 1 "register_operand" "")]
7555 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander over the VI124_128 modes producing a <sseunpackmode> result.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): judging by the sibling expanders' names, the second
;; argument presumably selects unsigned and the third the high half --
;; confirm against the helper's definition.
7557 (define_expand "vec_unpacku_lo_<mode>"
7558 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7559 (match_operand:VI124_128 1 "register_operand" "")]
7561 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander over the VI124_128 modes producing a <sseunpackmode> result.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): judging by the sibling expanders' names, the second
;; argument presumably selects unsigned and the third the high half --
;; confirm against the helper's definition.
7563 (define_expand "vec_unpacku_hi_<mode>"
7564 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7565 (match_operand:VI124_128 1 "register_operand" "")]
7567 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7569 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI average pattern.  The RTL combines operands 1 and
;; 2 with a V32QI constant vector of 32 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V32QImode.
7575 (define_expand "avx2_uavgv32qi3"
7576 [(set (match_operand:V32QI 0 "register_operand" "")
7582 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7584 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7585 (const_vector:V32QI [(const_int 1) (const_int 1)
7586 (const_int 1) (const_int 1)
7587 (const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)
7589 (const_int 1) (const_int 1)
7590 (const_int 1) (const_int 1)
7591 (const_int 1) (const_int 1)
7592 (const_int 1) (const_int 1)
7593 (const_int 1) (const_int 1)
7594 (const_int 1) (const_int 1)
7595 (const_int 1) (const_int 1)
7596 (const_int 1) (const_int 1)
7597 (const_int 1) (const_int 1)
7598 (const_int 1) (const_int 1)
7599 (const_int 1) (const_int 1)
7600 (const_int 1) (const_int 1)]))
7603 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI average pattern.  The RTL combines operands 1 and
;; 2 with a V16QI constant vector of 16 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V16QImode.
7605 (define_expand "sse2_uavgv16qi3"
7606 [(set (match_operand:V16QI 0 "register_operand" "")
7612 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7614 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7615 (const_vector:V16QI [(const_int 1) (const_int 1)
7616 (const_int 1) (const_int 1)
7617 (const_int 1) (const_int 1)
7618 (const_int 1) (const_int 1)
7619 (const_int 1) (const_int 1)
7620 (const_int 1) (const_int 1)
7621 (const_int 1) (const_int 1)
7622 (const_int 1) (const_int 1)]))
7625 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI average: operands 1 and 2 combined with a
;; V32QI vector of ones.  Operand 1 is marked commutative ("%").  Enabled
;; when TARGET_AVX2 holds and ix86_binary_operator_ok accepts the operands
;; for PLUS; emits three-operand vpavgb.
7627 (define_insn "*avx2_uavgv32qi3"
7628 [(set (match_operand:V32QI 0 "register_operand" "=x")
7634 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7636 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7637 (const_vector:V32QI [(const_int 1) (const_int 1)
7638 (const_int 1) (const_int 1)
7639 (const_int 1) (const_int 1)
7640 (const_int 1) (const_int 1)
7641 (const_int 1) (const_int 1)
7642 (const_int 1) (const_int 1)
7643 (const_int 1) (const_int 1)
7644 (const_int 1) (const_int 1)
7645 (const_int 1) (const_int 1)
7646 (const_int 1) (const_int 1)
7647 (const_int 1) (const_int 1)
7648 (const_int 1) (const_int 1)
7649 (const_int 1) (const_int 1)
7650 (const_int 1) (const_int 1)
7651 (const_int 1) (const_int 1)
7652 (const_int 1) (const_int 1)]))
7654 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7655 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7656 [(set_attr "type" "sseiadd")
7657 (set_attr "prefix" "vex")
7658 (set_attr "mode" "OI")])
;; Matching insn for the V16QI average: operands 1 and 2 combined with a
;; V16QI vector of ones.  Operand 1 is marked commutative ("%").  Enabled
;; when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands
;; for PLUS.  Alternative 0 is legacy pavgb (operand 1 tied to the
;; destination); alternative 1 is three-operand vpavgb.
7660 (define_insn "*sse2_uavgv16qi3"
7661 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7667 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7669 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7670 (const_vector:V16QI [(const_int 1) (const_int 1)
7671 (const_int 1) (const_int 1)
7672 (const_int 1) (const_int 1)
7673 (const_int 1) (const_int 1)
7674 (const_int 1) (const_int 1)
7675 (const_int 1) (const_int 1)
7676 (const_int 1) (const_int 1)
7677 (const_int 1) (const_int 1)]))
7679 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7681 pavgb\t{%2, %0|%0, %2}
7682 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7683 [(set_attr "isa" "noavx,avx")
7684 (set_attr "type" "sseiadd")
7685 (set_attr "prefix_data16" "1,*")
7686 (set_attr "prefix" "orig,vex")
7687 (set_attr "mode" "TI")])
;; Expander for the V16HI average pattern.  The RTL combines operands 1 and
;; 2 with a V16HI constant vector of 16 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V16HImode.
7689 (define_expand "avx2_uavgv16hi3"
7690 [(set (match_operand:V16HI 0 "register_operand" "")
7696 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7698 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7699 (const_vector:V16HI [(const_int 1) (const_int 1)
7700 (const_int 1) (const_int 1)
7701 (const_int 1) (const_int 1)
7702 (const_int 1) (const_int 1)
7703 (const_int 1) (const_int 1)
7704 (const_int 1) (const_int 1)
7705 (const_int 1) (const_int 1)
7706 (const_int 1) (const_int 1)]))
7709 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI average pattern.  The RTL combines operands 1 and
;; 2 with a V8HI constant vector of 8 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V8HImode.
7711 (define_expand "sse2_uavgv8hi3"
7712 [(set (match_operand:V8HI 0 "register_operand" "")
7718 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7720 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7721 (const_vector:V8HI [(const_int 1) (const_int 1)
7722 (const_int 1) (const_int 1)
7723 (const_int 1) (const_int 1)
7724 (const_int 1) (const_int 1)]))
7727 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI average: operands 1 and 2 combined with a
;; V16HI vector of ones.  Operand 1 is marked commutative ("%").  Enabled
;; when TARGET_AVX2 holds and ix86_binary_operator_ok accepts the operands
;; for PLUS; emits three-operand vpavgw.
7729 (define_insn "*avx2_uavgv16hi3"
7730 [(set (match_operand:V16HI 0 "register_operand" "=x")
7736 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7738 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7739 (const_vector:V16HI [(const_int 1) (const_int 1)
7740 (const_int 1) (const_int 1)
7741 (const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)
7743 (const_int 1) (const_int 1)
7744 (const_int 1) (const_int 1)
7745 (const_int 1) (const_int 1)
7746 (const_int 1) (const_int 1)]))
7748 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7749 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7750 [(set_attr "type" "sseiadd")
7751 (set_attr "prefix" "vex")
7752 (set_attr "mode" "OI")])
;; Matching insn for the V8HI average: operands 1 and 2 combined with a
;; V8HI vector of ones.  Operand 1 is marked commutative ("%").  Enabled
;; when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands
;; for PLUS.  Alternative 0 is legacy pavgw (operand 1 tied to the
;; destination); alternative 1 is three-operand vpavgw.
7754 (define_insn "*sse2_uavgv8hi3"
7755 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7761 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7763 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7764 (const_vector:V8HI [(const_int 1) (const_int 1)
7765 (const_int 1) (const_int 1)
7766 (const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)]))
7769 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7771 pavgw\t{%2, %0|%0, %2}
7772 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7773 [(set_attr "isa" "noavx,avx")
7774 (set_attr "type" "sseiadd")
7775 (set_attr "prefix_data16" "1,*")
7776 (set_attr "prefix" "orig,vex")
7777 (set_attr "mode" "TI")])
7779 ;; The correct representation for this is absolutely enormous, and
7780 ;; surely not generally useful.
;; Insn <sse2_avx2>_psadbw, modelled as an unspec rather than explicit RTL.
;; The result (operand 0) has a mode from the VI8_AVX2 iterator, while the
;; two inputs use the corresponding <ssebytemode>.  Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints psadbw.
;;   alternative 1 (isa avx): prints the three-operand vpsadbw.
;; Operand 2 may be a register or memory (xm).
;; NOTE(review): the unspec code and the condition string are on lines
;; missing from this listing.
7781 (define_insn "<sse2_avx2>_psadbw"
7782 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7783 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7784 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7788 psadbw\t{%2, %0|%0, %2}
7789 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7790 [(set_attr "isa" "noavx,avx")
7791 (set_attr "type" "sseiadd")
7792 (set_attr "atom_unit" "simul")
7793 (set_attr "prefix_data16" "1,*")
7794 (set_attr "prefix" "orig,vex")
7795 (set_attr "mode" "<sseinsnmode>")])
;; Insn <sse>_movmsk<ssemodesuffix><avxsizesuffix>, instantiated for every
;; mode of the VF iterator (the vector float modes).
;; Operand 1 is a vector register (x); the SImode result goes to a general
;; register (=r).  Prints %vmovmsk<ssemodesuffix>; the prefix attribute is
;; maybe_vex, so the encoding is not fixed by this pattern.
;; NOTE(review): the RTL code wrapping operand 1 and the condition string
;; are on lines missing from this listing.
7797 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7798 [(set (match_operand:SI 0 "register_operand" "=r")
7800 [(match_operand:VF 1 "register_operand" "x")]
7803 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7804 [(set_attr "type" "ssemov")
7805 (set_attr "prefix" "maybe_vex")
7806 (set_attr "mode" "<MODE>")])
;; Insn avx2_pmovmskb.
;; An SImode unspec of a V32QI register (x) is stored into a general
;; register (=r).  Prints vpmovmskb; VEX prefix only.
;; NOTE(review): the mode attribute is DI here, whereas the sse2_pmovmskb
;; pattern in this file uses SI -- confirm that the difference is
;; intended.  The unspec code and the condition string are on lines
;; missing from this listing.
7808 (define_insn "avx2_pmovmskb"
7809 [(set (match_operand:SI 0 "register_operand" "=r")
7810 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7813 "vpmovmskb\t{%1, %0|%0, %1}"
7814 [(set_attr "type" "ssemov")
7815 (set_attr "prefix" "vex")
7816 (set_attr "mode" "DI")])
;; Insn sse2_pmovmskb.
;; An SImode unspec of a V16QI register (x) is stored into a general
;; register (=r).  Prints %vpmovmskb; the prefix attribute is maybe_vex
;; and prefix_data16 is 1.  Mode is SI.
;; NOTE(review): the unspec code and the condition string are on lines
;; missing from this listing.
7818 (define_insn "sse2_pmovmskb"
7819 [(set (match_operand:SI 0 "register_operand" "=r")
7820 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7823 "%vpmovmskb\t{%1, %0|%0, %1}"
7824 [(set_attr "type" "ssemov")
7825 (set_attr "prefix_data16" "1")
7826 (set_attr "prefix" "maybe_vex")
7827 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu.
;; Sets a V16QI memory operand (operand 0) from a V16QI unspec whose
;; visible inputs are the V16QI registers in operands 1 and 2.
;; NOTE(review): the rest of the unspec, its code and the condition are
;; on lines missing from this listing.
7829 (define_expand "sse2_maskmovdqu"
7830 [(set (match_operand:V16QI 0 "memory_operand" "")
7831 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7832 (match_operand:V16QI 2 "register_operand" "")
;; Insn *sse2_maskmovdqu.
;; The destination is the V16QI memory addressed by operand 0, a
;; pointer-mode (P) register with constraint D.  The unspec reads the
;; V16QI registers in operands 1 and 2 and also the old contents of that
;; same memory (match_dup 0).  The address register does not appear in
;; the printed template, which shows only operands 2 and 1.  Because of
;; that, length_vex is computed explicitly as 3 plus
;; REX_SSE_REGNO_P (REGNO (operands[2])) instead of using the default.
;; NOTE(review): the unspec code and the condition string are on lines
;; missing from this listing.
7837 (define_insn "*sse2_maskmovdqu"
7838 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7839 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7840 (match_operand:V16QI 2 "register_operand" "x")
7841 (mem:V16QI (match_dup 0))]
7844 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7845 [(set_attr "type" "ssemov")
7846 (set_attr "prefix_data16" "1")
7847 ;; The implicit %rdi operand confuses default length_vex computation.
7848 (set (attr "length_vex")
7849 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7850 (set_attr "prefix" "maybe_vex")
7851 (set_attr "mode" "TI")])
;; Insn sse_ldmxcsr.
;; An unspec_volatile whose only input is an SImode memory operand (m).
;; The memory attribute is load and atom_sse_attr is mxcsr; the prefix
;; attribute is maybe_vex.
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines missing from this listing.
7853 (define_insn "sse_ldmxcsr"
7854 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7858 [(set_attr "type" "sse")
7859 (set_attr "atom_sse_attr" "mxcsr")
7860 (set_attr "prefix" "maybe_vex")
7861 (set_attr "memory" "load")])
;; Insn sse_stmxcsr.
;; Stores the value of the unspec_volatile UNSPECV_STMXCSR (which has a
;; dummy const_int 0 input) into an SImode memory operand (=m).
;; The memory attribute is store and atom_sse_attr is mxcsr; the prefix
;; attribute is maybe_vex.
;; NOTE(review): the condition and the output template are on lines
;; missing from this listing.
7863 (define_insn "sse_stmxcsr"
7864 [(set (match_operand:SI 0 "memory_operand" "=m")
7865 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7868 [(set_attr "type" "sse")
7869 (set_attr "atom_sse_attr" "mxcsr")
7870 (set_attr "prefix" "maybe_vex")
7871 (set_attr "memory" "store")])
;; Expander sse_sfence, available when TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, flagged volatile.  The template uses that
;; operand as the input of an UNSPEC_SFENCE unspec.
;; NOTE(review): the line holding the set destination is missing from
;; this listing.
7873 (define_expand "sse_sfence"
7875 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7876 "TARGET_SSE || TARGET_3DNOW_A"
7878 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7879 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse_sfence, enabled when TARGET_SSE or TARGET_3DNOW_A.
;; Sets a BLKmode operand (no predicate, no constraint) to an
;; UNSPEC_SFENCE unspec of that same operand.  The length_address
;; attribute is forced to 0, memory is unknown and atom_sse_attr is fence.
;; NOTE(review): the output template is on a line missing from this
;; listing.
7882 (define_insn "*sse_sfence"
7883 [(set (match_operand:BLK 0 "" "")
7884 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7885 "TARGET_SSE || TARGET_3DNOW_A"
7887 [(set_attr "type" "sse")
7888 (set_attr "length_address" "0")
7889 (set_attr "atom_sse_attr" "fence")
7890 (set_attr "memory" "unknown")])
;; Insn sse2_clflush.
;; An unspec_volatile whose only input is an address operand (predicate
;; address_operand, constraint p).  The memory attribute is unknown and
;; atom_sse_attr is fence.
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines missing from this listing.
7892 (define_insn "sse2_clflush"
7893 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7897 [(set_attr "type" "sse")
7898 (set_attr "atom_sse_attr" "fence")
7899 (set_attr "memory" "unknown")])
;; Expander sse2_mfence.
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, flagged volatile.  The template uses that
;; operand as the input of an UNSPEC_MFENCE unspec.
;; NOTE(review): the set destination and the condition string are on
;; lines missing from this listing.
7901 (define_expand "sse2_mfence"
7903 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7906 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7907 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse2_mfence, enabled when TARGET_64BIT or TARGET_SSE2.
;; Sets a BLKmode operand (no predicate, no constraint) to an
;; UNSPEC_MFENCE unspec of that same operand.  The length_address
;; attribute is forced to 0, memory is unknown and atom_sse_attr is fence.
;; NOTE(review): the output template is on a line missing from this
;; listing.
7910 (define_insn "*sse2_mfence"
7911 [(set (match_operand:BLK 0 "" "")
7912 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7913 "TARGET_64BIT || TARGET_SSE2"
7915 [(set_attr "type" "sse")
7916 (set_attr "length_address" "0")
7917 (set_attr "atom_sse_attr" "fence")
7918 (set_attr "memory" "unknown")])
;; Expander sse2_lfence.
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, flagged volatile.  The template uses that
;; operand as the input of an UNSPEC_LFENCE unspec.
;; NOTE(review): the set destination and the condition string are on
;; lines missing from this listing.
7920 (define_expand "sse2_lfence"
7922 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7925 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7926 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse2_lfence.
;; Sets a BLKmode operand (no predicate, no constraint) to an
;; UNSPEC_LFENCE unspec of that same operand.  The length_address
;; attribute is forced to 0 and memory is unknown.  Unlike the sfence and
;; mfence patterns in this file, atom_sse_attr is lfence rather than
;; fence.
;; NOTE(review): the condition and the output template are on lines
;; missing from this listing.
7929 (define_insn "*sse2_lfence"
7930 [(set (match_operand:BLK 0 "" "")
7931 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7934 [(set_attr "type" "sse")
7935 (set_attr "length_address" "0")
7936 (set_attr "atom_sse_attr" "lfence")
7937 (set_attr "memory" "unknown")])
;; Insn sse3_mwait.
;; An unspec_volatile taking two SImode register operands, constrained
;; to register classes a (operand 0) and c (operand 1).  The length
;; attribute is fixed at 3.  As the comment inside the pattern explains,
;; SImode operands suffice in 64-bit mode as well.
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines missing from this listing.
7939 (define_insn "sse3_mwait"
7940 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7941 (match_operand:SI 1 "register_operand" "c")]
7944 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7945 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7946 ;; we only need to set up 32bit registers.
7948 [(set_attr "length" "3")])
;; Insn sse3_monitor, the variant used when TARGET_SSE3 holds and
;; TARGET_64BIT does not.
;; An unspec_volatile taking three SImode register operands, constrained
;; to register classes a, c and d respectively.  Prints monitor with all
;; three operands; the length attribute is fixed at 3.
;; NOTE(review): the unspec code is on a line missing from this listing.
7950 (define_insn "sse3_monitor"
7951 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7952 (match_operand:SI 1 "register_operand" "c")
7953 (match_operand:SI 2 "register_operand" "d")]
7955 "TARGET_SSE3 && !TARGET_64BIT"
7956 "monitor\t%0, %1, %2"
7957 [(set_attr "length" "3")])
;; Insn sse3_monitor64, the variant used when both TARGET_SSE3 and
;; TARGET_64BIT hold.
;; Same operand layout as sse3_monitor except that operand 0 (class a) is
;; DImode; operands 1 (class c) and 2 (class d) stay SImode for the
;; reason given in the comment inside the pattern.  The length attribute
;; is fixed at 3.
;; NOTE(review): the unspec code and the output template are on lines
;; missing from this listing.
7959 (define_insn "sse3_monitor64"
7960 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7961 (match_operand:SI 1 "register_operand" "c")
7962 (match_operand:SI 2 "register_operand" "d")]
7964 "TARGET_SSE3 && TARGET_64BIT"
7965 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7966 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7967 ;; zero extended to 64bit, we only need to set up 32bit registers.
7969 [(set_attr "length" "3")])
7971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7973 ;; SSSE3 instructions
7975 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn avx2_phaddwv16hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 15 of operand 1 (a V16HI register), followed by the
;; same sixteen extractions from operand 2 (V16HI, register or memory).
;; The result is a V16HI register.  Prints the three-operand vphaddw.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine and concatenate the selected
;; elements, and the condition string, are on lines missing from this
;; listing; confirm them against the complete file.
7977 (define_insn "avx2_phaddwv16hi3"
7978 [(set (match_operand:V16HI 0 "register_operand" "=x")
7985 (match_operand:V16HI 1 "register_operand" "x")
7986 (parallel [(const_int 0)]))
7987 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7989 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7990 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7993 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7994 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7996 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7997 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8001 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8002 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8004 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8005 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8008 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8009 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8011 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8012 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8018 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8019 (parallel [(const_int 0)]))
8020 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8022 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8023 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8026 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8027 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8030 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8034 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8035 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8037 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8038 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8041 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8042 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8044 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8045 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8047 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8048 [(set_attr "type" "sseiadd")
8049 (set_attr "prefix_extra" "1")
8050 (set_attr "prefix" "vex")
8051 (set_attr "mode" "OI")])
;; Insn ssse3_phaddwv8hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8HI register), followed by the
;; same eight extractions from operand 2 (V8HI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phaddw; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphaddw; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8053 (define_insn "ssse3_phaddwv8hi3"
8054 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8060 (match_operand:V8HI 1 "register_operand" "0,x")
8061 (parallel [(const_int 0)]))
8062 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8064 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8065 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8068 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8069 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8071 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8072 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8077 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8078 (parallel [(const_int 0)]))
8079 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8082 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8085 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8086 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8088 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8089 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8092 phaddw\t{%2, %0|%0, %2}
8093 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8094 [(set_attr "isa" "noavx,avx")
8095 (set_attr "type" "sseiadd")
8096 (set_attr "atom_unit" "complex")
8097 (set_attr "prefix_data16" "1,*")
8098 (set_attr "prefix_extra" "1")
8099 (set_attr "prefix" "orig,vex")
8100 (set_attr "mode" "TI")])
;; Insn ssse3_phaddwv4hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4HI, tied to operand 0), followed
;; by the same four extractions from operand 2 (V4HI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phaddw.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8102 (define_insn "ssse3_phaddwv4hi3"
8103 [(set (match_operand:V4HI 0 "register_operand" "=y")
8108 (match_operand:V4HI 1 "register_operand" "0")
8109 (parallel [(const_int 0)]))
8110 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8113 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8117 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8118 (parallel [(const_int 0)]))
8119 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8124 "phaddw\t{%2, %0|%0, %2}"
8125 [(set_attr "type" "sseiadd")
8126 (set_attr "atom_unit" "complex")
8127 (set_attr "prefix_extra" "1")
8128 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8129 (set_attr "mode" "DI")])
;; Insn avx2_phadddv8si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8SI register), followed by the
;; same eight extractions from operand 2 (V8SI, register or memory).
;; The result is a V8SI register.  Prints the three-operand vphaddd.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8131 (define_insn "avx2_phadddv8si3"
8132 [(set (match_operand:V8SI 0 "register_operand" "=x")
8138 (match_operand:V8SI 1 "register_operand" "x")
8139 (parallel [(const_int 0)]))
8140 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8142 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8143 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8146 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8147 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8149 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8150 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8155 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8156 (parallel [(const_int 0)]))
8157 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8159 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8160 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8163 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8164 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8166 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8167 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8169 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8170 [(set_attr "type" "sseiadd")
8171 (set_attr "prefix_extra" "1")
8172 (set_attr "prefix" "vex")
8173 (set_attr "mode" "OI")])
;; Insn ssse3_phadddv4si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4SI register), followed by the
;; same four extractions from operand 2 (V4SI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phaddd; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphaddd; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8175 (define_insn "ssse3_phadddv4si3"
8176 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8181 (match_operand:V4SI 1 "register_operand" "0,x")
8182 (parallel [(const_int 0)]))
8183 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8185 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8186 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8190 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8191 (parallel [(const_int 0)]))
8192 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8194 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8195 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8198 phaddd\t{%2, %0|%0, %2}
8199 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8200 [(set_attr "isa" "noavx,avx")
8201 (set_attr "type" "sseiadd")
8202 (set_attr "atom_unit" "complex")
8203 (set_attr "prefix_data16" "1,*")
8204 (set_attr "prefix_extra" "1")
8205 (set_attr "prefix" "orig,vex")
8206 (set_attr "mode" "TI")])
;; Insn ssse3_phadddv2si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 and 1 of operand 1 (V2SI, tied to operand 0), followed by
;; the same two extractions from operand 2 (V2SI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phaddd.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8208 (define_insn "ssse3_phadddv2si3"
8209 [(set (match_operand:V2SI 0 "register_operand" "=y")
8213 (match_operand:V2SI 1 "register_operand" "0")
8214 (parallel [(const_int 0)]))
8215 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8218 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8219 (parallel [(const_int 0)]))
8220 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8222 "phaddd\t{%2, %0|%0, %2}"
8223 [(set_attr "type" "sseiadd")
8224 (set_attr "atom_unit" "complex")
8225 (set_attr "prefix_extra" "1")
8226 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8227 (set_attr "mode" "DI")])
;; Insn avx2_phaddswv16hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 15 of operand 1 (V16HI register), followed by the
;; same sixteen extractions from operand 2 (V16HI, register or memory).
;; The result is a V16HI register.  Prints the three-operand vphaddsw.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine the selected elements (and
;; thus what distinguishes this from avx2_phaddwv16hi3 at the RTL level)
;; and the condition string are on lines missing from this listing.
8229 (define_insn "avx2_phaddswv16hi3"
8230 [(set (match_operand:V16HI 0 "register_operand" "=x")
8237 (match_operand:V16HI 1 "register_operand" "x")
8238 (parallel [(const_int 0)]))
8239 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8246 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8248 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8249 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8253 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8254 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8257 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8260 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8261 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8264 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8270 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8271 (parallel [(const_int 0)]))
8272 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8275 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8278 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8286 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8287 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8289 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8290 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8293 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8294 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8296 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8297 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8299 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8300 [(set_attr "type" "sseiadd")
8301 (set_attr "prefix_extra" "1")
8302 (set_attr "prefix" "vex")
8303 (set_attr "mode" "OI")])
;; Insn ssse3_phaddswv8hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8HI register), followed by the
;; same eight extractions from operand 2 (V8HI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phaddsw; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphaddsw; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8305 (define_insn "ssse3_phaddswv8hi3"
8306 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8312 (match_operand:V8HI 1 "register_operand" "0,x")
8313 (parallel [(const_int 0)]))
8314 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8316 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8317 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8320 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8321 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8323 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8324 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8329 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8330 (parallel [(const_int 0)]))
8331 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8333 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8334 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8337 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8338 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8340 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8344 phaddsw\t{%2, %0|%0, %2}
8345 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8346 [(set_attr "isa" "noavx,avx")
8347 (set_attr "type" "sseiadd")
8348 (set_attr "atom_unit" "complex")
8349 (set_attr "prefix_data16" "1,*")
8350 (set_attr "prefix_extra" "1")
8351 (set_attr "prefix" "orig,vex")
8352 (set_attr "mode" "TI")])
;; Insn ssse3_phaddswv4hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4HI, tied to operand 0), followed
;; by the same four extractions from operand 2 (V4HI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phaddsw.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8354 (define_insn "ssse3_phaddswv4hi3"
8355 [(set (match_operand:V4HI 0 "register_operand" "=y")
8360 (match_operand:V4HI 1 "register_operand" "0")
8361 (parallel [(const_int 0)]))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8365 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8369 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8370 (parallel [(const_int 0)]))
8371 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8376 "phaddsw\t{%2, %0|%0, %2}"
8377 [(set_attr "type" "sseiadd")
8378 (set_attr "atom_unit" "complex")
8379 (set_attr "prefix_extra" "1")
8380 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8381 (set_attr "mode" "DI")])
;; Insn avx2_phsubwv16hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 15 of operand 1 (V16HI register), followed by the
;; same sixteen extractions from operand 2 (V16HI, register or memory).
;; The result is a V16HI register.  Prints the three-operand vphsubw.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8383 (define_insn "avx2_phsubwv16hi3"
8384 [(set (match_operand:V16HI 0 "register_operand" "=x")
8391 (match_operand:V16HI 1 "register_operand" "x")
8392 (parallel [(const_int 0)]))
8393 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8395 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8396 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8399 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8403 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8407 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8408 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8410 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8411 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8414 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8424 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8425 (parallel [(const_int 0)]))
8426 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8428 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8429 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8433 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8436 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8441 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8444 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8447 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8448 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8450 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8451 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8453 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8454 [(set_attr "type" "sseiadd")
8455 (set_attr "prefix_extra" "1")
8456 (set_attr "prefix" "vex")
8457 (set_attr "mode" "OI")])
;; Insn ssse3_phsubwv8hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8HI register), followed by the
;; same eight extractions from operand 2 (V8HI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phsubw; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphsubw; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8459 (define_insn "ssse3_phsubwv8hi3"
8460 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8466 (match_operand:V8HI 1 "register_operand" "0,x")
8467 (parallel [(const_int 0)]))
8468 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8474 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8475 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8477 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8478 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8483 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8484 (parallel [(const_int 0)]))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8492 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8494 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8495 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8498 phsubw\t{%2, %0|%0, %2}
8499 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8500 [(set_attr "isa" "noavx,avx")
8501 (set_attr "type" "sseiadd")
8502 (set_attr "atom_unit" "complex")
8503 (set_attr "prefix_data16" "1,*")
8504 (set_attr "prefix_extra" "1")
8505 (set_attr "prefix" "orig,vex")
8506 (set_attr "mode" "TI")])
;; Insn ssse3_phsubwv4hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4HI, tied to operand 0), followed
;; by the same four extractions from operand 2 (V4HI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phsubw.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8508 (define_insn "ssse3_phsubwv4hi3"
8509 [(set (match_operand:V4HI 0 "register_operand" "=y")
8514 (match_operand:V4HI 1 "register_operand" "0")
8515 (parallel [(const_int 0)]))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8519 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8523 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8524 (parallel [(const_int 0)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8530 "phsubw\t{%2, %0|%0, %2}"
8531 [(set_attr "type" "sseiadd")
8532 (set_attr "atom_unit" "complex")
8533 (set_attr "prefix_extra" "1")
8534 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8535 (set_attr "mode" "DI")])
;; Insn avx2_phsubdv8si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8SI register), followed by the
;; same eight extractions from operand 2 (V8SI, register or memory).
;; The result is a V8SI register.  Prints the three-operand vphsubd.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8537 (define_insn "avx2_phsubdv8si3"
8538 [(set (match_operand:V8SI 0 "register_operand" "=x")
8544 (match_operand:V8SI 1 "register_operand" "x")
8545 (parallel [(const_int 0)]))
8546 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8548 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8549 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8552 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8553 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8555 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8556 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8561 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8562 (parallel [(const_int 0)]))
8563 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8565 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8566 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8569 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8570 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8572 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8573 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8575 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8576 [(set_attr "type" "sseiadd")
8577 (set_attr "prefix_extra" "1")
8578 (set_attr "prefix" "vex")
8579 (set_attr "mode" "OI")])
;; Insn ssse3_phsubdv4si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4SI register), followed by the
;; same four extractions from operand 2 (V4SI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phsubd; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphsubd; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8581 (define_insn "ssse3_phsubdv4si3"
8582 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8587 (match_operand:V4SI 1 "register_operand" "0,x")
8588 (parallel [(const_int 0)]))
8589 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8591 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8592 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8596 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8597 (parallel [(const_int 0)]))
8598 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8600 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8601 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8604 phsubd\t{%2, %0|%0, %2}
8605 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8607 [(set_attr "isa" "noavx,avx")
8608 (set_attr "type" "sseiadd")
8609 (set_attr "atom_unit" "complex")
8610 (set_attr "prefix_data16" "1,*")
8611 (set_attr "prefix_extra" "1")
8612 (set_attr "prefix" "orig,vex")
8613 (set_attr "mode" "TI")])
;; Insn ssse3_phsubdv2si3.
;; The source expression is built from SImode vec_select extractions of
;; elements 0 and 1 of operand 1 (V2SI, tied to operand 0), followed by
;; the same two extractions from operand 2 (V2SI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phsubd.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8615 (define_insn "ssse3_phsubdv2si3"
8616 [(set (match_operand:V2SI 0 "register_operand" "=y")
8620 (match_operand:V2SI 1 "register_operand" "0")
8621 (parallel [(const_int 0)]))
8622 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8625 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8626 (parallel [(const_int 0)]))
8627 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8629 "phsubd\t{%2, %0|%0, %2}"
8630 [(set_attr "type" "sseiadd")
8631 (set_attr "atom_unit" "complex")
8632 (set_attr "prefix_extra" "1")
8633 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8634 (set_attr "mode" "DI")])
;; Insn avx2_phsubswv16hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 15 of operand 1 (V16HI register), followed by the
;; same sixteen extractions from operand 2 (V16HI, register or memory).
;; The result is a V16HI register.  Prints the three-operand vphsubsw.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): the RTL codes that combine the selected elements (and
;; thus what distinguishes this from avx2_phsubwv16hi3 at the RTL level)
;; and the condition string are on lines missing from this listing.
8636 (define_insn "avx2_phsubswv16hi3"
8637 [(set (match_operand:V16HI 0 "register_operand" "=x")
8644 (match_operand:V16HI 1 "register_operand" "x")
8645 (parallel [(const_int 0)]))
8646 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8649 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8652 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8653 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8655 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8656 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8661 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8667 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8670 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8671 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8677 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8678 (parallel [(const_int 0)]))
8679 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8682 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8686 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8689 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8693 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8694 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8696 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8697 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8700 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8701 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8703 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8704 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8706 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8707 [(set_attr "type" "sseiadd")
8708 (set_attr "prefix_extra" "1")
8709 (set_attr "prefix" "vex")
8710 (set_attr "mode" "OI")])
;; Insn ssse3_phsubswv8hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 7 of operand 1 (V8HI register), followed by the
;; same eight extractions from operand 2 (V8HI, register or memory).
;; Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand phsubsw; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vphsubsw; prefix vex.
;; NOTE(review): the RTL codes that combine the selected elements, the
;; condition string and the opening line of the output template are on
;; lines missing from this listing.
8712 (define_insn "ssse3_phsubswv8hi3"
8713 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8719 (match_operand:V8HI 1 "register_operand" "0,x")
8720 (parallel [(const_int 0)]))
8721 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8723 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8724 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8727 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8728 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8730 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8731 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8736 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8737 (parallel [(const_int 0)]))
8738 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8741 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8744 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8745 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8747 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8748 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8751 phsubsw\t{%2, %0|%0, %2}
8752 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8753 [(set_attr "isa" "noavx,avx")
8754 (set_attr "type" "sseiadd")
8755 (set_attr "atom_unit" "complex")
8756 (set_attr "prefix_data16" "1,*")
8757 (set_attr "prefix_extra" "1")
8758 (set_attr "prefix" "orig,vex")
8759 (set_attr "mode" "TI")])
;; Insn ssse3_phsubswv4hi3.
;; The source expression is built from HImode vec_select extractions of
;; elements 0 through 3 of operand 1 (V4HI, tied to operand 0), followed
;; by the same four extractions from operand 2 (V4HI, constraint ym).
;; The destination uses constraint y.  Prints the two-operand phsubsw.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p; the
;; mode attribute is DI.
;; NOTE(review): the RTL codes that combine the selected elements and
;; the condition string are on lines missing from this listing.
8761 (define_insn "ssse3_phsubswv4hi3"
8762 [(set (match_operand:V4HI 0 "register_operand" "=y")
8767 (match_operand:V4HI 1 "register_operand" "0")
8768 (parallel [(const_int 0)]))
8769 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8772 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8776 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8777 (parallel [(const_int 0)]))
8778 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8780 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8781 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8783 "phsubsw\t{%2, %0|%0, %2}"
8784 [(set_attr "type" "sseiadd")
8785 (set_attr "atom_unit" "complex")
8786 (set_attr "prefix_extra" "1")
8787 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8788 (set_attr "mode" "DI")])
;; Insn avx2_pmaddubsw256.
;; Produces a V16HI register from two V32QI inputs: operand 1 (register)
;; and operand 2 (register or memory).  Each input is sub-selected twice:
;; once with an index list starting at 0 and once, as a V16QI vec_select
;; of the same operand, with an index list starting at 1; the last
;; visible index is 31.  Prints the three-operand vpmaddubsw.
;; Attributes: type sseiadd, prefix_extra 1, VEX prefix, mode OI.
;; NOTE(review): most of the index lists, the RTL codes applied to the
;; selected halves and the condition string are on lines missing from
;; this listing; confirm them against the complete file.
8790 (define_insn "avx2_pmaddubsw256"
8791 [(set (match_operand:V16HI 0 "register_operand" "=x")
8796 (match_operand:V32QI 1 "register_operand" "x")
8797 (parallel [(const_int 0)
8815 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8816 (parallel [(const_int 0)
8834 (vec_select:V16QI (match_dup 1)
8835 (parallel [(const_int 1)
8852 (vec_select:V16QI (match_dup 2)
8853 (parallel [(const_int 1)
8868 (const_int 31)]))))))]
8870 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8871 [(set_attr "type" "sseiadd")
8872 (set_attr "prefix_extra" "1")
8873 (set_attr "prefix" "vex")
8874 (set_attr "mode" "OI")])
;; Insn ssse3_pmaddubsw128.
;; Produces a V8HI register from two V16QI inputs: operand 1 (register)
;; and operand 2 (register or memory).  Each input is sub-selected twice:
;; once with an index list starting at 0 and once, as a V8QI vec_select
;; of the same operand, with an index list starting at 1; the last
;; visible index is 15.  Two alternatives:
;;   alternative 0 (isa noavx): operand 1 tied to operand 0, prints the
;;     two-operand pmaddubsw; prefix_data16 is 1, prefix orig.
;;   alternative 1 (isa avx): prints the three-operand vpmaddubsw.
;; NOTE(review): most of the index lists, the RTL codes applied to the
;; selected halves, the condition string and the opening line of the
;; output template are on lines missing from this listing.
8876 (define_insn "ssse3_pmaddubsw128"
8877 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8882 (match_operand:V16QI 1 "register_operand" "0,x")
8883 (parallel [(const_int 0)
8893 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8894 (parallel [(const_int 0)
8904 (vec_select:V8QI (match_dup 1)
8905 (parallel [(const_int 1)
8914 (vec_select:V8QI (match_dup 2)
8915 (parallel [(const_int 1)
8922 (const_int 15)]))))))]
8925 pmaddubsw\t{%2, %0|%0, %2}
8926 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8927 [(set_attr "isa" "noavx,avx")
8928 (set_attr "type" "sseiadd")
8929 (set_attr "atom_unit" "simul")
8930 (set_attr "prefix_data16" "1,*")
8931 (set_attr "prefix_extra" "1")
8932 (set_attr "prefix" "orig,vex")
8933 (set_attr "mode" "TI")])
;; MMX-register ("y") form of pmaddubsw: V8QI sources producing a V4HI
;; result.  Operand 1 is tied to the destination; operand 2 may be memory.
8935 (define_insn "ssse3_pmaddubsw"
8936 [(set (match_operand:V4HI 0 "register_operand" "=y")
8941 (match_operand:V8QI 1 "register_operand" "0")
8942 (parallel [(const_int 0)
8948 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8949 (parallel [(const_int 0)
8955 (vec_select:V4QI (match_dup 1)
8956 (parallel [(const_int 1)
8961 (vec_select:V4QI (match_dup 2)
8962 (parallel [(const_int 1)
8965 (const_int 7)]))))))]
8967 "pmaddubsw\t{%2, %0|%0, %2}"
8968 [(set_attr "type" "sseiadd")
8969 (set_attr "atom_unit" "simul")
8970 (set_attr "prefix_extra" "1")
8971 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8972 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply pattern matched by
;; "*avx2_umulhrswv16hi3".  Both inputs may be memory here; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): this pattern is spelled "umulhrsw" while the V8HI/V4HI
;; siblings are spelled "pmulhrsw" -- check generated-function callers
;; before renaming.
8974 (define_expand "avx2_umulhrswv16hi3"
8975 [(set (match_operand:V16HI 0 "register_operand" "")
8982 (match_operand:V16HI 1 "nonimmediate_operand" ""))
8984 (match_operand:V16HI 2 "nonimmediate_operand" "")))
8986 (const_vector:V16HI [(const_int 1) (const_int 1)
8987 (const_int 1) (const_int 1)
8988 (const_int 1) (const_int 1)
8989 (const_int 1) (const_int 1)
8990 (const_int 1) (const_int 1)
8991 (const_int 1) (const_int 1)
8992 (const_int 1) (const_int 1)
8993 (const_int 1) (const_int 1)]))
8996 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matcher emitting vpmulhrsw on V16HI.  Requires TARGET_AVX2 and that
;; ix86_binary_operator_ok accepts the MULT operands.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Always VEX-encoded.
8998 (define_insn "*avx2_umulhrswv16hi3"
8999 [(set (match_operand:V16HI 0 "register_operand" "=x")
9006 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9008 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9010 (const_vector:V16HI [(const_int 1) (const_int 1)
9011 (const_int 1) (const_int 1)
9012 (const_int 1) (const_int 1)
9013 (const_int 1) (const_int 1)
9014 (const_int 1) (const_int 1)
9015 (const_int 1) (const_int 1)
9016 (const_int 1) (const_int 1)
9017 (const_int 1) (const_int 1)]))
9019 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9020 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9021 [(set_attr "type" "sseimul")
9022 (set_attr "prefix_extra" "1")
9023 (set_attr "prefix" "vex")
9024 (set_attr "mode" "OI")])
;; Expander for the V8HI multiply pattern matched by
;; "*ssse3_pmulhrswv8hi3".  The preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy for MULT.
9026 (define_expand "ssse3_pmulhrswv8hi3"
9027 [(set (match_operand:V8HI 0 "register_operand" "")
9034 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9036 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9038 (const_vector:V8HI [(const_int 1) (const_int 1)
9039 (const_int 1) (const_int 1)
9040 (const_int 1) (const_int 1)
9041 (const_int 1) (const_int 1)]))
9044 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher emitting (v)pmulhrsw on V8HI.  Requires TARGET_SSSE3 and that
;; ix86_binary_operator_ok accepts the MULT operands.  Alternative 0 is
;; the legacy two-operand form (operand 1 tied to the destination);
;; alternative 1 is the VEX three-operand form.
9046 (define_insn "*ssse3_pmulhrswv8hi3"
9047 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9054 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9056 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9058 (const_vector:V8HI [(const_int 1) (const_int 1)
9059 (const_int 1) (const_int 1)
9060 (const_int 1) (const_int 1)
9061 (const_int 1) (const_int 1)]))
9063 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9065 pmulhrsw\t{%2, %0|%0, %2}
9066 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9067 [(set_attr "isa" "noavx,avx")
9068 (set_attr "type" "sseimul")
9069 (set_attr "prefix_data16" "1,*")
9070 (set_attr "prefix_extra" "1")
9071 (set_attr "prefix" "orig,vex")
9072 (set_attr "mode" "TI")])
;; Expander for the V4HI multiply pattern matched by
;; "*ssse3_pmulhrswv4hi3".  The preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy for MULT.
9074 (define_expand "ssse3_pmulhrswv4hi3"
9075 [(set (match_operand:V4HI 0 "register_operand" "")
9082 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9084 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9086 (const_vector:V4HI [(const_int 1) (const_int 1)
9087 (const_int 1) (const_int 1)]))
9090 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher emitting pmulhrsw on V4HI in MMX registers ("y").  Requires
;; TARGET_SSSE3 and that ix86_binary_operator_ok accepts the MULT
;; operands.  Operand 1 is commutative and tied to the destination.
9092 (define_insn "*ssse3_pmulhrswv4hi3"
9093 [(set (match_operand:V4HI 0 "register_operand" "=y")
9100 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9102 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9104 (const_vector:V4HI [(const_int 1) (const_int 1)
9105 (const_int 1) (const_int 1)]))
9107 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9108 "pmulhrsw\t{%2, %0|%0, %2}"
9109 [(set_attr "type" "sseimul")
9110 (set_attr "prefix_extra" "1")
9111 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9112 (set_attr "mode" "DI")])
;; (v)pshufb over the VI1_AVX2 modes, modelled as an unspec of operands 1
;; and 2.  Alternative 0 is the legacy two-operand form (operand 1 tied to
;; the destination); alternative 1 is the VEX three-operand form.
9114 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9115 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9116 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9117 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9121 pshufb\t{%2, %0|%0, %2}
9122 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9123 [(set_attr "isa" "noavx,avx")
9124 (set_attr "type" "sselog1")
9125 (set_attr "prefix_data16" "1,*")
9126 (set_attr "prefix_extra" "1")
9127 (set_attr "prefix" "orig,vex")
9128 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") form of pshufb on V8QI, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is tied to the destination; operand 2 may
;; be memory.
9130 (define_insn "ssse3_pshufbv8qi3"
9131 [(set (match_operand:V8QI 0 "register_operand" "=y")
9132 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9133 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9136 "pshufb\t{%2, %0|%0, %2}"
9137 [(set_attr "type" "sselog1")
9138 (set_attr "prefix_extra" "1")
9139 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9140 (set_attr "mode" "DI")])
;; (v)psign<suffix> over the VI124_AVX2 modes.  Alternative 0 is the
;; legacy two-operand form (operand 1 tied to the destination);
;; alternative 1 is the VEX three-operand form.  Operand 2 may be memory.
9142 (define_insn "<ssse3_avx2>_psign<mode>3"
9143 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9145 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9146 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9150 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9151 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9152 [(set_attr "isa" "noavx,avx")
9153 (set_attr "type" "sselog1")
9154 (set_attr "prefix_data16" "1,*")
9155 (set_attr "prefix_extra" "1")
9156 (set_attr "prefix" "orig,vex")
9157 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") form of psign<size> over the MMXMODEI modes.
;; Operand 1 is tied to the destination; operand 2 may be memory.
9159 (define_insn "ssse3_psign<mode>3"
9160 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9162 [(match_operand:MMXMODEI 1 "register_operand" "0")
9163 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9166 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9167 [(set_attr "type" "sselog1")
9168 (set_attr "prefix_extra" "1")
9169 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9170 (set_attr "mode" "DI")])
;; (v)palignr over the SSESCALARMODE modes.  Operand 3 is accepted as a
;; multiple of 8 (const_0_to_255_mul_8_operand) and divided by 8 before
;; it is printed.  Alternative 0 is the legacy form (operand 1 tied to
;; the destination); alternative 1 is the VEX form.
9172 (define_insn "<ssse3_avx2>_palignr<mode>"
9173 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9174 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9175 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9176 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9180 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9182 switch (which_alternative)
9185 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9187 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9192 [(set_attr "isa" "noavx,avx")
9193 (set_attr "type" "sseishft")
9194 (set_attr "atom_unit" "sishuf")
9195 (set_attr "prefix_data16" "1,*")
9196 (set_attr "prefix_extra" "1")
9197 (set_attr "length_immediate" "1")
9198 (set_attr "prefix" "orig,vex")
9199 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") form of palignr on DImode.  Operand 3 is accepted
;; as a multiple of 8 (const_0_to_255_mul_8_operand) and divided by 8
;; before it is printed.  Operand 1 is tied to the destination.
9201 (define_insn "ssse3_palignrdi"
9202 [(set (match_operand:DI 0 "register_operand" "=y")
9203 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9204 (match_operand:DI 2 "nonimmediate_operand" "ym")
9205 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9209 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9210 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9212 [(set_attr "type" "sseishft")
9213 (set_attr "atom_unit" "sishuf")
9214 (set_attr "prefix_extra" "1")
9215 (set_attr "length_immediate" "1")
9216 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9217 (set_attr "mode" "DI")])
;; Vector absolute value over the VI124_AVX2 modes, emitted as
;; pabs<suffix> with the %v template modifier (prefix attribute
;; maybe_vex).  The single source operand may be memory.
9219 (define_insn "abs<mode>2"
9220 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9222 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9224 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9225 [(set_attr "type" "sselog1")
9226 (set_attr "prefix_data16" "1")
9227 (set_attr "prefix_extra" "1")
9228 (set_attr "prefix" "maybe_vex")
9229 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") vector absolute value over the MMXMODEI modes,
;; emitted as pabs<size>.  Shares the "abs<mode>2" name with the
;; VI124_AVX2 pattern; the mode iterators differ.  The single source
;; operand may be memory.
9231 (define_insn "abs<mode>2"
9232 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9234 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9236 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9237 [(set_attr "type" "sselog1")
9238 (set_attr "prefix_rep" "0")
9239 (set_attr "prefix_extra" "1")
9240 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9241 (set_attr "mode" "DI")])
9243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9245 ;; AMD SSE4A instructions
9247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<suffix> store for the MODEF modes: register operand 1 is written
;; to memory operand 0 through an unspec.
9249 (define_insn "sse4a_movnt<mode>"
9250 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9252 [(match_operand:MODEF 1 "register_operand" "x")]
9255 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9256 [(set_attr "type" "ssemov")
9257 (set_attr "mode" "<MODE>")])
;; movnt<scalar suffix> store of element 0 of a VF_128 vector register
;; (operand 1) to a scalar-mode memory operand 0.
9259 (define_insn "sse4a_vmmovnt<mode>"
9260 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9261 (unspec:<ssescalarmode>
9262 [(vec_select:<ssescalarmode>
9263 (match_operand:VF_128 1 "register_operand" "x")
9264 (parallel [(const_int 0)]))]
9267 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9268 [(set_attr "type" "ssemov")
9269 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI: operands 2 and 3 are constants in
;; 0..255, so length_immediate is 2.  Operand 1 is tied to the
;; destination.
9271 (define_insn "sse4a_extrqi"
9272 [(set (match_operand:V2DI 0 "register_operand" "=x")
9273 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9274 (match_operand 2 "const_0_to_255_operand" "")
9275 (match_operand 3 "const_0_to_255_operand" "")]
9278 "extrq\t{%3, %2, %0|%0, %2, %3}"
9279 [(set_attr "type" "sse")
9280 (set_attr "prefix_data16" "1")
9281 (set_attr "length_immediate" "2")
9282 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI: the control value comes from V16QI
;; register operand 2.  Operand 1 is tied to the destination.
9284 (define_insn "sse4a_extrq"
9285 [(set (match_operand:V2DI 0 "register_operand" "=x")
9286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9287 (match_operand:V16QI 2 "register_operand" "x")]
9290 "extrq\t{%2, %0|%0, %2}"
9291 [(set_attr "type" "sse")
9292 (set_attr "prefix_data16" "1")
9293 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI: operands 3 and 4 are constants in
;; 0..255, so length_immediate is 2.  Operand 1 is tied to the
;; destination; operand 2 is a register source.
9295 (define_insn "sse4a_insertqi"
9296 [(set (match_operand:V2DI 0 "register_operand" "=x")
9297 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9298 (match_operand:V2DI 2 "register_operand" "x")
9299 (match_operand 3 "const_0_to_255_operand" "")
9300 (match_operand 4 "const_0_to_255_operand" "")]
9303 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9304 [(set_attr "type" "sseins")
9305 (set_attr "prefix_data16" "0")
9306 (set_attr "prefix_rep" "1")
9307 (set_attr "length_immediate" "2")
9308 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the
;; destination; operand 2 is a register source.
9310 (define_insn "sse4a_insertq"
9311 [(set (match_operand:V2DI 0 "register_operand" "=x")
9312 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9313 (match_operand:V2DI 2 "register_operand" "x")]
9316 "insertq\t{%2, %0|%0, %2}"
9317 [(set_attr "type" "sseins")
9318 (set_attr "prefix_data16" "0")
9319 (set_attr "prefix_rep" "1")
9320 (set_attr "mode" "TI")])
9322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9324 ;; Intel SSE4.1 instructions
9326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; (v)blend<suffix> over the VF modes with a constant mask in operand 3
;; (range bounded by <blendbits>).  Note that the RTL lists operand 2
;; before operand 1.  Alternative 0 is the legacy form (operand 1 tied to
;; the destination); alternative 1 is the VEX form.
9328 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9329 [(set (match_operand:VF 0 "register_operand" "=x,x")
9331 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9332 (match_operand:VF 1 "register_operand" "0,x")
9333 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9336 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9337 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9338 [(set_attr "isa" "noavx,avx")
9339 (set_attr "type" "ssemov")
9340 (set_attr "length_immediate" "1")
9341 (set_attr "prefix_data16" "1,*")
9342 (set_attr "prefix_extra" "1")
9343 (set_attr "prefix" "orig,vex")
9344 (set_attr "mode" "<MODE>")])
;; (v)blendv<suffix> over the VF modes with a register mask in operand 3.
;; Alternative 0 is the legacy form: operand 1 is tied to the destination
;; and the mask is constrained to "Yz".  Alternative 1 is the VEX form
;; with the mask in any "x" register.
;; Only the VEX form carries a trailing byte naming the mask register, so
;; length_immediate is "*" (default) for the legacy alternative and "1"
;; for the VEX alternative, matching the pblendvb pattern.
9346 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9347 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9349 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9350 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9351 (match_operand:VF 3 "register_operand" "Yz,x")]
9355 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9356 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9357 [(set_attr "isa" "noavx,avx")
9358 (set_attr "type" "ssemov")
9359 (set_attr "length_immediate" "*,1")
9360 (set_attr "prefix_data16" "1,*")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "prefix" "orig,vex")
9363 (set_attr "mode" "<MODE>")])
;; (v)dp<suffix> over the VF modes with an 8-bit constant in operand 3.
;; Operand 1 is marked commutative ("%").  Alternative 0 is the legacy
;; form (operand 1 tied to the destination); alternative 1 is the VEX
;; form.
9365 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9366 [(set (match_operand:VF 0 "register_operand" "=x,x")
9368 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9369 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9370 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9374 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9375 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9376 [(set_attr "isa" "noavx,avx")
9377 (set_attr "type" "ssemul")
9378 (set_attr "length_immediate" "1")
9379 (set_attr "prefix_data16" "1,*")
9380 (set_attr "prefix_extra" "1")
9381 (set_attr "prefix" "orig,vex")
9382 (set_attr "mode" "<MODE>")])
;; (v)movntdqa load over the VI8_AVX2 modes: memory operand 1 is loaded
;; into register operand 0 through an unspec.
9384 (define_insn "<sse4_1_avx2>_movntdqa"
9385 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9386 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9389 "%vmovntdqa\t{%1, %0|%0, %1}"
9390 [(set_attr "type" "ssemov")
9391 (set_attr "prefix_extra" "1")
9392 (set_attr "prefix" "maybe_vex")
9393 (set_attr "mode" "<sseinsnmode>")])
;; (v)mpsadbw over the VI1_AVX2 modes with an 8-bit constant in operand
;; 3.  Alternative 0 is the legacy form (operand 1 tied to the
;; destination); alternative 1 is the VEX form.
9395 (define_insn "<sse4_1_avx2>_mpsadbw"
9396 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9397 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9398 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9399 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9403 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9404 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9405 [(set_attr "isa" "noavx,avx")
9406 (set_attr "type" "sselog1")
9407 (set_attr "length_immediate" "1")
9408 (set_attr "prefix_extra" "1")
9409 (set_attr "prefix" "orig,vex")
9410 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI sources (operand 2 may be memory) packed
;; into a V16HI result.  Always VEX-encoded.
9412 (define_insn "avx2_packusdw"
9413 [(set (match_operand:V16HI 0 "register_operand" "=x")
9416 (match_operand:V8SI 1 "register_operand" "x"))
9418 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9420 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9421 [(set_attr "type" "sselog")
9422 (set_attr "prefix_extra" "1")
9423 (set_attr "prefix" "vex")
9424 (set_attr "mode" "OI")])
;; 128-bit (v)packusdw: two V4SI sources packed into a V8HI result.
;; Alternative 0 is the legacy form (operand 1 tied to the destination);
;; alternative 1 is the VEX form.
9426 (define_insn "sse4_1_packusdw"
9427 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9430 (match_operand:V4SI 1 "register_operand" "0,x"))
9432 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9435 packusdw\t{%2, %0|%0, %2}
9436 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9437 [(set_attr "isa" "noavx,avx")
9438 (set_attr "type" "sselog")
9439 (set_attr "prefix_extra" "1")
9440 (set_attr "prefix" "orig,vex")
9441 (set_attr "mode" "TI")])
;; (v)pblendvb over the VI1_AVX2 modes with a register mask in operand 3.
;; Alternative 0 is the legacy form: operand 1 is tied to the destination
;; and the mask is constrained to "Yz".  Alternative 1 is the VEX form,
;; the only one given a length_immediate of 1.
;; NOTE(review): operand 0 uses "reg_not_xmm0_operand" while operands 1
;; and 2 (and the float blendv pattern) use the "_maybe_avx" predicate
;; variants -- confirm whether that asymmetry is intended.
9443 (define_insn "<sse4_1_avx2>_pblendvb"
9444 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
9446 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9447 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9448 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9452 pblendvb\t{%3, %2, %0|%0, %2, %3}
9453 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9454 [(set_attr "isa" "noavx,avx")
9455 (set_attr "type" "ssemov")
9456 (set_attr "prefix_extra" "1")
9457 (set_attr "length_immediate" "*,1")
9458 (set_attr "prefix" "orig,vex")
9459 (set_attr "mode" "<sseinsnmode>")])
;; (v)pblendw over the VI2_AVX2 modes with an 8-bit constant mask in
;; operand 3.  Note that the RTL lists operand 2 before operand 1.
;; Alternative 0 is the legacy form (operand 1 tied to the destination);
;; alternative 1 is the VEX form.
9461 (define_insn "<sse4_1_avx2>_pblendw"
9462 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9464 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
9465 (match_operand:VI2_AVX2 1 "register_operand" "0,x")
9466 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9469 pblendw\t{%3, %2, %0|%0, %2, %3}
9470 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9471 [(set_attr "isa" "noavx,avx")
9472 (set_attr "type" "ssemov")
9473 (set_attr "prefix_extra" "1")
9474 (set_attr "length_immediate" "1")
9475 (set_attr "prefix" "orig,vex")
9476 (set_attr "mode" "<sseinsnmode>")])
;; vpblendd over the VI4_AVX2 modes with an 8-bit constant mask in
;; operand 3.  Note that the RTL lists operand 2 before operand 1.
;; Single alternative, always VEX-encoded.
9478 (define_insn "avx2_pblendd<mode>"
9479 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9481 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9482 (match_operand:VI4_AVX2 1 "register_operand" "x")
9483 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9485 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9486 [(set_attr "type" "ssemov")
9487 (set_attr "prefix_extra" "1")
9488 (set_attr "length_immediate" "1")
9489 (set_attr "prefix" "vex")
9490 (set_attr "mode" "<sseinsnmode>")])
;; (v)phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1
;; (register or memory).  Emitted with the %v template modifier.
9492 (define_insn "sse4_1_phminposuw"
9493 [(set (match_operand:V8HI 0 "register_operand" "=x")
9494 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9495 UNSPEC_PHMINPOSUW))]
9497 "%vphminposuw\t{%1, %0|%0, %1}"
9498 [(set_attr "type" "sselog1")
9499 (set_attr "prefix_extra" "1")
9500 (set_attr "prefix" "maybe_vex")
9501 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: widens a whole V16QI source (register or memory)
;; to V16HI.  <code> and <extsuffix> come from a code iterator defined
;; outside this pattern.  Always VEX-encoded.
9503 (define_insn "avx2_<code>v16qiv16hi2"
9504 [(set (match_operand:V16HI 0 "register_operand" "=x")
9506 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9508 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9509 [(set_attr "type" "ssemov")
9510 (set_attr "prefix_extra" "1")
9511 (set_attr "prefix" "vex")
9512 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>bw: widens elements selected from a V16QI source
;; (selection starting at element 0) to V8HI.  Emitted with the %v
;; template modifier.
9514 (define_insn "sse4_1_<code>v8qiv8hi2"
9515 [(set (match_operand:V8HI 0 "register_operand" "=x")
9518 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9519 (parallel [(const_int 0)
9528 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "prefix_extra" "1")
9531 (set_attr "prefix" "maybe_vex")
9532 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: widens elements selected from a V16QI source
;; (selection starting at element 0) to V8SI.  Always VEX-encoded.
9534 (define_insn "avx2_<code>v8qiv8si2"
9535 [(set (match_operand:V8SI 0 "register_operand" "=x")
9538 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9539 (parallel [(const_int 0)
9548 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9549 [(set_attr "type" "ssemov")
9550 (set_attr "prefix_extra" "1")
9551 (set_attr "prefix" "vex")
9552 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>bd: widens elements selected from a V16QI source
;; (selection starting at element 0) to V4SI.  Emitted with the %v
;; template modifier.
9554 (define_insn "sse4_1_<code>v4qiv4si2"
9555 [(set (match_operand:V4SI 0 "register_operand" "=x")
9558 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9559 (parallel [(const_int 0)
9564 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9565 [(set_attr "type" "ssemov")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "prefix" "maybe_vex")
9568 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd: widens a whole V8HI source (register or memory) to
;; V8SI.  Always VEX-encoded.
9570 (define_insn "avx2_<code>v8hiv8si2"
9571 [(set (match_operand:V8SI 0 "register_operand" "=x")
9573 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9575 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9576 [(set_attr "type" "ssemov")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "prefix" "vex")
9579 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>wd: widens elements selected from a V8HI source
;; (selection starting at element 0) to V4SI.  Emitted with the %v
;; template modifier.
9581 (define_insn "sse4_1_<code>v4hiv4si2"
9582 [(set (match_operand:V4SI 0 "register_operand" "=x")
9585 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9586 (parallel [(const_int 0)
9591 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9592 [(set_attr "type" "ssemov")
9593 (set_attr "prefix_extra" "1")
9594 (set_attr "prefix" "maybe_vex")
9595 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq: widens elements selected from a V16QI source
;; (selection starting at element 0) to V4DI.  Always VEX-encoded.
9597 (define_insn "avx2_<code>v4qiv4di2"
9598 [(set (match_operand:V4DI 0 "register_operand" "=x")
9601 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9602 (parallel [(const_int 0)
9607 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9608 [(set_attr "type" "ssemov")
9609 (set_attr "prefix_extra" "1")
9610 (set_attr "prefix" "vex")
9611 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>bq: widens elements selected from a V16QI source
;; (selection starting at element 0) to V2DI.  Emitted with the %v
;; template modifier.
9613 (define_insn "sse4_1_<code>v2qiv2di2"
9614 [(set (match_operand:V2DI 0 "register_operand" "=x")
9617 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9618 (parallel [(const_int 0)
9621 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9622 [(set_attr "type" "ssemov")
9623 (set_attr "prefix_extra" "1")
9624 (set_attr "prefix" "maybe_vex")
9625 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq: widens elements selected from a V8HI source
;; (selection starting at element 0) to V4DI.  Always VEX-encoded.
9627 (define_insn "avx2_<code>v4hiv4di2"
9628 [(set (match_operand:V4DI 0 "register_operand" "=x")
9631 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9632 (parallel [(const_int 0)
9637 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9638 [(set_attr "type" "ssemov")
9639 (set_attr "prefix_extra" "1")
9640 (set_attr "prefix" "vex")
9641 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>wq: widens elements selected from a V8HI source
;; (selection starting at element 0) to V2DI.  Emitted with the %v
;; template modifier.
9643 (define_insn "sse4_1_<code>v2hiv2di2"
9644 [(set (match_operand:V2DI 0 "register_operand" "=x")
9647 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9648 (parallel [(const_int 0)
9651 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9652 [(set_attr "type" "ssemov")
9653 (set_attr "prefix_extra" "1")
9654 (set_attr "prefix" "maybe_vex")
9655 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq: widens a whole V4SI source (register or memory) to
;; V4DI.  The template is VEX-only, so the prefix attribute is stated
;; explicitly, as in the other avx2_<code>* extension patterns.
9657 (define_insn "avx2_<code>v4siv4di2"
9658 [(set (match_operand:V4DI 0 "register_operand" "=x")
9660 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9662 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9663 [(set_attr "type" "ssemov")
9664 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9665 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>dq: widens elements selected from a V4SI source
;; (selection starting at element 0) to V2DI.  Emitted with the %v
;; template modifier.
9667 (define_insn "sse4_1_<code>v2siv2di2"
9668 [(set (match_operand:V2DI 0 "register_operand" "=x")
9671 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9672 (parallel [(const_int 0)
9675 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9676 [(set_attr "type" "ssemov")
9677 (set_attr "prefix_extra" "1")
9678 (set_attr "prefix" "maybe_vex")
9679 (set_attr "mode" "TI")])
9681 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9682 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets FLAGS_REG (CCmode) from an unspec of two VF operands and emits
;; vtest<suffix>.  Operand 0 must be a register; operand 1 may be memory.
9683 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9684 [(set (reg:CC FLAGS_REG)
9685 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9686 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9689 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9690 [(set_attr "type" "ssecomi")
9691 (set_attr "prefix_extra" "1")
9692 (set_attr "prefix" "vex")
9693 (set_attr "mode" "<MODE>")])
9695 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9696 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CCmode) from an unspec of two V4DI
;; operands.  Operand 0 must be a register; operand 1 may be memory.
9697 (define_insn "avx_ptest256"
9698 [(set (reg:CC FLAGS_REG)
9699 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9700 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9703 "vptest\t{%1, %0|%0, %1}"
9704 [(set_attr "type" "ssecomi")
9705 (set_attr "prefix_extra" "1")
9706 (set_attr "prefix" "vex")
9707 (set_attr "mode" "OI")])
;; 128-bit (v)ptest: sets FLAGS_REG (CCmode) from an unspec of two V2DI
;; operands.  Emitted with the %v template modifier.
9709 (define_insn "sse4_1_ptest"
9710 [(set (reg:CC FLAGS_REG)
9711 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9712 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9715 "%vptest\t{%1, %0|%0, %1}"
9716 [(set_attr "type" "ssecomi")
9717 (set_attr "prefix_extra" "1")
9718 (set_attr "prefix" "maybe_vex")
9719 (set_attr "mode" "TI")])
;; Packed (v)round<suffix> over the VF modes.  Operand 2 is a constant in
;; 0..15.  The prefix_data16 attribute is computed from a TARGET_AVX
;; test instead of being fixed.
9721 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9722 [(set (match_operand:VF 0 "register_operand" "=x")
9724 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9725 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9728 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9729 [(set_attr "type" "ssecvt")
9730 (set (attr "prefix_data16")
9732 (match_test "TARGET_AVX")
9734 (const_string "1")))
9735 (set_attr "prefix_extra" "1")
9736 (set_attr "length_immediate" "1")
9737 (set_attr "prefix" "maybe_vex")
9738 (set_attr "mode" "<MODE>")])
;; Scalar (v)round<scalar suffix> on VF_128 registers.  Operand 2 is the
;; value rounded with the 0..15 constant in operand 3; operand 1 is a
;; second vector input, tied to the destination in the legacy alternative
;; and a free register in the VEX alternative.
9740 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9741 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9744 [(match_operand:VF_128 2 "register_operand" "x,x")
9745 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9747 (match_operand:VF_128 1 "register_operand" "0,x")
9751 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9752 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9753 [(set_attr "isa" "noavx,avx")
9754 (set_attr "type" "ssecvt")
9755 (set_attr "length_immediate" "1")
9756 (set_attr "prefix_data16" "1,*")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "orig,vex")
9759 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 over the VF modes, enabled only when
;; TARGET_ROUND && !flag_trapping_math.  The preparation code builds a
;; vector whose elements are nextafter (0.5, 0.0) in the element mode,
;; gives it the sign of operand 1 via copysign (operands[3]), allocates a
;; temporary (operands[4]) and sets operands[5] to ROUND_TRUNC for the
;; final rounding step on (match_dup 4).
9761 (define_expand "round<mode>2"
9764 (match_operand:VF 1 "nonimmediate_operand" "")
9766 (set (match_operand:VF 0 "register_operand" "")
9768 [(match_dup 4) (match_dup 5)]
9770 "TARGET_ROUND && !flag_trapping_math"
9772 enum machine_mode scalar_mode;
9773 const struct real_format *fmt;
9774 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9777 scalar_mode = GET_MODE_INNER (<MODE>mode);
9779 /* load nextafter (0.5, 0.0) */
9780 fmt = REAL_MODE_FORMAT (scalar_mode);
9781 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9782 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9783 half = const_double_from_real_value (pred_half, scalar_mode);
9785 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9786 vec_half = force_reg (<MODE>mode, vec_half);
9788 operands[3] = gen_reg_rtx (<MODE>mode);
9789 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9791 operands[4] = gen_reg_rtx (<MODE>mode);
9792 operands[5] = GEN_INT (ROUND_TRUNC);
9795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9797 ;; Intel SSE4.2 string/text processing instructions
9799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing three results: an SI index in
;; operand 0 ("c"), a V16QI mask in operand 1 ("Yz") and FLAGS_REG.
;; Split while pseudos can still be created.  The split code checks
;; which of the three results lack a REG_UNUSED note on curr_insn and
;; emits the pcmpestri, pcmpestrm and/or flags-only (_cconly) patterns to
;; match; when no result is used it emits NOTE_INSN_DELETED.
9801 (define_insn_and_split "sse4_2_pcmpestr"
9802 [(set (match_operand:SI 0 "register_operand" "=c,c")
9804 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9805 (match_operand:SI 3 "register_operand" "a,a")
9806 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9807 (match_operand:SI 5 "register_operand" "d,d")
9808 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9810 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9818 (set (reg:CC FLAGS_REG)
9827 && can_create_pseudo_p ()"
9832 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9833 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9834 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9837 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9838 operands[3], operands[4],
9839 operands[5], operands[6]));
9841 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9842 operands[3], operands[4],
9843 operands[5], operands[6]));
9844 if (flags && !(ecx || xmm0))
9845 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9846 operands[2], operands[3],
9847 operands[4], operands[5],
9849 if (!(flags || ecx || xmm0))
9850 emit_note (NOTE_INSN_DELETED);
9854 [(set_attr "type" "sselog")
9855 (set_attr "prefix_data16" "1")
9856 (set_attr "prefix_extra" "1")
9857 (set_attr "length_immediate" "1")
9858 (set_attr "memory" "none,load")
9859 (set_attr "mode" "TI")])
;; (v)pcmpestri: SI result in operand 0 ("c") plus FLAGS_REG.  The two
;; SI length operands are constrained to "a" and "d"; operand 3 may be a
;; register (alternative 0) or memory (alternative 1); operand 5 is an
;; 8-bit constant.
9861 (define_insn "sse4_2_pcmpestri"
9862 [(set (match_operand:SI 0 "register_operand" "=c,c")
9864 [(match_operand:V16QI 1 "register_operand" "x,x")
9865 (match_operand:SI 2 "register_operand" "a,a")
9866 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9867 (match_operand:SI 4 "register_operand" "d,d")
9868 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9870 (set (reg:CC FLAGS_REG)
9879 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9880 [(set_attr "type" "sselog")
9881 (set_attr "prefix_data16" "1")
9882 (set_attr "prefix_extra" "1")
9883 (set_attr "prefix" "maybe_vex")
9884 (set_attr "length_immediate" "1")
9885 (set_attr "memory" "none,load")
9886 (set_attr "mode" "TI")])
;; (v)pcmpestrm: V16QI result in operand 0 ("Yz") plus FLAGS_REG.  The
;; two SI length operands are constrained to "a" and "d"; operand 3 may
;; be a register (alternative 0) or memory (alternative 1); operand 5 is
;; an 8-bit constant.
9888 (define_insn "sse4_2_pcmpestrm"
9889 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9891 [(match_operand:V16QI 1 "register_operand" "x,x")
9892 (match_operand:SI 2 "register_operand" "a,a")
9893 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9894 (match_operand:SI 4 "register_operand" "d,d")
9895 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9897 (set (reg:CC FLAGS_REG)
9906 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9907 [(set_attr "type" "sselog")
9908 (set_attr "prefix_data16" "1")
9909 (set_attr "prefix_extra" "1")
9910 (set_attr "length_immediate" "1")
9911 (set_attr "prefix" "maybe_vex")
9912 (set_attr "memory" "none,load")
9913 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 emit (v)pcmpestrm and clobber a "Yz" V16QI scratch;
;; alternatives 2-3 emit (v)pcmpestri and clobber a "c" SI scratch.
;; Within each pair, operand 4 is a register and then memory, as the
;; "memory" attribute records.
9915 (define_insn "sse4_2_pcmpestr_cconly"
9916 [(set (reg:CC FLAGS_REG)
9918 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9919 (match_operand:SI 3 "register_operand" "a,a,a,a")
9920 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9921 (match_operand:SI 5 "register_operand" "d,d,d,d")
9922 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9924 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9925 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9928 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9929 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9930 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9931 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9932 [(set_attr "type" "sselog")
9933 (set_attr "prefix_data16" "1")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "length_immediate" "1")
9936 (set_attr "memory" "none,load,none,load")
9937 (set_attr "prefix" "maybe_vex")
9938 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern producing three results: an SI index in
;; operand 0 ("c"), a V16QI mask in operand 1 ("Yz") and FLAGS_REG.
;; Split while pseudos can still be created.  The split code checks
;; which of the three results lack a REG_UNUSED note on curr_insn and
;; emits the pcmpistri, pcmpistrm and/or flags-only (_cconly) patterns to
;; match; when no result is used it emits NOTE_INSN_DELETED.
9940 (define_insn_and_split "sse4_2_pcmpistr"
9941 [(set (match_operand:SI 0 "register_operand" "=c,c")
9943 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9944 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9945 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9947 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9953 (set (reg:CC FLAGS_REG)
9960 && can_create_pseudo_p ()"
9965 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9966 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9967 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9970 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9971 operands[3], operands[4]));
9973 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9974 operands[3], operands[4]));
9975 if (flags && !(ecx || xmm0))
9976 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9977 operands[2], operands[3],
9979 if (!(flags || ecx || xmm0))
9980 emit_note (NOTE_INSN_DELETED);
9984 [(set_attr "type" "sselog")
9985 (set_attr "prefix_data16" "1")
9986 (set_attr "prefix_extra" "1")
9987 (set_attr "length_immediate" "1")
9988 (set_attr "memory" "none,load")
9989 (set_attr "mode" "TI")])
;; (v)pcmpistri: SI result in operand 0 ("c") plus FLAGS_REG.  Operand 2
;; may be a register (alternative 0) or memory (alternative 1); operand 3
;; is an 8-bit constant.
9991 (define_insn "sse4_2_pcmpistri"
9992 [(set (match_operand:SI 0 "register_operand" "=c,c")
9994 [(match_operand:V16QI 1 "register_operand" "x,x")
9995 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9996 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9998 (set (reg:CC FLAGS_REG)
10005 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10006 [(set_attr "type" "sselog")
10007 (set_attr "prefix_data16" "1")
10008 (set_attr "prefix_extra" "1")
10009 (set_attr "length_immediate" "1")
10010 (set_attr "prefix" "maybe_vex")
10011 (set_attr "memory" "none,load")
10012 (set_attr "mode" "TI")])
;; (v)pcmpistrm: V16QI result in operand 0 ("Yz") plus FLAGS_REG.
;; Operand 2 may be a register (alternative 0) or memory (alternative 1);
;; operand 3 is an 8-bit constant.
10014 (define_insn "sse4_2_pcmpistrm"
10015 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10017 [(match_operand:V16QI 1 "register_operand" "x,x")
10018 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10019 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10021 (set (reg:CC FLAGS_REG)
10028 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10029 [(set_attr "type" "sselog")
10030 (set_attr "prefix_data16" "1")
10031 (set_attr "prefix_extra" "1")
10032 (set_attr "length_immediate" "1")
10033 (set_attr "prefix" "maybe_vex")
10034 (set_attr "memory" "none,load")
10035 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 emit (v)pcmpistrm and clobber a "Yz" V16QI scratch;
;; alternatives 2-3 emit (v)pcmpistri and clobber a "c" SI scratch.
;; Within each pair, operand 3 is a register and then memory, as the
;; "memory" attribute records.
10037 (define_insn "sse4_2_pcmpistr_cconly"
10038 [(set (reg:CC FLAGS_REG)
10040 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10041 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10042 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10044 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10045 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10048 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10049 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10050 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10051 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10052 [(set_attr "type" "sselog")
10053 (set_attr "prefix_data16" "1")
10054 (set_attr "prefix_extra" "1")
10055 (set_attr "length_immediate" "1")
10056 (set_attr "memory" "none,load,none,load")
10057 (set_attr "prefix" "maybe_vex")
10058 (set_attr "mode" "TI")])
10060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10062 ;; XOP instructions
10064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10066 ;; XOP parallel integer multiply/add instructions.
10067 ;; Note that the XOP multiply/add instructions
10068 ;; a[i] = b[i] * c[i] + d[i];
10069 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI.  Operands 1 and 2 are the multiplicands, marked
;; commutative with '%'; only operand 2 may be memory.  Operand 3, the
;; addend, is constrained to a register ("x").
10070 (define_insn "xop_pmacsww"
10071 [(set (match_operand:V8HI 0 "register_operand" "=x")
10074 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10075 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10076 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10078 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10079 [(set_attr "type" "ssemuladd")
10080 (set_attr "mode" "TI")])
;; vpmacssww on V8HI.  The product of operands 1 and 2 (mult:V8HI,
;; commutative via '%') is combined with operand 3, which is constrained
;; to a register.  Only operand 2 may be memory.
10082 (define_insn "xop_pmacssww"
10083 [(set (match_operand:V8HI 0 "register_operand" "=x")
10085 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10086 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10087 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10089 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10090 [(set_attr "type" "ssemuladd")
10091 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 are the multiplicands, marked
;; commutative with '%'; only operand 2 may be memory.  Operand 3, the
;; addend, is constrained to a register ("x").
10093 (define_insn "xop_pmacsdd"
10094 [(set (match_operand:V4SI 0 "register_operand" "=x")
10097 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10098 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10099 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10101 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10102 [(set_attr "type" "ssemuladd")
10103 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI.  The product of operands 1 and 2 (mult:V4SI,
;; commutative via '%') is combined with operand 3, which is constrained
;; to a register.  Only operand 2 may be memory.
10105 (define_insn "xop_pmacssdd"
10106 [(set (match_operand:V4SI 0 "register_operand" "=x")
10108 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10109 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10110 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10112 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10113 [(set_attr "type" "ssemuladd")
10114 (set_attr "mode" "TI")])
;; vpmacssdql.  Elements are selected from the V4SI operands 1 and 2
;; with selectors that start at index 1; the result and the register
;; addend (operand 3) are V2DI.  Only operand 2 may be memory.
10116 (define_insn "xop_pmacssdql"
10117 [(set (match_operand:V2DI 0 "register_operand" "=x")
10122 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10123 (parallel [(const_int 1)
10126 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10127 (parallel [(const_int 1)
10129 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10131 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10132 [(set_attr "type" "ssemuladd")
10133 (set_attr "mode" "TI")])
;; vpmacssdqh.  Elements are selected from the V4SI operands 1 and 2
;; with selectors that start at index 0; the result and the register
;; addend (operand 3) are V2DI.  Only operand 2 may be memory.
10135 (define_insn "xop_pmacssdqh"
10136 [(set (match_operand:V2DI 0 "register_operand" "=x")
10141 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10142 (parallel [(const_int 0)
10146 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10147 (parallel [(const_int 0)
10149 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10151 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10152 [(set_attr "type" "ssemuladd")
10153 (set_attr "mode" "TI")])
;; vpmacsdql.  Elements are selected from the V4SI operands 1 and 2
;; with selectors that start at index 1; the result and the register
;; addend (operand 3) are V2DI.  Only operand 2 may be memory.
10155 (define_insn "xop_pmacsdql"
10156 [(set (match_operand:V2DI 0 "register_operand" "=x")
10161 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10162 (parallel [(const_int 1)
10166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10167 (parallel [(const_int 1)
10169 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10171 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10172 [(set_attr "type" "ssemuladd")
10173 (set_attr "mode" "TI")])
10175 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10176 ;; fake it with a multiply/add. In general, we expect the define_split to
10177 ;; occur before register allocation, so we have to handle the corner case where
10178 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of two V4SI operands into V2DI.
;; The output is earlyclobber ("=&x").  The split condition requires
;; reload_completed, and the split preparation sets operands[3] to the
;; V2DImode zero constant.
10179 (define_insn_and_split "xop_mulv2div2di3_low"
10180 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10184 (match_operand:V4SI 1 "register_operand" "%x")
10185 (parallel [(const_int 1)
10189 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10190 (parallel [(const_int 1)
10191 (const_int 3)])))))]
10194 "&& reload_completed"
10195 [(set (match_dup 0)
10203 (parallel [(const_int 1)
10208 (parallel [(const_int 1)
10212 operands[3] = CONST0_RTX (V2DImode);
10214 [(set_attr "type" "ssemul")
10215 (set_attr "mode" "TI")])
;; vpmacsdqh.  Elements are selected from the V4SI operands 1 and 2
;; with selectors that start at index 0; the result and the register
;; addend (operand 3) are V2DI.  Only operand 2 may be memory.
10217 (define_insn "xop_pmacsdqh"
10218 [(set (match_operand:V2DI 0 "register_operand" "=x")
10223 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10224 (parallel [(const_int 0)
10228 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10229 (parallel [(const_int 0)
10231 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10233 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10234 [(set_attr "type" "ssemuladd")
10235 (set_attr "mode" "TI")])
10237 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10238 ;; fake it with a multiply/add. In general, we expect the define_split to
10239 ;; occur before register allocation, so we have to handle the corner case where
10240 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of two V4SI operands into V2DI.
;; The output is earlyclobber ("=&x").  The split condition requires
;; reload_completed, and the split preparation sets operands[3] to the
;; V2DImode zero constant.
10241 (define_insn_and_split "xop_mulv2div2di3_high"
10242 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10246 (match_operand:V4SI 1 "register_operand" "%x")
10247 (parallel [(const_int 0)
10251 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10252 (parallel [(const_int 0)
10253 (const_int 2)])))))]
10256 "&& reload_completed"
10257 [(set (match_dup 0)
10265 (parallel [(const_int 0)
10270 (parallel [(const_int 0)
10274 operands[3] = CONST0_RTX (V2DImode);
10276 [(set_attr "type" "ssemul")
10277 (set_attr "mode" "TI")])
10279 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd.  Elements are selected from the V8HI operands 1 and 2
;; with selectors that start at index 1; the result and the register
;; addend (operand 3) are V4SI.  Only operand 2 may be memory.
10280 (define_insn "xop_pmacsswd"
10281 [(set (match_operand:V4SI 0 "register_operand" "=x")
10286 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10287 (parallel [(const_int 1)
10293 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10294 (parallel [(const_int 1)
10298 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10300 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10301 [(set_attr "type" "ssemuladd")
10302 (set_attr "mode" "TI")])
;; vpmacswd.  Elements are selected from the V8HI operands 1 and 2
;; with selectors that start at index 1; the result and the register
;; addend (operand 3) are V4SI.  Only operand 2 may be memory.
10304 (define_insn "xop_pmacswd"
10305 [(set (match_operand:V4SI 0 "register_operand" "=x")
10310 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10311 (parallel [(const_int 1)
10317 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10318 (parallel [(const_int 1)
10322 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10324 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10325 [(set_attr "type" "ssemuladd")
10326 (set_attr "mode" "TI")])
;; vpmadcsswd.  Both the index-0-based and the index-1-based element
;; selections of the V8HI operands 1 and 2 take part in the RTL.  The
;; result and the register addend (operand 3) are V4SI.  Only operand 2
;; may be memory.
10328 (define_insn "xop_pmadcsswd"
10329 [(set (match_operand:V4SI 0 "register_operand" "=x")
10335 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10336 (parallel [(const_int 0)
10342 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10343 (parallel [(const_int 0)
10351 (parallel [(const_int 1)
10358 (parallel [(const_int 1)
10361 (const_int 7)])))))
10362 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10364 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10365 [(set_attr "type" "ssemuladd")
10366 (set_attr "mode" "TI")])
;; vpmadcswd.  Both the index-0-based and the index-1-based element
;; selections of the V8HI operands 1 and 2 take part in the RTL.  The
;; result and the register addend (operand 3) are V4SI.  Only operand 2
;; may be memory.
10368 (define_insn "xop_pmadcswd"
10369 [(set (match_operand:V4SI 0 "register_operand" "=x")
10375 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10376 (parallel [(const_int 0)
10382 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10383 (parallel [(const_int 0)
10391 (parallel [(const_int 1)
10398 (parallel [(const_int 1)
10401 (const_int 7)])))))
10402 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10404 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10405 [(set_attr "type" "ssemuladd")
10406 (set_attr "mode" "TI")])
10408 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator.  The operands appear in the
;; RTL in the order 3, 1, 2.  Two alternatives: either operand 3 is a
;; register and operand 2 may be memory, or operand 3 is memory and
;; operand 2 is a register.  Operand 1 is always a register.
10409 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10410 [(set (match_operand:V 0 "register_operand" "=x,x")
10412 (match_operand:V 3 "nonimmediate_operand" "x,m")
10413 (match_operand:V 1 "vector_move_operand" "x,x")
10414 (match_operand:V 2 "vector_move_operand" "xm,x")))]
10416 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10417 [(set_attr "type" "sse4arg")])
10419 ;; XOP horizontal add/subtract instructions
;; vphaddbw: horizontal add taking a V16QI register-or-memory source
;; (operand 1) and producing a V8HI result.  The element selectors start
;; at indices 0 and 1 of the source.
10420 (define_insn "xop_phaddbw"
10421 [(set (match_operand:V8HI 0 "register_operand" "=x")
10425 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10426 (parallel [(const_int 0)
10437 (parallel [(const_int 1)
10444 (const_int 15)])))))]
10446 "vphaddbw\t{%1, %0|%0, %1}"
10447 [(set_attr "type" "sseiadd1")])
;; vphaddbd: horizontal add taking a V16QI register-or-memory source
;; (operand 1) and producing a V4SI result.  Four element selections,
;; starting at indices 0, 1, 2 and 3, feed the sum.
10449 (define_insn "xop_phaddbd"
10450 [(set (match_operand:V4SI 0 "register_operand" "=x")
10455 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10456 (parallel [(const_int 0)
10463 (parallel [(const_int 1)
10466 (const_int 13)]))))
10471 (parallel [(const_int 2)
10478 (parallel [(const_int 3)
10481 (const_int 15)]))))))]
10483 "vphaddbd\t{%1, %0|%0, %1}"
10484 [(set_attr "type" "sseiadd1")])
;; vphaddbq: horizontal add taking a V16QI register-or-memory source
;; (operand 1) and producing a V2DI result.  Eight element selections,
;; starting at indices 0-3 and 8-11, feed the sum.
10486 (define_insn "xop_phaddbq"
10487 [(set (match_operand:V2DI 0 "register_operand" "=x")
10493 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10494 (parallel [(const_int 0)
10499 (parallel [(const_int 1)
10505 (parallel [(const_int 2)
10510 (parallel [(const_int 3)
10511 (const_int 7)])))))
10517 (parallel [(const_int 8)
10522 (parallel [(const_int 9)
10523 (const_int 13)]))))
10528 (parallel [(const_int 10)
10533 (parallel [(const_int 11)
10534 (const_int 15)])))))))]
10536 "vphaddbq\t{%1, %0|%0, %1}"
10537 [(set_attr "type" "sseiadd1")])
;; vphaddwd: horizontal add taking a V8HI register-or-memory source
;; (operand 1) and producing a V4SI result.  The element selectors start
;; at indices 0 and 1 of the source.
10539 (define_insn "xop_phaddwd"
10540 [(set (match_operand:V4SI 0 "register_operand" "=x")
10544 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10545 (parallel [(const_int 0)
10552 (parallel [(const_int 1)
10555 (const_int 7)])))))]
10557 "vphaddwd\t{%1, %0|%0, %1}"
10558 [(set_attr "type" "sseiadd1")])
;; vphaddwq: horizontal add taking a V8HI register-or-memory source
;; (operand 1) and producing a V2DI result.  Four element selections,
;; starting at indices 0, 1, 2 and 3, feed the sum.
10560 (define_insn "xop_phaddwq"
10561 [(set (match_operand:V2DI 0 "register_operand" "=x")
10566 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10567 (parallel [(const_int 0)
10572 (parallel [(const_int 1)
10578 (parallel [(const_int 2)
10583 (parallel [(const_int 3)
10584 (const_int 7)]))))))]
10586 "vphaddwq\t{%1, %0|%0, %1}"
10587 [(set_attr "type" "sseiadd1")])
;; vphadddq: horizontal add taking a V4SI register-or-memory source
;; (operand 1) and producing a V2DI result.  The element selectors start
;; at indices 0 and 1 of the source.
10589 (define_insn "xop_phadddq"
10590 [(set (match_operand:V2DI 0 "register_operand" "=x")
10594 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10595 (parallel [(const_int 0)
10600 (parallel [(const_int 1)
10601 (const_int 3)])))))]
10603 "vphadddq\t{%1, %0|%0, %1}"
10604 [(set_attr "type" "sseiadd1")])
;; vphaddubw: same operand layout as xop_phaddbw (V16QI source, V8HI
;; result, selectors starting at indices 0 and 1), emitting the "u"
;; form of the mnemonic.
10606 (define_insn "xop_phaddubw"
10607 [(set (match_operand:V8HI 0 "register_operand" "=x")
10611 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10612 (parallel [(const_int 0)
10623 (parallel [(const_int 1)
10630 (const_int 15)])))))]
10632 "vphaddubw\t{%1, %0|%0, %1}"
10633 [(set_attr "type" "sseiadd1")])
;; vphaddubd: same operand layout as xop_phaddbd (V16QI source, V4SI
;; result, selections starting at indices 0-3), emitting the "u" form
;; of the mnemonic.
10635 (define_insn "xop_phaddubd"
10636 [(set (match_operand:V4SI 0 "register_operand" "=x")
10641 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10642 (parallel [(const_int 0)
10649 (parallel [(const_int 1)
10652 (const_int 13)]))))
10657 (parallel [(const_int 2)
10664 (parallel [(const_int 3)
10667 (const_int 15)]))))))]
10669 "vphaddubd\t{%1, %0|%0, %1}"
10670 [(set_attr "type" "sseiadd1")])
;; vphaddubq: same operand layout as xop_phaddbq (V16QI source, V2DI
;; result, selections starting at indices 0-3 and 8-11), emitting the
;; "u" form of the mnemonic.
10672 (define_insn "xop_phaddubq"
10673 [(set (match_operand:V2DI 0 "register_operand" "=x")
10679 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10680 (parallel [(const_int 0)
10685 (parallel [(const_int 1)
10691 (parallel [(const_int 2)
10696 (parallel [(const_int 3)
10697 (const_int 7)])))))
10703 (parallel [(const_int 8)
10708 (parallel [(const_int 9)
10709 (const_int 13)]))))
10714 (parallel [(const_int 10)
10719 (parallel [(const_int 11)
10720 (const_int 15)])))))))]
10722 "vphaddubq\t{%1, %0|%0, %1}"
10723 [(set_attr "type" "sseiadd1")])
;; vphadduwd: same operand layout as xop_phaddwd (V8HI source, V4SI
;; result, selectors starting at indices 0 and 1), emitting the "u"
;; form of the mnemonic.
10725 (define_insn "xop_phadduwd"
10726 [(set (match_operand:V4SI 0 "register_operand" "=x")
10730 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10731 (parallel [(const_int 0)
10738 (parallel [(const_int 1)
10741 (const_int 7)])))))]
10743 "vphadduwd\t{%1, %0|%0, %1}"
10744 [(set_attr "type" "sseiadd1")])
;; vphadduwq: same operand layout as xop_phaddwq (V8HI source, V2DI
;; result, selections starting at indices 0-3), emitting the "u" form
;; of the mnemonic.
10746 (define_insn "xop_phadduwq"
10747 [(set (match_operand:V2DI 0 "register_operand" "=x")
10752 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10753 (parallel [(const_int 0)
10758 (parallel [(const_int 1)
10764 (parallel [(const_int 2)
10769 (parallel [(const_int 3)
10770 (const_int 7)]))))))]
10772 "vphadduwq\t{%1, %0|%0, %1}"
10773 [(set_attr "type" "sseiadd1")])
;; vphaddudq: same operand layout as xop_phadddq (V4SI source, V2DI
;; result, selectors starting at indices 0 and 1), emitting the "u"
;; form of the mnemonic.
10775 (define_insn "xop_phaddudq"
10776 [(set (match_operand:V2DI 0 "register_operand" "=x")
10780 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10781 (parallel [(const_int 0)
10786 (parallel [(const_int 1)
10787 (const_int 3)])))))]
10789 "vphaddudq\t{%1, %0|%0, %1}"
10790 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract taking a V16QI register-or-memory
;; source (operand 1) and producing a V8HI result.  The element
;; selectors start at indices 0 and 1 of the source.
10792 (define_insn "xop_phsubbw"
10793 [(set (match_operand:V8HI 0 "register_operand" "=x")
10797 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10798 (parallel [(const_int 0)
10809 (parallel [(const_int 1)
10816 (const_int 15)])))))]
10818 "vphsubbw\t{%1, %0|%0, %1}"
10819 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract taking a V8HI register-or-memory
;; source (operand 1) and producing a V4SI result.  The element
;; selectors start at indices 0 and 1 of the source.
10821 (define_insn "xop_phsubwd"
10822 [(set (match_operand:V4SI 0 "register_operand" "=x")
10826 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10827 (parallel [(const_int 0)
10834 (parallel [(const_int 1)
10837 (const_int 7)])))))]
10839 "vphsubwd\t{%1, %0|%0, %1}"
10840 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract taking a V4SI register-or-memory
;; source (operand 1) and producing a V2DI result.  The element
;; selectors start at indices 0 and 1 of the source.
10842 (define_insn "xop_phsubdq"
10843 [(set (match_operand:V2DI 0 "register_operand" "=x")
10847 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10848 (parallel [(const_int 0)
10853 (parallel [(const_int 1)
10854 (const_int 3)])))))]
10856 "vphsubdq\t{%1, %0|%0, %1}"
10857 [(set_attr "type" "sseiadd1")])
10859 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE over operands 1-3.
;; Operand 1 is always a register.  Either operand 2 or operand 3 may be
;; memory, but the insn condition rejects the case where both are.
10860 (define_insn "xop_pperm"
10861 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10863 [(match_operand:V16QI 1 "register_operand" "x,x")
10864 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10865 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10866 UNSPEC_XOP_PERMUTE))]
10867 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10868 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10869 [(set_attr "type" "sse4arg")
10870 (set_attr "mode" "TI")])
10872 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is a V16QI value attached with `use' and passed as
;; the last source of the instruction.  Operands 2 and 3 may not both
;; be memory.
10873 (define_insn "xop_pperm_pack_v2di_v4si"
10874 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10877 (match_operand:V2DI 1 "register_operand" "x,x"))
10879 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10880 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10881 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10882 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10883 [(set_attr "type" "sse4arg")
10884 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is a V16QI value attached with `use' and passed as
;; the last source of the instruction.  Operands 2 and 3 may not both
;; be memory.
10886 (define_insn "xop_pperm_pack_v4si_v8hi"
10887 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10890 (match_operand:V4SI 1 "register_operand" "x,x"))
10892 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10893 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10894 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10895 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10896 [(set_attr "type" "sse4arg")
10897 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is a V16QI value attached with `use' and passed as
;; the last source of the instruction.  Operands 2 and 3 may not both
;; be memory.
10899 (define_insn "xop_pperm_pack_v8hi_v16qi"
10900 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10903 (match_operand:V8HI 1 "register_operand" "x,x"))
10905 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10906 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10907 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10908 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10909 [(set_attr "type" "sse4arg")
10910 (set_attr "mode" "TI")])
10912 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand,
;; the preparation code does the following:
;;   - converts the count to <ssescalarmode> if it has another mode;
;;   - replicates it <ssescalarnum> times into a PARALLEL;
;;   - materialises that as a vector register through vec_init<mode>;
;;   - emits the variable-count pattern xop_vrotl<mode>3 with it.
10913 (define_expand "rotl<mode>3"
10914 [(set (match_operand:VI_128 0 "register_operand" "")
10916 (match_operand:VI_128 1 "nonimmediate_operand" "")
10917 (match_operand:SI 2 "general_operand")))]
10920 /* If we were given a scalar, convert it to parallel */
10921 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10923 rtvec vs = rtvec_alloc (<ssescalarnum>);
10924 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10925 rtx reg = gen_reg_rtx (<MODE>mode);
10926 rtx op2 = operands[2];
10929 if (GET_MODE (op2) != <ssescalarmode>mode)
10931 op2 = gen_reg_rtx (<ssescalarmode>mode);
10932 convert_move (op2, operands[2], false);
10935 for (i = 0; i < <ssescalarnum>; i++)
10936 RTVEC_ELT (vs, i) = op2;
10938 emit_insn (gen_vec_init<mode> (reg, par));
10939 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand,
;; the count is converted to <ssescalarmode> if needed and replicated
;; into a vector register, as in rotl<mode>3.  That vector is then
;; negated with neg<mode>2 and passed to xop_vrotl<mode>3, so the right
;; rotate is expressed as a left rotate by the negated count.
10944 (define_expand "rotr<mode>3"
10945 [(set (match_operand:VI_128 0 "register_operand" "")
10947 (match_operand:VI_128 1 "nonimmediate_operand" "")
10948 (match_operand:SI 2 "general_operand")))]
10951 /* If we were given a scalar, convert it to parallel */
10952 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10954 rtvec vs = rtvec_alloc (<ssescalarnum>);
10955 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10956 rtx neg = gen_reg_rtx (<MODE>mode);
10957 rtx reg = gen_reg_rtx (<MODE>mode);
10958 rtx op2 = operands[2];
10961 if (GET_MODE (op2) != <ssescalarmode>mode)
10963 op2 = gen_reg_rtx (<ssescalarmode>mode);
10964 convert_move (op2, operands[2], false);
10967 for (i = 0; i < <ssescalarnum>; i++)
10968 RTVEC_ELT (vs, i) = op2;
10970 emit_insn (gen_vec_init<mode> (reg, par));
10971 emit_insn (gen_neg<mode>2 (neg, reg));
10972 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count rotate left for the VI_128 modes.  Operand 1 may be a
;; register or memory; operand 2 is an immediate accepted by
;; const_0_to_<sserotatemax>_operand and is emitted directly as the
;; vprot count.
10977 (define_insn "xop_rotl<mode>3"
10978 [(set (match_operand:VI_128 0 "register_operand" "=x")
10980 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10981 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10983 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10984 [(set_attr "type" "sseishft")
10985 (set_attr "length_immediate" "1")
10986 (set_attr "mode" "TI")])
;; Immediate-count rotate right for the VI_128 modes.  The instruction
;; emitted is vprot with a left-rotate count, so the right-rotate count
;; in operand 2 is rewritten as (element width in bits) - count and
;; stored in operands[3], which is what the template prints.
;;
;; The element width must come from the scalar mode.  <ssescalarnum> is
;; the number of elements, so <ssescalarnum> * 8 only equals the element
;; width for 4 x 32-bit vectors; for 2 x 64-bit elements it would give
;; 16 instead of 64 and produce a wrong rotate count.
10988 (define_insn "xop_rotr<mode>3"
10989 [(set (match_operand:VI_128 0 "register_operand" "=x")
10991 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10992 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10995 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10996 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
10998 [(set_attr "type" "sseishft")
10999 (set_attr "length_immediate" "1")
11000 (set_attr "mode" "TI")])
;; Per-element variable rotate right for the VI_128 modes.  The count
;; vector (operand 2) is negated into a fresh register with neg<mode>2
;; and the result is passed to xop_vrotl<mode>3.
11002 (define_expand "vrotr<mode>3"
11003 [(match_operand:VI_128 0 "register_operand" "")
11004 (match_operand:VI_128 1 "register_operand" "")
11005 (match_operand:VI_128 2 "register_operand" "")]
11008 rtx reg = gen_reg_rtx (<MODE>mode);
11009 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11010 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left for the VI_128 modes.  Forwards the
;; three operands unchanged to xop_vrotl<mode>3.
11014 (define_expand "vrotl<mode>3"
11015 [(match_operand:VI_128 0 "register_operand" "")
11016 (match_operand:VI_128 1 "register_operand" "")
11017 (match_operand:VI_128 2 "register_operand" "")]
11020 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vprot for the VI_128 modes.  The RTL is an
;; if_then_else involving the count vector (operand 2) and its negation.
;; Two alternatives: the count in a register with the data (operand 1)
;; in a register or memory, or the count in memory with the data in a
;; register.  The condition rejects both operands being memory.
11024 (define_insn "xop_vrotl<mode>3"
11025 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11026 (if_then_else:VI_128
11028 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11031 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11035 (neg:VI_128 (match_dup 2)))))]
11036 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11037 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11038 [(set_attr "type" "sseishft")
11039 (set_attr "prefix_data16" "0")
11040 (set_attr "prefix_extra" "2")
11041 (set_attr "mode" "TI")])
11043 ;; XOP packed shift instructions.
11044 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right for the VI124_128 modes.
;; The count vector is negated with neg<mode>2 and the result is passed
;; to the logical-shift pattern xop_lshl<mode>3.
11045 (define_expand "vlshr<mode>3"
11046 [(match_operand:VI124_128 0 "register_operand" "")
11047 (match_operand:VI124_128 1 "register_operand" "")
11048 (match_operand:VI124_128 2 "register_operand" "")]
11051 rtx neg = gen_reg_rtx (<MODE>mode);
11052 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11053 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right for the VI124_128 modes.
;; The count vector is negated with neg<mode>2 and the result is passed
;; to the arithmetic-shift pattern xop_ashl<mode>3.
11057 (define_expand "vashr<mode>3"
11058 [(match_operand:VI124_128 0 "register_operand" "")
11059 (match_operand:VI124_128 1 "register_operand" "")
11060 (match_operand:VI124_128 2 "register_operand" "")]
11063 rtx neg = gen_reg_rtx (<MODE>mode);
11064 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11065 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable shift left for the VI124_128 modes.  Forwards
;; the three operands unchanged to xop_ashl<mode>3.
11069 (define_expand "vashl<mode>3"
11070 [(match_operand:VI124_128 0 "register_operand" "")
11071 (match_operand:VI124_128 1 "register_operand" "")
11072 (match_operand:VI124_128 2 "register_operand" "")]
11075 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vpsha for the VI_128 modes.  The RTL is an
;; if_then_else involving the count vector (operand 2) and its negation.
;; Two alternatives: the count in a register with the data (operand 1)
;; in a register or memory, or the count in memory with the data in a
;; register.  The condition rejects both operands being memory.
11079 (define_insn "xop_ashl<mode>3"
11080 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11081 (if_then_else:VI_128
11083 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11086 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11090 (neg:VI_128 (match_dup 2)))))]
11091 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11092 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11093 [(set_attr "type" "sseishft")
11094 (set_attr "prefix_data16" "0")
11095 (set_attr "prefix_extra" "2")
11096 (set_attr "mode" "TI")])
;; Variable-count vpshl for the VI_128 modes.  The RTL is an
;; if_then_else involving the count vector (operand 2) and its negation.
;; Two alternatives: the count in a register with the data (operand 1)
;; in a register or memory, or the count in memory with the data in a
;; register.  The condition rejects both operands being memory.
11098 (define_insn "xop_lshl<mode>3"
11099 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11100 (if_then_else:VI_128
11102 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11105 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11109 (neg:VI_128 (match_dup 2)))))]
11110 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11111 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11112 [(set_attr "type" "sseishft")
11113 (set_attr "prefix_data16" "0")
11114 (set_attr "prefix_extra" "2")
11115 (set_attr "mode" "TI")])
11117 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count.  The count (operand 2) is
;; replicated into all 16 elements of a PARALLEL, materialised as a
;; V16QI register through vec_initv16qi, and handed to xop_ashlv16qi3.
11118 (define_expand "ashlv16qi3"
11119 [(match_operand:V16QI 0 "register_operand" "")
11120 (match_operand:V16QI 1 "register_operand" "")
11121 (match_operand:SI 2 "nonmemory_operand" "")]
11124 rtvec vs = rtvec_alloc (16);
11125 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11126 rtx reg = gen_reg_rtx (V16QImode);
11128 for (i = 0; i < 16; i++)
11129 RTVEC_ELT (vs, i) = operands[2];
11131 emit_insn (gen_vec_initv16qi (reg, par));
11132 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count.  The count (operand 2)
;; is replicated into all 16 elements of a PARALLEL, materialised as a
;; V16QI register through vec_initv16qi, and handed to xop_lshlv16qi3.
11136 (define_expand "lshlv16qi3"
11137 [(match_operand:V16QI 0 "register_operand" "")
11138 (match_operand:V16QI 1 "register_operand" "")
11139 (match_operand:SI 2 "nonmemory_operand" "")]
11142 rtvec vs = rtvec_alloc (16);
11143 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11144 rtx reg = gen_reg_rtx (V16QImode);
11146 for (i = 0; i < 16; i++)
11147 RTVEC_ELT (vs, i) = operands[2];
11149 emit_insn (gen_vec_initv16qi (reg, par));
11150 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, expressed as
;; an xop_ashlv16qi3 with a negated count.
;;   - Constant count: the negated constant is what gets replicated
;;     into the 16-element vector.
;;   - Non-constant count: the replicated vector is negated afterwards
;;     with negv16qi2 and the negated register is used for the shift.
11154 (define_expand "ashrv16qi3"
11155 [(match_operand:V16QI 0 "register_operand" "")
11156 (match_operand:V16QI 1 "register_operand" "")
11157 (match_operand:SI 2 "nonmemory_operand" "")]
11160 rtvec vs = rtvec_alloc (16);
11161 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11162 rtx reg = gen_reg_rtx (V16QImode);
11164 rtx ele = ((CONST_INT_P (operands[2]))
11165 ? GEN_INT (- INTVAL (operands[2]))
11168 for (i = 0; i < 16; i++)
11169 RTVEC_ELT (vs, i) = ele;
11171 emit_insn (gen_vec_initv16qi (reg, par));
11173 if (!CONST_INT_P (operands[2]))
11175 rtx neg = gen_reg_rtx (V16QImode);
11176 emit_insn (gen_negv16qi2 (neg, reg));
11177 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11180 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, expressed as an
;; xop_ashlv2di3 with a negated count.  The negated count is obtained in
;; one of three ways:
;;   - a CONST_INT is negated at expand time with GEN_INT;
;;   - a value whose mode is not DImode is first converted with
;;     convert_move and then negated with negdi2;
;;   - otherwise operand 2 is negated directly with negdi2.
;; The negated count is placed in both elements of the vector built by
;; vec_initv2di.
11185 (define_expand "ashrv2di3"
11186 [(match_operand:V2DI 0 "register_operand" "")
11187 (match_operand:V2DI 1 "register_operand" "")
11188 (match_operand:DI 2 "nonmemory_operand" "")]
11191 rtvec vs = rtvec_alloc (2);
11192 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11193 rtx reg = gen_reg_rtx (V2DImode);
11196 if (CONST_INT_P (operands[2]))
11197 ele = GEN_INT (- INTVAL (operands[2]));
11198 else if (GET_MODE (operands[2]) != DImode)
11200 rtx move = gen_reg_rtx (DImode);
11201 ele = gen_reg_rtx (DImode);
11202 convert_move (move, operands[2], false);
11203 emit_insn (gen_negdi2 (ele, move));
11207 ele = gen_reg_rtx (DImode);
11208 emit_insn (gen_negdi2 (ele, operands[2]));
11211 RTVEC_ELT (vs, 0) = ele;
11212 RTVEC_ELT (vs, 1) = ele;
11213 emit_insn (gen_vec_initv2di (reg, par));
11214 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11218 ;; XOP FRCZ support
;; vfrcz for every mode in the FMAMODE iterator.  One register-or-memory
;; source (operand 1) and a register destination; the mnemonic suffix
;; comes from <ssemodesuffix>.
11219 (define_insn "xop_frcz<mode>2"
11220 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11222 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11225 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11226 [(set_attr "type" "ssecvt1")
11227 (set_attr "mode" "<MODE>")])
;; Expander for the vfrcz form on the VF_128 modes.  The preparation
;; code sets operands[3] to the zero constant of <MODE>mode.
11230 (define_expand "xop_vmfrcz<mode>2"
11231 [(set (match_operand:VF_128 0 "register_operand")
11234 [(match_operand:VF_128 1 "nonimmediate_operand")]
11240 operands[3] = CONST0_RTX (<MODE>mode);
;; vfrcz insn for the VF_128 modes using the scalar mnemonic suffix
;; (<ssescalarmodesuffix>).  Operand 2 must satisfy const0_operand and
;; does not appear in the output template.
11243 (define_insn "*xop_vmfrcz_<mode>"
11244 [(set (match_operand:VF_128 0 "register_operand" "=x")
11247 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11249 (match_operand:VF_128 2 "const0_operand")
11252 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11253 [(set_attr "type" "ssecvt1")
11254 (set_attr "mode" "<MODE>")])
;; vpcom comparison for the VI_128 modes.  Operand 1 is the comparison
;; operator itself, accepted by ix86_comparison_int_operator and printed
;; through %Y1 as part of the mnemonic.  Operand 2 is a register;
;; operand 3 may be a register or memory.
11256 (define_insn "xop_maskcmp<mode>3"
11257 [(set (match_operand:VI_128 0 "register_operand" "=x")
11258 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11259 [(match_operand:VI_128 2 "register_operand" "x")
11260 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11262 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11263 [(set_attr "type" "sse4arg")
11264 (set_attr "prefix_data16" "0")
11265 (set_attr "prefix_rep" "0")
11266 (set_attr "prefix_extra" "2")
11267 (set_attr "length_immediate" "1")
11268 (set_attr "mode" "TI")])
;; Counterpart of xop_maskcmp<mode>3 whose comparison operator
;; (operand 1) is accepted by ix86_comparison_uns_operator.  The template
;; inserts a "u" between the %Y1 condition and the mode suffix.
11270 (define_insn "xop_maskcmp_uns<mode>3"
11271 [(set (match_operand:VI_128 0 "register_operand" "=x")
11272 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11273 [(match_operand:VI_128 2 "register_operand" "x")
11274 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11276 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11277 [(set_attr "type" "ssecmp")
11278 (set_attr "prefix_data16" "0")
11279 (set_attr "prefix_rep" "0")
11280 (set_attr "prefix_extra" "2")
11281 (set_attr "length_immediate" "1")
11282 (set_attr "mode" "TI")])
11284 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11285 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11286 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but
;; the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11287 (define_insn "xop_maskcmp_uns2<mode>3"
11288 [(set (match_operand:VI_128 0 "register_operand" "=x")
11290 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11291 [(match_operand:VI_128 2 "register_operand" "x")
11292 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11293 UNSPEC_XOP_UNSIGNED_CMP))]
11295 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11296 [(set_attr "type" "ssecmp")
11297 (set_attr "prefix_data16" "0")
11298 (set_attr "prefix_extra" "2")
11299 (set_attr "length_immediate" "1")
11300 (set_attr "mode" "TI")])
11302 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11303 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse for the VI_128 modes, modelled as
;; UNSPEC_XOP_TRUEFALSE.  Operand 3 is an integer constant that selects
;; the mnemonic: non-zero emits vpcomtrue, zero emits vpcomfalse.
;; Operand 3 is not printed in the instruction.
11304 (define_insn "xop_pcom_tf<mode>3"
11305 [(set (match_operand:VI_128 0 "register_operand" "=x")
11307 [(match_operand:VI_128 1 "register_operand" "x")
11308 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11309 (match_operand:SI 3 "const_int_operand" "n")]
11310 UNSPEC_XOP_TRUEFALSE))]
11313 return ((INTVAL (operands[3]) != 0)
11314 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11315 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11317 [(set_attr "type" "ssecmp")
11318 (set_attr "prefix_data16" "0")
11319 (set_attr "prefix_extra" "2")
11320 (set_attr "length_immediate" "1")
11321 (set_attr "mode" "TI")])
;; vpermil2 for the VF modes.  Operands 1 and 2 are the two VF sources,
;; operand 3 is an integer vector of mode <sseintvecmode> (register or
;; memory) and operand 4 is an immediate in the range 0..3.
;;
;; Operand 2 deliberately carries no '%' modifier.  '%' would declare
;; operands 2 and 3 interchangeable, but they have different modes and
;; occupy different positions in the instruction, so they must never be
;; swapped.
11323 (define_insn "xop_vpermil2<mode>3"
11324 [(set (match_operand:VF 0 "register_operand" "=x")
11326 [(match_operand:VF 1 "register_operand" "x")
11327 (match_operand:VF 2 "nonimmediate_operand" "x")
11328 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11329 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11332 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11333 [(set_attr "type" "sse4arg")
11334 (set_attr "length_immediate" "1")
11335 (set_attr "mode" "<MODE>")])
11337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc on V2DI, modelled as an unspec of operands 1 and 2.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination ("0")
;; and emits the two-operand form.  Alternative 1 (isa "avx") emits the
;; three-operand vaesenc.  Operand 2 may be memory in both.
11339 (define_insn "aesenc"
11340 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11341 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11342 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11346 aesenc\t{%2, %0|%0, %2}
11347 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11348 [(set_attr "isa" "noavx,avx")
11349 (set_attr "type" "sselog1")
11350 (set_attr "prefix_extra" "1")
11351 (set_attr "prefix" "orig,vex")
11352 (set_attr "mode" "TI")])
;; aesenclast on V2DI, modelled as UNSPEC_AESENCLAST.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination and emits the
;; two-operand form.  Alternative 1 (isa "avx") emits the three-operand
;; vaesenclast.  Operand 2 may be memory in both.
11354 (define_insn "aesenclast"
11355 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11356 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11357 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11358 UNSPEC_AESENCLAST))]
11361 aesenclast\t{%2, %0|%0, %2}
11362 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11363 [(set_attr "isa" "noavx,avx")
11364 (set_attr "type" "sselog1")
11365 (set_attr "prefix_extra" "1")
11366 (set_attr "prefix" "orig,vex")
11367 (set_attr "mode" "TI")])
;; aesdec on V2DI, modelled as an unspec of operands 1 and 2.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination ("0")
;; and emits the two-operand form.  Alternative 1 (isa "avx") emits the
;; three-operand vaesdec.  Operand 2 may be memory in both.
11369 (define_insn "aesdec"
11370 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11371 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11372 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11376 aesdec\t{%2, %0|%0, %2}
11377 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11378 [(set_attr "isa" "noavx,avx")
11379 (set_attr "type" "sselog1")
11380 (set_attr "prefix_extra" "1")
11381 (set_attr "prefix" "orig,vex")
11382 (set_attr "mode" "TI")])
;; aesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination and emits the
;; two-operand form.  Alternative 1 (isa "avx") emits the three-operand
;; vaesdeclast.  Operand 2 may be memory in both.
11384 (define_insn "aesdeclast"
11385 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11386 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11387 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11388 UNSPEC_AESDECLAST))]
11391 aesdeclast\t{%2, %0|%0, %2}
11392 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11393 [(set_attr "isa" "noavx,avx")
11394 (set_attr "type" "sselog1")
11395 (set_attr "prefix_extra" "1")
11396 (set_attr "prefix" "orig,vex")
11397 (set_attr "mode" "TI")])
;; aesimc on V2DI: a single register-or-memory source (operand 1) and a
;; register destination.  One template serves both encodings through
;; the %v marker together with the "maybe_vex" prefix attribute.
11399 (define_insn "aesimc"
11400 [(set (match_operand:V2DI 0 "register_operand" "=x")
11401 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11404 "%vaesimc\t{%1, %0|%0, %1}"
11405 [(set_attr "type" "sselog1")
11406 (set_attr "prefix_extra" "1")
11407 (set_attr "prefix" "maybe_vex")
11408 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.
;; Operand 1 is a register-or-memory source and operand 2 an immediate
;; in the range 0..255.  The template uses the %v marker with the
;; "maybe_vex" prefix attribute.
11410 (define_insn "aeskeygenassist"
11411 [(set (match_operand:V2DI 0 "register_operand" "=x")
11412 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11413 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11414 UNSPEC_AESKEYGENASSIST))]
11416 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11417 [(set_attr "type" "sselog1")
11418 (set_attr "prefix_extra" "1")
11419 (set_attr "length_immediate" "1")
11420 (set_attr "prefix" "maybe_vex")
11421 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI with an immediate selector (operand 3, 0..255).
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and
;; emits the two-source form.  Alternative 1 (isa "avx") emits the
;; three-source vpclmulqdq.  Operand 2 may be memory in both.
11423 (define_insn "pclmulqdq"
11424 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11426 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11427 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11431 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11432 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11433 [(set_attr "isa" "noavx,avx")
11434 (set_attr "type" "sselog1")
11435 (set_attr "prefix_extra" "1")
11436 (set_attr "length_immediate" "1")
11437 (set_attr "prefix" "orig,vex")
11438 (set_attr "mode" "TI")])
;; Expander for vzeroall.  It replaces operand 0 with a PARALLEL of
;; nregs + 1 elements: element 0 is an unspec_volatile wrapping const0_rtx,
;; and elements 1..nregs each set one SSE register (in V8SImode) to zero.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.
11440 (define_expand "avx_vzeroall"
11441 [(match_par_dup 0 [(const_int 0)])]
11444 int nregs = TARGET_64BIT ? 16 : 8;
11447 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11449 XVECEXP (operands[0], 0, 0)
11450 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11453 for (regno = 0; regno < nregs; regno++)
11454 XVECEXP (operands[0], 0, regno + 1)
11455 = gen_rtx_SET (VOIDmode,
11456 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11457 CONST0_RTX (V8SImode));
;; Insn for a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; record that the instruction has no modrm byte and touches no memory.
11460 (define_insn "*avx_vzeroall"
11461 [(match_parallel 0 "vzeroall_operation"
11462 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11465 [(set_attr "type" "sse")
11466 (set_attr "modrm" "0")
11467 (set_attr "memory" "none")
11468 (set_attr "prefix" "vex")
11469 (set_attr "mode" "OI")])
11471 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11472 ;; if the upper 128bits are unused.
;; Operand 0 is a const_int carried inside the UNSPECV_VZEROUPPER
;; unspec_volatile.  NOTE(review): what that constant encodes is not
;; visible in this pattern -- confirm against the code that emits it.
11473 (define_insn "avx_vzeroupper"
11474 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11475 UNSPECV_VZEROUPPER)]
11478 [(set_attr "type" "sse")
11479 (set_attr "modrm" "0")
11480 (set_attr "memory" "none")
11481 (set_attr "prefix" "vex")
11482 (set_attr "mode" "OI")])
;; Maps every integer vector mode to the 128-bit vector mode with the same
;; element type: 256-bit modes map to their 128-bit counterpart and
;; 128-bit modes map to themselves.
11484 (define_mode_attr AVXTOSSEMODE
11485 [(V4DI "V2DI") (V2DI "V2DI")
11486 (V8SI "V4SI") (V4SI "V4SI")
11487 (V16HI "V8HI") (V8HI "V8HI")
11488 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast<ssemodesuffix>: element 0 of operand 1 is selected as a
;; scalar.  Operand 1 always has the 128-bit mode <AVXTOSSEMODE> and may
;; be a register or memory; the destination has the iterator mode VI.
11490 (define_insn "avx2_pbroadcast<mode>"
11491 [(set (match_operand:VI 0 "register_operand" "=x")
11493 (vec_select:<ssescalarmode>
11494 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11495 (parallel [(const_int 0)]))))]
11497 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11498 [(set_attr "type" "ssemov")
11499 (set_attr "prefix_extra" "1")
11500 (set_attr "prefix" "vex")
11501 (set_attr "mode" "<sseinsnmode>")])
;; vpermd on V8SI vectors.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
;; NOTE(review): which of the two operands is the data and which is the
;; index vector cannot be told from this pattern alone -- confirm against
;; the builtin that expands to it.
11503 (define_insn "avx2_permvarv8si"
11504 [(set (match_operand:V8SI 0 "register_operand" "=x")
11506 [(match_operand:V8SI 1 "register_operand" "x")
11507 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11510 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11511 [(set_attr "type" "sselog")
11512 (set_attr "prefix" "vex")
11513 (set_attr "mode" "OI")])
;; vpermpd with an 8-bit immediate selector (operand 2) on a V4DF source.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint and a memory source can be matched directly.
;; length_immediate is 1 because the imm8 is part of the encoding.
11515 (define_insn "avx2_permv4df"
11516 [(set (match_operand:V4DF 0 "register_operand" "=x")
11518 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11519 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11522 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11523 [(set_attr "type" "sselog")
11524 (set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
11525 (set_attr "prefix" "vex")
11526 (set_attr "mode" "OI")])
;; vpermps on V8SF vectors.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
;; NOTE(review): which operand is the data and which is the index vector is
;; not determinable from this pattern alone -- confirm against the builtin.
11528 (define_insn "avx2_permvarv8sf"
11529 [(set (match_operand:V8SF 0 "register_operand" "=x")
11531 [(match_operand:V8SF 1 "register_operand" "x")
11532 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11535 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11536 [(set_attr "type" "sselog")
11537 (set_attr "prefix" "vex")
11538 (set_attr "mode" "OI")])
;; vpermq with an 8-bit immediate selector (operand 2) on a V4DI source.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint.  prefix_extra and length_immediate are set as in
;; avx2_permv4df, since the instruction has the same shape (one source
;; plus an imm8).
11540 (define_insn "avx2_permv4di"
11541 [(set (match_operand:V4DI 0 "register_operand" "=x")
11543 [(match_operand:V4DI 1 "nonimmediate_operand" "xm")
11544 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11547 "vpermq\t{%2, %1, %0|%0, %1, %2}"
11548 [(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
11549 (set_attr "prefix" "vex")
11550 (set_attr "mode" "OI")])
;; vperm2i128: combines 128-bit halves of two V4DI sources under control
;; of an 8-bit immediate (operand 3).  Operand 1 must be a register;
;; operand 2 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint.  prefix_extra and length_immediate match the
;; attributes of *avx_vperm2f128<mode>_full, which has the same operand
;; shape.
11552 (define_insn "avx2_permv2ti"
11553 [(set (match_operand:V4DI 0 "register_operand" "=x")
11555 [(match_operand:V4DI 1 "register_operand" "x")
11556 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11557 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11560 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11561 [(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
11562 (set_attr "prefix" "vex")
11563 (set_attr "mode" "OI")])
;; vbroadcastsd with a register source: element 0 of the V2DF operand is
;; duplicated into every element of the V4DF destination.
11565 (define_insn "avx2_vec_dupv4df"
11566 [(set (match_operand:V4DF 0 "register_operand" "=x")
11567 (vec_duplicate:V4DF
11569 (match_operand:V2DF 1 "register_operand" "x")
11570 (parallel [(const_int 0)]))))]
11572 "vbroadcastsd\t{%1, %0|%0, %1}"
11573 [(set_attr "type" "sselog1")
11574 (set_attr "prefix" "vex")
11575 (set_attr "mode" "V4DF")])
11577 ;; Modes handled by AVX vec_dup patterns.
;; Only 256-bit modes with 32-bit or 64-bit elements are listed.
11578 (define_mode_iterator AVX_VEC_DUP_MODE
11579 [V8SI V8SF V4DI V4DF])
;; Broadcast a scalar into a 256-bit vector of mode AVX_VEC_DUP_MODE.
;; Alternative 0 takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>.  Alternative 1 takes it from a register
;; and is discouraged by the '?' constraint modifier.
11581 (define_insn "vec_dup<mode>"
11582 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11583 (vec_duplicate:AVX_VEC_DUP_MODE
11584 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11587 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11589 [(set_attr "type" "ssemov")
11590 (set_attr "prefix_extra" "1")
11591 (set_attr "prefix" "vex")
11592 (set_attr "mode" "V8SF")])
;; vbroadcasti128 for the 256-bit integer modes in VI_256.  The 128-bit
;; source (mode <ssehalfvecmode>) must be in memory: the predicate is
;; memory_operand and the only constraint is "m".
11594 (define_insn "avx2_vbroadcasti128_<mode>"
11595 [(set (match_operand:VI_256 0 "register_operand" "=x")
11597 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11600 "vbroadcasti128\t{%1, %0|%0, %1}"
11601 [(set_attr "type" "ssemov")
11602 (set_attr "prefix_extra" "1")
11603 (set_attr "prefix" "vex")
11604 (set_attr "mode" "OI")])
;; Post-reload split (condition: TARGET_AVX && reload_completed) of a
;; vec_duplicate whose scalar source is a register.  The scalar is first
;; duplicated into operand 2, which is the destination hard register
;; re-created in <ssehalfvecmode>; that half is then concatenated with
;; itself to form the full AVX_VEC_DUP_MODE value.
11607 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11608 (vec_duplicate:AVX_VEC_DUP_MODE
11609 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11610 "TARGET_AVX && reload_completed"
11611 [(set (match_dup 2)
11612 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11614 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11615 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast a 128-bit value into both halves of a 256-bit register.
;; Three alternatives, chosen by where operand 1 lives:
;;   0: memory                    -> vbroadcastf128
;;   1: same register as operand 0 -> vinsertf128 with immediate 1
;;   2: another register ('?')     -> vperm2f128 with immediate 0
;; Only the two register forms carry an immediate byte (length_immediate
;; "0,1,1").
11617 (define_insn "avx_vbroadcastf128_<mode>"
11618 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11620 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11624 vbroadcastf128\t{%1, %0|%0, %1}
11625 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11626 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11627 [(set_attr "type" "ssemov,sselog1,sselog1")
11628 (set_attr "prefix_extra" "1")
11629 (set_attr "length_immediate" "0,1,1")
11630 (set_attr "prefix" "vex")
11631 (set_attr "mode" "V4SF,V8SF,V8SF")])
11633 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11634 ;; If it so happens that the input is in memory, use vbroadcast.
11635 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Operand 3 is the index of the element being broadcast (elt).  For a
;; memory source the address is adjusted to point at that SFmode element
;; (elt * 4 bytes) and vbroadcastss is emitted.  For a register source the
;; vpermilps immediate is elt * 0x55, i.e. the 2-bit index repeated in all
;; four selector fields.  Per the attributes, only the third alternative
;; carries an immediate byte.
11636 (define_insn "*avx_vperm_broadcast_v4sf"
11637 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11639 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11640 (match_parallel 2 "avx_vbroadcast_operand"
11641 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11644 int elt = INTVAL (operands[3]);
11645 switch (which_alternative)
11649 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11650 return "vbroadcastss\t{%1, %0|%0, %1}";
11652 operands[2] = GEN_INT (elt * 0x55);
11653 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11655 gcc_unreachable ();
11658 [(set_attr "type" "ssemov,ssemov,sselog1")
11659 (set_attr "prefix_extra" "1")
11660 (set_attr "length_immediate" "0,0,1")
11661 (set_attr "prefix" "vex")
11662 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit counterpart of *avx_vperm_broadcast_v4sf, split after reload.
;; Operand 3 is the index of the element being broadcast (elt).
;; Register source: two instructions are emitted.  First a vpermil copies
;; the wanted element across its 128-bit lane (mask 15 or 0 for V4DF
;; depending on elt & 1, otherwise (elt & 3) * 0x55); then a vperm2f128
;; with mask (elt / (<ssescalarnum> / 2)) * 0x11 copies that lane into
;; both halves of the destination.
;; Memory source: operand 1 is re-addressed to the selected scalar element
;; and the split template (a vec_duplicate of operand 1) is used.
11664 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11665 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11667 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11668 (match_parallel 2 "avx_vbroadcast_operand"
11669 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11672 "&& reload_completed"
11673 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11675 rtx op0 = operands[0], op1 = operands[1];
11676 int elt = INTVAL (operands[3]);
11682 /* Shuffle element we care about into all elements of the 128-bit lane.
11683 The other lane gets shuffled too, but we don't care. */
11684 if (<MODE>mode == V4DFmode)
11685 mask = (elt & 1 ? 15 : 0);
11687 mask = (elt & 3) * 0x55;
11688 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11690 /* Shuffle the lane we care about into both lanes of the dest. */
11691 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11692 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11696 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11697 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for vpermilpd with an immediate (DFmode vectors, iterator VF2).
;; It decodes the imm8 in operand 2 into an explicit per-element selector
;; PARALLEL: each result element uses one mask bit.  For V4DF, bits 2 and 3
;; select within the upper 128-bit lane, hence the "+ 2" on those indices.
11700 (define_expand "avx_vpermil<mode>"
11701 [(set (match_operand:VF2 0 "register_operand" "")
11703 (match_operand:VF2 1 "nonimmediate_operand" "")
11704 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11707 int mask = INTVAL (operands[2]);
11708 rtx perm[<ssescalarnum>];
11710 perm[0] = GEN_INT (mask & 1);
11711 perm[1] = GEN_INT ((mask >> 1) & 1);
11712 if (<MODE>mode == V4DFmode)
11714 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11715 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11719 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermilps with an immediate (SFmode vectors, iterator VF1).
;; It decodes the imm8 in operand 2 into an explicit per-element selector
;; PARALLEL: each of the four 2-bit fields picks one element of the low
;; lane.  For V8SF the same four fields are reused for the upper 128-bit
;; lane, offset by 4.
11722 (define_expand "avx_vpermil<mode>"
11723 [(set (match_operand:VF1 0 "register_operand" "")
11725 (match_operand:VF1 1 "nonimmediate_operand" "")
11726 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11729 int mask = INTVAL (operands[2]);
11730 rtx perm[<ssescalarnum>];
11732 perm[0] = GEN_INT (mask & 3);
11733 perm[1] = GEN_INT ((mask >> 2) & 3);
11734 perm[2] = GEN_INT ((mask >> 4) & 3);
11735 perm[3] = GEN_INT ((mask >> 6) & 3);
11736 if (<MODE>mode == V8SFmode)
11738 perm[4] = GEN_INT ((mask & 3) + 4);
11739 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11740 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11741 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11745 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermilps/vpermilpd with an immediate, matched from an explicit selector
;; PARALLEL (operand 2).  avx_vpermilp_parallel is used both in the insn
;; condition and to recover the immediate: the output code subtracts 1 from
;; its return value, so it presumably returns imm8 + 1 for an encodable
;; selector and 0 otherwise -- TODO confirm in the helper's definition.
;; Operand 2 is overwritten with the resulting const_int before printing.
11748 (define_insn "*avx_vpermilp<mode>"
11749 [(set (match_operand:VF 0 "register_operand" "=x")
11751 (match_operand:VF 1 "nonimmediate_operand" "xm")
11752 (match_parallel 2 ""
11753 [(match_operand 3 "const_int_operand" "")])))]
11755 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11757 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11758 operands[2] = GEN_INT (mask);
11759 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11761 [(set_attr "type" "sselog")
11762 (set_attr "prefix_extra" "1")
11763 (set_attr "length_immediate" "1")
11764 (set_attr "prefix" "vex")
11765 (set_attr "mode" "<MODE>")])
;; vpermilps/vpermilpd with a variable selector.  Operand 1 is the float
;; vector (register only); operand 2 is an integer vector of mode
;; <sseintvecmode> and may be a register or memory.  No immediate byte.
11767 (define_insn "avx_vpermilvar<mode>3"
11768 [(set (match_operand:VF 0 "register_operand" "=x")
11770 [(match_operand:VF 1 "register_operand" "x")
11771 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11774 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11775 [(set_attr "type" "sselog")
11776 (set_attr "prefix_extra" "1")
11777 (set_attr "prefix" "vex")
11778 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default pattern is the opaque
;; UNSPEC_VPERMIL2F128 form.  When neither bit of 0x88 is set in the
;; immediate, the operation is instead built as a vec_select from the
;; vec_concat of both inputs: the low half of the result takes nelt/2
;; consecutive elements starting at (mask & 3) * nelt/2, and the high half
;; takes them starting at ((mask >> 4) & 3) * nelt/2.
11780 (define_expand "avx_vperm2f128<mode>3"
11781 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11782 (unspec:AVX256MODE2P
11783 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11784 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11785 (match_operand:SI 3 "const_0_to_255_operand" "")]
11786 UNSPEC_VPERMIL2F128))]
11789 int mask = INTVAL (operands[3]);
11790 if ((mask & 0x88) == 0)
11792 rtx perm[<ssescalarnum>], t1, t2;
11793 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11795 base = (mask & 3) * nelt2;
11796 for (i = 0; i < nelt2; ++i)
11797 perm[i] = GEN_INT (base + i);
11799 base = ((mask >> 4) & 3) * nelt2;
11800 for (i = 0; i < nelt2; ++i)
11801 perm[i + nelt2] = GEN_INT (base + i);
11803 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11804 operands[1], operands[2]);
11805 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11806 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11807 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11813 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11814 ;; means that in order to represent this properly in rtl we'd have to
11815 ;; nest *another* vec_concat with a zero operand and do the select from
11816 ;; a 4x wide vector. That doesn't seem very nice.
;; This pattern therefore keeps the operation as an opaque
;; UNSPEC_VPERMIL2F128 and accepts any 8-bit immediate in operand 3.
11817 (define_insn "*avx_vperm2f128<mode>_full"
11818 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11819 (unspec:AVX256MODE2P
11820 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11821 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11822 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11823 UNSPEC_VPERMIL2F128))]
11825 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11826 [(set_attr "type" "sselog")
11827 (set_attr "prefix_extra" "1")
11828 (set_attr "length_immediate" "1")
11829 (set_attr "prefix" "vex")
11830 (set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select from the vec_concat of both
;; inputs.  avx_vperm2f128_parallel is used in the insn condition and to
;; recover the immediate: the output code subtracts 1 from its return
;; value, so it presumably returns imm8 + 1 for an encodable selector and
;; 0 otherwise -- TODO confirm in the helper's definition.  Operand 3 is
;; overwritten with the resulting const_int before printing.
11832 (define_insn "*avx_vperm2f128<mode>_nozero"
11833 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11834 (vec_select:AVX256MODE2P
11835 (vec_concat:<ssedoublevecmode>
11836 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11837 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11838 (match_parallel 3 ""
11839 [(match_operand 4 "const_int_operand" "")])))]
11841 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11843 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11844 operands[3] = GEN_INT (mask);
11845 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11847 [(set_attr "type" "sselog")
11848 (set_attr "prefix_extra" "1")
11849 (set_attr "length_immediate" "1")
11850 (set_attr "prefix" "vex")
11851 (set_attr "mode" "V8SF")])
;; Expander for vinsertf128 on any 256-bit mode.  It switches on the 0/1
;; immediate in operand 3 to pick gen_vec_set_lo_<mode> or
;; gen_vec_set_hi_<mode>, then emits that insn with operands 0, 1 and 2.
;; Any other value reaches gcc_unreachable.
11853 (define_expand "avx_vinsertf128<mode>"
11854 [(match_operand:V_256 0 "register_operand" "")
11855 (match_operand:V_256 1 "register_operand" "")
11856 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
11857 (match_operand:SI 3 "const_0_to_1_operand" "")]
11860 rtx (*insn)(rtx, rtx, rtx);
11862 switch (INTVAL (operands[3]))
11865 insn = gen_vec_set_lo_<mode>;
11868 insn = gen_vec_set_hi_<mode>;
11871 gcc_unreachable ();
11874 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0: operand 2 (V2DI, register or memory)
;; becomes the low half of the result, while elements 2 and 3 of operand 1
;; are kept as the high half.
11878 (define_insn "avx2_vec_set_lo_v4di"
11879 [(set (match_operand:V4DI 0 "register_operand" "=x")
11881 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11883 (match_operand:V4DI 1 "register_operand" "x")
11884 (parallel [(const_int 2) (const_int 3)]))))]
11886 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11887 [(set_attr "type" "sselog")
11888 (set_attr "prefix_extra" "1")
11889 (set_attr "length_immediate" "1")
11890 (set_attr "prefix" "vex")
11891 (set_attr "mode" "OI")])
;; vinserti128 with immediate 1: elements 0 and 1 of operand 1 are kept as
;; the low half of the result, and operand 2 (V2DI, register or memory)
;; becomes the high half.
11893 (define_insn "avx2_vec_set_hi_v4di"
11894 [(set (match_operand:V4DI 0 "register_operand" "=x")
11897 (match_operand:V4DI 1 "register_operand" "x")
11898 (parallel [(const_int 0) (const_int 1)]))
11899 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11901 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11902 [(set_attr "type" "sselog")
11903 (set_attr "prefix_extra" "1")
11904 (set_attr "length_immediate" "1")
11905 (set_attr "prefix" "vex")
11906 (set_attr "mode" "OI")])
;; vinsertf128 with immediate 0 for 256-bit modes with four elements
;; (iterator VI8F_256): operand 2 becomes the low half of the result and
;; elements 2-3 of operand 1 are kept as the high half.
11908 (define_insn "vec_set_lo_<mode>"
11909 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11910 (vec_concat:VI8F_256
11911 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11912 (vec_select:<ssehalfvecmode>
11913 (match_operand:VI8F_256 1 "register_operand" "x")
11914 (parallel [(const_int 2) (const_int 3)]))))]
11916 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11917 [(set_attr "type" "sselog")
11918 (set_attr "prefix_extra" "1")
11919 (set_attr "length_immediate" "1")
11920 (set_attr "prefix" "vex")
11921 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 1 for 256-bit modes with four elements
;; (iterator VI8F_256): elements 0-1 of operand 1 are kept as the low half
;; of the result and operand 2 becomes the high half.
11923 (define_insn "vec_set_hi_<mode>"
11924 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11925 (vec_concat:VI8F_256
11926 (vec_select:<ssehalfvecmode>
11927 (match_operand:VI8F_256 1 "register_operand" "x")
11928 (parallel [(const_int 0) (const_int 1)]))
11929 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11931 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11932 [(set_attr "type" "sselog")
11933 (set_attr "prefix_extra" "1")
11934 (set_attr "length_immediate" "1")
11935 (set_attr "prefix" "vex")
11936 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 0 for 256-bit modes with eight elements
;; (iterator VI4F_256): operand 2 becomes the low half of the result and
;; elements 4-7 of operand 1 are kept as the high half.
11938 (define_insn "vec_set_lo_<mode>"
11939 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11940 (vec_concat:VI4F_256
11941 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11942 (vec_select:<ssehalfvecmode>
11943 (match_operand:VI4F_256 1 "register_operand" "x")
11944 (parallel [(const_int 4) (const_int 5)
11945 (const_int 6) (const_int 7)]))))]
11947 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11948 [(set_attr "type" "sselog")
11949 (set_attr "prefix_extra" "1")
11950 (set_attr "length_immediate" "1")
11951 (set_attr "prefix" "vex")
11952 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 1 for 256-bit modes with eight elements
;; (iterator VI4F_256): elements 0-3 of operand 1 are kept as the low half
;; of the result and operand 2 becomes the high half.
11954 (define_insn "vec_set_hi_<mode>"
11955 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11956 (vec_concat:VI4F_256
11957 (vec_select:<ssehalfvecmode>
11958 (match_operand:VI4F_256 1 "register_operand" "x")
11959 (parallel [(const_int 0) (const_int 1)
11960 (const_int 2) (const_int 3)]))
11961 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11963 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11964 [(set_attr "type" "sselog")
11965 (set_attr "prefix_extra" "1")
11966 (set_attr "length_immediate" "1")
11967 (set_attr "prefix" "vex")
11968 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 0 on V16HI: operand 2 (V8HI) becomes the low
;; half of the result and elements 8-15 of operand 1 are kept as the high
;; half.
11970 (define_insn "vec_set_lo_v16hi"
11971 [(set (match_operand:V16HI 0 "register_operand" "=x")
11973 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11975 (match_operand:V16HI 1 "register_operand" "x")
11976 (parallel [(const_int 8) (const_int 9)
11977 (const_int 10) (const_int 11)
11978 (const_int 12) (const_int 13)
11979 (const_int 14) (const_int 15)]))))]
11981 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11982 [(set_attr "type" "sselog")
11983 (set_attr "prefix_extra" "1")
11984 (set_attr "length_immediate" "1")
11985 (set_attr "prefix" "vex")
11986 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 1 on V16HI: elements 0-7 of operand 1 are
;; kept as the low half of the result and operand 2 (V8HI) becomes the
;; high half.
11988 (define_insn "vec_set_hi_v16hi"
11989 [(set (match_operand:V16HI 0 "register_operand" "=x")
11992 (match_operand:V16HI 1 "register_operand" "x")
11993 (parallel [(const_int 0) (const_int 1)
11994 (const_int 2) (const_int 3)
11995 (const_int 4) (const_int 5)
11996 (const_int 6) (const_int 7)]))
11997 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11999 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12000 [(set_attr "type" "sselog")
12001 (set_attr "prefix_extra" "1")
12002 (set_attr "length_immediate" "1")
12003 (set_attr "prefix" "vex")
12004 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 0 on V32QI: operand 2 (V16QI) becomes the
;; low half of the result and elements 16-31 of operand 1 are kept as the
;; high half.
12006 (define_insn "vec_set_lo_v32qi"
12007 [(set (match_operand:V32QI 0 "register_operand" "=x")
12009 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12011 (match_operand:V32QI 1 "register_operand" "x")
12012 (parallel [(const_int 16) (const_int 17)
12013 (const_int 18) (const_int 19)
12014 (const_int 20) (const_int 21)
12015 (const_int 22) (const_int 23)
12016 (const_int 24) (const_int 25)
12017 (const_int 26) (const_int 27)
12018 (const_int 28) (const_int 29)
12019 (const_int 30) (const_int 31)]))))]
12021 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12022 [(set_attr "type" "sselog")
12023 (set_attr "prefix_extra" "1")
12024 (set_attr "length_immediate" "1")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "V8SF")])
;; vinsertf128 with immediate 1 on V32QI: elements 0-15 of operand 1 are
;; kept as the low half of the result and operand 2 (V16QI) becomes the
;; high half.
12028 (define_insn "vec_set_hi_v32qi"
12029 [(set (match_operand:V32QI 0 "register_operand" "=x")
12032 (match_operand:V32QI 1 "register_operand" "x")
12033 (parallel [(const_int 0) (const_int 1)
12034 (const_int 2) (const_int 3)
12035 (const_int 4) (const_int 5)
12036 (const_int 6) (const_int 7)
12037 (const_int 8) (const_int 9)
12038 (const_int 10) (const_int 11)
12039 (const_int 12) (const_int 13)
12040 (const_int 14) (const_int 15)]))
12041 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12043 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12044 [(set_attr "type" "sselog")
12045 (set_attr "prefix_extra" "1")
12046 (set_attr "length_immediate" "1")
12047 (set_attr "prefix" "vex")
12048 (set_attr "mode" "V8SF")])
;; Masked-load expander.  Operand 0 is the destination register, operand 1
;; the memory source, and operand 2 the mask, an integer vector register of
;; mode <sseintvecmode>.  Note the operand order inside the unspec: mask
;; first, then memory.
12050 (define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12051 [(set (match_operand:V48_AVX2 0 "register_operand" "")
12053 [(match_operand:<sseintvecmode> 2 "register_operand" "")
12054 (match_operand:V48_AVX2 1 "memory_operand" "")
;; Masked-store expander.  Operand 0 is the memory destination, operand 1
;; the mask (an integer vector register of mode <sseintvecmode>), and
;; operand 2 the register holding the data to store.
12059 (define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12060 [(set (match_operand:V48_AVX2 0 "memory_operand" "")
12062 [(match_operand:<sseintvecmode> 1 "register_operand" "")
12063 (match_operand:V48_AVX2 2 "register_operand" "")
;; vpmaskmov for integer vectors (iterator VI48_AVX2).  One pattern serves
;; both directions: alternative 0 is a load (memory operand 2 into register
;; operand 0) and alternative 1 is a store (register operand 2 into memory
;; operand 0).  Operand 1 is the mask register.  The visible part of the
;; condition, REG_P (operands[0]) == MEM_P (operands[2]), rejects
;; register-to-register and memory-to-memory combinations.
12068 (define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
12069 [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
12071 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12072 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
12076 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12077 "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12078 [(set_attr "type" "sselog1")
12079 (set_attr "prefix_extra" "1")
12080 (set_attr "prefix" "vex")
12081 (set_attr "mode" "<sseinsnmode>")])
;; vmaskmovps/vmaskmovpd for float vectors (iterator VF).  One pattern
;; serves both directions: alternative 0 is a load (memory operand 2 into
;; register operand 0) and alternative 1 is a store (register operand 2
;; into memory operand 0).  Operand 1 is the integer-vector mask register.
;; The visible part of the condition,
;; REG_P (operands[0]) == MEM_P (operands[2]), rejects register-to-register
;; and memory-to-memory combinations.
12083 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
12084 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
12086 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12087 (match_operand:VF 2 "nonimmediate_operand" "m,x")
12091 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12092 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12093 [(set_attr "type" "sselog1")
12094 (set_attr "prefix_extra" "1")
12095 (set_attr "prefix" "vex")
12096 (set_attr "mode" "<MODE>")])
;; Cast from a 128-bit vector to the 256-bit vector of the same element
;; type, represented as an unspec so that it stays a single insn until
;; reload has completed.  The split then re-creates one of the two operands
;; as a REG of the other operand's mode on the same hard register and emits
;; an ordinary move.
;; NOTE(review): the tests that choose between rewriting op0 and op1 are
;; not visible in this view -- confirm against the complete pattern.
12098 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12099 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12100 (unspec:AVX256MODE2P
12101 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12105 "&& reload_completed"
12108 rtx op0 = operands[0];
12109 rtx op1 = operands[1];
12111 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12113 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12114 emit_move_insn (op0, op1);
;; Vector initialisation for all 256-bit modes.  The work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination register (operand 0) and the initialiser (operand 1).
12118 (define_expand "vec_init<mode>"
12119 [(match_operand:V_256 0 "register_operand" "")
12120 (match_operand 1 "" "")]
12123 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vextracti128: extract the 128-bit half of operand 1 selected by the 0/1
;; immediate in operand 2.
;; The instruction reads its 256-bit source from a register and can write
;; the 128-bit result either to a register or to memory.  The operands are
;; constrained accordingly: operand 0 is nonimmediate ("=xm") and operand 1
;; is register-only ("x").  Allowing a memory source, as this pattern used
;; to, would produce an instruction the assembler cannot encode.
;; length_immediate is 1 because the imm8 is part of the encoding.
12127 (define_insn "avx2_extracti128"
12128 [(set (match_operand:V2DI 0 "nonimmediate_operand" "=xm")
12130 (match_operand:V4DI 1 "register_operand" "x")
12131 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
12133 "vextracti128\t{%2, %1, %0|%0, %1, %2}"
12134 [(set_attr "type" "ssemov")
12135 (set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
12136 (set_attr "prefix" "vex")
12137 (set_attr "mode" "OI")])
;; Expander for vinserti128 on V4DI.  It switches on the 0/1 immediate in
;; operand 3 to pick gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di,
;; then emits that insn with operands 0, 1 and 2.  Any other value reaches
;; gcc_unreachable.
12139 (define_expand "avx2_inserti128"
12140 [(match_operand:V4DI 0 "register_operand" "")
12141 (match_operand:V4DI 1 "register_operand" "")
12142 (match_operand:V2DI 2 "nonimmediate_operand" "")
12143 (match_operand:SI 3 "const_0_to_1_operand" "")]
12146 rtx (*insn)(rtx, rtx, rtx);
12148 switch (INTVAL (operands[3]))
12151 insn = gen_avx2_vec_set_lo_v4di;
12154 insn = gen_avx2_vec_set_hi_v4di;
12157 gcc_unreachable ();
12160 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vpsravd on V8SI: per-element arithmetic right shift of operand 1 by the
;; corresponding element of operand 2 (register or memory).
;; The result is written out element by element.  The first group of four
;; selectors (0..3) describes the low 128-bit half, and the second group
;; must select elements 4..7 to describe the high half.  The second group
;; previously repeated 0..3, which made the RTL claim that the high half of
;; the result is computed from the low-half inputs.
12164 (define_insn "avx2_ashrvv8si"
12165 [(set (match_operand:V8SI 0 "register_operand" "=x")
12171 (match_operand:V8SI 1 "register_operand" "x")
12172 (parallel [(const_int 0)]))
12174 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12175 (parallel [(const_int 0)])))
12179 (parallel [(const_int 1)]))
12182 (parallel [(const_int 1)]))))
12187 (parallel [(const_int 2)]))
12190 (parallel [(const_int 2)])))
12194 (parallel [(const_int 3)]))
12197 (parallel [(const_int 3)])))))
12203 (parallel [(const_int 4)]))
12206 (parallel [(const_int 4)])))
12210 (parallel [(const_int 5)]))
12213 (parallel [(const_int 5)]))))
12218 (parallel [(const_int 6)]))
12221 (parallel [(const_int 6)])))
12225 (parallel [(const_int 7)]))
12228 (parallel [(const_int 7)])))))))]
12230 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12231 [(set_attr "type" "sseishft")
12232 (set_attr "prefix" "vex")
12233 (set_attr "mode" "OI")])
;; vpsravd on V4SI: per-element arithmetic right shift of operand 1 by the
;; corresponding element of operand 2 (register or memory).  The result is
;; written out element by element using selectors 0..3.
12235 (define_insn "avx2_ashrvv4si"
12236 [(set (match_operand:V4SI 0 "register_operand" "=x")
12241 (match_operand:V4SI 1 "register_operand" "x")
12242 (parallel [(const_int 0)]))
12244 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12245 (parallel [(const_int 0)])))
12249 (parallel [(const_int 1)]))
12252 (parallel [(const_int 1)]))))
12257 (parallel [(const_int 2)]))
12260 (parallel [(const_int 2)])))
12264 (parallel [(const_int 3)]))
12267 (parallel [(const_int 3)]))))))]
12269 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12270 [(set_attr "type" "sseishft")
12271 (set_attr "prefix" "vex")
12272 (set_attr "mode" "TI")])
;; vp<lshift_insn>vd on V8SI: per-element logical shift of operand 1 by
;; the corresponding element of operand 2 (register or memory); the code
;; iterator lshift supplies the shift direction.
;; The result is written out element by element.  The first group of four
;; selectors (0..3) describes the low 128-bit half, and the second group
;; must select elements 4..7 to describe the high half.  The second group
;; previously repeated 0..3, which made the RTL claim that the high half of
;; the result is computed from the low-half inputs.
12274 (define_insn "avx2_<lshift>vv8si"
12275 [(set (match_operand:V8SI 0 "register_operand" "=x")
12281 (match_operand:V8SI 1 "register_operand" "x")
12282 (parallel [(const_int 0)]))
12284 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12285 (parallel [(const_int 0)])))
12289 (parallel [(const_int 1)]))
12292 (parallel [(const_int 1)]))))
12297 (parallel [(const_int 2)]))
12300 (parallel [(const_int 2)])))
12304 (parallel [(const_int 3)]))
12307 (parallel [(const_int 3)])))))
12313 (parallel [(const_int 4)]))
12316 (parallel [(const_int 4)])))
12320 (parallel [(const_int 5)]))
12323 (parallel [(const_int 5)]))))
12328 (parallel [(const_int 6)]))
12331 (parallel [(const_int 6)])))
12335 (parallel [(const_int 7)]))
12338 (parallel [(const_int 7)])))))))]
12340 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12341 [(set_attr "type" "sseishft")
12342 (set_attr "prefix" "vex")
12343 (set_attr "mode" "OI")])
;; vp<lshift_insn>v<ssemodesuffix> for the modes in VI4SD_AVX2: per-element
;; logical shift of operand 1 by the corresponding element of operand 2
;; (register or memory).  The result is written as a vec_concat of two
;; <ssehalfvecmode> halves, each a pair of scalar shifts, using element
;; selectors 0..3.
12345 (define_insn "avx2_<lshift>v<mode>"
12346 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12347 (vec_concat:VI4SD_AVX2
12348 (vec_concat:<ssehalfvecmode>
12349 (lshift:<ssescalarmode>
12350 (vec_select:<ssescalarmode>
12351 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12352 (parallel [(const_int 0)]))
12353 (vec_select:<ssescalarmode>
12354 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12355 (parallel [(const_int 0)])))
12356 (lshift:<ssescalarmode>
12357 (vec_select:<ssescalarmode>
12359 (parallel [(const_int 1)]))
12360 (vec_select:<ssescalarmode>
12362 (parallel [(const_int 1)]))))
12363 (vec_concat:<ssehalfvecmode>
12364 (lshift:<ssescalarmode>
12365 (vec_select:<ssescalarmode>
12367 (parallel [(const_int 2)]))
12368 (vec_select:<ssescalarmode>
12370 (parallel [(const_int 2)])))
12371 (lshift:<ssescalarmode>
12372 (vec_select:<ssescalarmode>
12374 (parallel [(const_int 3)]))
12375 (vec_select:<ssescalarmode>
12377 (parallel [(const_int 3)]))))))]
12379 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12380 [(set_attr "type" "sseishft")
12381 (set_attr "prefix" "vex")
12382 (set_attr "mode" "<sseinsnmode>")])
;; vp<lshift_insn>vq on V2DI: per-element logical shift of operand 1 by the
;; corresponding element of operand 2 (register or memory), written out for
;; elements 0 and 1.
12384 (define_insn "avx2_<lshift>vv2di"
12385 [(set (match_operand:V2DI 0 "register_operand" "=x")
12389 (match_operand:V2DI 1 "register_operand" "x")
12390 (parallel [(const_int 0)]))
12392 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12393 (parallel [(const_int 0)])))
12397 (parallel [(const_int 1)]))
12400 (parallel [(const_int 1)])))))]
12402 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12403 [(set_attr "type" "sseishft")
12404 (set_attr "prefix" "vex")
12405 (set_attr "mode" "TI")])
;; Concatenate two 128-bit halves into a 256-bit register.
;; Alternative 0 (operand 2 in a register or memory): vinsertf128 with
;; immediate 1 places operand 2 in the high half; operand 1 is printed with
;; the %t modifier.
;; Alternative 1 (operand 2 matches constraint "C"): only a 128-bit move of
;; operand 1 is emitted, with the destination printed through %x0.  The
;; move mnemonic (vmovaps, vmovapd or vmovdqa) is chosen from the insn's
;; "mode" attribute.
12407 (define_insn "*vec_concat<mode>_avx"
12408 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12410 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12411 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12414 switch (which_alternative)
12417 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12419 switch (get_attr_mode (insn))
12422 return "vmovaps\t{%1, %x0|%x0, %1}";
12424 return "vmovapd\t{%1, %x0|%x0, %1}";
12426 return "vmovdqa\t{%1, %x0|%x0, %1}";
12429 gcc_unreachable ();
12432 [(set_attr "type" "sselog,ssemov")
12433 (set_attr "prefix_extra" "1,*")
12434 (set_attr "length_immediate" "1,*")
12435 (set_attr "prefix" "vex")
12436 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps, 128-bit register form.  The conversion is expressed as a V8SF
;; unspec of the V8HI source from which the V4SF result is selected.  The
;; selector must name the four low elements 0, 1, 2, 3 in order; it
;; previously read 0, 1, 1, 2, which duplicates element 1 and drops
;; element 3.
12438 (define_insn "vcvtph2ps"
12439 [(set (match_operand:V4SF 0 "register_operand" "=x")
12441 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12443 (parallel [(const_int 0) (const_int 1)
12444 (const_int 2) (const_int 3)])))]
12446 "vcvtph2ps\t{%1, %0|%0, %1}"
12447 [(set_attr "type" "ssecvt")
12448 (set_attr "prefix" "vex")
12449 (set_attr "mode" "V4SF")])
;; vcvtph2ps with a memory source: a V4HI value in memory is converted
;; directly to a V4SF register result.
12451 (define_insn "*vcvtph2ps_load"
12452 [(set (match_operand:V4SF 0 "register_operand" "=x")
12453 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12454 UNSPEC_VCVTPH2PS))]
12456 "vcvtph2ps\t{%1, %0|%0, %1}"
12457 [(set_attr "type" "ssecvt")
12458 (set_attr "prefix" "vex")
12459 (set_attr "mode" "V8SF")])
;; vcvtph2ps, 256-bit form: all eight elements of the V8HI source (register
;; or memory) are converted to a V8SF register result.
12461 (define_insn "vcvtph2ps256"
12462 [(set (match_operand:V8SF 0 "register_operand" "=x")
12463 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12464 UNSPEC_VCVTPH2PS))]
12466 "vcvtph2ps\t{%1, %0|%0, %1}"
12467 [(set_attr "type" "ssecvt")
12468 (set_attr "prefix" "vex")
12469 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph with a V8HI register destination.
;; The V4SF source and the immediate in operand 2 feed a V4HI unspec.  The
;; preparation code sets operand 3 to the V4HImode zero constant, the
;; operand *vcvtps2ph matches with const0_operand.
12471 (define_expand "vcvtps2ph"
12472 [(set (match_operand:V8HI 0 "register_operand" "")
12474 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12475 (match_operand:SI 2 "const_0_to_255_operand" "")]
12479 "operands[3] = CONST0_RTX (V4HImode);")
;; vcvtps2ph, register destination.  The V4HI conversion result is combined
;; with operand 3, which must be a V4HI zero (const0_operand), to form the
;; V8HI destination.  Operand 2 is the immediate control byte.
12481 (define_insn "*vcvtps2ph"
12482 [(set (match_operand:V8HI 0 "register_operand" "=x")
12484 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12485 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12487 (match_operand:V4HI 3 "const0_operand" "")))]
12489 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12490 [(set_attr "type" "ssecvt")
12491 (set_attr "prefix" "vex")
12492 (set_attr "mode" "V4SF")])
;; vcvtps2ph, memory destination: the V4HI conversion result is stored
;; straight to memory.  Operand 2 is the immediate control byte.
12494 (define_insn "*vcvtps2ph_store"
12495 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12496 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12497 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12498 UNSPEC_VCVTPS2PH))]
12500 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12501 [(set_attr "type" "ssecvt")
12502 (set_attr "prefix" "vex")
12503 (set_attr "mode" "V4SF")])
;; vcvtps2ph, 256-bit source: a V8SF register is converted to V8HI, and the
;; destination may be a register or memory.  Operand 2 is the immediate
;; control byte.
12505 (define_insn "vcvtps2ph256"
12506 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12507 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12508 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12509 UNSPEC_VCVTPS2PH))]
12511 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12512 [(set_attr "type" "ssecvt")
12513 (set_attr "prefix" "vex")
12514 (set_attr "mode" "V8SF")])
12516 ;; For gather* insn patterns
;; The iterator lists the data modes.  The mode attribute of the same name
;; maps each data mode to an SImode vector mode, which the gathersi
;; patterns use for their index operand (operand 3): V4SI for every mode
;; except the eight-element ones, which use V8SI.
12517 (define_mode_iterator VEC_GATHER_MODE
12518 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
12519 (define_mode_attr VEC_GATHER_MODE
12520 [(V2DI "V4SI") (V2DF "V4SI")
12521 (V4DI "V4SI") (V4DF "V4SI")
12522 (V4SI "V4SI") (V4SF "V4SI")
12523 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for gathers indexed by 32-bit elements.  Operand 1 has the same
;; mode as the destination, operand 2 is the memory reference, operand 3
;; the index vector (mode <VEC_GATHER_MODE>), operand 4 another vector of
;; the destination mode, and operand 5 the scale.
;; The scale predicate is spelled exactly "const1248_operand", as in the
;; matching insn; a stray trailing space made it a different, unknown
;; predicate name.
12525 (define_expand "avx2_gathersi<mode>"
12526 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12527 (unspec:VEC_GATHER_MODE
12528 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12529 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12530 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12531 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12532 (match_operand:SI 5 "const1248_operand" "")]
;; v<gthrfirstp>gatherd<gthrlastp>: gather with 32-bit indices.
;; Operand 1 is tied to the destination ("0").  The address is printed as
;; (%2, %3, %c5): operand 2 is a general register of mode P, operand 3 the
;; index vector, and operand 5 a constant accepted by const1248_operand.
;; Operand 4 is a vector register of the destination mode; presumably it is
;; the gather mask -- TODO confirm against the builtin expansion.
12536 (define_insn "*avx2_gathersi<mode>"
12537 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
12538 (unspec:VEC_GATHER_MODE
12539 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
12540 (mem:<ssescalarmode>
12541 (match_operand:P 2 "register_operand" "r"))
12542 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
12543 (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
12544 (match_operand:SI 5 "const1248_operand" "n")]
12547 "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12548 [(set_attr "type" "ssemov")
12549 (set_attr "prefix" "vex")
12550 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers indexed by 64-bit elements.  Operand 1 has the same
;; mode as the destination, operand 2 is the memory reference, operand 3
;; the index vector (mode <AVXMODE48P_DI>), operand 4 another vector of the
;; destination mode, and operand 5 the scale.
;; The scale predicate is spelled exactly "const1248_operand", as in the
;; matching insn; a stray trailing space made it a different, unknown
;; predicate name.
12552 (define_expand "avx2_gatherdi<mode>"
12553 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12554 (unspec:VEC_GATHER_MODE
12555 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12556 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12557 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12558 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12559 (match_operand:SI 5 "const1248_operand" "")]
;; v<gthrfirstp>gatherq<gthrlastp>: gather with 64-bit indices, for the
;; modes in AVXMODE48P_DI.
;; Operand 1 is tied to the destination ("0").  The address is printed as
;; (%2, %3, %c5): operand 2 is a general register of mode P, operand 3 the
;; index vector (mode <AVXMODE48P_DI>), and operand 5 a constant accepted
;; by const1248_operand.  Operand 4 is a vector register of the destination
;; mode; presumably it is the gather mask -- TODO confirm against the
;; builtin expansion.
12563 (define_insn "*avx2_gatherdi<mode>"
12564 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
12565 (unspec:AVXMODE48P_DI
12566 [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
12567 (mem:<ssescalarmode>
12568 (match_operand:P 2 "register_operand" "r"))
12569 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
12570 (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
12571 (match_operand:SI 5 "const1248_operand" "n")]
12574 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12575 [(set_attr "type" "ssemov")
12576 (set_attr "prefix" "vex")
12577 (set_attr "mode" "<sseinsnmode>")])
12579 ;; Special handling for VEX.256 with float arguments
12580 ;; since the operands are still xmm registers
;; Here the data operands (0, 1 and 4) use the 128-bit modes in VI4F_128
;; while the index vector (operand 3) is the 256-bit V4DI.
;; The scale predicate is spelled exactly "const1248_operand", as in the
;; matching insn; a stray trailing space made it a different, unknown
;; predicate name.
12581 (define_expand "avx2_gatherdi<mode>256"
12582 [(set (match_operand:VI4F_128 0 "register_operand" "")
12584 [(match_operand:VI4F_128 1 "register_operand" "")
12585 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12586 (match_operand:V4DI 3 "register_operand" "")
12587 (match_operand:VI4F_128 4 "register_operand" "")
12588 (match_operand:SI 5 "const1248_operand" "")]
;; v<gthrfirstp>gatherq<gthrlastp> with a 256-bit index vector (V4DI) and
;; 128-bit data operands (modes in VI4F_128).
;; Operand 1 is tied to the destination ("0").  The address is printed as
;; (%2, %3, %c5): operand 2 is a general register of mode P, operand 3 the
;; index vector, and operand 5 a constant accepted by const1248_operand.
;; Operand 4 is a vector register of the destination mode; presumably it is
;; the gather mask -- TODO confirm against the builtin expansion.
12592 (define_insn "*avx2_gatherdi<mode>256"
12593 [(set (match_operand:VI4F_128 0 "register_operand" "=x")
12595 [(match_operand:VI4F_128 1 "register_operand" "0")
12596 (mem:<ssescalarmode>
12597 (match_operand:P 2 "register_operand" "r"))
12598 (match_operand:V4DI 3 "register_operand" "x")
12599 (match_operand:VI4F_128 4 "register_operand" "x")
12600 (match_operand:SI 5 "const1248_operand" "n")]
12603 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12604 [(set_attr "type" "ssemov")
12605 (set_attr "prefix" "vex")
12606 (set_attr "mode" "<sseinsnmode>")])