1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; Each 256-bit mode is enabled only when TARGET_AVX holds; the 128-bit
;; modes listed here carry no condition.
;; NOTE(review): the comment above mentions V1TImode, but no V1TI/V2TI
;; entry is visible in this list (the embedded listing numbers skip
;; 26 -> 28) -- confirm against the full file.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Integer and float vector modes.  The 256-bit modes require TARGET_AVX
;; and V2DF requires TARGET_SSE2; the other 128-bit modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Only V2DF is conditional (TARGET_SSE2); the rest are always enabled.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No per-mode condition is attached here, so any ISA gating must come
;; from the patterns that use this iterator.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF is enabled only under TARGET_AVX; V4SF is unconditional.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF is enabled only under TARGET_AVX.  Unlike in the VF iterator, V2DF
;; carries no TARGET_SSE2 condition here, so users of VF2 must supply it.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): the mode list of this iterator is not visible (the
;; embedded listing numbers skip 66 -> 69); the form is left unterminated
;; as seen here.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; The 256-bit modes require TARGET_AVX; the 128-bit modes are unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; All vector integer modes, with the 256-bit modes gated on TARGET_AVX2
;; (not TARGET_AVX); the 128-bit modes are unconditional.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI is enabled only under TARGET_AVX; V16QI is unconditional.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI is enabled only under TARGET_AVX; V2DI is unconditional.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Per-element-size integer vector iterators (QI, HI, SI and DI elements).
;; Each pairs the 256-bit mode, enabled only under TARGET_AVX2, with the
;; unconditional 128-bit mode of the same element size.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; TImode-element vector modes: V2TI only under TARGET_AVX2, V1TI always.
102 (define_mode_iterator VIMAX_AVX2
103 [(V2TI "TARGET_AVX2") V1TI])
;; Iterates over V4DI (only under TARGET_AVX2) and TImode.
;; NOTE(review): the intended use is not evident from the visible
;; patterns -- confirm against the users of this iterator.
105 (define_mode_iterator SSESCALARMODE
106 [(V4DI "TARGET_AVX2") TI])
;; Combinations of integer element sizes; the digits in each name give the
;; element sizes in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI).  In every
;; iterator the 256-bit mode requires TARGET_AVX2 and the 128-bit mode is
;; unconditional.
108 (define_mode_iterator VI12_AVX2
109 [(V32QI "TARGET_AVX2") V16QI
110 (V16HI "TARGET_AVX2") V8HI])
112 (define_mode_iterator VI24_AVX2
113 [(V16HI "TARGET_AVX2") V8HI
114 (V8SI "TARGET_AVX2") V4SI])
116 (define_mode_iterator VI124_AVX2
117 [(V32QI "TARGET_AVX2") V16QI
118 (V16HI "TARGET_AVX2") V8HI
119 (V8SI "TARGET_AVX2") V4SI])
121 (define_mode_iterator VI248_AVX2
122 [(V16HI "TARGET_AVX2") V8HI
123 (V8SI "TARGET_AVX2") V4SI
124 (V4DI "TARGET_AVX2") V2DI])
;; SI- and DI-element integer vector modes, 128-bit and 256-bit.
;; Unlike the other *_AVX2 iterators above, no mode here carries a
;; TARGET_AVX2 condition, so users must gate on the ISA themselves.
126 (define_mode_iterator VI48_AVX2
127 [V8SI V4SI V4DI V2DI])
;; NOTE(review): the mode list of this iterator is not visible (the
;; embedded listing numbers skip 129 -> 132); the form is left
;; unterminated as seen here.
129 (define_mode_iterator VI4SD_AVX2
;; The visible entries are the SI/DI-element integer vector modes, every
;; one of them (128-bit as well as 256-bit) gated on TARGET_AVX2.
;; NOTE(review): the opening of the mode list is not visible (the embedded
;; listing numbers skip 132 -> 135), so further modes may precede these.
132 (define_mode_iterator V48_AVX2
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name string per integer vector mode: "sse2" for the 128-bit modes
;; and "avx2" for the 256-bit modes.
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
;; ISA-name string per integer vector mode: "ssse3" for the 128-bit modes
;; and "avx2" for the 256-bit modes.
;; NOTE(review): the list is not closed within the visible lines (the
;; embedded listing numbers skip 149 -> 152); a final row is presumably
;; missing.
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
;; ISA-name string per integer vector mode: "sse4_1" for the 128-bit modes
;; and "avx2" for the 256-bit modes.
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
;; ISA-name string by element class rather than by width: every float
;; vector mode maps to "avx" and every SI/DI integer vector mode maps to
;; "avx2", for 128-bit and 256-bit modes alike.
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 ;; Mapping of logic-shift operators
;; Iterates over the logical right shift and the left shift RTL codes.
165 (define_code_iterator lshift [lshiftrt ashift])
167 ;; Base name for define_insn
;; NOTE(review): "srl"/"sll" look like instruction-mnemonic fragments,
;; while the "lshr"/"lshl" values of the `lshift' attribute below look like
;; pattern-name fragments; the two "Base name" comments may be swapped --
;; confirm against the patterns that use <lshift_insn> and <lshift>.
168 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
170 ;; Base name for insn mnemonic
171 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; For HImode-element vectors: the vector mode with the same number of
;; elements but SImode (twice as wide) elements.
173 (define_mode_attr ssedoublemode
174 [(V16HI "V16SI") (V8HI "V8SI")])
;; For DImode-element vectors: the QImode-element vector mode of the same
;; total size.
176 (define_mode_attr ssebytemode
177 [(V4DI "V32QI") (V2DI "V16QI")])
179 ;; All 128bit vector integer modes
;; No per-mode condition is attached.
180 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
182 ;; All 256bit vector integer modes
;; No per-mode condition is attached.
183 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
185 ;; Random 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that the
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
186 (define_mode_iterator VI12_128 [V16QI V8HI])
187 (define_mode_iterator VI14_128 [V16QI V4SI])
188 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
189 (define_mode_iterator VI24_128 [V8HI V4SI])
190 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
192 ;; Random 256bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that the
;; iterator covers.  VI1248_256 lists the same four modes as VI_256.
193 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
194 (define_mode_iterator VI1248_256 [V32QI V16HI V8SI V4DI])
195 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
197 ;; Int-float size matches
;; Each iterator pairs the integer and the float vector mode that share
;; element size (4 or 8 bytes) and total vector width (128 or 256 bits).
198 (define_mode_iterator VI4F_128 [V4SI V4SF])
199 (define_mode_iterator VI8F_128 [V2DI V2DF])
200 (define_mode_iterator VI4F_256 [V8SI V8SF])
201 (define_mode_iterator VI8F_256 [V4DI V4DF])
203 ;; Mapping from float mode to required SSE level
;; SF-based 128-bit and scalar modes map to "sse", DF-based ones to "sse2",
;; and both 256-bit modes to "avx".  Used as <sse> in pattern names.
204 (define_mode_attr sse
205 [(SF "sse") (DF "sse2")
206 (V4SF "sse") (V2DF "sse2")
207 (V8SF "avx") (V4DF "avx")])
;; ISA-name string for the QI- and DI-element integer vector modes:
;; "sse2" for the 128-bit modes and "avx" for the 256-bit modes.
209 (define_mode_attr sse2
210 [(V16QI "sse2") (V32QI "avx")
211 (V2DI "sse2") (V4DI "avx")])
;; ISA-name string for the QI-element vector modes: "sse3" for V16QI and
;; "avx" for V32QI.
213 (define_mode_attr sse3
214 [(V16QI "sse3") (V32QI "avx")])
;; ISA-name string for the float vector modes: "sse4_1" for the 128-bit
;; modes and "avx" for the 256-bit modes.
216 (define_mode_attr sse4_1
217 [(V4SF "sse4_1") (V2DF "sse4_1")
218 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix by vector width: "256" for the 256-bit modes and
;; the empty string for the 128-bit modes.
220 (define_mode_attr avxsizesuffix
221 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
222 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
223 (V8SF "256") (V4DF "256")
224 (V4SF "") (V2DF "")])
226 ;; SSE instruction mode
;; Value for the "mode" insn attribute: integer vector modes map to OI
;; (256-bit) or TI (128-bit); float vector modes map to themselves.
;; NOTE(review): the list is not closed within the visible lines (the
;; embedded listing numbers skip 231 -> 234).
227 (define_mode_attr sseinsnmode
228 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
229 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
230 (V8SF "V8SF") (V4DF "V4DF")
231 (V4SF "V4SF") (V2DF "V2DF")
234 ;; Mapping of vector float modes to an integer mode of the same size
;; Float vector modes map to the integer vector mode with the same element
;; count and total size; SI/DI integer vector modes map to themselves.
;; Each source mode appears exactly once (a row that repeated the V4DF
;; and V8SF entries was dropped as redundant).
235 (define_mode_attr sseintvecmode
236 [(V8SF "V8SI") (V4DF "V4DI")
237 (V4SF "V4SI") (V2DF "V2DI")
239 (V8SI "V8SI") (V4DI "V4DI")
240 (V4SI "V4SI") (V2DI "V2DI")])
242 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled.
243 (define_mode_attr ssedoublevecmode
244 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
245 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
246 (V8SF "V16SF") (V4DF "V8DF")
247 (V4SF "V8SF") (V2DF "V4DF")])
249 ;; Mapping of vector modes to a vector mode of half size
;; The element mode is kept and the element count is halved.
;; NOTE(review): the list is not closed within the visible lines (the
;; embedded listing numbers skip 253 -> 256), and no entry for V2DI, V4SF
;; or V2DF is visible.
250 (define_mode_attr ssehalfvecmode
251 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
252 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
253 (V8SF "V4SF") (V4DF "V2DF")
256 ;; Mapping of vector modes back to the scalar modes
;; Gives the element mode of each vector mode.
257 (define_mode_attr ssescalarmode
258 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
259 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
260 (V8SF "SF") (V4DF "DF")
261 (V4SF "SF") (V2DF "DF")])
263 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
264 (define_mode_attr ssescalarnum
265 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
266 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
267 (V8SF "8") (V4DF "4")
268 (V4SF "4") (V2DF "2")])
270 ;; SSE scalar suffix for vector modes
;; In the visible rows, modes with 4-byte elements map to "ss" and modes
;; with 8-byte elements map to "sd"; this holds for the integer modes
;; V8SI and V4DI as well as for the float modes.
;; NOTE(review): the first and last rows of the list are not visible (the
;; embedded listing numbers skip 271 -> 273 and 275 -> 278).
271 (define_mode_attr ssescalarmodesuffix
273 (V8SF "ss") (V4DF "sd")
274 (V4SF "ss") (V2DF "sd")
275 (V8SI "ss") (V4DI "sd")
278 ;; Pack/unpack vector modes
;; Unpack direction: each integer vector mode maps to the mode of the same
;; total size with elements twice as wide and half as many of them.
279 (define_mode_attr sseunpackmode
280 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
281 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Pack direction: each integer vector mode maps to the mode of the same
;; total size with elements half as wide and twice as many of them.
283 (define_mode_attr ssepackmode
284 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
285 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
287 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
288 (define_mode_attr sserotatemax
289 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
291 ;; Mapping of mode to cast intrinsic name
;; Defined for three 256-bit modes only: V8SI -> "si", V8SF -> "ps",
;; V4DF -> "pd".
292 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
294 ;; Instruction suffix for sign and zero extensions.
;; sign_extend gives "sx" and zero_extend gives "zx".
295 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; Three 256-bit modes: V8SI, V8SF and V4DF.  These are exactly the modes
;; for which the `castmode' attribute is defined.
298 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; 4- and 8-byte-element vector modes: all four such 128-bit modes plus
;; the 256-bit V4DI and V4DF.  V8SI and V8SF are not iterated over, even
;; though the mode attribute of the same name has entries for them.
300 (define_mode_iterator AVXMODE48P_DI
301 [V2DI V2DF V4DI V4DF V4SF V4SI])
;; Maps each mode to a DImode-element vector mode: 2- and 4-element DI/DF
;; modes map to the DI vector of the same shape, while SI/SF-element modes
;; map to the DI vector with half as many elements as the source.
;; NOTE(review): presumably the index-vector mode for gather patterns,
;; given the neighbouring gthr* attributes -- confirm against the users.
302 (define_mode_attr AVXMODE48P_DI
303 [(V2DI "V2DI") (V2DF "V2DI")
304 (V4DI "V4DI") (V4DF "V4DI")
305 (V4SI "V2DI") (V4SF "V2DI")
306 (V8SI "V4DI") (V8SF "V4DI")])
;; In the visible rows, integer vector modes map to "p" and float vector
;; modes map to the empty string.
;; NOTE(review): rows are missing from the middle of this list (the
;; embedded listing numbers skip 308 -> 311); the V4DI/V4DF/V4SI/V4SF
;; entries that `gthrlastp' has are not visible here.
307 (define_mode_attr gthrfirstp
308 [(V2DI "p") (V2DF "")
311 (V8SI "p") (V8SF "")])
;; Element-type suffix string: "q" for DI-element modes, "d" for
;; SI-element modes, "pd" for DF-element modes and "ps" for SF-element
;; modes.
312 (define_mode_attr gthrlastp
313 [(V2DI "q") (V2DF "pd")
314 (V4DI "q") (V4DF "pd")
315 (V4SI "d") (V4SF "ps")
316 (V8SI "d") (V8SF "ps")])
;; Scalar SF/DF plus the 128-bit and 256-bit SF/DF vector modes; no
;; per-mode condition is attached.
318 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
320 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for an N-element vector, i.e. a mask with one
;; bit set per element.
321 (define_mode_attr blendbits
322 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
324 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move patterns
330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 ;; All of these patterns are enabled for SSE1 as well as SSE2.
333 ;; This is essential for maintaining stable calling conventions.
;; Expander for vector moves in every V16 mode; the work is delegated to
;; ix86_expand_vector_move.
;; NOTE(review): the embedded listing numbers skip 337 -> 340 and the form
;; is not closed within the visible lines (enabling condition and
;; surrounding braces are not shown).
335 (define_expand "mov<mode>"
336 [(set (match_operand:V16 0 "nonimmediate_operand" "")
337 (match_operand:V16 1 "nonimmediate_operand" ""))]
340 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the V16 modes.  Alternatives:
;;   0: constant ("C") into an SSE register (type sselog1); the opcode
;;      comes from standard_sse_constant_opcode
;;   1: register or memory into an SSE register (type ssemov)
;;   2: SSE register into memory (type ssemov)
;; The visible part of the condition requires at least one operand to be a
;; register.  For alternatives 1 and 2 the template is chosen from the
;; insn's "mode" attribute: movaps / movapd / movdqa, or the unaligned
;; vmovups / vmovupd / vmovdqu when either operand is misaligned (combined
;; with a further test on a line that is not visible here), and movaps in
;; place of movapd / movdqa under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
;; The "mode" attribute is <sseinsnmode> under TARGET_AVX; otherwise V4SF
;; when optimizing for size, when SSE2 is unavailable, or for a store
;; (alternative 2) under TARGET_SSE_TYPELESS_STORES; otherwise V4SF / V2DF
;; for those two modes and TI as the fallback.
;; NOTE(review): many lines of this definition (case labels, braces, part
;; of the condition) are not visible; the embedded listing numbers skip
;; repeatedly, e.g. 346 -> 348 and 357 -> 362.
344 (define_insn "*mov<mode>_internal"
345 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
346 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
348 && (register_operand (operands[0], <MODE>mode)
349 || register_operand (operands[1], <MODE>mode))"
351 switch (which_alternative)
354 return standard_sse_constant_opcode (insn, operands[1]);
357 switch (get_attr_mode (insn))
362 && (misaligned_operand (operands[0], <MODE>mode)
363 || misaligned_operand (operands[1], <MODE>mode)))
364 return "vmovups\t{%1, %0|%0, %1}";
366 return "%vmovaps\t{%1, %0|%0, %1}";
371 && (misaligned_operand (operands[0], <MODE>mode)
372 || misaligned_operand (operands[1], <MODE>mode)))
373 return "vmovupd\t{%1, %0|%0, %1}";
374 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
375 return "%vmovaps\t{%1, %0|%0, %1}";
377 return "%vmovapd\t{%1, %0|%0, %1}";
382 && (misaligned_operand (operands[0], <MODE>mode)
383 || misaligned_operand (operands[1], <MODE>mode)))
384 return "vmovdqu\t{%1, %0|%0, %1}";
385 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
386 return "%vmovaps\t{%1, %0|%0, %1}";
388 return "%vmovdqa\t{%1, %0|%0, %1}";
397 [(set_attr "type" "sselog1,ssemov,ssemov")
398 (set_attr "prefix" "maybe_vex")
400 (cond [(match_test "TARGET_AVX")
401 (const_string "<sseinsnmode>")
402 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
403 (not (match_test "TARGET_SSE2")))
404 (and (eq_attr "alternative" "2")
405 (match_test "TARGET_SSE_TYPELESS_STORES")))
406 (const_string "V4SF")
407 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
408 (const_string "V4SF")
409 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
410 (const_string "V2DF")
412 (const_string "TI")))])
;; Emits (v)movq from a V2DI register or memory operand into a V2DI
;; register; the RTL selects element 0 of operand 1.
;; NOTE(review): the embedded listing numbers skip 415 -> 418 and
;; 419 -> 422, so the outer RTL wrapper and the enabling condition are not
;; visible here.
414 (define_insn "sse2_movq128"
415 [(set (match_operand:V2DI 0 "register_operand" "=x")
418 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
419 (parallel [(const_int 0)]))
422 "%vmovq\t{%1, %0|%0, %1}"
423 [(set_attr "type" "ssemov")
424 (set_attr "prefix" "maybe_vex")
425 (set_attr "mode" "TI")])
427 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
428 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
429 ;; from memory, we'd prefer to load the memory directly into the %xmm
430 ;; register. To facilitate this happy circumstance, this pattern won't
431 ;; split until after register allocation. If the 64-bit value didn't
432 ;; come from memory, this is the best we can do. This is much better
433 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from it.
;; Moves a DImode value (viewed as a V4SI subreg) into an SSE register on
;; 32-bit targets with SSE2 and inter-unit moves.  It is split only after
;; reload:
;;   - source in integer registers: each 32-bit half is loaded with
;;     sse2_loadld (the first into operand 0, the second into the scratch
;;     operand 2), followed by a vec_interleave_lowv4si whose remaining
;;     arguments are not visible here;
;;   - source in memory: a single vec_concatv2di of the DImode memory with
;;     zero, written through the V2DI lowpart of operand 0.
;; The scratch is an early-clobber SSE register in the register
;; alternative and unused ("X") in the memory alternative.
;; NOTE(review): several lines are not visible (the embedded listing
;; numbers skip, e.g. 436 -> 438 and 454 -> 457).
436 (define_insn_and_split "movdi_to_sse"
438 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
439 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
440 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
441 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
443 "&& reload_completed"
446 if (register_operand (operands[1], DImode))
448 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
449 Assemble the 64-bit DImode value in an xmm register. */
450 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
451 gen_rtx_SUBREG (SImode, operands[1], 0)));
452 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
453 gen_rtx_SUBREG (SImode, operands[1], 4)));
454 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
457 else if (memory_operand (operands[1], DImode))
458 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
459 operands[1], const0_rtx));
;; After reload, rewrites a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand: operand 1 is narrowed to SFmode with
;; simplify_gen_subreg and operand 2 becomes the V4SF zero constant; the
;; replacement RTL uses a vec_duplicate of the narrowed operand.
;; NOTE(review): the introducing form (presumably define_split) and parts
;; of the replacement RTL are not visible; the embedded listing numbers
;; skip 467 -> 470 and 470 -> 474.
465 [(set (match_operand:V4SF 0 "register_operand" "")
466 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
467 "TARGET_SSE && reload_completed"
470 (vec_duplicate:V4SF (match_dup 1))
474 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
475 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, rewrites a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand into a vec_concat of the DFmode low
;; part (operand 1, narrowed with simplify_gen_subreg) and a DFmode zero
;; (operand 2).
;; NOTE(review): the introducing form (presumably define_split) and the
;; braces around the preparation code are not visible.
479 [(set (match_operand:V2DF 0 "register_operand" "")
480 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
481 "TARGET_SSE2 && reload_completed"
482 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
484 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
485 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a V16-mode register operand; delegates to
;; ix86_expand_push.
;; NOTE(review): the enabling condition and the closing lines are not
;; visible (the embedded listing numbers skip 489 -> 492).
488 (define_expand "push<mode>1"
489 [(match_operand:V16 0 "register_operand" "")]
492 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the V16 modes; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition and the closing lines are not
;; visible (the embedded listing numbers skip 498 -> 501).
496 (define_expand "movmisalign<mode>"
497 [(set (match_operand:V16 0 "nonimmediate_operand" "")
498 (match_operand:V16 1 "nonimmediate_operand" ""))]
501 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float-vector move (VF modes).  When both
;; operands are memory, the source is first forced into a register so
;; that the matching insn's no-mem-to-mem condition is satisfied.
;; NOTE(review): the unspec wrapper, the enabling condition and the
;; closing lines are not visible (the embedded listing numbers skip
;; 506 -> 508 and 508 -> 512).
505 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
506 [(set (match_operand:VF 0 "nonimmediate_operand" "")
508 [(match_operand:VF 1 "nonimmediate_operand" "")]
512 if (MEM_P (operands[0]) && MEM_P (operands[1]))
513 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float-vector move insn: (v)movu<ssemodesuffix>, either
;; register/memory to register or register to memory.  Memory-to-memory
;; is excluded by the condition.  The "movu" attribute is set to 1.
;; NOTE(review): the unspec wrapper lines are not visible (the embedded
;; listing numbers skip 517 -> 519 and 519 -> 521).
516 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
517 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
519 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
521 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
522 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
523 [(set_attr "type" "ssemov")
524 (set_attr "movu" "1")
525 (set_attr "prefix" "maybe_vex")
526 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned QI-element integer vector move (VI1 modes).
;; When both operands are memory, the source is first forced into a
;; register.
;; NOTE(review): the unspec name, the enabling condition and the closing
;; lines are not visible (the embedded listing numbers skip 530 -> 534).
528 (define_expand "<sse2>_movdqu<avxsizesuffix>"
529 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
530 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
534 if (MEM_P (operands[0]) && MEM_P (operands[1]))
535 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned integer vector move insn: (v)movdqu, either register/memory
;; to register or register to memory.  Requires TARGET_SSE2 and excludes
;; memory-to-memory.  prefix_data16 is computed from a TARGET_AVX test.
;; NOTE(review): the unspec name and the branches of the prefix_data16
;; expression are not visible (the embedded listing numbers skip
;; 540 -> 542, 546 -> 548 and 548 -> 551).
538 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
539 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
540 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
542 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
543 "%vmovdqu\t{%1, %0|%0, %1}"
544 [(set_attr "type" "ssemov")
545 (set_attr "movu" "1")
546 (set (attr "prefix_data16")
548 (match_test "TARGET_AVX")
551 (set_attr "prefix" "maybe_vex")
552 (set_attr "mode" "<sseinsnmode>")])
;; (v)lddqu: loads a QI-element vector from memory (memory source only)
;; into an SSE register.  prefix_data16 and prefix_rep are both computed
;; from TARGET_AVX tests.
;; NOTE(review): the unspec name, the enabling condition and the branches
;; of the two prefix expressions are not visible (the embedded listing
;; numbers skip, e.g. 556 -> 559 and 562 -> 564).
554 (define_insn "<sse3>_lddqu<avxsizesuffix>"
555 [(set (match_operand:VI1 0 "register_operand" "=x")
556 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
559 "%vlddqu\t{%1, %0|%0, %1}"
560 [(set_attr "type" "ssemov")
561 (set_attr "movu" "1")
562 (set (attr "prefix_data16")
564 (match_test "TARGET_AVX")
567 (set (attr "prefix_rep")
569 (match_test "TARGET_AVX")
572 (set_attr "prefix" "maybe_vex")
573 (set_attr "mode" "<sseinsnmode>")])
;; movnti: stores an SImode general register ("r") to memory.
;; prefix_data16 is explicitly 0.
;; NOTE(review): the "mode" attribute is "V2DF" although the operands are
;; SImode -- confirm whether a scalar integer mode was intended.
;; NOTE(review): the unspec name and the enabling condition are not
;; visible (the embedded listing numbers skip 577 -> 580).
575 (define_insn "sse2_movntsi"
576 [(set (match_operand:SI 0 "memory_operand" "=m")
577 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
580 "movnti\t{%1, %0|%0, %1}"
581 [(set_attr "type" "ssemov")
582 (set_attr "prefix_data16" "0")
583 (set_attr "mode" "V2DF")])
;; (v)movnt<ssemodesuffix>: stores a float vector (VF modes) from an SSE
;; register to memory.
;; NOTE(review): the unspec name and the enabling condition are not
;; visible (the embedded listing numbers skip 587 -> 590).
585 (define_insn "<sse>_movnt<mode>"
586 [(set (match_operand:VF 0 "memory_operand" "=m")
587 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
590 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
591 [(set_attr "type" "ssemov")
592 (set_attr "prefix" "maybe_vex")
593 (set_attr "mode" "<MODE>")])
;; (v)movntdq: stores a DI-element integer vector (VI8 modes) from an SSE
;; register to memory.  prefix_data16 is computed from a TARGET_AVX test.
;; NOTE(review): "type" is "ssecvt" here whereas the other store patterns
;; in this group use "ssemov" -- confirm this is intentional.
;; NOTE(review): the unspec name, the enabling condition and the branches
;; of the prefix_data16 expression are not visible (the embedded listing
;; numbers skip 597 -> 600, 602 -> 604 and 604 -> 607).
595 (define_insn "<sse2>_movnt<mode>"
596 [(set (match_operand:VI8 0 "memory_operand" "=m")
597 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
600 "%vmovntdq\t{%1, %0|%0, %1}"
601 [(set_attr "type" "ssecvt")
602 (set (attr "prefix_data16")
604 (match_test "TARGET_AVX")
607 (set_attr "prefix" "maybe_vex")
608 (set_attr "mode" "<sseinsnmode>")])
610 ; Expand patterns for non-temporal stores. At the moment, only those
611 ; that directly map to insns are defined; it would be possible to
612 ; define patterns for other modes that would expand to several insns.
614 ;; Modes handled by storent patterns.
;; Visible entries: SI (TARGET_SSE2), SF and DF (TARGET_SSE4A), V8SF and
;; V4DF (TARGET_AVX), V4SF (unconditional) and V2DF (TARGET_SSE2).
;; NOTE(review): one row of the list is not visible (the embedded listing
;; numbers skip 616 -> 618).
615 (define_mode_iterator STORENT_MODE
616 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
618 (V8SF "TARGET_AVX") V4SF
619 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent expander over STORENT_MODE: register source, memory
;; destination.
;; NOTE(review): the unspec wrapper, the enabling condition and the
;; closing of the form are not visible (the embedded listing numbers skip
;; 622 -> 624 and 624 -> 628).
621 (define_expand "storent<mode>"
622 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
624 [(match_operand:STORENT_MODE 1 "register_operand" "")]
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
630 ;; Parallel floating point arithmetic
632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander named by a code iterator (<code>) for a unary float-vector
;; operation; expansion is done entirely by
;; ix86_expand_fp_absneg_operator.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible (the embedded listing numbers skip 635 -> 637 and 637 -> 639).
634 (define_expand "<code><mode>2"
635 [(set (match_operand:VF 0 "register_operand" "")
637 (match_operand:VF 1 "register_operand" "")))]
639 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on float vectors, kept as one insn until reload and then split
;; into a logical operation with operand 2, which the pattern carries in
;; a (use ...): XOR when operand 3 is NEG, AND otherwise.  When operand 1
;; is in memory the two source operands are swapped so that the memory
;; operand comes second.  Alternatives 0-1 are non-AVX (one source tied
;; to the destination) and alternatives 2-3 are AVX.
;; NOTE(review): many lines of the split body are not visible (the
;; embedded listing numbers skip, e.g. 645 -> 648, 651 -> 657 and
;; 665 -> 671), including whatever follows the rtx_equal_p test.
641 (define_insn_and_split "*absneg<mode>2"
642 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
643 (match_operator:VF 3 "absneg_operator"
644 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
645 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
648 "&& reload_completed"
651 enum rtx_code absneg_op;
657 if (MEM_P (operands[1]))
658 op1 = operands[2], op2 = operands[1];
660 op1 = operands[1], op2 = operands[2];
665 if (rtx_equal_p (operands[0], operands[1]))
671 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
672 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
673 t = gen_rtx_SET (VOIDmode, operands[0], t);
677 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander for float vectors (VF modes); operands are
;; legitimized by ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible (the embedded listing numbers skip 680 -> 682 and 683 -> 685).
679 (define_expand "<plusminus_insn><mode>3"
680 [(set (match_operand:VF 0 "register_operand" "")
682 (match_operand:VF 1 "nonimmediate_operand" "")
683 (match_operand:VF 2 "nonimmediate_operand" "")))]
685 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract insn.  Alternative 0 (noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (avx) is the three-operand v-prefixed form.  The <comm> prefix in
;; operand 1's constraint is supplied by the code iterator.
;; NOTE(review): the RTL operator line and the "@" template opener are
;; not visible (the embedded listing numbers skip 688 -> 690 and
;; 692 -> 694).
687 (define_insn "*<plusminus_insn><mode>3"
688 [(set (match_operand:VF 0 "register_operand" "=x,x")
690 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
691 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
692 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
694 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
695 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
696 [(set_attr "isa" "noavx,avx")
697 (set_attr "type" "sseadd")
698 (set_attr "prefix" "orig,vex")
699 (set_attr "mode" "<MODE>")])
;; Add/subtract insn on 128-bit float vectors that emits the scalar
;; (<ssescalarmodesuffix>) form of the instruction; the "mode" attribute
;; is the scalar element mode.  Alternative 0 is the two-operand noavx
;; form and alternative 1 the three-operand AVX form.
;; NOTE(review): the vec_merge wrapper, its tail and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 702 -> 705 and 706 -> 711).
701 (define_insn "<sse>_vm<plusminus_insn><mode>3"
702 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
705 (match_operand:VF_128 1 "register_operand" "0,x")
706 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
711 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
712 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
713 [(set_attr "isa" "noavx,avx")
714 (set_attr "type" "sseadd")
715 (set_attr "prefix" "orig,vex")
716 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for float vectors (VF modes); operands are
;; legitimized by ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible (the embedded listing numbers skip 719 -> 721 and 722 -> 724).
718 (define_expand "mul<mode>3"
719 [(set (match_operand:VF 0 "register_operand" "")
721 (match_operand:VF 1 "nonimmediate_operand" "")
722 (match_operand:VF 2 "nonimmediate_operand" "")))]
724 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 (noavx) is the two-operand mul form; alternative 1 (avx)
;; is the three-operand vmul form.
;; NOTE(review): the RTL operator line and the "@" template opener are
;; not visible (the embedded listing numbers skip 727 -> 729 and
;; 731 -> 733).
726 (define_insn "*mul<mode>3"
727 [(set (match_operand:VF 0 "register_operand" "=x,x")
729 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
730 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
731 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
733 mul<ssemodesuffix>\t{%2, %0|%0, %2}
734 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
735 [(set_attr "isa" "noavx,avx")
736 (set_attr "type" "ssemul")
737 (set_attr "prefix" "orig,vex")
738 (set_attr "mode" "<MODE>")])
;; Multiply insn on 128-bit float vectors that emits the scalar
;; (<ssescalarmodesuffix>) form; the "mode" attribute is the scalar
;; element mode.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): the vec_merge wrapper, its tail and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 741 -> 744 and 745 -> 750).
740 (define_insn "<sse>_vmmul<mode>3"
741 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
744 (match_operand:VF_128 1 "register_operand" "0,x")
745 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
750 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
751 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
752 [(set_attr "isa" "noavx,avx")
753 (set_attr "type" "ssemul")
754 (set_attr "prefix" "orig,vex")
755 (set_attr "mode" "<ssescalarmode>")])
;; Divide expander for the DF-element vector modes (VF2); it only
;; legitimizes operands via ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the enabling condition is not visible (the embedded
;; listing numbers skip 760 -> 762).
757 (define_expand "div<mode>3"
758 [(set (match_operand:VF2 0 "register_operand" "")
759 (div:VF2 (match_operand:VF2 1 "register_operand" "")
760 (match_operand:VF2 2 "nonimmediate_operand" "")))]
762 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Divide expander for the SF-element vector modes (VF1).  After operand
;; fixup it calls ix86_emit_swdivsf to emit a software division sequence
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the DONE after the
;; call are not visible (the embedded listing numbers skip 767 -> 770 and
;; 774 -> 776).
764 (define_expand "div<mode>3"
765 [(set (match_operand:VF1 0 "register_operand" "")
766 (div:VF1 (match_operand:VF1 1 "register_operand" "")
767 (match_operand:VF1 2 "nonimmediate_operand" "")))]
770 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
772 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
773 && flag_finite_math_only && !flag_trapping_math
774 && flag_unsafe_math_optimizations)
776 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed float divide insn.  Operand 1 must be a register.  Alternative
;; 0 (noavx) is the two-operand div form; alternative 1 (avx) is the
;; three-operand vdiv form.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible (the embedded listing numbers skip 782 -> 784 and 785 -> 788).
781 (define_insn "<sse>_div<mode>3"
782 [(set (match_operand:VF 0 "register_operand" "=x,x")
784 (match_operand:VF 1 "register_operand" "0,x")
785 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
788 div<ssemodesuffix>\t{%2, %0|%0, %2}
789 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
790 [(set_attr "isa" "noavx,avx")
791 (set_attr "type" "ssediv")
792 (set_attr "prefix" "orig,vex")
793 (set_attr "mode" "<MODE>")])
;; Divide insn on 128-bit float vectors that emits the scalar
;; (<ssescalarmodesuffix>) form; the "mode" attribute is the scalar
;; element mode.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): the vec_merge wrapper, its tail and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 796 -> 799 and 800 -> 805).
795 (define_insn "<sse>_vmdiv<mode>3"
796 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
799 (match_operand:VF_128 1 "register_operand" "0,x")
800 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
805 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
806 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
807 [(set_attr "isa" "noavx,avx")
808 (set_attr "type" "ssediv")
809 (set_attr "prefix" "orig,vex")
810 (set_attr "mode" "<ssescalarmode>")])
;; (v)rcpps on SF-element vectors, represented as UNSPEC_RCP of operand 1.
;; NOTE(review): the unspec opener and the enabling condition are not
;; visible (the embedded listing numbers skip 813 -> 815 and 815 -> 817).
812 (define_insn "<sse>_rcp<mode>2"
813 [(set (match_operand:VF1 0 "register_operand" "=x")
815 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
817 "%vrcpps\t{%1, %0|%0, %1}"
818 [(set_attr "type" "sse")
819 (set_attr "atom_sse_attr" "rcp")
820 (set_attr "prefix" "maybe_vex")
821 (set_attr "mode" "<MODE>")])
;; rcpss / vrcpss on V4SF.  Operand 1 is the unspec input; operand 2 is a
;; register tied to the destination in the noavx alternative and passed
;; as the extra source in the AVX three-operand form.  Mode attr is SF.
;; NOTE(review): the vec_merge wrapper, the unspec name and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 824 -> 826, 826 -> 828 and 828 -> 832).
823 (define_insn "sse_vmrcpv4sf2"
824 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
826 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
828 (match_operand:V4SF 2 "register_operand" "0,x")
832 rcpss\t{%1, %0|%0, %1}
833 vrcpss\t{%1, %2, %0|%0, %2, %1}"
834 [(set_attr "isa" "noavx,avx")
835 (set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "prefix" "orig,vex")
838 (set_attr "mode" "SF")])
;; Square-root expander for the DF-element vector modes (VF2); no
;; preparation code is visible.
;; NOTE(review): the enabling condition and the close of the form are not
;; visible (the embedded listing numbers skip 842 -> 845).
840 (define_expand "sqrt<mode>2"
841 [(set (match_operand:VF2 0 "register_operand" "")
842 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SF-element vector modes (VF1).  It calls
;; ix86_emit_swsqrtsf (last argument false) to emit a software sequence
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the DONE after the
;; call are not visible (the embedded listing numbers skip 847 -> 850 and
;; 852 -> 854).
845 (define_expand "sqrt<mode>2"
846 [(set (match_operand:VF1 0 "register_operand" "")
847 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
850 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
851 && flag_finite_math_only && !flag_trapping_math
852 && flag_unsafe_math_optimizations)
854 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed float square root: (v)sqrt<ssemodesuffix> of a register or
;; memory operand into an SSE register.
;; NOTE(review): the enabling condition is not visible (the embedded
;; listing numbers skip 861 -> 863).
859 (define_insn "<sse>_sqrt<mode>2"
860 [(set (match_operand:VF 0 "register_operand" "=x")
861 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
863 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
864 [(set_attr "type" "sse")
865 (set_attr "atom_sse_attr" "sqrt")
866 (set_attr "prefix" "maybe_vex")
867 (set_attr "mode" "<MODE>")])
;; Square-root insn on 128-bit float vectors that emits the scalar
;; (<ssescalarmodesuffix>) form.  Operand 1 is the value operated on;
;; operand 2 is a register tied to the destination in the noavx
;; alternative and passed as the extra source in the AVX form.
;; NOTE(review): the vec_merge/sqrt wrapper lines and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 870 -> 873 and 874 -> 878).
869 (define_insn "<sse>_vmsqrt<mode>2"
870 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
873 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
874 (match_operand:VF_128 2 "register_operand" "0,x")
878 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
879 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
880 [(set_attr "isa" "noavx,avx")
881 (set_attr "type" "sse")
882 (set_attr "atom_sse_attr" "sqrt")
883 (set_attr "prefix" "orig,vex")
884 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SF-element vector modes (VF1),
;; represented as UNSPEC_RSQRT.  The visible body calls
;; ix86_emit_swsqrtsf with its last argument true.
;; NOTE(review): the unspec opener, the enabling condition, braces and
;; DONE are not visible (the embedded listing numbers skip 887 -> 889 and
;; 889 -> 892).
886 (define_expand "rsqrt<mode>2"
887 [(set (match_operand:VF1 0 "register_operand" "")
889 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
892 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; (v)rsqrtps on SF-element vectors, represented as UNSPEC_RSQRT of
;; operand 1.
;; NOTE(review): the unspec opener and the enabling condition are not
;; visible (the embedded listing numbers skip 897 -> 899 and 899 -> 901).
896 (define_insn "<sse>_rsqrt<mode>2"
897 [(set (match_operand:VF1 0 "register_operand" "=x")
899 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
901 "%vrsqrtps\t{%1, %0|%0, %1}"
902 [(set_attr "type" "sse")
903 (set_attr "prefix" "maybe_vex")
904 (set_attr "mode" "<MODE>")])
;; rsqrtss / vrsqrtss on V4SF.  Operand 1 is the unspec input; operand 2
;; is a register tied to the destination in the noavx alternative and
;; passed as the extra source in the AVX form.  Mode attr is SF.
;; NOTE(review): the vec_merge wrapper, the unspec name and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 907 -> 909, 909 -> 911 and 911 -> 915).
906 (define_insn "sse_vmrsqrtv4sf2"
907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
911 (match_operand:V4SF 2 "register_operand" "0,x")
915 rsqrtss\t{%1, %0|%0, %1}
916 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sse")
919 (set_attr "prefix" "orig,vex")
920 (set_attr "mode" "SF")])
922 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
923 ;; isn't really correct, as those rtl operators aren't defined when
924 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander named by a code iterator (<code>) for a binary float-vector
;; operation.  Without flag_finite_math_only, operand 1 is forced into a
;; register, matching the register_operand requirement of the non-finite
;; insn; the operands are then legitimized by
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line, the enabling condition and the
;; braces are not visible (the embedded listing numbers skip 927 -> 929
;; and 930 -> 933).
926 (define_expand "<code><mode>3"
927 [(set (match_operand:VF 0 "register_operand" "")
929 (match_operand:VF 1 "nonimmediate_operand" "")
930 (match_operand:VF 2 "nonimmediate_operand" "")))]
933 if (!flag_finite_math_only)
934 operands[1] = force_reg (<MODE>mode, operands[1]);
935 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed float <maxmin_float> insn, enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative ("%") and may
;; be memory.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): the RTL operator line and the "@" template opener are
;; not visible (the embedded listing numbers skip 939 -> 941 and
;; 944 -> 946).
938 (define_insn "*<code><mode>3_finite"
939 [(set (match_operand:VF 0 "register_operand" "=x,x")
941 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
942 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
943 "TARGET_SSE && flag_finite_math_only
944 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
946 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
947 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
948 [(set_attr "isa" "noavx,avx")
949 (set_attr "type" "sseadd")
950 (set_attr "prefix" "orig,vex")
951 (set_attr "mode" "<MODE>")])
;; Packed float <maxmin_float> insn for the !flag_finite_math_only case.
;; Operand 1 must be a register and carries no commutative marker, so the
;; operand order is preserved.  Alternative 0 is noavx and alternative 1
;; is AVX.
;; NOTE(review): the RTL operator line and the "@" template opener are
;; not visible (the embedded listing numbers skip 954 -> 956 and
;; 958 -> 960).
953 (define_insn "*<code><mode>3"
954 [(set (match_operand:VF 0 "register_operand" "=x,x")
956 (match_operand:VF 1 "register_operand" "0,x")
957 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
958 "TARGET_SSE && !flag_finite_math_only"
960 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
961 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
962 [(set_attr "isa" "noavx,avx")
963 (set_attr "type" "sseadd")
964 (set_attr "prefix" "orig,vex")
965 (set_attr "mode" "<MODE>")])
;; <maxmin_float> insn on 128-bit float vectors that emits the scalar
;; (<ssescalarmodesuffix>) form; the "mode" attribute is the scalar
;; element mode.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): "type" is "sse" here while the packed min/max insns use
;; "sseadd" -- confirm this is intentional.
;; NOTE(review): the vec_merge wrapper, its tail and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 968 -> 971 and 972 -> 977).
967 (define_insn "<sse>_vm<code><mode>3"
968 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
971 (match_operand:VF_128 1 "register_operand" "0,x")
972 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
977 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
978 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
979 [(set_attr "isa" "noavx,avx")
980 (set_attr "type" "sse")
981 (set_attr "prefix" "orig,vex")
982 (set_attr "mode" "<ssescalarmode>")])
984 ;; These versions of the min/max patterns implement exactly the operations
985 ;; min = (op1 < op2 ? op1 : op2)
986 ;; max = (!(op1 < op2) ? op1 : op2)
987 ;; Their operands are not commutative, and thus they may be used in the
988 ;; presence of -0.0 and NaN.
;; Packed float min in unspec form: operand 1 must be a register and has
;; no commutative marker.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): the unspec opener, the unspec name and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 991 -> 993 and 994 -> 998).
990 (define_insn "*ieee_smin<mode>3"
991 [(set (match_operand:VF 0 "register_operand" "=x,x")
993 [(match_operand:VF 1 "register_operand" "0,x")
994 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
998 min<ssemodesuffix>\t{%2, %0|%0, %2}
999 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1000 [(set_attr "isa" "noavx,avx")
1001 (set_attr "type" "sseadd")
1002 (set_attr "prefix" "orig,vex")
1003 (set_attr "mode" "<MODE>")])
;; Packed float max in unspec form: operand 1 must be a register and has
;; no commutative marker.  Alternative 0 is noavx and alternative 1 is AVX.
;; NOTE(review): the unspec opener, the unspec name and the enabling
;; condition are not visible (the embedded listing numbers skip
;; 1006 -> 1008 and 1009 -> 1013).
1005 (define_insn "*ieee_smax<mode>3"
1006 [(set (match_operand:VF 0 "register_operand" "=x,x")
1008 [(match_operand:VF 1 "register_operand" "0,x")
1009 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1013 max<ssemodesuffix>\t{%2, %0|%0, %2}
1014 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1015 [(set_attr "isa" "noavx,avx")
1016 (set_attr "type" "sseadd")
1017 (set_attr "prefix" "orig,vex")
1018 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (AVX three-operand form only).  The visible RTL
;; combines an operation on operands 1 and 2 with (minus op1 op2) of the
;; same operands.
;; NOTE(review): the vec_merge/plus wrapper, the merge mask and the
;; enabling condition are not visible (the embedded listing numbers skip
;; 1021 -> 1024 and 1026 -> 1029).
1020 (define_insn "avx_addsubv4df3"
1021 [(set (match_operand:V4DF 0 "register_operand" "=x")
1024 (match_operand:V4DF 1 "register_operand" "x")
1025 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1026 (minus:V4DF (match_dup 1) (match_dup 2))
1029 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1030 [(set_attr "type" "sseadd")
1031 (set_attr "prefix" "vex")
1032 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Alternative 0 is the two-operand noavx
;; form and alternative 1 the three-operand AVX form.  atom_unit is
;; "complex".
;; NOTE(review): the vec_merge/plus wrapper, the merge mask and the
;; enabling condition are not visible (the embedded listing numbers skip
;; 1035 -> 1038 and 1040 -> 1044).
1034 (define_insn "sse3_addsubv2df3"
1035 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1038 (match_operand:V2DF 1 "register_operand" "0,x")
1039 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1040 (minus:V2DF (match_dup 1) (match_dup 2))
1044 addsubpd\t{%2, %0|%0, %2}
1045 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1046 [(set_attr "isa" "noavx,avx")
1047 (set_attr "type" "sseadd")
1048 (set_attr "atom_unit" "complex")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (AVX three-operand form only).  The visible RTL
;; combines an operation on operands 1 and 2 with (minus op1 op2) of the
;; same operands.
;; NOTE(review): the vec_merge/plus wrapper, the merge mask and the
;; enabling condition are not visible (the embedded listing numbers skip
;; 1053 -> 1056 and 1058 -> 1061).
1052 (define_insn "avx_addsubv8sf3"
1053 [(set (match_operand:V8SF 0 "register_operand" "=x")
1056 (match_operand:V8SF 1 "register_operand" "x")
1057 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1058 (minus:V8SF (match_dup 1) (match_dup 2))
1061 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1062 [(set_attr "type" "sseadd")
1063 (set_attr "prefix" "vex")
1064 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  Alternative 0 is the two-operand noavx
;; form and alternative 1 the three-operand AVX form.  prefix_rep is 1
;; for the noavx alternative and left at its default ("*") for AVX.
;; NOTE(review): the vec_merge/plus wrapper, the merge mask and the
;; enabling condition are not visible (the embedded listing numbers skip
;; 1067 -> 1070 and 1072 -> 1076).
1066 (define_insn "sse3_addsubv4sf3"
1067 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1070 (match_operand:V4SF 1 "register_operand" "0,x")
1071 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1072 (minus:V4SF (match_dup 1) (match_dup 2))
1076 addsubps\t{%2, %0|%0, %2}
1077 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1078 [(set_attr "isa" "noavx,avx")
1079 (set_attr "type" "sseadd")
1080 (set_attr "prefix" "orig,vex")
1081 (set_attr "prefix_rep" "1,*")
1082 (set_attr "mode" "V4SF")])
;; vhaddpd / vhsubpd on V4DF (AVX only).  The RTL selects individual DF
;; elements 0..3 of operand 1 and of operand 2; adjacent elements
;; (0,1) and (2,3) appear together within each group.
;; NOTE(review): the vec_concat and plus/minus wrapper lines and the
;; enabling condition are not visible (the embedded listing numbers skip,
;; e.g. 1085 -> 1090 and 1095 -> 1099), so the exact lane arrangement of
;; the result cannot be confirmed from here.
1084 (define_insn "avx_h<plusminus_insn>v4df3"
1085 [(set (match_operand:V4DF 0 "register_operand" "=x")
1090 (match_operand:V4DF 1 "register_operand" "x")
1091 (parallel [(const_int 0)]))
1092 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1094 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1095 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1099 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1100 (parallel [(const_int 0)]))
1101 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1103 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1104 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1106 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1107 [(set_attr "type" "sseadd")
1108 (set_attr "prefix" "vex")
1109 (set_attr "mode" "V4DF")])
;; 128-bit double-precision horizontal add/subtract.  Elements 0 and 1 of
;; operand 1 are combined, likewise elements 0 and 1 of operand 2.
;; Alternative 0 is the legacy two-operand h<plusminus_mnemonic>pd with
;; operand 1 tied to the destination; alternative 1 is the VEX form.
1111 (define_insn "sse3_h<plusminus_insn>v2df3"
1112 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1116 (match_operand:V2DF 1 "register_operand" "0,x")
1117 (parallel [(const_int 0)]))
1118 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1121 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1122 (parallel [(const_int 0)]))
1123 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1126 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1127 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1128 [(set_attr "isa" "noavx,avx")
1129 (set_attr "type" "sseadd")
1130 (set_attr "prefix" "orig,vex")
1131 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/subtract, emitted as
;; vh<plusminus_mnemonic>ps.  The RTL first describes the pairs (0,1),(2,3)
;; of operand 1 and of operand 2, then the pairs (4,5),(6,7) of operand 1
;; and of operand 2.
1133 (define_insn "avx_h<plusminus_insn>v8sf3"
1134 [(set (match_operand:V8SF 0 "register_operand" "=x")
1140 (match_operand:V8SF 1 "register_operand" "x")
1141 (parallel [(const_int 0)]))
1142 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1144 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1145 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1149 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1150 (parallel [(const_int 0)]))
1151 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1153 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1154 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1158 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1159 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1162 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1166 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1168 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1169 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1171 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract over the pairs (0,1)
;; and (2,3) of operand 1 and of operand 2.  Alternative 0 is the legacy
;; two-operand h<plusminus_mnemonic>ps (operand 1 tied to the destination,
;; prefix_rep "1"); alternative 1 is the three-operand VEX form.
1176 (define_insn "sse3_h<plusminus_insn>v4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1182 (match_operand:V4SF 1 "register_operand" "0,x")
1183 (parallel [(const_int 0)]))
1184 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1186 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1187 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1191 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1192 (parallel [(const_int 0)]))
1193 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1195 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1196 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1199 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1200 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1201 [(set_attr "isa" "noavx,avx")
1202 (set_attr "type" "sseadd")
1203 (set_attr "atom_unit" "complex")
1204 (set_attr "prefix" "orig,vex")
1205 (set_attr "prefix_rep" "1,*")
1206 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector (operand 1) into operand 0.
;; Sequence: horizontal add of operand 1 with itself into tmp, a
;; vperm2f128 of tmp with itself (immediate 1) into tmp2, then an
;; element-wise add of tmp and tmp2.
1208 (define_expand "reduc_splus_v4df"
1209 [(match_operand:V4DF 0 "register_operand" "")
1210 (match_operand:V4DF 1 "register_operand" "")]
1213 rtx tmp = gen_reg_rtx (V4DFmode);
1214 rtx tmp2 = gen_reg_rtx (V4DFmode);
1215 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1216 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1217 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1221 (define_expand "reduc_splus_v2df"
1222 [(match_operand:V2DF 0 "register_operand" "")
1223 (match_operand:V2DF 1 "register_operand" "")]
1226 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector (operand 1) into operand 0.
;; Sequence: two rounds of horizontal add (operand 1 -> tmp -> tmp2),
;; a vperm2f128 of tmp2 with itself (immediate 1) back into tmp, then an
;; element-wise add of tmp and tmp2.
1230 (define_expand "reduc_splus_v8sf"
1231 [(match_operand:V8SF 0 "register_operand" "")
1232 (match_operand:V8SF 1 "register_operand" "")]
1235 rtx tmp = gen_reg_rtx (V8SFmode);
1236 rtx tmp2 = gen_reg_rtx (V8SFmode);
1237 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1238 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1239 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1240 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two code
;; sequences appear: a pair of horizontal adds through a temporary, and a
;; call to the generic ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): presumably the hadd sequence is used only when SSE3 is
;; available and ix86_expand_reduc is the fallback -- confirm.
1244 (define_expand "reduc_splus_v4sf"
1245 [(match_operand:V4SF 0 "register_operand" "")
1246 (match_operand:V4SF 1 "register_operand" "")]
1251 rtx tmp = gen_reg_rtx (V4SFmode);
1252 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1253 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1256 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1260 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled by TARGET_AVX2, the 256-bit float
;; modes by TARGET_AVX, and V4SF by TARGET_SSE.
1261 (define_mode_iterator REDUC_SMINMAX_MODE
1262 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1263 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1264 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1265 (V4SF "TARGET_SSE")])
;; Signed min/max (smaxmin) reduction over every mode in
;; REDUC_SMINMAX_MODE.  The expansion is delegated to ix86_expand_reduc,
;; which is handed the element-wise generator gen_<code><mode>3.
1267 (define_expand "reduc_<code>_<mode>"
1268 [(smaxmin:REDUC_SMINMAX_MODE
1269 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1270 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1273 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 modes.  The expansion is delegated to
;; ix86_expand_reduc with the element-wise generator gen_<code><mode>3.
1277 (define_expand "reduc_<code>_<mode>"
1279 (match_operand:VI_256 0 "register_operand" "")
1280 (match_operand:VI_256 1 "register_operand" ""))]
1283 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1287 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1289 ;; Parallel floating point comparisons
1291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare for all VF modes.  The comparison
;; predicate is an explicit immediate (operand 3, matched by
;; const_0_to_31_operand) that is printed as the first AT&T operand of
;; vcmp<ssemodesuffix>.
1293 (define_insn "avx_cmp<mode>3"
1294 [(set (match_operand:VF 0 "register_operand" "=x")
1296 [(match_operand:VF 1 "register_operand" "x")
1297 (match_operand:VF 2 "nonimmediate_operand" "xm")
1298 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1301 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1302 [(set_attr "type" "ssecmp")
1303 (set_attr "length_immediate" "1")
1304 (set_attr "prefix" "vex")
1305 (set_attr "mode" "<MODE>")])
;; Scalar form of the AVX compare-with-immediate for the 128-bit float
;; modes: emits vcmp<ssescalarmodesuffix> and is tagged with the scalar
;; mode attribute <ssescalarmode>.
1307 (define_insn "avx_vmcmp<mode>3"
1308 [(set (match_operand:VF_128 0 "register_operand" "=x")
1311 [(match_operand:VF_128 1 "register_operand" "x")
1312 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1313 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1318 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1319 [(set_attr "type" "ssecmp")
1320 (set_attr "length_immediate" "1")
1321 (set_attr "prefix" "vex")
1322 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparison
;; codes (the condition requires RTX_COMM_COMPARE).  Operand 1 carries the
;; "%" constraint marker, so operands 1 and 2 may be exchanged.  The
;; comparison code of operand 3 is printed through %D3.
1324 (define_insn "*<sse>_maskcmp<mode>3_comm"
1325 [(set (match_operand:VF 0 "register_operand" "=x,x")
1326 (match_operator:VF 3 "sse_comparison_operator"
1327 [(match_operand:VF 1 "register_operand" "%0,x")
1328 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1330 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1332 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1333 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1334 [(set_attr "isa" "noavx,avx")
1335 (set_attr "type" "ssecmp")
1336 (set_attr "length_immediate" "1")
1337 (set_attr "prefix" "orig,vex")
1338 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any sse_comparison_operator.  Operand
;; order is fixed (no "%" marker).  Alternative 0 is the legacy form with
;; operand 1 tied to the destination; alternative 1 is the VEX form.
1340 (define_insn "<sse>_maskcmp<mode>3"
1341 [(set (match_operand:VF 0 "register_operand" "=x,x")
1342 (match_operator:VF 3 "sse_comparison_operator"
1343 [(match_operand:VF 1 "register_operand" "0,x")
1344 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1347 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "ssecmp")
1351 (set_attr "length_immediate" "1")
1352 (set_attr "prefix" "orig,vex")
1353 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare for the 128-bit float modes; emits
;; [v]cmp%D3<ssescalarmodesuffix>.  Note that length_immediate is given
;; explicitly only for the legacy alternative ("1,*").
1355 (define_insn "<sse>_vmmaskcmp<mode>3"
1356 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1358 (match_operator:VF_128 3 "sse_comparison_operator"
1359 [(match_operand:VF_128 1 "register_operand" "0,x")
1360 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1365 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1366 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1367 [(set_attr "isa" "noavx,avx")
1368 (set_attr "type" "ssecmp")
1369 (set_attr "length_immediate" "1,*")
1370 (set_attr "prefix" "orig,vex")
1371 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of the
;; two vector operands (operand 1 may be in memory).  Emitted as
;; comi<ssemodesuffix> through the %v template prefix (prefix attribute
;; "maybe_vex").  prefix_data16 is computed from whether the mode is DF.
1373 (define_insn "<sse>_comi"
1374 [(set (reg:CCFP FLAGS_REG)
1377 (match_operand:<ssevecmode> 0 "register_operand" "x")
1378 (parallel [(const_int 0)]))
1380 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1381 (parallel [(const_int 0)]))))]
1382 "SSE_FLOAT_MODE_P (<MODE>mode)"
1383 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1384 [(set_attr "type" "ssecomi")
1385 (set_attr "prefix" "maybe_vex")
1386 (set_attr "prefix_rep" "0")
1387 (set (attr "prefix_data16")
1388 (if_then_else (eq_attr "mode" "DF")
1390 (const_string "0")))
1391 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode from element 0 of the
;; two vector operands (operand 1 may be in memory).  Emitted as
;; ucomi<ssemodesuffix> through the %v template prefix (prefix attribute
;; "maybe_vex").  prefix_data16 is computed from whether the mode is DF.
1393 (define_insn "<sse>_ucomi"
1394 [(set (reg:CCFPU FLAGS_REG)
1397 (match_operand:<ssevecmode> 0 "register_operand" "x")
1398 (parallel [(const_int 0)]))
1400 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1401 (parallel [(const_int 0)]))))]
1402 "SSE_FLOAT_MODE_P (<MODE>mode)"
1403 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1404 [(set_attr "type" "ssecomi")
1405 (set_attr "prefix" "maybe_vex")
1406 (set_attr "prefix_rep" "0")
1407 (set (attr "prefix_data16")
1408 (if_then_else (eq_attr "mode" "DF")
1410 (const_string "0")))
1411 (set_attr "mode" "<MODE>")])
;; 256-bit vector conditional: operands 1 and 2 are the V_256 data values,
;; operator 3 compares the VF_256 float operands 4 and 5.  The condition
;; requires the data mode and the comparison mode to have the same number
;; of units.  Expansion is done by ix86_expand_fp_vcond, whose boolean
;; result is captured in `ok'.
1413 (define_expand "vcond<V_256:mode><VF_256:mode>"
1414 [(set (match_operand:V_256 0 "register_operand" "")
1416 (match_operator 3 ""
1417 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1418 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1419 (match_operand:V_256 1 "general_operand" "")
1420 (match_operand:V_256 2 "general_operand" "")))]
1422 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1423 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1425 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit vector conditional: operands 1 and 2 are the V_128 data values,
;; operator 3 compares the VF_128 float operands 4 and 5.  The condition
;; requires the data mode and the comparison mode to have the same number
;; of units.  Expansion is done by ix86_expand_fp_vcond, whose boolean
;; result is captured in `ok'.
1430 (define_expand "vcond<V_128:mode><VF_128:mode>"
1431 [(set (match_operand:V_128 0 "register_operand" "")
1433 (match_operator 3 ""
1434 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1435 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1436 (match_operand:V_128 1 "general_operand" "")
1437 (match_operand:V_128 2 "general_operand" "")))]
1439 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1440 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1442 bool ok = ix86_expand_fp_vcond (operands);
1447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1449 ;; Parallel floating point logical operations
1451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not.  The mnemonic is assembled at output
;; time: the suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set and <ssemodesuffix> otherwise, and it is formatted with snprintf
;; into a static 32-byte buffer.  The switch on which_alternative picks
;; the legacy two-operand template or the three-operand VEX template.
1453 (define_insn "<sse>_andnot<mode>3"
1454 [(set (match_operand:VF 0 "register_operand" "=x,x")
1457 (match_operand:VF 1 "register_operand" "0,x"))
1458 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1461 static char buf[32];
1464 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1466 switch (which_alternative)
1469 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1472 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1478 snprintf (buf, sizeof (buf), insn, suffix);
1481 [(set_attr "isa" "noavx,avx")
1482 (set_attr "type" "sselog")
1483 (set_attr "prefix" "orig,vex")
1484 (set_attr "mode" "<MODE>")])
;; Named expander for the packed floating-point logical operations.  Both
;; inputs are accepted as nonimmediate operands and are then legitimized
;; by ix86_fixup_binary_operands_no_copy.
1486 (define_expand "<code><mode>3"
1487 [(set (match_operand:VF 0 "register_operand" "")
1489 (match_operand:VF 1 "nonimmediate_operand" "")
1490 (match_operand:VF 2 "nonimmediate_operand" "")))]
1492 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the packed floating-point logical operations; valid
;; only when ix86_binary_operator_ok accepts the operands.  Operand 1 has
;; the "%" commutative marker.  The mnemonic is built at output time from
;; <logic> plus a suffix that is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, formatted into a static 32-byte buffer.
1494 (define_insn "*<code><mode>3"
1495 [(set (match_operand:VF 0 "register_operand" "=x,x")
1497 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1498 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1499 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1501 static char buf[32];
1504 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1506 switch (which_alternative)
1509 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1512 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1518 snprintf (buf, sizeof (buf), insn, suffix);
1521 [(set_attr "isa" "noavx,avx")
1522 (set_attr "type" "sselog")
1523 (set_attr "prefix" "orig,vex")
1524 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  operands[3] is the mask
;; returned by ix86_build_signbit_mask; operands[4] and operands[5] are
;; fresh pseudo registers.  Operand 1 is combined with the inverted mask,
;; operand 2 is ANDed with the mask, and the two temporaries are ORed into
;; operand 0.
1526 (define_expand "copysign<mode>3"
1529 (not:VF (match_dup 3))
1530 (match_operand:VF 1 "nonimmediate_operand" "")))
1532 (and:VF (match_dup 3)
1533 (match_operand:VF 2 "nonimmediate_operand" "")))
1534 (set (match_operand:VF 0 "register_operand" "")
1535 (ior:VF (match_dup 4) (match_dup 5)))]
1538 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1540 operands[4] = gen_reg_rtx (<MODE>mode);
1541 operands[5] = gen_reg_rtx (<MODE>mode);
1544 ;; Also define scalar versions. These are used for abs, neg, and
1545 ;; conditional move. Using subregs into vector modes causes register
1546 ;; allocation lossage. These patterns do not allow memory operands
1547 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not, register operands only.  It is emitted with the
;; packed mnemonic: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssevecmodesuffix>
;; otherwise, and the "mode" attribute is the vector mode <ssevecmode>.
1549 (define_insn "*andnot<mode>3"
1550 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1553 (match_operand:MODEF 1 "register_operand" "0,x"))
1554 (match_operand:MODEF 2 "register_operand" "x,x")))]
1555 "SSE_FLOAT_MODE_P (<MODE>mode)"
1557 static char buf[32];
1560 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1562 switch (which_alternative)
1565 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1568 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1574 snprintf (buf, sizeof (buf), insn, suffix);
1577 [(set_attr "isa" "noavx,avx")
1578 (set_attr "type" "sselog")
1579 (set_attr "prefix" "orig,vex")
1580 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation, register operands only, with the "%"
;; commutative marker on operand 1.  It is emitted with the packed
;; mnemonic <logic> plus a suffix that is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssevecmodesuffix>
;; otherwise; the "mode" attribute is the vector mode <ssevecmode>.
1582 (define_insn "*<code><mode>3"
1583 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1585 (match_operand:MODEF 1 "register_operand" "%0,x")
1586 (match_operand:MODEF 2 "register_operand" "x,x")))]
1587 "SSE_FLOAT_MODE_P (<MODE>mode)"
1589 static char buf[32];
1592 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1594 switch (which_alternative)
1597 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1600 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1606 snprintf (buf, sizeof (buf), insn, suffix);
1609 [(set_attr "isa" "noavx,avx")
1610 (set_attr "type" "sselog")
1611 (set_attr "prefix" "orig,vex")
1612 (set_attr "mode" "<ssevecmode>")])
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1616 ;; FMA4 floating point multiply/accumulate instructions. This includes
1617 ;; both the scalar and the vector versions of the instructions.
1620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1622 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1623 ;; combine to generate a multiply/add with two memory references. We then
1624 ;; split this insn, into loading up the destination register with one of the
1625 ;; memory operations. If we don't manage to split the insn, reload will
1626 ;; generate the appropriate moves. The reason this is needed, is that combine
1627 ;; has already folded one of the memory references into both the multiply and
1628 ;; add insns, and it can't generate a new pseudo. I.e.:
1629 ;; (set (reg1) (mem (addr1)))
1630 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1631 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1633 ;; ??? This is historic, pre-dating the gimple fma transformation.
1634 ;; We could now properly represent that only one memory operand is
1635 ;; allowed and not be penalized during optimization.
1637 ;; Intrinsic FMA operations.
1639 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander fma<mode>4 with operands 1, 2 and 3 unmodified.
;; Enabled for either TARGET_FMA or TARGET_FMA4, and only together with
;; TARGET_SSE_MATH.
1640 (define_expand "fma<mode>4"
1641 [(set (match_operand:FMAMODE 0 "register_operand")
1643 (match_operand:FMAMODE 1 "nonimmediate_operand")
1644 (match_operand:FMAMODE 2 "nonimmediate_operand")
1645 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1646 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fms<mode>4: like fma<mode>4 but operand 3 is
;; negated.  Same enabling condition (FMA or FMA4, plus SSE math).
1648 (define_expand "fms<mode>4"
1649 [(set (match_operand:FMAMODE 0 "register_operand")
1651 (match_operand:FMAMODE 1 "nonimmediate_operand")
1652 (match_operand:FMAMODE 2 "nonimmediate_operand")
1653 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1654 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnma<mode>4: operand 1 is negated, operands 2
;; and 3 are used as given.  Same enabling condition (FMA or FMA4, plus
;; SSE math).
1656 (define_expand "fnma<mode>4"
1657 [(set (match_operand:FMAMODE 0 "register_operand")
1659 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1660 (match_operand:FMAMODE 2 "nonimmediate_operand")
1661 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1662 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnms<mode>4: both operand 1 and operand 3 are
;; negated.  Same enabling condition (FMA or FMA4, plus SSE math).
1664 (define_expand "fnms<mode>4"
1665 [(set (match_operand:FMAMODE 0 "register_operand")
1667 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1668 (match_operand:FMAMODE 2 "nonimmediate_operand")
1669 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1670 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1672 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Accordingly the condition here is only "TARGET_FMA || TARGET_FMA4",
;; without the TARGET_SSE_MATH term used by the standard-name expanders.
1673 (define_expand "fma4i_fmadd_<mode>"
1674 [(set (match_operand:FMAMODE 0 "register_operand")
1676 (match_operand:FMAMODE 1 "nonimmediate_operand")
1677 (match_operand:FMAMODE 2 "nonimmediate_operand")
1678 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1679 "TARGET_FMA || TARGET_FMA4")
;; Four-operand multiply-add, emitted as vfmadd<ssemodesuffix> with a
;; separate destination.  The two alternatives allow a memory operand in
;; position 3 (alternative 0) or position 2 (alternative 1), never both.
1681 (define_insn "*fma4i_fmadd_<mode>"
1682 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1684 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1685 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1686 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1688 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1689 [(set_attr "type" "ssemuladd")
1690 (set_attr "mode" "<MODE>")])
;; Four-operand multiply-subtract, emitted as vfmsub<ssemodesuffix> with a
;; separate destination.  The two alternatives allow a memory operand in
;; position 3 (alternative 0) or position 2 (alternative 1), never both.
1692 (define_insn "*fma4i_fmsub_<mode>"
1693 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1695 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1696 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1698 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1700 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1701 [(set_attr "type" "ssemuladd")
1702 (set_attr "mode" "<MODE>")])
;; Four-operand negated multiply-add, emitted as vfnmadd<ssemodesuffix>
;; with a separate destination.  The two alternatives allow a memory
;; operand in position 3 (alternative 0) or position 2 (alternative 1).
1704 (define_insn "*fma4i_fnmadd_<mode>"
1705 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1708 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1709 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1710 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1712 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1713 [(set_attr "type" "ssemuladd")
1714 (set_attr "mode" "<MODE>")])
;; Four-operand negated multiply-subtract, emitted as
;; vfnmsub<ssemodesuffix> with a separate destination.  The two
;; alternatives allow a memory operand in position 3 (alternative 0) or
;; position 2 (alternative 1).
1716 (define_insn "*fma4i_fnmsub_<mode>"
1717 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1720 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1721 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1723 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1725 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1726 [(set_attr "type" "ssemuladd")
1727 (set_attr "mode" "<MODE>")])
1729 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1730 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 multiply-add on the 128-bit float modes.
;; The preparation code sets operands[4] to the zero vector of the mode
;; (CONST0_RTX).
1732 (define_expand "fma4i_vmfmadd_<mode>"
1733 [(set (match_operand:VF_128 0 "register_operand")
1736 (match_operand:VF_128 1 "nonimmediate_operand")
1737 (match_operand:VF_128 2 "nonimmediate_operand")
1738 (match_operand:VF_128 3 "nonimmediate_operand"))
1743 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar multiply-add named fmai_vmfmadd_<mode> on the
;; 128-bit float modes; all three inputs are accepted as nonimmediate
;; operands.
1746 (define_expand "fmai_vmfmadd_<mode>"
1747 [(set (match_operand:VF_128 0 "register_operand")
1750 (match_operand:VF_128 1 "nonimmediate_operand")
1751 (match_operand:VF_128 2 "nonimmediate_operand")
1752 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand multiply-add with one alternative per encoding:
;;   0: vfmadd132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmadd213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmadd231 -- operand 3 tied to the destination, operand 2 "xm".
1757 (define_insn "*fmai_fmadd_<mode>"
1758 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1761 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1762 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1763 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1768 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1769 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1770 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
;; Scalar three-operand multiply-subtract with one alternative per
;; encoding:
;;   0: vfmsub132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmsub213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmsub231 -- operand 3 tied to the destination, operand 2 "xm".
1774 (define_insn "*fmai_fmsub_<mode>"
1775 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1778 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1779 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1781 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1786 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1787 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1788 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1789 [(set_attr "type" "ssemuladd")
1790 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated multiply-add with one alternative per
;; encoding:
;;   0: vfnmadd132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfnmadd213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfnmadd231 -- operand 3 tied to the destination, operand 2 "xm".
1792 (define_insn "*fmai_fnmadd_<mode>"
1793 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1797 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1798 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1799 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1804 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1805 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1806 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1807 [(set_attr "type" "ssemuladd")
1808 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated multiply-subtract with one alternative per
;; encoding:
;;   0: vfnmsub132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfnmsub213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfnmsub231 -- operand 3 tied to the destination, operand 2 "xm".
1810 (define_insn "*fmai_fnmsub_<mode>"
1811 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1815 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1816 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1818 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1823 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1824 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1825 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1826 [(set_attr "type" "ssemuladd")
1827 (set_attr "mode" "<MODE>")])
;; Scalar four-operand multiply-add, emitted as
;; vfmadd<ssescalarmodesuffix>.  Operand 4 must be the zero constant
;; (const0_operand) and is not printed in the template.
1829 (define_insn "*fma4i_vmfmadd_<mode>"
1830 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1833 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1834 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1835 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1836 (match_operand:VF_128 4 "const0_operand" "")
1839 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1840 [(set_attr "type" "ssemuladd")
1841 (set_attr "mode" "<MODE>")])
;; Scalar four-operand multiply-subtract, emitted as
;; vfmsub<ssescalarmodesuffix>.  Operand 4 must be the zero constant
;; (const0_operand) and is not printed in the template.
1843 (define_insn "*fma4i_vmfmsub_<mode>"
1844 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1847 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1848 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1850 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1851 (match_operand:VF_128 4 "const0_operand" "")
1854 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1855 [(set_attr "type" "ssemuladd")
1856 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated multiply-add, emitted as
;; vfnmadd<ssescalarmodesuffix>.  Operand 4 must be the zero constant
;; (const0_operand) and is not printed in the template.
1858 (define_insn "*fma4i_vmfnmadd_<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1863 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1864 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1865 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1866 (match_operand:VF_128 4 "const0_operand" "")
1869 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1870 [(set_attr "type" "ssemuladd")
1871 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated multiply-subtract, emitted as
;; vfnmsub<ssescalarmodesuffix>.  Operand 4 must be the zero constant
;; (const0_operand) and is not printed in the template.
1873 (define_insn "*fma4i_vmfnmsub_<mode>"
1874 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1878 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1881 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1882 (match_operand:VF_128 4 "const0_operand" "")
1885 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1886 [(set_attr "type" "ssemuladd")
1887 (set_attr "mode" "<MODE>")])
1889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1891 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1893 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1895 ;; It would be possible to represent these without the UNSPEC as
1898 ;; (fma op1 op2 op3)
1899 ;; (fma op1 op2 (neg op3))
1902 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply add/sub operation over all VF modes;
;; enabled when either TARGET_FMA or TARGET_FMA4 is available.
1904 (define_expand "fmaddsub_<mode>"
1905 [(set (match_operand:VF 0 "register_operand")
1907 [(match_operand:VF 1 "nonimmediate_operand")
1908 (match_operand:VF 2 "nonimmediate_operand")
1909 (match_operand:VF 3 "nonimmediate_operand")]
1911 "TARGET_FMA || TARGET_FMA4")
;; Four-operand packed multiply add/sub, emitted as
;; vfmaddsub<ssemodesuffix> with a separate destination.  A memory operand
;; is allowed in position 3 (alternative 0) or position 2 (alternative 1).
1913 (define_insn "*fma4_fmaddsub_<mode>"
1914 [(set (match_operand:VF 0 "register_operand" "=x,x")
1916 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1917 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1918 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1921 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1922 [(set_attr "type" "ssemuladd")
1923 (set_attr "mode" "<MODE>")])
;; Four-operand packed multiply sub/add, emitted as
;; vfmsubadd<ssemodesuffix> with a separate destination.  A memory operand
;; is allowed in position 3 (alternative 0) or position 2 (alternative 1).
1925 (define_insn "*fma4_fmsubadd_<mode>"
1926 [(set (match_operand:VF 0 "register_operand" "=x,x")
1928 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1929 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1931 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1934 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1935 [(set_attr "type" "ssemuladd")
1936 (set_attr "mode" "<MODE>")])
1938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1940 ;; FMA3 floating point multiply/accumulate instructions.
1942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand multiply-add with one alternative per encoding:
;;   0: vfmadd132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmadd213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmadd231 -- operand 3 tied to the destination, operand 2 "xm".
1944 (define_insn "*fma_fmadd_<mode>"
1945 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1947 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1948 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1949 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1952 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1953 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1954 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1955 [(set_attr "type" "ssemuladd")
1956 (set_attr "mode" "<MODE>")])
;; Three-operand multiply-subtract with one alternative per encoding:
;;   0: vfmsub132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmsub213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmsub231 -- operand 3 tied to the destination, operand 2 "xm".
1958 (define_insn "*fma_fmsub_<mode>"
1959 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1961 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1962 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1964 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1967 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1968 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1969 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1970 [(set_attr "type" "ssemuladd")
1971 (set_attr "mode" "<MODE>")])
;; Three-operand negated multiply-add with one alternative per encoding:
;;   0: vfnmadd132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfnmadd213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfnmadd231 -- operand 3 tied to the destination, operand 2 "xm".
1973 (define_insn "*fma_fnmadd_<mode>"
1974 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1977 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1978 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1979 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1982 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1983 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1984 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1985 [(set_attr "type" "ssemuladd")
1986 (set_attr "mode" "<MODE>")])
;; Three-operand negated multiply-subtract with one alternative per
;; encoding:
;;   0: vfnmsub132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfnmsub213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfnmsub231 -- operand 3 tied to the destination, operand 2 "xm".
;; The second alternative must use the 213 encoding: the destination holds
;; multiplicand operand 1 and operand 3 is the addend, exactly as in the
;; sibling fmadd/fmsub/fnmadd patterns.  Printing 231 there would treat
;; the destination as the addend and compute the wrong value.
1988 (define_insn "*fma_fnmsub_<mode>"
1989 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1992 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1993 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1995 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1998 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1999 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2000 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply add/sub with one alternative per
;; encoding:
;;   0: vfmaddsub132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmaddsub213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmaddsub231 -- operand 3 tied to the destination, operand 2 "xm".
2004 (define_insn "*fma_fmaddsub_<mode>"
2005 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2007 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2008 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2009 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2013 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2014 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2015 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2016 [(set_attr "type" "ssemuladd")
2017 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply sub/add with one alternative per
;; encoding:
;;   0: vfmsubadd132 -- operand 1 tied to the destination, operand 2 "xm";
;;   1: vfmsubadd213 -- operand 1 tied to the destination, operand 3 "xm";
;;   2: vfmsubadd231 -- operand 3 tied to the destination, operand 2 "xm".
2019 (define_insn "*fma_fmsubadd_<mode>"
2020 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2022 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2023 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2025 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2029 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2030 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2031 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2032 [(set_attr "type" "ssemuladd")
2033 (set_attr "mode" "<MODE>")])
2035 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2037 ;; Parallel single-precision floating point conversion operations
2039 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts the V2SI operand 2 (constraint "ym") to V2SF and combines it
;; with V4SF operand 1, which is tied to the destination.  Emits the
;; two-operand cvtpi2ps.
2041 (define_insn "sse_cvtpi2ps"
2042 [(set (match_operand:V4SF 0 "register_operand" "=x")
2045 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2046 (match_operand:V4SF 1 "register_operand" "0")
2049 "cvtpi2ps\t{%2, %0|%0, %2}"
2050 [(set_attr "type" "ssecvt")
2051 (set_attr "mode" "V4SF")])
;; Converts V4SF operand 1 through an unspec producing V4SI and keeps
;; elements 0 and 1 as the V2SI result in a "y"-constraint register.
;; Emits cvtps2pi; the unit attribute is "mmx".
2053 (define_insn "sse_cvtps2pi"
2054 [(set (match_operand:V2SI 0 "register_operand" "=y")
2056 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2058 (parallel [(const_int 0) (const_int 1)])))]
2060 "cvtps2pi\t{%1, %0|%0, %1}"
2061 [(set_attr "type" "ssecvt")
2062 (set_attr "unit" "mmx")
2063 (set_attr "mode" "DI")])
;; Converts V4SF operand 1 with the RTL `fix' operation to V4SI and keeps
;; elements 0 and 1 as the V2SI result in a "y"-constraint register.
;; Emits cvttps2pi; the unit attribute is "mmx" and prefix_rep is "0".
2065 (define_insn "sse_cvttps2pi"
2066 [(set (match_operand:V2SI 0 "register_operand" "=y")
2068 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2069 (parallel [(const_int 0) (const_int 1)])))]
2071 "cvttps2pi\t{%1, %0|%0, %1}"
2072 [(set_attr "type" "ssecvt")
2073 (set_attr "unit" "mmx")
2074 (set_attr "prefix_rep" "0")
2075 (set_attr "mode" "SF")])
;; Converts the SI operand 2 to SF and combines it with V4SF operand 1.
;; Alternatives 0 and 1 are the legacy cvtsi2ss with a register or memory
;; source (operand 1 tied to the destination); they differ only in their
;; decode attributes.  Alternative 2 is the three-operand vcvtsi2ss.
2077 (define_insn "sse_cvtsi2ss"
2078 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2081 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2082 (match_operand:V4SF 1 "register_operand" "0,0,x")
2086 cvtsi2ss\t{%2, %0|%0, %2}
2087 cvtsi2ss\t{%2, %0|%0, %2}
2088 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2089 [(set_attr "isa" "noavx,noavx,avx")
2090 (set_attr "type" "sseicvt")
2091 (set_attr "athlon_decode" "vector,double,*")
2092 (set_attr "amdfam10_decode" "vector,double,*")
2093 (set_attr "bdver1_decode" "double,direct,*")
2094 (set_attr "prefix" "orig,orig,vex")
2095 (set_attr "mode" "SF")])
;; 64-bit-source variant of sse_cvtsi2ss: converts the DI operand 2 to SF
;; and combines it with V4SF operand 1.  Requires TARGET_64BIT.  The
;; legacy alternatives set prefix_rex to "1"; the VEX alternative sets
;; length_vex to "4".
2097 (define_insn "sse_cvtsi2ssq"
2098 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2101 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2102 (match_operand:V4SF 1 "register_operand" "0,0,x")
2104 "TARGET_SSE && TARGET_64BIT"
2106 cvtsi2ssq\t{%2, %0|%0, %2}
2107 cvtsi2ssq\t{%2, %0|%0, %2}
2108 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2109 [(set_attr "isa" "noavx,noavx,avx")
2110 (set_attr "type" "sseicvt")
2111 (set_attr "athlon_decode" "vector,double,*")
2112 (set_attr "amdfam10_decode" "vector,double,*")
2113 (set_attr "bdver1_decode" "double,direct,*")
2114 (set_attr "length_vex" "*,*,4")
2115 (set_attr "prefix_rex" "1,1,*")
2116 (set_attr "prefix" "orig,orig,vex")
2117 (set_attr "mode" "SF")])
;; Converts element 0 of V4SF operand 1 (register or memory) to an SI
;; register through UNSPEC_FIX_NOTRUNC.  Emits cvtss2si via the %v
;; template prefix (prefix attribute "maybe_vex").
2119 (define_insn "sse_cvtss2si"
2120 [(set (match_operand:SI 0 "register_operand" "=r,r")
2123 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2124 (parallel [(const_int 0)]))]
2125 UNSPEC_FIX_NOTRUNC))]
2127 "%vcvtss2si\t{%1, %0|%0, %1}"
2128 [(set_attr "type" "sseicvt")
2129 (set_attr "athlon_decode" "double,vector")
2130 (set_attr "bdver1_decode" "double,double")
2131 (set_attr "prefix_rep" "1")
2132 (set_attr "prefix" "maybe_vex")
2133 (set_attr "mode" "SI")])
;; Same conversion as sse_cvtss2si, but operand 1 is a scalar SF value
;; rather than element 0 of a V4SF vector.
2135 (define_insn "sse_cvtss2si_2"
2136 [(set (match_operand:SI 0 "register_operand" "=r,r")
2137 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2138 UNSPEC_FIX_NOTRUNC))]
2140 "%vcvtss2si\t{%1, %0|%0, %1}"
2141 [(set_attr "type" "sseicvt")
2142 (set_attr "athlon_decode" "double,vector")
2143 (set_attr "amdfam10_decode" "double,double")
2144 (set_attr "bdver1_decode" "double,double")
2145 (set_attr "prefix_rep" "1")
2146 (set_attr "prefix" "maybe_vex")
2147 (set_attr "mode" "SI")])
;; 64-bit form of cvtss2si: convert element 0 of the V4SF operand 1 to a
;; DImode register (UNSPEC_FIX_NOTRUNC).  Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode attribute is set
;; here -- confirm this is intentional.
2149 (define_insn "sse_cvtss2siq"
2150 [(set (match_operand:DI 0 "register_operand" "=r,r")
2153 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2154 (parallel [(const_int 0)]))]
2155 UNSPEC_FIX_NOTRUNC))]
2156 "TARGET_SSE && TARGET_64BIT"
2157 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2158 [(set_attr "type" "sseicvt")
2159 (set_attr "athlon_decode" "double,vector")
2160 (set_attr "bdver1_decode" "double,double")
2161 (set_attr "prefix_rep" "1")
2162 (set_attr "prefix" "maybe_vex")
2163 (set_attr "mode" "DI")])
;; 64-bit cvtss2si taking a scalar SFmode source (register or memory) and
;; producing a DImode register.  Requires TARGET_SSE && TARGET_64BIT.
2165 (define_insn "sse_cvtss2siq_2"
2166 [(set (match_operand:DI 0 "register_operand" "=r,r")
2167 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2168 UNSPEC_FIX_NOTRUNC))]
2169 "TARGET_SSE && TARGET_64BIT"
2170 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2171 [(set_attr "type" "sseicvt")
2172 (set_attr "athlon_decode" "double,vector")
2173 (set_attr "amdfam10_decode" "double,double")
2174 (set_attr "bdver1_decode" "double,double")
2175 (set_attr "prefix_rep" "1")
2176 (set_attr "prefix" "maybe_vex")
2177 (set_attr "mode" "DI")])
;; cvttss2si: convert element 0 of the V4SF operand 1 (register or memory)
;; to an SImode register.
2179 (define_insn "sse_cvttss2si"
2180 [(set (match_operand:SI 0 "register_operand" "=r,r")
2183 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2184 (parallel [(const_int 0)]))))]
2186 "%vcvttss2si\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "amdfam10_decode" "double,double")
2190 (set_attr "bdver1_decode" "double,double")
2191 (set_attr "prefix_rep" "1")
2192 (set_attr "prefix" "maybe_vex")
2193 (set_attr "mode" "SI")])
;; 64-bit form of cvttss2si: element 0 of the V4SF operand 1 is converted
;; to a DImode register.  Requires TARGET_SSE && TARGET_64BIT.
2195 (define_insn "sse_cvttss2siq"
2196 [(set (match_operand:DI 0 "register_operand" "=r,r")
2199 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2200 (parallel [(const_int 0)]))))]
2201 "TARGET_SSE && TARGET_64BIT"
2202 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "athlon_decode" "double,vector")
2205 (set_attr "amdfam10_decode" "double,double")
2206 (set_attr "bdver1_decode" "double,double")
2207 (set_attr "prefix_rep" "1")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "DI")])
;; vcvtdq2ps, 256-bit: integer-to-float conversion of a V8SI operand
;; (register or memory) into a V8SF register.
2211 (define_insn "avx_cvtdq2ps256"
2212 [(set (match_operand:V8SF 0 "register_operand" "=x")
2213 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2215 "vcvtdq2ps\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt")
2217 (set_attr "prefix" "vex")
2218 (set_attr "mode" "V8SF")])
;; cvtdq2ps, 128-bit: integer-to-float conversion of a V4SI operand
;; (register or memory) into a V4SF register.
2220 (define_insn "sse2_cvtdq2ps"
2221 [(set (match_operand:V4SF 0 "register_operand" "=x")
2222 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2224 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2225 [(set_attr "type" "ssecvt")
2226 (set_attr "prefix" "maybe_vex")
2227 (set_attr "mode" "V4SF")])
;; Expander for V4SI -> V4SF conversion built from several steps: a float
;; conversion of operand 1, an lt comparison against operands[3], an and
;; with operands[4], and a final plus that sets operand 0.
;; The preparation code sets operands[3] to a zero V4SF vector and
;; operands[4] to a V4SF constant built from 2**32 (real_ldexp of dconst1
;; by 32); operands[5..7] are fresh V4SF temporaries.
;; NOTE(review): presumably this implements the unsigned conversion by
;; adding 2**32 to lanes that converted negative -- confirm against the
;; full pattern.
2229 (define_expand "sse2_cvtudq2ps"
2231 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2233 (lt:V4SF (match_dup 5) (match_dup 3)))
2235 (and:V4SF (match_dup 6) (match_dup 4)))
2236 (set (match_operand:V4SF 0 "register_operand" "")
2237 (plus:V4SF (match_dup 5) (match_dup 7)))]
2240 REAL_VALUE_TYPE TWO32r;
2244 real_ldexp (&TWO32r, &dconst1, 32);
2245 x = const_double_from_real_value (TWO32r, SFmode);
2247 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2248 operands[4] = force_reg (V4SFmode,
2249 ix86_build_const_vector (V4SFmode, 1, x));
2251 for (i = 5; i < 8; i++)
2252 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq, 256-bit: V8SF operand (register or memory) converted to a
;; V8SI register, expressed with UNSPEC_FIX_NOTRUNC.
2255 (define_insn "avx_cvtps2dq256"
2256 [(set (match_operand:V8SI 0 "register_operand" "=x")
2257 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2258 UNSPEC_FIX_NOTRUNC))]
2260 "vcvtps2dq\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "prefix" "vex")
2263 (set_attr "mode" "OI")])
;; cvtps2dq, 128-bit: V4SF operand (register or memory) converted to a
;; V4SI register, expressed with UNSPEC_FIX_NOTRUNC.  The prefix_data16
;; attribute is computed from a TARGET_AVX test.
2265 (define_insn "sse2_cvtps2dq"
2266 [(set (match_operand:V4SI 0 "register_operand" "=x")
2267 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2268 UNSPEC_FIX_NOTRUNC))]
2270 "%vcvtps2dq\t{%1, %0|%0, %1}"
2271 [(set_attr "type" "ssecvt")
2272 (set (attr "prefix_data16")
2274 (match_test "TARGET_AVX")
2276 (const_string "1")))
2277 (set_attr "prefix" "maybe_vex")
2278 (set_attr "mode" "TI")])
;; vcvttps2dq, 256-bit: fix (float-to-integer) conversion of a V8SF
;; operand (register or memory) into a V8SI register.
2280 (define_insn "avx_cvttps2dq256"
2281 [(set (match_operand:V8SI 0 "register_operand" "=x")
2282 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2284 "vcvttps2dq\t{%1, %0|%0, %1}"
2285 [(set_attr "type" "ssecvt")
2286 (set_attr "prefix" "vex")
2287 (set_attr "mode" "OI")])
;; cvttps2dq, 128-bit: fix (float-to-integer) conversion of a V4SF operand
;; (register or memory) into a V4SI register.  prefix_rep and prefix_data16
;; are computed from TARGET_AVX tests.
;; NOTE(review): prefix_data16 is set twice in the attribute list below --
;; once conditionally and once as a plain "0".  One of the two looks
;; redundant; confirm which setting is intended and drop the other.
2289 (define_insn "sse2_cvttps2dq"
2290 [(set (match_operand:V4SI 0 "register_operand" "=x")
2291 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2293 "%vcvttps2dq\t{%1, %0|%0, %1}"
2294 [(set_attr "type" "ssecvt")
2295 (set (attr "prefix_rep")
2297 (match_test "TARGET_AVX")
2299 (const_string "1")))
2300 (set (attr "prefix_data16")
2302 (match_test "TARGET_AVX")
2304 (const_string "0")))
2305 (set_attr "prefix_data16" "0")
2306 (set_attr "prefix" "maybe_vex")
2307 (set_attr "mode" "TI")])
2309 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2311 ;; Parallel double-precision floating point conversion operations
2313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: integer-to-float conversion of a V2SI operand into a V2DF
;; register.  Alternative 0 takes the source in a "y" register and is
;; tagged unit "mmx"; alternative 1 takes it from memory.
2315 (define_insn "sse2_cvtpi2pd"
2316 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2317 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2319 "cvtpi2pd\t{%1, %0|%0, %1}"
2320 [(set_attr "type" "ssecvt")
2321 (set_attr "unit" "mmx,*")
2322 (set_attr "prefix_data16" "1,*")
2323 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand (register or memory) converted to a V2SI value
;; in a "y" register, expressed with UNSPEC_FIX_NOTRUNC.
2325 (define_insn "sse2_cvtpd2pi"
2326 [(set (match_operand:V2SI 0 "register_operand" "=y")
2327 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2328 UNSPEC_FIX_NOTRUNC))]
2330 "cvtpd2pi\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "unit" "mmx")
2333 (set_attr "bdver1_decode" "double")
2334 (set_attr "prefix_data16" "1")
2335 (set_attr "mode" "DI")])
;; cvttpd2pi: fix (float-to-integer) conversion of a V2DF operand
;; (register or memory) into a V2SI value in a "y" register.
;; NOTE(review): the mode attribute is "TI" here but "DI" on
;; sse2_cvtpd2pi -- confirm the difference is intended.
2337 (define_insn "sse2_cvttpd2pi"
2338 [(set (match_operand:V2SI 0 "register_operand" "=y")
2339 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2341 "cvttpd2pi\t{%1, %0|%0, %1}"
2342 [(set_attr "type" "ssecvt")
2343 (set_attr "unit" "mmx")
2344 (set_attr "bdver1_decode" "double")
2345 (set_attr "prefix_data16" "1")
2346 (set_attr "mode" "TI")])
;; Convert the SImode integer in operand 2 to DFmode.  Operand 1 is a V2DF
;; register that is tied to the destination in the two non-AVX alternatives.
;; Alternatives 0 and 1 emit cvtsi2sd; alternative 2 emits the
;; three-operand vcvtsi2sd.
2348 (define_insn "sse2_cvtsi2sd"
2349 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2352 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2353 (match_operand:V2DF 1 "register_operand" "0,0,x")
2357 cvtsi2sd\t{%2, %0|%0, %2}
2358 cvtsi2sd\t{%2, %0|%0, %2}
2359 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2360 [(set_attr "isa" "noavx,noavx,avx")
2361 (set_attr "type" "sseicvt")
2362 (set_attr "athlon_decode" "double,direct,*")
2363 (set_attr "amdfam10_decode" "vector,double,*")
2364 (set_attr "bdver1_decode" "double,direct,*")
2365 (set_attr "prefix" "orig,orig,vex")
2366 (set_attr "mode" "DF")])
;; Convert the DImode integer in operand 2 to DFmode.  Operand 1 is a V2DF
;; register that is tied to the destination in the two non-AVX alternatives.
;; Only enabled for TARGET_SSE2 && TARGET_64BIT.  Alternatives 0 and 1 emit
;; cvtsi2sdq; alternative 2 emits the three-operand vcvtsi2sdq.
2368 (define_insn "sse2_cvtsi2sdq"
2369 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2372 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2373 (match_operand:V2DF 1 "register_operand" "0,0,x")
2375 "TARGET_SSE2 && TARGET_64BIT"
2377 cvtsi2sdq\t{%2, %0|%0, %2}
2378 cvtsi2sdq\t{%2, %0|%0, %2}
2379 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2380 [(set_attr "isa" "noavx,noavx,avx")
2381 (set_attr "type" "sseicvt")
2382 (set_attr "athlon_decode" "double,direct,*")
2383 (set_attr "amdfam10_decode" "vector,double,*")
2384 (set_attr "bdver1_decode" "double,direct,*")
2385 (set_attr "length_vex" "*,*,4")
2386 (set_attr "prefix_rex" "1,1,*")
2387 (set_attr "prefix" "orig,orig,vex")
2388 (set_attr "mode" "DF")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 (register or memory)
;; to an SImode register, expressed with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is
;; set here -- confirm this is intentional.
2390 (define_insn "sse2_cvtsd2si"
2391 [(set (match_operand:SI 0 "register_operand" "=r,r")
2394 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2395 (parallel [(const_int 0)]))]
2396 UNSPEC_FIX_NOTRUNC))]
2398 "%vcvtsd2si\t{%1, %0|%0, %1}"
2399 [(set_attr "type" "sseicvt")
2400 (set_attr "athlon_decode" "double,vector")
2401 (set_attr "bdver1_decode" "double,double")
2402 (set_attr "prefix_rep" "1")
2403 (set_attr "prefix" "maybe_vex")
2404 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a scalar DFmode operand (register or
;; memory) rather than a vector element; result is an SImode register.
2406 (define_insn "sse2_cvtsd2si_2"
2407 [(set (match_operand:SI 0 "register_operand" "=r,r")
2408 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2409 UNSPEC_FIX_NOTRUNC))]
2411 "%vcvtsd2si\t{%1, %0|%0, %1}"
2412 [(set_attr "type" "sseicvt")
2413 (set_attr "athlon_decode" "double,vector")
2414 (set_attr "amdfam10_decode" "double,double")
2415 (set_attr "bdver1_decode" "double,double")
2416 (set_attr "prefix_rep" "1")
2417 (set_attr "prefix" "maybe_vex")
2418 (set_attr "mode" "SI")])
;; 64-bit form of cvtsd2si: convert element 0 of the V2DF operand 1 to a
;; DImode register (UNSPEC_FIX_NOTRUNC).  Requires TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode attribute is
;; set here -- confirm this is intentional.
2420 (define_insn "sse2_cvtsd2siq"
2421 [(set (match_operand:DI 0 "register_operand" "=r,r")
2424 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2425 (parallel [(const_int 0)]))]
2426 UNSPEC_FIX_NOTRUNC))]
2427 "TARGET_SSE2 && TARGET_64BIT"
2428 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2429 [(set_attr "type" "sseicvt")
2430 (set_attr "athlon_decode" "double,vector")
2431 (set_attr "bdver1_decode" "double,double")
2432 (set_attr "prefix_rep" "1")
2433 (set_attr "prefix" "maybe_vex")
2434 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si taking a scalar DFmode source (register or memory) and
;; producing a DImode register.  Requires TARGET_SSE2 && TARGET_64BIT.
2436 (define_insn "sse2_cvtsd2siq_2"
2437 [(set (match_operand:DI 0 "register_operand" "=r,r")
2438 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2439 UNSPEC_FIX_NOTRUNC))]
2440 "TARGET_SSE2 && TARGET_64BIT"
2441 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2442 [(set_attr "type" "sseicvt")
2443 (set_attr "athlon_decode" "double,vector")
2444 (set_attr "amdfam10_decode" "double,double")
2445 (set_attr "bdver1_decode" "double,double")
2446 (set_attr "prefix_rep" "1")
2447 (set_attr "prefix" "maybe_vex")
2448 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of the V2DF operand 1 (register or memory)
;; to an SImode register.
2450 (define_insn "sse2_cvttsd2si"
2451 [(set (match_operand:SI 0 "register_operand" "=r,r")
2454 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2455 (parallel [(const_int 0)]))))]
2457 "%vcvttsd2si\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sseicvt")
2459 (set_attr "athlon_decode" "double,vector")
2460 (set_attr "amdfam10_decode" "double,double")
2461 (set_attr "bdver1_decode" "double,double")
2462 (set_attr "prefix_rep" "1")
2463 (set_attr "prefix" "maybe_vex")
2464 (set_attr "mode" "SI")])
;; 64-bit form of cvttsd2si: element 0 of the V2DF operand 1 is converted
;; to a DImode register.  Requires TARGET_SSE2 && TARGET_64BIT.
2466 (define_insn "sse2_cvttsd2siq"
2467 [(set (match_operand:DI 0 "register_operand" "=r,r")
2470 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2471 (parallel [(const_int 0)]))))]
2472 "TARGET_SSE2 && TARGET_64BIT"
2473 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2474 [(set_attr "type" "sseicvt")
2475 (set_attr "athlon_decode" "double,vector")
2476 (set_attr "amdfam10_decode" "double,double")
2477 (set_attr "bdver1_decode" "double,double")
2478 (set_attr "prefix_rep" "1")
2479 (set_attr "prefix" "maybe_vex")
2480 (set_attr "mode" "DI")])
;; vcvtdq2pd, 256-bit result: integer-to-float conversion of a V4SI
;; operand (register or memory) into a V4DF register.
2482 (define_insn "avx_cvtdq2pd256"
2483 [(set (match_operand:V4DF 0 "register_operand" "=x")
2484 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2486 "vcvtdq2pd\t{%1, %0|%0, %1}"
2487 [(set_attr "type" "ssecvt")
2488 (set_attr "prefix" "vex")
2489 (set_attr "mode" "V4DF")])
;; Unnamed variant of vcvtdq2pd whose source is elements 0..3 of a V8SI
;; operand; the template prints operand 1 with the %x modifier.
2491 (define_insn "*avx_cvtdq2pd256_2"
2492 [(set (match_operand:V4DF 0 "register_operand" "=x")
2495 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2496 (parallel [(const_int 0) (const_int 1)
2497 (const_int 2) (const_int 3)]))))]
2499 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2500 [(set_attr "type" "ssecvt")
2501 (set_attr "prefix" "vex")
2502 (set_attr "mode" "V4DF")])
;; cvtdq2pd: produces a V2DF register from elements 0 and 1 of the V4SI
;; operand 1 (register or memory).
2504 (define_insn "sse2_cvtdq2pd"
2505 [(set (match_operand:V2DF 0 "register_operand" "=x")
2508 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2509 (parallel [(const_int 0) (const_int 1)]))))]
2511 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2512 [(set_attr "type" "ssecvt")
2513 (set_attr "prefix" "maybe_vex")
2514 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand (register or memory)
;; converted to a V4SI register, expressed with UNSPEC_FIX_NOTRUNC.
;; The AT&T mnemonic carries a {y} suffix.
2516 (define_insn "avx_cvtpd2dq256"
2517 [(set (match_operand:V4SI 0 "register_operand" "=x")
2518 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2519 UNSPEC_FIX_NOTRUNC))]
2521 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2522 [(set_attr "type" "ssecvt")
2523 (set_attr "prefix" "vex")
2524 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted to V2SI inside a
;; V4SI destination.  The preparation statement supplies a V2SImode zero
;; constant as operands[2].
2526 (define_expand "sse2_cvtpd2dq"
2527 [(set (match_operand:V4SI 0 "register_operand" "")
2529 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2533 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: V2DF operand 1 converted to V2SI, combined with a
;; zero V2SI constant (operand 2, const0_operand) into a V4SI register.
;; The output code returns either the VEX form with an {x} suffix or the
;; plain cvtpd2dq form.
2535 (define_insn "*sse2_cvtpd2dq"
2536 [(set (match_operand:V4SI 0 "register_operand" "=x")
2538 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2540 (match_operand:V2SI 2 "const0_operand" "")))]
2544 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2546 return "cvtpd2dq\t{%1, %0|%0, %1}";
2548 [(set_attr "type" "ssecvt")
2549 (set_attr "prefix_rep" "1")
2550 (set_attr "prefix_data16" "0")
2551 (set_attr "prefix" "maybe_vex")
2552 (set_attr "mode" "TI")
2553 (set_attr "amdfam10_decode" "double")
2554 (set_attr "athlon_decode" "vector")
2555 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq with a 256-bit source: fix (float-to-integer) conversion of
;; a V4DF operand (register or memory) into a V4SI register.  The AT&T
;; mnemonic carries a {y} suffix.
2557 (define_insn "avx_cvttpd2dq256"
2558 [(set (match_operand:V4SI 0 "register_operand" "=x")
2559 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2561 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2562 [(set_attr "type" "ssecvt")
2563 (set_attr "prefix" "vex")
2564 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: V2DF operand 1 goes through a fix conversion to
;; V2SI inside a V4SI destination.  The preparation statement supplies a
;; V2SImode zero constant as operands[2].
2566 (define_expand "sse2_cvttpd2dq"
2567 [(set (match_operand:V4SI 0 "register_operand" "")
2569 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2572 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: fix conversion of V2DF operand 1 to V2SI,
;; combined with a zero V2SI constant (operand 2, const0_operand) into a
;; V4SI register.  The output code returns either the VEX form with an
;; {x} suffix or the plain cvttpd2dq form.
2574 (define_insn "*sse2_cvttpd2dq"
2575 [(set (match_operand:V4SI 0 "register_operand" "=x")
2577 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2578 (match_operand:V2SI 2 "const0_operand" "")))]
2582 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2584 return "cvttpd2dq\t{%1, %0|%0, %1}";
2586 [(set_attr "type" "ssecvt")
2587 (set_attr "amdfam10_decode" "double")
2588 (set_attr "athlon_decode" "vector")
2589 (set_attr "bdver1_decode" "double")
2590 (set_attr "prefix" "maybe_vex")
2591 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 (register or memory).
;; Operand 1 is a V4SF register that is tied to the destination in the two
;; non-AVX alternatives; alternative 2 emits the three-operand vcvtsd2ss.
2593 (define_insn "sse2_cvtsd2ss"
2594 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2597 (float_truncate:V2SF
2598 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2599 (match_operand:V4SF 1 "register_operand" "0,0,x")
2603 cvtsd2ss\t{%2, %0|%0, %2}
2604 cvtsd2ss\t{%2, %0|%0, %2}
2605 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2606 [(set_attr "isa" "noavx,noavx,avx")
2607 (set_attr "type" "ssecvt")
2608 (set_attr "athlon_decode" "vector,double,*")
2609 (set_attr "amdfam10_decode" "vector,double,*")
2610 (set_attr "bdver1_decode" "direct,direct,*")
2611 (set_attr "prefix" "orig,orig,vex")
2612 (set_attr "mode" "SF")])
;; cvtss2sd: the source is elements 0 and 1 of the V4SF operand 2
;; (register or memory).  Operand 1 is a V2DF register that is tied to the
;; destination in the two non-AVX alternatives; alternative 2 emits the
;; three-operand vcvtss2sd.
2614 (define_insn "sse2_cvtss2sd"
2615 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2619 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2620 (parallel [(const_int 0) (const_int 1)])))
2621 (match_operand:V2DF 1 "register_operand" "0,0,x")
2625 cvtss2sd\t{%2, %0|%0, %2}
2626 cvtss2sd\t{%2, %0|%0, %2}
2627 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2628 [(set_attr "isa" "noavx,noavx,avx")
2629 (set_attr "type" "ssecvt")
2630 (set_attr "amdfam10_decode" "vector,double,*")
2631 (set_attr "athlon_decode" "direct,direct,*")
2632 (set_attr "bdver1_decode" "direct,direct,*")
2633 (set_attr "prefix" "orig,orig,vex")
2634 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of a V4DF operand
;; (register or memory) into a V4SF register.  The AT&T mnemonic carries
;; a {y} suffix.
2636 (define_insn "avx_cvtpd2ps256"
2637 [(set (match_operand:V4SF 0 "register_operand" "=x")
2638 (float_truncate:V4SF
2639 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2641 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2642 [(set_attr "type" "ssecvt")
2643 (set_attr "prefix" "vex")
2644 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate of V2DF operand 1 to V2SF inside
;; a V4SF destination.  The preparation statement supplies a V2SFmode zero
;; constant as operands[2].
2646 (define_expand "sse2_cvtpd2ps"
2647 [(set (match_operand:V4SF 0 "register_operand" "")
2649 (float_truncate:V2SF
2650 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2653 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: float_truncate of V2DF operand 1 to V2SF,
;; combined with a zero V2SF constant (operand 2, const0_operand) into a
;; V4SF register.  The output code returns either the VEX form with an
;; {x} suffix or the plain cvtpd2ps form.
2655 (define_insn "*sse2_cvtpd2ps"
2656 [(set (match_operand:V4SF 0 "register_operand" "=x")
2658 (float_truncate:V2SF
2659 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2660 (match_operand:V2SF 2 "const0_operand" "")))]
2664 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2666 return "cvtpd2ps\t{%1, %0|%0, %1}";
2668 [(set_attr "type" "ssecvt")
2669 (set_attr "amdfam10_decode" "double")
2670 (set_attr "athlon_decode" "vector")
2671 (set_attr "bdver1_decode" "double")
2672 (set_attr "prefix_data16" "1")
2673 (set_attr "prefix" "maybe_vex")
2674 (set_attr "mode" "V4SF")])
;; vcvtps2pd, 256-bit result: a V4SF operand (register or memory) is
;; converted into a V4DF register.
2676 (define_insn "avx_cvtps2pd256"
2677 [(set (match_operand:V4DF 0 "register_operand" "=x")
2679 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2681 "vcvtps2pd\t{%1, %0|%0, %1}"
2682 [(set_attr "type" "ssecvt")
2683 (set_attr "prefix" "vex")
2684 (set_attr "mode" "V4DF")])
;; Unnamed variant of vcvtps2pd whose source is elements 0..3 of a V8SF
;; operand; the template prints operand 1 with the %x modifier.
2686 (define_insn "*avx_cvtps2pd256_2"
2687 [(set (match_operand:V4DF 0 "register_operand" "=x")
2690 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2691 (parallel [(const_int 0) (const_int 1)
2692 (const_int 2) (const_int 3)]))))]
2694 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "prefix" "vex")
2697 (set_attr "mode" "V4DF")])
;; cvtps2pd: produces a V2DF register from elements 0 and 1 of the V4SF
;; operand 1 (register or memory).
2699 (define_insn "sse2_cvtps2pd"
2700 [(set (match_operand:V2DF 0 "register_operand" "=x")
2703 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2704 (parallel [(const_int 0) (const_int 1)]))))]
2706 "%vcvtps2pd\t{%1, %0|%0, %1}"
2707 [(set_attr "type" "ssecvt")
2708 (set_attr "amdfam10_decode" "direct")
2709 (set_attr "athlon_decode" "double")
2710 (set_attr "bdver1_decode" "double")
2711 (set_attr "prefix_data16" "0")
2712 (set_attr "prefix" "maybe_vex")
2713 (set_attr "mode" "V2DF")])
;; Expander producing V2DF operand 0 from V4SF operand 1 in two steps.
;; The first step uses the selector [6 7 2 3]; the second selects elements
;; [0 1] and sets operand 0.  operands[2] is a fresh V4SF temporary.
2715 (define_expand "vec_unpacks_hi_v4sf"
2720 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2721 (parallel [(const_int 6) (const_int 7)
2722 (const_int 2) (const_int 3)])))
2723 (set (match_operand:V2DF 0 "register_operand" "")
2727 (parallel [(const_int 0) (const_int 1)]))))]
2729 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing V4DF operand 0 from V8SF operand 1: elements 4..7
;; are selected first, then operand 0 is set.  operands[2] is a fresh V4SF
;; temporary.
2731 (define_expand "vec_unpacks_hi_v8sf"
2734 (match_operand:V8SF 1 "nonimmediate_operand" "")
2735 (parallel [(const_int 4) (const_int 5)
2736 (const_int 6) (const_int 7)])))
2737 (set (match_operand:V4DF 0 "register_operand" "")
2741 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
2743 (define_expand "vec_unpacks_lo_v4sf"
2744 [(set (match_operand:V2DF 0 "register_operand" "")
2747 (match_operand:V4SF 1 "nonimmediate_operand" "")
2748 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting V4DF operand 0 from elements 0..3 of V8SF operand 1.
2751 (define_expand "vec_unpacks_lo_v8sf"
2752 [(set (match_operand:V4DF 0 "register_operand" "")
2755 (match_operand:V8SF 1 "nonimmediate_operand" "")
2756 (parallel [(const_int 0) (const_int 1)
2757 (const_int 2) (const_int 3)]))))]
;; V8HI operand 1 -> V4SF operand 0.  Emits vec_unpacks_hi_v8hi into a
;; V4SI temporary and then converts that temporary with sse2_cvtdq2ps.
2760 (define_expand "vec_unpacks_float_hi_v8hi"
2761 [(match_operand:V4SF 0 "register_operand" "")
2762 (match_operand:V8HI 1 "register_operand" "")]
2765 rtx tmp = gen_reg_rtx (V4SImode);
2767 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2768 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0.  Emits vec_unpacks_lo_v8hi into a
;; V4SI temporary and then converts that temporary with sse2_cvtdq2ps.
2772 (define_expand "vec_unpacks_float_lo_v8hi"
2773 [(match_operand:V4SF 0 "register_operand" "")
2774 (match_operand:V8HI 1 "register_operand" "")]
2777 rtx tmp = gen_reg_rtx (V4SImode);
2779 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2780 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0.  Emits vec_unpacku_hi_v8hi into a
;; V4SI temporary and then converts that temporary with sse2_cvtdq2ps.
2784 (define_expand "vec_unpacku_float_hi_v8hi"
2785 [(match_operand:V4SF 0 "register_operand" "")
2786 (match_operand:V8HI 1 "register_operand" "")]
2789 rtx tmp = gen_reg_rtx (V4SImode);
2791 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2792 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0.  Emits vec_unpacku_lo_v8hi into a
;; V4SI temporary and then converts that temporary with sse2_cvtdq2ps.
2796 (define_expand "vec_unpacku_float_lo_v8hi"
2797 [(match_operand:V4SF 0 "register_operand" "")
2798 (match_operand:V8HI 1 "register_operand" "")]
2801 rtx tmp = gen_reg_rtx (V4SImode);
2803 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2804 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing V2DF operand 0 from V4SI operand 1 in two steps.
;; The first step selects elements [2 3 2 3]; the second selects elements
;; [0 1] and sets operand 0.  operands[2] is a fresh V4SI temporary.
2808 (define_expand "vec_unpacks_float_hi_v4si"
2811 (match_operand:V4SI 1 "nonimmediate_operand" "")
2812 (parallel [(const_int 2) (const_int 3)
2813 (const_int 2) (const_int 3)])))
2814 (set (match_operand:V2DF 0 "register_operand" "")
2818 (parallel [(const_int 0) (const_int 1)]))))]
2820 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
2822 (define_expand "vec_unpacks_float_lo_v4si"
2823 [(set (match_operand:V2DF 0 "register_operand" "")
2826 (match_operand:V4SI 1 "nonimmediate_operand" "")
2827 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF operand 0 from V8SI operand 1: elements 4..7
;; are selected first, then operand 0 is set.  operands[2] is a fresh V4SI
;; temporary.
2830 (define_expand "vec_unpacks_float_hi_v8si"
2833 (match_operand:V8SI 1 "nonimmediate_operand" "")
2834 (parallel [(const_int 4) (const_int 5)
2835 (const_int 6) (const_int 7)])))
2836 (set (match_operand:V4DF 0 "register_operand" "")
2840 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V4DF operand 0 from elements 0..3 of V8SI operand 1.
2842 (define_expand "vec_unpacks_float_lo_v8si"
2843 [(set (match_operand:V4DF 0 "register_operand" "")
2846 (match_operand:V8SI 1 "nonimmediate_operand" "")
2847 (parallel [(const_int 0) (const_int 1)
2848 (const_int 2) (const_int 3)]))))]
;; Expander producing V2DF operand 0 from V4SI operand 1.  Visible steps:
;; a selection [2 3 2 3] of operand 1, a selection of elements [0 1], an
;; lt comparison against operands[3], an and with operands[4], and a
;; final plus that sets operand 0.
;; The preparation code sets operands[3] to a zero V2DF vector and
;; operands[4] to a V2DF constant built from 2**32; operands[5] is a V4SI
;; temporary and operands[6..8] are V2DF temporaries.
;; NOTE(review): presumably the 2**32 correction implements the unsigned
;; conversion -- confirm against the full pattern.
2851 (define_expand "vec_unpacku_float_hi_v4si"
2854 (match_operand:V4SI 1 "nonimmediate_operand" "")
2855 (parallel [(const_int 2) (const_int 3)
2856 (const_int 2) (const_int 3)])))
2861 (parallel [(const_int 0) (const_int 1)]))))
2863 (lt:V2DF (match_dup 6) (match_dup 3)))
2865 (and:V2DF (match_dup 7) (match_dup 4)))
2866 (set (match_operand:V2DF 0 "register_operand" "")
2867 (plus:V2DF (match_dup 6) (match_dup 8)))]
2870 REAL_VALUE_TYPE TWO32r;
2874 real_ldexp (&TWO32r, &dconst1, 32);
2875 x = const_double_from_real_value (TWO32r, DFmode);
2877 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2878 operands[4] = force_reg (V2DFmode,
2879 ix86_build_const_vector (V2DFmode, 1, x));
2881 operands[5] = gen_reg_rtx (V4SImode);
2883 for (i = 6; i < 9; i++)
2884 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing V2DF operand 0 from V4SI operand 1.  Visible steps:
;; a selection of elements [0 1] of operand 1, an lt comparison against
;; operands[3], an and with operands[4], and a final plus that sets
;; operand 0.
;; The preparation code sets operands[3] to a zero V2DF vector and
;; operands[4] to a V2DF constant built from 2**32; operands[5..7] are
;; V2DF temporaries.
;; NOTE(review): presumably the 2**32 correction implements the unsigned
;; conversion -- confirm against the full pattern.
2887 (define_expand "vec_unpacku_float_lo_v4si"
2891 (match_operand:V4SI 1 "nonimmediate_operand" "")
2892 (parallel [(const_int 0) (const_int 1)]))))
2894 (lt:V2DF (match_dup 5) (match_dup 3)))
2896 (and:V2DF (match_dup 6) (match_dup 4)))
2897 (set (match_operand:V2DF 0 "register_operand" "")
2898 (plus:V2DF (match_dup 5) (match_dup 7)))]
2901 REAL_VALUE_TYPE TWO32r;
2905 real_ldexp (&TWO32r, &dconst1, 32);
2906 x = const_double_from_real_value (TWO32r, DFmode);
2908 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2909 operands[4] = force_reg (V2DFmode,
2910 ix86_build_const_vector (V2DFmode, 1, x));
2912 for (i = 5; i < 8; i++)
2913 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander that applies float_truncate (V4DF -> V4SF) to operands 1 and 2
;; and then sets the V8SF operand 0.  operands[3] and operands[4] are
;; fresh V4SF temporaries.
2916 (define_expand "vec_pack_trunc_v4df"
2918 (float_truncate:V4SF
2919 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2921 (float_truncate:V4SF
2922 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2923 (set (match_operand:V8SF 0 "register_operand" "")
2929 operands[3] = gen_reg_rtx (V4SFmode);
2930 operands[4] = gen_reg_rtx (V4SFmode);
;; Two V2DF operands -> one V4SF operand 0.  Each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2); the temporaries are
;; then combined into operand 0 with sse_movlhps.
2933 (define_expand "vec_pack_trunc_v2df"
2934 [(match_operand:V4SF 0 "register_operand" "")
2935 (match_operand:V2DF 1 "nonimmediate_operand" "")
2936 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2941 r1 = gen_reg_rtx (V4SFmode);
2942 r2 = gen_reg_rtx (V4SFmode);
2944 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2945 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2946 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Two V2DF operands -> one V4SI operand 0.  Each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the temporaries are
;; then combined with vec_interleave_lowv2di, viewing operand 0, r1 and r2
;; as V2DI through gen_lowpart.
2950 (define_expand "vec_pack_sfix_trunc_v2df"
2951 [(match_operand:V4SI 0 "register_operand" "")
2952 (match_operand:V2DF 1 "nonimmediate_operand" "")
2953 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2958 r1 = gen_reg_rtx (V4SImode);
2959 r2 = gen_reg_rtx (V4SImode);
2961 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2962 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2963 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2964 gen_lowpart (V2DImode, r1),
2965 gen_lowpart (V2DImode, r2)));
;; Two V2DF operands -> one V4SI operand 0.  Each input is converted with
;; sse2_cvtpd2dq into its own V4SI temporary (r1, r2); the temporaries are
;; then combined with vec_interleave_lowv2di, viewing operand 0, r1 and r2
;; as V2DI through gen_lowpart.
2969 (define_expand "vec_pack_sfix_v2df"
2970 [(match_operand:V4SI 0 "register_operand" "")
2971 (match_operand:V2DF 1 "nonimmediate_operand" "")
2972 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2977 r1 = gen_reg_rtx (V4SImode);
2978 r2 = gen_reg_rtx (V4SImode);
2980 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2981 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2982 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2983 gen_lowpart (V2DImode, r1),
2984 gen_lowpart (V2DImode, r2)));
2988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2990 ;; Parallel single-precision floating point element swizzling
2992 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps accepting nonimmediate operands.
;; ix86_fixup_binary_operands chooses the destination actually used (dst);
;; sse_movhlps is emitted into dst, and the result is copied to operands[0]
;; when dst is a different rtx.
2994 (define_expand "sse_movhlps_exp"
2995 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2998 (match_operand:V4SF 1 "nonimmediate_operand" "")
2999 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3000 (parallel [(const_int 6)
3006 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3008 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3010 /* Fix up the destination if needed. */
3011 if (dst != operands[0])
3012 emit_move_insn (operands[0], dst);
;; movhlps family with five alternatives.  The insn is valid only when
;; TARGET_SSE holds and operands 1 and 2 are not both memory.
;;   alt 0/1: register sources -> movhlps / vmovhlps
;;   alt 2/3: offsettable-memory operand 2 -> movlps / vmovlps on %H2
;;   alt 4:   memory destination -> movhps (with %v) of operand 2
3017 (define_insn "sse_movhlps"
3018 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3021 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3022 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3023 (parallel [(const_int 6)
3027 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3029 movhlps\t{%2, %0|%0, %2}
3030 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3031 movlps\t{%H2, %0|%0, %H2}
3032 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3033 %vmovhps\t{%2, %0|%0, %2}"
3034 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3035 (set_attr "type" "ssemov")
3036 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3037 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps accepting nonimmediate operands.
;; ix86_fixup_binary_operands chooses the destination actually used (dst);
;; sse_movlhps is emitted into dst, and the result is copied to operands[0]
;; when dst is a different rtx.
3039 (define_expand "sse_movlhps_exp"
3040 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3043 (match_operand:V4SF 1 "nonimmediate_operand" "")
3044 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3045 (parallel [(const_int 0)
3051 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3053 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3055 /* Fix up the destination if needed. */
3056 if (dst != operands[0])
3057 emit_move_insn (operands[0], dst);
;; movlhps family with five alternatives.  The insn is valid only when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
;;   alt 0/1: movlhps / vmovlhps
;;   alt 2/3: movhps / vmovhps
;;   alt 4:   offsettable-memory destination -> movlps (with %v) to %H0
3062 (define_insn "sse_movlhps"
3063 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3066 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3067 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3068 (parallel [(const_int 0)
3072 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3074 movlhps\t{%2, %0|%0, %2}
3075 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3076 movhps\t{%2, %0|%0, %2}
3077 vmovhps\t{%2, %1, %0|%0, %1, %2}
3078 %vmovlps\t{%2, %H0|%H0, %2}"
3079 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3080 (set_attr "type" "ssemov")
3081 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3082 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3084 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: selects elements [2 10 3 11 6 14 7 15] from the
;; pairing of operand 1 (register) and operand 2 (register or memory).
3085 (define_insn "avx_unpckhps256"
3086 [(set (match_operand:V8SF 0 "register_operand" "=x")
3089 (match_operand:V8SF 1 "register_operand" "x")
3090 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3091 (parallel [(const_int 2) (const_int 10)
3092 (const_int 3) (const_int 11)
3093 (const_int 6) (const_int 14)
3094 (const_int 7) (const_int 15)])))]
3096 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3097 [(set_attr "type" "sselog")
3098 (set_attr "prefix" "vex")
3099 (set_attr "mode" "V8SF")])
;; Expander for V8SF operands built from three steps: one using the
;; selector [0 8 1 9 4 12 5 13], one using [2 10 3 11 6 14 7 15], and a
;; final step that sets operand 0 with selector [4 5 6 7 12 13 14 15].
;; operands[3] and operands[4] are fresh V8SF temporaries.
3101 (define_expand "vec_interleave_highv8sf"
3105 (match_operand:V8SF 1 "register_operand" "x")
3106 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3107 (parallel [(const_int 0) (const_int 8)
3108 (const_int 1) (const_int 9)
3109 (const_int 4) (const_int 12)
3110 (const_int 5) (const_int 13)])))
3116 (parallel [(const_int 2) (const_int 10)
3117 (const_int 3) (const_int 11)
3118 (const_int 6) (const_int 14)
3119 (const_int 7) (const_int 15)])))
3120 (set (match_operand:V8SF 0 "register_operand" "")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 12) (const_int 13)
3128 (const_int 14) (const_int 15)])))]
3131 operands[3] = gen_reg_rtx (V8SFmode);
3132 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps on V4SF: selects elements [2 6 3 7] from the pairing of
;; operand 1 and operand 2.  Alternative 0 (non-AVX) ties operand 1 to the
;; destination; alternative 1 emits the three-operand vunpckhps.
3135 (define_insn "vec_interleave_highv4sf"
3136 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3139 (match_operand:V4SF 1 "register_operand" "0,x")
3140 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3141 (parallel [(const_int 2) (const_int 6)
3142 (const_int 3) (const_int 7)])))]
3145 unpckhps\t{%2, %0|%0, %2}
3146 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3147 [(set_attr "isa" "noavx,avx")
3148 (set_attr "type" "sselog")
3149 (set_attr "prefix" "orig,vex")
3150 (set_attr "mode" "V4SF")])
3152 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: selects elements [0 8 1 9 4 12 5 13] from the
;; pairing of operand 1 (register) and operand 2 (register or memory).
3153 (define_insn "avx_unpcklps256"
3154 [(set (match_operand:V8SF 0 "register_operand" "=x")
3157 (match_operand:V8SF 1 "register_operand" "x")
3158 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3159 (parallel [(const_int 0) (const_int 8)
3160 (const_int 1) (const_int 9)
3161 (const_int 4) (const_int 12)
3162 (const_int 5) (const_int 13)])))]
3164 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3165 [(set_attr "type" "sselog")
3166 (set_attr "prefix" "vex")
3167 (set_attr "mode" "V8SF")])
;; Expander for V8SF operands built from three steps: one using the
;; selector [0 8 1 9 4 12 5 13], one using [2 10 3 11 6 14 7 15], and a
;; final step that sets operand 0 with selector [0 1 2 3 8 9 10 11].
;; operands[3] and operands[4] are fresh V8SF temporaries.
3169 (define_expand "vec_interleave_lowv8sf"
3173 (match_operand:V8SF 1 "register_operand" "x")
3174 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3175 (parallel [(const_int 0) (const_int 8)
3176 (const_int 1) (const_int 9)
3177 (const_int 4) (const_int 12)
3178 (const_int 5) (const_int 13)])))
3184 (parallel [(const_int 2) (const_int 10)
3185 (const_int 3) (const_int 11)
3186 (const_int 6) (const_int 14)
3187 (const_int 7) (const_int 15)])))
3188 (set (match_operand:V8SF 0 "register_operand" "")
3193 (parallel [(const_int 0) (const_int 1)
3194 (const_int 2) (const_int 3)
3195 (const_int 8) (const_int 9)
3196 (const_int 10) (const_int 11)])))]
3199 operands[3] = gen_reg_rtx (V8SFmode);
3200 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps on V4SF: selects elements [0 4 1 5] from the pairing of
;; operand 1 and operand 2.  Alternative 0 (non-AVX) ties operand 1 to the
;; destination; alternative 1 emits the three-operand vunpcklps.
3203 (define_insn "vec_interleave_lowv4sf"
3204 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3207 (match_operand:V4SF 1 "register_operand" "0,x")
3208 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3209 (parallel [(const_int 0) (const_int 4)
3210 (const_int 1) (const_int 5)])))]
3213 unpcklps\t{%2, %0|%0, %2}
3214 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3215 [(set_attr "isa" "noavx,avx")
3216 (set_attr "type" "sselog")
3217 (set_attr "prefix" "orig,vex")
3218 (set_attr "mode" "V4SF")])
3220 ;; These are modeled with the same vec_concat as the others so that we
3221 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selector [1 1 3 3 5 5 7 7] duplicates each
;; odd-indexed element of operand 1 (register or memory).
3222 (define_insn "avx_movshdup256"
3223 [(set (match_operand:V8SF 0 "register_operand" "=x")
3226 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3228 (parallel [(const_int 1) (const_int 1)
3229 (const_int 3) (const_int 3)
3230 (const_int 5) (const_int 5)
3231 (const_int 7) (const_int 7)])))]
3233 "vmovshdup\t{%1, %0|%0, %1}"
3234 [(set_attr "type" "sse")
3235 (set_attr "prefix" "vex")
3236 (set_attr "mode" "V8SF")])
;; movshdup on V4SF: single source operand 1 (register or memory); the
;; visible part of the selector starts with element 1.
3238 (define_insn "sse3_movshdup"
3239 [(set (match_operand:V4SF 0 "register_operand" "=x")
3242 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3244 (parallel [(const_int 1)
3249 "%vmovshdup\t{%1, %0|%0, %1}"
3250 [(set_attr "type" "sse")
3251 (set_attr "prefix_rep" "1")
3252 (set_attr "prefix" "maybe_vex")
3253 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selector [0 0 2 2 4 4 6 6] duplicates each
;; even-indexed element of operand 1 (register or memory).
3255 (define_insn "avx_movsldup256"
3256 [(set (match_operand:V8SF 0 "register_operand" "=x")
3259 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3261 (parallel [(const_int 0) (const_int 0)
3262 (const_int 2) (const_int 2)
3263 (const_int 4) (const_int 4)
3264 (const_int 6) (const_int 6)])))]
3266 "vmovsldup\t{%1, %0|%0, %1}"
3267 [(set_attr "type" "sse")
3268 (set_attr "prefix" "vex")
3269 (set_attr "mode" "V8SF")])
;; movsldup on V4SF: single source operand 1 (register or memory); the
;; visible part of the selector starts with element 0.
3271 (define_insn "sse3_movsldup"
3272 [(set (match_operand:V4SF 0 "register_operand" "=x")
3275 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3277 (parallel [(const_int 0)
3282 "%vmovsldup\t{%1, %0|%0, %1}"
3283 [(set_attr "type" "sse")
3284 (set_attr "prefix_rep" "1")
3285 (set_attr "prefix" "maybe_vex")
3286 (set_attr "mode" "V4SF")])
;; Expander taking an immediate mask (operand 3) and forwarding to
;; avx_shufps256_1 with eight explicit element selectors.  Each 2-bit
;; field of the mask is used twice: fields 0 and 1 give selectors in 0..3
;; and again offset by 4; fields 2 and 3 give selectors offset by 8 and
;; again offset by 12.
3288 (define_expand "avx_shufps256"
3289 [(match_operand:V8SF 0 "register_operand" "")
3290 (match_operand:V8SF 1 "register_operand" "")
3291 (match_operand:V8SF 2 "nonimmediate_operand" "")
3292 (match_operand:SI 3 "const_int_operand" "")]
3295 int mask = INTVAL (operands[3]);
3296 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3297 GEN_INT ((mask >> 0) & 3),
3298 GEN_INT ((mask >> 2) & 3),
3299 GEN_INT (((mask >> 4) & 3) + 8),
3300 GEN_INT (((mask >> 6) & 3) + 8),
3301 GEN_INT (((mask >> 0) & 3) + 4),
3302 GEN_INT (((mask >> 2) & 3) + 4),
3303 GEN_INT (((mask >> 4) & 3) + 12),
3304 GEN_INT (((mask >> 6) & 3) + 12)));
3308 ;; One bit in mask selects 2 elements.
;; Operands 3..10 are the eight explicit element selectors.  The insn
;; condition only accepts selector sets in which operands 7..10 equal
;; operands 3..6 plus 4.  The output code packs operands 3..6 back into a
;; single immediate (operands 5 and 6 have their bias of 8 removed) and
;; stores it in operands[3] for the vshufps template.
3309 (define_insn "avx_shufps256_1"
3310 [(set (match_operand:V8SF 0 "register_operand" "=x")
3313 (match_operand:V8SF 1 "register_operand" "x")
3314 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3315 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3316 (match_operand 4 "const_0_to_3_operand" "")
3317 (match_operand 5 "const_8_to_11_operand" "")
3318 (match_operand 6 "const_8_to_11_operand" "")
3319 (match_operand 7 "const_4_to_7_operand" "")
3320 (match_operand 8 "const_4_to_7_operand" "")
3321 (match_operand 9 "const_12_to_15_operand" "")
3322 (match_operand 10 "const_12_to_15_operand" "")])))]
3324 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3325 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3326 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3327 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3330 mask = INTVAL (operands[3]);
3331 mask |= INTVAL (operands[4]) << 2;
3332 mask |= (INTVAL (operands[5]) - 8) << 4;
3333 mask |= (INTVAL (operands[6]) - 8) << 6;
3334 operands[3] = GEN_INT (mask);
3336 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3338 [(set_attr "type" "sselog")
3339 (set_attr "length_immediate" "1")
3340 (set_attr "prefix" "vex")
3341 (set_attr "mode" "V8SF")])
;; Expander for the immediate form of the 128-bit shufps.  Operand 3 is an
;; integer mask made of four 2-bit fields.  It is decoded into the four
;; explicit selectors of sse_shufps_v4sf: fields 0 and 1 are passed as is,
;; fields 2 and 3 are biased by 4.
3343 (define_expand "sse_shufps"
3344 [(match_operand:V4SF 0 "register_operand" "")
3345 (match_operand:V4SF 1 "register_operand" "")
3346 (match_operand:V4SF 2 "nonimmediate_operand" "")
3347 (match_operand:SI 3 "const_int_operand" "")]
3350 int mask = INTVAL (operands[3]);
3351 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3352 GEN_INT ((mask >> 0) & 3),
3353 GEN_INT ((mask >> 2) & 3),
3354 GEN_INT (((mask >> 4) & 3) + 4),
3355 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps on the VI4F_128 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 select from
;; positions 0..3, operands 5 and 6 from positions 4..7.  The output code
;; packs the four selectors into one immediate (removing the bias of 4 from
;; operands 5 and 6) and stores it in operands[3].  Alternative 0 (noavx)
;; ties operand 1 to the destination; alternative 1 (avx) is three-operand.
3359 (define_insn "sse_shufps_<mode>"
3360 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3361 (vec_select:VI4F_128
3362 (vec_concat:<ssedoublevecmode>
3363 (match_operand:VI4F_128 1 "register_operand" "0,x")
3364 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3365 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3366 (match_operand 4 "const_0_to_3_operand" "")
3367 (match_operand 5 "const_4_to_7_operand" "")
3368 (match_operand 6 "const_4_to_7_operand" "")])))]
3372 mask |= INTVAL (operands[3]) << 0;
3373 mask |= INTVAL (operands[4]) << 2;
3374 mask |= (INTVAL (operands[5]) - 4) << 4;
3375 mask |= (INTVAL (operands[6]) - 4) << 6;
3376 operands[3] = GEN_INT (mask);
3378 switch (which_alternative)
3381 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3383 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3388 [(set_attr "isa" "noavx,avx")
3389 (set_attr "type" "sselog")
3390 (set_attr "length_immediate" "1")
3391 (set_attr "prefix" "orig,vex")
3392 (set_attr "mode" "V4SF")])
;; Produce the V2SF made of elements 2 and 3 of V4SF operand 1.
;; Alternatives: 0 stores to memory with movhps; 1 is register to register
;; with movhlps; 2 takes operand 1 from offsettable memory and loads it
;; with movlps through the %H operand modifier.
;; NOTE(review): %H presumably addresses the upper half of the memory
;; operand -- confirm against the operand printer in i386.c.
3394 (define_insn "sse_storehps"
3395 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3397 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3398 (parallel [(const_int 2) (const_int 3)])))]
3401 %vmovhps\t{%1, %0|%0, %1}
3402 %vmovhlps\t{%1, %d0|%d0, %1}
3403 %vmovlps\t{%H1, %d0|%d0, %H1}"
3404 [(set_attr "type" "ssemov")
3405 (set_attr "prefix" "maybe_vex")
3406 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; sse_loadhps is emitted into that destination, and the result is
;; copied to operands[0] when the two differ.
3408 (define_expand "sse_loadhps_exp"
3409 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3412 (match_operand:V4SF 1 "nonimmediate_operand" "")
3413 (parallel [(const_int 0) (const_int 1)]))
3414 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3417 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3419 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3421 /* Fix up the destination if needed. */
3422 if (dst != operands[0])
3423 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps), 2/3 take
;; it from a register (movlhps / vmovlhps), and 4 has an offsettable memory
;; destination tied to operand 1 and writes operand 2 with movlps to %H0.
3428 (define_insn "sse_loadhps"
3429 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3432 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3433 (parallel [(const_int 0) (const_int 1)]))
3434 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3437 movhps\t{%2, %0|%0, %2}
3438 vmovhps\t{%2, %1, %0|%0, %1, %2}
3439 movlhps\t{%2, %0|%0, %2}
3440 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3441 %vmovlps\t{%2, %H0|%H0, %2}"
3442 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3443 (set_attr "type" "ssemov")
3444 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3445 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Produce the V2SF made of elements 0 and 1 of V4SF operand 1.
;; Alternatives: 0 stores to memory with movlps; 1 is a register to
;; register movaps; 2 loads from memory with movlps.
3447 (define_insn "sse_storelps"
3448 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3450 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3451 (parallel [(const_int 0) (const_int 1)])))]
3454 %vmovlps\t{%1, %0|%0, %1}
3455 %vmovaps\t{%1, %0|%0, %1}
3456 %vmovlps\t{%1, %d0|%d0, %1}"
3457 [(set_attr "type" "ssemov")
3458 (set_attr "prefix" "maybe_vex")
3459 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; sse_loadlps is emitted into that destination, and the result is
;; copied to operands[0] when the two differ.
3461 (define_expand "sse_loadlps_exp"
3462 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3464 (match_operand:V2SF 2 "nonimmediate_operand" "")
3466 (match_operand:V4SF 1 "nonimmediate_operand" "")
3467 (parallel [(const_int 2) (const_int 3)]))))]
3470 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3472 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3474 /* Fix up the destination if needed. */
3475 if (dst != operands[0])
3476 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 are register-only and use shufps / vshufps with the
;; immediate 0xe4; 2/3 load operand 2 from memory or a register with
;; movlps / vmovlps; 4 has a memory destination tied to operand 1 and
;; stores operand 2 with movlps.
3481 (define_insn "sse_loadlps"
3482 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3484 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3486 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3487 (parallel [(const_int 2) (const_int 3)]))))]
3490 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3491 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3492 movlps\t{%2, %0|%0, %2}
3493 vmovlps\t{%2, %1, %0|%0, %1, %2}
3494 %vmovlps\t{%2, %0|%0, %2}"
3495 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3496 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3497 (set_attr "length_immediate" "1,1,*,*,*")
3498 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3499 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss on two V4SF operands.  Alternative 0 (noavx) ties
;; operand 1 to the destination and emits movss; alternative 1 (avx) emits
;; the three-operand vmovss.  The "mode" attribute is SF.
3501 (define_insn "sse_movss"
3502 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3504 (match_operand:V4SF 2 "register_operand" " x,x")
3505 (match_operand:V4SF 1 "register_operand" " 0,x")
3509 movss\t{%2, %0|%0, %2}
3510 vmovss\t{%2, %1, %0|%0, %1, %2}"
3511 [(set_attr "isa" "noavx,avx")
3512 (set_attr "type" "ssemov")
3513 (set_attr "prefix" "orig,vex")
3514 (set_attr "mode" "SF")])
;; Expander producing a V4SF from SF operand 1.  Operand 1 is accepted as a
;; register or memory; the preparation code may force it into an SFmode
;; register.
3516 (define_expand "vec_dupv4sf"
3517 [(set (match_operand:V4SF 0 "register_operand" "")
3519 (match_operand:SF 1 "nonimmediate_operand" "")))]
3523 operands[1] = force_reg (SFmode, operands[1]);
;; V4SF result built from element 0 of V4SF register operand 1; emitted as
;; a register-source vbroadcastss.
3526 (define_insn "avx2_vec_dupv4sf"
3527 [(set (match_operand:V4SF 0 "register_operand" "=x")
3530 (match_operand:V4SF 1 "register_operand" "x")
3531 (parallel [(const_int 0)]))))]
3533 "vbroadcastss\t{%1, %0|%0, %1}"
3534 [(set_attr "type" "sselog1")
3535 (set_attr "prefix" "vex")
3536 (set_attr "mode" "V4SF")])
;; VEX-encoded V4SF duplicate of SF operand 1.  A register source uses
;; vshufps with immediate 0 (operand 1 given twice); a memory source uses
;; vbroadcastss.
3538 (define_insn "*vec_dupv4sf_avx"
3539 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3541 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3544 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3545 vbroadcastss\t{%1, %0|%0, %1}"
3546 [(set_attr "type" "sselog1,ssemov")
3547 (set_attr "length_immediate" "1,0")
3548 (set_attr "prefix_extra" "0,1")
3549 (set_attr "prefix" "vex")
3550 (set_attr "mode" "V4SF")])
;; V8SF result built from element 0 of V4SF register operand 1; emitted as
;; a register-source vbroadcastss.
3552 (define_insn "avx2_vec_dupv8sf"
3553 [(set (match_operand:V8SF 0 "register_operand" "=x")
3556 (match_operand:V4SF 1 "register_operand" "x")
3557 (parallel [(const_int 0)]))))]
3559 "vbroadcastss\t{%1, %0|%0, %1}"
3560 [(set_attr "type" "sselog1")
3561 (set_attr "prefix" "vex")
3562 (set_attr "mode" "V8SF")])
;; Legacy-encoded V4SF duplicate: SF operand 1 is tied to the destination
;; register, which is shuffled with itself using shufps and immediate 0.
3564 (define_insn "*vec_dupv4sf"
3565 [(set (match_operand:V4SF 0 "register_operand" "=x")
3567 (match_operand:SF 1 "register_operand" "0")))]
3569 "shufps\t{$0, %0, %0|%0, %0, 0}"
3570 [(set_attr "type" "sselog1")
3571 (set_attr "length_immediate" "1")
3572 (set_attr "mode" "V4SF")])
3574 ;; Although insertps takes register source, we prefer
3575 ;; unpcklps with register source since it is shorter.
;; Seven alternatives building a V2SF from SF operands 1 and 2:
;;   0/1  register operand 2: unpcklps / vunpcklps
;;   2/3  memory operand 2: insertps / vinsertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 matching constraint C: movss
;;   5/6  MMX-register (*y) destination: punpckldq, or movd when operand 2
;;        matches constraint C
3576 (define_insn "*vec_concatv2sf_sse4_1"
3577 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3579 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3580 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3583 unpcklps\t{%2, %0|%0, %2}
3584 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3585 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3586 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3587 %vmovss\t{%1, %0|%0, %1}
3588 punpckldq\t{%2, %0|%0, %2}
3589 movd\t{%1, %0|%0, %1}"
3590 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3591 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3592 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3593 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3594 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3595 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3596 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3598 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3599 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3600 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives: SSE register with register operand 2 (unpcklps), SSE
;; register with operand 1 in memory and operand 2 matching C (movss), and
;; the two corresponding MMX-register (*y) forms (punpckldq, movd).
3601 (define_insn "*vec_concatv2sf_sse"
3602 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3604 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3605 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3608 unpcklps\t{%2, %0|%0, %2}
3609 movss\t{%1, %0|%0, %1}
3610 punpckldq\t{%2, %0|%0, %2}
3611 movd\t{%1, %0|%0, %1}"
3612 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3613 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  Operand 1 is always a register.
;; A register operand 2 uses movlhps / vmovlhps (alternatives 0/1); a
;; memory operand 2 uses movhps / vmovhps (alternatives 2/3).
3615 (define_insn "*vec_concatv4sf"
3616 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3618 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3619 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3622 movlhps\t{%2, %0|%0, %2}
3623 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3624 movhps\t{%2, %0|%0, %2}
3625 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3626 [(set_attr "isa" "noavx,avx,noavx,avx")
3627 (set_attr "type" "ssemov")
3628 (set_attr "prefix" "orig,vex,orig,vex")
3629 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for every mode in V_128.  All the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument,
;; operand 0 as the target and operand 1 as the initializer.
3631 (define_expand "vec_init<mode>"
3632 [(match_operand:V_128 0 "register_operand" "")
3633 (match_operand 1 "" "")]
3636 ix86_expand_vector_init (false, operands[0], operands[1]);
3640 ;; Avoid combining registers from different units in a single alternative,
3641 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven alternatives over the VI4F_128 modes, with scalar operand 2 and
;; vector operand 1:
;;   0-3   operand 1 matches constraint C; the scalar comes from an SSE
;;         register, memory or a general register
;;   4-7   operand 1 is an SSE register: movss / vmovss or pinsrd / vpinsrd
;;   8-10  memory destination tied to operand 1; the scalar comes from an
;;         SSE register, fF, or a general register
;; The "type" attribute is sselog for alternatives 0, 6 and 7, fmov for 9,
;; imov for 10 and ssemov otherwise.
3642 (define_insn "vec_set<mode>_0"
3643 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3644 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3646 (vec_duplicate:VI4F_128
3647 (match_operand:<ssescalarmode> 2 "general_operand"
3648 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3649 (match_operand:VI4F_128 1 "vector_move_operand"
3650 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3654 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3655 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3656 %vmovd\t{%2, %0|%0, %2}
3657 movss\t{%2, %0|%0, %2}
3658 movss\t{%2, %0|%0, %2}
3659 vmovss\t{%2, %1, %0|%0, %1, %2}
3660 pinsrd\t{$0, %2, %0|%0, %2, 0}
3661 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3665 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3667 (cond [(eq_attr "alternative" "0,6,7")
3668 (const_string "sselog")
3669 (eq_attr "alternative" "9")
3670 (const_string "fmov")
3671 (eq_attr "alternative" "10")
3672 (const_string "imov")
3674 (const_string "ssemov")))
3675 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3676 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3677 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3678 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3680 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 with insertps / vinsertps.
;; Operand 3 must be a power of two whose exact_log2 is below the number of
;; V4SF units.  The output code replaces it by that log2 shifted left by 4
;; before printing it as the immediate.
3681 (define_insn "*vec_setv4sf_sse4_1"
3682 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3685 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3686 (match_operand:V4SF 1 "register_operand" "0,x")
3687 (match_operand:SI 3 "const_int_operand" "")))]
3689 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3690 < GET_MODE_NUNITS (V4SFmode))"
3692 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3693 switch (which_alternative)
3696 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3698 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3703 [(set_attr "isa" "noavx,avx")
3704 (set_attr "type" "sselog")
3705 (set_attr "prefix_data16" "1,*")
3706 (set_attr "prefix_extra" "1")
3707 (set_attr "length_immediate" "1")
3708 (set_attr "prefix" "orig,vex")
3709 (set_attr "mode" "V4SF")])
;; insertps / vinsertps as an unspec over V4SF operands 2 and 1 and an
;; immediate in 0..255 (operand 3).  When operand 2 is in memory, the
;; output code takes the top two bits of the immediate (imm >> 6), turns
;; them into a byte offset of count_s * 4 on an SFmode reference to that
;; memory, and masks the immediate down to its low six bits.
3711 (define_insn "sse4_1_insertps"
3712 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3713 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3714 (match_operand:V4SF 1 "register_operand" "0,x")
3715 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3719 if (MEM_P (operands[2]))
3721 unsigned count_s = INTVAL (operands[3]) >> 6;
3723 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3724 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3726 switch (which_alternative)
3729 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3731 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3736 [(set_attr "isa" "noavx,avx")
3737 (set_attr "type" "sselog")
3738 (set_attr "prefix_data16" "1,*")
3739 (set_attr "prefix_extra" "1")
3740 (set_attr "length_immediate" "1")
3741 (set_attr "prefix" "orig,vex")
3742 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (TARGET_SSE && reload_completed) for a VI4F_128
;; memory destination and a non-memory scalar operand 1: it emits a scalar
;; move to offset 0 of the destination, re-addressed in <ssescalarmode>.
;; NOTE(review): presumably the memory-destination case of
;; vec_set<mode>_0 -- confirm.
3745 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3747 (vec_duplicate:VI4F_128
3748 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3751 "TARGET_SSE && reload_completed"
3754 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set expander for every mode in V_128.  Operand 0 is the vector,
;; operand 1 the scalar register and operand 2 a constant integer that is
;; passed on as INTVAL.  All the work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
3759 (define_expand "vec_set<mode>"
3760 [(match_operand:V_128 0 "register_operand" "")
3761 (match_operand:<ssescalarmode> 1 "register_operand" "")
3762 (match_operand 2 "const_int_operand" "")]
3765 ix86_expand_vector_set (false, operands[0], operands[1],
3766 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF.  Matches under TARGET_SSE
;; unless both operands are memory.  After reload it is split into an
;; ordinary SF move whose source is operand 1 re-expressed in SFmode,
;; either as a REG with the same register number or through gen_lowpart.
3770 (define_insn_and_split "*vec_extractv4sf_0"
3771 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3773 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3774 (parallel [(const_int 0)])))]
3775 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3777 "&& reload_completed"
3780 rtx op1 = operands[1];
3782 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3784 op1 = gen_lowpart (SFmode, op1);
3785 emit_move_insn (operands[0], op1);
;; Expander over the V_256 modes.  Operand 2 is a constant in 0..1; a
;; switch on its value picks either gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, and the chosen generator is emitted with
;; operands 0 and 1.
3789 (define_expand "avx_vextractf128<mode>"
3790 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3791 (match_operand:V_256 1 "register_operand" "")
3792 (match_operand:SI 2 "const_0_to_1_operand" "")]
3795 rtx (*insn)(rtx, rtx);
3797 switch (INTVAL (operands[2]))
3800 insn = gen_vec_extract_lo_<mode>;
3803 insn = gen_vec_extract_hi_<mode>;
3809 emit_insn (insn (operands[0], operands[1]));
;; Select elements 0 and 1 of a VI8F_256 operand.  Matches under TARGET_AVX
;; unless both operands are memory.  After reload it is split into a plain
;; move from operand 1 re-expressed in <ssehalfvecmode>, either as a REG
;; with the same register number or through gen_lowpart.
3813 (define_insn_and_split "vec_extract_lo_<mode>"
3814 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3815 (vec_select:<ssehalfvecmode>
3816 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3817 (parallel [(const_int 0) (const_int 1)])))]
3818 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3820 "&& reload_completed"
3823 rtx op1 = operands[1];
3825 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3827 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3828 emit_move_insn (operands[0], op1);
;; Select elements 2 and 3 of a VI8F_256 register into a register or
;; memory.  The output code emits vextracti128 when the insn's "mode"
;; attribute is OI and vextractf128 otherwise, both with immediate 1.  The
;; "mode" attribute is computed from TARGET_AVX2 and whether <MODE> is
;; V4DI.
3832 (define_insn "vec_extract_hi_<mode>"
3833 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3834 (vec_select:<ssehalfvecmode>
3835 (match_operand:VI8F_256 1 "register_operand" "x,x")
3836 (parallel [(const_int 2) (const_int 3)])))]
3839 if (get_attr_mode (insn) == MODE_OI)
3840 return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
3842 return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
3844 [(set_attr "type" "sselog")
3845 (set_attr "prefix_extra" "1")
3846 (set_attr "length_immediate" "1")
3847 (set_attr "memory" "none,store")
3848 (set_attr "prefix" "vex")
3851 (and (match_test "TARGET_AVX2")
3852 (eq (const_string "<MODE>mode") (const_string "V4DImode")))
3854 (const_string "V4DF")))])
;; Select elements 0..3 of a VI4F_256 operand.  Matches under TARGET_AVX
;; unless both operands are memory.  After reload it is split into a plain
;; move from operand 1 re-expressed in <ssehalfvecmode>, either as a REG
;; with the same register number or through gen_lowpart.
3856 (define_insn_and_split "vec_extract_lo_<mode>"
3857 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3858 (vec_select:<ssehalfvecmode>
3859 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3860 (parallel [(const_int 0) (const_int 1)
3861 (const_int 2) (const_int 3)])))]
3862 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3864 "&& reload_completed"
3867 rtx op1 = operands[1];
3869 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3871 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3872 emit_move_insn (operands[0], op1);
;; Select elements 4..7 of a VI4F_256 register into a register or memory.
;; The output code emits vextracti128 when the insn's "mode" attribute is
;; OI and vextractf128 otherwise, both with immediate 1.  The "mode"
;; attribute is computed from TARGET_AVX2 and whether <MODE> is V8SI.
3876 (define_insn "vec_extract_hi_<mode>"
3877 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3878 (vec_select:<ssehalfvecmode>
3879 (match_operand:VI4F_256 1 "register_operand" "x,x")
3880 (parallel [(const_int 4) (const_int 5)
3881 (const_int 6) (const_int 7)])))]
3884 if (get_attr_mode (insn) == MODE_OI)
3885 return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
3887 return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
3889 [(set_attr "type" "sselog")
3890 (set_attr "prefix_extra" "1")
3891 (set_attr "length_immediate" "1")
3892 (set_attr "memory" "none,store")
3893 (set_attr "prefix" "vex")
3896 (and (match_test "TARGET_AVX2")
3897 (eq (const_string "<MODE>mode") (const_string "V8SImode")))
3899 (const_string "V8SF")))])
;; Select elements 0..7 of a V16HI operand as a V8HI.  Matches under
;; TARGET_AVX unless both operands are memory.  After reload it is split
;; into a plain move from operand 1 re-expressed in V8HImode, either as a
;; REG with the same register number or through gen_lowpart.
3901 (define_insn_and_split "vec_extract_lo_v16hi"
3902 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3904 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3905 (parallel [(const_int 0) (const_int 1)
3906 (const_int 2) (const_int 3)
3907 (const_int 4) (const_int 5)
3908 (const_int 6) (const_int 7)])))]
3909 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3911 "&& reload_completed"
3914 rtx op1 = operands[1];
3916 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3918 op1 = gen_lowpart (V8HImode, op1);
3919 emit_move_insn (operands[0], op1);
;; Select elements 8..15 of a V16HI register into a V8HI register or
;; memory.  The output code emits vextracti128 when the insn's "mode"
;; attribute is OI and vextractf128 otherwise, both with immediate 1.  The
;; "mode" attribute depends on TARGET_AVX2.
3923 (define_insn "vec_extract_hi_v16hi"
3924 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3926 (match_operand:V16HI 1 "register_operand" "x,x")
3927 (parallel [(const_int 8) (const_int 9)
3928 (const_int 10) (const_int 11)
3929 (const_int 12) (const_int 13)
3930 (const_int 14) (const_int 15)])))]
3933 if (get_attr_mode (insn) == MODE_OI)
3934 return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
3936 return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
3938 [(set_attr "type" "sselog")
3939 (set_attr "prefix_extra" "1")
3940 (set_attr "length_immediate" "1")
3941 (set_attr "memory" "none,store")
3942 (set_attr "prefix" "vex")
3944 (if_then_else (match_test "TARGET_AVX2")
3946 (const_string "V8SF")))])
;; Select elements 0..15 of a V32QI operand as a V16QI.  Matches under
;; TARGET_AVX unless both operands are memory.  After reload it is split
;; into a plain move from operand 1 re-expressed in V16QImode, either as a
;; REG with the same register number or through gen_lowpart.
3948 (define_insn_and_split "vec_extract_lo_v32qi"
3949 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3951 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3952 (parallel [(const_int 0) (const_int 1)
3953 (const_int 2) (const_int 3)
3954 (const_int 4) (const_int 5)
3955 (const_int 6) (const_int 7)
3956 (const_int 8) (const_int 9)
3957 (const_int 10) (const_int 11)
3958 (const_int 12) (const_int 13)
3959 (const_int 14) (const_int 15)])))]
3960 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3962 "&& reload_completed"
3965 rtx op1 = operands[1];
3967 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3969 op1 = gen_lowpart (V16QImode, op1);
3970 emit_move_insn (operands[0], op1);
;; Select elements 16..31 of a V32QI register into a V16QI register or
;; memory.  The output code emits vextracti128 when the insn's "mode"
;; attribute is OI and vextractf128 otherwise, both with immediate 1.  The
;; "mode" attribute depends on TARGET_AVX2.
3974 (define_insn "vec_extract_hi_v32qi"
3975 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3977 (match_operand:V32QI 1 "register_operand" "x,x")
3978 (parallel [(const_int 16) (const_int 17)
3979 (const_int 18) (const_int 19)
3980 (const_int 20) (const_int 21)
3981 (const_int 22) (const_int 23)
3982 (const_int 24) (const_int 25)
3983 (const_int 26) (const_int 27)
3984 (const_int 28) (const_int 29)
3985 (const_int 30) (const_int 31)])))]
3988 if (get_attr_mode (insn) == MODE_OI)
3989 return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
3991 return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
3993 [(set_attr "type" "sselog")
3994 (set_attr "prefix_extra" "1")
3995 (set_attr "length_immediate" "1")
3996 (set_attr "memory" "none,store")
3997 (set_attr "prefix" "vex")
3999 (if_then_else (match_test "TARGET_AVX2")
4001 (const_string "V8SF")))])
;; Extract element operand 2 (0..3) of V4SF register operand 1 as an SF.
;; Alternative 0 targets a general register or memory and emits extractps.
;; The split applies after reload when the destination is an SSE register:
;; the destination is viewed as a V4SF register and, depending on operand
;; 2, filled either by sse_shufps_v4sf with operand 1 as both inputs or by
;; vec_interleave_highv4sf.
4003 (define_insn_and_split "*sse4_1_extractps"
4004 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4006 (match_operand:V4SF 1 "register_operand" "x,0,x")
4007 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4010 %vextractps\t{%2, %1, %0|%0, %1, %2}
4013 "&& reload_completed && SSE_REG_P (operands[0])"
4016 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4017 switch (INTVAL (operands[2]))
4021 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4022 operands[2], operands[2],
4023 GEN_INT (INTVAL (operands[2]) + 4),
4024 GEN_INT (INTVAL (operands[2]) + 4)));
4027 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4030 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4035 [(set_attr "isa" "*,noavx,avx")
4036 (set_attr "type" "sselog,*,*")
4037 (set_attr "prefix_data16" "1,*,*")
4038 (set_attr "prefix_extra" "1,*,*")
4039 (set_attr "length_immediate" "1,*,*")
4040 (set_attr "prefix" "maybe_vex,*,*")
4041 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 (0..3) from a V4SF held in offsettable memory.
;; After reload it is split into a plain SF load from operand 1's address
;; advanced by i*4 bytes, where i is the element index.
4043 (define_insn_and_split "*vec_extract_v4sf_mem"
4044 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4046 (match_operand:V4SF 1 "memory_operand" "o")
4047 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4050 "&& reload_completed"
4053 int i = INTVAL (operands[2]);
4055 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4059 ;; Modes handled by vec_extract patterns.
;; The 256-bit entries are enabled only under TARGET_AVX; the 128-bit
;; entries carry no condition.
4060 (define_mode_iterator VEC_EXTRACT_MODE
4061 [(V32QI "TARGET_AVX") V16QI
4062 (V16HI "TARGET_AVX") V8HI
4063 (V8SI "TARGET_AVX") V4SI
4064 (V4DI "TARGET_AVX") V2DI
4065 (V8SF "TARGET_AVX") V4SF
4066 (V4DF "TARGET_AVX") V2DF])
;; vec_extract expander for every mode in VEC_EXTRACT_MODE.  Operand 0 is
;; the scalar destination, operand 1 the vector register and operand 2 a
;; constant integer that is passed on as INTVAL.  All the work is delegated
;; to ix86_expand_vector_extract, called with false as its first argument.
4068 (define_expand "vec_extract<mode>"
4069 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4070 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4071 (match_operand 2 "const_int_operand" "")]
4074 ix86_expand_vector_extract (false, operands[0], operands[1],
4075 INTVAL (operands[2]));
4079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4081 ;; Parallel double-precision floating point element swizzling
4083 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4085 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; The selector over operands 1 and 2 is 1,5,3,7.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
4086 (define_insn "avx_unpckhpd256"
4087 [(set (match_operand:V4DF 0 "register_operand" "=x")
4090 (match_operand:V4DF 1 "register_operand" "x")
4091 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4092 (parallel [(const_int 1) (const_int 5)
4093 (const_int 3) (const_int 7)])))]
4095 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4096 [(set_attr "type" "sselog")
4097 (set_attr "prefix" "vex")
4098 (set_attr "mode" "V4DF")])
;; Three-step expansion through two fresh V4DF pseudos, operands[3] and
;; operands[4], created in the preparation code.  The first two steps use
;; the selectors 0,4,2,6 and 1,5,3,7; the final step selects 2,3,6,7 into
;; operand 0.
4100 (define_expand "vec_interleave_highv4df"
4104 (match_operand:V4DF 1 "register_operand" "x")
4105 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4106 (parallel [(const_int 0) (const_int 4)
4107 (const_int 2) (const_int 6)])))
4113 (parallel [(const_int 1) (const_int 5)
4114 (const_int 3) (const_int 7)])))
4115 (set (match_operand:V4DF 0 "register_operand" "")
4120 (parallel [(const_int 2) (const_int 3)
4121 (const_int 6) (const_int 7)])))]
4124 operands[3] = gen_reg_rtx (V4DFmode);
4125 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  Both inputs are accepted as
;; register or memory; when ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination (called with 1 as its second argument), operand
;; 2 is forced into a register.
4129 (define_expand "vec_interleave_highv2df"
4130 [(set (match_operand:V2DF 0 "register_operand" "")
4133 (match_operand:V2DF 1 "nonimmediate_operand" "")
4134 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4135 (parallel [(const_int 1)
4139 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4140 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for the V2DF high interleave; requires TARGET_SSE2 and an
;; operand combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives:
;;   0/1  register inputs: unpckhpd / vunpckhpd
;;   2    operand 1 in offsettable memory, operand 2 identical to it:
;;        movddup from %H1 (isa sse3)
;;   3/4  operand 1 in offsettable memory: movlpd / vmovlpd from %H1
;;   5    memory destination tied to operand 2: movhpd store of operand 1
4143 (define_insn "*vec_interleave_highv2df"
4144 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4147 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4148 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4149 (parallel [(const_int 1)
4151 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4153 unpckhpd\t{%2, %0|%0, %2}
4154 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4155 %vmovddup\t{%H1, %0|%0, %H1}
4156 movlpd\t{%H1, %0|%0, %H1}
4157 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4158 %vmovhpd\t{%1, %0|%0, %1}"
4159 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4160 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4161 (set_attr "prefix_data16" "*,*,*,1,*,1")
4162 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4163 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4165 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF input (register or memory) and the selector
;; 0,4,2,6.
4166 (define_expand "avx_movddup256"
4167 [(set (match_operand:V4DF 0 "register_operand" "")
4170 (match_operand:V4DF 1 "nonimmediate_operand" "")
4172 (parallel [(const_int 0) (const_int 4)
4173 (const_int 2) (const_int 6)])))]
;; Expander with two V4DF inputs (operand 1 a register, operand 2 a
;; register or memory) and the selector 0,4,2,6.
4176 (define_expand "avx_unpcklpd256"
4177 [(set (match_operand:V4DF 0 "register_operand" "")
4180 (match_operand:V4DF 1 "register_operand" "")
4181 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4182 (parallel [(const_int 0) (const_int 4)
4183 (const_int 2) (const_int 6)])))]
;; Matching insn for the selector 0,4,2,6 over two V4DF operands.  The
;; condition allows operand 1 in memory only when it is rtx_equal_p to
;; operand 2.  Alternative 0 (operand 2 matching operand 1) emits vmovddup
;; from the single source; alternative 1 emits vunpcklpd.
4186 (define_insn "*avx_unpcklpd256"
4187 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4190 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4191 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4192 (parallel [(const_int 0) (const_int 4)
4193 (const_int 2) (const_int 6)])))]
4195 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4197 vmovddup\t{%1, %0|%0, %1}
4198 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4199 [(set_attr "type" "sselog")
4200 (set_attr "prefix" "vex")
4201 (set_attr "mode" "V4DF")])
;; Three-step expansion through two fresh V4DF pseudos, operands[3] and
;; operands[4], created in the preparation code.  The first two steps use
;; the selectors 0,4,2,6 and 1,5,3,7; the final step selects 0,1,4,5 into
;; operand 0.
4203 (define_expand "vec_interleave_lowv4df"
4207 (match_operand:V4DF 1 "register_operand" "x")
4208 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4209 (parallel [(const_int 0) (const_int 4)
4210 (const_int 2) (const_int 6)])))
4216 (parallel [(const_int 1) (const_int 5)
4217 (const_int 3) (const_int 7)])))
4218 (set (match_operand:V4DF 0 "register_operand" "")
4223 (parallel [(const_int 0) (const_int 1)
4224 (const_int 4) (const_int 5)])))]
4227 operands[3] = gen_reg_rtx (V4DFmode);
4228 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  Both inputs are accepted as
;; register or memory; when ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination (called with 0 as its second argument), operand
;; 1 is forced into a register.
4231 (define_expand "vec_interleave_lowv2df"
4232 [(set (match_operand:V2DF 0 "register_operand" "")
4235 (match_operand:V2DF 1 "nonimmediate_operand" "")
4236 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4237 (parallel [(const_int 0)
4241 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4242 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF low interleave; requires TARGET_SSE2 and an
;; operand combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives:
;;   0/1  register inputs: unpcklpd / vunpcklpd
;;   2    operand 1 in memory, operand 2 identical to it: movddup
;;        (isa sse3)
;;   3/4  operand 2 in memory: movhpd / vmovhpd
;;   5    offsettable memory destination tied to operand 1: movlpd store
;;        of operand 2 to %H0
4245 (define_insn "*vec_interleave_lowv2df"
4246 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4249 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4250 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4251 (parallel [(const_int 0)
4253 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4255 unpcklpd\t{%2, %0|%0, %2}
4256 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4257 %vmovddup\t{%1, %0|%0, %1}
4258 movhpd\t{%2, %0|%0, %2}
4259 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4260 %vmovlpd\t{%2, %H0|%H0, %2}"
4261 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4262 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4263 (set_attr "prefix_data16" "*,*,*,1,*,1")
4264 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4265 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination and a register source: the register is viewed in DFmode and
;; stored twice, at byte offsets 0 and 8 of the destination.
4268 [(set (match_operand:V2DF 0 "memory_operand" "")
4271 (match_operand:V2DF 1 "register_operand" "")
4273 (parallel [(const_int 0)
4275 "TARGET_SSE3 && reload_completed"
4278 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4279 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4280 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite for a V2DF register destination and a memory source under
;; TARGET_SSE3.  It applies when selector operand 3 equals selector operand
;; 2 (0..1) plus 2.  The result is a vec_duplicate of the DF found
;; INTVAL (operands[2]) * 8 bytes into operand 1.
4285 [(set (match_operand:V2DF 0 "register_operand" "")
4288 (match_operand:V2DF 1 "memory_operand" "")
4290 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4291 (match_operand:SI 3 "const_int_operand" "")])))]
4292 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4293 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4295 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for the immediate form of the 256-bit shufpd.  Individual bits
;; of integer operand 3 are turned into explicit element selectors for
;; avx_shufpd256_1: bit 1 chooses 5 or 4, bit 2 chooses 3 or 2, bit 3
;; chooses 7 or 6.
4298 (define_expand "avx_shufpd256"
4299 [(match_operand:V4DF 0 "register_operand" "")
4300 (match_operand:V4DF 1 "register_operand" "")
4301 (match_operand:V4DF 2 "nonimmediate_operand" "")
4302 (match_operand:SI 3 "const_int_operand" "")]
4305 int mask = INTVAL (operands[3]);
4306 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4308 GEN_INT (mask & 2 ? 5 : 4),
4309 GEN_INT (mask & 4 ? 3 : 2),
4310 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit shufpd with four explicit selectors: operand 3 in 0..1, operand
;; 4 in 4..5, operand 5 in 2..3 and operand 6 in 6..7.  The output code
;; removes each selector's bias, packs the results into bits 0..3 of a
;; single immediate and stores it in operands[3] for the vshufpd template.
4314 (define_insn "avx_shufpd256_1"
4315 [(set (match_operand:V4DF 0 "register_operand" "=x")
4318 (match_operand:V4DF 1 "register_operand" "x")
4319 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4320 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4321 (match_operand 4 "const_4_to_5_operand" "")
4322 (match_operand 5 "const_2_to_3_operand" "")
4323 (match_operand 6 "const_6_to_7_operand" "")])))]
4327 mask = INTVAL (operands[3]);
4328 mask |= (INTVAL (operands[4]) - 4) << 1;
4329 mask |= (INTVAL (operands[5]) - 2) << 2;
4330 mask |= (INTVAL (operands[6]) - 6) << 3;
4331 operands[3] = GEN_INT (mask);
4333 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4335 [(set_attr "type" "sselog")
4336 (set_attr "length_immediate" "1")
4337 (set_attr "prefix" "vex")
4338 (set_attr "mode" "V4DF")])
;; Expander for the immediate form of the 128-bit shufpd.  Integer operand
;; 3 is turned into explicit element selectors for sse2_shufpd_v2df; bit 1
;; chooses 3 or 2 for the last selector.
4340 (define_expand "sse2_shufpd"
4341 [(match_operand:V2DF 0 "register_operand" "")
4342 (match_operand:V2DF 1 "register_operand" "")
4343 (match_operand:V2DF 2 "nonimmediate_operand" "")
4344 (match_operand:SI 3 "const_int_operand" "")]
4347 int mask = INTVAL (operands[3]);
4348 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4350 GEN_INT (mask & 2 ? 3 : 2)));
4354 ;; Modes handled by vec_extract_even/odd pattern.
;; The integer modes and V2DF require TARGET_SSE2, the 256-bit float modes
;; require TARGET_AVX, and V4SF carries no condition.
4355 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4356 [(V16QI "TARGET_SSE2")
4357 (V8HI "TARGET_SSE2")
4358 (V4SI "TARGET_SSE2")
4359 (V2DI "TARGET_SSE2")
4360 (V8SF "TARGET_AVX") V4SF
4361 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even expander: three register operands of the same
;; VEC_EXTRACT_EVENODD_MODE mode.  All the work is delegated to
;; ix86_expand_vec_extract_even_odd, called with 0 as its last argument.
4363 (define_expand "vec_extract_even<mode>"
4364 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4365 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4366 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4369 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd expander: three register operands of the same
;; VEC_EXTRACT_EVENODD_MODE mode.  All the work is delegated to
;; ix86_expand_vec_extract_even_odd, called with 1 as its last argument.
4373 (define_expand "vec_extract_odd<mode>"
4374 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4375 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4376 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4379 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4383 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit (V4DI) high interleave; emitted as vpunpckhqdq with the "mode"
;; attribute OI.  Operand 2 may be a register or memory.
4384 (define_insn "avx2_interleave_highv4di"
4385 [(set (match_operand:V4DI 0 "register_operand" "=x")
4388 (match_operand:V4DI 1 "register_operand" "x")
4389 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4390 (parallel [(const_int 1)
4395 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4396 [(set_attr "type" "sselog")
4397 (set_attr "prefix" "vex")
4398 (set_attr "mode" "OI")])
;; 128-bit (V2DI) high interleave of operands 1 and 2.  Two alternatives:
;;   0 (noavx): two-operand punpckhqdq, operand 1 tied to the destination
;;              (constraint "0"), needs the data16 prefix;
;;   1 (avx):   three-operand VEX vpunpckhqdq with a separate operand 1.
;; Operand 2 may be a register or memory in both alternatives.
4400 (define_insn "vec_interleave_highv2di"
4401 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4404 (match_operand:V2DI 1 "register_operand" "0,x")
4405 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4406 (parallel [(const_int 1)
4410 punpckhqdq\t{%2, %0|%0, %2}
4411 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4412 [(set_attr "isa" "noavx,avx")
4413 (set_attr "type" "sselog")
4414 (set_attr "prefix_data16" "1,*")
4415 (set_attr "prefix" "orig,vex")
4416 (set_attr "mode" "TI")])
;; 256-bit (V4DI) low interleave of operands 1 and 2; the low-half
;; counterpart of avx2_interleave_highv4di.  Single alternative, always
;; emitted as the VEX-encoded three-operand vpunpcklqdq (insn mode OI).
4418 (define_insn "avx2_interleave_lowv4di"
4419 [(set (match_operand:V4DI 0 "register_operand" "=x")
4422 (match_operand:V4DI 1 "register_operand" "x")
4423 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4424 (parallel [(const_int 0)
4429 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4430 [(set_attr "type" "sselog")
4431 (set_attr "prefix" "vex")
4432 (set_attr "mode" "OI")])
;; 128-bit (V2DI) low interleave of operands 1 and 2.  Two alternatives:
;;   0 (noavx): two-operand punpcklqdq, operand 1 tied to the destination
;;              (constraint "0"), needs the data16 prefix;
;;   1 (avx):   three-operand VEX vpunpcklqdq with a separate operand 1.
;; Operand 2 may be a register or memory in both alternatives.
4434 (define_insn "vec_interleave_lowv2di"
4435 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4438 (match_operand:V2DI 1 "register_operand" "0,x")
4439 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4440 (parallel [(const_int 0)
4444 punpcklqdq\t{%2, %0|%0, %2}
4445 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4446 [(set_attr "isa" "noavx,avx")
4447 (set_attr "type" "sselog")
4448 (set_attr "prefix_data16" "1,*")
4449 (set_attr "prefix" "orig,vex")
4450 (set_attr "mode" "TI")])
;; shufpd on any VI8F_128 mode, expressed as a two-element vec_select
;; from the concatenation of operands 1 and 2.  Operand 3 selects an
;; element of operand 1 (index 0..1) and operand 4 an element of operand 2
;; (index 2..3 in the concatenated numbering).  The output code folds both
;; selectors into the 2-bit shufpd immediate and stores it back into
;; operands[3] so that the templates can print it as %3.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
4452 (define_insn "sse2_shufpd_<mode>"
4453 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4454 (vec_select:VI8F_128
4455 (vec_concat:<ssedoublevecmode>
4456 (match_operand:VI8F_128 1 "register_operand" "0,x")
4457 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4458 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4459 (match_operand 4 "const_2_to_3_operand" "")])))]
/* Bit 0 of the immediate comes from operand 3 (already 0 or 1).  */
4463 mask = INTVAL (operands[3]);
/* Bit 1 comes from operand 4, rebased from the 2..3 range to 0..1.  */
4464 mask |= (INTVAL (operands[4]) - 2) << 1;
4465 operands[3] = GEN_INT (mask);
4467 switch (which_alternative)
4470 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4472 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4477 [(set_attr "isa" "noavx,avx")
4478 (set_attr "type" "sselog")
4479 (set_attr "length_immediate" "1")
4480 (set_attr "prefix" "orig,vex")
4481 (set_attr "mode" "V2DF")])
4483 ;; Avoid combining registers from different units in a single alternative,
4484 ;; see comment above inline_secondary_memory_needed function in i386.c
4485 (define_insn "sse2_storehpd"
4486 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4488 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4489 (parallel [(const_int 1)])))]
4490 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4492 %vmovhpd\t{%1, %0|%0, %1}
4494 vunpckhpd\t{%d1, %0|%0, %d1}
4498 [(set_attr "isa" "*,noavx,avx,*,*,*")
4499 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4500 (set (attr "prefix_data16")
4502 (and (eq_attr "alternative" "0")
4503 (not (match_test "TARGET_AVX")))
4505 (const_string "*")))
4506 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4507 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4510 [(set (match_operand:DF 0 "register_operand" "")
4512 (match_operand:V2DF 1 "memory_operand" "")
4513 (parallel [(const_int 1)])))]
4514 "TARGET_SSE2 && reload_completed"
4515 [(set (match_dup 0) (match_dup 1))]
4516 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4518 (define_insn "*vec_extractv2df_1_sse"
4519 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4521 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4522 (parallel [(const_int 1)])))]
4523 "!TARGET_SSE2 && TARGET_SSE
4524 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4526 movhps\t{%1, %0|%0, %1}
4527 movhlps\t{%1, %0|%0, %1}
4528 movlps\t{%H1, %0|%0, %H1}"
4529 [(set_attr "type" "ssemov")
4530 (set_attr "mode" "V2SF,V4SF,V2SF")])
4532 ;; Avoid combining registers from different units in a single alternative,
4533 ;; see comment above inline_secondary_memory_needed function in i386.c
4534 (define_insn "sse2_storelpd"
4535 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4537 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4538 (parallel [(const_int 0)])))]
4539 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4541 %vmovlpd\t{%1, %0|%0, %1}
4546 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4547 (set_attr "prefix_data16" "1,*,*,*,*")
4548 (set_attr "prefix" "maybe_vex")
4549 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4552 [(set (match_operand:DF 0 "register_operand" "")
4554 (match_operand:V2DF 1 "nonimmediate_operand" "")
4555 (parallel [(const_int 0)])))]
4556 "TARGET_SSE2 && reload_completed"
4559 rtx op1 = operands[1];
4561 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4563 op1 = gen_lowpart (DFmode, op1);
4564 emit_move_insn (operands[0], op1);
4568 (define_insn "*vec_extractv2df_0_sse"
4569 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4571 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4572 (parallel [(const_int 0)])))]
4573 "!TARGET_SSE2 && TARGET_SSE
4574 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4576 movlps\t{%1, %0|%0, %1}
4577 movaps\t{%1, %0|%0, %1}
4578 movlps\t{%1, %0|%0, %1}"
4579 [(set_attr "type" "ssemov")
4580 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse2_loadhpd insn.  The pattern combines
;; element 0 of the V2DF operand 1 with the DF operand 2.  All operands
;; may be registers or memory, so the preparation code first legalizes
;; them with ix86_fixup_binary_operands, emits sse2_loadhpd into the
;; destination that helper returned, and finally copies that destination
;; to operand 0 when the helper substituted a different rtx.
4582 (define_expand "sse2_loadhpd_exp"
4583 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4586 (match_operand:V2DF 1 "nonimmediate_operand" "")
4587 (parallel [(const_int 0)]))
4588 (match_operand:DF 2 "nonimmediate_operand" "")))]
4591 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4593 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4595 /* Fix up the destination if needed. */
4596 if (dst != operands[0])
4597 emit_move_insn (operands[0], dst);
4602 ;; Avoid combining registers from different units in a single alternative,
4603 ;; see comment above inline_secondary_memory_needed function in i386.c
4604 (define_insn "sse2_loadhpd"
4605 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4609 (match_operand:V2DF 1 "nonimmediate_operand"
4611 (parallel [(const_int 0)]))
4612 (match_operand:DF 2 "nonimmediate_operand"
4613 " m,m,x,x,x,*f,r")))]
4614 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4616 movhpd\t{%2, %0|%0, %2}
4617 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4618 unpcklpd\t{%2, %0|%0, %2}
4619 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4623 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4624 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4625 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4626 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4627 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4630 [(set (match_operand:V2DF 0 "memory_operand" "")
4632 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4633 (match_operand:DF 1 "register_operand" "")))]
4634 "TARGET_SSE2 && reload_completed"
4635 [(set (match_dup 0) (match_dup 1))]
4636 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around the sse2_loadlpd insn.  The pattern combines
;; the DF operand 2 with element 1 of the V2DF operand 1.  As in
;; sse2_loadhpd_exp, the operands are legalized with
;; ix86_fixup_binary_operands, the insn is emitted into the destination
;; returned by that helper, and the result is copied to operand 0 when the
;; helper substituted a different rtx.
4638 (define_expand "sse2_loadlpd_exp"
4639 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4641 (match_operand:DF 2 "nonimmediate_operand" "")
4643 (match_operand:V2DF 1 "nonimmediate_operand" "")
4644 (parallel [(const_int 1)]))))]
4647 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4649 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4651 /* Fix up the destination if needed. */
4652 if (dst != operands[0])
4653 emit_move_insn (operands[0], dst);
4658 ;; Avoid combining registers from different units in a single alternative,
4659 ;; see comment above inline_secondary_memory_needed function in i386.c
4660 (define_insn "sse2_loadlpd"
4661 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4662 "=x,x,x,x,x,x,x,x,m,m ,m")
4664 (match_operand:DF 2 "nonimmediate_operand"
4665 " m,m,m,x,x,0,0,x,x,*f,r")
4667 (match_operand:V2DF 1 "vector_move_operand"
4668 " C,0,x,0,x,x,o,o,0,0 ,0")
4669 (parallel [(const_int 1)]))))]
4670 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4672 %vmovsd\t{%2, %0|%0, %2}
4673 movlpd\t{%2, %0|%0, %2}
4674 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4675 movsd\t{%2, %0|%0, %2}
4676 vmovsd\t{%2, %1, %0|%0, %1, %2}
4677 shufpd\t{$2, %1, %0|%0, %1, 2}
4678 movhpd\t{%H1, %0|%0, %H1}
4679 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4683 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4685 (cond [(eq_attr "alternative" "5")
4686 (const_string "sselog")
4687 (eq_attr "alternative" "9")
4688 (const_string "fmov")
4689 (eq_attr "alternative" "10")
4690 (const_string "imov")
4692 (const_string "ssemov")))
4693 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4694 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4695 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4696 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4699 [(set (match_operand:V2DF 0 "memory_operand" "")
4701 (match_operand:DF 1 "register_operand" "")
4702 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4703 "TARGET_SSE2 && reload_completed"
4704 [(set (match_dup 0) (match_dup 1))]
4705 "operands[0] = adjust_address (operands[0], DFmode, 8);")
4707 (define_insn "sse2_movsd"
4708 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4710 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4711 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4715 movsd\t{%2, %0|%0, %2}
4716 vmovsd\t{%2, %1, %0|%0, %1, %2}
4717 movlpd\t{%2, %0|%0, %2}
4718 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4719 %vmovlpd\t{%2, %0|%0, %2}
4720 shufpd\t{$2, %1, %0|%0, %1, 2}
4721 movhps\t{%H1, %0|%0, %H1}
4722 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4723 %vmovhps\t{%1, %H0|%H0, %1}"
4724 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4727 (eq_attr "alternative" "5")
4728 (const_string "sselog")
4729 (const_string "ssemov")))
4730 (set (attr "prefix_data16")
4732 (and (eq_attr "alternative" "2,4")
4733 (not (match_test "TARGET_AVX")))
4735 (const_string "*")))
4736 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4737 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4738 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
4740 (define_expand "vec_dupv2df"
4741 [(set (match_operand:V2DF 0 "register_operand" "")
4743 (match_operand:DF 1 "nonimmediate_operand" "")))]
4747 operands[1] = force_reg (DFmode, operands[1]);
4750 (define_insn "*vec_dupv2df_sse3"
4751 [(set (match_operand:V2DF 0 "register_operand" "=x")
4753 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4755 "%vmovddup\t{%1, %0|%0, %1}"
4756 [(set_attr "type" "sselog1")
4757 (set_attr "prefix" "maybe_vex")
4758 (set_attr "mode" "DF")])
4760 (define_insn "*vec_dupv2df"
4761 [(set (match_operand:V2DF 0 "register_operand" "=x")
4763 (match_operand:DF 1 "register_operand" "0")))]
4766 [(set_attr "type" "sselog1")
4767 (set_attr "mode" "V2DF")])
4769 (define_insn "*vec_concatv2df_sse3"
4770 [(set (match_operand:V2DF 0 "register_operand" "=x")
4772 (match_operand:DF 1 "nonimmediate_operand" "xm")
4775 "%vmovddup\t{%1, %0|%0, %1}"
4776 [(set_attr "type" "sselog1")
4777 (set_attr "prefix" "maybe_vex")
4778 (set_attr "mode" "DF")])
4780 (define_insn "*vec_concatv2df"
4781 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4783 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4784 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4787 unpcklpd\t{%2, %0|%0, %2}
4788 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4789 movhpd\t{%2, %0|%0, %2}
4790 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4791 %vmovsd\t{%1, %0|%0, %1}
4792 movlhps\t{%2, %0|%0, %2}
4793 movhps\t{%2, %0|%0, %2}"
4794 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4797 (eq_attr "alternative" "0,1")
4798 (const_string "sselog")
4799 (const_string "ssemov")))
4800 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4801 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4802 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4806 ;; Parallel integral arithmetic
4808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation of a 128-bit integer vector (any VI_128 mode).  The
;; preparation statement creates operands[2] as a register holding the
;; all-zero constant of the same mode (presumably so the pattern can be
;; expressed as a subtraction of operand 1 from zero).
4810 (define_expand "neg<mode>2"
4811 [(set (match_operand:VI_128 0 "register_operand" "")
4814 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4816 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4818 (define_expand "<plusminus_insn><mode>3"
4819 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4821 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4822 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4824 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4826 (define_insn "*<plusminus_insn><mode>3"
4827 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4829 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4830 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4831 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4833 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4834 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4835 [(set_attr "isa" "noavx,avx")
4836 (set_attr "type" "sseiadd")
4837 (set_attr "prefix_data16" "1,*")
4838 (set_attr "prefix" "orig,vex")
4839 (set_attr "mode" "<sseinsnmode>")])
4841 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4842 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4843 (sat_plusminus:VI12_AVX2
4844 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4845 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4847 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4849 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4850 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4851 (sat_plusminus:VI12_AVX2
4852 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4853 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4854 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4856 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4857 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4858 [(set_attr "isa" "noavx,avx")
4859 (set_attr "type" "sseiadd")
4860 (set_attr "prefix_data16" "1,*")
4861 (set_attr "prefix" "orig,vex")
4862 (set_attr "mode" "TI")])
;; V16QI multiply.  No single instruction is emitted for this pattern; it
;; is split (only while new pseudos can still be created) into:
;;   1. four byte interleaves that pair each input with itself, so every
;;      16-bit word holds a source byte in its low byte (t[0]..t[3]);
;;   2. two V8HI multiplies on the high and low halves (t[4], t[5]);
;;   3. an even-byte extraction of both products into operand 0.
;; The last emitted insn gets a REG_EQUAL note with the original MULT.
4864 (define_insn_and_split "mulv16qi3"
4865 [(set (match_operand:V16QI 0 "register_operand" "")
4866 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4867 (match_operand:V16QI 2 "register_operand" "")))]
4869 && can_create_pseudo_p ()"
/* Six V16QI temporaries: four unpacked inputs and two products.  */
4877 for (i = 0; i < 6; ++i)
4878 t[i] = gen_reg_rtx (V16QImode);
4880 /* Unpack data such that we've got a source byte in each low byte of
4881 each word. We don't care what goes into the high byte of each word.
4882 Rather than trying to get zero in there, most convenient is to let
4883 it be a copy of the low byte. */
4884 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4885 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4886 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4887 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4889 /* Multiply words. The end-of-line annotations here give a picture of what
4890 the output of that instruction looks like. Dot means don't care; the
4891 letters are the bytes of the result with A being the most significant. */
4892 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4893 gen_lowpart (V8HImode, t[0]),
4894 gen_lowpart (V8HImode, t[1])));
4895 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4896 gen_lowpart (V8HImode, t[2]),
4897 gen_lowpart (V8HImode, t[3])));
4899 /* Extract the even bytes and merge them back together. */
4900 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4902 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4903 gen_rtx_MULT (V16QImode, operands[1], operands[2]));
4907 (define_expand "mul<mode>3"
4908 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4909 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4910 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4912 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
4914 (define_insn "*mul<mode>3"
4915 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4916 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4917 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4918 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4920 pmullw\t{%2, %0|%0, %2}
4921 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4922 [(set_attr "isa" "noavx,avx")
4923 (set_attr "type" "sseimul")
4924 (set_attr "prefix_data16" "1,*")
4925 (set_attr "prefix" "orig,vex")
4926 (set_attr "mode" "<sseinsnmode>")])
4928 (define_expand "<s>mul<mode>3_highpart"
4929 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4931 (lshiftrt:<ssedoublemode>
4932 (mult:<ssedoublemode>
4933 (any_extend:<ssedoublemode>
4934 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4935 (any_extend:<ssedoublemode>
4936 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4939 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
4941 (define_insn "*<s>mul<mode>3_highpart"
4942 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4944 (lshiftrt:<ssedoublemode>
4945 (mult:<ssedoublemode>
4946 (any_extend:<ssedoublemode>
4947 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4948 (any_extend:<ssedoublemode>
4949 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4951 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4953 pmulh<u>w\t{%2, %0|%0, %2}
4954 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4955 [(set_attr "isa" "noavx,avx")
4956 (set_attr "type" "sseimul")
4957 (set_attr "prefix_data16" "1,*")
4958 (set_attr "prefix" "orig,vex")
4959 (set_attr "mode" "<sseinsnmode>")])
4961 (define_expand "avx2_umulv4siv4di3"
4962 [(set (match_operand:V4DI 0 "register_operand" "")
4966 (match_operand:V8SI 1 "nonimmediate_operand" "")
4967 (parallel [(const_int 0) (const_int 2)
4968 (const_int 4) (const_int 6)])))
4971 (match_operand:V8SI 2 "nonimmediate_operand" "")
4972 (parallel [(const_int 0) (const_int 2)
4973 (const_int 4) (const_int 6)])))))]
4975 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn behind the avx2_umulv4siv4di3 expander: unsigned widening
;; multiply (vpmuludq) of the even-numbered (0, 2, 4, 6) SImode elements
;; of two V8SI operands into a V4DI result.  Operand 1 is marked
;; commutative ("%") so either input may end up as the memory operand.
;; The pattern requires TARGET_AVX2, so it carries the avx2 name like its
;; expander and the signed sibling *avx2_mulv4siv4di3.
4977 (define_insn "*avx2_umulv4siv4di3"
4978 [(set (match_operand:V4DI 0 "register_operand" "=x")
4982 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4983 (parallel [(const_int 0) (const_int 2)
4984 (const_int 4) (const_int 6)])))
4987 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4988 (parallel [(const_int 0) (const_int 2)
4989 (const_int 4) (const_int 6)])))))]
4990 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4991 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4992 [(set_attr "type" "sseimul")
4993 (set_attr "prefix" "vex")
4994 (set_attr "mode" "OI")])
4996 (define_expand "sse2_umulv2siv2di3"
4997 [(set (match_operand:V2DI 0 "register_operand" "")
5001 (match_operand:V4SI 1 "nonimmediate_operand" "")
5002 (parallel [(const_int 0) (const_int 2)])))
5005 (match_operand:V4SI 2 "nonimmediate_operand" "")
5006 (parallel [(const_int 0) (const_int 2)])))))]
5008 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5010 (define_insn "*sse2_umulv2siv2di3"
5011 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5015 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5016 (parallel [(const_int 0) (const_int 2)])))
5019 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5020 (parallel [(const_int 0) (const_int 2)])))))]
5021 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5023 pmuludq\t{%2, %0|%0, %2}
5024 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5025 [(set_attr "isa" "noavx,avx")
5026 (set_attr "type" "sseimul")
5027 (set_attr "prefix_data16" "1,*")
5028 (set_attr "prefix" "orig,vex")
5029 (set_attr "mode" "TI")])
5031 (define_expand "avx2_mulv4siv4di3"
5032 [(set (match_operand:V4DI 0 "register_operand" "")
5036 (match_operand:V8SI 1 "nonimmediate_operand" "")
5037 (parallel [(const_int 0) (const_int 2)
5038 (const_int 4) (const_int 6)])))
5041 (match_operand:V8SI 2 "nonimmediate_operand" "")
5042 (parallel [(const_int 0) (const_int 2)
5043 (const_int 4) (const_int 6)])))))]
5045 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn behind the avx2_mulv4siv4di3 expander: signed widening
;; multiply (vpmuldq) of the even-numbered (0, 2, 4, 6) SImode elements of
;; two V8SI operands into a V4DI result.
;; Operand 1 carries the "%" commutative modifier, matching the unsigned
;; AVX2 pattern and *sse4_1_mulv2siv2di3: the multiply is commutative and
;; the expander lets either input be memory, so the operands must be
;; swappable when operand 1 is the memory one.
5047 (define_insn "*avx2_mulv4siv4di3"
5048 [(set (match_operand:V4DI 0 "register_operand" "=x")
5052 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5053 (parallel [(const_int 0) (const_int 2)
5054 (const_int 4) (const_int 6)])))
5057 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5058 (parallel [(const_int 0) (const_int 2)
5059 (const_int 4) (const_int 6)])))))]
5060 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5061 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5062 [(set_attr "isa" "avx")
5063 (set_attr "type" "sseimul")
5064 (set_attr "prefix_extra" "1")
5065 (set_attr "prefix" "vex")
5066 (set_attr "mode" "OI")])
5068 (define_expand "sse4_1_mulv2siv2di3"
5069 [(set (match_operand:V2DI 0 "register_operand" "")
5073 (match_operand:V4SI 1 "nonimmediate_operand" "")
5074 (parallel [(const_int 0) (const_int 2)])))
5077 (match_operand:V4SI 2 "nonimmediate_operand" "")
5078 (parallel [(const_int 0) (const_int 2)])))))]
5080 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5082 (define_insn "*sse4_1_mulv2siv2di3"
5083 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5087 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5088 (parallel [(const_int 0) (const_int 2)])))
5091 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5092 (parallel [(const_int 0) (const_int 2)])))))]
5093 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5095 pmuldq\t{%2, %0|%0, %2}
5096 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5097 [(set_attr "isa" "noavx,avx")
5098 (set_attr "type" "sseimul")
5099 (set_attr "prefix_data16" "1,*")
5100 (set_attr "prefix_extra" "1")
5101 (set_attr "prefix" "orig,vex")
5102 (set_attr "mode" "TI")])
5104 (define_expand "avx2_pmaddwd"
5105 [(set (match_operand:V8SI 0 "register_operand" "")
5110 (match_operand:V16HI 1 "nonimmediate_operand" "")
5111 (parallel [(const_int 0)
5121 (match_operand:V16HI 2 "nonimmediate_operand" "")
5122 (parallel [(const_int 0)
5132 (vec_select:V8HI (match_dup 1)
5133 (parallel [(const_int 1)
5142 (vec_select:V8HI (match_dup 2)
5143 (parallel [(const_int 1)
5150 (const_int 15)]))))))]
5152 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
5154 (define_expand "sse2_pmaddwd"
5155 [(set (match_operand:V4SI 0 "register_operand" "")
5160 (match_operand:V8HI 1 "nonimmediate_operand" "")
5161 (parallel [(const_int 0)
5167 (match_operand:V8HI 2 "nonimmediate_operand" "")
5168 (parallel [(const_int 0)
5174 (vec_select:V4HI (match_dup 1)
5175 (parallel [(const_int 1)
5180 (vec_select:V4HI (match_dup 2)
5181 (parallel [(const_int 1)
5184 (const_int 7)]))))))]
5186 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5188 (define_insn "*avx2_pmaddwd"
5189 [(set (match_operand:V8SI 0 "register_operand" "=x")
5194 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5195 (parallel [(const_int 0)
5205 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5206 (parallel [(const_int 0)
5216 (vec_select:V8HI (match_dup 1)
5217 (parallel [(const_int 1)
5226 (vec_select:V8HI (match_dup 2)
5227 (parallel [(const_int 1)
5234 (const_int 15)]))))))]
5235 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5236 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5237 [(set_attr "type" "sseiadd")
5238 (set_attr "prefix" "vex")
5239 (set_attr "mode" "OI")])
5241 (define_insn "*sse2_pmaddwd"
5242 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5247 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5248 (parallel [(const_int 0)
5254 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5255 (parallel [(const_int 0)
5261 (vec_select:V4HI (match_dup 1)
5262 (parallel [(const_int 1)
5267 (vec_select:V4HI (match_dup 2)
5268 (parallel [(const_int 1)
5271 (const_int 7)]))))))]
5272 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5274 pmaddwd\t{%2, %0|%0, %2}
5275 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5276 [(set_attr "isa" "noavx,avx")
5277 (set_attr "type" "sseiadd")
5278 (set_attr "atom_unit" "simul")
5279 (set_attr "prefix_data16" "1,*")
5280 (set_attr "prefix" "orig,vex")
5281 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX2 modes.  All operands are required to
;; be registers here.  When SSE4.1 or AVX is available the operands are
;; additionally passed through ix86_fixup_binary_operands_no_copy;
;; otherwise they are left as they are (the SSE2-only case is matched by
;; *sse2_mulv4si3, which also requires register operands).
5283 (define_expand "mul<mode>3"
5284 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5285 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5286 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5289 if (TARGET_SSE4_1 || TARGET_AVX)
5290 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5293 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5294 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5295 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5296 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5297 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5299 pmulld\t{%2, %0|%0, %2}
5300 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5301 [(set_attr "isa" "noavx,avx")
5302 (set_attr "type" "sseimul")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "prefix" "orig,vex")
5305 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for targets with SSE2 but neither SSE4.1 nor AVX.
;; It is split (only while new pseudos can still be created) into a
;; sequence built from the widening multiply sse2_umulv2siv2di3, which
;; only looks at elements 0 and 2 of its inputs:
;;   t1 = products of elements 0 and 2;
;;   t2, t3 = the inputs shifted as whole 128-bit values so that elements
;;            1 and 3 land in slots 0 and 2;
;;   t4 = products of (original) elements 1 and 3;
;;   t5, t6 = pshufd of t1 and t4 with selector (0, 2, 0, 0);
;;   result = low interleave of t5 and t6.
;; The last emitted insn gets a REG_EQUAL note with the original MULT.
5307 (define_insn_and_split "*sse2_mulv4si3"
5308 [(set (match_operand:V4SI 0 "register_operand" "")
5309 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5310 (match_operand:V4SI 2 "register_operand" "")))]
5311 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5312 && can_create_pseudo_p ()"
5317 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5323 t1 = gen_reg_rtx (V4SImode);
5324 t2 = gen_reg_rtx (V4SImode);
5325 t3 = gen_reg_rtx (V4SImode);
5326 t4 = gen_reg_rtx (V4SImode);
5327 t5 = gen_reg_rtx (V4SImode);
5328 t6 = gen_reg_rtx (V4SImode);
5329 thirtytwo = GEN_INT (32);
5331 /* Multiply elements 2 and 0. */
5332 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5335 /* Shift both input vectors down one element, so that elements 3
5336 and 1 are now in the slots for elements 2 and 0. For K8, at
5337 least, this is faster than using a shuffle. */
5338 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5339 gen_lowpart (V1TImode, op1),
5341 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5342 gen_lowpart (V1TImode, op2),
5344 /* Multiply elements 3 and 1. */
5345 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5348 /* Move the results in element 2 down to element 1; we don't care
5349 what goes in elements 2 and 3. */
5350 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5351 const0_rtx, const0_rtx));
5352 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5353 const0_rtx, const0_rtx));
5355 /* Merge the parts back together. */
5356 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5358 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5359 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; V2DI multiply, split (only while new pseudos can still be created)
;; into 32-bit building blocks.  Two sequences appear in the body:
;;   * an XOP sequence: pshufd of op1, V4SI multiply, xop_phadddq
;;     horizontal add, shift left by 32, then xop_pmacsdql to add the
;;     low-part product;
;;   * a generic SSE2 sequence: three sse2_umulv2siv2di3 widening
;;     multiplies (low*low and the two low*high cross products), the cross
;;     products shifted left by 32, and two V2DI additions.
;; The last emitted insn gets a REG_EQUAL note with the original MULT.
5363 (define_insn_and_split "mulv2di3"
5364 [(set (match_operand:V2DI 0 "register_operand" "")
5365 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5366 (match_operand:V2DI 2 "register_operand" "")))]
5368 && can_create_pseudo_p ()"
5373 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5382 /* op1: A,B,C,D, op2: E,F,G,H */
5383 op1 = gen_lowpart (V4SImode, op1);
5384 op2 = gen_lowpart (V4SImode, op2);
5386 t1 = gen_reg_rtx (V4SImode);
5387 t2 = gen_reg_rtx (V4SImode);
5388 t3 = gen_reg_rtx (V2DImode);
5389 t4 = gen_reg_rtx (V2DImode);
5392 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5398 /* t2: (B*E),(A*F),(D*G),(C*H) */
5399 emit_insn (gen_mulv4si3 (t2, t1, op2));
5401 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5402 emit_insn (gen_xop_phadddq (t3, t2));
5404 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5405 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5407 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5408 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5412 t1 = gen_reg_rtx (V2DImode);
5413 t2 = gen_reg_rtx (V2DImode);
5414 t3 = gen_reg_rtx (V2DImode);
5415 t4 = gen_reg_rtx (V2DImode);
5416 t5 = gen_reg_rtx (V2DImode);
5417 t6 = gen_reg_rtx (V2DImode);
5418 thirtytwo = GEN_INT (32);
5420 /* Multiply low parts. */
5421 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5422 gen_lowpart (V4SImode, op2)));
5424 /* Shift input vectors right 32 bits so we can multiply high parts. */
5425 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5426 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5428 /* Multiply high parts by low parts. */
5429 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5430 gen_lowpart (V4SImode, t3)));
5431 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5432 gen_lowpart (V4SImode, t2)));
5434 /* Shift them back. */
5435 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5436 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5438 /* Add the three parts together. */
5439 emit_insn (gen_addv2di3 (t6, t1, t4));
5440 emit_insn (gen_addv2di3 (op0, t6, t5));
5443 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5444 gen_rtx_MULT (V2DImode, operands[1], operands[2]));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the signed high-part
;; multiply (smulv8hi3_highpart) of the same inputs; the high interleave
;; of t1 and t2 is written to operand 0 viewed as V8HI.
5448 (define_expand "vec_widen_smult_hi_v8hi"
5449 [(match_operand:V4SI 0 "register_operand" "")
5450 (match_operand:V8HI 1 "register_operand" "")
5451 (match_operand:V8HI 2 "register_operand" "")]
5454 rtx op1, op2, t1, t2, dest;
5458 t1 = gen_reg_rtx (V8HImode);
5459 t2 = gen_reg_rtx (V8HImode);
5460 dest = gen_lowpart (V8HImode, operands[0]);
5462 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5463 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5464 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as vec_widen_smult_hi_v8hi (mulv8hi3 into t1,
;; smulv8hi3_highpart into t2), but the two results are combined with the
;; low interleave into operand 0 viewed as V8HI.
5468 (define_expand "vec_widen_smult_lo_v8hi"
5469 [(match_operand:V4SI 0 "register_operand" "")
5470 (match_operand:V8HI 1 "register_operand" "")
5471 (match_operand:V8HI 2 "register_operand" "")]
5474 rtx op1, op2, t1, t2, dest;
5478 t1 = gen_reg_rtx (V8HImode);
5479 t2 = gen_reg_rtx (V8HImode);
5480 dest = gen_lowpart (V8HImode, operands[0]);
5482 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5483 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5484 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the unsigned high-part
;; multiply (umulv8hi3_highpart) of the same inputs; the high interleave
;; of t1 and t2 is written to operand 0 viewed as V8HI.
5488 (define_expand "vec_widen_umult_hi_v8hi"
5489 [(match_operand:V4SI 0 "register_operand" "")
5490 (match_operand:V8HI 1 "register_operand" "")
5491 (match_operand:V8HI 2 "register_operand" "")]
5494 rtx op1, op2, t1, t2, dest;
5498 t1 = gen_reg_rtx (V8HImode);
5499 t2 = gen_reg_rtx (V8HImode);
5500 dest = gen_lowpart (V8HImode, operands[0]);
5502 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5503 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5504 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as vec_widen_umult_hi_v8hi (mulv8hi3 into t1,
;; umulv8hi3_highpart into t2), but the two results are combined with the
;; low interleave into operand 0 viewed as V8HI.
5508 (define_expand "vec_widen_umult_lo_v8hi"
5509 [(match_operand:V4SI 0 "register_operand" "")
5510 (match_operand:V8HI 1 "register_operand" "")
5511 (match_operand:V8HI 2 "register_operand" "")]
5514 rtx op1, op2, t1, t2, dest;
5518 t1 = gen_reg_rtx (V8HImode);
5519 t2 = gen_reg_rtx (V8HImode);
5520 dest = gen_lowpart (V8HImode, operands[0]);
5522 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5523 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5524 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI, implemented
;; with the XOP pattern xop_mulv2div2di3_high.  Both inputs are first
;; permuted with pshufd into fresh V4SI temporaries (t1, t2), and the XOP
;; multiply of those temporaries is written to operand 0.
5528 (define_expand "vec_widen_smult_hi_v4si"
5529 [(match_operand:V2DI 0 "register_operand" "")
5530 (match_operand:V4SI 1 "register_operand" "")
5531 (match_operand:V4SI 2 "register_operand" "")]
5536 t1 = gen_reg_rtx (V4SImode);
5537 t2 = gen_reg_rtx (V4SImode);
5539 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5544 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5549 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI, implemented
;; with the XOP pattern xop_mulv2div2di3_low.  Both inputs are first
;; permuted with pshufd into fresh V4SI temporaries (t1, t2), and the XOP
;; multiply of those temporaries is written to operand 0.
5553 (define_expand "vec_widen_smult_lo_v4si"
5554 [(match_operand:V2DI 0 "register_operand" "")
5555 (match_operand:V4SI 1 "register_operand" "")
5556 (match_operand:V4SI 2 "register_operand" "")]
5561 t1 = gen_reg_rtx (V4SImode);
5562 t2 = gen_reg_rtx (V4SImode);
5564 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5569 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5574 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Expander "vec_widen_umult_hi_v4si": V2DI destination (operand 0) from two
;; V4SI register inputs.  Each input is interleaved with itself using
;; vec_interleave_highv4si (t1 from op1, t2 from op2), and the two
;; temporaries are multiplied with sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the enable condition and the op1/op2 assignments are not
;; visible in this excerpt -- presumably op1/op2 are operands[1]/operands[2].
5578 (define_expand "vec_widen_umult_hi_v4si"
5579 [(match_operand:V2DI 0 "register_operand" "")
5580 (match_operand:V4SI 1 "register_operand" "")
5581 (match_operand:V4SI 2 "register_operand" "")]
5584 rtx op1, op2, t1, t2;
5588 t1 = gen_reg_rtx (V4SImode);
5589 t2 = gen_reg_rtx (V4SImode);
5591 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5592 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5593 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "vec_widen_umult_lo_v4si": V2DI destination (operand 0) from two
;; V4SI register inputs.  Each input is interleaved with itself using
;; vec_interleave_lowv4si (t1 from op1, t2 from op2), and the two
;; temporaries are multiplied with sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the enable condition and the op1/op2 assignments are not
;; visible in this excerpt -- presumably op1/op2 are operands[1]/operands[2].
5597 (define_expand "vec_widen_umult_lo_v4si"
5598 [(match_operand:V2DI 0 "register_operand" "")
5599 (match_operand:V4SI 1 "register_operand" "")
5600 (match_operand:V4SI 2 "register_operand" "")]
5603 rtx op1, op2, t1, t2;
5607 t1 = gen_reg_rtx (V4SImode);
5608 t2 = gen_reg_rtx (V4SImode);
5610 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5611 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5612 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "sdot_prodv8hi": operands 1 and 2 are V8HI inputs, operand 3 is a
;; V4SI input and operand 0 the V4SI result.  The body emits sse2_pmaddwd of
;; operands 1 and 2 into a fresh V4SI temporary, then addv4si3 of operand 3
;; and that temporary into operand 0.
;; NOTE(review): the enable condition is not visible in this excerpt.
5616 (define_expand "sdot_prodv8hi"
5617 [(match_operand:V4SI 0 "register_operand" "")
5618 (match_operand:V8HI 1 "register_operand" "")
5619 (match_operand:V8HI 2 "register_operand" "")
5620 (match_operand:V4SI 3 "register_operand" "")]
5623 rtx t = gen_reg_rtx (V4SImode);
5624 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5625 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander "udot_prodv4si": operands 1 and 2 are V4SI inputs, operand 3 is a
;; V2DI input and operand 0 the V2DI result.  Visible steps:
;;   t1 = sse2_umulv2siv2di3 (op1, op2);  t1 = t1 + op3 (addv2di3)
;;   t2, t3 = sse2_lshrv1ti3 applied to op1 and op2 viewed as V1TI
;;   t4 = sse2_umulv2siv2di3 (t2, t3);    op0 = t1 + t4 (addv2di3)
;; NOTE(review): the shift-count arguments of the two lshrv1ti3 calls and the
;; enable condition are not visible in this excerpt.
5629 (define_expand "udot_prodv4si"
5630 [(match_operand:V2DI 0 "register_operand" "")
5631 (match_operand:V4SI 1 "register_operand" "")
5632 (match_operand:V4SI 2 "register_operand" "")
5633 (match_operand:V2DI 3 "register_operand" "")]
5638 t1 = gen_reg_rtx (V2DImode);
5639 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5640 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5642 t2 = gen_reg_rtx (V4SImode);
5643 t3 = gen_reg_rtx (V4SImode);
5644 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5645 gen_lowpart (V1TImode, operands[1]),
5647 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5648 gen_lowpart (V1TImode, operands[2]),
5651 t4 = gen_reg_rtx (V2DImode);
5652 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5654 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Insn "ashr<mode>3" over the VI24_AVX2 mode iterator.  Operand 2 is an SI
;; nonmemory operand (constraint "xN").  Alternative 0 ("noavx") ties the
;; destination to operand 1 and emits psra<ssemodesuffix>; alternative 1
;; ("avx") emits the three-operand vpsra<ssemodesuffix>.  length_immediate
;; is selected by whether operand 2 is a const_int.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5658 (define_insn "ashr<mode>3"
5659 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5661 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5662 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5665 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5666 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5667 [(set_attr "isa" "noavx,avx")
5668 (set_attr "type" "sseishft")
5669 (set (attr "length_immediate")
5670 (if_then_else (match_operand 2 "const_int_operand" "")
5672 (const_string "0")))
5673 (set_attr "prefix_data16" "1,*")
5674 (set_attr "prefix" "orig,vex")
5675 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_lshrqv4di3": V4DI register operands with an immediate operand 2
;; that must satisfy const_0_to_255_mul_8_operand.  The output code divides
;; the immediate by 8 before printing vpsrldq (presumably converting a bit
;; count in the RTL to the byte count the instruction takes -- confirm).
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5677 (define_insn "avx2_lshrqv4di3"
5678 [(set (match_operand:V4DI 0 "register_operand" "=x")
5680 (match_operand:V4DI 1 "register_operand" "x")
5681 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5684 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5685 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5687 [(set_attr "type" "sseishft")
5688 (set_attr "prefix" "vex")
5689 (set_attr "length_immediate" "1")
5690 (set_attr "mode" "OI")])
;; Insn "lshr<mode>3": lshiftrt over the VI248_AVX2 mode iterator.  Operand 2
;; is an SI nonmemory operand (constraint "xN").  Alternative 0 ("noavx")
;; ties the destination to operand 1 and emits psrl<ssemodesuffix>;
;; alternative 1 ("avx") emits the three-operand vpsrl<ssemodesuffix>.
;; length_immediate is selected by whether operand 2 is a const_int.
;; NOTE(review): the enable condition is not visible in this excerpt.
5692 (define_insn "lshr<mode>3"
5693 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5694 (lshiftrt:VI248_AVX2
5695 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5696 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5699 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5700 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5701 [(set_attr "isa" "noavx,avx")
5702 (set_attr "type" "sseishft")
5703 (set (attr "length_immediate")
5704 (if_then_else (match_operand 2 "const_int_operand" "")
5706 (const_string "0")))
5707 (set_attr "prefix_data16" "1,*")
5708 (set_attr "prefix" "orig,vex")
5709 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_lshlqv4di3": ashift of a V4DI register by an immediate that must
;; satisfy const_0_to_255_mul_8_operand.  The output code divides the
;; immediate by 8 before printing vpslldq (presumably converting a bit count
;; in the RTL to the byte count the instruction takes -- confirm).
;; NOTE(review): the enable condition is not visible in this excerpt.
5711 (define_insn "avx2_lshlqv4di3"
5712 [(set (match_operand:V4DI 0 "register_operand" "=x")
5713 (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x")
5714 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5717 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5718 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5720 [(set_attr "type" "sseishft")
5721 (set_attr "prefix" "vex")
5722 (set_attr "length_immediate" "1")
5723 (set_attr "mode" "OI")])
;; Insn "avx2_lshl<mode>3" over the VI248_256 mode iterator.  Single
;; VEX-encoded alternative emitting the three-operand vpsll<ssemodesuffix>;
;; operand 2 is an SI nonmemory operand (constraint "xN") and
;; length_immediate is selected by whether it is a const_int.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5725 (define_insn "avx2_lshl<mode>3"
5726 [(set (match_operand:VI248_256 0 "register_operand" "=x")
5728 (match_operand:VI248_256 1 "register_operand" "x")
5729 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5731 "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5732 [(set_attr "type" "sseishft")
5733 (set_attr "prefix" "vex")
5734 (set (attr "length_immediate")
5735 (if_then_else (match_operand 2 "const_int_operand" "")
5737 (const_string "0")))
5738 (set_attr "mode" "OI")])
;; Insn "ashl<mode>3" over the VI248_128 mode iterator.  Operand 2 is an SI
;; nonmemory operand (constraint "xN").  Alternative 0 ("noavx") ties the
;; destination to operand 1 and emits psll<ssemodesuffix>; alternative 1
;; ("avx") emits the three-operand vpsll<ssemodesuffix>.  length_immediate
;; is selected by whether operand 2 is a const_int.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5740 (define_insn "ashl<mode>3"
5741 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5743 (match_operand:VI248_128 1 "register_operand" "0,x")
5744 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5747 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5748 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5749 [(set_attr "isa" "noavx,avx")
5750 (set_attr "type" "sseishft")
5751 (set (attr "length_immediate")
5752 (if_then_else (match_operand 2 "const_int_operand" "")
5754 (const_string "0")))
5755 (set_attr "prefix_data16" "1,*")
5756 (set_attr "prefix" "orig,vex")
5757 (set_attr "mode" "TI")])
;; Expander "vec_shl_<mode>" over the VI_128 mode iterator.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand.  The preparation code replaces
;; operands 0 and 1 with their V1TImode lowparts before the pattern is
;; generated.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5759 (define_expand "vec_shl_<mode>"
5760 [(set (match_operand:VI_128 0 "register_operand" "")
5762 (match_operand:VI_128 1 "register_operand" "")
5763 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5766 operands[0] = gen_lowpart (V1TImode, operands[0]);
5767 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn "<sse2_avx2>_ashl<mode>3" over the VIMAX_AVX2 mode iterator.  The
;; immediate (operand 2, const_0_to_255_mul_8_operand) is divided by 8 in the
;; output code, then which_alternative selects between the two-operand
;; pslldq ("noavx") and the three-operand vpslldq ("avx").
;; NOTE(review): the RTL operator line, the enable condition and the
;; switch-case labels are not visible in this excerpt.
5770 (define_insn "<sse2_avx2>_ashl<mode>3"
5771 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5773 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5774 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5777 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5779 switch (which_alternative)
5782 return "pslldq\t{%2, %0|%0, %2}";
5784 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5789 [(set_attr "isa" "noavx,avx")
5790 (set_attr "type" "sseishft")
5791 (set_attr "length_immediate" "1")
5792 (set_attr "prefix_data16" "1,*")
5793 (set_attr "prefix" "orig,vex")
5794 (set_attr "mode" "<sseinsnmode>")])
;; Expander "vec_shr_<mode>" over the VI_128 mode iterator.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand.  The preparation code replaces
;; operands 0 and 1 with their V1TImode lowparts before the pattern is
;; generated.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
5796 (define_expand "vec_shr_<mode>"
5797 [(set (match_operand:VI_128 0 "register_operand" "")
5799 (match_operand:VI_128 1 "register_operand" "")
5800 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5803 operands[0] = gen_lowpart (V1TImode, operands[0]);
5804 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn "<sse2_avx2>_lshr<mode>3": lshiftrt over the VIMAX_AVX2 mode iterator.
;; The immediate (operand 2, const_0_to_255_mul_8_operand) is divided by 8 in
;; the output code, then which_alternative selects between the two-operand
;; psrldq ("noavx") and the three-operand vpsrldq ("avx").  Sets atom_unit to
;; "sishuf".
;; NOTE(review): the enable condition and the switch-case labels are not
;; visible in this excerpt.
5807 (define_insn "<sse2_avx2>_lshr<mode>3"
5808 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5809 (lshiftrt:VIMAX_AVX2
5810 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5811 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5814 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5816 switch (which_alternative)
5819 return "psrldq\t{%2, %0|%0, %2}";
5821 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5826 [(set_attr "isa" "noavx,avx")
5827 (set_attr "type" "sseishft")
5828 (set_attr "length_immediate" "1")
5829 (set_attr "atom_unit" "sishuf")
5830 (set_attr "prefix_data16" "1,*")
5831 (set_attr "prefix" "orig,vex")
5832 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<code><mode>3" over the VI124_256 mode iterator.  Both inputs are
;; nonimmediate operands; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt.  Another expander with the same visible text
;; exists nearby; presumably the two differ only in the code iterator
;; (the insns that follow print vp<maxmin_int>...) -- confirm.
5834 (define_expand "<code><mode>3"
5835 [(set (match_operand:VI124_256 0 "register_operand" "")
5837 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5838 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5840 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn "*avx2_<code><mode>3" over the VI124_256 mode iterator, enabled when
;; TARGET_AVX2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 carries the "%" commutativity marker; operand 2 may be register
;; or memory.  Emits the VEX three-operand vp<maxmin_int><ssemodesuffix>.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt;
;; another insn with the same visible text exists nearby, presumably using a
;; different code iterator -- confirm.
5842 (define_insn "*avx2_<code><mode>3"
5843 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5845 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5846 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5847 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5848 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5849 [(set_attr "type" "sseiadd")
5850 (set_attr "prefix_extra" "1")
5851 (set_attr "prefix" "vex")
5852 (set_attr "mode" "OI")])
;; Expander "<code><mode>3" over the VI124_256 mode iterator.  Both inputs are
;; nonimmediate operands; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt.  Another expander with the same visible text
;; exists nearby; presumably the two differ only in the code iterator
;; (the insns that follow print vp<maxmin_int>...) -- confirm.
5854 (define_expand "<code><mode>3"
5855 [(set (match_operand:VI124_256 0 "register_operand" "")
5857 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5858 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5860 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn "*avx2_<code><mode>3" over the VI124_256 mode iterator, enabled when
;; TARGET_AVX2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 carries the "%" commutativity marker; operand 2 may be register
;; or memory.  Emits the VEX three-operand vp<maxmin_int><ssemodesuffix>.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt;
;; another insn with the same visible text exists nearby, presumably using a
;; different code iterator -- confirm.
5862 (define_insn "*avx2_<code><mode>3"
5863 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5865 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5866 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5867 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5868 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5869 [(set_attr "type" "sseiadd")
5870 (set_attr "prefix_extra" "1")
5871 (set_attr "prefix" "vex")
5872 (set_attr "mode" "OI")])
;; Insn "*sse4_1_<code><mode>3" over the VI14_128 mode iterator, enabled when
;; TARGET_SSE4_1 holds and ix86_binary_operator_ok accepts the operands.
;; Alternative 0 ("noavx") is the destructive two-operand
;; p<maxmin_int><ssemodesuffix>; alternative 1 ("avx") is the three-operand
;; VEX form.  Operand 1 carries the "%" commutativity marker.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt.
5874 (define_insn "*sse4_1_<code><mode>3"
5875 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5877 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5878 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5879 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5881 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5882 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5883 [(set_attr "isa" "noavx,avx")
5884 (set_attr "type" "sseiadd")
5885 (set_attr "prefix_extra" "1,*")
5886 (set_attr "prefix" "orig,vex")
5887 (set_attr "mode" "TI")])
;; Insn "*<code>v8hi3" on V8HI, enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 ("noavx") is
;; the destructive two-operand p<maxmin_int>w; alternative 1 ("avx") is the
;; three-operand VEX form.  Operand 1 carries the "%" commutativity marker.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt.
5889 (define_insn "*<code>v8hi3"
5890 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5892 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5893 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5894 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5896 p<maxmin_int>w\t{%2, %0|%0, %2}
5897 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5898 [(set_attr "isa" "noavx,avx")
5899 (set_attr "type" "sseiadd")
5900 (set_attr "prefix_data16" "1,*")
5901 (set_attr "prefix_extra" "*,1")
5902 (set_attr "prefix" "orig,vex")
5903 (set_attr "mode" "TI")])
;; Expander "<code><mode>3": smaxmin over the VI14_128 mode iterator with
;; register operands.  Two code paths are visible: a call to
;; ix86_fixup_binary_operands_no_copy, and a fallback that builds a six-entry
;; xops array for ix86_expand_int_vcond -- result in xops[0], the two value
;; operands ordered by whether <CODE> is SMAX, and a GT comparison of
;; operands 1 and 2 in xops[3..5].
;; NOTE(review): the condition choosing between the two paths, the xops
;; declaration and the enable condition are not visible in this excerpt.
5905 (define_expand "<code><mode>3"
5906 [(set (match_operand:VI14_128 0 "register_operand" "")
5907 (smaxmin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5908 (match_operand:VI14_128 2 "register_operand" "")))]
5912 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5918 xops[0] = operands[0];
5919 xops[1] = operands[<CODE> == SMAX ? 1 : 2];
5920 xops[2] = operands[<CODE> == SMAX ? 2 : 1];
5921 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5922 xops[4] = operands[1];
5923 xops[5] = operands[2];
5924 ok = ix86_expand_int_vcond (xops);
;; Expander "<code>v8hi3" on V8HI with nonimmediate inputs; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt.
5930 (define_expand "<code>v8hi3"
5931 [(set (match_operand:V8HI 0 "register_operand" "")
5933 (match_operand:V8HI 1 "nonimmediate_operand" "")
5934 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5936 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Expander "<code><mode>3": smaxmin over the VI8_AVX2 mode iterator with
;; register operands.  The visible body builds a six-entry xops array for
;; ix86_expand_int_vcond: result in xops[0], the two value operands ordered
;; by whether <CODE> is SMAX, and a GT comparison of operands 1 and 2 in
;; xops[3..5].
;; NOTE(review): the xops declaration and the enable condition are not
;; visible in this excerpt.
5938 (define_expand "<code><mode>3"
5939 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5940 (smaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5941 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5947 xops[0] = operands[0];
5948 xops[1] = operands[<CODE> == SMAX ? 1 : 2];
5949 xops[2] = operands[<CODE> == SMAX ? 2 : 1];
5950 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5951 xops[4] = operands[1];
5952 xops[5] = operands[2];
5953 ok = ix86_expand_int_vcond (xops);
;; Insn "*sse4_1_<code><mode>3" over the VI24_128 mode iterator, enabled when
;; TARGET_SSE4_1 holds and ix86_binary_operator_ok accepts the operands.
;; Alternative 0 ("noavx") is the destructive two-operand
;; p<maxmin_int><ssemodesuffix>; alternative 1 ("avx") is the three-operand
;; VEX form.  Operand 1 carries the "%" commutativity marker.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt.
5958 (define_insn "*sse4_1_<code><mode>3"
5959 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5961 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5962 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5963 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5965 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5966 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5967 [(set_attr "isa" "noavx,avx")
5968 (set_attr "type" "sseiadd")
5969 (set_attr "prefix_extra" "1,*")
5970 (set_attr "prefix" "orig,vex")
5971 (set_attr "mode" "TI")])
;; Insn "*<code>v16qi3" on V16QI, enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 ("noavx") is
;; the destructive two-operand p<maxmin_int>b; alternative 1 ("avx") is the
;; three-operand VEX form.  Operand 1 carries the "%" commutativity marker.
;; NOTE(review): the RTL code-iterator line is not visible in this excerpt.
5973 (define_insn "*<code>v16qi3"
5974 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5976 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5977 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5978 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5980 p<maxmin_int>b\t{%2, %0|%0, %2}
5981 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5982 [(set_attr "isa" "noavx,avx")
5983 (set_attr "type" "sseiadd")
5984 (set_attr "prefix_data16" "1,*")
5985 (set_attr "prefix_extra" "*,1")
5986 (set_attr "prefix" "orig,vex")
5987 (set_attr "mode" "TI")])
;; Expander "<code>v16qi3" on V16QI with nonimmediate inputs; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt.
5989 (define_expand "<code>v16qi3"
5990 [(set (match_operand:V16QI 0 "register_operand" "")
5992 (match_operand:V16QI 1 "nonimmediate_operand" "")
5993 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5995 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Expander "umaxv8hi3": umax on V8HI (operand 1 register, operand 2
;; nonimmediate).  Two code paths are visible: a call to
;; ix86_fixup_binary_operands_no_copy, and a fallback that emits
;; sse2_ussubv8hi3 (op3 = operand 1 minus op2, per the generator name an
;; unsigned-saturating subtract) followed by addv8hi3 (op0 = op3 + op2).
;; op3 aliases the destination unless the destination equals op2, in which
;; case a fresh register is used so op2 is still intact for the add.
;; NOTE(review): the condition choosing between the two paths and the enable
;; condition are not visible in this excerpt.
5997 (define_expand "umaxv8hi3"
5998 [(set (match_operand:V8HI 0 "register_operand" "")
5999 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6000 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6004 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6007 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6008 if (rtx_equal_p (op3, op2))
6009 op3 = gen_reg_rtx (V8HImode);
6010 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6011 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander "umaxv4si3": umax on V4SI register operands.  Two code paths are
;; visible: a call to ix86_fixup_binary_operands_no_copy, and a fallback that
;; builds xops for ix86_expand_int_vcond -- result in xops[0], operands 1 and
;; 2 as the two values, and a GTU comparison of operands 1 and 2.
;; NOTE(review): the condition choosing between the two paths, the xops
;; declaration and the enable condition are not visible in this excerpt.
6016 (define_expand "umaxv4si3"
6017 [(set (match_operand:V4SI 0 "register_operand" "")
6018 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6019 (match_operand:V4SI 2 "register_operand" "")))]
6023 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6029 xops[0] = operands[0];
6030 xops[1] = operands[1];
6031 xops[2] = operands[2];
6032 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6033 xops[4] = operands[1];
6034 xops[5] = operands[2];
6035 ok = ix86_expand_int_vcond (xops);
;; Expander "umin<mode>3": umin over the VI24_128 mode iterator with register
;; operands.  Two code paths are visible: a call to
;; ix86_fixup_binary_operands_no_copy, and a fallback that builds xops for
;; ix86_expand_int_vcond with the value operands swapped (xops[1] = operand
;; 2, xops[2] = operand 1) under a GTU comparison of operands 1 and 2.
;; NOTE(review): the condition choosing between the two paths, the xops
;; declaration and the enable condition are not visible in this excerpt.
6041 (define_expand "umin<mode>3"
6042 [(set (match_operand:VI24_128 0 "register_operand" "")
6043 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
6044 (match_operand:VI24_128 2 "register_operand" "")))]
6048 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6054 xops[0] = operands[0];
6055 xops[1] = operands[2];
6056 xops[2] = operands[1];
6057 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6058 xops[4] = operands[1];
6059 xops[5] = operands[2];
6060 ok = ix86_expand_int_vcond (xops);
;; Expander "<code><mode>3": umaxmin over the VI8_AVX2 mode iterator with
;; register operands.  The visible body builds a six-entry xops array for
;; ix86_expand_int_vcond: result in xops[0], the two value operands ordered
;; by whether <CODE> is UMAX, and a GTU comparison of operands 1 and 2 in
;; xops[3..5].
;; NOTE(review): the xops declaration and the enable condition are not
;; visible in this excerpt.
6066 (define_expand "<code><mode>3"
6067 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
6068 (umaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
6069 (match_operand:VI8_AVX2 2 "register_operand" "")))]
6075 xops[0] = operands[0];
6076 xops[1] = operands[<CODE> == UMAX ? 1 : 2];
6077 xops[2] = operands[<CODE> == UMAX ? 2 : 1];
6078 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6079 xops[4] = operands[1];
6080 xops[5] = operands[2];
6081 ok = ix86_expand_int_vcond (xops);
6086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6088 ;; Parallel integral comparisons
6090 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "avx2_eq<mode>3" over the VI1248_256 mode iterator with
;; nonimmediate inputs; the preparation statement passes EQ and the operands
;; to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
6092 (define_expand "avx2_eq<mode>3"
6093 [(set (match_operand:VI1248_256 0 "register_operand" "")
6095 (match_operand:VI1248_256 1 "nonimmediate_operand" "")
6096 (match_operand:VI1248_256 2 "nonimmediate_operand" "")))]
6098 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn "*avx2_eq<mode>3" over the VI1248_256 mode iterator, enabled when
;; TARGET_AVX2 holds and ix86_binary_operator_ok (EQ, ...) accepts the
;; operands.  Operand 1 carries the "%" commutativity marker.  Emits the VEX
;; three-operand vpcmpeq<ssemodesuffix>.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6100 (define_insn "*avx2_eq<mode>3"
6101 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
6103 (match_operand:VI1248_256 1 "nonimmediate_operand" "%x")
6104 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
6105 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6106 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6107 [(set_attr "type" "ssecmp")
6108 (set_attr "prefix_extra" "1")
6109 (set_attr "prefix" "vex")
6110 (set_attr "mode" "OI")])
;; Insn "*sse4_1_eqv2di3" on V2DI, enabled when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok (EQ, ...) accepts the operands.  Alternative 0
;; ("noavx") is the destructive two-operand pcmpeqq; alternative 1 ("avx") is
;; the three-operand vpcmpeqq.  Operand 1 carries the "%" commutativity
;; marker.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6112 (define_insn "*sse4_1_eqv2di3"
6113 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6115 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6116 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6117 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6119 pcmpeqq\t{%2, %0|%0, %2}
6120 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6121 [(set_attr "isa" "noavx,avx")
6122 (set_attr "type" "ssecmp")
6123 (set_attr "prefix_extra" "1")
6124 (set_attr "prefix" "orig,vex")
6125 (set_attr "mode" "TI")])
;; Insn "*sse2_eq<mode>3" over the VI124_128 mode iterator, enabled when
;; TARGET_SSE2 holds, TARGET_XOP does not, and ix86_binary_operator_ok
;; (EQ, ...) accepts the operands.  Alternative 0 ("noavx") is the
;; destructive two-operand pcmpeq<ssemodesuffix>; alternative 1 ("avx") is
;; the three-operand VEX form.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6127 (define_insn "*sse2_eq<mode>3"
6128 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6130 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6131 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6132 "TARGET_SSE2 && !TARGET_XOP
6133 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6135 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6136 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6137 [(set_attr "isa" "noavx,avx")
6138 (set_attr "type" "ssecmp")
6139 (set_attr "prefix_data16" "1,*")
6140 (set_attr "prefix" "orig,vex")
6141 (set_attr "mode" "TI")])
;; Expander "sse2_eq<mode>3" over the VI124_128 mode iterator, enabled when
;; TARGET_SSE2 holds and TARGET_XOP does not.  The preparation statement
;; passes EQ and the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6143 (define_expand "sse2_eq<mode>3"
6144 [(set (match_operand:VI124_128 0 "register_operand" "")
6146 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6147 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6148 "TARGET_SSE2 && !TARGET_XOP "
6149 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander "sse4_1_eqv2di3" on V2DI with nonimmediate inputs; the
;; preparation statement passes EQ and the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
6151 (define_expand "sse4_1_eqv2di3"
6152 [(set (match_operand:V2DI 0 "register_operand" "")
6154 (match_operand:V2DI 1 "nonimmediate_operand" "")
6155 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6157 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn "sse4_2_gtv2di3" on V2DI.  Operand 1 must be a register (no "%"
;; commutativity marker); operand 2 may be register or memory.  Alternative 0
;; ("noavx") is the destructive two-operand pcmpgtq; alternative 1 ("avx") is
;; the three-operand vpcmpgtq.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
6159 (define_insn "sse4_2_gtv2di3"
6160 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6162 (match_operand:V2DI 1 "register_operand" "0,x")
6163 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6166 pcmpgtq\t{%2, %0|%0, %2}
6167 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6168 [(set_attr "isa" "noavx,avx")
6169 (set_attr "type" "ssecmp")
6170 (set_attr "prefix_extra" "1")
6171 (set_attr "prefix" "orig,vex")
6172 (set_attr "mode" "TI")])
;; Insn "avx2_gt<mode>3" over the VI1248_256 mode iterator.  Operand 1 must be
;; a register; operand 2 may be register or memory.  Emits the VEX
;; three-operand vpcmpgt<ssemodesuffix>.
;; NOTE(review): the RTL operator line and the enable condition are not
;; visible in this excerpt.
6174 (define_insn "avx2_gt<mode>3"
6175 [(set (match_operand:VI1248_256 0 "register_operand" "=x")
6177 (match_operand:VI1248_256 1 "register_operand" "x")
6178 (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
6180 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "type" "ssecmp")
6182 (set_attr "prefix_extra" "1")
6183 (set_attr "prefix" "vex")
6184 (set_attr "mode" "OI")])
;; Insn "sse2_gt<mode>3" over the VI124_128 mode iterator, enabled when
;; TARGET_SSE2 holds and TARGET_XOP does not.  Operand 1 must be a register;
;; operand 2 may be register or memory.  Alternative 0 ("noavx") is the
;; destructive two-operand pcmpgt<ssemodesuffix>; alternative 1 ("avx") is
;; the three-operand VEX form.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6186 (define_insn "sse2_gt<mode>3"
6187 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6189 (match_operand:VI124_128 1 "register_operand" "0,x")
6190 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6191 "TARGET_SSE2 && !TARGET_XOP"
6193 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6194 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6195 [(set_attr "isa" "noavx,avx")
6196 (set_attr "type" "ssecmp")
6197 (set_attr "prefix_data16" "1,*")
6198 (set_attr "prefix" "orig,vex")
6199 (set_attr "mode" "TI")])
;; Expander "vcond<V_256:mode><VI_256:mode>": operand 3 is a comparison
;; operator applied to VI_256 operands 4 and 5; operands 1 and 2 are the
;; V_256 values and operand 0 the V_256 result.  The visible part of the
;; condition requires the data mode and the comparison mode to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the enable condition and the handling of
;; the "ok" result are not visible in this excerpt.
6201 (define_expand "vcond<V_256:mode><VI_256:mode>"
6202 [(set (match_operand:V_256 0 "register_operand" "")
6204 (match_operator 3 ""
6205 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6206 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6207 (match_operand:V_256 1 "general_operand" "")
6208 (match_operand:V_256 2 "general_operand" "")))]
6210 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6211 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6213 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_128:mode><VI124_128:mode>": operand 3 is a comparison
;; operator applied to VI124_128 operands 4 and 5; operands 1 and 2 are the
;; V_128 values and operand 0 the V_128 result.  The visible part of the
;; condition requires the data mode and the comparison mode to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the enable condition and the handling of
;; the "ok" result are not visible in this excerpt.
6218 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6219 [(set (match_operand:V_128 0 "register_operand" "")
6221 (match_operator 3 ""
6222 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6223 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6224 (match_operand:V_128 1 "general_operand" "")
6225 (match_operand:V_128 2 "general_operand" "")))]
6227 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6228 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6230 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<VI8F_128:mode>v2di": if_then_else over the VI8F_128 mode
;; iterator whose condition (operand 3) is a comparison operator applied to
;; V2DI operands 4 and 5.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enable condition and the handling of the "ok" result are
;; not visible in this excerpt.
6235 (define_expand "vcond<VI8F_128:mode>v2di"
6236 [(set (match_operand:VI8F_128 0 "register_operand" "")
6237 (if_then_else:VI8F_128
6238 (match_operator 3 ""
6239 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6240 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6241 (match_operand:VI8F_128 1 "general_operand" "")
6242 (match_operand:VI8F_128 2 "general_operand" "")))]
6245 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_256:mode><VI_256:mode>": operand 3 is a comparison
;; operator applied to VI_256 operands 4 and 5; operands 1 and 2 are the
;; V_256 values and operand 0 the V_256 result.  The visible part of the
;; condition requires the data mode and the comparison mode to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the enable condition and the handling of
;; the "ok" result are not visible in this excerpt.
6250 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6251 [(set (match_operand:V_256 0 "register_operand" "")
6253 (match_operator 3 ""
6254 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6255 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6256 (match_operand:V_256 1 "general_operand" "")
6257 (match_operand:V_256 2 "general_operand" "")))]
6259 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6260 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6262 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_128:mode><VI124_128:mode>": operand 3 is a comparison
;; operator applied to VI124_128 operands 4 and 5; operands 1 and 2 are the
;; V_128 values and operand 0 the V_128 result.  The visible part of the
;; condition requires the data mode and the comparison mode to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the enable condition and the handling of
;; the "ok" result are not visible in this excerpt.
6267 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6268 [(set (match_operand:V_128 0 "register_operand" "")
6270 (match_operator 3 ""
6271 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6272 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6273 (match_operand:V_128 1 "general_operand" "")
6274 (match_operand:V_128 2 "general_operand" "")))]
6276 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6277 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6279 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<VI8F_128:mode>v2di": if_then_else over the VI8F_128 mode
;; iterator whose condition (operand 3) is a comparison operator applied to
;; V2DI operands 4 and 5.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enable condition and the handling of the "ok" result are
;; not visible in this excerpt.
6284 (define_expand "vcondu<VI8F_128:mode>v2di"
6285 [(set (match_operand:VI8F_128 0 "register_operand" "")
6286 (if_then_else:VI8F_128
6287 (match_operator 3 ""
6288 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6289 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6290 (match_operand:VI8F_128 1 "general_operand" "")
6291 (match_operand:VI8F_128 2 "general_operand" "")))]
6294 bool ok = ix86_expand_int_vcond (operands);
6299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6301 ;; Parallel bitwise logical operations
6303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "one_cmpl<mode>2" over the VI mode iterator, expressed as an xor
;; with operand 1.  The preparation code builds a CONST_VECTOR of <MODE>mode
;; in which every element is constm1_rtx (all bits set), forces it into a
;; register and stores it in operands[2].
;; NOTE(review): the second xor operand and the enable condition are not
;; visible in this excerpt -- presumably the xor uses operands[2].
6305 (define_expand "one_cmpl<mode>2"
6306 [(set (match_operand:VI 0 "register_operand" "")
6307 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6311 int i, n = GET_MODE_NUNITS (<MODE>mode);
6312 rtvec v = rtvec_alloc (n);
6314 for (i = 0; i < n; ++i)
6315 RTVEC_ELT (v, i) = constm1_rtx;
6317 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander "<sse2_avx2>_andnot<mode>3" over the VI_AVX2 mode iterator:
;; combines the bitwise complement (not) of register operand 1 with
;; nonimmediate operand 2.
;; NOTE(review): the outer RTL operator line and the enable condition are not
;; visible in this excerpt; the pattern name suggests an AND.
6320 (define_expand "<sse2_avx2>_andnot<mode>3"
6321 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6323 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6324 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn "*andnot<mode>3" over the VI mode iterator: complement of register
;; operand 1 combined with nonimmediate operand 2.  The output code switches
;; on get_attr_mode (insn), asserting the matching TARGET_* flag in each
;; case, then picks a two-operand or "v"-prefixed three-operand format
;; string by which_alternative and formats the mnemonic held in tmp into a
;; static 32-byte buffer with snprintf.  The "mode" attribute falls back to
;; V8SF when AVX2 is unavailable for modes wider than 16 bytes, and to V4SF
;; when SSE2 is unavailable.
;; NOTE(review): the outer RTL operator line, the case labels, the mnemonic
;; assignments to tmp and the enable condition are not visible in this
;; excerpt.
6327 (define_insn "*andnot<mode>3"
6328 [(set (match_operand:VI 0 "register_operand" "=x,x")
6330 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6331 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6334 static char buf[32];
6338 switch (get_attr_mode (insn))
6341 gcc_assert (TARGET_AVX2);
6343 gcc_assert (TARGET_SSE2);
6349 gcc_assert (TARGET_AVX);
6351 gcc_assert (TARGET_SSE);
6360 switch (which_alternative)
6363 ops = "%s\t{%%2, %%0|%%0, %%2}";
6366 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6372 snprintf (buf, sizeof (buf), ops, tmp);
6375 [(set_attr "isa" "noavx,avx")
6376 (set_attr "type" "sselog")
6377 (set (attr "prefix_data16")
6379 (and (eq_attr "alternative" "0")
6380 (eq_attr "mode" "TI"))
6382 (const_string "*")))
6383 (set_attr "prefix" "orig,vex")
6385 (cond [(and (not (match_test "TARGET_AVX2"))
6386 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6387 (const_string "V8SF")
6388 (not (match_test "TARGET_SSE2"))
6389 (const_string "V4SF")
6391 (const_string "<sseinsnmode>")))])
;; Expander "<code><mode>3" over the VI mode iterator with nonimmediate
;; inputs; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt; from its position in the "bitwise logical
;; operations" section this is presumably the logic-op expander.
6393 (define_expand "<code><mode>3"
6394 [(set (match_operand:VI 0 "register_operand" "")
6396 (match_operand:VI 1 "nonimmediate_operand" "")
6397 (match_operand:VI 2 "nonimmediate_operand" "")))]
6399 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn "*<code><mode>3" over the VI mode iterator; the visible part of the
;; condition requires ix86_binary_operator_ok.  Operand 1 carries the "%"
;; commutativity marker.  The output code switches on get_attr_mode (insn),
;; asserting the matching TARGET_* flag in each case, then picks a
;; two-operand or "v"-prefixed three-operand format string by
;; which_alternative and formats the mnemonic held in tmp into a static
;; 32-byte buffer with snprintf.  The "mode" attribute falls back to V8SF
;; when AVX2 is unavailable for modes wider than 16 bytes, and to V4SF when
;; SSE2 is unavailable.
;; NOTE(review): the RTL code-iterator line, the case labels, the mnemonic
;; assignments to tmp and the first part of the enable condition are not
;; visible in this excerpt.
6401 (define_insn "*<code><mode>3"
6402 [(set (match_operand:VI 0 "register_operand" "=x,x")
6404 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6405 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6407 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6409 static char buf[32];
6413 switch (get_attr_mode (insn))
6416 gcc_assert (TARGET_AVX2);
6418 gcc_assert (TARGET_SSE2);
6424 gcc_assert (TARGET_AVX);
6426 gcc_assert (TARGET_SSE);
6435 switch (which_alternative)
6438 ops = "%s\t{%%2, %%0|%%0, %%2}";
6441 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6447 snprintf (buf, sizeof (buf), ops, tmp);
6450 [(set_attr "isa" "noavx,avx")
6451 (set_attr "type" "sselog")
6452 (set (attr "prefix_data16")
6454 (and (eq_attr "alternative" "0")
6455 (eq_attr "mode" "TI"))
6457 (const_string "*")))
6458 (set_attr "prefix" "orig,vex")
6460 (cond [(and (not (match_test "TARGET_AVX2"))
6461 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6462 (const_string "V8SF")
6463 (not (match_test "TARGET_SSE2"))
6464 (const_string "V4SF")
6466 (const_string "<sseinsnmode>")))])
;; Insn "*andnottf3": TFmode variant combining the complement of register
;; operand 1 with nonimmediate operand 2.  Alternative 0 ("noavx") emits the
;; destructive two-operand pandn; alternative 1 ("avx") emits the
;; three-operand vpandn.
;; NOTE(review): the outer RTL operator line and the enable condition are not
;; visible in this excerpt.
6468 (define_insn "*andnottf3"
6469 [(set (match_operand:TF 0 "register_operand" "=x,x")
6471 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6472 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6475 pandn\t{%2, %0|%0, %2}
6476 vpandn\t{%2, %1, %0|%0, %1, %2}"
6477 [(set_attr "isa" "noavx,avx")
6478 (set_attr "type" "sselog")
6479 (set_attr "prefix_data16" "1,*")
6480 (set_attr "prefix" "orig,vex")
6481 (set_attr "mode" "TI")])
;; Expander "<code>tf3" on TFmode with nonimmediate inputs; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code-iterator line and the enable condition are not
;; visible in this excerpt.
6483 (define_expand "<code>tf3"
6484 [(set (match_operand:TF 0 "register_operand" "")
6486 (match_operand:TF 1 "nonimmediate_operand" "")
6487 (match_operand:TF 2 "nonimmediate_operand" "")))]
6489 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn "*<code>tf3" on TFmode; the visible part of the condition requires
;; ix86_binary_operator_ok.  Operand 1 carries the "%" commutativity marker.
;; Alternative 0 ("noavx") emits the destructive two-operand p<logic>;
;; alternative 1 ("avx") emits the three-operand vp<logic>.
;; NOTE(review): the RTL code-iterator line and the first part of the enable
;; condition are not visible in this excerpt.
6491 (define_insn "*<code>tf3"
6492 [(set (match_operand:TF 0 "register_operand" "=x,x")
6494 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6495 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6497 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6499 p<logic>\t{%2, %0|%0, %2}
6500 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6501 [(set_attr "isa" "noavx,avx")
6502 (set_attr "type" "sselog")
6503 (set_attr "prefix_data16" "1,*")
6504 (set_attr "prefix" "orig,vex")
6505 (set_attr "mode" "TI")])
6507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6509 ;; Parallel integral element swizzling
6511 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "vec_pack_trunc_<mode>" over the VI248_128 mode iterator; the
;; destination has mode <ssepackmode>.  Both inputs are reinterpreted as
;; <ssepackmode> via gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the enable condition is not visible in this excerpt; the
;; final argument presumably selects the even elements -- confirm against
;; ix86_expand_vec_extract_even_odd.
6513 (define_expand "vec_pack_trunc_<mode>"
6514 [(match_operand:<ssepackmode> 0 "register_operand" "")
6515 (match_operand:VI248_128 1 "register_operand" "")
6516 (match_operand:VI248_128 2 "register_operand" "")]
6519 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6520 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6521 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn "<sse2_avx2>_packsswb" over the VI1_AVX2 mode iterator: vec_concat of
;; the ss_truncate (signed-saturating truncation) of operand 1 and of operand
;; 2, each input having mode <sseunpackmode> and each half
;; <ssehalfvecmode>.  Alternative 0 ("noavx") emits packsswb; alternative 1
;; ("avx") emits the three-operand vpacksswb.
;; NOTE(review): the enable condition is not visible in this excerpt.
6525 (define_insn "<sse2_avx2>_packsswb"
6526 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6527 (vec_concat:VI1_AVX2
6528 (ss_truncate:<ssehalfvecmode>
6529 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6530 (ss_truncate:<ssehalfvecmode>
6531 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6534 packsswb\t{%2, %0|%0, %2}
6535 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6536 [(set_attr "isa" "noavx,avx")
6537 (set_attr "type" "sselog")
6538 (set_attr "prefix_data16" "1,*")
6539 (set_attr "prefix" "orig,vex")
6540 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packssdw" over the VI2_AVX2 mode iterator: vec_concat of
;; the ss_truncate (signed-saturating truncation) of operand 1 and of operand
;; 2, each input having mode <sseunpackmode> and each half
;; <ssehalfvecmode>.  Alternative 0 ("noavx") emits packssdw; alternative 1
;; ("avx") emits the three-operand vpackssdw.
;; NOTE(review): the enable condition is not visible in this excerpt.
6542 (define_insn "<sse2_avx2>_packssdw"
6543 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6544 (vec_concat:VI2_AVX2
6545 (ss_truncate:<ssehalfvecmode>
6546 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6547 (ss_truncate:<ssehalfvecmode>
6548 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6551 packssdw\t{%2, %0|%0, %2}
6552 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6553 [(set_attr "isa" "noavx,avx")
6554 (set_attr "type" "sselog")
6555 (set_attr "prefix_data16" "1,*")
6556 (set_attr "prefix" "orig,vex")
6557 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packuswb" over the VI1_AVX2 mode iterator: vec_concat of
;; the us_truncate (unsigned-saturating truncation) of operand 1 and of
;; operand 2, each input having mode <sseunpackmode> and each half
;; <ssehalfvecmode>.  Alternative 0 ("noavx") emits packuswb; alternative 1
;; ("avx") emits the three-operand vpackuswb.
;; NOTE(review): the enable condition is not visible in this excerpt.
6559 (define_insn "<sse2_avx2>_packuswb"
6560 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6561 (vec_concat:VI1_AVX2
6562 (us_truncate:<ssehalfvecmode>
6563 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6564 (us_truncate:<ssehalfvecmode>
6565 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6568 packuswb\t{%2, %0|%0, %2}
6569 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6570 [(set_attr "isa" "noavx,avx")
6571 (set_attr "type" "sselog")
6572 (set_attr "prefix_data16" "1,*")
6573 (set_attr "prefix" "orig,vex")
6574 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_interleave_highv32qi": selects bytes from the two V32QI inputs
;; (operand 1 register, operand 2 register or memory) and emits vpunpckhbw.
;; The selector alternates an index from operand 1 (n) with the matching
;; index from operand 2 (n + 32), taking bytes 8..15 and then bytes 24..31,
;; i.e. the upper eight bytes of each 16-byte half.
;; The list must hold exactly 32 indices, one per byte of the V32QI result,
;; each in the range 0..63.  A stray trailing pair (32, 64) used to follow
;; (31, 63): it made the list 34 entries long and 64 is out of range, so it
;; has been removed.
;; NOTE(review): the vec_select/vec_concat operator lines and the enable
;; condition are not visible in this excerpt.
6576 (define_insn "avx2_interleave_highv32qi"
6577 [(set (match_operand:V32QI 0 "register_operand" "=x")
6580 (match_operand:V32QI 1 "register_operand" "x")
6581 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6582 (parallel [(const_int 8) (const_int 40)
6583 (const_int 9) (const_int 41)
6584 (const_int 10) (const_int 42)
6585 (const_int 11) (const_int 43)
6586 (const_int 12) (const_int 44)
6587 (const_int 13) (const_int 45)
6588 (const_int 14) (const_int 46)
6589 (const_int 15) (const_int 47)
6590 (const_int 24) (const_int 56)
6591 (const_int 25) (const_int 57)
6592 (const_int 26) (const_int 58)
6593 (const_int 27) (const_int 59)
6594 (const_int 28) (const_int 60)
6595 (const_int 29) (const_int 61)
6596 (const_int 30) (const_int 62)
6597 (const_int 31) (const_int 63)])))]
6600 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6601 [(set_attr "type" "sselog")
6602 (set_attr "prefix" "vex")
6603 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv16qi": selects bytes from the two V16QI inputs,
;; alternating index n from operand 1 with index n + 16 from operand 2 for
;; n = 8..15 (16 indices in total).  Alternative 0 ("noavx") emits the
;; destructive two-operand punpckhbw; alternative 1 ("avx") emits the
;; three-operand vpunpckhbw.
;; NOTE(review): the vec_select/vec_concat operator lines and the enable
;; condition are not visible in this excerpt.
6605 (define_insn "vec_interleave_highv16qi"
6606 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6609 (match_operand:V16QI 1 "register_operand" "0,x")
6610 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6611 (parallel [(const_int 8) (const_int 24)
6612 (const_int 9) (const_int 25)
6613 (const_int 10) (const_int 26)
6614 (const_int 11) (const_int 27)
6615 (const_int 12) (const_int 28)
6616 (const_int 13) (const_int 29)
6617 (const_int 14) (const_int 30)
6618 (const_int 15) (const_int 31)])))]
6621 punpckhbw\t{%2, %0|%0, %2}
6622 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6623 [(set_attr "isa" "noavx,avx")
6624 (set_attr "type" "sselog")
6625 (set_attr "prefix_data16" "1,*")
6626 (set_attr "prefix" "orig,vex")
6627 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv32qi": selects bytes from the two V32QI inputs
;; (operand 1 register, operand 2 register or memory) and emits vpunpcklbw.
;; The selector alternates an index from operand 1 (n) with the matching
;; index from operand 2 (n + 32), taking bytes 0..7 and then bytes 16..23,
;; i.e. the lower eight bytes of each 16-byte half.
;; The list must hold exactly 32 indices, one per byte of the V32QI result.
;; A stray pair (15, 47) used to sit between (7, 39) and (16, 48): it made
;; the list 34 entries long and selected a byte that belongs to the high
;; interleave, so it has been removed.
;; NOTE(review): the vec_select/vec_concat operator lines and the enable
;; condition are not visible in this excerpt.
6629 (define_insn "avx2_interleave_lowv32qi"
6630 [(set (match_operand:V32QI 0 "register_operand" "=x")
6633 (match_operand:V32QI 1 "register_operand" "x")
6634 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6635 (parallel [(const_int 0) (const_int 32)
6636 (const_int 1) (const_int 33)
6637 (const_int 2) (const_int 34)
6638 (const_int 3) (const_int 35)
6639 (const_int 4) (const_int 36)
6640 (const_int 5) (const_int 37)
6641 (const_int 6) (const_int 38)
6642 (const_int 7) (const_int 39)
6644 (const_int 16) (const_int 48)
6645 (const_int 17) (const_int 49)
6646 (const_int 18) (const_int 50)
6647 (const_int 19) (const_int 51)
6648 (const_int 20) (const_int 52)
6649 (const_int 21) (const_int 53)
6650 (const_int 22) (const_int 54)
6651 (const_int 23) (const_int 55)])))]
6653 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6654 [(set_attr "type" "sselog")
6655 (set_attr "prefix" "vex")
6656 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv16qi": selects bytes from the two V16QI inputs,
;; alternating index n from operand 1 with index n + 16 from operand 2 for
;; n = 0..7 (16 indices in total).  Alternative 0 ("noavx") emits the
;; destructive two-operand punpcklbw; alternative 1 ("avx") emits the
;; three-operand vpunpcklbw.
;; NOTE(review): the vec_select/vec_concat operator lines and the enable
;; condition are not visible in this excerpt.
6658 (define_insn "vec_interleave_lowv16qi"
6659 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6662 (match_operand:V16QI 1 "register_operand" "0,x")
6663 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6664 (parallel [(const_int 0) (const_int 16)
6665 (const_int 1) (const_int 17)
6666 (const_int 2) (const_int 18)
6667 (const_int 3) (const_int 19)
6668 (const_int 4) (const_int 20)
6669 (const_int 5) (const_int 21)
6670 (const_int 6) (const_int 22)
6671 (const_int 7) (const_int 23)])))]
6674 punpcklbw\t{%2, %0|%0, %2}
6675 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "isa" "noavx,avx")
6677 (set_attr "type" "sselog")
6678 (set_attr "prefix_data16" "1,*")
6679 (set_attr "prefix" "orig,vex")
6680 (set_attr "mode" "TI")])
;; AVX2 high word interleave (vpunpckhwd, VEX only).  The selector
;; alternates elements 4..7 with 20..23, then 12..15 with 28..31.
6682 (define_insn "avx2_interleave_highv16hi"
6683 [(set (match_operand:V16HI 0 "register_operand" "=x")
6686 (match_operand:V16HI 1 "register_operand" "x")
6687 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6688 (parallel [(const_int 4) (const_int 20)
6689 (const_int 5) (const_int 21)
6690 (const_int 6) (const_int 22)
6691 (const_int 7) (const_int 23)
6692 (const_int 12) (const_int 28)
6693 (const_int 13) (const_int 29)
6694 (const_int 14) (const_int 30)
6695 (const_int 15) (const_int 31)])))]
6697 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6698 [(set_attr "type" "sselog")
6699 (set_attr "prefix" "vex")
6700 (set_attr "mode" "OI")])
;; 128-bit high word interleave: the selector alternates elements 4..7
;; with elements 12..15.  Alternative 0 (noavx) emits two-operand
;; punpckhwd with operand 1 tied to the destination; alternative 1
;; (avx) emits vpunpckhwd.
6702 (define_insn "vec_interleave_highv8hi"
6703 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6706 (match_operand:V8HI 1 "register_operand" "0,x")
6707 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6708 (parallel [(const_int 4) (const_int 12)
6709 (const_int 5) (const_int 13)
6710 (const_int 6) (const_int 14)
6711 (const_int 7) (const_int 15)])))]
6714 punpckhwd\t{%2, %0|%0, %2}
6715 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6716 [(set_attr "isa" "noavx,avx")
6717 (set_attr "type" "sselog")
6718 (set_attr "prefix_data16" "1,*")
6719 (set_attr "prefix" "orig,vex")
6720 (set_attr "mode" "TI")])
;; AVX2 low word interleave (vpunpcklwd, VEX only).  The selector
;; alternates elements 0..3 with 16..19, then 8..11 with 24..27.
6722 (define_insn "avx2_interleave_lowv16hi"
6723 [(set (match_operand:V16HI 0 "register_operand" "=x")
6726 (match_operand:V16HI 1 "register_operand" "x")
6727 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6728 (parallel [(const_int 0) (const_int 16)
6729 (const_int 1) (const_int 17)
6730 (const_int 2) (const_int 18)
6731 (const_int 3) (const_int 19)
6732 (const_int 8) (const_int 24)
6733 (const_int 9) (const_int 25)
6734 (const_int 10) (const_int 26)
6735 (const_int 11) (const_int 27)])))]
6737 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6738 [(set_attr "type" "sselog")
6739 (set_attr "prefix" "vex")
6740 (set_attr "mode" "OI")])
;; 128-bit low word interleave: the selector alternates elements 0..3
;; with elements 8..11.  Alternative 0 (noavx) emits punpcklwd with
;; operand 1 tied to the destination; alternative 1 (avx) emits
;; vpunpcklwd.
6742 (define_insn "vec_interleave_lowv8hi"
6743 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6746 (match_operand:V8HI 1 "register_operand" "0,x")
6747 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6748 (parallel [(const_int 0) (const_int 8)
6749 (const_int 1) (const_int 9)
6750 (const_int 2) (const_int 10)
6751 (const_int 3) (const_int 11)])))]
6754 punpcklwd\t{%2, %0|%0, %2}
6755 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6756 [(set_attr "isa" "noavx,avx")
6757 (set_attr "type" "sselog")
6758 (set_attr "prefix_data16" "1,*")
6759 (set_attr "prefix" "orig,vex")
6760 (set_attr "mode" "TI")])
;; AVX2 high doubleword interleave (vpunpckhdq, VEX only).  The
;; selector alternates elements 2,3 with 10,11, then 6,7 with 14,15.
6762 (define_insn "avx2_interleave_highv8si"
6763 [(set (match_operand:V8SI 0 "register_operand" "=x")
6766 (match_operand:V8SI 1 "register_operand" "x")
6767 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6768 (parallel [(const_int 2) (const_int 10)
6769 (const_int 3) (const_int 11)
6770 (const_int 6) (const_int 14)
6771 (const_int 7) (const_int 15)])))]
6773 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6774 [(set_attr "type" "sselog")
6775 (set_attr "prefix" "vex")
6776 (set_attr "mode" "OI")])
;; 128-bit high doubleword interleave: the selector alternates
;; elements 2,3 with elements 6,7.  Alternative 0 (noavx) emits
;; punpckhdq with operand 1 tied to the destination; alternative 1
;; (avx) emits vpunpckhdq.
6778 (define_insn "vec_interleave_highv4si"
6779 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6782 (match_operand:V4SI 1 "register_operand" "0,x")
6783 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6784 (parallel [(const_int 2) (const_int 6)
6785 (const_int 3) (const_int 7)])))]
6788 punpckhdq\t{%2, %0|%0, %2}
6789 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6790 [(set_attr "isa" "noavx,avx")
6791 (set_attr "type" "sselog")
6792 (set_attr "prefix_data16" "1,*")
6793 (set_attr "prefix" "orig,vex")
6794 (set_attr "mode" "TI")])
;; AVX2 low doubleword interleave (vpunpckldq, VEX only).  The
;; selector alternates elements 0,1 with 8,9, then 4,5 with 12,13.
6796 (define_insn "avx2_interleave_lowv8si"
6797 [(set (match_operand:V8SI 0 "register_operand" "=x")
6800 (match_operand:V8SI 1 "register_operand" "x")
6801 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6802 (parallel [(const_int 0) (const_int 8)
6803 (const_int 1) (const_int 9)
6804 (const_int 4) (const_int 12)
6805 (const_int 5) (const_int 13)])))]
6807 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix" "vex")
6810 (set_attr "mode" "OI")])
;; 128-bit low doubleword interleave: the selector alternates
;; elements 0,1 with elements 4,5.  Alternative 0 (noavx) emits
;; punpckldq with operand 1 tied to the destination; alternative 1
;; (avx) emits vpunpckldq.
6812 (define_insn "vec_interleave_lowv4si"
6813 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6816 (match_operand:V4SI 1 "register_operand" "0,x")
6817 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6818 (parallel [(const_int 0) (const_int 4)
6819 (const_int 1) (const_int 5)])))]
6822 punpckldq\t{%2, %0|%0, %2}
6823 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6824 [(set_attr "isa" "noavx,avx")
6825 (set_attr "type" "sselog")
6826 (set_attr "prefix_data16" "1,*")
6827 (set_attr "prefix" "orig,vex")
6828 (set_attr "mode" "TI")])
6830 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI are enabled only
;; with TARGET_SSE4_1, and V2DI additionally needs TARGET_64BIT.
6831 (define_mode_iterator PINSR_MODE
6832 [(V16QI "TARGET_SSE4_1") V8HI
6833 (V4SI "TARGET_SSE4_1")
6834 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per mode: "sse2" for V8HI and "sse4_1" for
;; V16QI, V4SI and V2DI.
6836 (define_mode_attr sse2p4_1
6837 [(V16QI "sse4_1") (V8HI "sse2")
6838 (V4SI "sse4_1") (V2DI "sse4_1")])
6840 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (general register or memory) into one
;; element of vector operand 1 via vec_merge.  Operand 3 arrives as a
;; vec_merge mask; the condition requires exact_log2 of it to be a
;; valid element index (the unsigned cast also rejects a negative
;; exact_log2 result), and the output code rewrites operands[3] to that
;; index before printing.  When the scalar mode is narrower than
;; SImode, the source is printed with the %k2 modifier.
;; Alternatives 0-1 are the non-AVX forms with operand 1 tied to the
;; destination; alternatives 2-3 are the three-operand VEX forms.
6841 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6842 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6843 (vec_merge:PINSR_MODE
6844 (vec_duplicate:PINSR_MODE
6845 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6846 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6847 (match_operand:SI 3 "const_int_operand" "")))]
6849 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6850 < GET_MODE_NUNITS (<MODE>mode))"
6852 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6854 switch (which_alternative)
6857 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6858 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6861 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6863 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6864 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6867 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6872 [(set_attr "isa" "noavx,noavx,avx,avx")
6873 (set_attr "type" "sselog")
6874 (set (attr "prefix_rex")
6876 (and (not (match_test "TARGET_AVX"))
6877 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6879 (const_string "*")))
6880 (set (attr "prefix_data16")
6882 (and (not (match_test "TARGET_AVX"))
6883 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6885 (const_string "*")))
6886 (set (attr "prefix_extra")
6888 (and (not (match_test "TARGET_AVX"))
6889 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6891 (const_string "1")))
6892 (set_attr "length_immediate" "1")
6893 (set_attr "prefix" "orig,orig,vex,vex")
6894 (set_attr "mode" "TI")])
;; Extract byte element operand 2 (0..15) of V16QI operand 1 into an
;; SWI48 general register; the destination is printed with %k0.
;; (The wrapper around the element selection is not visible in this
;; excerpt.)
6896 (define_insn "*sse4_1_pextrb_<mode>"
6897 [(set (match_operand:SWI48 0 "register_operand" "=r")
6900 (match_operand:V16QI 1 "register_operand" "x")
6901 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6903 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6904 [(set_attr "type" "sselog")
6905 (set_attr "prefix_extra" "1")
6906 (set_attr "length_immediate" "1")
6907 (set_attr "prefix" "maybe_vex")
6908 (set_attr "mode" "TI")])
;; Store byte element operand 2 (0..15) of V16QI operand 1 directly
;; to a QImode memory destination with (v)pextrb.
6910 (define_insn "*sse4_1_pextrb_memory"
6911 [(set (match_operand:QI 0 "memory_operand" "=m")
6913 (match_operand:V16QI 1 "register_operand" "x")
6914 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6916 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6917 [(set_attr "type" "sselog")
6918 (set_attr "prefix_extra" "1")
6919 (set_attr "length_immediate" "1")
6920 (set_attr "prefix" "maybe_vex")
6921 (set_attr "mode" "TI")])
;; Extract word element operand 2 (0..7) of V8HI operand 1 into an
;; SWI48 general register; the destination is printed with %k0.
;; (The wrapper around the element selection is not visible in this
;; excerpt.)
6923 (define_insn "*sse2_pextrw_<mode>"
6924 [(set (match_operand:SWI48 0 "register_operand" "=r")
6927 (match_operand:V8HI 1 "register_operand" "x")
6928 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6930 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6931 [(set_attr "type" "sselog")
6932 (set_attr "prefix_data16" "1")
6933 (set_attr "length_immediate" "1")
6934 (set_attr "prefix" "maybe_vex")
6935 (set_attr "mode" "TI")])
;; Store word element operand 2 (0..7) of V8HI operand 1 directly to
;; an HImode memory destination with (v)pextrw.
6937 (define_insn "*sse4_1_pextrw_memory"
6938 [(set (match_operand:HI 0 "memory_operand" "=m")
6940 (match_operand:V8HI 1 "register_operand" "x")
6941 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6943 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6944 [(set_attr "type" "sselog")
6945 (set_attr "prefix_extra" "1")
6946 (set_attr "length_immediate" "1")
6947 (set_attr "prefix" "maybe_vex")
6948 (set_attr "mode" "TI")])
;; Extract doubleword element operand 2 (0..3) of V4SI operand 1 into
;; an SImode general register or memory with (v)pextrd.
6950 (define_insn "*sse4_1_pextrd"
6951 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6953 (match_operand:V4SI 1 "register_operand" "x")
6954 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6956 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6957 [(set_attr "type" "sselog")
6958 (set_attr "prefix_extra" "1")
6959 (set_attr "length_immediate" "1")
6960 (set_attr "prefix" "maybe_vex")
6961 (set_attr "mode" "TI")])
;; 64-bit-only variant of the doubleword extract with a DImode
;; register destination; the instruction is printed with the %k0
;; form of the destination.  Requires TARGET_64BIT and TARGET_SSE4_1.
;; (The extension wrapper itself is not visible in this excerpt.)
6963 (define_insn "*sse4_1_pextrd_zext"
6964 [(set (match_operand:DI 0 "register_operand" "=r")
6967 (match_operand:V4SI 1 "register_operand" "x")
6968 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6969 "TARGET_64BIT && TARGET_SSE4_1"
6970 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6971 [(set_attr "type" "sselog")
6972 (set_attr "prefix_extra" "1")
6973 (set_attr "length_immediate" "1")
6974 (set_attr "prefix" "maybe_vex")
6975 (set_attr "mode" "TI")])
6977 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract quadword element operand 2 (0..1) of V2DI operand 1 into a
;; DImode general register or memory with (v)pextrq.  Enabled only
;; for TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is set to 1.
6978 (define_insn "*sse4_1_pextrq"
6979 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6981 (match_operand:V2DI 1 "register_operand" "x")
6982 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6983 "TARGET_SSE4_1 && TARGET_64BIT"
6984 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6985 [(set_attr "type" "sselog")
6986 (set_attr "prefix_rex" "1")
6987 (set_attr "prefix_extra" "1")
6988 (set_attr "length_immediate" "1")
6989 (set_attr "prefix" "maybe_vex")
6990 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufd with an 8-bit immediate
;; (operand 2, 0..255).  The immediate is split into four 2-bit
;; element selectors (bits 0-1, 2-3, 4-5, 6-7), which are passed to
;; gen_avx2_pshufd_1 together with operands 0 and 1.
6992 (define_expand "avx2_pshufdv3"
6993 [(match_operand:V8SI 0 "register_operand" "")
6994 (match_operand:V8SI 1 "nonimmediate_operand" "")
6995 (match_operand:SI 2 "const_0_to_255_operand" "")]
6998 int mask = INTVAL (operands[2]);
6999 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7000 GEN_INT ((mask >> 0) & 3),
7001 GEN_INT ((mask >> 2) & 3),
7002 GEN_INT ((mask >> 4) & 3),
7003 GEN_INT ((mask >> 6) & 3)));
;; 256-bit pshufd with the selection spelled out as element indices.
;; Operands 2..5 are selectors in 0..3; the output code packs them
;; back into one immediate (operand 2 in bits 0-1 up to operand 5 in
;; bits 6-7), stores it in operands[2] and emits vpshufd.
;; (The rest of the selector list is not visible in this excerpt.)
7007 (define_insn "avx2_pshufd_1"
7008 [(set (match_operand:V8SI 0 "register_operand" "=x")
7010 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7011 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7012 (match_operand 3 "const_0_to_3_operand" "")
7013 (match_operand 4 "const_0_to_3_operand" "")
7014 (match_operand 5 "const_0_to_3_operand" "")
7022 mask |= INTVAL (operands[2]) << 0;
7023 mask |= INTVAL (operands[3]) << 2;
7024 mask |= INTVAL (operands[4]) << 4;
7025 mask |= INTVAL (operands[5]) << 6;
7026 operands[2] = GEN_INT (mask);
7028 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7030 [(set_attr "type" "sselog1")
7031 (set_attr "prefix" "vex")
7032 (set_attr "length_immediate" "1")
7033 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufd with an integer immediate
;; (operand 2).  Only the low eight bits are used: they are split into
;; four 2-bit element selectors and passed to gen_sse2_pshufd_1.
7035 (define_expand "sse2_pshufd"
7036 [(match_operand:V4SI 0 "register_operand" "")
7037 (match_operand:V4SI 1 "nonimmediate_operand" "")
7038 (match_operand:SI 2 "const_int_operand" "")]
7041 int mask = INTVAL (operands[2]);
7042 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7043 GEN_INT ((mask >> 0) & 3),
7044 GEN_INT ((mask >> 2) & 3),
7045 GEN_INT ((mask >> 4) & 3),
7046 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd with the selection given as four element indices
;; (operands 2..5, each 0..3).  The output code packs them into one
;; immediate (operand 2 in bits 0-1 up to operand 5 in bits 6-7),
;; stores it in operands[2] and emits (v)pshufd.
7050 (define_insn "sse2_pshufd_1"
7051 [(set (match_operand:V4SI 0 "register_operand" "=x")
7053 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7054 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7055 (match_operand 3 "const_0_to_3_operand" "")
7056 (match_operand 4 "const_0_to_3_operand" "")
7057 (match_operand 5 "const_0_to_3_operand" "")])))]
7061 mask |= INTVAL (operands[2]) << 0;
7062 mask |= INTVAL (operands[3]) << 2;
7063 mask |= INTVAL (operands[4]) << 4;
7064 mask |= INTVAL (operands[5]) << 6;
7065 operands[2] = GEN_INT (mask);
7067 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7069 [(set_attr "type" "sselog1")
7070 (set_attr "prefix_data16" "1")
7071 (set_attr "prefix" "maybe_vex")
7072 (set_attr "length_immediate" "1")
7073 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshuflw with an 8-bit immediate
;; (operand 2, 0..255).  The immediate is split into four 2-bit
;; selectors, which are passed to gen_avx2_pshuflw_1.
7075 (define_expand "avx2_pshuflwv3"
7076 [(match_operand:V16HI 0 "register_operand" "")
7077 (match_operand:V16HI 1 "nonimmediate_operand" "")
7078 (match_operand:SI 2 "const_0_to_255_operand" "")]
7081 int mask = INTVAL (operands[2]);
7082 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7083 GEN_INT ((mask >> 0) & 3),
7084 GEN_INT ((mask >> 2) & 3),
7085 GEN_INT ((mask >> 4) & 3),
7086 GEN_INT ((mask >> 6) & 3)));
;; 256-bit pshuflw with the selection spelled out as element indices.
;; Operands 2..5 are selectors in 0..3; the output code packs them
;; into one immediate (2 bits each, operand 2 lowest), stores it in
;; operands[2] and emits vpshuflw.
;; (The rest of the selector list is not visible in this excerpt.)
7090 (define_insn "avx2_pshuflw_1"
7091 [(set (match_operand:V16HI 0 "register_operand" "=x")
7093 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7094 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7095 (match_operand 3 "const_0_to_3_operand" "")
7096 (match_operand 4 "const_0_to_3_operand" "")
7097 (match_operand 5 "const_0_to_3_operand" "")
7113 mask |= INTVAL (operands[2]) << 0;
7114 mask |= INTVAL (operands[3]) << 2;
7115 mask |= INTVAL (operands[4]) << 4;
7116 mask |= INTVAL (operands[5]) << 6;
7117 operands[2] = GEN_INT (mask);
7119 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7121 [(set_attr "type" "sselog")
7122 (set_attr "prefix" "vex")
7123 (set_attr "length_immediate" "1")
7124 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshuflw with an integer immediate
;; (operand 2).  The low eight bits are split into four 2-bit
;; selectors and passed to gen_sse2_pshuflw_1.
7126 (define_expand "sse2_pshuflw"
7127 [(match_operand:V8HI 0 "register_operand" "")
7128 (match_operand:V8HI 1 "nonimmediate_operand" "")
7129 (match_operand:SI 2 "const_int_operand" "")]
7132 int mask = INTVAL (operands[2]);
7133 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7134 GEN_INT ((mask >> 0) & 3),
7135 GEN_INT ((mask >> 2) & 3),
7136 GEN_INT ((mask >> 4) & 3),
7137 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw with the selection given as element indices.
;; Operands 2..5 are selectors in 0..3; the output code packs them
;; into one immediate (2 bits each, operand 2 lowest), stores it in
;; operands[2] and emits (v)pshuflw.  The attributes set prefix_rep
;; to 1 and prefix_data16 to 0.
;; (The rest of the selector list is not visible in this excerpt.)
7141 (define_insn "sse2_pshuflw_1"
7142 [(set (match_operand:V8HI 0 "register_operand" "=x")
7144 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7145 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7146 (match_operand 3 "const_0_to_3_operand" "")
7147 (match_operand 4 "const_0_to_3_operand" "")
7148 (match_operand 5 "const_0_to_3_operand" "")
7156 mask |= INTVAL (operands[2]) << 0;
7157 mask |= INTVAL (operands[3]) << 2;
7158 mask |= INTVAL (operands[4]) << 4;
7159 mask |= INTVAL (operands[5]) << 6;
7160 operands[2] = GEN_INT (mask);
7162 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7164 [(set_attr "type" "sselog")
7165 (set_attr "prefix_data16" "0")
7166 (set_attr "prefix_rep" "1")
7167 (set_attr "prefix" "maybe_vex")
7168 (set_attr "length_immediate" "1")
7169 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufhw with an 8-bit immediate
;; (operand 2, 0..255).  Each 2-bit field of the immediate is biased
;; by 4 so that the selectors handed to gen_avx2_pshufhw_1 lie in
;; 4..7.
7171 (define_expand "avx2_pshufhwv3"
7172 [(match_operand:V16HI 0 "register_operand" "")
7173 (match_operand:V16HI 1 "nonimmediate_operand" "")
7174 (match_operand:SI 2 "const_0_to_255_operand" "")]
7177 int mask = INTVAL (operands[2]);
7178 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7179 GEN_INT (((mask >> 0) & 3) + 4),
7180 GEN_INT (((mask >> 2) & 3) + 4),
7181 GEN_INT (((mask >> 4) & 3) + 4),
7182 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit pshufhw with the selection spelled out as element indices.
;; Operands 2..5 are selectors in 4..7; the output code subtracts the
;; bias of 4 from each, packs them into one immediate (2 bits each,
;; operand 2 lowest), stores it in operands[2] and emits vpshufhw.
;; (Parts of the selector list are not visible in this excerpt.)
7186 (define_insn "avx2_pshufhw_1"
7187 [(set (match_operand:V16HI 0 "register_operand" "=x")
7189 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7190 (parallel [(const_int 0)
7194 (match_operand 2 "const_4_to_7_operand" "")
7195 (match_operand 3 "const_4_to_7_operand" "")
7196 (match_operand 4 "const_4_to_7_operand" "")
7197 (match_operand 5 "const_4_to_7_operand" "")
7209 mask |= (INTVAL (operands[2]) - 4) << 0;
7210 mask |= (INTVAL (operands[3]) - 4) << 2;
7211 mask |= (INTVAL (operands[4]) - 4) << 4;
7212 mask |= (INTVAL (operands[5]) - 4) << 6;
7213 operands[2] = GEN_INT (mask);
7215 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7217 [(set_attr "type" "sselog")
7218 (set_attr "prefix" "vex")
7219 (set_attr "length_immediate" "1")
7220 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufhw with an integer immediate
;; (operand 2).  Each 2-bit field of the low eight bits is biased by
;; 4 so that the selectors handed to gen_sse2_pshufhw_1 lie in 4..7.
7222 (define_expand "sse2_pshufhw"
7223 [(match_operand:V8HI 0 "register_operand" "")
7224 (match_operand:V8HI 1 "nonimmediate_operand" "")
7225 (match_operand:SI 2 "const_int_operand" "")]
7228 int mask = INTVAL (operands[2]);
7229 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7230 GEN_INT (((mask >> 0) & 3) + 4),
7231 GEN_INT (((mask >> 2) & 3) + 4),
7232 GEN_INT (((mask >> 4) & 3) + 4),
7233 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit pshufhw with the selection given as element indices.
;; Operands 2..5 are selectors in 4..7 and close the selector list;
;; the output code subtracts the bias of 4 from each, packs them into
;; one immediate (2 bits each, operand 2 lowest), stores it in
;; operands[2] and emits (v)pshufhw.
;; (The fixed indices between element 0 and operand 2 are not visible
;; in this excerpt.)
7237 (define_insn "sse2_pshufhw_1"
7238 [(set (match_operand:V8HI 0 "register_operand" "=x")
7240 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7241 (parallel [(const_int 0)
7245 (match_operand 2 "const_4_to_7_operand" "")
7246 (match_operand 3 "const_4_to_7_operand" "")
7247 (match_operand 4 "const_4_to_7_operand" "")
7248 (match_operand 5 "const_4_to_7_operand" "")])))]
7252 mask |= (INTVAL (operands[2]) - 4) << 0;
7253 mask |= (INTVAL (operands[3]) - 4) << 2;
7254 mask |= (INTVAL (operands[4]) - 4) << 4;
7255 mask |= (INTVAL (operands[5]) - 4) << 6;
7256 operands[2] = GEN_INT (mask);
7258 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7260 [(set_attr "type" "sselog")
7261 (set_attr "prefix_rep" "1")
7262 (set_attr "prefix_data16" "0")
7263 (set_attr "prefix" "maybe_vex")
7264 (set_attr "length_immediate" "1")
7265 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from SImode operand 1.  The
;; preparation statement sets operands[2] to the V4SI zero vector.
;; (The RTL that combines the two is not visible in this excerpt.)
7267 (define_expand "sse2_loadd"
7268 [(set (match_operand:V4SI 0 "register_operand" "")
7271 (match_operand:SI 1 "nonimmediate_operand" ""))
7275 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI register, with operand 1 being
;; either a zero constant (constraint C) or a register.
;; Alternatives, in order:
;;   0: (v)movd from memory, operand 1 zero (isa sse2)
;;   1: (v)movd from a general register into a Yi register, operand 1 zero
;;   2: movss from memory, operand 1 zero (noavx)
;;   3: movss from an SSE register, operand 1 tied to the destination (noavx)
;;   4: three-operand vmovss (avx)
7277 (define_insn "sse2_loadld"
7278 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7281 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7282 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7286 %vmovd\t{%2, %0|%0, %2}
7287 %vmovd\t{%2, %0|%0, %2}
7288 movss\t{%2, %0|%0, %2}
7289 movss\t{%2, %0|%0, %2}
7290 vmovss\t{%2, %1, %0|%0, %1, %2}"
7291 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7292 (set_attr "type" "ssemov")
7293 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7294 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element 0 of V4SI operand 1 into an SImode destination.
;; After reload the pattern is split into a plain SImode move, by
;; re-typing operand 1's hard register as SImode, when inter-unit
;; moves are allowed, the destination is memory, or the destination
;; is not a general register.
7296 (define_insn_and_split "sse2_stored"
7297 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7299 (match_operand:V4SI 1 "register_operand" "x,Yi")
7300 (parallel [(const_int 0)])))]
7303 "&& reload_completed
7304 && (TARGET_INTER_UNIT_MOVES
7305 || MEM_P (operands [0])
7306 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7307 [(set (match_dup 0) (match_dup 1))]
7308 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI value in offsettable
;; memory into an SImode general register.  The split replaces it
;; with an SImode move from the memory address adjusted by i*4 bytes.
7310 (define_insn_and_split "*vec_ext_v4si_mem"
7311 [(set (match_operand:SI 0 "register_operand" "=r")
7313 (match_operand:V4SI 1 "memory_operand" "o")
7314 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7320 int i = INTVAL (operands[2]);
7322 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that extracts element 0 of V2DI operand 1 into a DImode
;; register or memory destination.
7326 (define_expand "sse_storeq"
7327 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7329 (match_operand:V2DI 1 "register_operand" "")
7330 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 V2DI extract.  It is rejected when
;; both operands are memory.  The last alternative loads the value
;; from offsettable memory into a general register with mov{q}; the
;; templates for the first two alternatives are not visible in this
;; excerpt.
7333 (define_insn "*sse2_storeq_rex64"
7334 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7336 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7337 (parallel [(const_int 0)])))]
7338 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7342 mov{q}\t{%1, %0|%0, %1}"
7343 [(set_attr "type" "*,*,imov")
7344 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI extract into an SSE register or memory destination.
;; (Condition and output template are not visible in this excerpt.)
7346 (define_insn "*sse2_storeq"
7347 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7349 (match_operand:V2DI 1 "register_operand" "x")
7350 (parallel [(const_int 0)])))]
;; Split for the element-0 V2DI extract (the opening form of this
;; definition is not visible in this excerpt).  When inter-unit moves
;; are allowed, the destination is memory, or the destination is not
;; a general register, the extract is rewritten as a plain DImode
;; move by re-typing operand 1's hard register as DImode.
7355 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7357 (match_operand:V2DI 1 "register_operand" "")
7358 (parallel [(const_int 0)])))]
7361 && (TARGET_INTER_UNIT_MOVES
7362 || MEM_P (operands [0])
7363 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7364 [(set (match_dup 0) (match_dup 1))]
7365 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extract of element 1 of a V2DI value; rejected when both
;; operands are memory.  Alternatives, in order:
;;   0: (v)movhps store of a register source to memory
;;   1: psrldq by 8 in place, source tied to the destination (noavx)
;;   2: vpsrldq by 8 (avx)
;;   3: (v)movq from the %H1 part of an offsettable memory source
;;   4: mov{q} from the %H1 part of memory into a general register
7367 (define_insn "*vec_extractv2di_1_rex64"
7368 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7370 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7371 (parallel [(const_int 1)])))]
7372 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7374 %vmovhps\t{%1, %0|%0, %1}
7375 psrldq\t{$8, %0|%0, 8}
7376 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7377 %vmovq\t{%H1, %0|%0, %H1}
7378 mov{q}\t{%H1, %0|%0, %H1}"
7379 [(set_attr "isa" "*,noavx,avx,*,*")
7380 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7381 (set_attr "length_immediate" "*,1,1,*,*")
7382 (set_attr "memory" "*,none,none,*,*")
7383 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7384 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) extract of element 1 of a
;; V2DI value; rejected when both operands are memory.
;; Alternatives, in order:
;;   0: (v)movhps store of a register source to memory
;;   1: psrldq by 8 in place (sse2_noavx)
;;   2: vpsrldq by 8 (avx)
;;   3: (v)movq from the %H1 part of offsettable memory (sse2)
;;   4: movhlps register to register (noavx)
;;   5: movlps from the %H1 part of offsettable memory (noavx)
7386 (define_insn "*vec_extractv2di_1"
7387 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7389 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7390 (parallel [(const_int 1)])))]
7391 "!TARGET_64BIT && TARGET_SSE
7392 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7394 %vmovhps\t{%1, %0|%0, %1}
7395 psrldq\t{$8, %0|%0, 8}
7396 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7397 %vmovq\t{%H1, %0|%0, %H1}
7398 movhlps\t{%1, %0|%0, %1}
7399 movlps\t{%H1, %0|%0, %H1}"
7400 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7401 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7402 (set_attr "length_immediate" "*,1,1,*,*,*")
7403 (set_attr "memory" "*,none,none,*,*,*")
7404 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7405 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI broadcast of SImode operand 1: vpshufd with
;; immediate 0 for a register source, vbroadcastss for a memory
;; source.  (The insn condition is not visible in this excerpt.)
7407 (define_insn "*vec_dupv4si_avx"
7408 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7410 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7413 vpshufd\t{$0, %1, %0|%0, %1, 0}
7414 vbroadcastss\t{%1, %0|%0, %1}"
7415 [(set_attr "type" "sselog1,ssemov")
7416 (set_attr "length_immediate" "1,0")
7417 (set_attr "prefix_extra" "0,1")
7418 (set_attr "prefix" "vex")
7419 (set_attr "mode" "TI,V4SF")])
;; V4SI broadcast of a register SImode operand: pshufd with
;; immediate 0 (isa sse2), or shufps with immediate 0 on the
;; destination itself when operand 1 is tied to it.
7421 (define_insn "*vec_dupv4si"
7422 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7424 (match_operand:SI 1 "register_operand" " x,0")))]
7427 pshufd\t{$0, %1, %0|%0, %1, 0}
7428 shufps\t{$0, %0, %0|%0, %0, 0}"
7429 [(set_attr "isa" "sse2,*")
7430 (set_attr "type" "sselog1")
7431 (set_attr "length_immediate" "1")
7432 (set_attr "mode" "TI,V4SF")])
;; V2DI broadcast of DImode operand 1 with three alternatives (noavx,
;; avx, any).  The visible templates are vpunpcklqdq using %d1 for
;; the avx alternative and (v)movddup for the memory alternative; the
;; template for alternative 0 and the insn condition are not visible
;; in this excerpt.
7434 (define_insn "*vec_dupv2di_sse3"
7435 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7437 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7441 vpunpcklqdq\t{%d1, %0|%0, %d1}
7442 %vmovddup\t{%1, %0|%0, %1}"
7443 [(set_attr "isa" "noavx,avx,*")
7444 (set_attr "type" "sselog1")
7445 (set_attr "prefix" "orig,vex,maybe_vex")
7446 (set_attr "mode" "TI,TI,DF")])
;; V2DI broadcast of a DImode register operand tied to the
;; destination in both alternatives (isa sse2 and any).
;; (Condition and output templates are not visible in this excerpt.)
7448 (define_insn "*vec_dupv2di"
7449 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7451 (match_operand:DI 1 "register_operand" " 0,0")))]
7456 [(set_attr "isa" "sse2,*")
7457 (set_attr "type" "sselog1,ssemov")
7458 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from SImode operands 1 and 2.
;; Alternatives, in order:
;;   0: pinsrd $1 of operand 2 (reg/mem) into the destination (noavx)
;;   1: vpinsrd $1 (avx)
;;   2: punpckldq with an SSE register operand 2 (noavx)
;;   3: vpunpckldq (avx)
;;   4: (v)movd of operand 1 when operand 2 is the zero constant (C)
;;   5: MMX punpckldq
;;   6: MMX movd of operand 1 when operand 2 is the zero constant (C)
;; (The insn condition is not visible in this excerpt.)
7460 (define_insn "*vec_concatv2si_sse4_1"
7461 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7463 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7464 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7467 pinsrd\t{$1, %2, %0|%0, %2, 1}
7468 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7469 punpckldq\t{%2, %0|%0, %2}
7470 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7471 %vmovd\t{%1, %0|%0, %1}
7472 punpckldq\t{%2, %0|%0, %2}
7473 movd\t{%1, %0|%0, %1}"
7474 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7475 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7476 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7477 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7478 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7479 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7481 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7482 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7483 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from SImode operands 1 and 2, where operand 2
;; is a register or zero.  Alternatives, in order: SSE punpckldq,
;; SSE movd of operand 1 with a zero operand 2, MMX punpckldq, MMX
;; movd with a zero operand 2.
;; (The insn condition is not visible in this excerpt.)
7484 (define_insn "*vec_concatv2si_sse2"
7485 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7487 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7488 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7491 punpckldq\t{%2, %0|%0, %2}
7492 movd\t{%1, %0|%0, %1}
7493 punpckldq\t{%2, %0|%0, %2}
7494 movd\t{%1, %0|%0, %1}"
7495 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7496 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from SImode operands 1 and 2, where operand 2
;; is a register or zero.  Alternatives, in order: unpcklps, movss
;; load of operand 1 from memory with a zero operand 2, MMX
;; punpckldq, MMX movd with a zero operand 2.
;; (The insn condition is not visible in this excerpt.)
7498 (define_insn "*vec_concatv2si_sse"
7499 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7501 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7502 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7505 unpcklps\t{%2, %0|%0, %2}
7506 movss\t{%1, %0|%0, %1}
7507 punpckldq\t{%2, %0|%0, %2}
7508 movd\t{%1, %0|%0, %1}"
7509 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7510 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI halves (operands 1 and 2).
;; Alternatives, in order:
;;   0: punpcklqdq (sse2_noavx)
;;   1: vpunpcklqdq (avx)
;;   2: movlhps, register operand 2 (noavx)
;;   3: movhps, memory operand 2 (noavx)
;;   4: vmovhps, memory operand 2 (avx)
;; (The insn condition is not visible in this excerpt.)
7512 (define_insn "*vec_concatv4si"
7513 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7515 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7516 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7519 punpcklqdq\t{%2, %0|%0, %2}
7520 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7521 movlhps\t{%2, %0|%0, %2}
7522 movhps\t{%2, %0|%0, %2}
7523 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7524 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7525 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7526 (set_attr "prefix" "orig,vex,orig,orig,vex")
7527 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7529 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from DImode operands 1 and 2.
;; Alternatives, in order:
;;   0: pinsrq $1 of operand 2 (sse4_noavx)
;;   1: vpinsrq $1 (avx)
;;   2: (v)movq of operand 1 with a zero operand 2
;;   3: (v)movd of a general-register operand 1 with a zero operand 2
;;   4: movq2dq of an MMX-register operand 1 with a zero operand 2
;;   5: punpcklqdq (noavx)
;;   6: vpunpcklqdq (avx)
;;   7: movhps with memory operand 2 (noavx)
;;   8: vmovhps with memory operand 2 (avx)
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise.  (The insn condition is not visible in this
;; excerpt.)
7530 (define_insn "*vec_concatv2di_rex64"
7531 [(set (match_operand:V2DI 0 "register_operand"
7532 "=x,x ,x ,Yi,!x,x,x,x,x")
7534 (match_operand:DI 1 "nonimmediate_operand"
7535 " 0,x ,xm,r ,*y,0,x,0,x")
7536 (match_operand:DI 2 "vector_move_operand"
7537 "rm,rm,C ,C ,C ,x,x,m,m")))]
7540 pinsrq\t{$1, %2, %0|%0, %2, 1}
7541 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7542 %vmovq\t{%1, %0|%0, %1}
7543 %vmovd\t{%1, %0|%0, %1}
7544 movq2dq\t{%1, %0|%0, %1}
7545 punpcklqdq\t{%2, %0|%0, %2}
7546 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7547 movhps\t{%2, %0|%0, %2}
7548 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7549 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7552 (eq_attr "alternative" "0,1,5,6")
7553 (const_string "sselog")
7554 (const_string "ssemov")))
7555 (set (attr "prefix_rex")
7557 (and (eq_attr "alternative" "0,3")
7558 (not (match_test "TARGET_AVX")))
7560 (const_string "*")))
7561 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7562 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7563 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7564 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) construction of a V2DI value
;; from DImode operands 1 and 2.  Alternatives, in order:
;;   0: (v)movq of operand 1 with a zero operand 2 (sse2)
;;   1: movq2dq of an MMX-register operand 1 with a zero operand 2 (sse2)
;;   2: punpcklqdq (sse2_noavx)
;;   3: vpunpcklqdq (avx)
;;   4: movlhps, register operand 2 (noavx)
;;   5: movhps, memory operand 2 (noavx)
;;   6: vmovhps, memory operand 2 (avx)
7566 (define_insn "vec_concatv2di"
7567 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7569 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7570 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7571 "!TARGET_64BIT && TARGET_SSE"
7573 %vmovq\t{%1, %0|%0, %1}
7574 movq2dq\t{%1, %0|%0, %1}
7575 punpcklqdq\t{%2, %0|%0, %2}
7576 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7577 movlhps\t{%2, %0|%0, %2}
7578 movhps\t{%2, %0|%0, %2}
7579 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7580 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7581 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7582 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7583 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Unpack expander for VI124_128 modes; the result has the
;; <sseunpackmode> mode.  All work is delegated to
;; ix86_expand_sse_unpack (operands, false, false).  Judging by the
;; sibling expanders, the two flags presumably mean "unsigned" and
;; "high half" -- confirm against the helper's definition.
7585 (define_expand "vec_unpacks_lo_<mode>"
7586 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7587 (match_operand:VI124_128 1 "register_operand" "")]
7589 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Unpack expander for VI124_128 modes; the result has the
;; <sseunpackmode> mode.  All work is delegated to
;; ix86_expand_sse_unpack (operands, false, true).  Judging by the
;; sibling expanders, the two flags presumably mean "unsigned" and
;; "high half" -- confirm against the helper's definition.
7591 (define_expand "vec_unpacks_hi_<mode>"
7592 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7593 (match_operand:VI124_128 1 "register_operand" "")]
7595 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Unpack expander for VI124_128 modes; the result has the
;; <sseunpackmode> mode.  All work is delegated to
;; ix86_expand_sse_unpack (operands, true, false).  Judging by the
;; sibling expanders, the two flags presumably mean "unsigned" and
;; "high half" -- confirm against the helper's definition.
7597 (define_expand "vec_unpacku_lo_<mode>"
7598 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7599 (match_operand:VI124_128 1 "register_operand" "")]
7601 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Unpack expander for VI124_128 modes; the result has the
;; <sseunpackmode> mode.  All work is delegated to
;; ix86_expand_sse_unpack (operands, true, true).  Judging by the
;; sibling expanders, the two flags presumably mean "unsigned" and
;; "high half" -- confirm against the helper's definition.
7603 (define_expand "vec_unpacku_hi_<mode>"
7604 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7605 (match_operand:VI124_128 1 "register_operand" "")]
7607 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7609 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7613 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI "uavg" pattern.  The visible RTL combines
;; operands 1 and 2 with a V32QI constant vector of thirty-two ones
;; (presumably the rounding term of the average; the surrounding
;; arithmetic is not visible in this excerpt).  The preparation
;; statement canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy with code PLUS.
7615 (define_expand "avx2_uavgv32qi3"
7616 [(set (match_operand:V32QI 0 "register_operand" "")
7622 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7624 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7625 (const_vector:V32QI [(const_int 1) (const_int 1)
7626 (const_int 1) (const_int 1)
7627 (const_int 1) (const_int 1)
7628 (const_int 1) (const_int 1)
7629 (const_int 1) (const_int 1)
7630 (const_int 1) (const_int 1)
7631 (const_int 1) (const_int 1)
7632 (const_int 1) (const_int 1)
7633 (const_int 1) (const_int 1)
7634 (const_int 1) (const_int 1)
7635 (const_int 1) (const_int 1)
7636 (const_int 1) (const_int 1)
7637 (const_int 1) (const_int 1)
7638 (const_int 1) (const_int 1)
7639 (const_int 1) (const_int 1)
7640 (const_int 1) (const_int 1)]))
7643 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI "uavg" pattern.  The visible RTL combines
;; operands 1 and 2 with a V16QI constant vector of sixteen ones
;; (presumably the rounding term of the average; the surrounding
;; arithmetic is not visible in this excerpt).  The preparation
;; statement canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy with code PLUS.
7645 (define_expand "sse2_uavgv16qi3"
7646 [(set (match_operand:V16QI 0 "register_operand" "")
7652 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7654 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7655 (const_vector:V16QI [(const_int 1) (const_int 1)
7656 (const_int 1) (const_int 1)
7657 (const_int 1) (const_int 1)
7658 (const_int 1) (const_int 1)
7659 (const_int 1) (const_int 1)
7660 (const_int 1) (const_int 1)
7661 (const_int 1) (const_int 1)
7662 (const_int 1) (const_int 1)]))
7665 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matcher for the V32QI "uavg" RTL; emits three-operand vpavgb.
;; Operand 1 is marked commutative ("%") with operand 2, which may be
;; memory.  Requires TARGET_AVX2 and an operand combination accepted
;; by ix86_binary_operator_ok for PLUS.
7667 (define_insn "*avx2_uavgv32qi3"
7668 [(set (match_operand:V32QI 0 "register_operand" "=x")
7674 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7676 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7677 (const_vector:V32QI [(const_int 1) (const_int 1)
7678 (const_int 1) (const_int 1)
7679 (const_int 1) (const_int 1)
7680 (const_int 1) (const_int 1)
7681 (const_int 1) (const_int 1)
7682 (const_int 1) (const_int 1)
7683 (const_int 1) (const_int 1)
7684 (const_int 1) (const_int 1)
7685 (const_int 1) (const_int 1)
7686 (const_int 1) (const_int 1)
7687 (const_int 1) (const_int 1)
7688 (const_int 1) (const_int 1)
7689 (const_int 1) (const_int 1)
7690 (const_int 1) (const_int 1)
7691 (const_int 1) (const_int 1)
7692 (const_int 1) (const_int 1)]))
7694 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7695 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7696 [(set_attr "type" "sseiadd")
7697 (set_attr "prefix" "vex")
7698 (set_attr "mode" "OI")])
;; Matcher for the V16QI "uavg" RTL.  Alternative 0 (noavx) emits
;; two-operand pavgb with operand 1 tied to the destination;
;; alternative 1 (avx) emits three-operand vpavgb.  Operand 1 is
;; marked commutative ("%").  Requires TARGET_SSE2 and an operand
;; combination accepted by ix86_binary_operator_ok for PLUS.
7700 (define_insn "*sse2_uavgv16qi3"
7701 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7707 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7709 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7710 (const_vector:V16QI [(const_int 1) (const_int 1)
7711 (const_int 1) (const_int 1)
7712 (const_int 1) (const_int 1)
7713 (const_int 1) (const_int 1)
7714 (const_int 1) (const_int 1)
7715 (const_int 1) (const_int 1)
7716 (const_int 1) (const_int 1)
7717 (const_int 1) (const_int 1)]))
7719 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7721 pavgb\t{%2, %0|%0, %2}
7722 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7723 [(set_attr "isa" "noavx,avx")
7724 (set_attr "type" "sseiadd")
7725 (set_attr "prefix_data16" "1,*")
7726 (set_attr "prefix" "orig,vex")
7727 (set_attr "mode" "TI")])
;; Expander for the V16HI "uavg" pattern.  The visible RTL combines
;; operands 1 and 2 with a V16HI constant vector of sixteen ones
;; (presumably the rounding term of the average; the surrounding
;; arithmetic is not visible in this excerpt).  The preparation
;; statement canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy with code PLUS.
7729 (define_expand "avx2_uavgv16hi3"
7730 [(set (match_operand:V16HI 0 "register_operand" "")
7736 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7738 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7739 (const_vector:V16HI [(const_int 1) (const_int 1)
7740 (const_int 1) (const_int 1)
7741 (const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)
7743 (const_int 1) (const_int 1)
7744 (const_int 1) (const_int 1)
7745 (const_int 1) (const_int 1)
7746 (const_int 1) (const_int 1)]))
7749 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Named expander for the V8HI average.  Operands 1 and 2 are
;; nonimmediate V8HI values; the preparation statement hands them to
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands)
;; before the RTL is generated (presumably so the result satisfies
;; ix86_binary_operator_ok in the matching insn -- verify in i386.c).
7751 (define_expand "sse2_uavgv8hi3"
7752 [(set (match_operand:V8HI 0 "register_operand" "")
7758 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7760 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7761 (const_vector:V8HI [(const_int 1) (const_int 1)
7762 (const_int 1) (const_int 1)
7763 (const_int 1) (const_int 1)
7764 (const_int 1) (const_int 1)]))
7767 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; 256-bit word average.  Matches when TARGET_AVX2 is set and
;; ix86_binary_operator_ok accepts the PLUS operands; emits the
;; three-operand vpavgw.  Operand 1 carries the "%" (commutative)
;; constraint modifier, operand 2 may be a register or memory ("xm").
;; The V16HI const_vector of ones is presumably the rounding term of
;; the average -- TODO confirm against the full RTL.
7769 (define_insn "*avx2_uavgv16hi3"
7770 [(set (match_operand:V16HI 0 "register_operand" "=x")
7776 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7778 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7779 (const_vector:V16HI [(const_int 1) (const_int 1)
7780 (const_int 1) (const_int 1)
7781 (const_int 1) (const_int 1)
7782 (const_int 1) (const_int 1)
7783 (const_int 1) (const_int 1)
7784 (const_int 1) (const_int 1)
7785 (const_int 1) (const_int 1)
7786 (const_int 1) (const_int 1)]))
7788 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7789 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7790 [(set_attr "type" "sseiadd")
7791 (set_attr "prefix" "vex")
7792 (set_attr "mode" "OI")])
;; 128-bit word average.  Two alternatives, selected by the "isa"
;; attribute: "noavx" emits the two-operand pavgw with the destination
;; tied to operand 1 (constraint "0"); "avx" emits the three-operand
;; vpavgw.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
;; prefix_data16 is 1 only for the non-AVX alternative.
7794 (define_insn "*sse2_uavgv8hi3"
7795 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7801 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7803 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7804 (const_vector:V8HI [(const_int 1) (const_int 1)
7805 (const_int 1) (const_int 1)
7806 (const_int 1) (const_int 1)
7807 (const_int 1) (const_int 1)]))
7809 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7811 pavgw\t{%2, %0|%0, %2}
7812 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7813 [(set_attr "isa" "noavx,avx")
7814 (set_attr "type" "sseiadd")
7815 (set_attr "prefix_data16" "1,*")
7816 (set_attr "prefix" "orig,vex")
7817 (set_attr "mode" "TI")])
;; psadbw / vpsadbw, modelled as an unspec rather than explicit RTL.
;; The result mode iterates over VI8_AVX2 while both sources use the
;; corresponding <ssebytemode>.  Alternative 0 ("noavx") is the
;; two-operand form with the destination tied to operand 1;
;; alternative 1 ("avx") is the three-operand VEX form.
7819 ;; The correct representation for this is absolutely enormous, and
7820 ;; surely not generally useful.
7821 (define_insn "<sse2_avx2>_psadbw"
7822 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7823 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7824 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7828 psadbw\t{%2, %0|%0, %2}
7829 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7830 [(set_attr "isa" "noavx,avx")
7831 (set_attr "type" "sseiadd")
7832 (set_attr "atom_unit" "simul")
7833 (set_attr "prefix_data16" "1,*")
7834 (set_attr "prefix" "orig,vex")
7835 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd family, instantiated for every mode in the VF
;; iterator.  Takes a vector float register (operand 1) and writes an
;; SImode general register (operand 0, constraint "=r").  The template
;; uses the "%v" prefix and the insn is tagged prefix "maybe_vex", so
;; the same pattern serves both the legacy and the VEX encodings.
7837 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7838 [(set (match_operand:SI 0 "register_operand" "=r")
7840 [(match_operand:VF 1 "register_operand" "x")]
7843 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7844 [(set_attr "type" "ssemov")
7845 (set_attr "prefix" "maybe_vex")
7846 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: unspec taking a V32QI register (operand 1) and
;; producing an SImode general register (operand 0).  Always VEX.
;; NOTE(review): the "mode" attribute is "DI" here while the sibling
;; sse2_pmovmskb pattern uses "SI" for the same SImode destination --
;; confirm whether the difference is intentional.
7848 (define_insn "avx2_pmovmskb"
7849 [(set (match_operand:SI 0 "register_operand" "=r")
7850 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7853 "vpmovmskb\t{%1, %0|%0, %1}"
7854 [(set_attr "type" "ssemov")
7855 (set_attr "prefix" "vex")
7856 (set_attr "mode" "DI")])
;; 128-bit pmovmskb: unspec taking a V16QI register (operand 1) and
;; producing an SImode general register (operand 0).  The "%v" template
;; prefix together with prefix "maybe_vex" lets one pattern cover both
;; the legacy (data16-prefixed) and the VEX encoding.
7858 (define_insn "sse2_pmovmskb"
7859 [(set (match_operand:SI 0 "register_operand" "=r")
7860 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7863 "%vpmovmskb\t{%1, %0|%0, %1}"
7864 [(set_attr "type" "ssemov")
7865 (set_attr "prefix_data16" "1")
7866 (set_attr "prefix" "maybe_vex")
7867 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu.  Operand 0 is the V16QI memory
;; destination; operands 1 and 2 are V16QI registers fed to the unspec
;; (presumably the data and the byte mask, in that order -- TODO
;; confirm against the builtin that calls this expander).
7869 (define_expand "sse2_maskmovdqu"
7870 [(set (match_operand:V16QI 0 "memory_operand" "")
7871 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7872 (match_operand:V16QI 2 "register_operand" "")
;; Insn for (v)maskmovdqu.  The destination is a V16QI MEM whose
;; address is operand 0, a pointer-mode (P iterator) register forced
;; into class "D".  The previous memory contents appear as an input of
;; the unspec via (mem:V16QI (match_dup 0)), alongside the two V16QI
;; register operands.  Only operands 1 and 2 are printed in the
;; template; the address register is implicit, which is why length_vex
;; is computed by hand below instead of being left to the default.
7877 (define_insn "*sse2_maskmovdqu"
7878 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7879 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7880 (match_operand:V16QI 2 "register_operand" "x")
7881 (mem:V16QI (match_dup 0))]
7884 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7885 [(set_attr "type" "ssemov")
7886 (set_attr "prefix_data16" "1")
7887 ;; The implicit %rdi operand confuses default length_vex computation.
7888 (set (attr "length_vex")
7889 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7890 (set_attr "prefix" "maybe_vex")
7891 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile whose only operand is an SImode memory
;; location (constraint "m").  Tagged as a memory "load" and as an
;; "mxcsr" instruction for Atom scheduling.
7893 (define_insn "sse_ldmxcsr"
7894 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7898 [(set_attr "type" "sse")
7899 (set_attr "atom_sse_attr" "mxcsr")
7900 (set_attr "prefix" "maybe_vex")
7901 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand (constraint "=m") from an
;; unspec_volatile tagged UNSPECV_STMXCSR; the (const_int 0) is only a
;; placeholder operand of the unspec.  Tagged as a memory "store" and
;; as an "mxcsr" instruction for Atom scheduling.
7903 (define_insn "sse_stmxcsr"
7904 [(set (match_operand:SI 0 "memory_operand" "=m")
7905 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7908 [(set_attr "type" "sse")
7909 (set_attr "atom_sse_attr" "mxcsr")
7910 (set_attr "prefix" "maybe_vex")
7911 (set_attr "memory" "store")])
;; Named expander for sfence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  It takes no caller-supplied operands: the
;; preparation code builds operands[0] itself as a BLKmode MEM with a
;; SCRATCH address and marks it volatile, and the pattern wraps that MEM
;; in an UNSPEC_SFENCE unspec.
7913 (define_expand "sse_sfence"
7915 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7916 "TARGET_SSE || TARGET_3DNOW_A"
7918 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7919 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse_sfence expander: a BLKmode operand (no
;; predicate, no constraint) set to an UNSPEC_SFENCE unspec of itself.
;; length_address is forced to 0 and the memory attribute is "unknown".
7922 (define_insn "*sse_sfence"
7923 [(set (match_operand:BLK 0 "" "")
7924 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7925 "TARGET_SSE || TARGET_3DNOW_A"
7927 [(set_attr "type" "sse")
7928 (set_attr "length_address" "0")
7929 (set_attr "atom_sse_attr" "fence")
7930 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile on a single address operand
;; (predicate address_operand, constraint "p").  Classified as a
;; "fence" for Atom scheduling, with memory attribute "unknown".
7932 (define_insn "sse2_clflush"
7933 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7937 [(set_attr "type" "sse")
7938 (set_attr "atom_sse_attr" "fence")
7939 (set_attr "memory" "unknown")])
;; Named expander for mfence.  It takes no caller-supplied operands:
;; the preparation code builds operands[0] itself as a BLKmode MEM with
;; a SCRATCH address and marks it volatile, and the pattern wraps that
;; MEM in an UNSPEC_MFENCE unspec.
7941 (define_expand "sse2_mfence"
7943 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7946 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7947 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse2_mfence expander: a BLKmode operand set to
;; an UNSPEC_MFENCE unspec of itself.  Enabled for TARGET_64BIT as well
;; as TARGET_SSE2.  length_address is forced to 0 and the memory
;; attribute is "unknown".
7950 (define_insn "*sse2_mfence"
7951 [(set (match_operand:BLK 0 "" "")
7952 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7953 "TARGET_64BIT || TARGET_SSE2"
7955 [(set_attr "type" "sse")
7956 (set_attr "length_address" "0")
7957 (set_attr "atom_sse_attr" "fence")
7958 (set_attr "memory" "unknown")])
;; Named expander for lfence.  It takes no caller-supplied operands:
;; the preparation code builds operands[0] itself as a BLKmode MEM with
;; a SCRATCH address and marks it volatile, and the pattern wraps that
;; MEM in an UNSPEC_LFENCE unspec.
7960 (define_expand "sse2_lfence"
7962 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7965 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7966 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse2_lfence expander: a BLKmode operand set to
;; an UNSPEC_LFENCE unspec of itself.  Unlike the sfence/mfence insns,
;; atom_sse_attr is "lfence" rather than "fence".  length_address is
;; forced to 0 and the memory attribute is "unknown".
7969 (define_insn "*sse2_lfence"
7970 [(set (match_operand:BLK 0 "" "")
7971 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7974 [(set_attr "type" "sse")
7975 (set_attr "length_address" "0")
7976 (set_attr "atom_sse_attr" "lfence")
7977 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile over two SImode register operands pinned
;; by constraint to "a" (operand 0) and "c" (operand 1).  The same
;; SImode pattern serves 64-bit targets, for the reason given in the
;; comment inside the pattern.  The insn length is fixed at 3 bytes.
7979 (define_insn "sse3_mwait"
7980 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7981 (match_operand:SI 1 "register_operand" "c")]
7984 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7985 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7986 ;; we only need to set up 32bit registers.
7988 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile
;; over three SImode register operands pinned by constraint to "a",
;; "c" and "d" respectively.  All three are printed explicitly in the
;; template.  The insn length is fixed at 3 bytes.
7990 (define_insn "sse3_monitor"
7991 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7992 (match_operand:SI 1 "register_operand" "c")
7993 (match_operand:SI 2 "register_operand" "d")]
7995 "TARGET_SSE3 && !TARGET_64BIT"
7996 "monitor\t%0, %1, %2"
7997 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is a
;; DImode register in "a"; operands 1 and 2 stay SImode, in "c" and "d",
;; for the reason given in the comment inside the pattern.  The insn
;; length is fixed at 3 bytes.
7999 (define_insn "sse3_monitor64"
8000 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8001 (match_operand:SI 1 "register_operand" "c")
8002 (match_operand:SI 2 "register_operand" "d")]
8004 "TARGET_SSE3 && TARGET_64BIT"
8005 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8006 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8007 ;; zero extended to 64bit, we only need to set up 32bit registers.
8009 [(set_attr "length" "3")])
8011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8013 ;; SSSE3 instructions
8015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vphaddw on 256-bit vectors (V16HI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every HImode
;; element 0..15 of operand 1, then of operand 2, individually through
;; vec_select, taken in consecutive pairs (0,1), (2,3), ...
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8017 (define_insn "avx2_phaddwv16hi3"
8018 [(set (match_operand:V16HI 0 "register_operand" "=x")
8025 (match_operand:V16HI 1 "register_operand" "x")
8026 (parallel [(const_int 0)]))
8027 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8029 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8030 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8033 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8034 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8036 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8037 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8041 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8042 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8044 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8045 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8048 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8049 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8051 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8052 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8058 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8059 (parallel [(const_int 0)]))
8060 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8062 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8063 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8066 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8067 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8069 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8070 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8074 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8075 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8077 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8078 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8082 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8084 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8085 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8087 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8088 [(set_attr "type" "sseiadd")
8089 (set_attr "prefix_extra" "1")
8090 (set_attr "prefix" "vex")
8091 (set_attr "mode" "OI")])
;; phaddw / vphaddw on 128-bit vectors (V8HI).  The RTL names every
;; HImode element 0..7 of operand 1, then of operand 2, through
;; vec_select in consecutive pairs.  Alternative 0 ("noavx") is the
;; two-operand form with the destination tied to operand 1;
;; alternative 1 ("avx") is the three-operand VEX form.
8093 (define_insn "ssse3_phaddwv8hi3"
8094 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8100 (match_operand:V8HI 1 "register_operand" "0,x")
8101 (parallel [(const_int 0)]))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8104 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8108 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8111 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8117 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8118 (parallel [(const_int 0)]))
8119 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8126 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8128 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8129 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8132 phaddw\t{%2, %0|%0, %2}
8133 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8134 [(set_attr "isa" "noavx,avx")
8135 (set_attr "type" "sseiadd")
8136 (set_attr "atom_unit" "complex")
8137 (set_attr "prefix_data16" "1,*")
8138 (set_attr "prefix_extra" "1")
8139 (set_attr "prefix" "orig,vex")
8140 (set_attr "mode" "TI")])
;; phaddw on 64-bit vectors (V4HI).  All register operands use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md) and the destination is tied to operand 1.  The RTL
;; names HImode elements 0..3 of each source through vec_select.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
8142 (define_insn "ssse3_phaddwv4hi3"
8143 [(set (match_operand:V4HI 0 "register_operand" "=y")
8148 (match_operand:V4HI 1 "register_operand" "0")
8149 (parallel [(const_int 0)]))
8150 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8152 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8157 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8158 (parallel [(const_int 0)]))
8159 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8161 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8164 "phaddw\t{%2, %0|%0, %2}"
8165 [(set_attr "type" "sseiadd")
8166 (set_attr "atom_unit" "complex")
8167 (set_attr "prefix_extra" "1")
8168 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8169 (set_attr "mode" "DI")])
;; vphaddd on 256-bit vectors (V8SI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every SImode
;; element 0..7 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8171 (define_insn "avx2_phadddv8si3"
8172 [(set (match_operand:V8SI 0 "register_operand" "=x")
8178 (match_operand:V8SI 1 "register_operand" "x")
8179 (parallel [(const_int 0)]))
8180 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8182 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8183 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8186 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8187 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8189 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8190 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8195 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8196 (parallel [(const_int 0)]))
8197 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8199 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8200 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8203 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8204 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8206 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8207 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8209 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8210 [(set_attr "type" "sseiadd")
8211 (set_attr "prefix_extra" "1")
8212 (set_attr "prefix" "vex")
8213 (set_attr "mode" "OI")])
;; phaddd / vphaddd on 128-bit vectors (V4SI).  The RTL names SImode
;; elements 0..3 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.  Alternative 0 ("noavx") is the two-operand form
;; with the destination tied to operand 1; alternative 1 ("avx") is the
;; three-operand VEX form.
8215 (define_insn "ssse3_phadddv4si3"
8216 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8221 (match_operand:V4SI 1 "register_operand" "0,x")
8222 (parallel [(const_int 0)]))
8223 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8225 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8226 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8230 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8231 (parallel [(const_int 0)]))
8232 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8234 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8235 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8238 phaddd\t{%2, %0|%0, %2}
8239 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8240 [(set_attr "isa" "noavx,avx")
8241 (set_attr "type" "sseiadd")
8242 (set_attr "atom_unit" "complex")
8243 (set_attr "prefix_data16" "1,*")
8244 (set_attr "prefix_extra" "1")
8245 (set_attr "prefix" "orig,vex")
8246 (set_attr "mode" "TI")])
;; phaddd on 64-bit vectors (V2SI).  All register operands use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md) and the destination is tied to operand 1.  The RTL
;; names SImode elements 0 and 1 of each source through vec_select.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
8248 (define_insn "ssse3_phadddv2si3"
8249 [(set (match_operand:V2SI 0 "register_operand" "=y")
8253 (match_operand:V2SI 1 "register_operand" "0")
8254 (parallel [(const_int 0)]))
8255 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8258 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8259 (parallel [(const_int 0)]))
8260 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8262 "phaddd\t{%2, %0|%0, %2}"
8263 [(set_attr "type" "sseiadd")
8264 (set_attr "atom_unit" "complex")
8265 (set_attr "prefix_extra" "1")
8266 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8267 (set_attr "mode" "DI")])
;; vphaddsw on 256-bit vectors (V16HI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every HImode
;; element 0..15 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8269 (define_insn "avx2_phaddswv16hi3"
8270 [(set (match_operand:V16HI 0 "register_operand" "=x")
8277 (match_operand:V16HI 1 "register_operand" "x")
8278 (parallel [(const_int 0)]))
8279 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8281 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8282 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8285 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8288 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8289 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8293 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8294 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8296 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8297 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8300 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8303 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8310 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8311 (parallel [(const_int 0)]))
8312 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8314 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8315 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8319 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8326 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8327 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8329 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8330 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8333 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8334 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8336 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8337 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8339 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8340 [(set_attr "type" "sseiadd")
8341 (set_attr "prefix_extra" "1")
8342 (set_attr "prefix" "vex")
8343 (set_attr "mode" "OI")])
;; phaddsw / vphaddsw on 128-bit vectors (V8HI).  The RTL names every
;; HImode element 0..7 of operand 1, then of operand 2, through
;; vec_select in consecutive pairs.  Alternative 0 ("noavx") is the
;; two-operand form with the destination tied to operand 1;
;; alternative 1 ("avx") is the three-operand VEX form.
8345 (define_insn "ssse3_phaddswv8hi3"
8346 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8352 (match_operand:V8HI 1 "register_operand" "0,x")
8353 (parallel [(const_int 0)]))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8361 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8369 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8370 (parallel [(const_int 0)]))
8371 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8377 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8378 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8381 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8384 phaddsw\t{%2, %0|%0, %2}
8385 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8386 [(set_attr "isa" "noavx,avx")
8387 (set_attr "type" "sseiadd")
8388 (set_attr "atom_unit" "complex")
8389 (set_attr "prefix_data16" "1,*")
8390 (set_attr "prefix_extra" "1")
8391 (set_attr "prefix" "orig,vex")
8392 (set_attr "mode" "TI")])
;; phaddsw on 64-bit vectors (V4HI).  All register operands use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md) and the destination is tied to operand 1.  The RTL
;; names HImode elements 0..3 of each source through vec_select.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
8394 (define_insn "ssse3_phaddswv4hi3"
8395 [(set (match_operand:V4HI 0 "register_operand" "=y")
8400 (match_operand:V4HI 1 "register_operand" "0")
8401 (parallel [(const_int 0)]))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8405 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8409 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8410 (parallel [(const_int 0)]))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8413 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8416 "phaddsw\t{%2, %0|%0, %2}"
8417 [(set_attr "type" "sseiadd")
8418 (set_attr "atom_unit" "complex")
8419 (set_attr "prefix_extra" "1")
8420 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8421 (set_attr "mode" "DI")])
;; vphsubw on 256-bit vectors (V16HI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every HImode
;; element 0..15 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8423 (define_insn "avx2_phsubwv16hi3"
8424 [(set (match_operand:V16HI 0 "register_operand" "=x")
8431 (match_operand:V16HI 1 "register_operand" "x")
8432 (parallel [(const_int 0)]))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8454 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8455 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8458 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8464 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8465 (parallel [(const_int 0)]))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8468 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8475 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8493 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8494 [(set_attr "type" "sseiadd")
8495 (set_attr "prefix_extra" "1")
8496 (set_attr "prefix" "vex")
8497 (set_attr "mode" "OI")])
;; phsubw / vphsubw on 128-bit vectors (V8HI).  The RTL names every
;; HImode element 0..7 of operand 1, then of operand 2, through
;; vec_select in consecutive pairs.  Alternative 0 ("noavx") is the
;; two-operand form with the destination tied to operand 1;
;; alternative 1 ("avx") is the three-operand VEX form.
8499 (define_insn "ssse3_phsubwv8hi3"
8500 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8506 (match_operand:V8HI 1 "register_operand" "0,x")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8523 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8524 (parallel [(const_int 0)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8534 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8535 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8538 phsubw\t{%2, %0|%0, %2}
8539 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8540 [(set_attr "isa" "noavx,avx")
8541 (set_attr "type" "sseiadd")
8542 (set_attr "atom_unit" "complex")
8543 (set_attr "prefix_data16" "1,*")
8544 (set_attr "prefix_extra" "1")
8545 (set_attr "prefix" "orig,vex")
8546 (set_attr "mode" "TI")])
;; phsubw on 64-bit vectors (V4HI).  All register operands use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md) and the destination is tied to operand 1.  The RTL
;; names HImode elements 0..3 of each source through vec_select.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
8548 (define_insn "ssse3_phsubwv4hi3"
8549 [(set (match_operand:V4HI 0 "register_operand" "=y")
8554 (match_operand:V4HI 1 "register_operand" "0")
8555 (parallel [(const_int 0)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8563 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8564 (parallel [(const_int 0)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8570 "phsubw\t{%2, %0|%0, %2}"
8571 [(set_attr "type" "sseiadd")
8572 (set_attr "atom_unit" "complex")
8573 (set_attr "prefix_extra" "1")
8574 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8575 (set_attr "mode" "DI")])
;; vphsubd on 256-bit vectors (V8SI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every SImode
;; element 0..7 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8577 (define_insn "avx2_phsubdv8si3"
8578 [(set (match_operand:V8SI 0 "register_operand" "=x")
8584 (match_operand:V8SI 1 "register_operand" "x")
8585 (parallel [(const_int 0)]))
8586 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8588 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8589 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8592 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8593 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8595 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8596 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8601 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8602 (parallel [(const_int 0)]))
8603 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8605 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8606 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8609 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8610 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8612 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8613 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8615 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8616 [(set_attr "type" "sseiadd")
8617 (set_attr "prefix_extra" "1")
8618 (set_attr "prefix" "vex")
8619 (set_attr "mode" "OI")])
;; phsubd / vphsubd on 128-bit vectors (V4SI).  The RTL names SImode
;; elements 0..3 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.  Alternative 0 ("noavx") is the two-operand form
;; with the destination tied to operand 1; alternative 1 ("avx") is the
;; three-operand VEX form.
8621 (define_insn "ssse3_phsubdv4si3"
8622 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8627 (match_operand:V4SI 1 "register_operand" "0,x")
8628 (parallel [(const_int 0)]))
8629 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8631 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8632 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8636 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8637 (parallel [(const_int 0)]))
8638 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8640 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8641 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8644 phsubd\t{%2, %0|%0, %2}
8645 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8647 [(set_attr "isa" "noavx,avx")
8648 (set_attr "type" "sseiadd")
8649 (set_attr "atom_unit" "complex")
8650 (set_attr "prefix_data16" "1,*")
8651 (set_attr "prefix_extra" "1")
8652 (set_attr "prefix" "orig,vex")
8653 (set_attr "mode" "TI")])
;; phsubd on 64-bit vectors (V2SI).  All register operands use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md) and the destination is tied to operand 1.  The RTL
;; names SImode elements 0 and 1 of each source through vec_select.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
8655 (define_insn "ssse3_phsubdv2si3"
8656 [(set (match_operand:V2SI 0 "register_operand" "=y")
8660 (match_operand:V2SI 1 "register_operand" "0")
8661 (parallel [(const_int 0)]))
8662 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8665 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8666 (parallel [(const_int 0)]))
8667 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8669 "phsubd\t{%2, %0|%0, %2}"
8670 [(set_attr "type" "sseiadd")
8671 (set_attr "atom_unit" "complex")
8672 (set_attr "prefix_extra" "1")
8673 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8674 (set_attr "mode" "DI")])
;; vphsubsw on 256-bit vectors (V16HI).  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The RTL names every HImode
;; element 0..15 of operand 1, then of operand 2, through vec_select in
;; consecutive pairs.
;; NOTE(review): confirm that the element order written here matches
;; the per-128-bit-lane behaviour of the hardware instruction.
8676 (define_insn "avx2_phsubswv16hi3"
8677 [(set (match_operand:V16HI 0 "register_operand" "=x")
8684 (match_operand:V16HI 1 "register_operand" "x")
8685 (parallel [(const_int 0)]))
8686 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8688 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8689 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8692 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8693 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8695 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8696 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8700 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8701 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8704 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8707 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8708 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8711 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8717 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8718 (parallel [(const_int 0)]))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8725 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8726 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8728 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8729 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8733 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8734 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8736 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8737 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8741 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8743 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8744 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8746 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8747 [(set_attr "type" "sseiadd")
8748 (set_attr "prefix_extra" "1")
8749 (set_attr "prefix" "vex")
8750 (set_attr "mode" "OI")])
;; phsubsw / vphsubsw on 128-bit vectors (V8HI).  The RTL names every
;; HImode element 0..7 of operand 1, then of operand 2, through
;; vec_select in consecutive pairs.  Alternative 0 ("noavx") is the
;; two-operand form with the destination tied to operand 1;
;; alternative 1 ("avx") is the three-operand VEX form.
8752 (define_insn "ssse3_phsubswv8hi3"
8753 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8759 (match_operand:V8HI 1 "register_operand" "0,x")
8760 (parallel [(const_int 0)]))
8761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8768 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8776 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8777 (parallel [(const_int 0)]))
8778 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8780 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8781 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8785 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8787 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8788 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8791 phsubsw\t{%2, %0|%0, %2}
8792 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8793 [(set_attr "isa" "noavx,avx")
8794 (set_attr "type" "sseiadd")
8795 (set_attr "atom_unit" "complex")
8796 (set_attr "prefix_data16" "1,*")
8797 (set_attr "prefix_extra" "1")
8798 (set_attr "prefix" "orig,vex")
8799 (set_attr "mode" "TI")])
;; 64-bit phsubsw on V4HI using the "y" register class.  Only the
;; destructive two-operand form exists: operand 1 is tied to operand 0.
;; prefix_rex is computed per insn from x86_extended_reg_mentioned_p.
8801 (define_insn "ssse3_phsubswv4hi3"
8802 [(set (match_operand:V4HI 0 "register_operand" "=y")
8807 (match_operand:V4HI 1 "register_operand" "0")
8808 (parallel [(const_int 0)]))
8809 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8811 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8812 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8816 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8817 (parallel [(const_int 0)]))
8818 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8821 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8823 "phsubsw\t{%2, %0|%0, %2}"
8824 [(set_attr "type" "sseiadd")
8825 (set_attr "atom_unit" "complex")
8826 (set_attr "prefix_extra" "1")
8827 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8828 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw: V32QI operands 1 and 2 produce a V16HI result.
;; Each operand is sampled twice: once with an index list starting at 0
;; and once, as a V16QI vec_select, with a list starting at 1 and
;; ending at 31.  VEX-only, three-operand form.
8830 (define_insn "avx2_pmaddubsw256"
8831 [(set (match_operand:V16HI 0 "register_operand" "=x")
8836 (match_operand:V32QI 1 "register_operand" "x")
8837 (parallel [(const_int 0)
8855 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8856 (parallel [(const_int 0)
8874 (vec_select:V16QI (match_dup 1)
8875 (parallel [(const_int 1)
8892 (vec_select:V16QI (match_dup 2)
8893 (parallel [(const_int 1)
8908 (const_int 31)]))))))]
8910 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8911 [(set_attr "type" "sseiadd")
8912 (set_attr "prefix_extra" "1")
8913 (set_attr "prefix" "vex")
8914 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI operands 1 and 2 produce a V8HI result.
;; Each operand is sampled with an index list starting at 0 and, as a
;; V8QI vec_select, with a list starting at 1 and ending at 15.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
8916 (define_insn "ssse3_pmaddubsw128"
8917 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8922 (match_operand:V16QI 1 "register_operand" "0,x")
8923 (parallel [(const_int 0)
8933 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8934 (parallel [(const_int 0)
8944 (vec_select:V8QI (match_dup 1)
8945 (parallel [(const_int 1)
8954 (vec_select:V8QI (match_dup 2)
8955 (parallel [(const_int 1)
8962 (const_int 15)]))))))]
8965 pmaddubsw\t{%2, %0|%0, %2}
8966 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8967 [(set_attr "isa" "noavx,avx")
8968 (set_attr "type" "sseiadd")
8969 (set_attr "atom_unit" "simul")
8970 (set_attr "prefix_data16" "1,*")
8971 (set_attr "prefix_extra" "1")
8972 (set_attr "prefix" "orig,vex")
8973 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw: V8QI operands 1 and 2 (register class "y") produce
;; a V4HI result.  Only the destructive two-operand form exists, with
;; operand 1 tied to the destination.
8975 (define_insn "ssse3_pmaddubsw"
8976 [(set (match_operand:V4HI 0 "register_operand" "=y")
8981 (match_operand:V8QI 1 "register_operand" "0")
8982 (parallel [(const_int 0)
8988 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8989 (parallel [(const_int 0)
8995 (vec_select:V4QI (match_dup 1)
8996 (parallel [(const_int 1)
9001 (vec_select:V4QI (match_dup 2)
9002 (parallel [(const_int 1)
9005 (const_int 7)]))))))]
9007 "pmaddubsw\t{%2, %0|%0, %2}"
9008 [(set_attr "type" "sseiadd")
9009 (set_attr "atom_unit" "simul")
9010 (set_attr "prefix_extra" "1")
9011 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9012 (set_attr "mode" "DI")])
;; Expander for the V16HI pattern matched by "*avx2_umulhrswv16hi3".
;; Both inputs are accepted as nonimmediate operands and the preparation
;; statement hands them to ix86_fixup_binary_operands_no_copy with MULT.
;; The constant vector of sixteen 1s is part of the matched expression.
9014 (define_expand "avx2_umulhrswv16hi3"
9015 [(set (match_operand:V16HI 0 "register_operand" "")
9022 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9024 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9026 (const_vector:V16HI [(const_int 1) (const_int 1)
9027 (const_int 1) (const_int 1)
9028 (const_int 1) (const_int 1)
9029 (const_int 1) (const_int 1)
9030 (const_int 1) (const_int 1)
9031 (const_int 1) (const_int 1)
9032 (const_int 1) (const_int 1)
9033 (const_int 1) (const_int 1)]))
9036 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; 256-bit vpmulhrsw on V16HI.  Operand 1 is marked commutative ("%")
;; with operand 2; the condition additionally requires
;; ix86_binary_operator_ok for MULT.
;; NOTE(review): the pattern is named "umulhrsw" while the 128-bit and
;; 64-bit siblings are named "pmulhrsw"; the name is kept here because
;; callers elsewhere presumably reference it.
9038 (define_insn "*avx2_umulhrswv16hi3"
9039 [(set (match_operand:V16HI 0 "register_operand" "=x")
9046 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9048 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9050 (const_vector:V16HI [(const_int 1) (const_int 1)
9051 (const_int 1) (const_int 1)
9052 (const_int 1) (const_int 1)
9053 (const_int 1) (const_int 1)
9054 (const_int 1) (const_int 1)
9055 (const_int 1) (const_int 1)
9056 (const_int 1) (const_int 1)
9057 (const_int 1) (const_int 1)]))
9059 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9060 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9061 [(set_attr "type" "sseimul")
9062 (set_attr "prefix_extra" "1")
9063 (set_attr "prefix" "vex")
9064 (set_attr "mode" "OI")])
;; Expander for the V8HI pattern matched by "*ssse3_pmulhrswv8hi3".
;; The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with MULT.
9066 (define_expand "ssse3_pmulhrswv8hi3"
9067 [(set (match_operand:V8HI 0 "register_operand" "")
9074 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9076 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9078 (const_vector:V8HI [(const_int 1) (const_int 1)
9079 (const_int 1) (const_int 1)
9080 (const_int 1) (const_int 1)
9081 (const_int 1) (const_int 1)]))
9084 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; 128-bit pmulhrsw on V8HI.  Operand 1 is commutative ("%") with
;; operand 2.  Alternative 0 is the two-operand SSE form (operand 1 tied
;; to the destination); alternative 1 is the three-operand VEX form.
9086 (define_insn "*ssse3_pmulhrswv8hi3"
9087 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9094 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9096 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9098 (const_vector:V8HI [(const_int 1) (const_int 1)
9099 (const_int 1) (const_int 1)
9100 (const_int 1) (const_int 1)
9101 (const_int 1) (const_int 1)]))
9103 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9105 pmulhrsw\t{%2, %0|%0, %2}
9106 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9107 [(set_attr "isa" "noavx,avx")
9108 (set_attr "type" "sseimul")
9109 (set_attr "prefix_data16" "1,*")
9110 (set_attr "prefix_extra" "1")
9111 (set_attr "prefix" "orig,vex")
9112 (set_attr "mode" "TI")])
;; Expander for the V4HI pattern matched by "*ssse3_pmulhrswv4hi3".
;; The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with MULT.
9114 (define_expand "ssse3_pmulhrswv4hi3"
9115 [(set (match_operand:V4HI 0 "register_operand" "")
9122 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9124 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9126 (const_vector:V4HI [(const_int 1) (const_int 1)
9127 (const_int 1) (const_int 1)]))
9130 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit pmulhrsw on V4HI in register class "y".  Operand 1 is
;; commutative ("%") with operand 2 and tied to the destination.
9132 (define_insn "*ssse3_pmulhrswv4hi3"
9133 [(set (match_operand:V4HI 0 "register_operand" "=y")
9140 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9142 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9144 (const_vector:V4HI [(const_int 1) (const_int 1)
9145 (const_int 1) (const_int 1)]))
9147 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9148 "pmulhrsw\t{%2, %0|%0, %2}"
9149 [(set_attr "type" "sseimul")
9150 (set_attr "prefix_extra" "1")
9151 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9152 (set_attr "mode" "DI")])
;; pshufb over the VI1_AVX2 modes, expressed as an unspec of operands 1
;; and 2.  Alternative 0 is the two-operand SSE form (operand 1 tied to
;; the destination); alternative 1 is the three-operand VEX form.
9154 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9155 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9156 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9157 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9161 pshufb\t{%2, %0|%0, %2}
9162 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9163 [(set_attr "isa" "noavx,avx")
9164 (set_attr "type" "sselog1")
9165 (set_attr "prefix_data16" "1,*")
9166 (set_attr "prefix_extra" "1")
9167 (set_attr "prefix" "orig,vex")
9168 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit pshufb on V8QI in register class "y", expressed as an unspec
;; of operands 1 and 2.  Operand 1 is tied to the destination.
;; The output template carries no trailing ";": in a machine
;; description that character only starts a comment.
9170 (define_insn "ssse3_pshufbv8qi3"
9171 [(set (match_operand:V8QI 0 "register_operand" "=y")
9172 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9173 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9176 "pshufb\t{%2, %0|%0, %2}"
9177 [(set_attr "type" "sselog1")
9178 (set_attr "prefix_extra" "1")
9179 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9180 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> over the VI124_AVX2 modes.  Alternative 0 is the
;; two-operand SSE form (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.
9182 (define_insn "<ssse3_avx2>_psign<mode>3"
9183 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9185 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9186 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9190 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9191 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9192 [(set_attr "isa" "noavx,avx")
9193 (set_attr "type" "sselog1")
9194 (set_attr "prefix_data16" "1,*")
9195 (set_attr "prefix_extra" "1")
9196 (set_attr "prefix" "orig,vex")
9197 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> over the MMXMODEI modes in register class "y".
;; Operand 1 is tied to the destination.
;; The output template carries no trailing ";": in a machine
;; description that character only starts a comment.
9199 (define_insn "ssse3_psign<mode>3"
9200 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9202 [(match_operand:MMXMODEI 1 "register_operand" "0")
9203 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9206 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9207 [(set_attr "type" "sselog1")
9208 (set_attr "prefix_extra" "1")
9209 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9210 (set_attr "mode" "DI")])
;; palignr over the SSESCALARMODE modes.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing
;; (presumably converting a bit count to the byte count the instruction
;; encodes -- confirm against the builtin expanders).
;; which_alternative selects the SSE or the VEX spelling.
9212 (define_insn "<ssse3_avx2>_palignr<mode>"
9213 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9214 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9215 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9216 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9220 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9222 switch (which_alternative)
9225 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9227 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9232 [(set_attr "isa" "noavx,avx")
9233 (set_attr "type" "sseishft")
9234 (set_attr "atom_unit" "sishuf")
9235 (set_attr "prefix_data16" "1,*")
9236 (set_attr "prefix_extra" "1")
9237 (set_attr "length_immediate" "1")
9238 (set_attr "prefix" "orig,vex")
9239 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit palignr on DI in register class "y".  As in the vector
;; pattern, operand 3 is divided by 8 before the instruction is printed.
9241 (define_insn "ssse3_palignrdi"
9242 [(set (match_operand:DI 0 "register_operand" "=y")
9243 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9244 (match_operand:DI 2 "nonimmediate_operand" "ym")
9245 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9249 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9250 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9252 [(set_attr "type" "sseishft")
9253 (set_attr "atom_unit" "sishuf")
9254 (set_attr "prefix_extra" "1")
9255 (set_attr "length_immediate" "1")
9256 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9257 (set_attr "mode" "DI")])
;; pabs<ssemodesuffix> over the VI124_AVX2 modes.  A single alternative
;; serves both encodings: the "%v" template prefix and the "maybe_vex"
;; prefix attribute select the VEX spelling when it applies.
9259 (define_insn "abs<mode>2"
9260 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9262 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9264 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9265 [(set_attr "type" "sselog1")
9266 (set_attr "prefix_data16" "1")
9267 (set_attr "prefix_extra" "1")
9268 (set_attr "prefix" "maybe_vex")
9269 (set_attr "mode" "<sseinsnmode>")])
;; pabs<mmxvecsize> over the MMXMODEI modes in register class "y".
;; prefix_rep is forced to 0 and prefix_rex is computed per insn.
;; The output template carries no trailing ";": in a machine
;; description that character only starts a comment.
9271 (define_insn "abs<mode>2"
9272 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9274 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9276 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9277 [(set_attr "type" "sselog1")
9278 (set_attr "prefix_rep" "0")
9279 (set_attr "prefix_extra" "1")
9280 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9281 (set_attr "mode" "DI")])
9283 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9285 ;; AMD SSE4A instructions
9287 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix>: store a MODEF value from an "x" register to
;; memory.  The store is wrapped in an unspec.
9289 (define_insn "sse4a_movnt<mode>"
9290 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9292 [(match_operand:MODEF 1 "register_operand" "x")]
9295 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9296 [(set_attr "type" "ssemov")
9297 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix>: store element 0 of a VF_128 register to
;; a scalar memory operand.  The selected element is wrapped in an
;; unspec.
9299 (define_insn "sse4a_vmmovnt<mode>"
9300 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9301 (unspec:<ssescalarmode>
9302 [(vec_select:<ssescalarmode>
9303 (match_operand:VF_128 1 "register_operand" "x")
9304 (parallel [(const_int 0)]))]
9307 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9308 [(set_attr "type" "ssemov")
9309 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are constants in 0..255, hence length_immediate 2.
9311 (define_insn "sse4a_extrqi"
9312 [(set (match_operand:V2DI 0 "register_operand" "=x")
9313 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9314 (match_operand 2 "const_0_to_255_operand" "")
9315 (match_operand 3 "const_0_to_255_operand" "")]
9318 "extrq\t{%3, %2, %0|%0, %2, %3}"
9319 [(set_attr "type" "sse")
9320 (set_attr "prefix_data16" "1")
9321 (set_attr "length_immediate" "2")
9322 (set_attr "mode" "TI")])
;; extrq, register form: operand 1 is tied to the destination and the
;; control value comes from the V16QI register operand 2.
9324 (define_insn "sse4a_extrq"
9325 [(set (match_operand:V2DI 0 "register_operand" "=x")
9326 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9327 (match_operand:V16QI 2 "register_operand" "x")]
9330 "extrq\t{%2, %0|%0, %2}"
9331 [(set_attr "type" "sse")
9332 (set_attr "prefix_data16" "1")
9333 (set_attr "mode" "TI")])
;; insertq, immediate form: operand 1 is tied to the destination,
;; operand 2 is a V2DI register and operands 3 and 4 are constants in
;; 0..255, hence length_immediate 2.  Encoded with a rep prefix and
;; without a data16 prefix, per the attributes below.
9335 (define_insn "sse4a_insertqi"
9336 [(set (match_operand:V2DI 0 "register_operand" "=x")
9337 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9338 (match_operand:V2DI 2 "register_operand" "x")
9339 (match_operand 3 "const_0_to_255_operand" "")
9340 (match_operand 4 "const_0_to_255_operand" "")]
9343 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9344 [(set_attr "type" "sseins")
9345 (set_attr "prefix_data16" "0")
9346 (set_attr "prefix_rep" "1")
9347 (set_attr "length_immediate" "2")
9348 (set_attr "mode" "TI")])
;; insertq, register form: operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  Encoded with a rep prefix and without
;; a data16 prefix, per the attributes below.
9350 (define_insn "sse4a_insertq"
9351 [(set (match_operand:V2DI 0 "register_operand" "=x")
9352 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9353 (match_operand:V2DI 2 "register_operand" "x")]
9356 "insertq\t{%2, %0|%0, %2}"
9357 [(set_attr "type" "sseins")
9358 (set_attr "prefix_data16" "0")
9359 (set_attr "prefix_rep" "1")
9360 (set_attr "mode" "TI")])
9362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9364 ;; Intel SSE4.1 instructions
9366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blend<ssemodesuffix> over the VF modes with an immediate selector
;; (operand 3).  Note the RTL lists operand 2 before operand 1.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9368 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9369 [(set (match_operand:VF 0 "register_operand" "=x,x")
9371 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9372 (match_operand:VF 1 "register_operand" "0,x")
9373 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9376 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9377 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9378 [(set_attr "isa" "noavx,avx")
9379 (set_attr "type" "ssemov")
9380 (set_attr "length_immediate" "1")
9381 (set_attr "prefix_data16" "1,*")
9382 (set_attr "prefix_extra" "1")
9383 (set_attr "prefix" "orig,vex")
9384 (set_attr "mode" "<MODE>")])
;; blendv<ssemodesuffix> over the VF modes with a register selector
;; (operand 3).  In the SSE alternative the selector is constrained to
;; "Yz"; in the VEX alternative it may be any "x" register and is
;; printed explicitly.  The *_not_xmm0_*_maybe_avx predicates apply to
;; the destination and to operands 1 and 2.
9386 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9387 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9389 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9390 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9391 (match_operand:VF 3 "register_operand" "Yz,x")]
9395 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9396 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9397 [(set_attr "isa" "noavx,avx")
9398 (set_attr "type" "ssemov")
9399 (set_attr "length_immediate" "1")
9400 (set_attr "prefix_data16" "1,*")
9401 (set_attr "prefix_extra" "1")
9402 (set_attr "prefix" "orig,vex")
9403 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> over the VF modes with an 8-bit immediate
;; (operand 3).  Operand 1 is commutative ("%") with operand 2.
;; Alternative 0 is the two-operand SSE form; alternative 1 is the
;; three-operand VEX form.
9405 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9406 [(set (match_operand:VF 0 "register_operand" "=x,x")
9408 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9409 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9410 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9414 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9415 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9416 [(set_attr "isa" "noavx,avx")
9417 (set_attr "type" "ssemul")
9418 (set_attr "length_immediate" "1")
9419 (set_attr "prefix_data16" "1,*")
9420 (set_attr "prefix_extra" "1")
9421 (set_attr "prefix" "orig,vex")
9422 (set_attr "mode" "<MODE>")])
;; movntdqa: load a VI8_AVX2 value from memory into an "x" register.
;; The load is wrapped in an unspec; "%v" / "maybe_vex" select the VEX
;; spelling when it applies.
9424 (define_insn "<sse4_1_avx2>_movntdqa"
9425 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9426 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9429 "%vmovntdqa\t{%1, %0|%0, %1}"
9430 [(set_attr "type" "ssemov")
9431 (set_attr "prefix_extra" "1")
9432 (set_attr "prefix" "maybe_vex")
9433 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over the VI1_AVX2 modes with an 8-bit immediate (operand 3).
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9435 (define_insn "<sse4_1_avx2>_mpsadbw"
9436 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9437 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9438 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9439 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9443 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9444 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9445 [(set_attr "isa" "noavx,avx")
9446 (set_attr "type" "sselog1")
9447 (set_attr "length_immediate" "1")
9448 (set_attr "prefix_extra" "1")
9449 (set_attr "prefix" "orig,vex")
9450 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce one V16HI result.
;; VEX-only, three-operand form.
9452 (define_insn "avx2_packusdw"
9453 [(set (match_operand:V16HI 0 "register_operand" "=x")
9456 (match_operand:V8SI 1 "register_operand" "x"))
9458 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9460 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9461 [(set_attr "type" "sselog")
9462 (set_attr "prefix_extra" "1")
9463 (set_attr "prefix" "vex")
9464 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce one V8HI result.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9466 (define_insn "sse4_1_packusdw"
9467 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9470 (match_operand:V4SI 1 "register_operand" "0,x"))
9472 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9475 packusdw\t{%2, %0|%0, %2}
9476 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9477 [(set_attr "isa" "noavx,avx")
9478 (set_attr "type" "sselog")
9479 (set_attr "prefix_extra" "1")
9480 (set_attr "prefix" "orig,vex")
9481 (set_attr "mode" "TI")])
;; pblendvb over the VI1_AVX2 modes with a register selector
;; (operand 3).  In the SSE alternative the selector is constrained to
;; "Yz" and carries no immediate byte; in the VEX alternative it may be
;; any "x" register and length_immediate is 1.
;; The destination uses the same *_maybe_avx predicate as operand 1 and
;; as the floating-point blendv pattern, so the restriction on the
;; destination is the same as on the tied input.
9483 (define_insn "<sse4_1_avx2>_pblendvb"
9484 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9486 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9487 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9488 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9492 pblendvb\t{%3, %2, %0|%0, %2, %3}
9493 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9494 [(set_attr "isa" "noavx,avx")
9495 (set_attr "type" "ssemov")
9496 (set_attr "prefix_extra" "1")
9497 (set_attr "length_immediate" "*,1")
9498 (set_attr "prefix" "orig,vex")
9499 (set_attr "mode" "<sseinsnmode>")])
;; pblendw over the VI2_AVX2 modes with an 8-bit immediate selector
;; (operand 3).  Note the RTL lists operand 2 before operand 1.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9501 (define_insn "<sse4_1_avx2>_pblendw"
9502 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9504 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
9505 (match_operand:VI2_AVX2 1 "register_operand" "0,x")
9506 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9509 pblendw\t{%3, %2, %0|%0, %2, %3}
9510 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9511 [(set_attr "isa" "noavx,avx")
9512 (set_attr "type" "ssemov")
9513 (set_attr "prefix_extra" "1")
9514 (set_attr "length_immediate" "1")
9515 (set_attr "prefix" "orig,vex")
9516 (set_attr "mode" "<sseinsnmode>")])
;; vpblendd over the VI4_AVX2 modes with an 8-bit immediate selector
;; (operand 3).  VEX-only, three-operand form; the RTL lists operand 2
;; before operand 1.
9518 (define_insn "avx2_pblendd<mode>"
9519 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9521 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9522 (match_operand:VI4_AVX2 1 "register_operand" "x")
9523 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9525 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9526 [(set_attr "type" "ssemov")
9527 (set_attr "prefix_extra" "1")
9528 (set_attr "length_immediate" "1")
9529 (set_attr "prefix" "vex")
9530 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of operand 1.
;; "%v" / "maybe_vex" select the VEX spelling when it applies.
9532 (define_insn "sse4_1_phminposuw"
9533 [(set (match_operand:V8HI 0 "register_operand" "=x")
9534 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9535 UNSPEC_PHMINPOSUW))]
9537 "%vphminposuw\t{%1, %0|%0, %1}"
9538 [(set_attr "type" "sselog1")
9539 (set_attr "prefix_extra" "1")
9540 (set_attr "prefix" "maybe_vex")
9541 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw, 256-bit: a whole V16QI operand is widened into a
;; V16HI destination.  VEX-only.
9543 (define_insn "avx2_<code>v16qiv16hi2"
9544 [(set (match_operand:V16HI 0 "register_operand" "=x")
9546 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9548 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9549 [(set_attr "type" "ssemov")
9550 (set_attr "prefix_extra" "1")
9551 (set_attr "prefix" "vex")
9552 (set_attr "mode" "OI")])
;; pmov<extsuffix>bw, 128-bit: elements of a V16QI operand, selected by
;; a parallel starting at index 0, are widened into a V8HI destination.
9554 (define_insn "sse4_1_<code>v8qiv8hi2"
9555 [(set (match_operand:V8HI 0 "register_operand" "=x")
9558 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9559 (parallel [(const_int 0)
9568 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9569 [(set_attr "type" "ssemov")
9570 (set_attr "prefix_extra" "1")
9571 (set_attr "prefix" "maybe_vex")
9572 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd, 256-bit: elements of a V16QI operand, selected
;; by a parallel starting at index 0, are widened into a V8SI
;; destination.  VEX-only.
9574 (define_insn "avx2_<code>v8qiv8si2"
9575 [(set (match_operand:V8SI 0 "register_operand" "=x")
9578 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9579 (parallel [(const_int 0)
9588 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9589 [(set_attr "type" "ssemov")
9590 (set_attr "prefix_extra" "1")
9591 (set_attr "prefix" "vex")
9592 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd, 128-bit: elements of a V16QI operand, selected by
;; a parallel starting at index 0, are widened into a V4SI destination.
9594 (define_insn "sse4_1_<code>v4qiv4si2"
9595 [(set (match_operand:V4SI 0 "register_operand" "=x")
9598 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9599 (parallel [(const_int 0)
9604 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9605 [(set_attr "type" "ssemov")
9606 (set_attr "prefix_extra" "1")
9607 (set_attr "prefix" "maybe_vex")
9608 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd, 256-bit: a whole V8HI operand is widened into a
;; V8SI destination.  VEX-only.
9610 (define_insn "avx2_<code>v8hiv8si2"
9611 [(set (match_operand:V8SI 0 "register_operand" "=x")
9613 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9615 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9616 [(set_attr "type" "ssemov")
9617 (set_attr "prefix_extra" "1")
9618 (set_attr "prefix" "vex")
9619 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd, 128-bit: elements of a V8HI operand, selected by
;; a parallel starting at index 0, are widened into a V4SI destination.
9621 (define_insn "sse4_1_<code>v4hiv4si2"
9622 [(set (match_operand:V4SI 0 "register_operand" "=x")
9625 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9626 (parallel [(const_int 0)
9631 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9632 [(set_attr "type" "ssemov")
9633 (set_attr "prefix_extra" "1")
9634 (set_attr "prefix" "maybe_vex")
9635 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq, 256-bit: elements of a V16QI operand, selected
;; by a parallel starting at index 0, are widened into a V4DI
;; destination.  VEX-only.
9637 (define_insn "avx2_<code>v4qiv4di2"
9638 [(set (match_operand:V4DI 0 "register_operand" "=x")
9641 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9642 (parallel [(const_int 0)
9647 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "ssemov")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "vex")
9651 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq, 128-bit: elements of a V16QI operand, selected by
;; a parallel starting at index 0, are widened into a V2DI destination.
9653 (define_insn "sse4_1_<code>v2qiv2di2"
9654 [(set (match_operand:V2DI 0 "register_operand" "=x")
9657 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9658 (parallel [(const_int 0)
9661 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9662 [(set_attr "type" "ssemov")
9663 (set_attr "prefix_extra" "1")
9664 (set_attr "prefix" "maybe_vex")
9665 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq, 256-bit: elements of a V8HI operand, selected by
;; a parallel starting at index 0, are widened into a V4DI destination.
;; VEX-only.
9667 (define_insn "avx2_<code>v4hiv4di2"
9668 [(set (match_operand:V4DI 0 "register_operand" "=x")
9671 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9672 (parallel [(const_int 0)
9677 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9678 [(set_attr "type" "ssemov")
9679 (set_attr "prefix_extra" "1")
9680 (set_attr "prefix" "vex")
9681 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq, 128-bit: elements of a V8HI operand, selected by
;; a parallel starting at index 0, are widened into a V2DI destination.
9683 (define_insn "sse4_1_<code>v2hiv2di2"
9684 [(set (match_operand:V2DI 0 "register_operand" "=x")
9687 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9688 (parallel [(const_int 0)
9691 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9692 [(set_attr "type" "ssemov")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "prefix" "maybe_vex")
9695 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq, 256-bit: a whole V4SI operand is widened into a
;; V4DI destination.  The template is the VEX spelling only, so the
;; "prefix" attribute is set to "vex" explicitly, as in every other
;; avx2_<code>* extension pattern; leaving it out lets the attribute
;; fall back to its default value.
9697 (define_insn "avx2_<code>v4siv4di2"
9698 [(set (match_operand:V4DI 0 "register_operand" "=x")
9700 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9702 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9703 [(set_attr "type" "ssemov")
9704 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9705 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq, 128-bit: elements of a V4SI operand, selected by
;; a parallel starting at index 0, are widened into a V2DI destination.
9707 (define_insn "sse4_1_<code>v2siv2di2"
9708 [(set (match_operand:V2DI 0 "register_operand" "=x")
9711 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9712 (parallel [(const_int 0)
9715 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9716 [(set_attr "type" "ssemov")
9717 (set_attr "prefix_extra" "1")
9718 (set_attr "prefix" "maybe_vex")
9719 (set_attr "mode" "TI")])
9721 ;; vtestps/vtestpd set FLAGS_REG much like comiss and ucomiss do,
9722 ;; but they are not really compare instructions.
;; vtest<ssemodesuffix> over the VF modes.  The only output is
;; FLAGS_REG in CC mode, set from an unspec of operands 0 and 1; no
;; vector register is written.
9723 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9724 [(set (reg:CC FLAGS_REG)
9725 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9726 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9729 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9730 [(set_attr "type" "ssecomi")
9731 (set_attr "prefix_extra" "1")
9732 (set_attr "prefix" "vex")
9733 (set_attr "mode" "<MODE>")])
9735 ;; ptest sets FLAGS_REG much like comiss and ucomiss do,
9736 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI operands.  The only output is FLAGS_REG in
;; CC mode.  VEX-only.
9737 (define_insn "avx_ptest256"
9738 [(set (reg:CC FLAGS_REG)
9739 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9740 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9743 "vptest\t{%1, %0|%0, %1}"
9744 [(set_attr "type" "ssecomi")
9745 (set_attr "prefix_extra" "1")
9746 (set_attr "prefix" "vex")
9747 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands.  The only output is FLAGS_REG in CC
;; mode.  "%v" / "maybe_vex" select the VEX spelling when it applies.
9749 (define_insn "sse4_1_ptest"
9750 [(set (reg:CC FLAGS_REG)
9751 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9752 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9755 "%vptest\t{%1, %0|%0, %1}"
9756 [(set_attr "type" "ssecomi")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "maybe_vex")
9759 (set_attr "mode" "TI")])
;; Packed round<ssemodesuffix> over the VF modes.  Operand 2 is a
;; constant in 0..15.  prefix_data16 is computed with a test on
;; TARGET_AVX rather than given as a fixed value.
9761 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9762 [(set (match_operand:VF 0 "register_operand" "=x")
9764 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9765 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9768 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9769 [(set_attr "type" "ssecvt")
9770 (set (attr "prefix_data16")
9772 (match_test "TARGET_AVX")
9774 (const_string "1")))
9775 (set_attr "prefix_extra" "1")
9776 (set_attr "length_immediate" "1")
9777 (set_attr "prefix" "maybe_vex")
9778 (set_attr "mode" "<MODE>")])
;; Scalar round<ssescalarmodesuffix> over the VF_128 modes.  Operand 2
;; is the value being rounded, operand 3 a constant in 0..15, and
;; operand 1 a second vector input.  Alternative 0 is the two-operand
;; SSE form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
9780 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9781 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9784 [(match_operand:VF_128 2 "register_operand" "x,x")
9785 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9787 (match_operand:VF_128 1 "register_operand" "0,x")
9791 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9792 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9793 [(set_attr "isa" "noavx,avx")
9794 (set_attr "type" "ssecvt")
9795 (set_attr "length_immediate" "1")
9796 (set_attr "prefix_data16" "1,*")
9797 (set_attr "prefix_extra" "1")
9798 (set_attr "prefix" "orig,vex")
9799 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 over the VF modes, enabled only when
;; TARGET_ROUND holds and flag_trapping_math is off.
;; The preparation code builds pred_half = 0.5 - 2**(-p - 1), where p is
;; the precision of the scalar format (the in-code comment calls this
;; nextafter (0.5, 0.0)), broadcasts it into a vector register, and
;; gives it the sign of operand 1 via copysign; that result is
;; operands[3].  operands[4] is a fresh register and operands[5] is the
;; ROUND_TRUNC immediate used by the second set of the pattern.
9801 (define_expand "round<mode>2"
9804 (match_operand:VF 1 "nonimmediate_operand" "")
9806 (set (match_operand:VF 0 "register_operand" "")
9808 [(match_dup 4) (match_dup 5)]
9810 "TARGET_ROUND && !flag_trapping_math"
9812 enum machine_mode scalar_mode;
9813 const struct real_format *fmt;
9814 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9817 scalar_mode = GET_MODE_INNER (<MODE>mode);
9819 /* load nextafter (0.5, 0.0) */
9820 fmt = REAL_MODE_FORMAT (scalar_mode);
9821 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9822 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9823 half = const_double_from_real_value (pred_half, scalar_mode);
9825 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9826 vec_half = force_reg (<MODE>mode, vec_half);
9828 operands[3] = gen_reg_rtx (<MODE>mode);
9829 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9831 operands[4] = gen_reg_rtx (<MODE>mode);
9832 operands[5] = GEN_INT (ROUND_TRUNC);
9835 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9837 ;; Intel SSE4.2 string/text processing instructions
9839 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in "c"
;; (operand 0), a V16QI value in "Yz" (operand 1) and FLAGS_REG.
;; It is split while pseudos can still be created.  The split code
;; looks at the REG_UNUSED notes on curr_insn to learn which of the
;; three results are live, then emits pcmpestri for operand 0 and/or
;; pcmpestrm for operand 1 (the guards on those two calls are
;; presumably "if (ecx)" / "if (xmm0)"; they are not visible here).
;; When only the flags are live it emits the cconly pattern, and when
;; nothing is live it emits a deleted-insn note.
9841 (define_insn_and_split "sse4_2_pcmpestr"
9842 [(set (match_operand:SI 0 "register_operand" "=c,c")
9844 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9845 (match_operand:SI 3 "register_operand" "a,a")
9846 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9847 (match_operand:SI 5 "register_operand" "d,d")
9848 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9850 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9858 (set (reg:CC FLAGS_REG)
9867 && can_create_pseudo_p ()"
9872 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9873 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9874 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9877 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9878 operands[3], operands[4],
9879 operands[5], operands[6]));
9881 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9882 operands[3], operands[4],
9883 operands[5], operands[6]));
9884 if (flags && !(ecx || xmm0))
9885 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9886 operands[2], operands[3],
9887 operands[4], operands[5],
9889 if (!(flags || ecx || xmm0))
9890 emit_note (NOTE_INSN_DELETED);
9894 [(set_attr "type" "sselog")
9895 (set_attr "prefix_data16" "1")
9896 (set_attr "prefix_extra" "1")
9897 (set_attr "length_immediate" "1")
9898 (set_attr "memory" "none,load")
9899 (set_attr "mode" "TI")])
;; pcmpestri: SI result in "c" plus FLAGS_REG.  Operands 2 and 4 are
;; fixed to "a" and "d" by their constraints and are not printed in the
;; template.  The two alternatives differ only in whether operand 3 is
;; a register or memory, as the "memory" attribute records.
9901 (define_insn "sse4_2_pcmpestri"
9902 [(set (match_operand:SI 0 "register_operand" "=c,c")
9904 [(match_operand:V16QI 1 "register_operand" "x,x")
9905 (match_operand:SI 2 "register_operand" "a,a")
9906 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9907 (match_operand:SI 4 "register_operand" "d,d")
9908 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9910 (set (reg:CC FLAGS_REG)
9919 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9920 [(set_attr "type" "sselog")
9921 (set_attr "prefix_data16" "1")
9922 (set_attr "prefix_extra" "1")
9923 (set_attr "prefix" "maybe_vex")
9924 (set_attr "length_immediate" "1")
9925 (set_attr "memory" "none,load")
9926 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in "Yz" plus FLAGS_REG.  Operands 2 and 4
;; are fixed to "a" and "d" by their constraints and are not printed in
;; the template.  The two alternatives differ only in whether operand 3
;; is a register or memory.
9928 (define_insn "sse4_2_pcmpestrm"
9929 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9931 [(match_operand:V16QI 1 "register_operand" "x,x")
9932 (match_operand:SI 2 "register_operand" "a,a")
9933 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9934 (match_operand:SI 4 "register_operand" "d,d")
9935 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9937 (set (reg:CC FLAGS_REG)
9946 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9947 [(set_attr "type" "sselog")
9948 (set_attr "prefix_data16" "1")
9949 (set_attr "prefix_extra" "1")
9950 (set_attr "length_immediate" "1")
9951 (set_attr "prefix" "maybe_vex")
9952 (set_attr "memory" "none,load")
9953 (set_attr "mode" "TI")])
;; Flags-only pcmpestr.  The instruction still writes one of its
;; register results, so that register is modelled as a clobbered
;; scratch: alternatives 0 and 1 print pcmpestrm and clobber the "Yz"
;; scratch (operand 0); alternatives 2 and 3 print pcmpestri and
;; clobber the "c" scratch (operand 1).  Within each pair the second
;; alternative takes operand 4 from memory.
9955 (define_insn "sse4_2_pcmpestr_cconly"
9956 [(set (reg:CC FLAGS_REG)
9958 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9959 (match_operand:SI 3 "register_operand" "a,a,a,a")
9960 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9961 (match_operand:SI 5 "register_operand" "d,d,d,d")
9962 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9964 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9965 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9968 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9969 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9970 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9971 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9972 [(set_attr "type" "sselog")
9973 (set_attr "prefix_data16" "1")
9974 (set_attr "prefix_extra" "1")
9975 (set_attr "length_immediate" "1")
9976 (set_attr "memory" "none,load,none,load")
9977 (set_attr "prefix" "maybe_vex")
9978 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in "c"
;; (operand 0), a V16QI value in "Yz" (operand 1) and FLAGS_REG.
;; It is split while pseudos can still be created.  The split code
;; looks at the REG_UNUSED notes on curr_insn to learn which results
;; are live, then emits pcmpistri for operand 0 and/or pcmpistrm for
;; operand 1 (the guards on those two calls are presumably "if (ecx)" /
;; "if (xmm0)"; they are not visible here).  When only the flags are
;; live it emits the cconly pattern, and when nothing is live it emits
;; a deleted-insn note.
9980 (define_insn_and_split "sse4_2_pcmpistr"
9981 [(set (match_operand:SI 0 "register_operand" "=c,c")
9983 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9984 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9985 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9987 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9993 (set (reg:CC FLAGS_REG)
10000 && can_create_pseudo_p ()"
10005 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10006 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10007 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10010 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10011 operands[3], operands[4]));
10013 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10014 operands[3], operands[4]));
10015 if (flags && !(ecx || xmm0))
10016 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10017 operands[2], operands[3],
10019 if (!(flags || ecx || xmm0))
10020 emit_note (NOTE_INSN_DELETED);
10024 [(set_attr "type" "sselog")
10025 (set_attr "prefix_data16" "1")
10026 (set_attr "prefix_extra" "1")
10027 (set_attr "length_immediate" "1")
10028 (set_attr "memory" "none,load")
10029 (set_attr "mode" "TI")])
;; Emits (v)pcmpistri.  Sets the SI result in operand 0 (constraint "c") and
;; FLAGS_REG.  Operand 2 may be a register or memory; operand 3 is a constant
;; accepted by const_0_to_255_operand.
10031 (define_insn "sse4_2_pcmpistri"
10032 [(set (match_operand:SI 0 "register_operand" "=c,c")
10034 [(match_operand:V16QI 1 "register_operand" "x,x")
10035 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10036 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10038 (set (reg:CC FLAGS_REG)
10045 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10046 [(set_attr "type" "sselog")
10047 (set_attr "prefix_data16" "1")
10048 (set_attr "prefix_extra" "1")
10049 (set_attr "length_immediate" "1")
10050 (set_attr "prefix" "maybe_vex")
10051 (set_attr "memory" "none,load")
10052 (set_attr "mode" "TI")])
;; Emits (v)pcmpistrm.  Sets the V16QI result in operand 0 (constraint "Yz")
;; and FLAGS_REG.  Operand 2 may be a register or memory; operand 3 is a
;; constant accepted by const_0_to_255_operand.
10054 (define_insn "sse4_2_pcmpistrm"
10055 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10057 [(match_operand:V16QI 1 "register_operand" "x,x")
10058 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10059 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10061 (set (reg:CC FLAGS_REG)
10068 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10069 [(set_attr "type" "sselog")
10070 (set_attr "prefix_data16" "1")
10071 (set_attr "prefix_extra" "1")
10072 (set_attr "length_immediate" "1")
10073 (set_attr "prefix" "maybe_vex")
10074 (set_attr "memory" "none,load")
10075 (set_attr "mode" "TI")])
;; Flags-only form of PCMPISTR: the only SET visible here targets FLAGS_REG,
;; and operands 0 and 1 are match_scratch clobbers.  Alternatives 0-1 emit
;; (v)pcmpistrm and alternatives 2-3 emit (v)pcmpistri; in the odd-numbered
;; alternatives operand 3 is a memory operand (memory attribute "load").
10077 (define_insn "sse4_2_pcmpistr_cconly"
10078 [(set (reg:CC FLAGS_REG)
10080 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10081 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10082 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10084 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10085 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10088 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10089 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10090 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10091 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10092 [(set_attr "type" "sselog")
10093 (set_attr "prefix_data16" "1")
10094 (set_attr "prefix_extra" "1")
10095 (set_attr "length_immediate" "1")
10096 (set_attr "memory" "none,load,none,load")
10097 (set_attr "prefix" "maybe_vex")
10098 (set_attr "mode" "TI")])
10100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10102 ;; XOP instructions
10104 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10106 ;; XOP parallel integer multiply/add instructions.
10107 ;; Note the XOP multiply/add instructions
10108 ;; a[i] = b[i] * c[i] + d[i];
10109 ;; do not allow the value being added to be a memory operation.
;; Emits vpmacsww: V8HI destination register and three V8HI sources.  Only
;; operand 2 has a memory alternative ("xm"); operands 1 and 3 are
;; constrained to registers, and operand 1 carries the "%" commutative
;; modifier.
10110 (define_insn "xop_pmacsww"
10111 [(set (match_operand:V8HI 0 "register_operand" "=x")
10114 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10115 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10116 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10118 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10119 [(set_attr "type" "ssemuladd")
10120 (set_attr "mode" "TI")])
;; Emits vpmacssww: a V8HI multiply of operands 1 and 2 combined with V8HI
;; operand 3.  Only operand 2 has a memory alternative ("xm").
10122 (define_insn "xop_pmacssww"
10123 [(set (match_operand:V8HI 0 "register_operand" "=x")
10125 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10126 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10127 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10129 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10130 [(set_attr "type" "ssemuladd")
10131 (set_attr "mode" "TI")])
;; Emits vpmacsdd: V4SI destination register and three V4SI sources.  Only
;; operand 2 has a memory alternative ("xm").
10133 (define_insn "xop_pmacsdd"
10134 [(set (match_operand:V4SI 0 "register_operand" "=x")
10137 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10138 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10139 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10141 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10142 [(set_attr "type" "ssemuladd")
10143 (set_attr "mode" "TI")])
;; Emits vpmacssdd: a V4SI multiply of operands 1 and 2 combined with V4SI
;; operand 3.  Only operand 2 has a memory alternative ("xm").
10145 (define_insn "xop_pmacssdd"
10146 [(set (match_operand:V4SI 0 "register_operand" "=x")
10148 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10149 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10150 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10152 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10153 [(set_attr "type" "ssemuladd")
10154 (set_attr "mode" "TI")])
;; Emits vpmacssdql: V2DI destination, V4SI operands 1 and 2 (each with an
;; element selector whose first visible index is 1) and V2DI operand 3.
10156 (define_insn "xop_pmacssdql"
10157 [(set (match_operand:V2DI 0 "register_operand" "=x")
10162 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10163 (parallel [(const_int 1)
10166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10167 (parallel [(const_int 1)
10169 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10171 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10172 [(set_attr "type" "ssemuladd")
10173 (set_attr "mode" "TI")])
;; Emits vpmacssdqh: V2DI destination, V4SI operands 1 and 2 (each with an
;; element selector whose first visible index is 0) and V2DI operand 3.
10175 (define_insn "xop_pmacssdqh"
10176 [(set (match_operand:V2DI 0 "register_operand" "=x")
10181 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10182 (parallel [(const_int 0)
10186 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10187 (parallel [(const_int 0)
10189 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10191 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10192 [(set_attr "type" "ssemuladd")
10193 (set_attr "mode" "TI")])
;; Emits vpmacsdql: V2DI destination, V4SI operands 1 and 2 (each with an
;; element selector whose first visible index is 1) and V2DI operand 3.
10195 (define_insn "xop_pmacsdql"
10196 [(set (match_operand:V2DI 0 "register_operand" "=x")
10201 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10202 (parallel [(const_int 1)
10206 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10207 (parallel [(const_int 1)
10209 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10211 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10212 [(set_attr "type" "ssemuladd")
10213 (set_attr "mode" "TI")])
10215 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10216 ;; fake it with a multiply/add. In general, we expect the define_split to
10217 ;; occur before register allocation, so we have to handle the corner case where
10218 ;; the target is the same as operands 1/2
;; V4SI x V4SI -> V2DI multiply.  The destination is an earlyclobber
;; register ("=&x").  The split condition visible here is
;; "&& reload_completed", and the split preparation sets operands[3] to the
;; V2DI zero vector (CONST0_RTX).  The visible selector on operand 2 lists
;; elements 1 and 3.
10219 (define_insn_and_split "xop_mulv2div2di3_low"
10220 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10224 (match_operand:V4SI 1 "register_operand" "%x")
10225 (parallel [(const_int 1)
10229 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10230 (parallel [(const_int 1)
10231 (const_int 3)])))))]
10234 "&& reload_completed"
10235 [(set (match_dup 0)
10243 (parallel [(const_int 1)
10248 (parallel [(const_int 1)
10252 operands[3] = CONST0_RTX (V2DImode);
10254 [(set_attr "type" "ssemul")
10255 (set_attr "mode" "TI")])
;; Emits vpmacsdqh: V2DI destination, V4SI operands 1 and 2 (each with an
;; element selector whose first visible index is 0) and V2DI operand 3.
10257 (define_insn "xop_pmacsdqh"
10258 [(set (match_operand:V2DI 0 "register_operand" "=x")
10263 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10264 (parallel [(const_int 0)
10268 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10269 (parallel [(const_int 0)
10271 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10273 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10274 [(set_attr "type" "ssemuladd")
10275 (set_attr "mode" "TI")])
10277 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10278 ;; fake it with a multiply/add. In general, we expect the define_split to
10279 ;; occur before register allocation, so we have to handle the corner case where
10280 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply.  The destination is an earlyclobber
;; register ("=&x").  The split condition visible here is
;; "&& reload_completed", and the split preparation sets operands[3] to the
;; V2DI zero vector (CONST0_RTX).  The visible selector on operand 2 lists
;; elements 0 and 2.
10281 (define_insn_and_split "xop_mulv2div2di3_high"
10282 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10286 (match_operand:V4SI 1 "register_operand" "%x")
10287 (parallel [(const_int 0)
10291 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10292 (parallel [(const_int 0)
10293 (const_int 2)])))))]
10296 "&& reload_completed"
10297 [(set (match_dup 0)
10305 (parallel [(const_int 0)
10310 (parallel [(const_int 0)
10314 operands[3] = CONST0_RTX (V2DImode);
10316 [(set_attr "type" "ssemul")
10317 (set_attr "mode" "TI")])
10319 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Emits vpmacsswd: V4SI destination, V8HI operands 1 and 2 (each with an
;; element selector whose first visible index is 1) and V4SI operand 3.
10320 (define_insn "xop_pmacsswd"
10321 [(set (match_operand:V4SI 0 "register_operand" "=x")
10326 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10327 (parallel [(const_int 1)
10333 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10334 (parallel [(const_int 1)
10338 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10340 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10341 [(set_attr "type" "ssemuladd")
10342 (set_attr "mode" "TI")])
;; Emits vpmacswd: V4SI destination, V8HI operands 1 and 2 (each with an
;; element selector whose first visible index is 1) and V4SI operand 3.
10344 (define_insn "xop_pmacswd"
10345 [(set (match_operand:V4SI 0 "register_operand" "=x")
10350 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10351 (parallel [(const_int 1)
10357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10358 (parallel [(const_int 1)
10362 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10364 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10365 [(set_attr "type" "ssemuladd")
10366 (set_attr "mode" "TI")])
;; Emits vpmadcsswd: V4SI destination, V8HI operands 1 and 2 and V4SI
;; operand 3.  The RTL uses two pairs of element selectors, one pair
;; starting at index 0 and one starting at index 1.
10368 (define_insn "xop_pmadcsswd"
10369 [(set (match_operand:V4SI 0 "register_operand" "=x")
10375 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10376 (parallel [(const_int 0)
10382 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10383 (parallel [(const_int 0)
10391 (parallel [(const_int 1)
10398 (parallel [(const_int 1)
10401 (const_int 7)])))))
10402 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10404 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10405 [(set_attr "type" "ssemuladd")
10406 (set_attr "mode" "TI")])
;; Emits vpmadcswd: V4SI destination, V8HI operands 1 and 2 and V4SI
;; operand 3.  The RTL uses two pairs of element selectors, one pair
;; starting at index 0 and one starting at index 1.
10408 (define_insn "xop_pmadcswd"
10409 [(set (match_operand:V4SI 0 "register_operand" "=x")
10415 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10416 (parallel [(const_int 0)
10422 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10423 (parallel [(const_int 0)
10431 (parallel [(const_int 1)
10438 (parallel [(const_int 1)
10441 (const_int 7)])))))
10442 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10444 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10445 [(set_attr "type" "ssemuladd")
10446 (set_attr "mode" "TI")])
10448 ;; XOP parallel XMM conditional moves
;; Emits vpcmov for every mode of the V iterator.  Two alternatives: either
;; operand 3 or operand 2 may be in memory, never both ("x,m" against "xm,x").
10449 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10450 [(set (match_operand:V 0 "register_operand" "=x,x")
10452 (match_operand:V 3 "nonimmediate_operand" "x,m")
10453 (match_operand:V 1 "vector_move_operand" "x,x")
10454 (match_operand:V 2 "vector_move_operand" "xm,x")))]
10456 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10457 [(set_attr "type" "sse4arg")])
10459 ;; XOP horizontal add/subtract instructions
;; Emits vphaddbw: V16QI source (register or memory) into a V8HI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10460 (define_insn "xop_phaddbw"
10461 [(set (match_operand:V8HI 0 "register_operand" "=x")
10465 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10466 (parallel [(const_int 0)
10477 (parallel [(const_int 1)
10484 (const_int 15)])))))]
10486 "vphaddbw\t{%1, %0|%0, %1}"
10487 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbd: V16QI source (register or memory) into a V4SI register.
;; The RTL uses four element selectors on operand 1, starting at indices
;; 0, 1, 2 and 3.
10489 (define_insn "xop_phaddbd"
10490 [(set (match_operand:V4SI 0 "register_operand" "=x")
10495 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10496 (parallel [(const_int 0)
10503 (parallel [(const_int 1)
10506 (const_int 13)]))))
10511 (parallel [(const_int 2)
10518 (parallel [(const_int 3)
10521 (const_int 15)]))))))]
10523 "vphaddbd\t{%1, %0|%0, %1}"
10524 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbq: V16QI source (register or memory) into a V2DI register.
;; The RTL uses eight element selectors on operand 1, starting at indices
;; 0-3 and 8-11.
10526 (define_insn "xop_phaddbq"
10527 [(set (match_operand:V2DI 0 "register_operand" "=x")
10533 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10534 (parallel [(const_int 0)
10539 (parallel [(const_int 1)
10545 (parallel [(const_int 2)
10550 (parallel [(const_int 3)
10551 (const_int 7)])))))
10557 (parallel [(const_int 8)
10562 (parallel [(const_int 9)
10563 (const_int 13)]))))
10568 (parallel [(const_int 10)
10573 (parallel [(const_int 11)
10574 (const_int 15)])))))))]
10576 "vphaddbq\t{%1, %0|%0, %1}"
10577 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwd: V8HI source (register or memory) into a V4SI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10579 (define_insn "xop_phaddwd"
10580 [(set (match_operand:V4SI 0 "register_operand" "=x")
10584 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10585 (parallel [(const_int 0)
10592 (parallel [(const_int 1)
10595 (const_int 7)])))))]
10597 "vphaddwd\t{%1, %0|%0, %1}"
10598 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwq: V8HI source (register or memory) into a V2DI register.
;; The RTL uses four element selectors on operand 1, starting at indices
;; 0, 1, 2 and 3.
10600 (define_insn "xop_phaddwq"
10601 [(set (match_operand:V2DI 0 "register_operand" "=x")
10606 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10607 (parallel [(const_int 0)
10612 (parallel [(const_int 1)
10618 (parallel [(const_int 2)
10623 (parallel [(const_int 3)
10624 (const_int 7)]))))))]
10626 "vphaddwq\t{%1, %0|%0, %1}"
10627 [(set_attr "type" "sseiadd1")])
;; Emits vphadddq: V4SI source (register or memory) into a V2DI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10629 (define_insn "xop_phadddq"
10630 [(set (match_operand:V2DI 0 "register_operand" "=x")
10634 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10635 (parallel [(const_int 0)
10640 (parallel [(const_int 1)
10641 (const_int 3)])))))]
10643 "vphadddq\t{%1, %0|%0, %1}"
10644 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubw: V16QI source (register or memory) into a V8HI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10646 (define_insn "xop_phaddubw"
10647 [(set (match_operand:V8HI 0 "register_operand" "=x")
10651 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10652 (parallel [(const_int 0)
10663 (parallel [(const_int 1)
10670 (const_int 15)])))))]
10672 "vphaddubw\t{%1, %0|%0, %1}"
10673 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubd: V16QI source (register or memory) into a V4SI register.
;; The RTL uses four element selectors on operand 1, starting at indices
;; 0, 1, 2 and 3.
10675 (define_insn "xop_phaddubd"
10676 [(set (match_operand:V4SI 0 "register_operand" "=x")
10681 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10682 (parallel [(const_int 0)
10689 (parallel [(const_int 1)
10692 (const_int 13)]))))
10697 (parallel [(const_int 2)
10704 (parallel [(const_int 3)
10707 (const_int 15)]))))))]
10709 "vphaddubd\t{%1, %0|%0, %1}"
10710 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubq: V16QI source (register or memory) into a V2DI register.
;; The RTL uses eight element selectors on operand 1, starting at indices
;; 0-3 and 8-11.
10712 (define_insn "xop_phaddubq"
10713 [(set (match_operand:V2DI 0 "register_operand" "=x")
10719 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10720 (parallel [(const_int 0)
10725 (parallel [(const_int 1)
10731 (parallel [(const_int 2)
10736 (parallel [(const_int 3)
10737 (const_int 7)])))))
10743 (parallel [(const_int 8)
10748 (parallel [(const_int 9)
10749 (const_int 13)]))))
10754 (parallel [(const_int 10)
10759 (parallel [(const_int 11)
10760 (const_int 15)])))))))]
10762 "vphaddubq\t{%1, %0|%0, %1}"
10763 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwd: V8HI source (register or memory) into a V4SI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10765 (define_insn "xop_phadduwd"
10766 [(set (match_operand:V4SI 0 "register_operand" "=x")
10770 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10771 (parallel [(const_int 0)
10778 (parallel [(const_int 1)
10781 (const_int 7)])))))]
10783 "vphadduwd\t{%1, %0|%0, %1}"
10784 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwq: V8HI source (register or memory) into a V2DI register.
;; The RTL uses four element selectors on operand 1, starting at indices
;; 0, 1, 2 and 3.
10786 (define_insn "xop_phadduwq"
10787 [(set (match_operand:V2DI 0 "register_operand" "=x")
10792 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10793 (parallel [(const_int 0)
10798 (parallel [(const_int 1)
10804 (parallel [(const_int 2)
10809 (parallel [(const_int 3)
10810 (const_int 7)]))))))]
10812 "vphadduwq\t{%1, %0|%0, %1}"
10813 [(set_attr "type" "sseiadd1")])
;; Emits vphaddudq: V4SI source (register or memory) into a V2DI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10815 (define_insn "xop_phaddudq"
10816 [(set (match_operand:V2DI 0 "register_operand" "=x")
10820 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10821 (parallel [(const_int 0)
10826 (parallel [(const_int 1)
10827 (const_int 3)])))))]
10829 "vphaddudq\t{%1, %0|%0, %1}"
10830 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: V16QI source (register or memory) into a V8HI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10832 (define_insn "xop_phsubbw"
10833 [(set (match_operand:V8HI 0 "register_operand" "=x")
10837 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10838 (parallel [(const_int 0)
10849 (parallel [(const_int 1)
10856 (const_int 15)])))))]
10858 "vphsubbw\t{%1, %0|%0, %1}"
10859 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: V8HI source (register or memory) into a V4SI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10861 (define_insn "xop_phsubwd"
10862 [(set (match_operand:V4SI 0 "register_operand" "=x")
10866 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10867 (parallel [(const_int 0)
10874 (parallel [(const_int 1)
10877 (const_int 7)])))))]
10879 "vphsubwd\t{%1, %0|%0, %1}"
10880 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: V4SI source (register or memory) into a V2DI register.
;; The RTL uses two element selectors on operand 1, starting at indices 0
;; and 1.
10882 (define_insn "xop_phsubdq"
10883 [(set (match_operand:V2DI 0 "register_operand" "=x")
10887 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10888 (parallel [(const_int 0)
10893 (parallel [(const_int 1)
10894 (const_int 3)])))))]
10896 "vphsubdq\t{%1, %0|%0, %1}"
10897 [(set_attr "type" "sseiadd1")])
10899 ;; XOP permute instructions
;; Emits vpperm on three V16QI sources wrapped in UNSPEC_XOP_PERMUTE.  The
;; insn condition requires TARGET_XOP and rejects the case where operands 2
;; and 3 are both memory; the two alternatives allow memory in one or the
;; other.
10900 (define_insn "xop_pperm"
10901 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10903 [(match_operand:V16QI 1 "register_operand" "x,x")
10904 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10905 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10906 UNSPEC_XOP_PERMUTE))]
10907 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10908 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10909 [(set_attr "type" "sse4arg")
10910 (set_attr "mode" "TI")])
10912 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into a V4SI register by emitting
;; vpperm; V16QI operand 3 appears in a (use ...).  The insn condition
;; requires TARGET_XOP and rejects operands 2 and 3 both being memory.
10913 (define_insn "xop_pperm_pack_v2di_v4si"
10914 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10917 (match_operand:V2DI 1 "register_operand" "x,x"))
10919 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10920 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10921 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10922 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10923 [(set_attr "type" "sse4arg")
10924 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into a V8HI register by emitting
;; vpperm; V16QI operand 3 appears in a (use ...).  The insn condition
;; requires TARGET_XOP and rejects operands 2 and 3 both being memory.
10926 (define_insn "xop_pperm_pack_v4si_v8hi"
10927 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10930 (match_operand:V4SI 1 "register_operand" "x,x"))
10932 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10933 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10934 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10935 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10936 [(set_attr "type" "sse4arg")
10937 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into a V16QI register by
;; emitting vpperm; V16QI operand 3 appears in a (use ...).  The insn
;; condition requires TARGET_XOP and rejects operands 2 and 3 both being
;; memory.
10939 (define_insn "xop_pperm_pack_v8hi_v16qi"
10940 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10943 (match_operand:V8HI 1 "register_operand" "x,x"))
10945 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10946 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10947 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10948 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10949 [(set_attr "type" "sse4arg")
10950 (set_attr "mode" "TI")])
10952 ;; XOP packed rotate instructions
;; Expander for rotl<mode>3 over the VI_128 modes with an SI count.  When
;; operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; visible code builds a PARALLEL of <ssescalarnum> copies of the count
;; (first converted to <ssescalarmode> with convert_move if its mode
;; differs), initialises a fresh vector register from it with
;; gen_vec_init<mode>, and emits xop_vrotl<mode>3 with that register.
10953 (define_expand "rotl<mode>3"
10954 [(set (match_operand:VI_128 0 "register_operand" "")
10956 (match_operand:VI_128 1 "nonimmediate_operand" "")
10957 (match_operand:SI 2 "general_operand")))]
10960 /* If we were given a scalar, convert it to parallel */
10961 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10963 rtvec vs = rtvec_alloc (<ssescalarnum>);
10964 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10965 rtx reg = gen_reg_rtx (<MODE>mode);
10966 rtx op2 = operands[2];
10969 if (GET_MODE (op2) != <ssescalarmode>mode)
10971 op2 = gen_reg_rtx (<ssescalarmode>mode);
10972 convert_move (op2, operands[2], false);
10975 for (i = 0; i < <ssescalarnum>; i++)
10976 RTVEC_ELT (vs, i) = op2;
10978 emit_insn (gen_vec_init<mode> (reg, par));
10979 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotr<mode>3 over the VI_128 modes with an SI count.  When
;; operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; visible code broadcasts the count into a vector register (as in
;; rotl<mode>3), negates that vector with gen_neg<mode>2, and emits
;; xop_vrotl<mode>3 with the negated counts.
10984 (define_expand "rotr<mode>3"
10985 [(set (match_operand:VI_128 0 "register_operand" "")
10987 (match_operand:VI_128 1 "nonimmediate_operand" "")
10988 (match_operand:SI 2 "general_operand")))]
10991 /* If we were given a scalar, convert it to parallel */
10992 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10994 rtvec vs = rtvec_alloc (<ssescalarnum>);
10995 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10996 rtx neg = gen_reg_rtx (<MODE>mode);
10997 rtx reg = gen_reg_rtx (<MODE>mode);
10998 rtx op2 = operands[2];
11001 if (GET_MODE (op2) != <ssescalarmode>mode)
11003 op2 = gen_reg_rtx (<ssescalarmode>mode);
11004 convert_move (op2, operands[2], false);
11007 for (i = 0; i < <ssescalarnum>; i++)
11008 RTVEC_ELT (vs, i) = op2;
11010 emit_insn (gen_vec_init<mode> (reg, par));
11011 emit_insn (gen_neg<mode>2 (neg, reg));
11012 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count: emits vprot<ssemodesuffix> with operand 2
;; restricted by const_0_to_<sserotatemax>_operand.  Operand 1 may be a
;; register or memory.
11017 (define_insn "xop_rotl<mode>3"
11018 [(set (match_operand:VI_128 0 "register_operand" "=x")
11020 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11021 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11023 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11024 [(set_attr "type" "sseishft")
11025 (set_attr "length_immediate" "1")
11026 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The output code emits
;; vprot<ssemodesuffix> with operands[3] = (element width in bits) -
;; operands[2], i.e. the equivalent left-rotate count.  The width must be the
;; per-element bit size, GET_MODE_BITSIZE (<ssescalarmode>mode); the former
;; expression <ssescalarnum> * 8 (element count times eight) only equals it
;; for V4SI and produced a wrong immediate for V16QI, V8HI and V2DI.
11028 (define_insn "xop_rotr<mode>3"
11029 [(set (match_operand:VI_128 0 "register_operand" "=x")
11031 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11032 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11035 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11036 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11038 [(set_attr "type" "sseishft")
11039 (set_attr "length_immediate" "1")
11040 (set_attr "mode" "TI")])
;; Expander for a per-element variable rotate right: negates the count
;; vector (operand 2) into a fresh register with gen_neg<mode>2 and emits
;; xop_vrotl<mode>3 with the negated counts.
11042 (define_expand "vrotr<mode>3"
11043 [(match_operand:VI_128 0 "register_operand" "")
11044 (match_operand:VI_128 1 "register_operand" "")
11045 (match_operand:VI_128 2 "register_operand" "")]
11048 rtx reg = gen_reg_rtx (<MODE>mode);
11049 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11050 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a per-element variable rotate left: forwards its three
;; operands unchanged to xop_vrotl<mode>3.
11054 (define_expand "vrotl<mode>3"
11055 [(match_operand:VI_128 0 "register_operand" "")
11056 (match_operand:VI_128 1 "register_operand" "")
11057 (match_operand:VI_128 2 "register_operand" "")]
11060 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate with a vector count: emits vprot<ssemodesuffix>.  The RTL
;; is an if_then_else on operand 2 in which one arm uses (neg (match_dup 2)).
;; The insn condition requires TARGET_XOP and rejects operands 1 and 2 both
;; being memory.
11064 (define_insn "xop_vrotl<mode>3"
11065 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11066 (if_then_else:VI_128
11068 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11071 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11075 (neg:VI_128 (match_dup 2)))))]
11076 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11077 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11078 [(set_attr "type" "sseishft")
11079 (set_attr "prefix_data16" "0")
11080 (set_attr "prefix_extra" "2")
11081 (set_attr "mode" "TI")])
11083 ;; XOP packed shift instructions.
11084 ;; FIXME: add V2DI back in
;; Expander for vlshr<mode>3 over the VI124_128 modes: negates the count
;; vector (operand 2) with gen_neg<mode>2 and emits xop_lshl<mode>3 with the
;; negated counts.
11085 (define_expand "vlshr<mode>3"
11086 [(match_operand:VI124_128 0 "register_operand" "")
11087 (match_operand:VI124_128 1 "register_operand" "")
11088 (match_operand:VI124_128 2 "register_operand" "")]
11091 rtx neg = gen_reg_rtx (<MODE>mode);
11092 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11093 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for vashr<mode>3 over the VI124_128 modes: negates the count
;; vector (operand 2) with gen_neg<mode>2 and emits xop_ashl<mode>3 with the
;; negated counts.
11097 (define_expand "vashr<mode>3"
11098 [(match_operand:VI124_128 0 "register_operand" "")
11099 (match_operand:VI124_128 1 "register_operand" "")
11100 (match_operand:VI124_128 2 "register_operand" "")]
11103 rtx neg = gen_reg_rtx (<MODE>mode);
11104 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11105 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for vashl<mode>3 over the VI124_128 modes: forwards its three
;; operands unchanged to xop_ashl<mode>3.
11109 (define_expand "vashl<mode>3"
11110 [(match_operand:VI124_128 0 "register_operand" "")
11111 (match_operand:VI124_128 1 "register_operand" "")
11112 (match_operand:VI124_128 2 "register_operand" "")]
11115 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable shift with a vector count: emits vpsha<ssemodesuffix>.  The RTL
;; is an if_then_else on operand 2 in which one arm uses (neg (match_dup 2)).
;; The insn condition requires TARGET_XOP and rejects operands 1 and 2 both
;; being memory.
11119 (define_insn "xop_ashl<mode>3"
11120 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11121 (if_then_else:VI_128
11123 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11126 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11130 (neg:VI_128 (match_dup 2)))))]
11131 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11132 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11133 [(set_attr "type" "sseishft")
11134 (set_attr "prefix_data16" "0")
11135 (set_attr "prefix_extra" "2")
11136 (set_attr "mode" "TI")])
;; Variable shift with a vector count: emits vpshl<ssemodesuffix>.  The RTL
;; is an if_then_else on operand 2 in which one arm uses (neg (match_dup 2)).
;; The insn condition requires TARGET_XOP and rejects operands 1 and 2 both
;; being memory.
11138 (define_insn "xop_lshl<mode>3"
11139 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11140 (if_then_else:VI_128
11142 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11145 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11149 (neg:VI_128 (match_dup 2)))))]
11150 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11151 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11152 [(set_attr "type" "sseishft")
11153 (set_attr "prefix_data16" "0")
11154 (set_attr "prefix_extra" "2")
11155 (set_attr "mode" "TI")])
11157 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI shift by an SI count: builds a PARALLEL holding 16
;; copies of operand 2, initialises a V16QI register from it with
;; gen_vec_initv16qi, and emits xop_ashlv16qi3 with that count vector.
11158 (define_expand "ashlv16qi3"
11159 [(match_operand:V16QI 0 "register_operand" "")
11160 (match_operand:V16QI 1 "register_operand" "")
11161 (match_operand:SI 2 "nonmemory_operand" "")]
11164 rtvec vs = rtvec_alloc (16);
11165 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11166 rtx reg = gen_reg_rtx (V16QImode);
11168 for (i = 0; i < 16; i++)
11169 RTVEC_ELT (vs, i) = operands[2];
11171 emit_insn (gen_vec_initv16qi (reg, par));
11172 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI shift by an SI count: builds a PARALLEL holding 16
;; copies of operand 2, initialises a V16QI register from it with
;; gen_vec_initv16qi, and emits xop_lshlv16qi3 with that count vector.
11176 (define_expand "lshlv16qi3"
11177 [(match_operand:V16QI 0 "register_operand" "")
11178 (match_operand:V16QI 1 "register_operand" "")
11179 (match_operand:SI 2 "nonmemory_operand" "")]
11182 rtvec vs = rtvec_alloc (16);
11183 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11184 rtx reg = gen_reg_rtx (V16QImode);
11186 for (i = 0; i < 16; i++)
11187 RTVEC_ELT (vs, i) = operands[2];
11189 emit_insn (gen_vec_initv16qi (reg, par));
11190 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by an SI count, implemented
;; through xop_ashlv16qi3 with a negated count.  For a CONST_INT count the
;; broadcast element is the negated constant; for a non-constant count the
;; broadcast vector is negated with gen_negv16qi2 before the shift is
;; emitted.
;; NOTE(review): the non-constant arm of the "ele" initialiser and the
;; if/else structure around the two final emit_insn calls are not visible in
;; this excerpt.
11194 (define_expand "ashrv16qi3"
11195 [(match_operand:V16QI 0 "register_operand" "")
11196 (match_operand:V16QI 1 "register_operand" "")
11197 (match_operand:SI 2 "nonmemory_operand" "")]
11200 rtvec vs = rtvec_alloc (16);
11201 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11202 rtx reg = gen_reg_rtx (V16QImode);
11204 rtx ele = ((CONST_INT_P (operands[2]))
11205 ? GEN_INT (- INTVAL (operands[2]))
11208 for (i = 0; i < 16; i++)
11209 RTVEC_ELT (vs, i) = ele;
11211 emit_insn (gen_vec_initv16qi (reg, par));
11213 if (!CONST_INT_P (operands[2]))
11215 rtx neg = gen_reg_rtx (V16QImode);
11216 emit_insn (gen_negv16qi2 (neg, reg));
11217 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11220 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a DI count, implemented
;; through xop_ashlv2di3 with a negated count.  The negated element "ele" is
;; a negated constant for a CONST_INT count; otherwise it comes from
;; gen_negdi2, after a convert_move to DImode when the count's mode is not
;; DImode.  Both vector elements are set to "ele" before gen_vec_initv2di.
11225 (define_expand "ashrv2di3"
11226 [(match_operand:V2DI 0 "register_operand" "")
11227 (match_operand:V2DI 1 "register_operand" "")
11228 (match_operand:DI 2 "nonmemory_operand" "")]
11231 rtvec vs = rtvec_alloc (2);
11232 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11233 rtx reg = gen_reg_rtx (V2DImode);
11236 if (CONST_INT_P (operands[2]))
11237 ele = GEN_INT (- INTVAL (operands[2]));
11238 else if (GET_MODE (operands[2]) != DImode)
11240 rtx move = gen_reg_rtx (DImode);
11241 ele = gen_reg_rtx (DImode);
11242 convert_move (move, operands[2], false);
11243 emit_insn (gen_negdi2 (ele, move));
11247 ele = gen_reg_rtx (DImode);
11248 emit_insn (gen_negdi2 (ele, operands[2]));
11251 RTVEC_ELT (vs, 0) = ele;
11252 RTVEC_ELT (vs, 1) = ele;
11253 emit_insn (gen_vec_initv2di (reg, par));
11254 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11258 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for every mode of the FMAMODE iterator;
;; operand 1 may be a register or memory.
11259 (define_insn "xop_frcz<mode>2"
11260 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11262 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11265 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11266 [(set_attr "type" "ssecvt1")
11267 (set_attr "mode" "<MODE>")])
;; Expander over the VF_128 modes; its preparation code sets operands[3] to
;; the zero vector of the current mode (CONST0_RTX).
11270 (define_expand "xop_vmfrcz<mode>2"
11271 [(set (match_operand:VF_128 0 "register_operand")
11274 [(match_operand:VF_128 1 "nonimmediate_operand")]
11280 operands[3] = CONST0_RTX (<MODE>mode);
;; Emits vfrcz<ssescalarmodesuffix> for the VF_128 modes.  Operand 1 may be
;; a register or memory; operand 2 must satisfy const0_operand.
11283 (define_insn "*xop_vmfrcz_<mode>"
11284 [(set (match_operand:VF_128 0 "register_operand" "=x")
11287 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11289 (match_operand:VF_128 2 "const0_operand")
11292 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11293 [(set_attr "type" "ssecvt1")
11294 (set_attr "mode" "<MODE>")])
;; Vector integer compare: operator 1 must match
;; ix86_comparison_int_operator.  Emits vpcom%Y1<ssemodesuffix>, with %Y1
;; printed from the comparison operator.
11296 (define_insn "xop_maskcmp<mode>3"
11297 [(set (match_operand:VI_128 0 "register_operand" "=x")
11298 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11299 [(match_operand:VI_128 2 "register_operand" "x")
11300 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11302 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11303 [(set_attr "type" "sse4arg")
11304 (set_attr "prefix_data16" "0")
11305 (set_attr "prefix_rep" "0")
11306 (set_attr "prefix_extra" "2")
11307 (set_attr "length_immediate" "1")
11308 (set_attr "mode" "TI")])
;; Vector integer compare: operator 1 must match
;; ix86_comparison_uns_operator.  Emits vpcom%Y1u<ssemodesuffix>, with %Y1
;; printed from the comparison operator.
11310 (define_insn "xop_maskcmp_uns<mode>3"
11311 [(set (match_operand:VI_128 0 "register_operand" "=x")
11312 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11313 [(match_operand:VI_128 2 "register_operand" "x")
11314 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11316 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11317 [(set_attr "type" "ssecmp")
11318 (set_attr "prefix_data16" "0")
11319 (set_attr "prefix_rep" "0")
11320 (set_attr "prefix_extra" "2")
11321 (set_attr "length_immediate" "1")
11322 (set_attr "mode" "TI")])
11324 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11325 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11326 ;; the exact instruction generated for the intrinsic.
;; Same vpcom%Y1u<ssemodesuffix> output as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11327 (define_insn "xop_maskcmp_uns2<mode>3"
11328 [(set (match_operand:VI_128 0 "register_operand" "=x")
11330 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11331 [(match_operand:VI_128 2 "register_operand" "x")
11332 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11333 UNSPEC_XOP_UNSIGNED_CMP))]
11335 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11336 [(set_attr "type" "ssecmp")
11337 (set_attr "prefix_data16" "0")
11338 (set_attr "prefix_extra" "2")
11339 (set_attr "length_immediate" "1")
11340 (set_attr "mode" "TI")])
11342 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11343 ;; being added here to be complete.
;; Emits vpcomtrue<ssemodesuffix> when the constant in operand 3 is nonzero
;; and vpcomfalse<ssemodesuffix> when it is zero.  The operands are wrapped
;; in UNSPEC_XOP_TRUEFALSE.
11344 (define_insn "xop_pcom_tf<mode>3"
11345 [(set (match_operand:VI_128 0 "register_operand" "=x")
11347 [(match_operand:VI_128 1 "register_operand" "x")
11348 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11349 (match_operand:SI 3 "const_int_operand" "n")]
11350 UNSPEC_XOP_TRUEFALSE))]
11353 return ((INTVAL (operands[3]) != 0)
11354 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11355 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11357 [(set_attr "type" "ssecmp")
11358 (set_attr "prefix_data16" "0")
11359 (set_attr "prefix_extra" "2")
11360 (set_attr "length_immediate" "1")
11361 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> for the VF modes: two VF sources
;; (operands 1 and 2), a selector vector of mode <sseintvecmode> (operand 3,
;; register or memory) and a constant accepted by const_0_to_3_operand
;; (operand 4).
;; NOTE(review): the "%" modifier on operand 2 marks it as commutative with
;; the following operand, which here is operand 3 in a different mode --
;; confirm this is intended.
11363 (define_insn "xop_vpermil2<mode>3"
11364 [(set (match_operand:VF 0 "register_operand" "=x")
11366 [(match_operand:VF 1 "register_operand" "x")
11367 (match_operand:VF 2 "nonimmediate_operand" "%x")
11368 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11369 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11372 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11373 [(set_attr "type" "sse4arg")
11374 (set_attr "length_immediate" "1")
11375 (set_attr "mode" "<MODE>")])
11377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Two alternatives selected by the "isa" attribute.  The noavx form emits
;; two-operand aesenc with operand 1 tied to the destination (constraint
;; "0"); the avx form emits three-operand vaesenc.
11379 (define_insn "aesenc"
11380 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11381 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11382 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11386 aesenc\t{%2, %0|%0, %2}
11387 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11388 [(set_attr "isa" "noavx,avx")
11389 (set_attr "type" "sselog1")
11390 (set_attr "prefix_extra" "1")
11391 (set_attr "prefix" "orig,vex")
11392 (set_attr "mode" "TI")])
;; UNSPEC_AESENCLAST on two V2DI operands.  The noavx alternative emits
;; two-operand aesenclast with operand 1 tied to the destination; the avx
;; alternative emits three-operand vaesenclast.
11394 (define_insn "aesenclast"
11395 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11396 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11397 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11398 UNSPEC_AESENCLAST))]
11401 aesenclast\t{%2, %0|%0, %2}
11402 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11403 [(set_attr "isa" "noavx,avx")
11404 (set_attr "type" "sselog1")
11405 (set_attr "prefix_extra" "1")
11406 (set_attr "prefix" "orig,vex")
11407 (set_attr "mode" "TI")])
;; Two alternatives selected by the "isa" attribute.  The noavx form emits
;; two-operand aesdec with operand 1 tied to the destination (constraint
;; "0"); the avx form emits three-operand vaesdec.
11409 (define_insn "aesdec"
11410 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11411 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11412 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11416 aesdec\t{%2, %0|%0, %2}
11417 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11418 [(set_attr "isa" "noavx,avx")
11419 (set_attr "type" "sselog1")
11420 (set_attr "prefix_extra" "1")
11421 (set_attr "prefix" "orig,vex")
11422 (set_attr "mode" "TI")])
;; UNSPEC_AESDECLAST on two V2DI operands.  The noavx alternative emits
;; two-operand aesdeclast with operand 1 tied to the destination; the avx
;; alternative emits three-operand vaesdeclast.
11424 (define_insn "aesdeclast"
11425 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11427 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11428 UNSPEC_AESDECLAST))]
11431 aesdeclast\t{%2, %0|%0, %2}
11432 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11433 [(set_attr "isa" "noavx,avx")
11434 (set_attr "type" "sselog1")
11435 (set_attr "prefix_extra" "1")
11436 (set_attr "prefix" "orig,vex")
11437 (set_attr "mode" "TI")])
;; "aesimc": single-input V2DI unspec; operand 1 may be a register or memory.
;; One alternative only; the "%v" in the template together with prefix
;; "maybe_vex" presumably selects the VEX spelling when AVX is enabled
;; (NOTE(review): confirm against the operand-printing code).
11439 (define_insn "aesimc"
11440 [(set (match_operand:V2DI 0 "register_operand" "=x")
11441 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11444 "%vaesimc\t{%1, %0|%0, %1}"
11445 [(set_attr "type" "sselog1")
11446 (set_attr "prefix_extra" "1")
11447 (set_attr "prefix" "maybe_vex")
11448 (set_attr "mode" "TI")])
;; "aeskeygenassist": V2DI unspec (UNSPEC_AESKEYGENASSIST) of operand 1
;; (register or memory) and operand 2, an immediate accepted by
;; const_0_to_255_operand.  length_immediate is 1 for that immediate byte.
11450 (define_insn "aeskeygenassist"
11451 [(set (match_operand:V2DI 0 "register_operand" "=x")
11452 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11453 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11454 UNSPEC_AESKEYGENASSIST))]
11456 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11457 [(set_attr "type" "sselog1")
11458 (set_attr "prefix_extra" "1")
11459 (set_attr "length_immediate" "1")
11460 (set_attr "prefix" "maybe_vex")
11461 (set_attr "mode" "TI")])
;; "pclmulqdq": V2DI unspec of operand 1 (register), operand 2 (register or
;; memory) and operand 3 (immediate accepted by const_0_to_255_operand).
;; Alternative 0 (isa "noavx") ties operand 1 to the destination; alternative 1
;; (isa "avx") prints the non-destructive "vpclmulqdq" form.
11463 (define_insn "pclmulqdq"
11464 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11465 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11466 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11467 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11471 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11472 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11473 [(set_attr "isa" "noavx,avx")
11474 (set_attr "type" "sselog1")
11475 (set_attr "prefix_extra" "1")
11476 (set_attr "length_immediate" "1")
11477 (set_attr "prefix" "orig,vex")
11478 (set_attr "mode" "TI")])
;; Expander for vzeroall.  Builds operand 0 as a PARALLEL with nregs + 1
;; elements, where nregs is 16 when TARGET_64BIT and 8 otherwise: element 0
;; is an UNSPEC_VOLATILE wrapping const0_rtx, and elements 1..nregs are SETs
;; of each V8SImode SSE register (SSE_REGNO (regno)) to CONST0_RTX (V8SImode).
11480 (define_expand "avx_vzeroall"
11481 [(match_par_dup 0 [(const_int 0)])]
11484 int nregs = TARGET_64BIT ? 16 : 8;
11487 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11489 XVECEXP (operands[0], 0, 0)
11490 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11493 for (regno = 0; regno < nregs; regno++)
11494 XVECEXP (operands[0], 0, regno + 1)
11495 = gen_rtx_SET (VOIDmode,
11496 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11497 CONST0_RTX (V8SImode));
;; Matching insn for vzeroall: a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; The attributes mark it as having no modrm byte and no memory access.
11500 (define_insn "*avx_vzeroall"
11501 [(match_parallel 0 "vzeroall_operation"
11502 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11505 [(set_attr "type" "sse")
11506 (set_attr "modrm" "0")
11507 (set_attr "memory" "none")
11508 (set_attr "prefix" "vex")
11509 (set_attr "mode" "OI")])
;; vzeroupper is represented as an UNSPECV_VZEROUPPER unspec_volatile that
;; carries one const_int operand (operand 0); how that constant is consumed
;; is not visible in this pattern.  No modrm byte, no memory access.
11511 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11512 ;; if the upper 128bits are unused.
11513 (define_insn "avx_vzeroupper"
11514 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11515 UNSPECV_VZEROUPPER)]
11518 [(set_attr "type" "sse")
11519 (set_attr "modrm" "0")
11520 (set_attr "memory" "none")
11521 (set_attr "prefix" "vex")
11522 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the 128-bit vector mode with the same
;; element type: 256-bit modes map to their half-width counterpart and
;; 128-bit modes map to themselves.
11524 (define_mode_attr AVXTOSSEMODE
11525 [(V4DI "V2DI") (V2DI "V2DI")
11526 (V8SI "V4SI") (V4SI "V4SI")
11527 (V16HI "V8HI") (V8HI "V8HI")
11528 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast{b,w,d,q}: the source (operand 1, register or memory) has the
;; 128-bit mode given by <AVXTOSSEMODE>, and element 0 of it is selected as
;; the scalar that feeds the VI-mode destination.
11530 (define_insn "avx2_pbroadcast<mode>"
11531 [(set (match_operand:VI 0 "register_operand" "=x")
11533 (vec_select:<ssescalarmode>
11534 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11535 (parallel [(const_int 0)]))))]
11537 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11538 [(set_attr "type" "ssemov")
11539 (set_attr "prefix_extra" "1")
11540 (set_attr "prefix" "vex")
11541 (set_attr "mode" "<sseinsnmode>")])
;; vpermd on V8SI: operand 1 must be a register, operand 2 may be a register
;; or memory.  Emitted as "vpermd %2, %1, %0" in AT&T syntax.
11543 (define_insn "avx2_permvarv8si"
11544 [(set (match_operand:V8SI 0 "register_operand" "=x")
11546 [(match_operand:V8SI 1 "register_operand" "x")
11547 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11550 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11551 [(set_attr "type" "sselog")
11552 (set_attr "prefix" "vex")
11553 (set_attr "mode" "OI")])
;; vpermpd with an immediate selector (operand 2, 0..255) applied to the
;; V4DF source in operand 1.  Operand 1 uses nonimmediate_operand so that
;; the predicate admits the memory form its "xm" constraint already
;; describes; a register-only predicate would reject memory sources at
;; recognition time even though the instruction accepts them.
11555 (define_insn "avx2_permv4df"
11556 [(set (match_operand:V4DF 0 "register_operand" "=x")
11558 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11559 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11562 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11563 [(set_attr "type" "sselog")
11564 (set_attr "prefix_extra" "1")
11565 (set_attr "prefix" "vex")
11566 (set_attr "mode" "OI")])
;; vpermps on V8SF: operand 1 must be a register, operand 2 may be a register
;; or memory.  Emitted as "vpermps %2, %1, %0" in AT&T syntax.
11568 (define_insn "avx2_permvarv8sf"
11569 [(set (match_operand:V8SF 0 "register_operand" "=x")
11571 [(match_operand:V8SF 1 "register_operand" "x")
11572 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11575 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11576 [(set_attr "type" "sselog")
11577 (set_attr "prefix" "vex")
11578 (set_attr "mode" "OI")])
;; vpermq with an immediate selector (operand 2, 0..255) applied to the
;; V4DI source in operand 1.  Operand 1 uses nonimmediate_operand so that
;; the predicate admits the memory form its "xm" constraint already
;; describes; a register-only predicate would reject memory sources at
;; recognition time even though the instruction accepts them.
11580 (define_insn "avx2_permv4di"
11581 [(set (match_operand:V4DI 0 "register_operand" "=x")
11583 [(match_operand:V4DI 1 "nonimmediate_operand" "xm")
11584 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11587 "vpermq\t{%2, %1, %0|%0, %1, %2}"
11588 [(set_attr "type" "sselog")
11589 (set_attr "prefix" "vex")
11590 (set_attr "mode" "OI")])
;; vperm2i128: combines the two V4DI inputs (operands 1 and 2) under the
;; control of the immediate in operand 3 (0..255).  Operand 2 uses
;; nonimmediate_operand so that the predicate admits the memory form its
;; "xm" constraint already describes.
11592 (define_insn "avx2_permv2ti"
11593 [(set (match_operand:V4DI 0 "register_operand" "=x")
11595 [(match_operand:V4DI 1 "register_operand" "x")
11596 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11597 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11600 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11601 [(set_attr "type" "sselog")
11602 (set_attr "prefix" "vex")
11603 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: duplicates into V4DF the value selected as
;; element 0 of the V2DF register in operand 1.
11605 (define_insn "avx2_vec_dupv4df"
11606 [(set (match_operand:V4DF 0 "register_operand" "=x")
11607 (vec_duplicate:V4DF
11609 (match_operand:V2DF 1 "register_operand" "x")
11610 (parallel [(const_int 0)]))))]
11612 "vbroadcastsd\t{%1, %0|%0, %1}"
11613 [(set_attr "type" "sselog1")
11614 (set_attr "prefix" "vex")
11615 (set_attr "mode" "V4DF")])
;; The four 256-bit modes with 32-bit or 64-bit elements (integer and float).
11617 ;; Modes handled by AVX vec_dup patterns.
11618 (define_mode_iterator AVX_VEC_DUP_MODE
11619 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into a 256-bit vector.  Alternative 0 takes the
;; scalar from memory and prints vbroadcast<ssescalarmodesuffix>; alternative
;; 1 takes it from a register and is disparaged with "?" (its output
;; template is not visible here).
11621 (define_insn "vec_dup<mode>"
11622 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11623 (vec_duplicate:AVX_VEC_DUP_MODE
11624 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11627 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11629 [(set_attr "type" "ssemov")
11630 (set_attr "prefix_extra" "1")
11631 (set_attr "prefix" "vex")
11632 (set_attr "mode" "V8SF")])
;; vbroadcasti128: the source (operand 1) is a half-width vector that must
;; be in memory; the destination is a 256-bit integer vector register.
11634 (define_insn "avx2_vbroadcasti128_<mode>"
11635 [(set (match_operand:VI_256 0 "register_operand" "=x")
11637 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11640 "vbroadcasti128\t{%1, %0|%0, %1}"
11641 [(set_attr "type" "ssemov")
11642 (set_attr "prefix_extra" "1")
11643 (set_attr "prefix" "vex")
11644 (set_attr "mode" "OI")])
;; Pattern for a vec_duplicate whose scalar source is a register, active
;; under "TARGET_AVX && reload_completed".  The replacement first duplicates
;; the scalar into operand 2 in the half-width mode, then concatenates
;; operand 2 with itself.  Operand 2 is the half-width view of the
;; destination's own hard register (REGNO (operands[0])).
11647 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11648 (vec_duplicate:AVX_VEC_DUP_MODE
11649 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11650 "TARGET_AVX && reload_completed"
11651 [(set (match_dup 2)
11652 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11654 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11655 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit to 256-bit broadcast with three alternatives for operand 1:
;;   memory           -> vbroadcastf128
;;   tied to dest "0" -> vinsertf128 $1, inserting %1 into %0
;;   other register   -> vperm2f128 $0 on %t1 (disparaged with "?")
;; NOTE(review): %t presumably prints the 256-bit name of the register.
11657 (define_insn "avx_vbroadcastf128_<mode>"
11658 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11660 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11664 vbroadcastf128\t{%1, %0|%0, %1}
11665 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11666 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11667 [(set_attr "type" "ssemov,sselog1,sselog1")
11668 (set_attr "prefix_extra" "1")
11669 (set_attr "length_immediate" "0,1,1")
11670 (set_attr "prefix" "vex")
11671 (set_attr "mode" "V4SF,V8SF,V8SF")])
;; V4SF variant.  Operand 3 is the first selector of the parallel accepted
;; by avx_vbroadcast_operand and gives the element index ELT.  The output
;; code switches on which_alternative (the case labels are not visible
;; here): one path rewrites operand 1 to address the SFmode element at byte
;; offset ELT * 4 and emits vbroadcastss; the other emits vpermilps with
;; immediate ELT * 0x55, i.e. the same 2-bit index in all four fields.
11673 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11674 ;; If it so happens that the input is in memory, use vbroadcast.
11675 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
11676 (define_insn "*avx_vperm_broadcast_v4sf"
11677 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11679 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11680 (match_parallel 2 "avx_vbroadcast_operand"
11681 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11684 int elt = INTVAL (operands[3]);
11685 switch (which_alternative)
11689 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11690 return "vbroadcastss\t{%1, %0|%0, %1}";
11692 operands[2] = GEN_INT (elt * 0x55);
11693 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11695 gcc_unreachable ();
11698 [(set_attr "type" "ssemov,ssemov,sselog1")
11699 (set_attr "prefix_extra" "1")
11700 (set_attr "length_immediate" "0,0,1")
11701 (set_attr "prefix" "vex")
11702 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float variant of the broadcast-as-vec_select pattern; it is split
;; after reload into a plain vec_duplicate of operand 1.  The preparation
;; code has two paths (the condition choosing between them is not visible
;; here):
;;  - emit vpermil to replicate element ELT within its 128-bit lane, then
;;    vperm2f128 with mask (ELT / (nelt / 2)) * 0x11 to copy that lane into
;;    both halves of the destination;
;;  - or rewrite operand 1 with adjust_address_nv to the scalar at byte
;;    offset ELT * GET_MODE_SIZE (scalar mode).
11704 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11705 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11707 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11708 (match_parallel 2 "avx_vbroadcast_operand"
11709 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11712 "&& reload_completed"
11713 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11715 rtx op0 = operands[0], op1 = operands[1];
11716 int elt = INTVAL (operands[3]);
11722 /* Shuffle element we care about into all elements of the 128-bit lane.
11723 The other lane gets shuffled too, but we don't care. */
11724 if (<MODE>mode == V4DFmode)
11725 mask = (elt & 1 ? 15 : 0);
11727 mask = (elt & 3) * 0x55;
11728 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11730 /* Shuffle the lane we care about into both lanes of the dest. */
11731 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11732 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11736 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11737 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate vpermilpd (DFmode vectors).  Decodes the immediate
;; in operand 2 into an explicit selector vector: element I takes bit I of
;; the mask, and for V4DF elements 2 and 3 are offset by 2 so they stay in
;; the upper 128-bit lane.  The selectors are wrapped in a PARALLEL.
11740 (define_expand "avx_vpermil<mode>"
11741 [(set (match_operand:VF2 0 "register_operand" "")
11743 (match_operand:VF2 1 "nonimmediate_operand" "")
11744 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11747 int mask = INTVAL (operands[2]);
11748 rtx perm[<ssescalarnum>];
11750 perm[0] = GEN_INT (mask & 1);
11751 perm[1] = GEN_INT ((mask >> 1) & 1);
11752 if (<MODE>mode == V4DFmode)
11754 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11755 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11759 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate vpermilps (SFmode vectors).  Decodes the immediate
;; in operand 2 into four 2-bit selectors for elements 0..3; for V8SF the
;; same four fields are reused for elements 4..7 with 4 added, so the upper
;; lane is permuted identically.  The selectors are wrapped in a PARALLEL.
11762 (define_expand "avx_vpermil<mode>"
11763 [(set (match_operand:VF1 0 "register_operand" "")
11765 (match_operand:VF1 1 "nonimmediate_operand" "")
11766 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11769 int mask = INTVAL (operands[2]);
11770 rtx perm[<ssescalarnum>];
11772 perm[0] = GEN_INT (mask & 3);
11773 perm[1] = GEN_INT ((mask >> 2) & 3);
11774 perm[2] = GEN_INT ((mask >> 4) & 3);
11775 perm[3] = GEN_INT ((mask >> 6) & 3);
11776 if (<MODE>mode == V8SFmode)
11778 perm[4] = GEN_INT ((mask & 3) + 4);
11779 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11780 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11781 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11785 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matching insn for immediate vpermilps/vpermilpd.  The selector parallel
;; (operand 2) is validated by avx_vpermilp_parallel; the output code calls
;; it again and subtracts 1 from its result to obtain the immediate, which
;; then replaces operand 2 before printing.
11788 (define_insn "*avx_vpermilp<mode>"
11789 [(set (match_operand:VF 0 "register_operand" "=x")
11791 (match_operand:VF 1 "nonimmediate_operand" "xm")
11792 (match_parallel 2 ""
11793 [(match_operand 3 "const_int_operand" "")])))]
11795 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11797 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11798 operands[2] = GEN_INT (mask);
11799 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11801 [(set_attr "type" "sselog")
11802 (set_attr "prefix_extra" "1")
11803 (set_attr "length_immediate" "1")
11804 (set_attr "prefix" "vex")
11805 (set_attr "mode" "<MODE>")])
;; Variable-control vpermilps/vpermilpd: operand 1 is the float vector (must
;; be a register) and operand 2 is an integer vector of the matching
;; <sseintvecmode>, in a register or memory.
11807 (define_insn "avx_vpermilvar<mode>3"
11808 [(set (match_operand:VF 0 "register_operand" "=x")
11810 [(match_operand:VF 1 "register_operand" "x")
11811 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11814 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11815 [(set_attr "type" "sselog")
11816 (set_attr "prefix_extra" "1")
11817 (set_attr "prefix" "vex")
11818 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default RTL is the UNSPEC_VPERMIL2F128
;; form.  When neither zeroing bit of the immediate is set
;; ((mask & 0x88) == 0), the code instead builds a SET of operand 0 from a
;; vec_select over (vec_concat operand1 operand2): the low half of the
;; result starts at element (mask & 3) * nelt2 and the high half at
;; ((mask >> 4) & 3) * nelt2, where nelt2 is half the element count.
;; What is done with the built SET is not visible in this excerpt.
11820 (define_expand "avx_vperm2f128<mode>3"
11821 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11822 (unspec:AVX256MODE2P
11823 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11824 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11825 (match_operand:SI 3 "const_0_to_255_operand" "")]
11826 UNSPEC_VPERMIL2F128))]
11829 int mask = INTVAL (operands[3]);
11830 if ((mask & 0x88) == 0)
11832 rtx perm[<ssescalarnum>], t1, t2;
11833 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11835 base = (mask & 3) * nelt2;
11836 for (i = 0; i < nelt2; ++i)
11837 perm[i] = GEN_INT (base + i);
11839 base = ((mask >> 4) & 3) * nelt2;
11840 for (i = 0; i < nelt2; ++i)
11841 perm[i + nelt2] = GEN_INT (base + i);
11843 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11844 operands[1], operands[2]);
11845 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11846 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11847 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
;; Unspec form of vperm2f128, accepting any immediate 0..255 in operand 3.
;; Operand 1 must be a register; operand 2 may be a register or memory.
11853 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11854 ;; means that in order to represent this properly in rtl we'd have to
11855 ;; nest *another* vec_concat with a zero operand and do the select from
11856 ;; a 4x wide vector. That doesn't seem very nice.
11857 (define_insn "*avx_vperm2f128<mode>_full"
11858 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11859 (unspec:AVX256MODE2P
11860 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11861 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11862 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11863 UNSPEC_VPERMIL2F128))]
11865 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11866 [(set_attr "type" "sselog")
11867 (set_attr "prefix_extra" "1")
11868 (set_attr "length_immediate" "1")
11869 (set_attr "prefix" "vex")
11870 (set_attr "mode" "V8SF")])
;; vec_select form of vperm2f128: selects from the concatenation of operands
;; 1 and 2.  The selector parallel (operand 3) is validated by
;; avx_vperm2f128_parallel; the output code calls it again and subtracts 1
;; from its result to obtain the immediate that replaces operand 3.
11872 (define_insn "*avx_vperm2f128<mode>_nozero"
11873 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11874 (vec_select:AVX256MODE2P
11875 (vec_concat:<ssedoublevecmode>
11876 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11877 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11878 (match_parallel 3 ""
11879 [(match_operand 4 "const_int_operand" "")])))]
11881 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11883 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11884 operands[3] = GEN_INT (mask);
11885 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11887 [(set_attr "type" "sselog")
11888 (set_attr "prefix_extra" "1")
11889 (set_attr "length_immediate" "1")
11890 (set_attr "prefix" "vex")
11891 (set_attr "mode" "V8SF")])
;; Expander for vinsertf128.  Operand 3 (0 or 1) selects between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> generators, which is then called
;; with operands 0, 1 and 2; any other value is unreachable.
11893 (define_expand "avx_vinsertf128<mode>"
11894 [(match_operand:V_256 0 "register_operand" "")
11895 (match_operand:V_256 1 "register_operand" "")
11896 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
11897 (match_operand:SI 3 "const_0_to_1_operand" "")]
11900 rtx (*insn)(rtx, rtx, rtx);
11902 switch (INTVAL (operands[3]))
11905 insn = gen_vec_set_lo_<mode>;
11908 insn = gen_vec_set_hi_<mode>;
11911 gcc_unreachable ();
11914 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 $0: operand 2 (V2DI, register or memory) is combined with
;; elements 2 and 3 selected from operand 1.
11918 (define_insn "avx2_vec_set_lo_v4di"
11919 [(set (match_operand:V4DI 0 "register_operand" "=x")
11921 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11923 (match_operand:V4DI 1 "register_operand" "x")
11924 (parallel [(const_int 2) (const_int 3)]))))]
11926 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11927 [(set_attr "type" "sselog")
11928 (set_attr "prefix_extra" "1")
11929 (set_attr "length_immediate" "1")
11930 (set_attr "prefix" "vex")
11931 (set_attr "mode" "OI")])
;; vinserti128 $1: elements 0 and 1 selected from operand 1 are combined
;; with operand 2 (V2DI, register or memory).
11933 (define_insn "avx2_vec_set_hi_v4di"
11934 [(set (match_operand:V4DI 0 "register_operand" "=x")
11937 (match_operand:V4DI 1 "register_operand" "x")
11938 (parallel [(const_int 0) (const_int 1)]))
11939 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11941 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11942 [(set_attr "type" "sselog")
11943 (set_attr "prefix_extra" "1")
11944 (set_attr "length_immediate" "1")
11945 (set_attr "prefix" "vex")
11946 (set_attr "mode" "OI")])
;; Replace the low 128 bits of a 4-element 256-bit vector: the result is
;; operand 2 (new low half) concatenated with elements 2..3 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11948 (define_insn "vec_set_lo_<mode>"
11949 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11950 (vec_concat:VI8F_256
11951 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11952 (vec_select:<ssehalfvecmode>
11953 (match_operand:VI8F_256 1 "register_operand" "x")
11954 (parallel [(const_int 2) (const_int 3)]))))]
11956 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11957 [(set_attr "type" "sselog")
11958 (set_attr "prefix_extra" "1")
11959 (set_attr "length_immediate" "1")
11960 (set_attr "prefix" "vex")
11961 (set_attr "mode" "V4DF")])
;; Replace the high 128 bits of a 4-element 256-bit vector: the result is
;; elements 0..1 of operand 1 concatenated with operand 2 (new high half).
;; Emitted as vinsertf128 with immediate 1.
11963 (define_insn "vec_set_hi_<mode>"
11964 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11965 (vec_concat:VI8F_256
11966 (vec_select:<ssehalfvecmode>
11967 (match_operand:VI8F_256 1 "register_operand" "x")
11968 (parallel [(const_int 0) (const_int 1)]))
11969 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11971 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11972 [(set_attr "type" "sselog")
11973 (set_attr "prefix_extra" "1")
11974 (set_attr "length_immediate" "1")
11975 (set_attr "prefix" "vex")
11976 (set_attr "mode" "V4DF")])
;; Replace the low 128 bits of an 8-element 256-bit vector: the result is
;; operand 2 (new low half) concatenated with elements 4..7 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11978 (define_insn "vec_set_lo_<mode>"
11979 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11980 (vec_concat:VI4F_256
11981 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11982 (vec_select:<ssehalfvecmode>
11983 (match_operand:VI4F_256 1 "register_operand" "x")
11984 (parallel [(const_int 4) (const_int 5)
11985 (const_int 6) (const_int 7)]))))]
11987 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11988 [(set_attr "type" "sselog")
11989 (set_attr "prefix_extra" "1")
11990 (set_attr "length_immediate" "1")
11991 (set_attr "prefix" "vex")
11992 (set_attr "mode" "V8SF")])
;; Replace the high 128 bits of an 8-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2 (new high half).
;; Emitted as vinsertf128 with immediate 1.
11994 (define_insn "vec_set_hi_<mode>"
11995 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11996 (vec_concat:VI4F_256
11997 (vec_select:<ssehalfvecmode>
11998 (match_operand:VI4F_256 1 "register_operand" "x")
11999 (parallel [(const_int 0) (const_int 1)
12000 (const_int 2) (const_int 3)]))
12001 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12003 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12004 [(set_attr "type" "sselog")
12005 (set_attr "prefix_extra" "1")
12006 (set_attr "length_immediate" "1")
12007 (set_attr "prefix" "vex")
12008 (set_attr "mode" "V8SF")])
;; V16HI low-half replacement: operand 2 (V8HI, register or memory) is
;; combined with elements 8..15 selected from operand 1.  Emitted as
;; vinsertf128 with immediate 0.
12010 (define_insn "vec_set_lo_v16hi"
12011 [(set (match_operand:V16HI 0 "register_operand" "=x")
12013 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12015 (match_operand:V16HI 1 "register_operand" "x")
12016 (parallel [(const_int 8) (const_int 9)
12017 (const_int 10) (const_int 11)
12018 (const_int 12) (const_int 13)
12019 (const_int 14) (const_int 15)]))))]
12021 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12022 [(set_attr "type" "sselog")
12023 (set_attr "prefix_extra" "1")
12024 (set_attr "length_immediate" "1")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "V8SF")])
;; V16HI high-half replacement: elements 0..7 selected from operand 1 are
;; combined with operand 2 (V8HI, register or memory).  Emitted as
;; vinsertf128 with immediate 1.
12028 (define_insn "vec_set_hi_v16hi"
12029 [(set (match_operand:V16HI 0 "register_operand" "=x")
12032 (match_operand:V16HI 1 "register_operand" "x")
12033 (parallel [(const_int 0) (const_int 1)
12034 (const_int 2) (const_int 3)
12035 (const_int 4) (const_int 5)
12036 (const_int 6) (const_int 7)]))
12037 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12039 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12040 [(set_attr "type" "sselog")
12041 (set_attr "prefix_extra" "1")
12042 (set_attr "length_immediate" "1")
12043 (set_attr "prefix" "vex")
12044 (set_attr "mode" "V8SF")])
;; V32QI low-half replacement: operand 2 (V16QI, register or memory) is
;; combined with elements 16..31 selected from operand 1.  Emitted as
;; vinsertf128 with immediate 0.
12046 (define_insn "vec_set_lo_v32qi"
12047 [(set (match_operand:V32QI 0 "register_operand" "=x")
12049 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12051 (match_operand:V32QI 1 "register_operand" "x")
12052 (parallel [(const_int 16) (const_int 17)
12053 (const_int 18) (const_int 19)
12054 (const_int 20) (const_int 21)
12055 (const_int 22) (const_int 23)
12056 (const_int 24) (const_int 25)
12057 (const_int 26) (const_int 27)
12058 (const_int 28) (const_int 29)
12059 (const_int 30) (const_int 31)]))))]
12061 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12062 [(set_attr "type" "sselog")
12063 (set_attr "prefix_extra" "1")
12064 (set_attr "length_immediate" "1")
12065 (set_attr "prefix" "vex")
12066 (set_attr "mode" "V8SF")])
;; V32QI high-half replacement: elements 0..15 selected from operand 1 are
;; combined with operand 2 (V16QI, register or memory).  Emitted as
;; vinsertf128 with immediate 1.
12068 (define_insn "vec_set_hi_v32qi"
12069 [(set (match_operand:V32QI 0 "register_operand" "=x")
12072 (match_operand:V32QI 1 "register_operand" "x")
12073 (parallel [(const_int 0) (const_int 1)
12074 (const_int 2) (const_int 3)
12075 (const_int 4) (const_int 5)
12076 (const_int 6) (const_int 7)
12077 (const_int 8) (const_int 9)
12078 (const_int 10) (const_int 11)
12079 (const_int 12) (const_int 13)
12080 (const_int 14) (const_int 15)]))
12081 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12083 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12084 [(set_attr "type" "sselog")
12085 (set_attr "prefix_extra" "1")
12086 (set_attr "length_immediate" "1")
12087 (set_attr "prefix" "vex")
12088 (set_attr "mode" "V8SF")])
;; Named expander for masked loads.  Operand 0 is the destination register,
;; operand 1 the memory source and operand 2 an integer-vector register of
;; the matching <sseintvecmode> (presumably the mask; the operation wrapping
;; these operands is not visible in this excerpt).
12090 (define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12091 [(set (match_operand:V48_AVX2 0 "register_operand" "")
12093 [(match_operand:<sseintvecmode> 2 "register_operand" "")
12094 (match_operand:V48_AVX2 1 "memory_operand" "")
;; Named expander for masked stores.  Operand 0 is the memory destination,
;; operand 1 an integer-vector register of the matching <sseintvecmode>
;; (presumably the mask) and operand 2 the register holding the data.
12099 (define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12100 [(set (match_operand:V48_AVX2 0 "memory_operand" "")
12102 [(match_operand:<sseintvecmode> 1 "register_operand" "")
12103 (match_operand:V48_AVX2 2 "register_operand" "")
;; vpmaskmov{d,q} for integer vectors, covering both directions: alternative
;; 0 loads from memory into a register, alternative 1 stores a register to
;; memory.  The visible condition clause REG_P (operands[0]) == MEM_P
;; (operands[2]) rejects register-to-register and memory-to-memory forms.
12108 (define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
12109 [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
12111 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12112 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
12116 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12117 "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12118 [(set_attr "type" "sselog1")
12119 (set_attr "prefix_extra" "1")
12120 (set_attr "prefix" "vex")
12121 (set_attr "mode" "<sseinsnmode>")])
;; vmaskmovps/vmaskmovpd for float vectors, covering both directions:
;; alternative 0 loads from memory into a register, alternative 1 stores a
;; register to memory.  The visible condition clause REG_P (operands[0]) ==
;; MEM_P (operands[2]) rejects register-to-register and memory-to-memory
;; forms.
12123 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
12124 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
12126 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12127 (match_operand:VF 2 "nonimmediate_operand" "m,x")
12131 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12132 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12133 [(set_attr "type" "sselog1")
12134 (set_attr "prefix_extra" "1")
12135 (set_attr "prefix" "vex")
12136 (set_attr "mode" "<MODE>")])
;; Cast from a half-width vector to the 256-bit mode, kept as an unspec
;; until reload has completed and then split into an ordinary move.  The
;; split code re-views either the destination in the half-width mode or the
;; source in the full mode using the same hard register number (the
;; conditions selecting between the two are not visible here) and calls
;; emit_move_insn.
12138 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12139 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12140 (unspec:AVX256MODE2P
12141 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12145 "&& reload_completed"
12148 rtx op0 = operands[0];
12149 rtx op1 = operands[1];
12151 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12153 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12154 emit_move_insn (op0, op1);
;; vec_init for 256-bit vector modes: delegates to ix86_expand_vector_init,
;; passing false as its first argument, the destination register (operand 0)
;; and the initializer (operand 1, no predicate).
12158 (define_expand "vec_init<mode>"
12159 [(match_operand:V_256 0 "register_operand" "")
12160 (match_operand 1 "" "")]
12163 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for vextracti128.  Operand 2 (0 or 1) selects between the
;; vec_extract_lo_v4di and vec_extract_hi_v4di generators, which is then
;; called with operands 0 and 1; any other value is unreachable.
12167 (define_expand "avx2_extracti128"
12168 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12169 (match_operand:V4DI 1 "register_operand" "")
12170 (match_operand:SI 2 "const_0_to_1_operand" "")]
12173 rtx (*insn)(rtx, rtx);
12175 switch (INTVAL (operands[2]))
12178 insn = gen_vec_extract_lo_v4di;
12181 insn = gen_vec_extract_hi_v4di;
12184 gcc_unreachable ();
12187 emit_insn (insn (operands[0], operands[1]));
;; Expander for vinserti128.  Operand 3 (0 or 1) selects between the
;; avx2_vec_set_lo_v4di and avx2_vec_set_hi_v4di generators, which is then
;; called with operands 0, 1 and 2; any other value is unreachable.
12191 (define_expand "avx2_inserti128"
12192 [(match_operand:V4DI 0 "register_operand" "")
12193 (match_operand:V4DI 1 "register_operand" "")
12194 (match_operand:V2DI 2 "nonimmediate_operand" "")
12195 (match_operand:SI 3 "const_0_to_1_operand" "")]
12198 rtx (*insn)(rtx, rtx, rtx);
12200 switch (INTVAL (operands[3]))
12203 insn = gen_avx2_vec_set_lo_v4di;
12206 insn = gen_avx2_vec_set_hi_v4di;
12209 gcc_unreachable ();
12212 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vpsravd on V8SI: a per-element variable shift of operand 1 by the
;; matching element of operand 2, spelled out element by element.  The first
;; group of selectors describes result elements 0..3 and the second group
;; result elements 4..7.  The second group must therefore select source
;; elements 4..7; re-selecting 0..3 there would describe a result whose
;; upper half is a copy of the lower half, which is not what the
;; instruction computes.
12216 (define_insn "avx2_ashrvv8si"
12217 [(set (match_operand:V8SI 0 "register_operand" "=x")
12223 (match_operand:V8SI 1 "register_operand" "x")
12224 (parallel [(const_int 0)]))
12226 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12227 (parallel [(const_int 0)])))
12231 (parallel [(const_int 1)]))
12234 (parallel [(const_int 1)]))))
12239 (parallel [(const_int 2)]))
12242 (parallel [(const_int 2)])))
12246 (parallel [(const_int 3)]))
12249 (parallel [(const_int 3)])))))
12255 (parallel [(const_int 4)]))
12258 (parallel [(const_int 4)])))
12262 (parallel [(const_int 5)]))
12265 (parallel [(const_int 5)]))))
12270 (parallel [(const_int 6)]))
12273 (parallel [(const_int 6)])))
12277 (parallel [(const_int 7)]))
12280 (parallel [(const_int 7)])))))))]
12282 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12283 [(set_attr "type" "sseishft")
12284 (set_attr "prefix" "vex")
12285 (set_attr "mode" "OI")])
;; vpsravd on V4SI: a per-element variable shift of operand 1 (register) by
;; operand 2 (register or memory), spelled out over elements 0..3.
12287 (define_insn "avx2_ashrvv4si"
12288 [(set (match_operand:V4SI 0 "register_operand" "=x")
12293 (match_operand:V4SI 1 "register_operand" "x")
12294 (parallel [(const_int 0)]))
12296 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12297 (parallel [(const_int 0)])))
12301 (parallel [(const_int 1)]))
12304 (parallel [(const_int 1)]))))
12309 (parallel [(const_int 2)]))
12312 (parallel [(const_int 2)])))
12316 (parallel [(const_int 3)]))
12319 (parallel [(const_int 3)]))))))]
12321 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12322 [(set_attr "type" "sseishft")
12323 (set_attr "prefix" "vex")
12324 (set_attr "mode" "TI")])
;; vpsllvd/vpsrlvd on V8SI (the <lshift> code iterator picks the direction):
;; a per-element variable shift of operand 1 by the matching element of
;; operand 2, spelled out element by element.  The first group of selectors
;; describes result elements 0..3 and the second group result elements
;; 4..7.  The second group must therefore select source elements 4..7;
;; re-selecting 0..3 there would describe a result whose upper half is a
;; copy of the lower half, which is not what the instruction computes.
12326 (define_insn "avx2_<lshift>vv8si"
12327 [(set (match_operand:V8SI 0 "register_operand" "=x")
12333 (match_operand:V8SI 1 "register_operand" "x")
12334 (parallel [(const_int 0)]))
12336 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12337 (parallel [(const_int 0)])))
12341 (parallel [(const_int 1)]))
12344 (parallel [(const_int 1)]))))
12349 (parallel [(const_int 2)]))
12352 (parallel [(const_int 2)])))
12356 (parallel [(const_int 3)]))
12359 (parallel [(const_int 3)])))))
12365 (parallel [(const_int 4)]))
12368 (parallel [(const_int 4)])))
12372 (parallel [(const_int 5)]))
12375 (parallel [(const_int 5)]))))
12380 (parallel [(const_int 6)]))
12383 (parallel [(const_int 6)])))
12387 (parallel [(const_int 7)]))
12390 (parallel [(const_int 7)])))))))]
12392 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12393 [(set_attr "type" "sseishft")
12394 (set_attr "prefix" "vex")
12395 (set_attr "mode" "OI")])
;; Variable logical shift for the four-element modes in VI4SD_AVX2: the
;; result is the concatenation of two halves, each a pair of scalar
;; <lshift> operations on elements 0..3 of operand 1 (register) shifted by
;; the matching element of operand 2 (register or memory).
12397 (define_insn "avx2_<lshift>v<mode>"
12398 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12399 (vec_concat:VI4SD_AVX2
12400 (vec_concat:<ssehalfvecmode>
12401 (lshift:<ssescalarmode>
12402 (vec_select:<ssescalarmode>
12403 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12404 (parallel [(const_int 0)]))
12405 (vec_select:<ssescalarmode>
12406 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12407 (parallel [(const_int 0)])))
12408 (lshift:<ssescalarmode>
12409 (vec_select:<ssescalarmode>
12411 (parallel [(const_int 1)]))
12412 (vec_select:<ssescalarmode>
12414 (parallel [(const_int 1)]))))
12415 (vec_concat:<ssehalfvecmode>
12416 (lshift:<ssescalarmode>
12417 (vec_select:<ssescalarmode>
12419 (parallel [(const_int 2)]))
12420 (vec_select:<ssescalarmode>
12422 (parallel [(const_int 2)])))
12423 (lshift:<ssescalarmode>
12424 (vec_select:<ssescalarmode>
12426 (parallel [(const_int 3)]))
12427 (vec_select:<ssescalarmode>
12429 (parallel [(const_int 3)]))))))]
12431 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12432 [(set_attr "type" "sseishft")
12433 (set_attr "prefix" "vex")
12434 (set_attr "mode" "<sseinsnmode>")])
;; vpsllvq/vpsrlvq on V2DI (the <lshift> code iterator picks the direction):
;; a per-element variable shift of operand 1 (register) by operand 2
;; (register or memory), spelled out over elements 0 and 1.
12436 (define_insn "avx2_<lshift>vv2di"
12437 [(set (match_operand:V2DI 0 "register_operand" "=x")
12441 (match_operand:V2DI 1 "register_operand" "x")
12442 (parallel [(const_int 0)]))
12444 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12445 (parallel [(const_int 0)])))
12449 (parallel [(const_int 1)]))
12452 (parallel [(const_int 1)])))))]
12454 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12455 [(set_attr "type" "sseishft")
12456 (set_attr "prefix" "vex")
12457 (set_attr "mode" "TI")])
;; Concatenate two half-width vectors into a 256-bit register.  The output
;; code switches on which_alternative (the case labels are not visible
;; here): one path emits vinsertf128 $1 to place operand 2 above operand 1;
;; the other (operand 2 matching constraint "C", presumably constant zero)
;; emits a 128-bit vmovaps/vmovapd/vmovdqa of operand 1 into %x0, chosen by
;; the insn's mode attribute.
12459 (define_insn "*vec_concat<mode>_avx"
12460 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12462 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12463 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12466 switch (which_alternative)
12469 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12471 switch (get_attr_mode (insn))
12474 return "vmovaps\t{%1, %x0|%x0, %1}";
12476 return "vmovapd\t{%1, %x0|%x0, %1}";
12478 return "vmovdqa\t{%1, %x0|%x0, %1}";
12481 gcc_unreachable ();
12484 [(set_attr "type" "sselog,ssemov")
12485 (set_attr "prefix_extra" "1,*")
12486 (set_attr "length_immediate" "1,*")
12487 (set_attr "prefix" "vex")
12488 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtph2ps from a register.  The conversion is modelled as a V8SF
;; unspec of the V8HI source, from which the parallel picks elements 0, 1,
;; 2 and 3 to form the V4SF result.  The selector must list four distinct
;; low elements; a selector of (0 1 1 2) would describe a result that
;; duplicates element 1 and drops element 3.
12490 (define_insn "vcvtph2ps"
12491 [(set (match_operand:V4SF 0 "register_operand" "=x")
12493 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12495 (parallel [(const_int 0) (const_int 1)
12496 (const_int 2) (const_int 3)])))]
12498 "vcvtph2ps\t{%1, %0|%0, %1}"
12499 [(set_attr "type" "ssecvt")
12500 (set_attr "prefix" "vex")
12501 (set_attr "mode" "V4SF")])
;; 128-bit vcvtph2ps with the source in memory: a V4SF unspec
;; (UNSPEC_VCVTPH2PS) of a V4HI memory operand.
12503 (define_insn "*vcvtph2ps_load"
12504 [(set (match_operand:V4SF 0 "register_operand" "=x")
12505 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12506 UNSPEC_VCVTPH2PS))]
12508 "vcvtph2ps\t{%1, %0|%0, %1}"
12509 [(set_attr "type" "ssecvt")
12510 (set_attr "prefix" "vex")
12511 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: a V8SF unspec (UNSPEC_VCVTPH2PS) of a V8HI source that
;; may be a register or memory.
12513 (define_insn "vcvtph2ps256"
12514 [(set (match_operand:V8SF 0 "register_operand" "=x")
12515 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12516 UNSPEC_VCVTPH2PS))]
12518 "vcvtph2ps\t{%1, %0|%0, %1}"
12519 [(set_attr "type" "ssecvt")
12520 (set_attr "prefix" "vex")
12521 (set_attr "mode" "V8SF")])
;; Expander for 128-bit vcvtps2ph with a register destination.  The
;; conversion itself is a V4HI unspec of operand 1 (V4SF) and the immediate
;; in operand 2; the preparation statement sets operand 3 to a V4HImode
;; zero vector.
12523 (define_expand "vcvtps2ph"
12524 [(set (match_operand:V8HI 0 "register_operand" "")
12526 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12527 (match_operand:SI 2 "const_0_to_255_operand" "")]
12531 "operands[3] = CONST0_RTX (V4HImode);")
;; Matching insn for 128-bit vcvtps2ph into a V8HI register: the V4HI unspec
;; of operands 1 and 2 is paired with operand 3, which must satisfy
;; const0_operand.
12533 (define_insn "*vcvtps2ph"
12534 [(set (match_operand:V8HI 0 "register_operand" "=x")
12536 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12537 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12539 (match_operand:V4HI 3 "const0_operand" "")))]
12541 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12542 [(set_attr "type" "ssecvt")
12543 (set_attr "prefix" "vex")
12544 (set_attr "mode" "V4SF")])
;; 128-bit vcvtps2ph storing directly to memory: the V4HI unspec
;; (UNSPEC_VCVTPS2PH) of operand 1 (V4SF register) and the immediate in
;; operand 2 is written to a V4HI memory destination.
12546 (define_insn "*vcvtps2ph_store"
12547 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12548 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12549 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12550 UNSPEC_VCVTPS2PH))]
12552 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12553 [(set_attr "type" "ssecvt")
12554 (set_attr "prefix" "vex")
12555 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: a V8HI unspec (UNSPEC_VCVTPS2PH) of operand 1 (V8SF
;; register) and the immediate in operand 2; the destination may be a
;; register or memory.
12557 (define_insn "vcvtps2ph256"
12558 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12559 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12560 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12561 UNSPEC_VCVTPS2PH))]
12563 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12564 [(set_attr "type" "ssecvt")
12565 (set_attr "prefix" "vex")
12566 (set_attr "mode" "V8SF")])
;; VEC_GATHER_MODE is both a mode iterator over the gather data modes and a
;; mode attribute of the same name.  The attribute maps each data mode to
;; an SImode-element vector mode (used for operand 3 of the gathersi
;; patterns): V4SI for every data mode except V8SI and V8SF, which map to
;; V8SI.
12568 ;; For gather* insn patterns
12569 (define_mode_iterator VEC_GATHER_MODE
12570 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
12571 (define_mode_attr VEC_GATHER_MODE
12572 [(V2DI "V4SI") (V2DF "V4SI")
12573 (V4DI "V4SI") (V4DF "V4SI")
12574 (V4SI "V4SI") (V4SF "V4SI")
12575 (V8SI "V8SI") (V8SF "V8SI")])
;; Named expander for gathers with an SImode-element vector in operand 3.
;; Operand 1 and operand 4 are registers of the data mode, operand 2 is a
;; scalar-mode memory reference, operand 3 has the mode given by the
;; VEC_GATHER_MODE attribute, and operand 5 is a constant accepted by
;; const1248_operand.  The predicate name is written without trailing
;; whitespace so that it names the same predicate as the matching insn.
12577 (define_expand "avx2_gathersi<mode>"
12578 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12579 (unspec:VEC_GATHER_MODE
12580 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12581 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12582 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12583 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12584 (match_operand:SI 5 "const1248_operand" "")]
;; Insn "*avx2_gathersi<mode>", instantiated for every mode in
;; VEC_GATHER_MODE.
;;   operand 0: destination register, constraint "=x"
;;   operand 1: register tied to operand 0 by constraint "0"
;;   operand 2: register (iterator P, constraint "r") used as the address
;;              of a <ssescalarmode> mem
;;   operand 3: register in the <VEC_GATHER_MODE> attribute mode, "x"
;;   operand 4: register in the iterated mode, "x"
;;   operand 5: SI constant matched by const1248_operand, "n"
;; The mnemonic is v<gthrfirstp>gatherd<gthrlastp>; the memory reference
;; is printed as "(%2, %3, %c5)" in both dialect alternatives.
;; NOTE(review): the embedded numbering skips 12597-12598, so part of
;; this pattern is not shown here; check it against the complete file.
12588 (define_insn "*avx2_gathersi<mode>"
12589 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
12590 (unspec:VEC_GATHER_MODE
12591 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
12592 (mem:<ssescalarmode>
12593 (match_operand:P 2 "register_operand" "r"))
12594 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
12595 (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
12596 (match_operand:SI 5 "const1248_operand" "n")]
12599 "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12600 [(set_attr "type" "ssemov")
12601 (set_attr "prefix" "vex")
12602 (set_attr "mode" "<sseinsnmode>")])
;; Expander "avx2_gatherdi<mode>", instantiated for every mode in
;; VEC_GATHER_MODE.
;;   operand 0: destination register, iterated mode
;;   operand 1: register, iterated mode
;;   operand 2: memory operand in <ssescalarmode>
;;   operand 3: register in the <AVXMODE48P_DI> attribute mode
;;   operand 4: register, iterated mode
;;   operand 5: SI operand matched by const1248_operand
;; The predicate name for operand 5 is written without trailing
;; whitespace, so it is spelled exactly as in the "*avx2_gatherdi<mode>"
;; insn.
;; NOTE(review): the embedded numbering skips 12612-12614, so the end of
;; this pattern is not shown here; check it against the complete file.
12604 (define_expand "avx2_gatherdi<mode>"
12605 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12606 (unspec:VEC_GATHER_MODE
12607 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12608 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12609 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12610 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12611 (match_operand:SI 5 "const1248_operand" "")]
;; Insn "*avx2_gatherdi<mode>", instantiated over the AVXMODE48P_DI
;; iterator (defined outside this excerpt).
;;   operand 0: destination register, constraint "=x"
;;   operand 1: register tied to operand 0 by constraint "0"
;;   operand 2: register (iterator P, constraint "r") used as the address
;;              of a <ssescalarmode> mem
;;   operand 3: register in the <AVXMODE48P_DI> attribute mode, "x"
;;   operand 4: register in the iterated mode, "x"
;;   operand 5: SI constant matched by const1248_operand, "n"
;; The mnemonic is v<gthrfirstp>gatherq<gthrlastp>; the memory reference
;; is printed as "(%2, %3, %c5)" in both dialect alternatives.
;; NOTE(review): the embedded numbering skips 12624-12625, so part of
;; this pattern is not shown here; check it against the complete file.
12615 (define_insn "*avx2_gatherdi<mode>"
12616 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
12617 (unspec:AVXMODE48P_DI
12618 [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
12619 (mem:<ssescalarmode>
12620 (match_operand:P 2 "register_operand" "r"))
12621 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
12622 (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
12623 (match_operand:SI 5 "const1248_operand" "n")]
12626 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12627 [(set_attr "type" "ssemov")
12628 (set_attr "prefix" "vex")
12629 (set_attr "mode" "<sseinsnmode>")])
12631 ;; Special handling for VEX.256 with float arguments,
12632 ;; since the operands are still xmm registers.
;; Expander "avx2_gatherdi<mode>256", instantiated over the VI4F_128
;; iterator (defined outside this excerpt).
;;   operand 0: destination register, iterated mode
;;   operand 1: register, iterated mode
;;   operand 2: memory operand in <ssescalarmode>
;;   operand 3: V4DI register
;;   operand 4: register, iterated mode
;;   operand 5: SI operand matched by const1248_operand
;; The predicate name for operand 5 is written without trailing
;; whitespace, so it is spelled exactly as in the
;; "*avx2_gatherdi<mode>256" insn.
;; NOTE(review): the embedded numbering skips 12635 and 12641-12643, so
;; part of this pattern is not shown here; check it against the complete
;; file.
12633 (define_expand "avx2_gatherdi<mode>256"
12634 [(set (match_operand:VI4F_128 0 "register_operand" "")
12636 [(match_operand:VI4F_128 1 "register_operand" "")
12637 (match_operand:<ssescalarmode> 2 "memory_operand" "")
12638 (match_operand:V4DI 3 "register_operand" "")
12639 (match_operand:VI4F_128 4 "register_operand" "")
12640 (match_operand:SI 5 "const1248_operand" "")]
;; Insn "*avx2_gatherdi<mode>256", instantiated over the VI4F_128
;; iterator (defined outside this excerpt).
;;   operand 0: destination register, constraint "=x"
;;   operand 1: register tied to operand 0 by constraint "0"
;;   operand 2: register (iterator P, constraint "r") used as the address
;;              of a <ssescalarmode> mem
;;   operand 3: V4DI register, "x"
;;   operand 4: register in the iterated mode, "x"
;;   operand 5: SI constant matched by const1248_operand, "n"
;; The mnemonic is v<gthrfirstp>gatherq<gthrlastp>; the memory reference
;; is printed as "(%2, %3, %c5)" in both dialect alternatives.
;; NOTE(review): the embedded numbering skips 12646 and 12653-12654, so
;; part of this pattern is not shown here; check it against the complete
;; file.
12644 (define_insn "*avx2_gatherdi<mode>256"
12645 [(set (match_operand:VI4F_128 0 "register_operand" "=x")
12647 [(match_operand:VI4F_128 1 "register_operand" "0")
12648 (mem:<ssescalarmode>
12649 (match_operand:P 2 "register_operand" "r"))
12650 (match_operand:V4DI 3 "register_operand" "x")
12651 (match_operand:VI4F_128 4 "register_operand" "x")
12652 (match_operand:SI 5 "const1248_operand" "n")]
12655 "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
12656 [(set_attr "type" "ssemov")
12657 (set_attr "prefix" "vex")
12658 (set_attr "mode" "<sseinsnmode>")])