1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; Entries written as (MODE "COND") are enabled only when COND holds.
;; In every pair below the first mode is tied to TARGET_AVX and the
;; second mode is listed bare.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V2TI/V1TI pair that V16 carries.  The first
;; mode of each pair requires TARGET_AVX; V2DF requires TARGET_SSE2.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the other entries are bare.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No entry carries a condition of its own in this list.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF requires TARGET_AVX; V4SF is unconditional.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF requires TARGET_AVX.  Unlike in VF, V2DF is listed here without a
;; TARGET_SSE2 condition.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V2DF requires TARGET_SSE2; V4SF is unconditional.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; This is the float subset of V_256; as in V_256, no entry carries a
;; condition of its own.
66 (define_mode_iterator VF_256
67 [V8SF V4DF])
69 ;; All vector integer modes
;; The first mode of each pair requires TARGET_AVX; the second is bare.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the first mode of each pair requires
;; TARGET_AVX2 instead of TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI requires TARGET_AVX; V16QI is unconditional.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI requires TARGET_AVX; V2DI is unconditional.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Same modes as VI1, with V32QI gated by TARGET_AVX2 instead of TARGET_AVX.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
;; The two HImode-element vector modes; V16HI requires TARGET_AVX2.
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
;; The two SImode-element vector modes; V8SI requires TARGET_AVX2.
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
;; Same modes as VI8, with V4DI gated by TARGET_AVX2 instead of TARGET_AVX.
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
;; The TImode-element vector modes: V2TI requires TARGET_AVX2, V1TI is
;; unconditional.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in its second entry: plain TI here,
;; V1TI there.  V2TI requires TARGET_AVX2 in both.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; Union of the VI1_AVX2 and VI2_AVX2 mode lists (QI and HI elements).
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
;; Union of the VI2_AVX2 and VI4_AVX2 mode lists (HI and SI elements).
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
;; VI_AVX2 without its V4DI/V2DI pair (QI, HI and SI elements only).
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
;; VI_AVX2 without its V32QI/V16QI pair (HI, SI and DI elements only).
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
;; Union of the VI4_AVX2 and VI8_AVX2 mode lists (SI and DI elements).
128 (define_mode_iterator VI48_AVX2
129 [(V8SI "TARGET_AVX2") V4SI
130 (V4DI "TARGET_AVX2") V2DI])
;; Vector modes with SI/SF or DI/DF elements.  The float modes are
;; unconditional; every integer mode, 128bit ones included, requires
;; TARGET_AVX2.
;; NOTE(review): the two float-mode lines are restored from the upstream
;; table; confirm against the reference copy of this file.
132 (define_mode_iterator V48_AVX2
133 [V4SF V2DF
134 V8SF V4DF
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps the first mode of each pair to "sse2" and the second to "avx2".
;; The keys include the V1TI/V2TI pair.
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
;; Maps the first mode of each pair to "ssse3" and the second to "avx2".
;; The last pair is keyed on TI (not V1TI), matching the modes of the
;; SSESCALARMODE iterator.
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
150 (TI "ssse3") (V2TI "avx2")])
;; Maps the first mode of each pair to "sse4_1" and the second to "avx2".
;; There is no TImode entry here.
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps the four float vector modes to "avx" and the SI/DI integer vector
;; modes to "avx2", regardless of vector size.
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
;; Maps the first mode of each pair to "vec" and the second to "avx2".
164 (define_mode_attr vec_avx2
165 [(V16QI "vec") (V32QI "avx2")
166 (V8HI "vec") (V16HI "avx2")
167 (V4SI "vec") (V8SI "avx2")
168 (V2DI "vec") (V4DI "avx2")])
;; Maps an HImode vector to the SImode vector with the same number of
;; elements (V16HI -> V16SI, V8HI -> V8SI).
170 (define_mode_attr ssedoublemode
171 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DImode vector to a QImode vector with eight times as many
;; elements (V4DI -> V32QI, V2DI -> V16QI).
173 (define_mode_attr ssebytemode
174 [(V4DI "V32QI") (V2DI "V16QI")])
176 ;; All 128bit vector integer modes
;; Unconditional list: no entry carries a TARGET_* test.
177 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
179 ;; All 256bit vector integer modes
;; Unconditional list: no entry carries a TARGET_* test.
180 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
182 ;; Random 128bit vector integer mode combinations
;; Each iterator is an unconditional subset of VI_128.  The modes listed
;; use QI, HI, SI and DI elements for V16QI, V8HI, V4SI and V2DI.
183 (define_mode_iterator VI12_128 [V16QI V8HI])
184 (define_mode_iterator VI14_128 [V16QI V4SI])
185 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
186 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
187 (define_mode_iterator VI24_128 [V8HI V4SI])
188 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
189 (define_mode_iterator VI48_128 [V4SI V2DI])
191 ;; Random 256bit vector integer mode combinations
;; Each iterator is an unconditional subset of VI_256.
192 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
193 (define_mode_iterator VI48_256 [V8SI V4DI])
195 ;; Int-float size matches
;; Each iterator pairs one integer vector mode with the float vector mode
;; that has the same element count (e.g. V4SI with V4SF).  All entries
;; are unconditional.
196 (define_mode_iterator VI4F_128 [V4SI V4SF])
197 (define_mode_iterator VI8F_128 [V2DI V2DF])
198 (define_mode_iterator VI4F_256 [V8SI V8SF])
199 (define_mode_iterator VI8F_256 [V4DI V4DF])
201 ;; Mapping from float mode to required SSE level
;; Covers the scalar SF/DF modes as well as the vector ones: SF-based
;; 128bit modes give "sse", DF-based ones "sse2", and V8SF/V4DF "avx".
;; Patterns below splice the value into their names as <sse>.
202 (define_mode_attr sse
203 [(SF "sse") (DF "sse2")
204 (V4SF "sse") (V2DF "sse2")
205 (V8SF "avx") (V4DF "avx")])
;; Name prefix for patterns over QI/DI integer vectors: "sse2" for
;; V16QI/V2DI and "avx" for V32QI/V4DI.  Spliced into pattern names as
;; <sse2> (see the movdqu and movnt patterns).
207 (define_mode_attr sse2
208 [(V16QI "sse2") (V32QI "avx")
209 (V2DI "sse2") (V4DI "avx")])
;; Name prefix for QImode vector patterns: "sse3" for V16QI and "avx" for
;; V32QI.  Spliced into the lddqu pattern name as <sse3>.
211 (define_mode_attr sse3
212 [(V16QI "sse3") (V32QI "avx")])
;; Maps the 128bit float vector modes to "sse4_1" and the 256bit ones to
;; "avx".
214 (define_mode_attr sse4_1
215 [(V4SF "sse4_1") (V2DF "sse4_1")
216 (V8SF "avx") (V4DF "avx")])
;; Size suffix appended to pattern names: "256" for the modes that V_256
;; lists and the empty string for the 128bit modes.
218 (define_mode_attr avxsizesuffix
219 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
220 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
221 (V8SF "256") (V4DF "256")
222 (V4SF "") (V2DF "")])
224 ;; SSE instruction mode
;; Integer vector modes map to the integer mode named "OI" (first row) or
;; "TI" (second row); float vector modes map to themselves.  The scalar TI
;; key serves iterators such as SSESCALARMODE that include plain TI.
;; NOTE(review): the closing (TI "TI") entry is restored from the upstream
;; table; confirm against the reference copy of this file.
225 (define_mode_attr sseinsnmode
226 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
227 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
228 (V8SF "V8SF") (V4DF "V4DF")
229 (V4SF "V4SF") (V2DF "V2DF")
230 (TI "TI")])
232 ;; Mapping of vector modes to an integer vector mode of the same size
;; Float modes map to the integer vector with the same element count
;; (V8SF -> V8SI, V2DF -> V2DI, ...); integer modes map to themselves.
;; Each key appears exactly once.
233 (define_mode_attr sseintvecmode
234 [(V8SF "V8SI") (V4DF "V4DI")
235 (V4SF "V4SI") (V2DF "V2DI")
237 (V8SI "V8SI") (V4DI "V4DI")
238 (V4SI "V4SI") (V2DI "V2DI")
239 (V16HI "V16HI") (V8HI "V8HI")
240 (V32QI "V32QI") (V16QI "V16QI")])
242 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled
;; (e.g. V16QI -> V32QI, V4DF -> V8DF).
243 (define_mode_attr ssedoublevecmode
244 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
245 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
246 (V8SF "V16SF") (V4DF "V8DF")
247 (V4SF "V8SF") (V2DF "V4DF")])
249 ;; Mapping of vector modes to a vector mode of half size
;; The element mode is kept and the element count is halved
;; (e.g. V32QI -> V16QI, V4SI -> V2SI).  There is no entry for V2DI or
;; V2DF.
;; NOTE(review): the closing (V4SF "V2SF") entry is restored from the
;; upstream table; confirm against the reference copy of this file.
250 (define_mode_attr ssehalfvecmode
251 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
252 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
253 (V8SF "V4SF") (V4DF "V2DF")
254 (V4SF "V2SF")])
256 ;; Mapping of vector modes back to the scalar modes
;; The value is the element mode of the key (V8SI -> SI, V2DF -> DF).
;; The scalar "vm" patterns below use it for their "mode" attribute.
257 (define_mode_attr ssescalarmode
258 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
259 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
260 (V8SF "SF") (V4DF "DF")
261 (V4SF "SF") (V2DF "DF")])
263 ;; Number of scalar elements in each vector type
;; The value is the count embedded in the mode name, as a string
;; (V32QI -> "32", V2DF -> "2").
264 (define_mode_attr ssescalarnum
265 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
266 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
267 (V8SF "8") (V4DF "4")
268 (V4SF "4") (V2DF "2")])
270 ;; SSE prefix for integer vector modes
;; Integer modes map to "p" and the float mode with the same element
;; count maps to the empty string.  All four SI/SF and DI/DF pairs are
;; covered.
;; NOTE(review): the two middle rows are restored from the upstream
;; table; confirm against the reference copy of this file.
271 (define_mode_attr sseintprefix
272 [(V2DI "p") (V2DF "")
273 (V4DI "p") (V4DF "")
274 (V4SI "p") (V4SF "")
275 (V8SI "p") (V8SF "")])
277 ;; SSE scalar suffix for vector modes
;; SF-based modes give "ss" and DF-based modes give "sd"; the scalar "vm"
;; patterns below splice the value into mnemonics such as
;; mul<ssescalarmodesuffix>.
;; NOTE(review): the first row (SF/DF) and the last row (V4SI "d") are
;; restored from the upstream table; confirm against the reference copy
;; of this file.
278 (define_mode_attr ssescalarmodesuffix
279 [(SF "ss") (DF "sd")
280 (V8SF "ss") (V4DF "sd")
281 (V4SF "ss") (V2DF "sd")
282 (V8SI "ss") (V4DI "sd")
283 (V4SI "d")])
285 ;; Pack/unpack vector modes
;; sseunpackmode: the value has the next wider element mode and half as
;; many elements as the key (V16QI -> V8HI, V8SI -> V4DI).
286 (define_mode_attr sseunpackmode
287 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
288 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Inverse table of sseunpackmode: the value has the next narrower element
;; mode and twice as many elements as the key (V8HI -> V16QI).
290 (define_mode_attr ssepackmode
291 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
292 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
294 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Only the 128bit integer modes are keyed.  The values 7/15/31/63 are
;; presumably the element width in bits minus one -- confirm against the
;; rotate patterns that use this attribute.
295 (define_mode_attr sserotatemax
296 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
298 ;; Mapping of mode to cast intrinsic name
;; Keyed on the same three modes that AVX256MODE2P iterates over.
299 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
301 ;; Instruction suffix for sign and zero extensions.
;; A code attribute (keyed on the RTL code, not on a mode):
;; sign_extend -> "sx", zero_extend -> "zx".
302 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
304 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes map to the literal "f128".  Integer modes map to "%~128";
;; presumably the "%~" output modifier picks 'i' or 'f' when the insn is
;; printed -- confirm against the target's operand-printing code.
305 (define_mode_attr i128
306 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
307 (V8SI "%~128") (V4DI "%~128")])
;; Three unconditional 256bit modes; the same modes key the castmode
;; attribute.
310 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Unconditional list of SI/SF and DI/DF element vector modes.  V8SI and
;; V8SF are not part of the list.
312 (define_mode_iterator AVXMODE48P_DI
313 [V2DI V2DF V4DI V4DF V4SF V4SI])
;; Mode attribute sharing the iterator's name.  Every key maps to a DImode
;; vector: V2DI for V2DI/V2DF/V4SI/V4SF and V4DI for V4DI/V4DF/V8SI/V8SF.
;; The V8SI/V8SF keys have no counterpart in the iterator of the same name.
314 (define_mode_attr AVXMODE48P_DI
315 [(V2DI "V2DI") (V2DF "V2DI")
316 (V4DI "V4DI") (V4DF "V4DI")
317 (V4SI "V2DI") (V4SF "V2DI")
318 (V8SI "V4DI") (V8SF "V4DI")])
;; Scalar SF/DF plus the four float vector modes, all unconditional.
320 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
322 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for the N elements that ssescalarnum gives
;; for the mode (8 -> 255, 4 -> 15, 2 -> 3), i.e. one bit per element.
323 (define_mode_attr blendbits
324 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
326 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330 ;; Move patterns
332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
334 ;; All of these patterns are enabled for SSE1 as well as SSE2.
335 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode of the V16 iterator.  The visible
;; preparation statement hands the operands to ix86_expand_vector_move.
;; NOTE(review): listing lines 340-341 are absent from this excerpt.
337 (define_expand "mov<mode>"
338 [(set (match_operand:V16 0 "nonimmediate_operand" "")
339 (match_operand:V16 1 "nonimmediate_operand" ""))]
342 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the V16 modes with three constraint alternatives:
;; "C" -> x, xm -> x and x -> m.  The condition requires at least one
;; operand to be a register.  The output code switches on
;; which_alternative; the visible standard_sse_constant_opcode return
;; presumably belongs to the "C" alternative (its case label is not
;; shown).  The remaining code switches on get_attr_mode and returns an
;; unaligned mnemonic (vmovups/vmovupd/vmovdqu) when either operand is a
;; misaligned_operand (the first half of each test is not shown),
;; %vmovaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, and otherwise the
;; aligned %vmovaps/%vmovapd/%vmovdqa form.
;; The cond expression at the end (presumably the "mode" attribute; its
;; head line is not shown) yields <sseinsnmode> under TARGET_AVX; V4SF
;; when optimizing for size, without SSE2, or for alternative 2 with
;; TARGET_SSE_TYPELESS_STORES; then V4SF/V2DF for those modes, else TI.
;; NOTE(review): many listing lines (e.g. 349, 354-355, 360-363,
;; 391-398) are absent from this excerpt.
346 (define_insn "*mov<mode>_internal"
347 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
348 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
350 && (register_operand (operands[0], <MODE>mode)
351 || register_operand (operands[1], <MODE>mode))"
353 switch (which_alternative)
356 return standard_sse_constant_opcode (insn, operands[1]);
359 switch (get_attr_mode (insn))
364 && (misaligned_operand (operands[0], <MODE>mode)
365 || misaligned_operand (operands[1], <MODE>mode)))
366 return "vmovups\t{%1, %0|%0, %1}";
368 return "%vmovaps\t{%1, %0|%0, %1}";
373 && (misaligned_operand (operands[0], <MODE>mode)
374 || misaligned_operand (operands[1], <MODE>mode)))
375 return "vmovupd\t{%1, %0|%0, %1}";
376 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
377 return "%vmovaps\t{%1, %0|%0, %1}";
379 return "%vmovapd\t{%1, %0|%0, %1}";
384 && (misaligned_operand (operands[0], <MODE>mode)
385 || misaligned_operand (operands[1], <MODE>mode)))
386 return "vmovdqu\t{%1, %0|%0, %1}";
387 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
388 return "%vmovaps\t{%1, %0|%0, %1}";
390 return "%vmovdqa\t{%1, %0|%0, %1}";
399 [(set_attr "type" "sselog1,ssemov,ssemov")
400 (set_attr "prefix" "maybe_vex")
402 (cond [(match_test "TARGET_AVX")
403 (const_string "<sseinsnmode>")
404 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
405 (not (match_test "TARGET_SSE2")))
406 (and (eq_attr "alternative" "2")
407 (match_test "TARGET_SSE_TYPELESS_STORES")))
408 (const_string "V4SF")
409 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
410 (const_string "V4SF")
411 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
412 (const_string "V2DF")
414 (const_string "TI")))])
;; Emits %vmovq from a V2DI register-or-memory operand into a V2DI
;; register.  The visible (parallel [(const_int 0)]) selects index 0 of
;; operand 1.
;; NOTE(review): listing lines 418-419 and 422-423 are absent from this
;; excerpt.
416 (define_insn "sse2_movq128"
417 [(set (match_operand:V2DI 0 "register_operand" "=x")
420 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
421 (parallel [(const_int 0)]))
424 "%vmovq\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "maybe_vex")
427 (set_attr "mode" "TI")])
429 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
430 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
431 ;; from memory, we'd prefer to load the memory directly into the %xmm
432 ;; register. To facilitate this happy circumstance, this pattern won't
433 ;; split until after register allocation. If the 64-bit value didn't
434 ;; come from memory, this is the best we can do. This is much better
435 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
436 ;; from there.
;; Valid for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES and
;; split once reload has completed.  Operand 2 is a V4SI scratch.
;; If operand 1 is a register, its two SImode halves (subreg offsets 0
;; and 4) are loaded with gen_sse2_loadld into operands[0] and
;; operands[2] and then combined with gen_vec_interleave_lowv4si.
;; If operand 1 is memory, gen_vec_concatv2di combines it with const0_rtx
;; into the V2DI low part of operands[0].
;; NOTE(review): listing lines 439, 444, 446-447, 449, 457-458 and
;; 462-465 are absent from this excerpt.
438 (define_insn_and_split "movdi_to_sse"
440 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
441 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
442 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
443 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
445 "&& reload_completed"
448 if (register_operand (operands[1], DImode))
450 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
451 Assemble the 64-bit DImode value in an xmm register. */
452 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
453 gen_rtx_SUBREG (SImode, operands[1], 0)));
454 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
455 gen_rtx_SUBREG (SImode, operands[1], 4)));
456 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
459 else if (memory_operand (operands[1], DImode))
460 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
461 operands[1], const0_rtx));
;; Post-reload rewrite (TARGET_SSE && reload_completed) of a V4SF register
;; load whose source matches zero_extended_scalar_load_operand.
;; Operand 1 is narrowed to SFmode with simplify_gen_subreg and operand 2
;; becomes the V4SF zero vector; the replacement uses a vec_duplicate of
;; the scalar.
;; NOTE(review): the head of this form (listing line 466) and lines
;; 470-471, 473-475 and 478-479 are absent from this excerpt.
467 [(set (match_operand:V4SF 0 "register_operand" "")
468 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
469 "TARGET_SSE && reload_completed"
472 (vec_duplicate:V4SF (match_dup 1))
476 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
477 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (TARGET_SSE2 && reload_completed) of a V2DF
;; register load whose source matches zero_extended_scalar_load_operand.
;; The replacement is a vec_concat of operand 1, narrowed to DFmode, with
;; a DFmode zero in operand 2.
;; NOTE(review): the head of this form (listing line 480) and lines 485
;; and 488 are absent from this excerpt.
481 [(set (match_operand:V2DF 0 "register_operand" "")
482 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
483 "TARGET_SSE2 && reload_completed"
484 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
486 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
487 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the V16 modes; forwards its register operand to
;; ix86_expand_push.
;; NOTE(review): listing lines 492-493 and 495-496 are absent from this
;; excerpt.
490 (define_expand "push<mode>1"
491 [(match_operand:V16 0 "register_operand" "")]
494 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the V16 modes; forwards the operands to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): listing lines 501-502 and 504-505 are absent from this
;; excerpt.
498 (define_expand "movmisalign<mode>"
499 [(set (match_operand:V16 0 "nonimmediate_operand" "")
500 (match_operand:V16 1 "nonimmediate_operand" ""))]
503 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float vector move over the VF modes.  When
;; both operands are memory, operand 1 is first forced into a register.
;; NOTE(review): listing lines 509 and 511-513 are absent from this
;; excerpt.
507 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
508 [(set (match_operand:VF 0 "nonimmediate_operand" "")
510 [(match_operand:VF 1 "nonimmediate_operand" "")]
514 if (MEM_P (operands[0]) && MEM_P (operands[1]))
515 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float vector move: emits %vmovu<ssemodesuffix> for xm -> x or
;; x -> m.  The condition rejects memory-to-memory moves.  The "movu"
;; attribute is set to "1".
;; NOTE(review): listing lines 520 and 522 are absent from this excerpt.
518 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
519 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
521 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
523 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
524 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
525 [(set_attr "type" "ssemov")
526 (set_attr "movu" "1")
527 (set_attr "prefix" "maybe_vex")
528 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned integer vector move over the VI1 modes.
;; When both operands are memory, operand 1 is first forced into a
;; register.
;; NOTE(review): listing lines 533-535 are absent from this excerpt.
530 (define_expand "<sse2>_movdqu<avxsizesuffix>"
531 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
532 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
536 if (MEM_P (operands[0]) && MEM_P (operands[1]))
537 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned integer vector move: emits %vmovdqu for xm -> x or x -> m
;; under TARGET_SSE2; memory-to-memory is rejected by the condition.
;; "prefix_data16" is computed from a TARGET_AVX test whose two result
;; arms are not shown here.
;; NOTE(review): listing lines 543, 549 and 551-552 are absent from this
;; excerpt.
540 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
541 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
542 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
544 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
545 "%vmovdqu\t{%1, %0|%0, %1}"
546 [(set_attr "type" "ssemov")
547 (set_attr "movu" "1")
548 (set (attr "prefix_data16")
550 (match_test "TARGET_AVX")
553 (set_attr "prefix" "maybe_vex")
554 (set_attr "mode" "<sseinsnmode>")])
;; Load of a VI1-mode vector from memory ("m") into a register ("=x"),
;; emitted as %vlddqu.  Both "prefix_data16" and "prefix_rep" are computed
;; from TARGET_AVX tests whose result arms are not shown here.
;; NOTE(review): listing lines 559-560, 565, 567-568, 570 and 572-573
;; are absent from this excerpt.
556 (define_insn "<sse3>_lddqu<avxsizesuffix>"
557 [(set (match_operand:VI1 0 "register_operand" "=x")
558 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
561 "%vlddqu\t{%1, %0|%0, %1}"
562 [(set_attr "type" "ssemov")
563 (set_attr "movu" "1")
564 (set (attr "prefix_data16")
566 (match_test "TARGET_AVX")
569 (set (attr "prefix_rep")
571 (match_test "TARGET_AVX")
574 (set_attr "prefix" "maybe_vex")
575 (set_attr "mode" "<sseinsnmode>")])
;; Stores an SImode register (constraint "r") to memory with movnti.
;; There is no %v prefix in the template and "prefix_data16" is fixed
;; to "0".
;; NOTE(review): the "mode" attribute is "V2DF" although both operands
;; are SImode -- confirm that this is intended.
;; NOTE(review): listing lines 580-581 are absent from this excerpt.
577 (define_insn "sse2_movntsi"
578 [(set (match_operand:SI 0 "memory_operand" "=m")
579 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
582 "movnti\t{%1, %0|%0, %1}"
583 [(set_attr "type" "ssemov")
584 (set_attr "prefix_data16" "0")
585 (set_attr "mode" "V2DF")])
;; Stores a VF-mode register to memory with %vmovnt<ssemodesuffix>.
;; NOTE(review): listing lines 590-591 are absent from this excerpt.
587 (define_insn "<sse>_movnt<mode>"
588 [(set (match_operand:VF 0 "memory_operand" "=m")
589 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
592 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
593 [(set_attr "type" "ssemov")
594 (set_attr "prefix" "maybe_vex")
595 (set_attr "mode" "<MODE>")])
;; Stores a VI8-mode (DImode element) register to memory with %vmovntdq.
;; "prefix_data16" is computed from a TARGET_AVX test whose result arms
;; are not shown here.
;; NOTE(review): "type" is "ssecvt" here while the neighbouring movnt
;; patterns use "ssemov" -- confirm that this is intended.
;; NOTE(review): listing lines 600-601, 605 and 607-608 are absent from
;; this excerpt.
597 (define_insn "<sse2>_movnt<mode>"
598 [(set (match_operand:VI8 0 "memory_operand" "=m")
599 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
602 "%vmovntdq\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssecvt")
604 (set (attr "prefix_data16")
606 (match_test "TARGET_AVX")
609 (set_attr "prefix" "maybe_vex")
610 (set_attr "mode" "<sseinsnmode>")])
612 ; Expand patterns for non-temporal stores. At the moment, only those
613 ; that directly map to insns are defined; it would be possible to
614 ; define patterns for other modes that would expand to several insns.
616 ;; Modes handled by storent patterns.
;; Scalar SI requires TARGET_SSE2 and scalar SF/DF require TARGET_SSE4A.
;; Among the vector modes shown, V8SF/V4DF require TARGET_AVX, V2DF
;; requires TARGET_SSE2 and V4SF is unconditional.
;; NOTE(review): listing line 619 is absent from this excerpt, so one row
;; of the list is not visible.
617 (define_mode_iterator STORENT_MODE
618 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
620 (V8SF "TARGET_AVX") V4SF
621 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over the STORENT_MODE modes with a memory destination and a
;; register source.
;; NOTE(review): listing lines 625 and 627-628 are absent from this
;; excerpt, so the operation wrapping operand 1 and the closing of the
;; form are not visible.
623 (define_expand "storent<mode>"
624 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
626 [(match_operand:STORENT_MODE 1 "register_operand" "")]
630 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
632 ;; Parallel floating point arithmetic
634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over the VF modes, named by a code iterator.  It hands
;; the operands to ix86_expand_fp_absneg_operator and finishes with DONE,
;; so the RTL template itself is not emitted.
;; NOTE(review): listing lines 638 and 640 are absent from this excerpt.
636 (define_expand "<code><mode>2"
637 [(set (match_operand:VF 0 "register_operand" "")
639 (match_operand:VF 1 "register_operand" "")))]
641 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an absneg_operator applied to operand 1 together with a (use)
;; of operand 2, and is split once reload has completed.
;; The split builds one logical operation on op1/op2: XOR when operand 3
;; is NEG, AND otherwise.  op1/op2 are operands 1 and 2, swapped when
;; operand 1 is memory.  The result is SET into operand 0.
;; Alternatives 0-1 are "noavx" and 2-3 are "avx".
;; NOTE(review): listing lines 648-649, 651-652, 654-658, 661, 663-666,
;; 668-672 and 676-678 are absent from this excerpt; in particular the
;; body of the rtx_equal_p test is not visible.
643 (define_insn_and_split "*absneg<mode>2"
644 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
645 (match_operator:VF 3 "absneg_operator"
646 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
647 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
650 "&& reload_completed"
653 enum rtx_code absneg_op;
659 if (MEM_P (operands[1]))
660 op1 = operands[2], op2 = operands[1];
662 op1 = operands[1], op2 = operands[2];
667 if (rtx_equal_p (operands[0], operands[1]))
673 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
674 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
675 t = gen_rtx_SET (VOIDmode, operands[0], t);
679 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Binary add/subtract expander over the VF modes.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy.
;; NOTE(review): listing lines 683 and 686 are absent from this excerpt.
681 (define_expand "<plusminus_insn><mode>3"
682 [(set (match_operand:VF 0 "register_operand" "")
684 (match_operand:VF 1 "nonimmediate_operand" "")
685 (match_operand:VF 2 "nonimmediate_operand" "")))]
687 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract insn over the VF modes, guarded by
;; ix86_binary_operator_ok.  Alternative 0 ("noavx") is the two-operand
;; form with operand 1 tied to the destination; alternative 1 ("avx") is
;; the three-operand v-prefixed form.
;; NOTE(review): listing lines 691 and 695 are absent from this excerpt.
689 (define_insn "*<plusminus_insn><mode>3"
690 [(set (match_operand:VF 0 "register_operand" "=x,x")
692 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
693 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
694 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
696 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
697 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
698 [(set_attr "isa" "noavx,avx")
699 (set_attr "type" "sseadd")
700 (set_attr "prefix" "orig,vex")
701 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/subtract over the VF_128 modes: the mnemonic uses
;; <ssescalarmodesuffix> and the "mode" attribute is <ssescalarmode>.
;; Alternative 0 is the two-operand "noavx" form and alternative 1 the
;; three-operand "avx" form.
;; NOTE(review): listing lines 705-706 and 709-712 are absent from this
;; excerpt.
703 (define_insn "<sse>_vm<plusminus_insn><mode>3"
704 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
707 (match_operand:VF_128 1 "register_operand" "0,x")
708 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
713 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
714 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
715 [(set_attr "isa" "noavx,avx")
716 (set_attr "type" "sseadd")
717 (set_attr "prefix" "orig,vex")
718 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander over the VF modes.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT.
;; NOTE(review): listing lines 722 and 725 are absent from this excerpt.
720 (define_expand "mul<mode>3"
721 [(set (match_operand:VF 0 "register_operand" "")
723 (match_operand:VF 1 "nonimmediate_operand" "")
724 (match_operand:VF 2 "nonimmediate_operand" "")))]
726 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn over the VF modes, guarded by
;; ix86_binary_operator_ok (MULT, ...).  Operand 1 carries the "%"
;; constraint modifier in the noavx alternative.  Alternative 0 emits
;; mul<ssemodesuffix> and alternative 1 the three-operand
;; vmul<ssemodesuffix>.
;; NOTE(review): listing lines 730 and 734 are absent from this excerpt.
728 (define_insn "*mul<mode>3"
729 [(set (match_operand:VF 0 "register_operand" "=x,x")
731 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
732 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
733 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
735 mul<ssemodesuffix>\t{%2, %0|%0, %2}
736 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
737 [(set_attr "isa" "noavx,avx")
738 (set_attr "type" "ssemul")
739 (set_attr "prefix" "orig,vex")
740 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply over the VF_128 modes: emits
;; mul<ssescalarmodesuffix> (noavx) or vmul<ssescalarmodesuffix> (avx);
;; the "mode" attribute is <ssescalarmode>.
;; NOTE(review): listing lines 744-745 and 748-751 are absent from this
;; excerpt.
742 (define_insn "<sse>_vmmul<mode>3"
743 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
746 (match_operand:VF_128 1 "register_operand" "0,x")
747 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
752 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
753 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
754 [(set_attr "isa" "noavx,avx")
755 (set_attr "type" "ssemul")
756 (set_attr "prefix" "orig,vex")
757 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DFmode vector modes (VF2).  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy with DIV.
;; NOTE(review): listing line 763 is absent from this excerpt.
759 (define_expand "div<mode>3"
760 [(set (match_operand:VF2 0 "register_operand" "")
761 (div:VF2 (match_operand:VF2 1 "register_operand" "")
762 (match_operand:VF2 2 "nonimmediate_operand" "")))]
764 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SFmode vector modes (VF1).  After the usual
;; ix86_fixup_binary_operands_no_copy call it tests a condition that
;; includes TARGET_RECIP_VEC_DIV, !optimize_insn_for_size_p (),
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations (the first conjunct is not shown).
;; ix86_emit_swdivsf follows that test and is presumably its body.
;; NOTE(review): listing lines 770-771, 773-774, 779 and 781-783 are
;; absent from this excerpt.
766 (define_expand "div<mode>3"
767 [(set (match_operand:VF1 0 "register_operand" "")
768 (div:VF1 (match_operand:VF1 1 "register_operand" "")
769 (match_operand:VF1 2 "nonimmediate_operand" "")))]
772 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
775 && TARGET_RECIP_VEC_DIV
776 && !optimize_insn_for_size_p ()
777 && flag_finite_math_only && !flag_trapping_math
778 && flag_unsafe_math_optimizations)
780 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed divide insn over the VF modes.  Operand 1 must be a register;
;; alternative 0 emits div<ssemodesuffix> and alternative 1 the
;; three-operand vdiv<ssemodesuffix>.
;; NOTE(review): listing lines 787 and 790-791 are absent from this
;; excerpt.
785 (define_insn "<sse>_div<mode>3"
786 [(set (match_operand:VF 0 "register_operand" "=x,x")
788 (match_operand:VF 1 "register_operand" "0,x")
789 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
792 div<ssemodesuffix>\t{%2, %0|%0, %2}
793 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "ssediv")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed divide over the VF_128 modes: emits
;; div<ssescalarmodesuffix> (noavx) or vdiv<ssescalarmodesuffix> (avx);
;; the "mode" attribute is <ssescalarmode>.
;; NOTE(review): listing lines 801-802 and 805-808 are absent from this
;; excerpt.
799 (define_insn "<sse>_vmdiv<mode>3"
800 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
803 (match_operand:VF_128 1 "register_operand" "0,x")
804 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
809 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
810 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
811 [(set_attr "isa" "noavx,avx")
812 (set_attr "type" "ssediv")
813 (set_attr "prefix" "orig,vex")
814 (set_attr "mode" "<ssescalarmode>")])
;; UNSPEC_RCP of a VF1-mode register-or-memory operand, emitted as
;; %vrcpps.  The "atom_sse_attr" attribute is "rcp".
;; NOTE(review): listing lines 818 and 820 are absent from this excerpt.
816 (define_insn "<sse>_rcp<mode>2"
817 [(set (match_operand:VF1 0 "register_operand" "=x")
819 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
821 "%vrcpps\t{%1, %0|%0, %1}"
822 [(set_attr "type" "sse")
823 (set_attr "atom_sse_attr" "rcp")
824 (set_attr "prefix" "maybe_vex")
825 (set_attr "mode" "<MODE>")])
;; V4SF pattern emitted as rcpss (noavx) or vrcpss (avx).  Operand 1 is
;; the unspec input and operand 2 is a second register input, tied to the
;; destination in the noavx alternative; "mode" is "SF".
;; NOTE(review): listing lines 829, 831 and 833-835 are absent from this
;; excerpt, so the RTL combining the two inputs is not visible.
827 (define_insn "sse_vmrcpv4sf2"
828 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
830 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
832 (match_operand:V4SF 2 "register_operand" "0,x")
836 rcpss\t{%1, %0|%0, %1}
837 vrcpss\t{%1, %2, %0|%0, %2, %1}"
838 [(set_attr "isa" "noavx,avx")
839 (set_attr "type" "sse")
840 (set_attr "atom_sse_attr" "rcp")
841 (set_attr "prefix" "orig,vex")
842 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vector modes (VF2); no preparation
;; statements are visible.
;; NOTE(review): listing line 847 is absent from this excerpt, so the
;; condition and the closing of the form are not visible.
844 (define_expand "sqrt<mode>2"
845 [(set (match_operand:VF2 0 "register_operand" "")
846 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vector modes (VF1).  It tests a
;; condition that includes TARGET_RECIP_VEC_SQRT,
;; !optimize_insn_for_size_p (), flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations (the first
;; conjunct is not shown).  ix86_emit_swsqrtsf, called with `false' as
;; its last argument, follows that test and is presumably its body.
;; NOTE(review): listing lines 852-854, 859 and 861-863 are absent from
;; this excerpt.
849 (define_expand "sqrt<mode>2"
850 [(set (match_operand:VF1 0 "register_operand" "")
851 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
855 && TARGET_RECIP_VEC_SQRT
856 && !optimize_insn_for_size_p ()
857 && flag_finite_math_only && !flag_trapping_math
858 && flag_unsafe_math_optimizations)
860 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square root over the VF modes, emitted as
;; %vsqrt<ssemodesuffix>.  The "atom_sse_attr" attribute is "sqrt".
;; NOTE(review): listing line 868 is absent from this excerpt.
865 (define_insn "<sse>_sqrt<mode>2"
866 [(set (match_operand:VF 0 "register_operand" "=x")
867 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
869 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
870 [(set_attr "type" "sse")
871 (set_attr "atom_sse_attr" "sqrt")
872 (set_attr "prefix" "maybe_vex")
873 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root over the VF_128 modes: emits
;; sqrt<ssescalarmodesuffix> (noavx) or vsqrt<ssescalarmodesuffix> (avx).
;; Operand 2 is a second register input, tied to the destination in the
;; noavx alternative; "mode" is <ssescalarmode>.
;; NOTE(review): listing lines 877-878 and 881-883 are absent from this
;; excerpt.
875 (define_insn "<sse>_vmsqrt<mode>2"
876 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
879 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
880 (match_operand:VF_128 2 "register_operand" "0,x")
884 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
885 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
886 [(set_attr "isa" "noavx,avx")
887 (set_attr "type" "sse")
888 (set_attr "atom_sse_attr" "sqrt")
889 (set_attr "prefix" "orig,vex")
890 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SFmode vector modes (VF1),
;; expressed with UNSPEC_RSQRT.  The visible preparation statement calls
;; ix86_emit_swsqrtsf with `true' as its last argument.
;; NOTE(review): listing lines 894, 896-897 and 899-900 are absent from
;; this excerpt.
892 (define_expand "rsqrt<mode>2"
893 [(set (match_operand:VF1 0 "register_operand" "")
895 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
898 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; UNSPEC_RSQRT of a VF1-mode register-or-memory operand, emitted as
;; %vrsqrtps.
;; NOTE(review): listing lines 904 and 906 are absent from this excerpt.
902 (define_insn "<sse>_rsqrt<mode>2"
903 [(set (match_operand:VF1 0 "register_operand" "=x")
905 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
907 "%vrsqrtps\t{%1, %0|%0, %1}"
908 [(set_attr "type" "sse")
909 (set_attr "prefix" "maybe_vex")
910 (set_attr "mode" "<MODE>")])
;; V4SF pattern emitted as rsqrtss (noavx) or vrsqrtss (avx).  Operand 1
;; is the unspec input and operand 2 is a second register input, tied to
;; the destination in the noavx alternative; "mode" is "SF".
;; NOTE(review): listing lines 914, 916 and 918-920 are absent from this
;; excerpt.
912 (define_insn "sse_vmrsqrtv4sf2"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
915 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
917 (match_operand:V4SF 2 "register_operand" "0,x")
921 rsqrtss\t{%1, %0|%0, %1}
922 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
923 [(set_attr "isa" "noavx,avx")
924 (set_attr "type" "sse")
925 (set_attr "prefix" "orig,vex")
926 (set_attr "mode" "SF")])
928 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
929 ;; isn't really correct, as those rtl operators aren't defined when
930 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Binary expander over the VF modes, named by a code iterator.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; ix86_fixup_binary_operands_no_copy is then called in either
;; case.
;; NOTE(review): listing lines 934, 937-938 and 942 are absent from this
;; excerpt.
932 (define_expand "<code><mode>3"
933 [(set (match_operand:VF 0 "register_operand" "")
935 (match_operand:VF 1 "nonimmediate_operand" "")
936 (match_operand:VF 2 "nonimmediate_operand" "")))]
939 if (!flag_finite_math_only)
940 operands[1] = force_reg (<MODE>mode, operands[1]);
941 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Variant used when flag_finite_math_only is set.  Operand 1 may be
;; memory and carries the "%" constraint modifier; the condition also
;; requires ix86_binary_operator_ok.  Emits <maxmin_float><ssemodesuffix>
;; in two-operand (noavx) or three-operand v-prefixed (avx) form.
;; NOTE(review): listing lines 946 and 951 are absent from this excerpt.
944 (define_insn "*<code><mode>3_finite"
945 [(set (match_operand:VF 0 "register_operand" "=x,x")
947 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
948 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
949 "TARGET_SSE && flag_finite_math_only
950 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
952 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
953 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
954 [(set_attr "isa" "noavx,avx")
955 (set_attr "type" "sseadd")
956 (set_attr "prefix" "orig,vex")
957 (set_attr "mode" "<MODE>")])
;; Variant used when flag_finite_math_only is not set.  Unlike the
;; _finite variant, operand 1 must be a register and has no "%" modifier.
;; The templates are the same as in the _finite variant.
;; NOTE(review): listing lines 961 and 965 are absent from this excerpt.
959 (define_insn "*<code><mode>3"
960 [(set (match_operand:VF 0 "register_operand" "=x,x")
962 (match_operand:VF 1 "register_operand" "0,x")
963 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
964 "TARGET_SSE && !flag_finite_math_only"
966 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
967 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
968 [(set_attr "isa" "noavx,avx")
969 (set_attr "type" "sseadd")
970 (set_attr "prefix" "orig,vex")
971 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max over the VF_128 modes: emits
;; <maxmin_float><ssescalarmodesuffix> in noavx or avx form; the "mode"
;; attribute is <ssescalarmode>.
;; NOTE(review): "type" is "sse" here while the packed min/max patterns
;; use "sseadd" -- confirm that this is intended.
;; NOTE(review): listing lines 975-976 and 979-982 are absent from this
;; excerpt.
973 (define_insn "<sse>_vm<code><mode>3"
974 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
977 (match_operand:VF_128 1 "register_operand" "0,x")
978 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
983 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
984 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
985 [(set_attr "isa" "noavx,avx")
986 (set_attr "type" "sse")
987 (set_attr "prefix" "orig,vex")
988 (set_attr "mode" "<ssescalarmode>")])
990 ;; These versions of the min/max patterns implement exactly the operations
991 ;; min = (op1 < op2 ? op1 : op2)
992 ;; max = (!(op1 < op2) ? op1 : op2)
993 ;; Their operands are not commutative, and thus they may be used in the
994 ;; presence of -0.0 and NaN.
;; Min over the VF modes with operands held in a bracketed operand vector
;; rather than a commutative RTL code.  Operand 1 must be a register.
;; Emits min<ssemodesuffix> (noavx) or vmin<ssemodesuffix> (avx).
;; NOTE(review): listing lines 998 and 1001-1003 are absent from this
;; excerpt, so the wrapping operation and the condition are not visible.
996 (define_insn "*ieee_smin<mode>3"
997 [(set (match_operand:VF 0 "register_operand" "=x,x")
999 [(match_operand:VF 1 "register_operand" "0,x")
1000 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1004 min<ssemodesuffix>\t{%2, %0|%0, %2}
1005 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1006 [(set_attr "isa" "noavx,avx")
1007 (set_attr "type" "sseadd")
1008 (set_attr "prefix" "orig,vex")
1009 (set_attr "mode" "<MODE>")])
;; Max counterpart of *ieee_smin<mode>3 with the same operand layout.
;; Emits max<ssemodesuffix> (noavx) or vmax<ssemodesuffix> (avx).
;; NOTE(review): listing lines 1013 and 1016-1018 are absent from this
;; excerpt, so the wrapping operation and the condition are not visible.
1011 (define_insn "*ieee_smax<mode>3"
1012 [(set (match_operand:VF 0 "register_operand" "=x,x")
1014 [(match_operand:VF 1 "register_operand" "0,x")
1015 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1019 max<ssemodesuffix>\t{%2, %0|%0, %2}
1020 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1021 [(set_attr "isa" "noavx,avx")
1022 (set_attr "type" "sseadd")
1023 (set_attr "prefix" "orig,vex")
1024 (set_attr "mode" "<MODE>")])
;; V4DF add/subtract, emitted as the three-operand vaddsubpd with a "vex"
;; prefix.  The visible RTL pairs an operation on operands 1 and 2 with
;; (minus (match_dup 1) (match_dup 2)).
;; NOTE(review): listing lines 1028-1029 and 1033-1034 are absent from
;; this excerpt, so the enclosing operation, its selector and the
;; condition are not visible.
1026 (define_insn "avx_addsubv4df3"
1027 [(set (match_operand:V4DF 0 "register_operand" "=x")
1030 (match_operand:V4DF 1 "register_operand" "x")
1031 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1032 (minus:V4DF (match_dup 1) (match_dup 2))
1035 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1036 [(set_attr "type" "sseadd")
1037 (set_attr "prefix" "vex")
1038 (set_attr "mode" "V4DF")])
;; V2DF add/subtract: addsubpd in the noavx alternative (operand 1 tied
;; to the destination) and vaddsubpd in the avx alternative.  The
;; "atom_unit" attribute is "complex".
;; NOTE(review): listing lines 1042-1043 and 1047-1049 are absent from
;; this excerpt.
1040 (define_insn "sse3_addsubv2df3"
1041 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1044 (match_operand:V2DF 1 "register_operand" "0,x")
1045 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1046 (minus:V2DF (match_dup 1) (match_dup 2))
1050 addsubpd\t{%2, %0|%0, %2}
1051 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1052 [(set_attr "isa" "noavx,avx")
1053 (set_attr "type" "sseadd")
1054 (set_attr "atom_unit" "complex")
1055 (set_attr "prefix" "orig,vex")
1056 (set_attr "mode" "V2DF")])
;; V8SF add/subtract, emitted as the three-operand vaddsubps with a "vex"
;; prefix.  Same RTL layout as avx_addsubv4df3.
;; NOTE(review): listing lines 1060-1061 and 1065-1066 are absent from
;; this excerpt.
1058 (define_insn "avx_addsubv8sf3"
1059 [(set (match_operand:V8SF 0 "register_operand" "=x")
1062 (match_operand:V8SF 1 "register_operand" "x")
1063 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1064 (minus:V8SF (match_dup 1) (match_dup 2))
1067 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1068 [(set_attr "type" "sseadd")
1069 (set_attr "prefix" "vex")
1070 (set_attr "mode" "V8SF")])
;; V4SF add/subtract: addsubps in the noavx alternative and vaddsubps in
;; the avx alternative.  "prefix_rep" is "1" for the noavx alternative
;; only.
;; NOTE(review): listing lines 1074-1075 and 1079-1081 are absent from
;; this excerpt.
1072 (define_insn "sse3_addsubv4sf3"
1073 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1076 (match_operand:V4SF 1 "register_operand" "0,x")
1077 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1078 (minus:V4SF (match_dup 1) (match_dup 2))
1082 addsubps\t{%2, %0|%0, %2}
1083 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1084 [(set_attr "isa" "noavx,avx")
1085 (set_attr "type" "sseadd")
1086 (set_attr "prefix" "orig,vex")
1087 (set_attr "prefix_rep" "1,*")
1088 (set_attr "mode" "V4SF")])
;; V4DF horizontal add/subtract, emitted as vh<plusminus_mnemonic>pd.
;; The visible RTL selects DF elements 0/1 and 2/3 of operand 1 and the
;; same indices of operand 2, in that order.
;; NOTE(review): listing lines 1092-1095, 1099, 1102-1104, 1108 and 1111
;; are absent from this excerpt, so the operations combining the selected
;; elements and the condition are not visible.
1090 (define_insn "avx_h<plusminus_insn>v4df3"
1091 [(set (match_operand:V4DF 0 "register_operand" "=x")
1096 (match_operand:V4DF 1 "register_operand" "x")
1097 (parallel [(const_int 0)]))
1098 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1100 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1101 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1105 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1106 (parallel [(const_int 0)]))
1107 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1109 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1110 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1112 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "prefix" "vex")
1115 (set_attr "mode" "V4DF")])
;; Horizontal operation on V2DF, instantiated through the plusminus
;; iterators used in the pattern name and mnemonic.  The selects pair
;; lanes 0 and 1 of operand 1, then lanes 0 and 1 of operand 2.
;; Alternative 0 is the legacy two-operand form with operand 1 tied to
;; the destination; alternative 1 is the VEX three-operand form.
1117 (define_insn "sse3_h<plusminus_insn>v2df3"
1118 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1122 (match_operand:V2DF 1 "register_operand" "0,x")
1123 (parallel [(const_int 0)]))
1124 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1127 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1128 (parallel [(const_int 0)]))
1129 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1132 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1133 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1134 [(set_attr "isa" "noavx,avx")
1135 (set_attr "type" "sseadd")
1136 (set_attr "prefix" "orig,vex")
1137 (set_attr "mode" "V2DF")])
;; 256-bit horizontal operation on V8SF (VEX form only).  Adjacent lanes
;; are paired in this order: lanes 0-3 of operand 1, lanes 0-3 of
;; operand 2, then lanes 4-7 of operand 1 and lanes 4-7 of operand 2,
;; i.e. each half of the result draws only on the same half of the
;; inputs.
1139 (define_insn "avx_h<plusminus_insn>v8sf3"
1140 [(set (match_operand:V8SF 0 "register_operand" "=x")
1146 (match_operand:V8SF 1 "register_operand" "x")
1147 (parallel [(const_int 0)]))
1148 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1150 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1151 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1155 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1156 (parallel [(const_int 0)]))
1157 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1159 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1160 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1165 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1167 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1168 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1171 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1174 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1175 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1177 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1178 [(set_attr "type" "sseadd")
1179 (set_attr "prefix" "vex")
1180 (set_attr "mode" "V8SF")])
;; Horizontal operation on V4SF.  Adjacent lanes (0,1) and (2,3) of
;; operand 1 are paired first, followed by the same pairs of operand 2.
;; Alternative 0 is the legacy two-operand form with operand 1 tied to
;; the destination; alternative 1 is the VEX three-operand form.
1182 (define_insn "sse3_h<plusminus_insn>v4sf3"
1183 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1188 (match_operand:V4SF 1 "register_operand" "0,x")
1189 (parallel [(const_int 0)]))
1190 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1192 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1193 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1197 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1198 (parallel [(const_int 0)]))
1199 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1201 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1202 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1205 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1206 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "isa" "noavx,avx")
1208 (set_attr "type" "sseadd")
1209 (set_attr "atom_unit" "complex")
1210 (set_attr "prefix" "orig,vex")
1211 (set_attr "prefix_rep" "1,*")
1212 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF.  Emits three insns through two fresh
;; V4DF pseudos: a horizontal add of operand 1 with itself, a vperm2f128
;; of that result with itself using selector 1, and a vector add of the
;; two temporaries into operand 0.
;; NOTE(review): selector 1 presumably swaps the 128-bit halves -- confirm
;; against the avx_vperm2f128 pattern.
1214 (define_expand "reduc_splus_v4df"
1215 [(match_operand:V4DF 0 "register_operand" "")
1216 (match_operand:V4DF 1 "register_operand" "")]
1219 rtx tmp = gen_reg_rtx (V4DFmode);
1220 rtx tmp2 = gen_reg_rtx (V4DFmode);
1221 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1222 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1223 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written straight into operand 0.
1227 (define_expand "reduc_splus_v2df"
1228 [(match_operand:V2DF 0 "register_operand" "")
1229 (match_operand:V2DF 1 "register_operand" "")]
1232 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V8SF.  Two successive horizontal adds (each
;; of a value with itself) are followed by a vperm2f128 with selector 1
;; and a final vector add into operand 0.  The pseudo "tmp" is reused as
;; the destination of the vperm2f128 after its first value is consumed.
1236 (define_expand "reduc_splus_v8sf"
1237 [(match_operand:V8SF 0 "register_operand" "")
1238 (match_operand:V8SF 1 "register_operand" "")]
1241 rtx tmp = gen_reg_rtx (V8SFmode);
1242 rtx tmp2 = gen_reg_rtx (V8SFmode);
1243 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1244 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1245 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1246 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  Two code paths are present: a pair
;; of sse3_haddv4sf3 insns (operand 1 with itself, then the result with
;; itself), or a call to ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the condition choosing between the two paths is not
;; visible in this listing; presumably the hadd path requires SSE3.
1250 (define_expand "reduc_splus_v4sf"
1251 [(match_operand:V4SF 0 "register_operand" "")
1252 (match_operand:V4SF 1 "register_operand" "")]
1257 rtx tmp = gen_reg_rtx (V4SFmode);
1258 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1259 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1262 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1266 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled under TARGET_AVX2, the 256-bit
;; float modes under TARGET_AVX, and V4SF under TARGET_SSE.
1267 (define_mode_iterator REDUC_SMINMAX_MODE
1268 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1269 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1270 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1271 (V4SF "TARGET_SSE")])
;; Min/max reduction expander, instantiated for every code of the smaxmin
;; iterator and every mode of REDUC_SMINMAX_MODE.  The work is delegated
;; to ix86_expand_reduc, passing the matching gen_<code><mode>3 generator.
1273 (define_expand "reduc_<code>_<mode>"
1274 [(smaxmin:REDUC_SMINMAX_MODE
1275 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1276 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1279 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the 256-bit integer modes of VI_256; delegates
;; to ix86_expand_reduc with the matching gen_<code><mode>3 generator.
;; NOTE(review): the code iterator wrapping the operands is not visible
;; in this listing.  Because the name template is the same as the
;; expander above, it presumably iterates over a disjoint set of codes
;; (unsigned max/min) -- confirm.
1283 (define_expand "reduc_<code>_<mode>"
1285 (match_operand:VI_256 0 "register_operand" "")
1286 (match_operand:VI_256 1 "register_operand" ""))]
1289 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI; delegates to
;; ix86_expand_reduc with gen_uminv8hi3.
1293 (define_expand "reduc_umin_v8hi"
1295 (match_operand:V8HI 0 "register_operand" "")
1296 (match_operand:V8HI 1 "register_operand" ""))]
1299 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1305 ;; Parallel floating point comparisons
1307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare with an explicit predicate immediate.  Operand 3
;; is an immediate accepted by const_0_to_31_operand and is printed as
;; the first (AT&T) / last (Intel) operand of vcmp<suffix>; it accounts
;; for the one-byte "length_immediate".
1309 (define_insn "avx_cmp<mode>3"
1310 [(set (match_operand:VF 0 "register_operand" "=x")
1312 [(match_operand:VF 1 "register_operand" "x")
1313 (match_operand:VF 2 "nonimmediate_operand" "xm")
1314 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1317 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1318 [(set_attr "type" "ssecmp")
1319 (set_attr "length_immediate" "1")
1320 (set_attr "prefix" "vex")
1321 (set_attr "mode" "<MODE>")])
;; Scalar counterpart of avx_cmp<mode>3 for the 128-bit float modes: the
;; mnemonic uses the scalar suffix and the "mode" attribute is the scalar
;; mode.  Operand 3 is again a 0..31 predicate immediate.
1323 (define_insn "avx_vmcmp<mode>3"
1324 [(set (match_operand:VF_128 0 "register_operand" "=x")
1327 [(match_operand:VF_128 1 "register_operand" "x")
1328 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1329 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1334 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1335 [(set_attr "type" "ssecmp")
1336 (set_attr "length_immediate" "1")
1337 (set_attr "prefix" "vex")
1338 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare, commutative variant.  Operand 1 carries
;; the "%" commutativity marker, and the insn condition restricts
;; operator 3 to comparison codes whose RTX class is RTX_COMM_COMPARE.
;; The comparison is printed through the %D3 operand modifier.
1340 (define_insn "*<sse>_maskcmp<mode>3_comm"
1341 [(set (match_operand:VF 0 "register_operand" "=x,x")
1342 (match_operator:VF 3 "sse_comparison_operator"
1343 [(match_operand:VF 1 "register_operand" "%0,x")
1344 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1346 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1348 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1349 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1350 [(set_attr "isa" "noavx,avx")
1351 (set_attr "type" "ssecmp")
1352 (set_attr "length_immediate" "1")
1353 (set_attr "prefix" "orig,vex")
1354 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any operator accepted by
;; sse_comparison_operator.  Unlike the _comm variant, operand 1 has no
;; "%" marker, so the operands are not swapped.  Alternative 0 is the
;; legacy form (operand 1 tied to the destination), alternative 1 the
;; VEX three-operand form.
1356 (define_insn "<sse>_maskcmp<mode>3"
1357 [(set (match_operand:VF 0 "register_operand" "=x,x")
1358 (match_operator:VF 3 "sse_comparison_operator"
1359 [(match_operand:VF 1 "register_operand" "0,x")
1360 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1363 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1364 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1365 [(set_attr "isa" "noavx,avx")
1366 (set_attr "type" "ssecmp")
1367 (set_attr "length_immediate" "1")
1368 (set_attr "prefix" "orig,vex")
1369 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare for the 128-bit float modes; the
;; mnemonic takes the scalar suffix and the "mode" attribute is the
;; scalar mode.
;; NOTE(review): "length_immediate" is "1,*" here, whereas the packed
;; maskcmp patterns use "1" for both alternatives -- confirm the
;; difference is intentional.
1371 (define_insn "<sse>_vmmaskcmp<mode>3"
1372 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1374 (match_operator:VF_128 3 "sse_comparison_operator"
1375 [(match_operand:VF_128 1 "register_operand" "0,x")
1376 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1381 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1382 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1383 [(set_attr "isa" "noavx,avx")
1384 (set_attr "type" "ssecmp")
1385 (set_attr "length_immediate" "1,*")
1386 (set_attr "prefix" "orig,vex")
1387 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of the
;; two vector operands.  Emits comis{s,d}, with the "%v" template prefix
;; and "maybe_vex" attribute covering the VEX spelling.  "prefix_data16"
;; is computed from the "mode" attribute: the DF case takes the first
;; branch, every other mode gets "0".
1389 (define_insn "<sse>_comi"
1390 [(set (reg:CCFP FLAGS_REG)
1393 (match_operand:<ssevecmode> 0 "register_operand" "x")
1394 (parallel [(const_int 0)]))
1396 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1397 (parallel [(const_int 0)]))))]
1398 "SSE_FLOAT_MODE_P (<MODE>mode)"
1399 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1400 [(set_attr "type" "ssecomi")
1401 (set_attr "prefix" "maybe_vex")
1402 (set_attr "prefix_rep" "0")
1403 (set (attr "prefix_data16")
1404 (if_then_else (eq_attr "mode" "DF")
1406 (const_string "0")))
1407 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags register is set in CCFPU mode
;; and the emitted mnemonic is ucomis{s,d}.  Element 0 of each vector
;; operand is compared; operand 1 may be a register or memory.
1409 (define_insn "<sse>_ucomi"
1410 [(set (reg:CCFPU FLAGS_REG)
1413 (match_operand:<ssevecmode> 0 "register_operand" "x")
1414 (parallel [(const_int 0)]))
1416 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1417 (parallel [(const_int 0)]))))]
1418 "SSE_FLOAT_MODE_P (<MODE>mode)"
1419 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1420 [(set_attr "type" "ssecomi")
1421 (set_attr "prefix" "maybe_vex")
1422 (set_attr "prefix_rep" "0")
1423 (set (attr "prefix_data16")
1424 (if_then_else (eq_attr "mode" "DF")
1426 (const_string "0")))
1427 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for 256-bit data modes driven by a
;; 256-bit float comparison (operator 3 on operands 4 and 5; operands 1
;; and 2 are the two values selected between).  The condition requires
;; the data mode and the comparison mode to have the same number of
;; elements.  Expansion is delegated to ix86_expand_fp_vcond, whose
;; boolean result is captured in "ok".
1429 (define_expand "vcond<V_256:mode><VF_256:mode>"
1430 [(set (match_operand:V_256 0 "register_operand" "")
1432 (match_operator 3 ""
1433 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1434 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1435 (match_operand:V_256 1 "general_operand" "")
1436 (match_operand:V_256 2 "general_operand" "")))]
1438 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1439 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1441 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander above: data modes come from
;; V_128, comparison modes from VF_128, and the two must have the same
;; number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
1446 (define_expand "vcond<V_128:mode><VF_128:mode>"
1447 [(set (match_operand:V_128 0 "register_operand" "")
1449 (match_operator 3 ""
1450 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1451 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1452 (match_operand:V_128 1 "general_operand" "")
1453 (match_operand:V_128 2 "general_operand" "")))]
1455 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1456 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1458 bool ok = ix86_expand_fp_vcond (operands);
1463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1465 ;; Parallel floating point logical operations
1467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float and-not.  The output template is assembled at output time
;; into a 32-byte static buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode's own suffix
;; otherwise, and the two-operand "andn" or three-operand "vandn" format
;; is picked by which_alternative.  The "%%" sequences survive snprintf
;; as single "%" operand markers.
1469 (define_insn "<sse>_andnot<mode>3"
1470 [(set (match_operand:VF 0 "register_operand" "=x,x")
1473 (match_operand:VF 1 "register_operand" "0,x"))
1474 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1477 static char buf[32];
1480 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1482 switch (which_alternative)
1485 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1488 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1494 snprintf (buf, sizeof (buf), insn, suffix);
1497 [(set_attr "isa" "noavx,avx")
1498 (set_attr "type" "sselog")
1499 (set_attr "prefix" "orig,vex")
1500 (set_attr "mode" "<MODE>")])
;; Expander for the packed float binary operations named by the <code>
;; iterator.  Both inputs may be registers or memory; the preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
1502 (define_expand "<code><mode>3"
1503 [(set (match_operand:VF 0 "register_operand" "")
1505 (match_operand:VF 1 "nonimmediate_operand" "")
1506 (match_operand:VF 2 "nonimmediate_operand" "")))]
1508 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed float logical operations.  Operand 1 is marked
;; commutative ("%"), and the condition additionally requires
;; ix86_binary_operator_ok.  As in the and-not pattern, the template is
;; built in a 32-byte static buffer: the suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else the mode's suffix, and the
;; legacy or "v"-prefixed format is chosen by which_alternative.
1510 (define_insn "*<code><mode>3"
1511 [(set (match_operand:VF 0 "register_operand" "=x,x")
1513 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1514 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1515 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1517 static char buf[32];
1520 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1522 switch (which_alternative)
1525 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1528 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1534 snprintf (buf, sizeof (buf), insn, suffix);
1537 [(set_attr "isa" "noavx,avx")
1538 (set_attr "type" "sselog")
1539 (set_attr "prefix" "orig,vex")
1540 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is the sign-bit mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh pseudos.  The
;; pattern combines (not mask) with operand 1 and mask with operand 2,
;; then ORs the two intermediate values into operand 0.
;; NOTE(review): the set destinations for the two intermediate values are
;; not visible in this listing; presumably they are operands 4 and 5.
1542 (define_expand "copysign<mode>3"
1545 (not:VF (match_dup 3))
1546 (match_operand:VF 1 "nonimmediate_operand" "")))
1548 (and:VF (match_dup 3)
1549 (match_operand:VF 2 "nonimmediate_operand" "")))
1550 (set (match_operand:VF 0 "register_operand" "")
1551 (ior:VF (match_dup 4) (match_dup 5)))]
1554 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1556 operands[4] = gen_reg_rtx (<MODE>mode);
1557 operands[5] = gen_reg_rtx (<MODE>mode);
1560 ;; Also define scalar versions. These are used for abs, neg, and
1561 ;; conditional move. Using subregs into vector modes causes register
1562 ;; allocation lossage. These patterns do not allow memory operands
1563 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not implemented with the packed instruction.  All
;; operands must be registers.  The suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise the suffix of the
;; corresponding vector mode; the "mode" attribute is likewise the vector
;; mode.  The template is formatted into a 32-byte static buffer.
1565 (define_insn "*andnot<mode>3"
1566 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1569 (match_operand:MODEF 1 "register_operand" "0,x"))
1570 (match_operand:MODEF 2 "register_operand" "x,x")))]
1571 "SSE_FLOAT_MODE_P (<MODE>mode)"
1573 static char buf[32];
1576 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1578 switch (which_alternative)
1581 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1584 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1590 snprintf (buf, sizeof (buf), insn, suffix);
1593 [(set_attr "isa" "noavx,avx")
1594 (set_attr "type" "sselog")
1595 (set_attr "prefix" "orig,vex")
1596 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations implemented with the packed
;; instructions.  All operands must be registers and operand 1 is marked
;; commutative ("%").  Suffix selection and template formatting follow
;; the same scheme as the scalar and-not pattern: "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else the vector-mode suffix.
1598 (define_insn "*<code><mode>3"
1599 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1601 (match_operand:MODEF 1 "register_operand" "%0,x")
1602 (match_operand:MODEF 2 "register_operand" "x,x")))]
1603 "SSE_FLOAT_MODE_P (<MODE>mode)"
1605 static char buf[32];
1608 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1610 switch (which_alternative)
1613 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1616 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1622 snprintf (buf, sizeof (buf), insn, suffix);
1625 [(set_attr "isa" "noavx,avx")
1626 (set_attr "type" "sselog")
1627 (set_attr "prefix" "orig,vex")
1628 (set_attr "mode" "<ssevecmode>")])
1630 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1632 ;; FMA4 floating point multiply/accumulate instructions. This
1633 ;; includes the scalar versions of the instructions as well as the
1634 ;; vector versions.
1636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1638 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1639 ;; combine to generate a multiply/add with two memory references. We then
1640 ;; split this insn by first loading one of the memory operands into the
1641 ;; destination register. If we don't manage to split the insn, reload will
1642 ;; generate the appropriate moves. This is needed because combine
1643 ;; has already folded one of the memory references into both the multiply and
1644 ;; add insns, and it can't generate a new pseudo. I.e.:
1645 ;; (set (reg1) (mem (addr1)))
1646 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1647 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1649 ;; ??? This is historic, pre-dating the gimple fma transformation.
1650 ;; We could now properly represent that only one memory operand is
1651 ;; allowed and not be penalized during optimization.
1653 ;; Intrinsic FMA operations.
1655 ;; The standard names for fma are only available with SSE math enabled.
;; fma<mode>4: fused multiply-add with all three inputs taken as given.
;; Each input may be a register or memory.  Enabled when either
;; TARGET_FMA or TARGET_FMA4 is set and TARGET_SSE_MATH holds.
1656 (define_expand "fma<mode>4"
1657 [(set (match_operand:FMAMODE 0 "register_operand")
1659 (match_operand:FMAMODE 1 "nonimmediate_operand")
1660 (match_operand:FMAMODE 2 "nonimmediate_operand")
1661 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1662 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fms<mode>4: as fma<mode>4, but operand 3 (the addend) is negated.
;; Same enabling condition: (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1664 (define_expand "fms<mode>4"
1665 [(set (match_operand:FMAMODE 0 "register_operand")
1667 (match_operand:FMAMODE 1 "nonimmediate_operand")
1668 (match_operand:FMAMODE 2 "nonimmediate_operand")
1669 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1670 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fnma<mode>4: as fma<mode>4, but operand 1 (a multiplicand) is negated.
;; Same enabling condition: (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1672 (define_expand "fnma<mode>4"
1673 [(set (match_operand:FMAMODE 0 "register_operand")
1675 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1676 (match_operand:FMAMODE 2 "nonimmediate_operand")
1677 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1678 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fnms<mode>4: as fma<mode>4, but both operand 1 (a multiplicand) and
;; operand 3 (the addend) are negated.
;; Same enabling condition: (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1680 (define_expand "fnms<mode>4"
1681 [(set (match_operand:FMAMODE 0 "register_operand")
1683 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1684 (match_operand:FMAMODE 2 "nonimmediate_operand")
1685 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1686 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1688 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Named expander behind the fused multiply-add builtin.  It has the same
;; operand shape as fma<mode>4, but its condition is only
;; TARGET_FMA || TARGET_FMA4, with no TARGET_SSE_MATH requirement.
1689 (define_expand "fma4i_fmadd_<mode>"
1690 [(set (match_operand:FMAMODE 0 "register_operand")
1692 (match_operand:FMAMODE 1 "nonimmediate_operand")
1693 (match_operand:FMAMODE 2 "nonimmediate_operand")
1694 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1695 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd insn with a destination separate from all three
;; inputs.  Operand 1 is always a register and is marked commutative
;; ("%").  The alternatives allow at most one memory input: operand 3 in
;; alternative 0 ("xm"), operand 2 in alternative 1 ("m").
1697 (define_insn "*fma4i_fmadd_<mode>"
1698 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1700 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1701 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1702 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1704 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1705 [(set_attr "type" "ssemuladd")
1706 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub insn.  Constraints are the same as for
;; *fma4i_fmadd_<mode>: operand 1 in a register (commutative), and at
;; most one of operands 2 and 3 in memory.
1708 (define_insn "*fma4i_fmsub_<mode>"
1709 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1711 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1712 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1714 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1716 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1717 [(set_attr "type" "ssemuladd")
1718 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd insn.  Constraints are the same as for
;; *fma4i_fmadd_<mode>: operand 1 in a register (commutative), and at
;; most one of operands 2 and 3 in memory.
1720 (define_insn "*fma4i_fnmadd_<mode>"
1721 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1724 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1725 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1726 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1728 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1729 [(set_attr "type" "ssemuladd")
1730 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub insn.  Constraints are the same as for
;; *fma4i_fmadd_<mode>: operand 1 in a register (commutative), and at
;; most one of operands 2 and 3 in memory.
1732 (define_insn "*fma4i_fnmsub_<mode>"
1733 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1736 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1737 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1739 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1741 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1742 [(set_attr "type" "ssemuladd")
1743 (set_attr "mode" "<MODE>")])
1745 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1746 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar multiply-add on the 128-bit float modes.
;; The preparation code sets operand 4 to the all-zero constant of the
;; vector mode.
;; NOTE(review): how operand 4 enters the pattern is not visible in this
;; listing; the matching insns accept it through const0_operand.
1748 (define_expand "fma4i_vmfmadd_<mode>"
1749 [(set (match_operand:VF_128 0 "register_operand")
1752 (match_operand:VF_128 1 "nonimmediate_operand")
1753 (match_operand:VF_128 2 "nonimmediate_operand")
1754 (match_operand:VF_128 3 "nonimmediate_operand"))
1759 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the FMA3 scalar multiply-add on the 128-bit float modes.
;; Three inputs, each register or memory; unlike fma4i_vmfmadd_<mode>, no
;; preparation code for a zero operand is visible here.
1762 (define_expand "fmai_vmfmadd_<mode>"
1763 [(set (match_operand:VF_128 0 "register_operand")
1766 (match_operand:VF_128 1 "nonimmediate_operand")
1767 (match_operand:VF_128 2 "nonimmediate_operand")
1768 (match_operand:VF_128 3 "nonimmediate_operand"))
;; FMA3 scalar multiply-add.  The destination is always tied to one of
;; the inputs, and the alternative decides which encoding is used:
;;   alt 0: dest = operand 1, operand 2 reg/mem  -> vfmadd132
;;   alt 1: dest = operand 1, operand 3 reg/mem  -> vfmadd213
;;   alt 2: dest = operand 3, operand 2 reg/mem  -> vfmadd231
1773 (define_insn "*fmai_fmadd_<mode>"
1774 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1777 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1778 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1779 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1784 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1785 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1786 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1787 [(set_attr "type" "ssemuladd")
1788 (set_attr "mode" "<MODE>")])
;; FMA3 scalar multiply-subtract.  Same three-alternative scheme as
;; *fmai_fmadd_<mode>: the input tied to the destination selects the
;; 132, 213 or 231 encoding of vfmsub.
1790 (define_insn "*fmai_fmsub_<mode>"
1791 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1794 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1795 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1797 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1802 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1803 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1804 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1805 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-add.  Same three-alternative scheme as
;; *fmai_fmadd_<mode>: the input tied to the destination selects the
;; 132, 213 or 231 encoding of vfnmadd.
1808 (define_insn "*fmai_fnmadd_<mode>"
1809 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1813 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1814 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1815 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1820 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1821 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1822 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1823 [(set_attr "type" "ssemuladd")
1824 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-subtract.  Same three-alternative scheme
;; as *fmai_fmadd_<mode>: the input tied to the destination selects the
;; 132, 213 or 231 encoding of vfnmsub.
1826 (define_insn "*fmai_fnmsub_<mode>"
1827 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1831 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1832 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1834 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1839 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1840 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1841 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1842 [(set_attr "type" "ssemuladd")
1843 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-add, four-operand form.  Operand 4 must satisfy
;; const0_operand and has no constraint string; it does not appear in the
;; output template.  At most one of operands 2 and 3 may be in memory.
1845 (define_insn "*fma4i_vmfmadd_<mode>"
1846 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1849 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1850 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1851 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1852 (match_operand:VF_128 4 "const0_operand" "")
1855 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1856 [(set_attr "type" "ssemuladd")
1857 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract, four-operand form.  Operand 4 must
;; satisfy const0_operand and is not printed.  At most one of operands 2
;; and 3 may be in memory.
1859 (define_insn "*fma4i_vmfmsub_<mode>"
1860 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1863 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1864 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1866 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1867 (match_operand:VF_128 4 "const0_operand" "")
1870 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1871 [(set_attr "type" "ssemuladd")
1872 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-add, four-operand form.  Operand 4 must
;; satisfy const0_operand and is not printed.  At most one of operands 2
;; and 3 may be in memory.
1874 (define_insn "*fma4i_vmfnmadd_<mode>"
1875 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1879 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1880 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1881 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1882 (match_operand:VF_128 4 "const0_operand" "")
1885 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1886 [(set_attr "type" "ssemuladd")
1887 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-subtract, four-operand form.  Operand 4
;; must satisfy const0_operand and is not printed.  At most one of
;; operands 2 and 3 may be in memory.
1889 (define_insn "*fma4i_vmfnmsub_<mode>"
1890 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1894 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1895 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1897 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1898 (match_operand:VF_128 4 "const0_operand" "")
1901 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
1905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1907 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1911 ;; It would be possible to represent these without the UNSPEC as
1912 ;;
1913 ;; (vec_merge
1914 ;; (fma op1 op2 op3)
1915 ;; (fma op1 op2 (neg op3))
1916 ;; (merge-const))
1917 ;;
1918 ;; But this doesn't seem useful in practice.
;; Expander for the packed fused multiply with alternating add/subtract.
;; The three inputs (each register or memory) are grouped in a single
;; operand vector.  Enabled when TARGET_FMA or TARGET_FMA4 is set.
1920 (define_expand "fmaddsub_<mode>"
1921 [(set (match_operand:VF 0 "register_operand")
1923 [(match_operand:VF 1 "nonimmediate_operand")
1924 (match_operand:VF 2 "nonimmediate_operand")
1925 (match_operand:VF 3 "nonimmediate_operand")]
1927 "TARGET_FMA || TARGET_FMA4")
;; FMA4 four-operand vfmaddsub.  Operand 1 is always a register and is
;; marked commutative ("%"); at most one of operands 2 and 3 may be in
;; memory.
1929 (define_insn "*fma4_fmaddsub_<mode>"
1930 [(set (match_operand:VF 0 "register_operand" "=x,x")
1932 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1933 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1934 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1937 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1938 [(set_attr "type" "ssemuladd")
1939 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand vfmsubadd.  Constraints are the same as for
;; *fma4_fmaddsub_<mode>: operand 1 in a register (commutative), and at
;; most one of operands 2 and 3 in memory.
1941 (define_insn "*fma4_fmsubadd_<mode>"
1942 [(set (match_operand:VF 0 "register_operand" "=x,x")
1944 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1945 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1947 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1950 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1951 [(set_attr "type" "ssemuladd")
1952 (set_attr "mode" "<MODE>")])
1954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1956 ;; FMA3 floating point multiply/accumulate instructions.
1958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add over FMAMODE.  The destination is always tied to one
;; of the inputs, and the alternative decides which encoding is used:
;;   alt 0: dest = operand 1, operand 2 reg/mem  -> vfmadd132
;;   alt 1: dest = operand 1, operand 3 reg/mem  -> vfmadd213
;;   alt 2: dest = operand 3, operand 2 reg/mem  -> vfmadd231
1960 (define_insn "*fma_fmadd_<mode>"
1961 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1963 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1964 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1965 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1968 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1969 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1970 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1971 [(set_attr "type" "ssemuladd")
1972 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract over FMAMODE.  Same three-alternative scheme as
;; *fma_fmadd_<mode>: the input tied to the destination selects the 132,
;; 213 or 231 encoding of vfmsub.
1974 (define_insn "*fma_fmsub_<mode>"
1975 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1977 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1978 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1980 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1983 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1984 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1985 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1986 [(set_attr "type" "ssemuladd")
1987 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-add over FMAMODE.  Same three-alternative scheme
;; as *fma_fmadd_<mode>: the input tied to the destination selects the
;; 132, 213 or 231 encoding of vfnmadd.
1989 (define_insn "*fma_fnmadd_<mode>"
1990 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1993 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1994 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1995 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1998 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1999 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2000 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-subtract over FMAMODE.  Same three-alternative
;; scheme as *fma_fmadd_<mode>: the input tied to the destination selects
;; the 132, 213 or 231 encoding of vfnmsub.
2004 (define_insn "*fma_fnmsub_<mode>"
2005 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2008 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2009 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2011 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2014 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2015 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2016 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
;; FMA3 packed multiply with alternating add/subtract over the VF modes.
;; Same three-alternative scheme as *fma_fmadd_<mode>: the input tied to
;; the destination selects the 132, 213 or 231 encoding of vfmaddsub.
2020 (define_insn "*fma_fmaddsub_<mode>"
2021 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2023 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2024 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2025 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2029 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2030 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2031 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2032 [(set_attr "type" "ssemuladd")
2033 (set_attr "mode" "<MODE>")])
;; FMA3 packed multiply with alternating subtract/add over the VF modes.
;; Same three-alternative scheme as *fma_fmadd_<mode>: the input tied to
;; the destination selects the 132, 213 or 231 encoding of vfmsubadd.
2035 (define_insn "*fma_fmsubadd_<mode>"
2036 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2038 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2039 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2041 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2045 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2046 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2047 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2048 [(set_attr "type" "ssemuladd")
2049 (set_attr "mode" "<MODE>")])
2051 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2053 ;; Parallel single-precision floating point conversion operations
2055 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or memory) to
;; V2SF with (float ...).  Operand 1 is the V4SF input tied to the
;; destination ("0").  Only the legacy two-operand encoding is provided.
2057 (define_insn "sse_cvtpi2ps"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2062 (match_operand:V4SF 1 "register_operand" "0")
2065 "cvtpi2ps\t{%2, %0|%0, %2}"
2066 [(set_attr "type" "ssecvt")
2067 (set_attr "mode" "V4SF")])
;; cvtps2pi: applies an unspec conversion to the V4SF operand 1 (register
;; or memory), producing V4SI, and keeps elements 0 and 1 as the V2SI
;; result in an MMX register ("y").
;; NOTE(review): the "mode" attribute is "DI" here but "SF" on the
;; truncating sibling sse_cvttps2pi, and only the sibling sets
;; "prefix_rep" -- confirm the asymmetry is intentional.
2069 (define_insn "sse_cvtps2pi"
2070 [(set (match_operand:V2SI 0 "register_operand" "=y")
2072 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2074 (parallel [(const_int 0) (const_int 1)])))]
2076 "cvtps2pi\t{%1, %0|%0, %1}"
2077 [(set_attr "type" "ssecvt")
2078 (set_attr "unit" "mmx")
2079 (set_attr "mode" "DI")])
;; cvttps2pi: applies (fix:V4SI ...) to the V4SF operand 1 (register or
;; memory) and keeps elements 0 and 1 as the V2SI result in an MMX
;; register ("y").
2081 (define_insn "sse_cvttps2pi"
2082 [(set (match_operand:V2SI 0 "register_operand" "=y")
2084 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2085 (parallel [(const_int 0) (const_int 1)])))]
2087 "cvttps2pi\t{%1, %0|%0, %1}"
2088 [(set_attr "type" "ssecvt")
2089 (set_attr "unit" "mmx")
2090 (set_attr "prefix_rep" "0")
2091 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF with (float ...);
;; operand 1 is the V4SF input.  Alternatives 0 and 1 are the legacy
;; encoding with operand 1 tied to the destination, split into register
;; and memory sources so the per-CPU decode attributes can differ.
;; Alternative 2 is the VEX three-operand form and accepts either source.
2093 (define_insn "sse_cvtsi2ss"
2094 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2097 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2098 (match_operand:V4SF 1 "register_operand" "0,0,x")
2102 cvtsi2ss\t{%2, %0|%0, %2}
2103 cvtsi2ss\t{%2, %0|%0, %2}
2104 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2105 [(set_attr "isa" "noavx,noavx,avx")
2106 (set_attr "type" "sseicvt")
2107 (set_attr "athlon_decode" "vector,double,*")
2108 (set_attr "amdfam10_decode" "vector,double,*")
2109 (set_attr "bdver1_decode" "double,direct,*")
2110 (set_attr "prefix" "orig,orig,vex")
2111 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: the source operand 2 is DImode and the
;; insn requires TARGET_SSE && TARGET_64BIT.  The alternatives follow the
;; same layout (legacy reg, legacy mem, VEX).  The legacy alternatives
;; set "prefix_rex" to 1, and the VEX alternative sets "length_vex" to 4.
2113 (define_insn "sse_cvtsi2ssq"
2114 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2117 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2118 (match_operand:V4SF 1 "register_operand" "0,0,x")
2120 "TARGET_SSE && TARGET_64BIT"
2122 cvtsi2ssq\t{%2, %0|%0, %2}
2123 cvtsi2ssq\t{%2, %0|%0, %2}
2124 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2125 [(set_attr "isa" "noavx,noavx,avx")
2126 (set_attr "type" "sseicvt")
2127 (set_attr "athlon_decode" "vector,double,*")
2128 (set_attr "amdfam10_decode" "vector,double,*")
2129 (set_attr "bdver1_decode" "double,direct,*")
2130 (set_attr "length_vex" "*,*,4")
2131 (set_attr "prefix_rex" "1,1,*")
2132 (set_attr "prefix" "orig,orig,vex")
2133 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of a V4SF operand (register or memory), modelled
;; as UNSPEC_FIX_NOTRUNC with an SImode result in a general register.
;; The "%v" template prefix and "maybe_vex" attribute cover the VEX
;; spelling.
;; NOTE(review): unlike sse_cvtss2si_2, no "amdfam10_decode" attribute is
;; set here -- confirm that is intentional.
2135 (define_insn "sse_cvtss2si"
2136 [(set (match_operand:SI 0 "register_operand" "=r,r")
2139 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2140 (parallel [(const_int 0)]))]
2141 UNSPEC_FIX_NOTRUNC))]
2143 "%vcvtss2si\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sseicvt")
2145 (set_attr "athlon_decode" "double,vector")
2146 (set_attr "bdver1_decode" "double,double")
2147 (set_attr "prefix_rep" "1")
2148 (set_attr "prefix" "maybe_vex")
2149 (set_attr "mode" "SI")])
;; Same %vcvtss2si instruction, but matching a plain SFmode operand 1
;; (register or memory) wrapped directly in UNSPEC_FIX_NOTRUNC rather
;; than an element selected from a V4SF vector.
2151 (define_insn "sse_cvtss2si_2"
2152 [(set (match_operand:SI 0 "register_operand" "=r,r")
2153 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2154 UNSPEC_FIX_NOTRUNC))]
2156 "%vcvtss2si\t{%1, %0|%0, %1}"
2157 [(set_attr "type" "sseicvt")
2158 (set_attr "athlon_decode" "double,vector")
2159 (set_attr "amdfam10_decode" "double,double")
2160 (set_attr "bdver1_decode" "double,double")
2161 (set_attr "prefix_rep" "1")
2162 (set_attr "prefix" "maybe_vex")
2163 (set_attr "mode" "SI")])
;; DImode-result variant of the UNSPEC_FIX_NOTRUNC conversion from
;; element 0 of V4SF operand 1; enabled only for
;; TARGET_SSE && TARGET_64BIT.  Emits %vcvtss2si with the {q} suffix.
2165 (define_insn "sse_cvtss2siq"
2166 [(set (match_operand:DI 0 "register_operand" "=r,r")
2169 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2170 (parallel [(const_int 0)]))]
2171 UNSPEC_FIX_NOTRUNC))]
2172 "TARGET_SSE && TARGET_64BIT"
2173 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2174 [(set_attr "type" "sseicvt")
2175 (set_attr "athlon_decode" "double,vector")
2176 (set_attr "bdver1_decode" "double,double")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "prefix" "maybe_vex")
2179 (set_attr "mode" "DI")])
;; DImode-result UNSPEC_FIX_NOTRUNC conversion of a plain SFmode
;; operand 1 (register or memory); enabled only for
;; TARGET_SSE && TARGET_64BIT.  Emits %vcvtss2si with the {q} suffix.
2181 (define_insn "sse_cvtss2siq_2"
2182 [(set (match_operand:DI 0 "register_operand" "=r,r")
2183 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2184 UNSPEC_FIX_NOTRUNC))]
2185 "TARGET_SSE && TARGET_64BIT"
2186 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "amdfam10_decode" "double,double")
2190 (set_attr "bdver1_decode" "double,double")
2191 (set_attr "prefix_rep" "1")
2192 (set_attr "prefix" "maybe_vex")
2193 (set_attr "mode" "DI")])
;; SImode result computed from element 0 (selector
;; (parallel [(const_int 0)])) of V4SF operand 1, which may be a
;; register or memory.  Emits %vcvttss2si.
2195 (define_insn "sse_cvttss2si"
2196 [(set (match_operand:SI 0 "register_operand" "=r,r")
2199 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2200 (parallel [(const_int 0)]))))]
2202 "%vcvttss2si\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "athlon_decode" "double,vector")
2205 (set_attr "amdfam10_decode" "double,double")
2206 (set_attr "bdver1_decode" "double,double")
2207 (set_attr "prefix_rep" "1")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "SI")])
;; DImode-result variant of sse_cvttss2si: element 0 of V4SF operand 1
;; is converted; enabled only for TARGET_SSE && TARGET_64BIT.
;; Emits %vcvttss2si with the {q} suffix.
2211 (define_insn "sse_cvttss2siq"
2212 [(set (match_operand:DI 0 "register_operand" "=r,r")
2215 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2216 (parallel [(const_int 0)]))))]
2217 "TARGET_SSE && TARGET_64BIT"
2218 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2219 [(set_attr "type" "sseicvt")
2220 (set_attr "athlon_decode" "double,vector")
2221 (set_attr "amdfam10_decode" "double,double")
2222 (set_attr "bdver1_decode" "double,double")
2223 (set_attr "prefix_rep" "1")
2224 (set_attr "prefix" "maybe_vex")
2225 (set_attr "mode" "DI")])
;; 256-bit integer -> float conversion: operand 0 (V8SF register) is
;; set to float:V8SF of V8SI operand 1 (register or memory).
;; Always VEX-encoded (prefix "vex"); emits vcvtdq2ps.
2227 (define_insn "avx_cvtdq2ps256"
2228 [(set (match_operand:V8SF 0 "register_operand" "=x")
2229 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2231 "vcvtdq2ps\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "prefix" "vex")
2234 (set_attr "mode" "V8SF")])
;; 128-bit integer -> float conversion: operand 0 (V4SF register) is
;; set to float:V4SF of V4SI operand 1 (register or memory).
;; Emits %vcvtdq2ps with prefix "maybe_vex".
2236 (define_insn "sse2_cvtdq2ps"
2237 [(set (match_operand:V4SF 0 "register_operand" "=x")
2238 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2240 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2241 [(set_attr "type" "ssecvt")
2242 (set_attr "prefix" "maybe_vex")
2243 (set_attr "mode" "V4SF")])
;; Expander that builds a V4SI -> V4SF conversion out of four visible
;; RTL steps: float:V4SF of operand 1, an lt compare of match_dup 5
;; against match_dup 3, an AND of match_dup 6 with match_dup 4, and a
;; final plus of match_dup 5 and match_dup 7 into operand 0.
;; The preparation code sets operand 3 to a zero V4SF register, operand 4
;; to a V4SF constant vector built from 2**32 (real_ldexp of dconst1 by
;; 32), and operands 5..7 to fresh V4SF pseudo registers.
;; NOTE(review): presumably this is the usual "signed convert, then add
;; 2**32 where the result is negative" unsigned conversion; the set
;; destinations of the intermediate steps are not visible here -- confirm.
2245 (define_expand "sse2_cvtudq2ps"
2247 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2249 (lt:V4SF (match_dup 5) (match_dup 3)))
2251 (and:V4SF (match_dup 6) (match_dup 4)))
2252 (set (match_operand:V4SF 0 "register_operand" "")
2253 (plus:V4SF (match_dup 5) (match_dup 7)))]
2256 REAL_VALUE_TYPE TWO32r;
2260 real_ldexp (&TWO32r, &dconst1, 32);
2261 x = const_double_from_real_value (TWO32r, SFmode);
2263 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2264 operands[4] = force_reg (V4SFmode,
2265 ix86_build_const_vector (V4SFmode, 1, x));
2267 for (i = 5; i < 8; i++)
2268 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit float -> integer conversion expressed as UNSPEC_FIX_NOTRUNC
;; of V8SF operand 1 (register or memory), producing V8SI operand 0.
;; Always VEX-encoded; emits vcvtps2dq.
2271 (define_insn "avx_cvtps2dq256"
2272 [(set (match_operand:V8SI 0 "register_operand" "=x")
2273 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2274 UNSPEC_FIX_NOTRUNC))]
2276 "vcvtps2dq\t{%1, %0|%0, %1}"
2277 [(set_attr "type" "ssecvt")
2278 (set_attr "prefix" "vex")
2279 (set_attr "mode" "OI")])
;; 128-bit float -> integer conversion expressed as UNSPEC_FIX_NOTRUNC
;; of V4SF operand 1 (register or memory), producing V4SI operand 0.
;; Emits %vcvtps2dq.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test; the branch value visible here is "1".
2281 (define_insn "sse2_cvtps2dq"
2282 [(set (match_operand:V4SI 0 "register_operand" "=x")
2283 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2284 UNSPEC_FIX_NOTRUNC))]
2286 "%vcvtps2dq\t{%1, %0|%0, %1}"
2287 [(set_attr "type" "ssecvt")
2288 (set (attr "prefix_data16")
2290 (match_test "TARGET_AVX")
2292 (const_string "1")))
2293 (set_attr "prefix" "maybe_vex")
2294 (set_attr "mode" "TI")])
;; 256-bit float -> integer conversion using the RTL `fix' code:
;; V8SI operand 0 is set to fix:V8SI of V8SF operand 1 (register or
;; memory).  Always VEX-encoded; emits vcvttps2dq.
2296 (define_insn "avx_cvttps2dq256"
2297 [(set (match_operand:V8SI 0 "register_operand" "=x")
2298 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2300 "vcvttps2dq\t{%1, %0|%0, %1}"
2301 [(set_attr "type" "ssecvt")
2302 (set_attr "prefix" "vex")
2303 (set_attr "mode" "OI")])
;; 128-bit float -> integer conversion using the RTL `fix' code:
;; V4SI operand 0 is set to fix:V4SI of V4SF operand 1 (register or
;; memory).  Emits %vcvttps2dq.  prefix_rep and prefix_data16 are each
;; computed from a TARGET_AVX test.
;; NOTE(review): prefix_data16 is specified twice below -- once through
;; the TARGET_AVX conditional and once unconditionally as "0".  One of
;; the two is redundant; confirm which setting the attribute generator
;; honours before removing either.
2305 (define_insn "sse2_cvttps2dq"
2306 [(set (match_operand:V4SI 0 "register_operand" "=x")
2307 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2309 "%vcvttps2dq\t{%1, %0|%0, %1}"
2310 [(set_attr "type" "ssecvt")
2311 (set (attr "prefix_rep")
2313 (match_test "TARGET_AVX")
2315 (const_string "1")))
2316 (set (attr "prefix_data16")
2318 (match_test "TARGET_AVX")
2320 (const_string "0")))
2321 (set_attr "prefix_data16" "0")
2322 (set_attr "prefix" "maybe_vex")
2323 (set_attr "mode" "TI")])
2325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2327 ;; Parallel double-precision floating point conversion operations
2329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF conversion (float:V2DF).  Operand 1 is either a "y"
;; constraint register (alternative 0, unit "mmx", prefix_data16 "1")
;; or a memory operand (alternative 1).  Emits cvtpi2pd.
2331 (define_insn "sse2_cvtpi2pd"
2332 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2333 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2335 "cvtpi2pd\t{%1, %0|%0, %1}"
2336 [(set_attr "type" "ssecvt")
2337 (set_attr "unit" "mmx,*")
2338 (set_attr "prefix_data16" "1,*")
2339 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC; the result
;; goes to a "y" constraint register (unit "mmx") and operand 1 may be
;; an SSE register or memory.  Emits cvtpd2pi.
2341 (define_insn "sse2_cvtpd2pi"
2342 [(set (match_operand:V2SI 0 "register_operand" "=y")
2343 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2344 UNSPEC_FIX_NOTRUNC))]
2346 "cvtpd2pi\t{%1, %0|%0, %1}"
2347 [(set_attr "type" "ssecvt")
2348 (set_attr "unit" "mmx")
2349 (set_attr "bdver1_decode" "double")
2350 (set_attr "prefix_data16" "1")
2351 (set_attr "mode" "DI")])
;; V2DF -> V2SI conversion using the RTL `fix' code; the result goes to
;; a "y" constraint register (unit "mmx") and operand 1 may be an SSE
;; register or memory.  Emits cvttpd2pi.
;; NOTE(review): the mode attribute is "TI" here but "DI" in the
;; otherwise parallel sse2_cvtpd2pi pattern -- confirm this is intended.
2353 (define_insn "sse2_cvttpd2pi"
2354 [(set (match_operand:V2SI 0 "register_operand" "=y")
2355 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2357 "cvttpd2pi\t{%1, %0|%0, %1}"
2358 [(set_attr "type" "ssecvt")
2359 (set_attr "unit" "mmx")
2360 (set_attr "bdver1_decode" "double")
2361 (set_attr "prefix_data16" "1")
2362 (set_attr "mode" "TI")])
;; Scalar SImode integer -> DFmode conversion (float:DF of operand 2,
;; which may be a register or memory).  Operand 1 is a V2DF register:
;; tied to operand 0 in the two noavx alternatives (cvtsi2sd) and a free
;; register in the avx alternative, which emits three-operand vcvtsi2sd.
2364 (define_insn "sse2_cvtsi2sd"
2365 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2368 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2369 (match_operand:V2DF 1 "register_operand" "0,0,x")
2373 cvtsi2sd\t{%2, %0|%0, %2}
2374 cvtsi2sd\t{%2, %0|%0, %2}
2375 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2376 [(set_attr "isa" "noavx,noavx,avx")
2377 (set_attr "type" "sseicvt")
2378 (set_attr "athlon_decode" "double,direct,*")
2379 (set_attr "amdfam10_decode" "vector,double,*")
2380 (set_attr "bdver1_decode" "double,direct,*")
2381 (set_attr "prefix" "orig,orig,vex")
2382 (set_attr "mode" "DF")])
;; DImode variant of the scalar integer -> DFmode conversion; enabled
;; only for TARGET_SSE2 && TARGET_64BIT.  The two noavx alternatives set
;; prefix_rex to 1; the avx alternative sets length_vex to 4 and emits
;; the three-operand vcvtsi2sdq form.
2384 (define_insn "sse2_cvtsi2sdq"
2385 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2388 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2389 (match_operand:V2DF 1 "register_operand" "0,0,x")
2391 "TARGET_SSE2 && TARGET_64BIT"
2393 cvtsi2sdq\t{%2, %0|%0, %2}
2394 cvtsi2sdq\t{%2, %0|%0, %2}
2395 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2396 [(set_attr "isa" "noavx,noavx,avx")
2397 (set_attr "type" "sseicvt")
2398 (set_attr "athlon_decode" "double,direct,*")
2399 (set_attr "amdfam10_decode" "vector,double,*")
2400 (set_attr "bdver1_decode" "double,direct,*")
2401 (set_attr "length_vex" "*,*,4")
2402 (set_attr "prefix_rex" "1,1,*")
2403 (set_attr "prefix" "orig,orig,vex")
2404 (set_attr "mode" "DF")])
;; SImode result of an UNSPEC_FIX_NOTRUNC conversion whose input comes
;; from V2DF operand 1 (register or memory); the visible selector is
;; (parallel [(const_int 0)]), i.e. element 0.  Emits %vcvtsd2si.
2406 (define_insn "sse2_cvtsd2si"
2407 [(set (match_operand:SI 0 "register_operand" "=r,r")
2410 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2411 (parallel [(const_int 0)]))]
2412 UNSPEC_FIX_NOTRUNC))]
2414 "%vcvtsd2si\t{%1, %0|%0, %1}"
2415 [(set_attr "type" "sseicvt")
2416 (set_attr "athlon_decode" "double,vector")
2417 (set_attr "bdver1_decode" "double,double")
2418 (set_attr "prefix_rep" "1")
2419 (set_attr "prefix" "maybe_vex")
2420 (set_attr "mode" "SI")])
;; Same %vcvtsd2si instruction, but matching a plain DFmode operand 1
;; (register or memory) wrapped directly in UNSPEC_FIX_NOTRUNC rather
;; than an element selected from a V2DF vector.
2422 (define_insn "sse2_cvtsd2si_2"
2423 [(set (match_operand:SI 0 "register_operand" "=r,r")
2424 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2425 UNSPEC_FIX_NOTRUNC))]
2427 "%vcvtsd2si\t{%1, %0|%0, %1}"
2428 [(set_attr "type" "sseicvt")
2429 (set_attr "athlon_decode" "double,vector")
2430 (set_attr "amdfam10_decode" "double,double")
2431 (set_attr "bdver1_decode" "double,double")
2432 (set_attr "prefix_rep" "1")
2433 (set_attr "prefix" "maybe_vex")
2434 (set_attr "mode" "SI")])
;; DImode-result variant of the UNSPEC_FIX_NOTRUNC conversion from
;; element 0 of V2DF operand 1; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.  Emits %vcvtsd2si with the {q} suffix.
2436 (define_insn "sse2_cvtsd2siq"
2437 [(set (match_operand:DI 0 "register_operand" "=r,r")
2440 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2441 (parallel [(const_int 0)]))]
2442 UNSPEC_FIX_NOTRUNC))]
2443 "TARGET_SSE2 && TARGET_64BIT"
2444 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2445 [(set_attr "type" "sseicvt")
2446 (set_attr "athlon_decode" "double,vector")
2447 (set_attr "bdver1_decode" "double,double")
2448 (set_attr "prefix_rep" "1")
2449 (set_attr "prefix" "maybe_vex")
2450 (set_attr "mode" "DI")])
;; DImode-result UNSPEC_FIX_NOTRUNC conversion of a plain DFmode
;; operand 1 (register or memory); enabled only for
;; TARGET_SSE2 && TARGET_64BIT.  Emits %vcvtsd2si with the {q} suffix.
2452 (define_insn "sse2_cvtsd2siq_2"
2453 [(set (match_operand:DI 0 "register_operand" "=r,r")
2454 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2455 UNSPEC_FIX_NOTRUNC))]
2456 "TARGET_SSE2 && TARGET_64BIT"
2457 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sseicvt")
2459 (set_attr "athlon_decode" "double,vector")
2460 (set_attr "amdfam10_decode" "double,double")
2461 (set_attr "bdver1_decode" "double,double")
2462 (set_attr "prefix_rep" "1")
2463 (set_attr "prefix" "maybe_vex")
2464 (set_attr "mode" "DI")])
;; SImode result computed from element 0 (selector
;; (parallel [(const_int 0)])) of V2DF operand 1, which may be a
;; register or memory.  Emits %vcvttsd2si.
2466 (define_insn "sse2_cvttsd2si"
2467 [(set (match_operand:SI 0 "register_operand" "=r,r")
2470 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2471 (parallel [(const_int 0)]))))]
2473 "%vcvttsd2si\t{%1, %0|%0, %1}"
2474 [(set_attr "type" "sseicvt")
2475 (set_attr "athlon_decode" "double,vector")
2476 (set_attr "amdfam10_decode" "double,double")
2477 (set_attr "bdver1_decode" "double,double")
2478 (set_attr "prefix_rep" "1")
2479 (set_attr "prefix" "maybe_vex")
2480 (set_attr "mode" "SI")])
;; DImode-result variant of sse2_cvttsd2si: element 0 of V2DF operand 1
;; is converted; enabled only for TARGET_SSE2 && TARGET_64BIT.
;; Emits %vcvttsd2si with the {q} suffix.
2482 (define_insn "sse2_cvttsd2siq"
2483 [(set (match_operand:DI 0 "register_operand" "=r,r")
2486 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2487 (parallel [(const_int 0)]))))]
2488 "TARGET_SSE2 && TARGET_64BIT"
2489 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "athlon_decode" "double,vector")
2492 (set_attr "amdfam10_decode" "double,double")
2493 (set_attr "bdver1_decode" "double,double")
2494 (set_attr "prefix_rep" "1")
2495 (set_attr "prefix" "maybe_vex")
2496 (set_attr "mode" "DI")])
;; V4SI -> V4DF conversion: operand 0 (V4DF register) is set to
;; float:V4DF of V4SI operand 1 (register or memory).
;; Always VEX-encoded; emits vcvtdq2pd.
2498 (define_insn "avx_cvtdq2pd256"
2499 [(set (match_operand:V4DF 0 "register_operand" "=x")
2500 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2502 "vcvtdq2pd\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "ssecvt")
2504 (set_attr "prefix" "vex")
2505 (set_attr "mode" "V4DF")])
;; Variant of the V4DF-producing vcvtdq2pd pattern that takes a V8SI
;; operand 1 and uses selector indices 0..3 (its first four elements).
;; The template prints operand 1 with the %x modifier.
2507 (define_insn "avx_cvtdq2pd256_2"
2508 [(set (match_operand:V4DF 0 "register_operand" "=x")
2511 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2512 (parallel [(const_int 0) (const_int 1)
2513 (const_int 2) (const_int 3)]))))]
2515 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2516 [(set_attr "type" "ssecvt")
2517 (set_attr "prefix" "vex")
2518 (set_attr "mode" "V4DF")])
;; V2DF result computed from V4SI operand 1 (register or memory) with
;; selector indices 0 and 1.  Emits %vcvtdq2pd; the second (Intel
;; syntax) half of the template prints operand 1 with the %q modifier.
2520 (define_insn "sse2_cvtdq2pd"
2521 [(set (match_operand:V2DF 0 "register_operand" "=x")
2524 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2525 (parallel [(const_int 0) (const_int 1)]))))]
2527 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2528 [(set_attr "type" "ssecvt")
2529 (set_attr "prefix" "maybe_vex")
2530 (set_attr "mode" "V2DF")])
;; V4DF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC of operand 1
;; (register or memory).  Always VEX-encoded; emits vcvtpd2dq with the
;; {y} suffix.
2532 (define_insn "avx_cvtpd2dq256"
2533 [(set (match_operand:V4SI 0 "register_operand" "=x")
2534 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2535 UNSPEC_FIX_NOTRUNC))]
2537 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2538 [(set_attr "type" "ssecvt")
2539 (set_attr "prefix" "vex")
2540 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around an unspec:V4SI of V4DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V4SImode zero constant (CONST0_RTX).
2542 (define_expand "avx_cvtpd2dq256_2"
2543 [(set (match_operand:V8SI 0 "register_operand" "")
2545 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2549 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matching a V8SI destination formed from an unspec:V4SI of V4DF
;; operand 1 together with operand 2, which must satisfy const0_operand.
;; Emits vcvtpd2dq{y}, printing operand 0 with the %x modifier.
2551 (define_insn "*avx_cvtpd2dq256_2"
2552 [(set (match_operand:V8SI 0 "register_operand" "=x")
2554 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2556 (match_operand:V4SI 2 "const0_operand" "")))]
2558 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2559 [(set_attr "type" "ssecvt")
2560 (set_attr "prefix" "vex")
2561 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around an unspec:V2SI of V2DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V2SImode zero constant (CONST0_RTX).
2563 (define_expand "sse2_cvtpd2dq"
2564 [(set (match_operand:V4SI 0 "register_operand" "")
2566 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2570 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching a V4SI destination formed from an unspec:V2SI of V2DF
;; operand 1 together with operand 2, which must satisfy const0_operand.
;; The output code returns either the vcvtpd2dq{x} or the cvtpd2dq
;; template.
;; NOTE(review): the condition selecting between the two templates is
;; not visible here; presumably it tests TARGET_AVX -- confirm.
2572 (define_insn "*sse2_cvtpd2dq"
2573 [(set (match_operand:V4SI 0 "register_operand" "=x")
2575 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2577 (match_operand:V2SI 2 "const0_operand" "")))]
2581 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2583 return "cvtpd2dq\t{%1, %0|%0, %1}";
2585 [(set_attr "type" "ssecvt")
2586 (set_attr "prefix_rep" "1")
2587 (set_attr "prefix_data16" "0")
2588 (set_attr "prefix" "maybe_vex")
2589 (set_attr "mode" "TI")
2590 (set_attr "amdfam10_decode" "double")
2591 (set_attr "athlon_decode" "vector")
2592 (set_attr "bdver1_decode" "double")])
;; V4DF -> V4SI conversion using the RTL `fix' code on operand 1
;; (register or memory).  Always VEX-encoded; emits vcvttpd2dq with the
;; {y} suffix.
2594 (define_insn "avx_cvttpd2dq256"
2595 [(set (match_operand:V4SI 0 "register_operand" "=x")
2596 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2598 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssecvt")
2600 (set_attr "prefix" "vex")
2601 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around fix:V4SI of V4DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V4SImode zero constant (CONST0_RTX).
2603 (define_expand "avx_cvttpd2dq256_2"
2604 [(set (match_operand:V8SI 0 "register_operand" "")
2606 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2609 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matching a V8SI destination formed from fix:V4SI of V4DF
;; operand 1 together with operand 2, which must satisfy const0_operand.
;; Emits vcvttpd2dq{y}, printing operand 0 with the %x modifier.
2611 (define_insn "*avx_cvttpd2dq256_2"
2612 [(set (match_operand:V8SI 0 "register_operand" "=x")
2614 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2615 (match_operand:V4SI 2 "const0_operand" "")))]
2617 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "prefix" "vex")
2620 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around fix:V2SI of V2DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V2SImode zero constant (CONST0_RTX).
2622 (define_expand "sse2_cvttpd2dq"
2623 [(set (match_operand:V4SI 0 "register_operand" "")
2625 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2628 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching a V4SI destination formed from fix:V2SI of V2DF
;; operand 1 together with operand 2, which must satisfy const0_operand.
;; The output code returns either the vcvttpd2dq{x} or the cvttpd2dq
;; template.
;; NOTE(review): the condition selecting between the two templates is
;; not visible here; presumably it tests TARGET_AVX -- confirm.
2630 (define_insn "*sse2_cvttpd2dq"
2631 [(set (match_operand:V4SI 0 "register_operand" "=x")
2633 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2634 (match_operand:V2SI 2 "const0_operand" "")))]
2638 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2640 return "cvttpd2dq\t{%1, %0|%0, %1}";
2642 [(set_attr "type" "ssecvt")
2643 (set_attr "amdfam10_decode" "double")
2644 (set_attr "athlon_decode" "vector")
2645 (set_attr "bdver1_decode" "double")
2646 (set_attr "prefix" "maybe_vex")
2647 (set_attr "mode" "TI")])
;; Double -> single scalar conversion: V2DF operand 2 (register or
;; memory) goes through float_truncate:V2SF, and V4SF operand 1 is tied
;; to operand 0 in the two noavx alternatives (cvtsd2ss) and is a free
;; register in the avx alternative (three-operand vcvtsd2ss).
2649 (define_insn "sse2_cvtsd2ss"
2650 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2653 (float_truncate:V2SF
2654 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2655 (match_operand:V4SF 1 "register_operand" "0,0,x")
2659 cvtsd2ss\t{%2, %0|%0, %2}
2660 cvtsd2ss\t{%2, %0|%0, %2}
2661 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2662 [(set_attr "isa" "noavx,noavx,avx")
2663 (set_attr "type" "ssecvt")
2664 (set_attr "athlon_decode" "vector,double,*")
2665 (set_attr "amdfam10_decode" "vector,double,*")
2666 (set_attr "bdver1_decode" "direct,direct,*")
2667 (set_attr "prefix" "orig,orig,vex")
2668 (set_attr "mode" "SF")])
;; Single -> double scalar conversion: the input comes from V4SF
;; operand 2 (register or memory) with selector indices 0 and 1, and
;; V2DF operand 1 is tied to operand 0 in the two noavx alternatives
;; (cvtss2sd) and is a free register in the avx alternative
;; (three-operand vcvtss2sd).
2670 (define_insn "sse2_cvtss2sd"
2671 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2675 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2676 (parallel [(const_int 0) (const_int 1)])))
2677 (match_operand:V2DF 1 "register_operand" "0,0,x")
2681 cvtss2sd\t{%2, %0|%0, %2}
2682 cvtss2sd\t{%2, %0|%0, %2}
2683 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2684 [(set_attr "isa" "noavx,noavx,avx")
2685 (set_attr "type" "ssecvt")
2686 (set_attr "amdfam10_decode" "vector,double,*")
2687 (set_attr "athlon_decode" "direct,direct,*")
2688 (set_attr "bdver1_decode" "direct,direct,*")
2689 (set_attr "prefix" "orig,orig,vex")
2690 (set_attr "mode" "DF")])
;; V4DF -> V4SF narrowing conversion: operand 0 is set to
;; float_truncate:V4SF of operand 1 (register or memory).
;; Always VEX-encoded; emits vcvtpd2ps with the {y} suffix.
2692 (define_insn "avx_cvtpd2ps256"
2693 [(set (match_operand:V4SF 0 "register_operand" "=x")
2694 (float_truncate:V4SF
2695 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2697 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "ssecvt")
2699 (set_attr "prefix" "vex")
2700 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination built around float_truncate:V2SF of
;; V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SFmode zero constant (CONST0_RTX).
2702 (define_expand "sse2_cvtpd2ps"
2703 [(set (match_operand:V4SF 0 "register_operand" "")
2705 (float_truncate:V2SF
2706 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2709 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matching a V4SF destination formed from float_truncate:V2SF of
;; V2DF operand 1 together with operand 2, which must satisfy
;; const0_operand.  The output code returns either the vcvtpd2ps{x} or
;; the cvtpd2ps template.
;; NOTE(review): the condition selecting between the two templates is
;; not visible here; presumably it tests TARGET_AVX -- confirm.
2711 (define_insn "*sse2_cvtpd2ps"
2712 [(set (match_operand:V4SF 0 "register_operand" "=x")
2714 (float_truncate:V2SF
2715 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2716 (match_operand:V2SF 2 "const0_operand" "")))]
2720 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2722 return "cvtpd2ps\t{%1, %0|%0, %1}";
2724 [(set_attr "type" "ssecvt")
2725 (set_attr "amdfam10_decode" "double")
2726 (set_attr "athlon_decode" "vector")
2727 (set_attr "bdver1_decode" "double")
2728 (set_attr "prefix_data16" "1")
2729 (set_attr "prefix" "maybe_vex")
2730 (set_attr "mode" "V4SF")])
;; V4DF result computed from V4SF operand 1 (register or memory).
;; Always VEX-encoded; emits vcvtps2pd.
2732 (define_insn "avx_cvtps2pd256"
2733 [(set (match_operand:V4DF 0 "register_operand" "=x")
2735 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2737 "vcvtps2pd\t{%1, %0|%0, %1}"
2738 [(set_attr "type" "ssecvt")
2739 (set_attr "prefix" "vex")
2740 (set_attr "mode" "V4DF")])
;; Variant of the V4DF-producing vcvtps2pd pattern that takes a V8SF
;; operand 1 and uses selector indices 0..3 (its first four elements).
;; The template prints operand 1 with the %x modifier.
2742 (define_insn "*avx_cvtps2pd256_2"
2743 [(set (match_operand:V4DF 0 "register_operand" "=x")
2746 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2747 (parallel [(const_int 0) (const_int 1)
2748 (const_int 2) (const_int 3)]))))]
2750 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2751 [(set_attr "type" "ssecvt")
2752 (set_attr "prefix" "vex")
2753 (set_attr "mode" "V4DF")])
;; V2DF result computed from V4SF operand 1 (register or memory) with
;; selector indices 0 and 1.  Emits %vcvtps2pd; the second (Intel
;; syntax) half of the template prints operand 1 with the %q modifier.
2755 (define_insn "sse2_cvtps2pd"
2756 [(set (match_operand:V2DF 0 "register_operand" "=x")
2759 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2760 (parallel [(const_int 0) (const_int 1)]))))]
2762 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2763 [(set_attr "type" "ssecvt")
2764 (set_attr "amdfam10_decode" "direct")
2765 (set_attr "athlon_decode" "double")
2766 (set_attr "bdver1_decode" "double")
2767 (set_attr "prefix_data16" "0")
2768 (set_attr "prefix" "maybe_vex")
2769 (set_attr "mode" "V2DF")])
;; Two-step expander: a first select with indices (6 7 2 3) involving
;; V4SF operand 1, followed by a set of V2DF operand 0 from a select
;; with indices (0 1).  Operand 2 is a fresh V4SF pseudo register
;; created by the preparation statement.
;; NOTE(review): presumably operand 2 receives the first step's result
;; and feeds the second; those lines are not visible here -- confirm.
2771 (define_expand "vec_unpacks_hi_v4sf"
2776 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2777 (parallel [(const_int 6) (const_int 7)
2778 (const_int 2) (const_int 3)])))
2779 (set (match_operand:V2DF 0 "register_operand" "")
2783 (parallel [(const_int 0) (const_int 1)]))))]
2785 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander: a select of indices 4..7 from V8SF operand 1,
;; followed by a set of V4DF operand 0.  Operand 2 is a fresh V4SF
;; pseudo register created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected upper half that
;; is then widened; the connecting lines are not visible here -- confirm.
2787 (define_expand "vec_unpacks_hi_v8sf"
2790 (match_operand:V8SF 1 "nonimmediate_operand" "")
2791 (parallel [(const_int 4) (const_int 5)
2792 (const_int 6) (const_int 7)])))
2793 (set (match_operand:V4DF 0 "register_operand" "")
2797 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Sets V2DF operand 0 from V4SF operand 1 (register or memory) using
;; selector indices 0 and 1.
2799 (define_expand "vec_unpacks_lo_v4sf"
2800 [(set (match_operand:V2DF 0 "register_operand" "")
2803 (match_operand:V4SF 1 "nonimmediate_operand" "")
2804 (parallel [(const_int 0) (const_int 1)]))))]
;; Sets V4DF operand 0 from V8SF operand 1 (register or memory) using
;; selector indices 0..3.
2807 (define_expand "vec_unpacks_lo_v8sf"
2808 [(set (match_operand:V4DF 0 "register_operand" "")
2811 (match_operand:V8SF 1 "nonimmediate_operand" "")
2812 (parallel [(const_int 0) (const_int 1)
2813 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode used as the
;; result of the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders:
;; V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
2816 (define_mode_attr sseunpackfltmode
2817 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Expander over the VI2_AVX2 modes: calls gen_vec_unpacks_hi_<mode> to
;; unpack operand 1 into a temporary of mode <sseunpackmode>, then sets
;; operand 0 to the FLOAT of that temporary in mode <sseunpackfltmode>.
2819 (define_expand "vec_unpacks_float_hi_<mode>"
2820 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2821 (match_operand:VI2_AVX2 1 "register_operand" "")]
2824 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2826 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2828 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: calls gen_vec_unpacks_lo_<mode> to
;; unpack operand 1 into a temporary of mode <sseunpackmode>, then sets
;; operand 0 to the FLOAT of that temporary in mode <sseunpackfltmode>.
2832 (define_expand "vec_unpacks_float_lo_<mode>"
2833 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2834 (match_operand:VI2_AVX2 1 "register_operand" "")]
2837 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2839 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2840 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2841 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: calls gen_vec_unpacku_hi_<mode> to
;; unpack operand 1 into a temporary of mode <sseunpackmode>, then sets
;; operand 0 to the FLOAT of that temporary in mode <sseunpackfltmode>.
2845 (define_expand "vec_unpacku_float_hi_<mode>"
2846 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2847 (match_operand:VI2_AVX2 1 "register_operand" "")]
2850 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2852 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2853 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2854 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: calls gen_vec_unpacku_lo_<mode> to
;; unpack operand 1 into a temporary of mode <sseunpackmode>, then sets
;; operand 0 to the FLOAT of that temporary in mode <sseunpackfltmode>.
2858 (define_expand "vec_unpacku_float_lo_<mode>"
2859 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2860 (match_operand:VI2_AVX2 1 "register_operand" "")]
2863 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2865 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2866 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2867 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander: a select of indices (2 3 2 3) from V4SI operand 1,
;; followed by a set of V2DF operand 0 from a select with indices (0 1).
;; Operand 2 is a fresh V4SI pseudo register created by the preparation
;; statement.
;; NOTE(review): presumably operand 2 receives the first step's result
;; and feeds the second; those lines are not visible here -- confirm.
2871 (define_expand "vec_unpacks_float_hi_v4si"
2874 (match_operand:V4SI 1 "nonimmediate_operand" "")
2875 (parallel [(const_int 2) (const_int 3)
2876 (const_int 2) (const_int 3)])))
2877 (set (match_operand:V2DF 0 "register_operand" "")
2881 (parallel [(const_int 0) (const_int 1)]))))]
2883 "operands[2] = gen_reg_rtx (V4SImode);")
;; Sets V2DF operand 0 from V4SI operand 1 (register or memory) using
;; selector indices 0 and 1.
2885 (define_expand "vec_unpacks_float_lo_v4si"
2886 [(set (match_operand:V2DF 0 "register_operand" "")
2889 (match_operand:V4SI 1 "nonimmediate_operand" "")
2890 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander: a select of indices 4..7 from V8SI operand 1,
;; followed by a set of V4DF operand 0.  Operand 2 is a fresh V4SI
;; pseudo register created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected upper half that
;; is then converted; the connecting lines are not visible -- confirm.
2893 (define_expand "vec_unpacks_float_hi_v8si"
2896 (match_operand:V8SI 1 "nonimmediate_operand" "")
2897 (parallel [(const_int 4) (const_int 5)
2898 (const_int 6) (const_int 7)])))
2899 (set (match_operand:V4DF 0 "register_operand" "")
2903 "operands[2] = gen_reg_rtx (V4SImode);")
;; Sets V4DF operand 0 from V8SI operand 1 (register or memory) using
;; selector indices 0..3.
2905 (define_expand "vec_unpacks_float_lo_v8si"
2906 [(set (match_operand:V4DF 0 "register_operand" "")
2909 (match_operand:V8SI 1 "nonimmediate_operand" "")
2910 (parallel [(const_int 0) (const_int 1)
2911 (const_int 2) (const_int 3)]))))]
;; Multi-step expander producing V2DF operand 0 from V4SI operand 1.
;; Visible steps: a select with indices (2 3 2 3), a select with indices
;; (0 1), an lt compare of match_dup 6 against match_dup 3, an AND of
;; match_dup 7 with match_dup 4, and a final plus of match_dup 6 and
;; match_dup 8 into operand 0.
;; The preparation code sets operand 3 to a zero V2DF register, operand 4
;; to a V2DF constant vector built from 2**32 (real_ldexp of dconst1 by
;; 32), operand 5 to a fresh V4SI register and operands 6..8 to fresh
;; V2DF registers.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative; the intermediate set destinations are
;; not visible here -- confirm.
2914 (define_expand "vec_unpacku_float_hi_v4si"
2917 (match_operand:V4SI 1 "nonimmediate_operand" "")
2918 (parallel [(const_int 2) (const_int 3)
2919 (const_int 2) (const_int 3)])))
2924 (parallel [(const_int 0) (const_int 1)]))))
2926 (lt:V2DF (match_dup 6) (match_dup 3)))
2928 (and:V2DF (match_dup 7) (match_dup 4)))
2929 (set (match_operand:V2DF 0 "register_operand" "")
2930 (plus:V2DF (match_dup 6) (match_dup 8)))]
2933 REAL_VALUE_TYPE TWO32r;
2937 real_ldexp (&TWO32r, &dconst1, 32);
2938 x = const_double_from_real_value (TWO32r, DFmode);
2940 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2941 operands[4] = force_reg (V2DFmode,
2942 ix86_build_const_vector (V2DFmode, 1, x));
2944 operands[5] = gen_reg_rtx (V4SImode);
2946 for (i = 6; i < 9; i++)
2947 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander producing V2DF operand 0 from V4SI operand 1.
;; Visible steps: a select with indices (0 1), an lt compare of
;; match_dup 5 against match_dup 3, an AND of match_dup 6 with
;; match_dup 4, and a final plus of match_dup 5 and match_dup 7 into
;; operand 0.
;; The preparation code sets operand 3 to a zero V2DF register, operand 4
;; to a V2DF constant vector built from 2**32 (real_ldexp of dconst1 by
;; 32), and operands 5..7 to fresh V2DF registers.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative; the intermediate set destinations are
;; not visible here -- confirm.
2950 (define_expand "vec_unpacku_float_lo_v4si"
2954 (match_operand:V4SI 1 "nonimmediate_operand" "")
2955 (parallel [(const_int 0) (const_int 1)]))))
2957 (lt:V2DF (match_dup 5) (match_dup 3)))
2959 (and:V2DF (match_dup 6) (match_dup 4)))
2960 (set (match_operand:V2DF 0 "register_operand" "")
2961 (plus:V2DF (match_dup 5) (match_dup 7)))]
2964 REAL_VALUE_TYPE TWO32r;
2968 real_ldexp (&TWO32r, &dconst1, 32);
2969 x = const_double_from_real_value (TWO32r, DFmode);
2971 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2972 operands[4] = force_reg (V2DFmode,
2973 ix86_build_const_vector (V2DFmode, 1, x));
2975 for (i = 5; i < 8; i++)
2976 operands[i] = gen_reg_rtx (V2DFmode);
;; Expands to an explicit instruction sequence producing V4DF operand 0
;; from V8SI operand 1:
;;   tmp[5] = high half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = gen_avx_cvtdq2pd256 of tmp[5]
;;   tmp[3] = LT (tmp[2], tmp[0])      where tmp[0] is a zero V4DF
;;   tmp[4] = tmp[3] AND tmp[1]        where tmp[1] is built from 2**32
;;   operand 0 = tmp[2] + tmp[4]
;; i.e. lanes that compare below zero after the conversion get the 2**32
;; constant added.
2979 (define_expand "vec_unpacku_float_hi_v8si"
2980 [(match_operand:V4DF 0 "register_operand" "")
2981 (match_operand:V8SI 1 "register_operand" "")]
2984 REAL_VALUE_TYPE TWO32r;
2988 real_ldexp (&TWO32r, &dconst1, 32);
2989 x = const_double_from_real_value (TWO32r, DFmode);
2991 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2992 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2993 tmp[5] = gen_reg_rtx (V4SImode);
2995 for (i = 2; i < 5; i++)
2996 tmp[i] = gen_reg_rtx (V4DFmode);
2997 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2998 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
2999 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3000 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3001 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3002 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expands to an explicit instruction sequence producing V4DF operand 0
;; from V8SI operand 1:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 of operand 1
;;   tmp[3] = LT (tmp[2], tmp[0])      where tmp[0] is a zero V4DF
;;   tmp[4] = tmp[3] AND tmp[1]        where tmp[1] is built from 2**32
;;   operand 0 = tmp[2] + tmp[4]
;; i.e. lanes that compare below zero after the conversion get the 2**32
;; constant added.
3006 (define_expand "vec_unpacku_float_lo_v8si"
3007 [(match_operand:V4DF 0 "register_operand" "")
3008 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3011 REAL_VALUE_TYPE TWO32r;
3015 real_ldexp (&TWO32r, &dconst1, 32);
3016 x = const_double_from_real_value (TWO32r, DFmode);
3018 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3019 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3021 for (i = 2; i < 5; i++)
3022 tmp[i] = gen_reg_rtx (V4DFmode);
3023 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3024 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3025 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3026 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3027 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander that applies float_truncate:V4SF to each of the two V4DF
;; inputs (operands 1 and 2) and then sets V8SF operand 0.  Operands 3
;; and 4 are fresh V4SF pseudo registers created by the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated
;; halves that are combined into operand 0; the combining RTL is not
;; visible here -- confirm.
3031 (define_expand "vec_pack_trunc_v4df"
3033 (float_truncate:V4SF
3034 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3036 (float_truncate:V4SF
3037 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3038 (set (match_operand:V8SF 0 "register_operand" "")
3044 operands[3] = gen_reg_rtx (V4SFmode);
3045 operands[4] = gen_reg_rtx (V4SFmode);
;; Packs two V2DF inputs into V4SF operand 0: each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the
;; two temporaries are then merged with gen_sse_movlhps.
3048 (define_expand "vec_pack_trunc_v2df"
3049 [(match_operand:V4SF 0 "register_operand" "")
3050 (match_operand:V2DF 1 "nonimmediate_operand" "")
3051 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3056 r1 = gen_reg_rtx (V4SFmode);
3057 r2 = gen_reg_rtx (V4SFmode);
3059 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3060 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3061 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V4DF inputs into V8SI operand 0: each input is converted
;; with gen_avx_cvttpd2dq256_2 into its own V8SI temporary (r1, r2), and
;; the two temporaries are combined with gen_avx_vperm2f128v8si3 using
;; the immediate 0x20.
3065 (define_expand "vec_pack_sfix_trunc_v4df"
3066 [(match_operand:V8SI 0 "register_operand" "")
3067 (match_operand:V4DF 1 "nonimmediate_operand" "")
3068 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3073 r1 = gen_reg_rtx (V8SImode);
3074 r2 = gen_reg_rtx (V8SImode);
3076 emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
3077 emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
3078 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary (r1, r2), and the
;; results are merged by gen_vec_interleave_lowv2di operating on the
;; V2DImode lowparts of operand 0, r1 and r2.
3082 (define_expand "vec_pack_sfix_trunc_v2df"
3083 [(match_operand:V4SI 0 "register_operand" "")
3084 (match_operand:V2DF 1 "nonimmediate_operand" "")
3085 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3090 r1 = gen_reg_rtx (V4SImode);
3091 r2 = gen_reg_rtx (V4SImode);
3093 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3094 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3095 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3096 gen_lowpart (V2DImode, r1),
3097 gen_lowpart (V2DImode, r2)));
;; Packs two V4DF inputs into V8SI operand 0: each input is converted
;; with gen_avx_cvtpd2dq256_2 into its own V8SI temporary (r1, r2), and
;; the two temporaries are combined with gen_avx_vperm2f128v8si3 using
;; the immediate 0x20.
3101 (define_expand "vec_pack_sfix_v4df"
3102 [(match_operand:V8SI 0 "register_operand" "")
3103 (match_operand:V4DF 1 "nonimmediate_operand" "")
3104 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3109 r1 = gen_reg_rtx (V8SImode);
3110 r2 = gen_reg_rtx (V8SImode);
3112 emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
3113 emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
3114 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted
;; with gen_sse2_cvtpd2dq into its own V4SI temporary (r1, r2), and the
;; results are merged by gen_vec_interleave_lowv2di operating on the
;; V2DImode lowparts of operand 0, r1 and r2.
3118 (define_expand "vec_pack_sfix_v2df"
3119 [(match_operand:V4SI 0 "register_operand" "")
3120 (match_operand:V2DF 1 "nonimmediate_operand" "")
3121 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3126 r1 = gen_reg_rtx (V4SImode);
3127 r2 = gen_reg_rtx (V4SImode);
3129 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3130 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3131 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3132 gen_lowpart (V2DImode, r1),
3133 gen_lowpart (V2DImode, r2)));
3137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3139 ;; Parallel single-precision floating point element swizzling
3141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn.  It lets
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode) choose the destination
;; to use, emits gen_sse_movhlps into that destination, and copies the
;; result to operand 0 when the chosen destination differs from it.
3143 (define_expand "sse_movhlps_exp"
3144 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3147 (match_operand:V4SF 1 "nonimmediate_operand" "")
3148 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3149 (parallel [(const_int 6)
3155 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3157 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3159 /* Fix up the destination if needed. */
3160 if (dst != operands[0])
3161 emit_move_insn (operands[0], dst);
;; Five-alternative insn on V4SF operands, enabled for TARGET_SSE as
;; long as operands 1 and 2 are not both memory:
;;   0/1: register-register movhlps / vmovhlps
;;   2/3: operand 2 is offsettable memory ("o"); movlps / vmovlps print
;;        it with the %H modifier
;;   4:   destination is memory ("m"); emits %vmovhps
;; The mode attribute is V4SF for alternatives 0/1 and V2SF for 2..4.
3166 (define_insn "sse_movhlps"
3167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3170 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3171 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3172 (parallel [(const_int 6)
3176 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3178 movhlps\t{%2, %0|%0, %2}
3179 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3180 movlps\t{%H2, %0|%0, %H2}
3181 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3182 %vmovhps\t{%2, %0|%0, %2}"
3183 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3184 (set_attr "type" "ssemov")
3185 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3186 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn.  It lets
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode) choose the destination
;; to use, emits gen_sse_movlhps into that destination, and copies the
;; result to operand 0 when the chosen destination differs from it.
3188 (define_expand "sse_movlhps_exp"
3189 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3192 (match_operand:V4SF 1 "nonimmediate_operand" "")
3193 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3194 (parallel [(const_int 0)
3200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3202 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3204 /* Fix up the destination if needed. */
3205 if (dst != operands[0])
3206 emit_move_insn (operands[0], dst);
;; Five-alternative insn on V4SF operands, enabled for TARGET_SSE when
;; ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands) holds:
;;   0/1: register-register movlhps / vmovlhps
;;   2/3: movhps / vmovhps (operand 2 constraint is "m" in alternative 2)
;;   4:   destination is offsettable memory ("o"); emits %vmovlps and
;;        prints operand 0 with the %H modifier
;; The mode attribute is V4SF for alternatives 0/1 and V2SF for 2..4.
3211 (define_insn "sse_movlhps"
3212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3215 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3216 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3217 (parallel [(const_int 0)
3221 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3223 movlhps\t{%2, %0|%0, %2}
3224 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3225 movhps\t{%2, %0|%0, %2}
3226 vmovhps\t{%2, %1, %0|%0, %1, %2}
3227 %vmovlps\t{%2, %H0|%H0, %2}"
3228 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3229 (set_attr "type" "ssemov")
3230 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3231 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3233 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Operands 1 (register) and 2 (register or memory) are V8SF; the
;; selector picks indices 2,10,3,11 and 6,14,7,15, i.e. it alternates
;; between the two inputs for elements 2-3 and 6-7.  Always VEX-encoded;
;; emits vunpckhps.
3234 (define_insn "avx_unpckhps256"
3235 [(set (match_operand:V8SF 0 "register_operand" "=x")
3238 (match_operand:V8SF 1 "register_operand" "x")
3239 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3240 (parallel [(const_int 2) (const_int 10)
3241 (const_int 3) (const_int 11)
3242 (const_int 6) (const_int 14)
3243 (const_int 7) (const_int 15)])))]
3245 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3246 [(set_attr "type" "sselog")
3247 (set_attr "prefix" "vex")
3248 (set_attr "mode" "V8SF")])
;; Three-step expander on V8SF operands 1 and 2.  The visible selectors
;; are (0 8 1 9 4 12 5 13), then (2 10 3 11 6 14 7 15), and finally
;; (4 5 6 7 12 13 14 15) for the set of operand 0.  Operands 3 and 4 are
;; fresh V8SF pseudo registers created by the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the results of the
;; first two selects and the last step picks the upper 128-bit half of
;; each; the set destinations are not visible here -- confirm.
3250 (define_expand "vec_interleave_highv8sf"
3254 (match_operand:V8SF 1 "register_operand" "x")
3255 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3256 (parallel [(const_int 0) (const_int 8)
3257 (const_int 1) (const_int 9)
3258 (const_int 4) (const_int 12)
3259 (const_int 5) (const_int 13)])))
3265 (parallel [(const_int 2) (const_int 10)
3266 (const_int 3) (const_int 11)
3267 (const_int 6) (const_int 14)
3268 (const_int 7) (const_int 15)])))
3269 (set (match_operand:V8SF 0 "register_operand" "")
3274 (parallel [(const_int 4) (const_int 5)
3275 (const_int 6) (const_int 7)
3276 (const_int 12) (const_int 13)
3277 (const_int 14) (const_int 15)])))]
3280 operands[3] = gen_reg_rtx (V8SFmode);
3281 operands[4] = gen_reg_rtx (V8SFmode);
;; High interleave of two V4SF values, selector (2 6 3 7).
;; Alternative 0 is the two-operand unpckhps (operand 1 tied to the
;; destination, noavx); alternative 1 is the three-operand vunpckhps.
3284 (define_insn "vec_interleave_highv4sf"
3285 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3288 (match_operand:V4SF 1 "register_operand" "0,x")
3289 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3290 (parallel [(const_int 2) (const_int 6)
3291 (const_int 3) (const_int 7)])))]
3294 unpckhps\t{%2, %0|%0, %2}
3295 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3296 [(set_attr "isa" "noavx,avx")
3297 (set_attr "type" "sselog")
3298 (set_attr "prefix" "orig,vex")
3299 (set_attr "mode" "V4SF")])
3301 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: operand 1 is a register, operand 2 a register or
;; memory.  The element selector is (0 8 1 9 4 12 5 13), i.e. the lower
;; two elements of each group of four, taken pairwise.
3302 (define_insn "avx_unpcklps256"
3303 [(set (match_operand:V8SF 0 "register_operand" "=x")
3306 (match_operand:V8SF 1 "register_operand" "x")
3307 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3308 (parallel [(const_int 0) (const_int 8)
3309 (const_int 1) (const_int 9)
3310 (const_int 4) (const_int 12)
3311 (const_int 5) (const_int 13)])))]
3313 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3314 [(set_attr "type" "sselog")
3315 (set_attr "prefix" "vex")
3316 (set_attr "mode" "V8SF")])
;; Full-width low interleave for V8SF, built from three steps.  Two
;; fresh V8SF pseudos (operands 3 and 4) are allocated in the
;; preparation code.  The selectors below are, in order: the in-lane low
;; interleave (0 8 1 9 4 12 5 13), the in-lane high interleave
;; (2 10 3 11 6 14 7 15), and a final pick of elements 0-3 and 8-11
;; that is stored into operand 0.
3318 (define_expand "vec_interleave_lowv8sf"
3322 (match_operand:V8SF 1 "register_operand" "x")
3323 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3324 (parallel [(const_int 0) (const_int 8)
3325 (const_int 1) (const_int 9)
3326 (const_int 4) (const_int 12)
3327 (const_int 5) (const_int 13)])))
3333 (parallel [(const_int 2) (const_int 10)
3334 (const_int 3) (const_int 11)
3335 (const_int 6) (const_int 14)
3336 (const_int 7) (const_int 15)])))
3337 (set (match_operand:V8SF 0 "register_operand" "")
3342 (parallel [(const_int 0) (const_int 1)
3343 (const_int 2) (const_int 3)
3344 (const_int 8) (const_int 9)
3345 (const_int 10) (const_int 11)])))]
3348 operands[3] = gen_reg_rtx (V8SFmode);
3349 operands[4] = gen_reg_rtx (V8SFmode);
;; Low interleave of two V4SF values, selector (0 4 1 5).
;; Alternative 0 is the two-operand unpcklps (operand 1 tied to the
;; destination, noavx); alternative 1 is the three-operand vunpcklps.
3352 (define_insn "vec_interleave_lowv4sf"
3353 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3356 (match_operand:V4SF 1 "register_operand" "0,x")
3357 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3358 (parallel [(const_int 0) (const_int 4)
3359 (const_int 1) (const_int 5)])))]
3362 unpcklps\t{%2, %0|%0, %2}
3363 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3364 [(set_attr "isa" "noavx,avx")
3365 (set_attr "type" "sselog")
3366 (set_attr "prefix" "orig,vex")
3367 (set_attr "mode" "V4SF")])
3369 ;; These are modeled with the same vec_concat as the others so that we
3370 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates every odd-indexed element of operand 1
;; (selector 1 1 3 3 5 5 7 7).  The source may be a register or memory.
3371 (define_insn "avx_movshdup256"
3372 [(set (match_operand:V8SF 0 "register_operand" "=x")
3375 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3377 (parallel [(const_int 1) (const_int 1)
3378 (const_int 3) (const_int 3)
3379 (const_int 5) (const_int 5)
3380 (const_int 7) (const_int 7)])))]
3382 "vmovshdup\t{%1, %0|%0, %1}"
3383 [(set_attr "type" "sse")
3384 (set_attr "prefix" "vex")
3385 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF (the selector begins with element 1).
;; A single template serves both encodings through the %v prefix; the
;; instruction is marked as carrying a rep prefix for length purposes.
3387 (define_insn "sse3_movshdup"
3388 [(set (match_operand:V4SF 0 "register_operand" "=x")
3391 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3393 (parallel [(const_int 1)
3398 "%vmovshdup\t{%1, %0|%0, %1}"
3399 [(set_attr "type" "sse")
3400 (set_attr "prefix_rep" "1")
3401 (set_attr "prefix" "maybe_vex")
3402 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: duplicates every even-indexed element of operand 1
;; (selector 0 0 2 2 4 4 6 6).  The source may be a register or memory.
3404 (define_insn "avx_movsldup256"
3405 [(set (match_operand:V8SF 0 "register_operand" "=x")
3408 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3410 (parallel [(const_int 0) (const_int 0)
3411 (const_int 2) (const_int 2)
3412 (const_int 4) (const_int 4)
3413 (const_int 6) (const_int 6)])))]
3415 "vmovsldup\t{%1, %0|%0, %1}"
3416 [(set_attr "type" "sse")
3417 (set_attr "prefix" "vex")
3418 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF (the selector begins with element 0).
;; A single template serves both encodings through the %v prefix; the
;; instruction is marked as carrying a rep prefix for length purposes.
3420 (define_insn "sse3_movsldup"
3421 [(set (match_operand:V4SF 0 "register_operand" "=x")
3424 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3426 (parallel [(const_int 0)
3431 "%vmovsldup\t{%1, %0|%0, %1}"
3432 [(set_attr "type" "sse")
3433 (set_attr "prefix_rep" "1")
3434 (set_attr "prefix" "maybe_vex")
3435 (set_attr "mode" "V8SF")])
;; Expander for the 256-bit shufps builtin form: operand 3 is the raw
;; 8-bit immediate.  It is decoded into eight explicit element selectors
;; for avx_shufps256_1.  The four 2-bit fields give selectors in the
;; ranges 0-3, 0-3, 8-11, 8-11; the same four fields are then reused
;; with a further +4 offset (4-7, 4-7, 12-15, 12-15).
3437 (define_expand "avx_shufps256"
3438 [(match_operand:V8SF 0 "register_operand" "")
3439 (match_operand:V8SF 1 "register_operand" "")
3440 (match_operand:V8SF 2 "nonimmediate_operand" "")
3441 (match_operand:SI 3 "const_int_operand" "")]
3444 int mask = INTVAL (operands[3]);
3445 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3446 GEN_INT ((mask >> 0) & 3),
3447 GEN_INT ((mask >> 2) & 3),
3448 GEN_INT (((mask >> 4) & 3) + 8),
3449 GEN_INT (((mask >> 6) & 3) + 8),
3450 GEN_INT (((mask >> 0) & 3) + 4),
3451 GEN_INT (((mask >> 2) & 3) + 4),
3452 GEN_INT (((mask >> 4) & 3) + 12),
3453 GEN_INT (((mask >> 6) & 3) + 12)));
3457 ;; Each 2-bit field in the mask selects 2 elements (one per group of four).
;; Operands 3-6 are the selectors for the first four result elements and
;; operands 7-10 those for the last four.  The insn condition requires
;; each of operands 7-10 to equal the matching one of operands 3-6 plus
;; 4, so a single immediate describes both halves.  The output code
;; rebuilds that immediate from operands 3-6 (removing the +8 bias of
;; operands 5 and 6) and overwrites operands[3] with it before printing.
3458 (define_insn "avx_shufps256_1"
3459 [(set (match_operand:V8SF 0 "register_operand" "=x")
3462 (match_operand:V8SF 1 "register_operand" "x")
3463 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3464 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3465 (match_operand 4 "const_0_to_3_operand" "")
3466 (match_operand 5 "const_8_to_11_operand" "")
3467 (match_operand 6 "const_8_to_11_operand" "")
3468 (match_operand 7 "const_4_to_7_operand" "")
3469 (match_operand 8 "const_4_to_7_operand" "")
3470 (match_operand 9 "const_12_to_15_operand" "")
3471 (match_operand 10 "const_12_to_15_operand" "")])))]
3473 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3474 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3475 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3476 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3479 mask = INTVAL (operands[3]);
3480 mask |= INTVAL (operands[4]) << 2;
3481 mask |= (INTVAL (operands[5]) - 8) << 4;
3482 mask |= (INTVAL (operands[6]) - 8) << 6;
3483 operands[3] = GEN_INT (mask);
3485 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3487 [(set_attr "type" "sselog")
3488 (set_attr "length_immediate" "1")
3489 (set_attr "prefix" "vex")
3490 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps builtin form: operand 3 is the raw
;; 8-bit immediate.  Its four 2-bit fields are turned into explicit
;; element selectors for sse_shufps_v4sf; the upper two fields are
;; biased by 4.
3492 (define_expand "sse_shufps"
3493 [(match_operand:V4SF 0 "register_operand" "")
3494 (match_operand:V4SF 1 "register_operand" "")
3495 (match_operand:V4SF 2 "nonimmediate_operand" "")
3496 (match_operand:SI 3 "const_int_operand" "")]
3499 int mask = INTVAL (operands[3]);
3500 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3501 GEN_INT ((mask >> 0) & 3),
3502 GEN_INT ((mask >> 2) & 3),
3503 GEN_INT (((mask >> 4) & 3) + 4),
3504 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps on any VI4F_128 mode, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 select from
;; 0-3, operands 5 and 6 from 4-7.  The output code packs the four
;; selectors back into one immediate (removing the +4 bias of operands
;; 5 and 6), stores it in operands[3], and prints shufps or vshufps
;; depending on the alternative.
3508 (define_insn "sse_shufps_<mode>"
3509 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3510 (vec_select:VI4F_128
3511 (vec_concat:<ssedoublevecmode>
3512 (match_operand:VI4F_128 1 "register_operand" "0,x")
3513 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3514 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3515 (match_operand 4 "const_0_to_3_operand" "")
3516 (match_operand 5 "const_4_to_7_operand" "")
3517 (match_operand 6 "const_4_to_7_operand" "")])))]
3521 mask |= INTVAL (operands[3]) << 0;
3522 mask |= INTVAL (operands[4]) << 2;
3523 mask |= (INTVAL (operands[5]) - 4) << 4;
3524 mask |= (INTVAL (operands[6]) - 4) << 6;
3525 operands[3] = GEN_INT (mask);
3527 switch (which_alternative)
3530 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3532 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3537 [(set_attr "isa" "noavx,avx")
3538 (set_attr "type" "sselog")
3539 (set_attr "length_immediate" "1")
3540 (set_attr "prefix" "orig,vex")
3541 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF as a V2SF.  Alternatives:
;;   0  register -> memory via movhps
;;   1  register -> register via movhlps
;;   2  offsettable memory -> register via movlps on %H1
;; NOTE(review): %H and %d are operand-print modifiers defined outside
;; this file; confirm their exact meaning in the i386 operand printer.
3543 (define_insn "sse_storehps"
3544 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3546 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3547 (parallel [(const_int 2) (const_int 3)])))]
3550 %vmovhps\t{%1, %0|%0, %1}
3551 %vmovhlps\t{%1, %d0|%d0, %1}
3552 %vmovlps\t{%H1, %d0|%d0, %H1}"
3553 [(set_attr "type" "ssemov")
3554 (set_attr "prefix" "maybe_vex")
3555 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps.  The operands are first passed
;; through ix86_fixup_binary_operands, the real insn is emitted into the
;; destination that helper returns, and the result is copied back to
;; operand 0 when the helper returned a different destination.
3557 (define_expand "sse_loadhps_exp"
3558 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3561 (match_operand:V4SF 1 "nonimmediate_operand" "")
3562 (parallel [(const_int 0) (const_int 1)]))
3563 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3566 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3568 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3570 /* Fix up the destination if needed. */
3571 if (dst != operands[0])
3572 emit_move_insn (operands[0], dst);
;; Keep elements 0 and 1 of operand 1 and pair them with the V2SF
;; operand 2.  Alternatives, in constraint order:
;;   0  movhps    memory source, operand 1 tied to operand 0   (noavx)
;;   1  vmovhps   memory source, three-operand form            (avx)
;;   2  movlhps   register source, operand 1 tied to op 0      (noavx)
;;   3  vmovlhps  register source, three-operand form          (avx)
;;   4  movlps    register stored through %H0 of a memory destination
3577 (define_insn "sse_loadhps"
3578 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3581 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3582 (parallel [(const_int 0) (const_int 1)]))
3583 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3586 movhps\t{%2, %0|%0, %2}
3587 vmovhps\t{%2, %1, %0|%0, %1, %2}
3588 movlhps\t{%2, %0|%0, %2}
3589 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3590 %vmovlps\t{%2, %H0|%H0, %2}"
3591 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3592 (set_attr "type" "ssemov")
3593 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3594 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF as a V2SF.  Alternatives:
;;   0  register -> memory via movlps
;;   1  register -> register via movaps (whole-register copy)
;;   2  memory -> register via movlps
3596 (define_insn "sse_storelps"
3597 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3599 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3600 (parallel [(const_int 0) (const_int 1)])))]
3603 %vmovlps\t{%1, %0|%0, %1}
3604 %vmovaps\t{%1, %0|%0, %1}
3605 %vmovlps\t{%1, %d0|%d0, %1}"
3606 [(set_attr "type" "ssemov")
3607 (set_attr "prefix" "maybe_vex")
3608 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps.  The operands are first passed
;; through ix86_fixup_binary_operands, the real insn is emitted into the
;; destination that helper returns, and the result is copied back to
;; operand 0 when the helper returned a different destination.
3610 (define_expand "sse_loadlps_exp"
3611 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3613 (match_operand:V2SF 2 "nonimmediate_operand" "")
3615 (match_operand:V4SF 1 "nonimmediate_operand" "")
3616 (parallel [(const_int 2) (const_int 3)]))))]
3619 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3621 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3623 /* Fix up the destination if needed. */
3624 if (dst != operands[0])
3625 emit_move_insn (operands[0], dst);
;; Pair the V2SF operand 2 with elements 2 and 3 of operand 1.
;; Alternatives, in constraint order:
;;   0  shufps $0xe4   register source, operand 2 tied to op 0  (noavx)
;;   1  vshufps $0xe4  three-operand register form              (avx)
;;   2  movlps         memory source, operand 1 tied to op 0    (noavx)
;;   3  vmovlps        memory source, three-operand form        (avx)
;;   4  movlps         register stored to a memory destination
;; The load form of (v)movlps only accepts a memory source, so operand 2
;; must be "m" in alternative 3 exactly as in alternative 2;
;; register/register is already handled by alternative 1.
3630 (define_insn "sse_loadlps"
3631 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3633 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3635 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3636 (parallel [(const_int 2) (const_int 3)]))))]
3639 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3640 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3641 movlps\t{%2, %0|%0, %2}
3642 vmovlps\t{%2, %1, %0|%0, %1, %2}
3643 %vmovlps\t{%2, %0|%0, %2}"
3644 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3645 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3646 (set_attr "length_immediate" "1,1,*,*,*")
3647 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3648 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss between two V4SF values.  Alternative 0 is the
;; two-operand movss with operand 1 tied to the destination (noavx);
;; alternative 1 is the three-operand vmovss.
3650 (define_insn "sse_movss"
3651 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3653 (match_operand:V4SF 2 "register_operand" " x,x")
3654 (match_operand:V4SF 1 "register_operand" " 0,x")
3658 movss\t{%2, %0|%0, %2}
3659 vmovss\t{%2, %1, %0|%0, %1, %2}"
3660 [(set_attr "isa" "noavx,avx")
3661 (set_attr "type" "ssemov")
3662 (set_attr "prefix" "orig,vex")
3663 (set_attr "mode" "SF")])
;; Broadcast an SF scalar into a V4SF register.  The preparation code
;; may force operand 1 into an SFmode register before matching.
;; NOTE(review): the condition guarding that force_reg is not visible
;; here; confirm when memory sources are kept.
3665 (define_expand "vec_dupv4sf"
3666 [(set (match_operand:V4SF 0 "register_operand" "")
3668 (match_operand:SF 1 "nonimmediate_operand" "")))]
3672 operands[1] = force_reg (SFmode, operands[1]);
;; vbroadcastss with a register source: element 0 of the V4SF operand 1
;; is selected and broadcast into the V4SF destination.
3675 (define_insn "avx2_vec_dupv4sf"
3676 [(set (match_operand:V4SF 0 "register_operand" "=x")
3679 (match_operand:V4SF 1 "register_operand" "x")
3680 (parallel [(const_int 0)]))))]
3682 "vbroadcastss\t{%1, %0|%0, %1}"
3683 [(set_attr "type" "sselog1")
3684 (set_attr "prefix" "vex")
3685 (set_attr "mode" "V4SF")])
;; VEX-encoded broadcast of an SF scalar into V4SF.  A register source
;; uses vshufps with immediate 0 on the same register twice; a memory
;; source uses vbroadcastss.
3687 (define_insn "*vec_dupv4sf_avx"
3688 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3690 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3693 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3694 vbroadcastss\t{%1, %0|%0, %1}"
3695 [(set_attr "type" "sselog1,ssemov")
3696 (set_attr "length_immediate" "1,0")
3697 (set_attr "prefix_extra" "0,1")
3698 (set_attr "prefix" "vex")
3699 (set_attr "mode" "V4SF")])
;; vbroadcastss with a register source: element 0 of the V4SF operand 1
;; is selected and broadcast into the V8SF destination.
3701 (define_insn "avx2_vec_dupv8sf"
3702 [(set (match_operand:V8SF 0 "register_operand" "=x")
3705 (match_operand:V4SF 1 "register_operand" "x")
3706 (parallel [(const_int 0)]))))]
3708 "vbroadcastss\t{%1, %0|%0, %1}"
3709 [(set_attr "type" "sselog1")
3710 (set_attr "prefix" "vex")
3711 (set_attr "mode" "V8SF")])
;; Legacy-encoded broadcast of an SF scalar into V4SF: the source is
;; tied to the destination register and shufps with immediate 0 is
;; applied to it in place.
3713 (define_insn "*vec_dupv4sf"
3714 [(set (match_operand:V4SF 0 "register_operand" "=x")
3716 (match_operand:SF 1 "register_operand" "0")))]
3718 "shufps\t{$0, %0, %0|%0, %0, 0}"
3719 [(set_attr "type" "sselog1")
3720 (set_attr "length_immediate" "1")
3721 (set_attr "mode" "V4SF")])
3723 ;; Although insertps takes register source, we prefer
3724 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars.  Alternatives, in order:
;;   0/1  (v)unpcklps    operand 2 in a register
;;   2/3  (v)insertps    operand 2 in memory, immediate 0x10
;;   4    movss          operand 1 from memory, operand 2 constraint C
;;   5    punpckldq      MMX registers (constraint *y)
;;   6    movd           MMX destination, operand 1 from memory
3725 (define_insn "*vec_concatv2sf_sse4_1"
3726 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3728 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3729 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3732 unpcklps\t{%2, %0|%0, %2}
3733 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3734 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3735 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3736 %vmovss\t{%1, %0|%0, %1}
3737 punpckldq\t{%2, %0|%0, %2}
3738 movd\t{%1, %0|%0, %1}"
3739 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3740 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3741 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3742 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3743 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3744 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3745 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3747 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3748 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3749 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF scalars without insertps.  Alternatives:
;;   0  unpcklps   both scalars in SSE registers, operand 1 tied to op 0
;;   1  movss      operand 1 from memory, operand 2 constraint C
;;   2  punpckldq  MMX registers (constraint *y)
;;   3  movd       MMX destination, operand 1 from memory
3750 (define_insn "*vec_concatv2sf_sse"
3751 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3753 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3754 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3757 unpcklps\t{%2, %0|%0, %2}
3758 movss\t{%1, %0|%0, %1}
3759 punpckldq\t{%2, %0|%0, %2}
3760 movd\t{%1, %0|%0, %1}"
3761 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3762 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves.  A register operand 2 uses
;; (v)movlhps; a memory operand 2 uses (v)movhps.  The noavx forms tie
;; operand 1 to the destination.
3764 (define_insn "*vec_concatv4sf"
3765 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3767 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3768 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3771 movlhps\t{%2, %0|%0, %2}
3772 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3773 movhps\t{%2, %0|%0, %2}
3774 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3775 [(set_attr "isa" "noavx,avx,noavx,avx")
3776 (set_attr "type" "ssemov")
3777 (set_attr "prefix" "orig,vex,orig,vex")
3778 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialization for every 128-bit vector mode (V_128).  All of
;; the work is delegated to ix86_expand_vector_init, called with its
;; first argument false, the destination, and the initializer operand.
3780 (define_expand "vec_init<mode>"
3781 [(match_operand:V_128 0 "register_operand" "")
3782 (match_operand 1 "" "")]
3785 ix86_expand_vector_init (false, operands[0], operands[1]);
3789 ;; Avoid combining registers from different units in a single alternative,
3790 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insert the scalar operand 2 into a four-element vector whose other
;; elements come from operand 1.  Alternatives 0-3 take operand 1 via
;; constraint C and use insertps, a scalar move, movd, or movss;
;; alternatives 4-7 merge into an existing vector with movss/vmovss or
;; pinsrd/vpinsrd; alternatives 8-10 have a memory destination tied to
;; operand 1 and are typed ssemov, fmov and imov respectively.
;; NOTE(review): the element index and the templates for alternatives
;; 8-10 are not visible here; the "_0" in the name suggests element 0.
3791 (define_insn "vec_set<mode>_0"
3792 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3793 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3795 (vec_duplicate:VI4F_128
3796 (match_operand:<ssescalarmode> 2 "general_operand"
3797 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3798 (match_operand:VI4F_128 1 "vector_move_operand"
3799 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3803 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3804 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3805 %vmovd\t{%2, %0|%0, %2}
3806 movss\t{%2, %0|%0, %2}
3807 movss\t{%2, %0|%0, %2}
3808 vmovss\t{%2, %1, %0|%0, %1, %2}
3809 pinsrd\t{$0, %2, %0|%0, %2, 0}
3810 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3814 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3816 (cond [(eq_attr "alternative" "0,6,7")
3817 (const_string "sselog")
3818 (eq_attr "alternative" "9")
3819 (const_string "fmov")
3820 (eq_attr "alternative" "10")
3821 (const_string "imov")
3823 (const_string "ssemov")))
3824 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3825 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3826 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3827 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3829 ;; A subset is vec_setv4sf.
;; Insert the SF operand 2 into one element of the V4SF operand 1 with
;; (v)insertps.  Operand 3 is a constant whose exact_log2 must be a
;; valid V4SF element index (checked in the insn condition); the output
;; code replaces it with that index shifted left by 4, which is the
;; immediate actually printed.
3830 (define_insn "*vec_setv4sf_sse4_1"
3831 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3834 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3835 (match_operand:V4SF 1 "register_operand" "0,x")
3836 (match_operand:SI 3 "const_int_operand" "")))]
3838 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3839 < GET_MODE_NUNITS (V4SFmode))"
3841 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3842 switch (which_alternative)
3845 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3847 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3852 [(set_attr "isa" "noavx,avx")
3853 (set_attr "type" "sselog")
3854 (set_attr "prefix_data16" "1,*")
3855 (set_attr "prefix_extra" "1")
3856 (set_attr "length_immediate" "1")
3857 (set_attr "prefix" "orig,vex")
3858 (set_attr "mode" "V4SF")])
;; insertps as an unspec, with the raw 8-bit immediate in operand 3.
;; When operand 2 is in memory, the output code takes the top two bits
;; of the immediate (count_s), keeps only the low six bits as the new
;; immediate, and rewrites operand 2 as an SFmode reference at byte
;; offset count_s * 4, so the selected source element is addressed
;; directly.
3860 (define_insn "sse4_1_insertps"
3861 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3862 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3863 (match_operand:V4SF 1 "register_operand" "0,x")
3864 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3868 if (MEM_P (operands[2]))
3870 unsigned count_s = INTVAL (operands[3]) >> 6;
3872 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3873 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3875 switch (which_alternative)
3878 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3880 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3885 [(set_attr "isa" "noavx,avx")
3886 (set_attr "type" "sselog")
3887 (set_attr "prefix_data16" "1,*")
3888 (set_attr "prefix_extra" "1")
3889 (set_attr "length_immediate" "1")
3890 (set_attr "prefix" "orig,vex")
3891 (set_attr "mode" "V4SF")])
;; Post-reload split for an element insert whose destination is memory
;; and whose scalar source is not: the insn is replaced by a scalar
;; move into the vector's memory at byte offset 0, accessed in the
;; scalar mode.
3894 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3896 (vec_duplicate:VI4F_128
3897 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3900 "TARGET_SSE && reload_completed"
3903 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Generic element insert for every mode in iterator V.  Operand 0 is
;; the vector, operand 1 the scalar, operand 2 the constant element
;; index; all of the work is delegated to ix86_expand_vector_set.
3908 (define_expand "vec_set<mode>"
3909 [(match_operand:V 0 "register_operand" "")
3910 (match_operand:<ssescalarmode> 1 "register_operand" "")
3911 (match_operand 2 "const_int_operand" "")]
3914 ix86_expand_vector_set (false, operands[0], operands[1],
3915 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF.  The destination may be an SSE
;; register, memory, an x87 register or a general register; memory to
;; memory is rejected by the condition.  After reload the insn is split
;; into a plain SFmode move: the source is re-expressed in SFmode either
;; by re-typing the same hard register or through gen_lowpart.
3919 (define_insn_and_split "*vec_extractv4sf_0"
3920 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3922 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3923 (parallel [(const_int 0)])))]
3924 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3926 "&& reload_completed"
3929 rtx op1 = operands[1];
3931 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3933 op1 = gen_lowpart (SFmode, op1);
3934 emit_move_insn (operands[0], op1);
;; Extract element 0-3 of a V4SF register as an SF.  Alternative 0
;; (general register or memory destination) prints extractps.  When the
;; destination is an SSE register the insn is split after reload: the
;; destination is re-typed as V4SF and filled either by a shufps that
;; repeats the requested index in every position or by
;; vec_interleave_highv4sf of the source with itself, depending on the
;; index.  Index 0 is expected to be matched by *vec_extractv4sf_0.
3938 (define_insn_and_split "*sse4_1_extractps"
3939 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3941 (match_operand:V4SF 1 "register_operand" "x,0,x")
3942 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3945 %vextractps\t{%2, %1, %0|%0, %1, %2}
3948 "&& reload_completed && SSE_REG_P (operands[0])"
3951 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3952 switch (INTVAL (operands[2]))
3956 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3957 operands[2], operands[2],
3958 GEN_INT (INTVAL (operands[2]) + 4),
3959 GEN_INT (INTVAL (operands[2]) + 4)));
3962 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3965 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
3970 [(set_attr "isa" "*,noavx,avx")
3971 (set_attr "type" "sselog,*,*")
3972 (set_attr "prefix_data16" "1,*,*")
3973 (set_attr "prefix_extra" "1,*,*")
3974 (set_attr "length_immediate" "1,*,*")
3975 (set_attr "prefix" "maybe_vex,*,*")
3976 (set_attr "mode" "V4SF,*,*")])
;; Extract element 0-3 of a V4SF that lives in offsettable memory.
;; After reload this becomes a plain SFmode load from the vector's
;; address plus 4 bytes per element index.
3978 (define_insn_and_split "*vec_extract_v4sf_mem"
3979 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3981 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3982 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3985 "&& reload_completed"
3988 int i = INTVAL (operands[2]);
3990 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting one 128-bit half of any 256-bit vector mode.
;; Operand 2 (0 or 1) chooses between the vec_extract_lo_<mode> and
;; vec_extract_hi_<mode> generators, which is then called with the
;; destination and source.
3994 (define_expand "avx_vextractf128<mode>"
3995 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3996 (match_operand:V_256 1 "register_operand" "")
3997 (match_operand:SI 2 "const_0_to_1_operand" "")]
4000 rtx (*insn)(rtx, rtx);
4002 switch (INTVAL (operands[2]))
4005 insn = gen_vec_extract_lo_<mode>;
4008 insn = gen_vec_extract_hi_<mode>;
4014 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a 256-bit vector with four elements
;; (VI8F_256).  Memory to memory is rejected.  After reload the insn is
;; split into a plain half-width move: the source is re-expressed in the
;; half mode either by re-typing the same hard register or through
;; gen_lowpart.
4018 (define_insn_and_split "vec_extract_lo_<mode>"
4019 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4020 (vec_select:<ssehalfvecmode>
4021 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4022 (parallel [(const_int 0) (const_int 1)])))]
4023 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4025 "&& reload_completed"
4028 rtx op1 = operands[1];
4030 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4032 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4033 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of a 256-bit vector with four elements
;; (VI8F_256), emitted as vextract<i128> with immediate 1.  The
;; destination is a register (alternative 0) or memory (alternative 1,
;; flagged as a store).
4037 (define_insn "vec_extract_hi_<mode>"
4038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4039 (vec_select:<ssehalfvecmode>
4040 (match_operand:VI8F_256 1 "register_operand" "x,x")
4041 (parallel [(const_int 2) (const_int 3)])))]
4043 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4044 [(set_attr "type" "sselog")
4045 (set_attr "prefix_extra" "1")
4046 (set_attr "length_immediate" "1")
4047 (set_attr "memory" "none,store")
4048 (set_attr "prefix" "vex")
4049 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-3) of a 256-bit vector with eight elements
;; (VI4F_256).  Memory to memory is rejected.  After reload the insn is
;; split into a plain half-width move: the source is re-expressed in the
;; half mode either by re-typing the same hard register or through
;; gen_lowpart.
4051 (define_insn_and_split "vec_extract_lo_<mode>"
4052 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4053 (vec_select:<ssehalfvecmode>
4054 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4055 (parallel [(const_int 0) (const_int 1)
4056 (const_int 2) (const_int 3)])))]
4057 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4059 "&& reload_completed"
4062 rtx op1 = operands[1];
4064 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4066 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4067 emit_move_insn (operands[0], op1);
;; High half (elements 4-7) of a 256-bit vector with eight elements
;; (VI4F_256), emitted as vextract<i128> with immediate 1.  The
;; destination is a register (alternative 0) or memory (alternative 1,
;; flagged as a store).
4071 (define_insn "vec_extract_hi_<mode>"
4072 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4073 (vec_select:<ssehalfvecmode>
4074 (match_operand:VI4F_256 1 "register_operand" "x,x")
4075 (parallel [(const_int 4) (const_int 5)
4076 (const_int 6) (const_int 7)])))]
4078 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4079 [(set_attr "type" "sselog")
4080 (set_attr "prefix_extra" "1")
4081 (set_attr "length_immediate" "1")
4082 (set_attr "memory" "none,store")
4083 (set_attr "prefix" "vex")
4084 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-7) of a V16HI as a V8HI.  Memory to memory is
;; rejected.  After reload the insn is split into a plain V8HI move: the
;; source is re-expressed in V8HImode either by re-typing the same hard
;; register or through gen_lowpart.
4086 (define_insn_and_split "vec_extract_lo_v16hi"
4087 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4089 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4090 (parallel [(const_int 0) (const_int 1)
4091 (const_int 2) (const_int 3)
4092 (const_int 4) (const_int 5)
4093 (const_int 6) (const_int 7)])))]
4094 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4096 "&& reload_completed"
4099 rtx op1 = operands[1];
4101 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4103 op1 = gen_lowpart (V8HImode, op1);
4104 emit_move_insn (operands[0], op1);
;; High half (elements 8-15) of a V16HI as a V8HI, emitted as
;; vextract%~128 with immediate 1.  The destination is a register
;; (alternative 0) or memory (alternative 1, flagged as a store).
;; NOTE(review): %~ is expanded by the operand printer outside this
;; file; presumably it picks the "i" or "f" spelling - confirm.
4108 (define_insn "vec_extract_hi_v16hi"
4109 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4111 (match_operand:V16HI 1 "register_operand" "x,x")
4112 (parallel [(const_int 8) (const_int 9)
4113 (const_int 10) (const_int 11)
4114 (const_int 12) (const_int 13)
4115 (const_int 14) (const_int 15)])))]
4117 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4118 [(set_attr "type" "sselog")
4119 (set_attr "prefix_extra" "1")
4120 (set_attr "length_immediate" "1")
4121 (set_attr "memory" "none,store")
4122 (set_attr "prefix" "vex")
4123 (set_attr "mode" "OI")])
;; Low half (elements 0-15) of a V32QI as a V16QI.  Memory to memory is
;; rejected.  After reload the insn is split into a plain V16QI move:
;; the source is re-expressed in V16QImode either by re-typing the same
;; hard register or through gen_lowpart.
4125 (define_insn_and_split "vec_extract_lo_v32qi"
4126 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4128 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4129 (parallel [(const_int 0) (const_int 1)
4130 (const_int 2) (const_int 3)
4131 (const_int 4) (const_int 5)
4132 (const_int 6) (const_int 7)
4133 (const_int 8) (const_int 9)
4134 (const_int 10) (const_int 11)
4135 (const_int 12) (const_int 13)
4136 (const_int 14) (const_int 15)])))]
4137 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4139 "&& reload_completed"
4142 rtx op1 = operands[1];
4144 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4146 op1 = gen_lowpart (V16QImode, op1);
4147 emit_move_insn (operands[0], op1);
;; High half (elements 16-31) of a V32QI as a V16QI, emitted as
;; vextract%~128 with immediate 1.  The destination is a register
;; (alternative 0) or memory (alternative 1, flagged as a store).
;; NOTE(review): %~ is expanded by the operand printer outside this
;; file; presumably it picks the "i" or "f" spelling - confirm.
4151 (define_insn "vec_extract_hi_v32qi"
4152 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4154 (match_operand:V32QI 1 "register_operand" "x,x")
4155 (parallel [(const_int 16) (const_int 17)
4156 (const_int 18) (const_int 19)
4157 (const_int 20) (const_int 21)
4158 (const_int 22) (const_int 23)
4159 (const_int 24) (const_int 25)
4160 (const_int 26) (const_int 27)
4161 (const_int 28) (const_int 29)
4162 (const_int 30) (const_int 31)])))]
4164 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4165 [(set_attr "type" "sselog")
4166 (set_attr "prefix_extra" "1")
4167 (set_attr "length_immediate" "1")
4168 (set_attr "memory" "none,store")
4169 (set_attr "prefix" "vex")
4170 (set_attr "mode" "OI")])
4172 ;; Modes handled by vec_extract patterns.
;; Each 256-bit mode is enabled only under TARGET_AVX; the 128-bit modes
;; carry no extra condition in this iterator.
4173 (define_mode_iterator VEC_EXTRACT_MODE
4174 [(V32QI "TARGET_AVX") V16QI
4175 (V16HI "TARGET_AVX") V8HI
4176 (V8SI "TARGET_AVX") V4SI
4177 (V4DI "TARGET_AVX") V2DI
4178 (V8SF "TARGET_AVX") V4SF
4179 (V4DF "TARGET_AVX") V2DF])
;; Generic element extract for every mode in VEC_EXTRACT_MODE.  Operand
;; 0 is the scalar destination, operand 1 the vector, operand 2 the
;; constant element index; all of the work is delegated to
;; ix86_expand_vector_extract.
4181 (define_expand "vec_extract<mode>"
4182 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4183 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4184 (match_operand 2 "const_int_operand" "")]
4187 ix86_expand_vector_extract (false, operands[0], operands[1],
4188 INTVAL (operands[2]));
4192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4194 ;; Parallel double-precision floating point element swizzling
4196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4198 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: operand 1 is a register, operand 2 a register or
;; memory.  The element selector is (1 5 3 7), i.e. the odd element of
;; each pair.
4199 (define_insn "avx_unpckhpd256"
4200 [(set (match_operand:V4DF 0 "register_operand" "=x")
4203 (match_operand:V4DF 1 "register_operand" "x")
4204 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4205 (parallel [(const_int 1) (const_int 5)
4206 (const_int 3) (const_int 7)])))]
4208 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4209 [(set_attr "type" "sselog")
4210 (set_attr "prefix" "vex")
4211 (set_attr "mode" "V4DF")])
;; Full-width high interleave for V4DF, built from three steps.  Two
;; fresh V4DF pseudos (operands 3 and 4) are allocated in the
;; preparation code.  The selectors below are, in order: the in-lane low
;; interleave (0 4 2 6), the in-lane high interleave (1 5 3 7), and a
;; final pick of elements 2, 3, 6 and 7 that is stored into operand 0.
4213 (define_expand "vec_interleave_highv4df"
4217 (match_operand:V4DF 1 "register_operand" "x")
4218 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4219 (parallel [(const_int 0) (const_int 4)
4220 (const_int 2) (const_int 6)])))
4226 (parallel [(const_int 1) (const_int 5)
4227 (const_int 3) (const_int 7)])))
4228 (set (match_operand:V4DF 0 "register_operand" "")
4233 (parallel [(const_int 2) (const_int 3)
4234 (const_int 6) (const_int 7)])))]
4237 operands[3] = gen_reg_rtx (V4DFmode);
4238 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If the operand combination
;; is rejected by ix86_vec_interleave_v2df_operator_ok (called with 1),
;; operand 2 is forced into a V2DF register before the insn is matched.
4242 (define_expand "vec_interleave_highv2df"
4243 [(set (match_operand:V2DF 0 "register_operand" "")
4246 (match_operand:V2DF 1 "nonimmediate_operand" "")
4247 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4248 (parallel [(const_int 1)
4252 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4253 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave.  Alternatives, in constraint order:
;;   0  unpckhpd   register form, operand 1 tied to operand 0  (noavx)
;;   1  vunpckhpd  three-operand register form                 (avx)
;;   2  movddup    both inputs are the same offsettable memory (sse3)
;;   3  movlpd     from %H1, operand 2 tied to operand 0       (noavx)
;;   4  vmovlpd    from %H1, three-operand form                (avx)
;;   5  movhpd     register stored to a memory destination
;; The insn is only valid for operand combinations accepted by
;; ix86_vec_interleave_v2df_operator_ok (operands, 1).
4256 (define_insn "*vec_interleave_highv2df"
4257 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4260 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4261 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4262 (parallel [(const_int 1)
4264 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4266 unpckhpd\t{%2, %0|%0, %2}
4267 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4268 %vmovddup\t{%H1, %0|%0, %H1}
4269 movlpd\t{%H1, %0|%0, %H1}
4270 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4271 %vmovhpd\t{%1, %0|%0, %1}"
4272 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4273 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4274 (set_attr "prefix_data16" "*,*,*,1,*,1")
4275 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4276 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4278 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander for the 256-bit movddup: a single V4DF source with
;; the low-interleave selector (0 4 2 6).  It has no preparation code
;; visible here, so the generated RTL is left to be matched by an insn
;; pattern (presumably *avx_unpcklpd256 - confirm).
4279 (define_expand "avx_movddup256"
4280 [(set (match_operand:V4DF 0 "register_operand" "")
4283 (match_operand:V4DF 1 "nonimmediate_operand" "")
4285 (parallel [(const_int 0) (const_int 4)
4286 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpcklpd: two V4DF sources with the
;; low-interleave selector (0 4 2 6).  It has no preparation code
;; visible here, so the generated RTL is left to be matched by an insn
;; pattern (presumably *avx_unpcklpd256 - confirm).
4289 (define_expand "avx_unpcklpd256"
4290 [(set (match_operand:V4DF 0 "register_operand" "")
4293 (match_operand:V4DF 1 "register_operand" "")
4294 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4295 (parallel [(const_int 0) (const_int 4)
4296 (const_int 2) (const_int 6)])))]
;; 256-bit low interleave of V4DF, selector (0 4 2 6).  Alternative 0
;; prints vunpcklpd for a register operand 1.  Alternative 1 applies
;; when operand 1 is in memory and operand 2 is the very same operand
;; (matching constraint "1"); it prints the one-source vmovddup.
4299 (define_insn "*avx_unpcklpd256"
4300 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4303 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4304 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4305 (parallel [(const_int 0) (const_int 4)
4306 (const_int 2) (const_int 6)])))]
4309 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4310 vmovddup\t{%1, %0|%0, %1}"
4311 [(set_attr "type" "sselog")
4312 (set_attr "prefix" "vex")
4313 (set_attr "mode" "V4DF")])
;; Full-width low interleave for V4DF, built from three steps.  Two
;; fresh V4DF pseudos (operands 3 and 4) are allocated in the
;; preparation code.  The selectors below are, in order: the in-lane low
;; interleave (0 4 2 6), the in-lane high interleave (1 5 3 7), and a
;; final pick of elements 0, 1, 4 and 5 that is stored into operand 0.
4315 (define_expand "vec_interleave_lowv4df"
4319 (match_operand:V4DF 1 "register_operand" "x")
4320 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4321 (parallel [(const_int 0) (const_int 4)
4322 (const_int 2) (const_int 6)])))
4328 (parallel [(const_int 1) (const_int 5)
4329 (const_int 3) (const_int 7)])))
4330 (set (match_operand:V4DF 0 "register_operand" "")
4335 (parallel [(const_int 0) (const_int 1)
4336 (const_int 4) (const_int 5)])))]
4339 operands[3] = gen_reg_rtx (V4DFmode);
4340 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If the operand combination is
;; rejected by ix86_vec_interleave_v2df_operator_ok (called with 0),
;; operand 1 is forced into a V2DF register before the insn is matched.
4343 (define_expand "vec_interleave_lowv2df"
4344 [(set (match_operand:V2DF 0 "register_operand" "")
4347 (match_operand:V2DF 1 "nonimmediate_operand" "")
4348 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4349 (parallel [(const_int 0)
4353 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4354 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave.  Alternatives, in constraint order:
;;   0  unpcklpd   register form, operand 1 tied to operand 0  (noavx)
;;   1  vunpcklpd  three-operand register form                 (avx)
;;   2  movddup    both inputs are the same memory operand     (sse3)
;;   3  movhpd     memory operand 2, operand 1 tied to op 0    (noavx)
;;   4  vmovhpd    memory operand 2, three-operand form        (avx)
;;   5  movlpd     register stored through %H0 of a memory destination
;; The insn is only valid for operand combinations accepted by
;; ix86_vec_interleave_v2df_operator_ok (operands, 0).
4357 (define_insn "*vec_interleave_lowv2df"
4358 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4361 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4362 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4363 (parallel [(const_int 0)
4365 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4367 unpcklpd\t{%2, %0|%0, %2}
4368 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4369 %vmovddup\t{%1, %0|%0, %1}
4370 movhpd\t{%2, %0|%0, %2}
4371 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4372 %vmovlpd\t{%2, %H0|%H0, %2}"
4373 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4374 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4375 (set_attr "prefix_data16" "*,*,*,1,*,1")
4376 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4377 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed from
;; register operand 1: the DFmode view of operand 1's hard register is
;; stored twice, at byte offsets 0 and 8 of the destination.
4380 [(set (match_operand:V2DF 0 "memory_operand" "")
4383 (match_operand:V2DF 1 "register_operand" "")
4385 (parallel [(const_int 0)
4387 "TARGET_SSE3 && reload_completed"
4390 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4391 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4392 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a register destination and memory operand 1
;; whose two selector constants satisfy operand 3 == operand 2 + 2.
;; The result is rewritten as a vec_duplicate of the single DF element
;; at byte offset operand 2 * 8 of the memory operand.
4397 [(set (match_operand:V2DF 0 "register_operand" "")
4400 (match_operand:V2DF 1 "memory_operand" "")
4402 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4403 (match_operand:SI 3 "const_int_operand" "")])))]
4404 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4405 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4407 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for avx_shufpd256.  Operand 3 is an immediate bit mask; it is
;; decoded into explicit element selectors and passed on to
;; gen_avx_shufpd256_1.  Of the selectors visible here, bit 1 chooses
;; 5 or 4, bit 2 chooses 3 or 2, and bit 3 chooses 7 or 6.
4410 (define_expand "avx_shufpd256"
4411 [(match_operand:V4DF 0 "register_operand" "")
4412 (match_operand:V4DF 1 "register_operand" "")
4413 (match_operand:V4DF 2 "nonimmediate_operand" "")
4414 (match_operand:SI 3 "const_int_operand" "")]
4417 int mask = INTVAL (operands[3]);
4418 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4420 GEN_INT (mask & 2 ? 5 : 4),
4421 GEN_INT (mask & 4 ? 3 : 2),
4422 GEN_INT (mask & 8 ? 7 : 6)));
;; Insn emitting vshufpd for V4DF.  Operands 3..6 are the four element
;; selectors; the output code folds them back into the instruction's
;; immediate: op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3,
;; and stores that mask in operands[3] before printing.
4426 (define_insn "avx_shufpd256_1"
4427 [(set (match_operand:V4DF 0 "register_operand" "=x")
4430 (match_operand:V4DF 1 "register_operand" "x")
4431 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4432 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4433 (match_operand 4 "const_4_to_5_operand" "")
4434 (match_operand 5 "const_2_to_3_operand" "")
4435 (match_operand 6 "const_6_to_7_operand" "")])))]
4439 mask = INTVAL (operands[3]);
4440 mask |= (INTVAL (operands[4]) - 4) << 1;
4441 mask |= (INTVAL (operands[5]) - 2) << 2;
4442 mask |= (INTVAL (operands[6]) - 6) << 3;
4443 operands[3] = GEN_INT (mask);
4445 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4447 [(set_attr "type" "sselog")
4448 (set_attr "length_immediate" "1")
4449 (set_attr "prefix" "vex")
4450 (set_attr "mode" "V4DF")])
;; Expander for sse2_shufpd.  Decodes the immediate mask in operand 3
;; into element selectors for gen_sse2_shufpd_v2df; of the selectors
;; visible here, bit 1 chooses 3 or 2.
4452 (define_expand "sse2_shufpd"
4453 [(match_operand:V2DF 0 "register_operand" "")
4454 (match_operand:V2DF 1 "register_operand" "")
4455 (match_operand:V2DF 2 "nonimmediate_operand" "")
4456 (match_operand:SI 3 "const_int_operand" "")]
4459 int mask = INTVAL (operands[3]);
4460 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4462 GEN_INT (mask & 2 ? 3 : 2)));
4466 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high interleave: one VEX-only alternative that prints
;; vpunpckhqdq with operand 2 allowed in memory.
4467 (define_insn "avx2_interleave_highv4di"
4468 [(set (match_operand:V4DI 0 "register_operand" "=x")
4471 (match_operand:V4DI 1 "register_operand" "x")
4472 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4473 (parallel [(const_int 1)
4478 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4479 [(set_attr "type" "sselog")
4480 (set_attr "prefix" "vex")
4481 (set_attr "mode" "OI")])
;; V2DI high interleave.  Alternative 0 is the two-operand punpckhqdq
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form vpunpckhqdq.
4483 (define_insn "vec_interleave_highv2di"
4484 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4487 (match_operand:V2DI 1 "register_operand" "0,x")
4488 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4489 (parallel [(const_int 1)
4493 punpckhqdq\t{%2, %0|%0, %2}
4494 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4495 [(set_attr "isa" "noavx,avx")
4496 (set_attr "type" "sselog")
4497 (set_attr "prefix_data16" "1,*")
4498 (set_attr "prefix" "orig,vex")
4499 (set_attr "mode" "TI")])
;; V4DI low interleave: one VEX-only alternative that prints
;; vpunpcklqdq with operand 2 allowed in memory.
4501 (define_insn "avx2_interleave_lowv4di"
4502 [(set (match_operand:V4DI 0 "register_operand" "=x")
4505 (match_operand:V4DI 1 "register_operand" "x")
4506 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4507 (parallel [(const_int 0)
4512 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4513 [(set_attr "type" "sselog")
4514 (set_attr "prefix" "vex")
4515 (set_attr "mode" "OI")])
;; V2DI low interleave.  Alternative 0 is the two-operand punpcklqdq
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form vpunpcklqdq.
4517 (define_insn "vec_interleave_lowv2di"
4518 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4521 (match_operand:V2DI 1 "register_operand" "0,x")
4522 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4523 (parallel [(const_int 0)
4527 punpcklqdq\t{%2, %0|%0, %2}
4528 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4529 [(set_attr "isa" "noavx,avx")
4530 (set_attr "type" "sselog")
4531 (set_attr "prefix_data16" "1,*")
4532 (set_attr "prefix" "orig,vex")
4533 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes.  Operands 3 and 4 are element
;; selectors into the concatenation of operands 1 and 2; the output code
;; rebuilds the immediate as op3 | (op4 - 2) << 1, stores it in
;; operands[3], and then picks the legacy or VEX mnemonic from
;; which_alternative.
4535 (define_insn "sse2_shufpd_<mode>"
4536 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4537 (vec_select:VI8F_128
4538 (vec_concat:<ssedoublevecmode>
4539 (match_operand:VI8F_128 1 "register_operand" "0,x")
4540 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4541 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4542 (match_operand 4 "const_2_to_3_operand" "")])))]
4546 mask = INTVAL (operands[3]);
4547 mask |= (INTVAL (operands[4]) - 2) << 1;
4548 operands[3] = GEN_INT (mask);
4550 switch (which_alternative)
4553 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4555 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4560 [(set_attr "isa" "noavx,avx")
4561 (set_attr "type" "sselog")
4562 (set_attr "length_immediate" "1")
4563 (set_attr "prefix" "orig,vex")
4564 (set_attr "mode" "V2DF")])
4566 ;; Avoid combining registers from different units in a single alternative,
4567 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Produces a DF value from a V2DF source using selector index 1.
;; The condition rejects the memory-to-memory combination.  The
;; "prefix_data16" attribute is computed: it depends on alternative 0
;; together with !TARGET_AVX.
4568 (define_insn "sse2_storehpd"
4569 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4571 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4572 (parallel [(const_int 1)])))]
4573 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4575 %vmovhpd\t{%1, %0|%0, %1}
4577 vunpckhpd\t{%d1, %0|%0, %d1}
4581 [(set_attr "isa" "*,noavx,avx,*,*,*")
4582 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4583 (set (attr "prefix_data16")
4585 (and (eq_attr "alternative" "0")
4586 (not (match_test "TARGET_AVX")))
4588 (const_string "*")))
4589 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4590 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: a DF register set from selector index 1 of a V2DF
;; memory operand becomes a plain DF move from byte offset 8 of that
;; memory.
4593 [(set (match_operand:DF 0 "register_operand" "")
4595 (match_operand:V2DF 1 "memory_operand" "")
4596 (parallel [(const_int 1)])))]
4597 "TARGET_SSE2 && reload_completed"
4598 [(set (match_dup 0) (match_dup 1))]
4599 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE-without-SSE2 variant for selector index 1 of a V2DF.  Uses the
;; single-precision move instructions: movhps to memory, movhlps between
;; registers, and movlps from %H1 of an offsettable memory source.
4601 (define_insn "*vec_extractv2df_1_sse"
4602 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4604 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4605 (parallel [(const_int 1)])))]
4606 "!TARGET_SSE2 && TARGET_SSE
4607 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4609 movhps\t{%1, %0|%0, %1}
4610 movhlps\t{%1, %0|%0, %1}
4611 movlps\t{%H1, %0|%0, %H1}"
4612 [(set_attr "type" "ssemov")
4613 (set_attr "mode" "V2SF,V4SF,V2SF")])
4615 ;; Avoid combining registers from different units in a single alternative,
4616 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Produces a DF value from a V2DF source using selector index 0.
;; Only the first alternative's template (movlpd to memory) is visible
;; here; the condition rejects the memory-to-memory combination.
4617 (define_insn "sse2_storelpd"
4618 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4620 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4621 (parallel [(const_int 0)])))]
4622 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4624 %vmovlpd\t{%1, %0|%0, %1}
4629 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4630 (set_attr "prefix_data16" "1,*,*,*,*")
4631 (set_attr "prefix" "maybe_vex")
4632 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split for selector index 0 of a V2DF into a DF register.
;; The source is re-expressed in DFmode, either as the same hard
;; register number or via gen_lowpart, and a plain move is emitted.
;; NOTE(review): the test choosing between the two forms is not visible
;; in this listing.
4635 [(set (match_operand:DF 0 "register_operand" "")
4637 (match_operand:V2DF 1 "nonimmediate_operand" "")
4638 (parallel [(const_int 0)])))]
4639 "TARGET_SSE2 && reload_completed"
4642 rtx op1 = operands[1];
4644 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4646 op1 = gen_lowpart (DFmode, op1);
4647 emit_move_insn (operands[0], op1);
;; SSE-without-SSE2 variant for selector index 0 of a V2DF.  Uses movlps
;; for the memory alternatives and movaps for register-to-register.
4651 (define_insn "*vec_extractv2df_0_sse"
4652 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4654 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4655 (parallel [(const_int 0)])))]
4656 "!TARGET_SSE2 && TARGET_SSE
4657 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4659 movlps\t{%1, %0|%0, %1}
4660 movaps\t{%1, %0|%0, %1}
4661 movlps\t{%1, %0|%0, %1}"
4662 [(set_attr "type" "ssemov")
4663 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The operands are first
;; legitimized with ix86_fixup_binary_operands, which returns the
;; destination to use; if that differs from operand 0 the result is
;; copied back with a move afterwards.
4665 (define_expand "sse2_loadhpd_exp"
4666 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4669 (match_operand:V2DF 1 "nonimmediate_operand" "")
4670 (parallel [(const_int 0)]))
4671 (match_operand:DF 2 "nonimmediate_operand" "")))]
4674 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4676 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4678 /* Fix up the destination if needed. */
4679 if (dst != operands[0])
4680 emit_move_insn (operands[0], dst);
4685 ;; Avoid combining registers from different units in a single alternative,
4686 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines selector index 0 of V2DF operand 1 with DF operand 2 into a
;; V2DF result.  The visible templates are movhpd/vmovhpd (operand 2 in
;; memory) and unpcklpd/vunpcklpd (operand 2 in a register).  Operands
;; 1 and 2 may not both be memory.
4687 (define_insn "sse2_loadhpd"
4688 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4692 (match_operand:V2DF 1 "nonimmediate_operand"
4694 (parallel [(const_int 0)]))
4695 (match_operand:DF 2 "nonimmediate_operand"
4696 " m,m,x,x,x,*f,r")))]
4697 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4699 movhpd\t{%2, %0|%0, %2}
4700 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4701 unpcklpd\t{%2, %0|%0, %2}
4702 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4706 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4707 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4708 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4709 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4710 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split: when a V2DF memory destination keeps its own
;; element 0 and takes DF register operand 1 as the other element, the
;; insn becomes a plain DF store to byte offset 8 of that memory.
4713 [(set (match_operand:V2DF 0 "memory_operand" "")
4715 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4716 (match_operand:DF 1 "register_operand" "")))]
4717 "TARGET_SSE2 && reload_completed"
4718 [(set (match_dup 0) (match_dup 1))]
4719 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The operands are first
;; legitimized with ix86_fixup_binary_operands, which returns the
;; destination to use; if that differs from operand 0 the result is
;; copied back with a move afterwards.
4721 (define_expand "sse2_loadlpd_exp"
4722 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4724 (match_operand:DF 2 "nonimmediate_operand" "")
4726 (match_operand:V2DF 1 "nonimmediate_operand" "")
4727 (parallel [(const_int 1)]))))]
4730 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4732 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4734 /* Fix up the destination if needed. */
4735 if (dst != operands[0])
4736 emit_move_insn (operands[0], dst);
4741 ;; Avoid combining registers from different units in a single alternative,
4742 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with selector index 1 of V2DF operand 1 into a
;; V2DF result.  Eleven alternatives; the visible templates cover movsd,
;; movlpd, shufpd and movhpd in legacy and VEX forms.  Operand 1 uses
;; vector_move_operand so that alternative 0 can accept the "C"
;; constraint.  The "type" attribute is computed per alternative
;; (5 sselog, 9 fmov, 10 imov, otherwise ssemov).
4743 (define_insn "sse2_loadlpd"
4744 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4745 "=x,x,x,x,x,x,x,x,m,m ,m")
4747 (match_operand:DF 2 "nonimmediate_operand"
4748 " m,m,m,x,x,0,0,x,x,*f,r")
4750 (match_operand:V2DF 1 "vector_move_operand"
4751 " C,0,x,0,x,x,o,o,0,0 ,0")
4752 (parallel [(const_int 1)]))))]
4753 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4755 %vmovsd\t{%2, %0|%0, %2}
4756 movlpd\t{%2, %0|%0, %2}
4757 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4758 movsd\t{%2, %0|%0, %2}
4759 vmovsd\t{%2, %1, %0|%0, %1, %2}
4760 shufpd\t{$2, %1, %0|%0, %1, 2}
4761 movhpd\t{%H1, %0|%0, %H1}
4762 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4766 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4768 (cond [(eq_attr "alternative" "5")
4769 (const_string "sselog")
4770 (eq_attr "alternative" "9")
4771 (const_string "fmov")
4772 (eq_attr "alternative" "10")
4773 (const_string "imov")
4775 (const_string "ssemov")))
4776 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4777 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4778 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4779 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split: when a V2DF memory destination takes DF register
;; operand 1 and keeps its own element 1, the insn becomes a plain DF
;; store to the address produced by adjust_address with offset 8.
;; NOTE(review): the sibling split that keeps element 0 also stores at
;; offset 8; confirm that offset 8 is intended here, where the element
;; being replaced is the other one.
4782 [(set (match_operand:V2DF 0 "memory_operand" "")
4784 (match_operand:DF 1 "register_operand" "")
4785 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4786 "TARGET_SSE2 && reload_completed"
4787 [(set (match_dup 0) (match_dup 1))]
4788 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; movsd-style merge of two V2DF operands.  Nine alternatives: movsd /
;; vmovsd (register source), movlpd / vmovlpd (memory source), a movlpd
;; store, shufpd with immediate 2, movhps / vmovhps from %H1, and a
;; movhps store to %H0.  "type" is sselog for alternative 5 and ssemov
;; otherwise; "prefix_data16" depends on alternatives 2 and 4 together
;; with !TARGET_AVX.
4790 (define_insn "sse2_movsd"
4791 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4793 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4794 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4798 movsd\t{%2, %0|%0, %2}
4799 vmovsd\t{%2, %1, %0|%0, %1, %2}
4800 movlpd\t{%2, %0|%0, %2}
4801 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4802 %vmovlpd\t{%2, %0|%0, %2}
4803 shufpd\t{$2, %1, %0|%0, %1, 2}
4804 movhps\t{%H1, %0|%0, %H1}
4805 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4806 %vmovhps\t{%1, %H0|%H0, %1}"
4807 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4810 (eq_attr "alternative" "5")
4811 (const_string "sselog")
4812 (const_string "ssemov")))
4813 (set (attr "prefix_data16")
4815 (and (eq_attr "alternative" "2,4")
4816 (not (match_test "TARGET_AVX")))
4818 (const_string "*")))
4819 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4820 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4821 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander for vec_dupv2df.  The preparation code can force DF
;; operand 1 into a register.
;; NOTE(review): the condition guarding the force_reg call is not
;; visible in this listing.
4823 (define_expand "vec_dupv2df"
4824 [(set (match_operand:V2DF 0 "register_operand" "")
4826 (match_operand:DF 1 "nonimmediate_operand" "")))]
4830 operands[1] = force_reg (DFmode, operands[1]);
;; V2DF result built from DF operand 1 (register or memory) with a
;; single movddup (VEX-prefixed when the %v prefix expands to one).
4833 (define_insn "*vec_dupv2df_sse3"
4834 [(set (match_operand:V2DF 0 "register_operand" "=x")
4836 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4838 "%vmovddup\t{%1, %0|%0, %1}"
4839 [(set_attr "type" "sselog1")
4840 (set_attr "prefix" "maybe_vex")
4841 (set_attr "mode" "DF")])
;; V2DF result built from DF register operand 1, which is tied to the
;; destination register by the "0" constraint.
;; NOTE(review): the condition and output template are not visible in
;; this listing.
4843 (define_insn "*vec_dupv2df"
4844 [(set (match_operand:V2DF 0 "register_operand" "=x")
4846 (match_operand:DF 1 "register_operand" "0")))]
4849 [(set_attr "type" "sselog1")
4850 (set_attr "mode" "V2DF")])
;; V2DF result built from DF operand 1 (register or memory) with a
;; single movddup.
;; NOTE(review): the second half of the concatenation and the insn
;; condition are not visible in this listing.
4852 (define_insn "*vec_concatv2df_sse3"
4853 [(set (match_operand:V2DF 0 "register_operand" "=x")
4855 (match_operand:DF 1 "nonimmediate_operand" "xm")
4858 "%vmovddup\t{%1, %0|%0, %1}"
4859 [(set_attr "type" "sselog1")
4860 (set_attr "prefix" "maybe_vex")
4861 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operands 1 and 2.  Seven alternatives:
;; unpcklpd / vunpcklpd (both in registers), movhpd / vmovhpd (operand 2
;; in memory), movsd when operand 2 satisfies the "C" constraint, and
;; movlhps / movhps for the two alternatives whose "isa" is plain noavx.
4863 (define_insn "*vec_concatv2df"
4864 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4866 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4867 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4870 unpcklpd\t{%2, %0|%0, %2}
4871 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4872 movhpd\t{%2, %0|%0, %2}
4873 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4874 %vmovsd\t{%1, %0|%0, %1}
4875 movlhps\t{%2, %0|%0, %2}
4876 movhps\t{%2, %0|%0, %2}"
4877 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4880 (eq_attr "alternative" "0,1")
4881 (const_string "sselog")
4882 (const_string "ssemov")))
4883 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4884 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4885 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4887 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4889 ;; Parallel integral arithmetic
4891 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate for the VI_AVX2 modes.  The preparation code
;; creates operand 2 as a register holding the all-zero vector of the
;; same mode.
4893 (define_expand "neg<mode>2"
4894 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4897 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4899 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander for the VI_AVX2 modes.
;; Operands are legitimized with ix86_fixup_binary_operands_no_copy.
4901 (define_expand "<plusminus_insn><mode>3"
4902 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4904 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4905 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4907 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract insn for the VI_AVX2 modes.
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand VEX form.  The <comm> marker on operand 1 is supplied
;; by the code iterator.
4909 (define_insn "*<plusminus_insn><mode>3"
4910 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4912 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4913 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4914 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4916 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4917 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4918 [(set_attr "isa" "noavx,avx")
4919 (set_attr "type" "sseiadd")
4920 (set_attr "prefix_data16" "1,*")
4921 (set_attr "prefix" "orig,vex")
4922 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus codes) for the
;; VI12_AVX2 modes.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
4924 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4925 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4926 (sat_plusminus:VI12_AVX2
4927 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4928 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4930 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract insn (sat_plusminus codes) for the VI12_AVX2
;; modes.  Alternative 0 is the two-operand legacy form, alternative 1
;; the three-operand VEX form.  The "mode" attribute follows the
;; iterated vector mode through <sseinsnmode>, as the non-saturating
;; add/subtract pattern does, instead of being fixed at "TI".
4932 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4933 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4934 (sat_plusminus:VI12_AVX2
4935 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4936 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4937 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4939 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4940 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4941 [(set_attr "isa" "noavx,avx")
4942 (set_attr "type" "sseiadd")
4943 (set_attr "prefix_data16" "1,*")
4944 (set_attr "prefix" "orig,vex")
4945 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element multiply for the VI1_AVX2 modes, split before register
;; allocation (the condition requires can_create_pseudo_p).  Six
;; temporaries are allocated.  Both inputs are unpacked with the
;; interleave-high and interleave-low patterns, the unpacked halves are
;; multiplied in <sseunpackmode>, and the even bytes of the two products
;; are merged into operand 0 by ix86_expand_vec_extract_even_odd.  A
;; REG_EQUAL note recording the original MULT is attached to the last
;; insn.
4947 (define_insn_and_split "mul<mode>3"
4948 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4949 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4950 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4952 && can_create_pseudo_p ()"
4959 enum machine_mode mulmode = <sseunpackmode>mode;
4961 for (i = 0; i < 6; ++i)
4962 t[i] = gen_reg_rtx (<MODE>mode);
4964 /* Unpack data such that we've got a source byte in each low byte of
4965 each word. We don't care what goes into the high byte of each word.
4966 Rather than trying to get zero in there, most convenient is to let
4967 it be a copy of the low byte. */
4968 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4970 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4972 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4974 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4977 /* Multiply words. The end-of-line annotations here give a picture of what
4978 the output of that instruction looks like. Dot means don't care; the
4979 letters are the bytes of the result with A being the most significant. */
4980 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4981 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4982 gen_lowpart (mulmode, t[0]),
4983 gen_lowpart (mulmode, t[1]))));
4984 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4985 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4986 gen_lowpart (mulmode, t[2]),
4987 gen_lowpart (mulmode, t[3]))));
4989 /* Extract the even bytes and merge them back together. */
4990 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4992 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4993 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Multiply expander for the VI2_AVX2 modes.  Operands are legitimized
;; with ix86_fixup_binary_operands_no_copy.
4997 (define_expand "mul<mode>3"
4998 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4999 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5000 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5002 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for the VI2_AVX2 modes: pmullw (legacy, operand 1 tied
;; to the destination) or vpmullw (VEX).  Operand 1 carries the "%"
;; commutative modifier.
5004 (define_insn "*mul<mode>3"
5005 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5006 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5007 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5008 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5010 pmullw\t{%2, %0|%0, %2}
5011 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5012 [(set_attr "isa" "noavx,avx")
5013 (set_attr "type" "sseimul")
5014 (set_attr "prefix_data16" "1,*")
5015 (set_attr "prefix" "orig,vex")
5016 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes.  Both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied, and shifted
;; right logically.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5018 (define_expand "<s>mul<mode>3_highpart"
5019 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5021 (lshiftrt:<ssedoublemode>
5022 (mult:<ssedoublemode>
5023 (any_extend:<ssedoublemode>
5024 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5025 (any_extend:<ssedoublemode>
5026 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5029 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for the VI2_AVX2 modes: pmulh<u>w (legacy) or
;; vpmulh<u>w (VEX), where <u> comes from the extension code iterator.
5031 (define_insn "*<s>mul<mode>3_highpart"
5032 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5034 (lshiftrt:<ssedoublemode>
5035 (mult:<ssedoublemode>
5036 (any_extend:<ssedoublemode>
5037 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5038 (any_extend:<ssedoublemode>
5039 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5041 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5043 pmulh<u>w\t{%2, %0|%0, %2}
5044 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5045 [(set_attr "isa" "noavx,avx")
5046 (set_attr "type" "sseimul")
5047 (set_attr "prefix_data16" "1,*")
5048 (set_attr "prefix" "orig,vex")
5049 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the V8SI -> V4DI multiply that uses elements 0, 2, 4
;; and 6 of each input.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5051 (define_expand "avx2_umulv4siv4di3"
5052 [(set (match_operand:V4DI 0 "register_operand" "")
5056 (match_operand:V8SI 1 "nonimmediate_operand" "")
5057 (parallel [(const_int 0) (const_int 2)
5058 (const_int 4) (const_int 6)])))
5061 (match_operand:V8SI 2 "nonimmediate_operand" "")
5062 (parallel [(const_int 0) (const_int 2)
5063 (const_int 4) (const_int 6)])))))]
5065 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the V8SI -> V4DI multiply that uses elements 0, 2, 4 and 6
;; of each input; prints vpmuludq.  Operand 1 carries the "%"
;; commutative modifier.  The pattern is named after its expander,
;; avx2_umulv4siv4di3, and its TARGET_AVX2 condition; the leading "*"
;; means no generator function is created from the name.
5067 (define_insn "*avx2_umulv4siv4di3"
5068 [(set (match_operand:V4DI 0 "register_operand" "=x")
5072 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5073 (parallel [(const_int 0) (const_int 2)
5074 (const_int 4) (const_int 6)])))
5077 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5078 (parallel [(const_int 0) (const_int 2)
5079 (const_int 4) (const_int 6)])))))]
5080 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5081 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5082 [(set_attr "type" "sseimul")
5083 (set_attr "prefix" "vex")
5084 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI multiply that uses elements 0 and 2 of
;; each input.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5086 (define_expand "sse2_umulv2siv2di3"
5087 [(set (match_operand:V2DI 0 "register_operand" "")
5091 (match_operand:V4SI 1 "nonimmediate_operand" "")
5092 (parallel [(const_int 0) (const_int 2)])))
5095 (match_operand:V4SI 2 "nonimmediate_operand" "")
5096 (parallel [(const_int 0) (const_int 2)])))))]
5098 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the V4SI -> V2DI multiply that uses elements 0 and 2 of each
;; input: pmuludq (legacy) or vpmuludq (VEX).
5100 (define_insn "*sse2_umulv2siv2di3"
5101 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5105 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5106 (parallel [(const_int 0) (const_int 2)])))
5109 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5110 (parallel [(const_int 0) (const_int 2)])))))]
5111 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5113 pmuludq\t{%2, %0|%0, %2}
5114 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5115 [(set_attr "isa" "noavx,avx")
5116 (set_attr "type" "sseimul")
5117 (set_attr "prefix_data16" "1,*")
5118 (set_attr "prefix" "orig,vex")
5119 (set_attr "mode" "TI")])
;; Expander for the V8SI -> V4DI multiply matched by the vpmuldq insn;
;; uses elements 0, 2, 4 and 6 of each input.  Operands are legitimized
;; with ix86_fixup_binary_operands_no_copy.
5121 (define_expand "avx2_mulv4siv4di3"
5122 [(set (match_operand:V4DI 0 "register_operand" "")
5126 (match_operand:V8SI 1 "nonimmediate_operand" "")
5127 (parallel [(const_int 0) (const_int 2)
5128 (const_int 4) (const_int 6)])))
5131 (match_operand:V8SI 2 "nonimmediate_operand" "")
5132 (parallel [(const_int 0) (const_int 2)
5133 (const_int 4) (const_int 6)])))))]
5135 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the V8SI -> V4DI multiply that uses elements 0, 2, 4 and 6
;; of each input; prints vpmuldq.  Operand 1 carries the "%" commutative
;; modifier, as the other multiply insns in this file do: the condition
;; (ix86_binary_operator_ok with MULT) and the nonimmediate_operand
;; predicate admit a memory operand 1, and "%" lets the register
;; allocator swap the inputs instead of reloading operand 1 into a
;; register.
5137 (define_insn "*avx2_mulv4siv4di3"
5138 [(set (match_operand:V4DI 0 "register_operand" "=x")
5142 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5143 (parallel [(const_int 0) (const_int 2)
5144 (const_int 4) (const_int 6)])))
5147 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5148 (parallel [(const_int 0) (const_int 2)
5149 (const_int 4) (const_int 6)])))))]
5150 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5151 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5152 [(set_attr "isa" "avx")
5153 (set_attr "type" "sseimul")
5154 (set_attr "prefix_extra" "1")
5155 (set_attr "prefix" "vex")
5156 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI multiply matched by the pmuldq insn;
;; uses elements 0 and 2 of each input.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5158 (define_expand "sse4_1_mulv2siv2di3"
5159 [(set (match_operand:V2DI 0 "register_operand" "")
5163 (match_operand:V4SI 1 "nonimmediate_operand" "")
5164 (parallel [(const_int 0) (const_int 2)])))
5167 (match_operand:V4SI 2 "nonimmediate_operand" "")
5168 (parallel [(const_int 0) (const_int 2)])))))]
5170 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the V4SI -> V2DI multiply that uses elements 0 and 2 of each
;; input: pmuldq (legacy) or vpmuldq (VEX).  Requires TARGET_SSE4_1.
5172 (define_insn "*sse4_1_mulv2siv2di3"
5173 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5177 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5178 (parallel [(const_int 0) (const_int 2)])))
5181 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5182 (parallel [(const_int 0) (const_int 2)])))))]
5183 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5185 pmuldq\t{%2, %0|%0, %2}
5186 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5187 [(set_attr "isa" "noavx,avx")
5188 (set_attr "type" "sseimul")
5189 (set_attr "prefix_data16" "1,*")
5190 (set_attr "prefix_extra" "1")
5191 (set_attr "prefix" "orig,vex")
5192 (set_attr "mode" "TI")])
;; Expander for the V16HI -> V8SI pmaddwd operation.  The pattern takes
;; two V8HI selections from each input (one starting at element 0, one
;; at element 1).  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5194 (define_expand "avx2_pmaddwd"
5195 [(set (match_operand:V8SI 0 "register_operand" "")
5200 (match_operand:V16HI 1 "nonimmediate_operand" "")
5201 (parallel [(const_int 0)
5211 (match_operand:V16HI 2 "nonimmediate_operand" "")
5212 (parallel [(const_int 0)
5222 (vec_select:V8HI (match_dup 1)
5223 (parallel [(const_int 1)
5232 (vec_select:V8HI (match_dup 2)
5233 (parallel [(const_int 1)
5240 (const_int 15)]))))))]
5242 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the V8HI -> V4SI pmaddwd operation.  The pattern takes
;; two V4HI selections from each input (one starting at element 0, one
;; at element 1).  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5244 (define_expand "sse2_pmaddwd"
5245 [(set (match_operand:V4SI 0 "register_operand" "")
5250 (match_operand:V8HI 1 "nonimmediate_operand" "")
5251 (parallel [(const_int 0)
5257 (match_operand:V8HI 2 "nonimmediate_operand" "")
5258 (parallel [(const_int 0)
5264 (vec_select:V4HI (match_dup 1)
5265 (parallel [(const_int 1)
5270 (vec_select:V4HI (match_dup 2)
5271 (parallel [(const_int 1)
5274 (const_int 7)]))))))]
5276 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V16HI -> V8SI pmaddwd operation; prints vpmaddwd.
;; Requires TARGET_AVX2.
5278 (define_insn "*avx2_pmaddwd"
5279 [(set (match_operand:V8SI 0 "register_operand" "=x")
5284 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5285 (parallel [(const_int 0)
5295 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5296 (parallel [(const_int 0)
5306 (vec_select:V8HI (match_dup 1)
5307 (parallel [(const_int 1)
5316 (vec_select:V8HI (match_dup 2)
5317 (parallel [(const_int 1)
5324 (const_int 15)]))))))]
5325 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5326 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5327 [(set_attr "type" "sseiadd")
5328 (set_attr "prefix" "vex")
5329 (set_attr "mode" "OI")])
;; Insn for the V8HI -> V4SI pmaddwd operation: pmaddwd (legacy) or
;; vpmaddwd (VEX).  Also sets the "atom_unit" attribute to "simul".
5331 (define_insn "*sse2_pmaddwd"
5332 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5337 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5338 (parallel [(const_int 0)
5344 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5345 (parallel [(const_int 0)
5351 (vec_select:V4HI (match_dup 1)
5352 (parallel [(const_int 1)
5357 (vec_select:V4HI (match_dup 2)
5358 (parallel [(const_int 1)
5361 (const_int 7)]))))))]
5362 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5364 pmaddwd\t{%2, %0|%0, %2}
5365 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5366 [(set_attr "isa" "noavx,avx")
5367 (set_attr "type" "sseiadd")
5368 (set_attr "atom_unit" "simul")
5369 (set_attr "prefix_data16" "1,*")
5370 (set_attr "prefix" "orig,vex")
5371 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX2 modes.  The operands are only
;; passed through ix86_fixup_binary_operands_no_copy when SSE4.1 or AVX
;; is available.
5373 (define_expand "mul<mode>3"
5374 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5375 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5376 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5379 if (TARGET_SSE4_1 || TARGET_AVX)
5380 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Multiply insn for the VI4_AVX2 modes: pmulld (legacy) or vpmulld
;; (VEX).  Requires TARGET_SSE4_1.
5383 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5384 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5385 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5386 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5387 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5389 pmulld\t{%2, %0|%0, %2}
5390 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5391 [(set_attr "isa" "noavx,avx")
5392 (set_attr "type" "sseimul")
5393 (set_attr "prefix_extra" "1")
5394 (set_attr "prefix" "orig,vex")
5395 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX,
;; split while pseudos can still be created.  The sequence multiplies
;; elements 0 and 2 with gen_sse2_umulv2siv2di3, shifts both inputs with
;; gen_sse2_lshrv1ti3 so that elements 1 and 3 take their place,
;; multiplies again, then shuffles and interleaves the partial results
;; into operand 0.  A REG_EQUAL note recording the original MULT is
;; attached to the last insn.
5397 (define_insn_and_split "*sse2_mulv4si3"
5398 [(set (match_operand:V4SI 0 "register_operand" "")
5399 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5400 (match_operand:V4SI 2 "register_operand" "")))]
5401 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5402 && can_create_pseudo_p ()"
5407 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5413 t1 = gen_reg_rtx (V4SImode);
5414 t2 = gen_reg_rtx (V4SImode);
5415 t3 = gen_reg_rtx (V4SImode);
5416 t4 = gen_reg_rtx (V4SImode);
5417 t5 = gen_reg_rtx (V4SImode);
5418 t6 = gen_reg_rtx (V4SImode);
5419 thirtytwo = GEN_INT (32);
5421 /* Multiply elements 2 and 0. */
5422 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5425 /* Shift both input vectors down one element, so that elements 3
5426 and 1 are now in the slots for elements 2 and 0. For K8, at
5427 least, this is faster than using a shuffle. */
5428 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5429 gen_lowpart (V1TImode, op1),
5431 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5432 gen_lowpart (V1TImode, op2),
5434 /* Multiply elements 3 and 1. */
5435 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5438 /* Move the results in element 2 down to element 1; we don't care
5439 what goes in elements 2 and 3. */
5440 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5441 const0_rtx, const0_rtx));
5442 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5443 const0_rtx, const0_rtx));
5445 /* Merge the parts back together. */
5446 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5448 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5449 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; 64-bit element multiply for the VI8_AVX2 modes, split while pseudos
;; can still be created.  Two code paths:
;;  - TARGET_XOP with V2DImode: the inputs are viewed as V4SI; a pshufd
;;    and a V4SI multiply form the cross products, gen_xop_phadddq sums
;;    them pairwise, the sum is shifted left by 32, and
;;    gen_xop_pmacsdql produces operand 0.
;;  - otherwise: the low parts are multiplied, each input is shifted
;;    right by 32 and multiplied against the other input, the two
;;    products are shifted left by 32, and all three parts are added.
;; A REG_EQUAL note recording the original MULT is attached to the last
;; insn.
5453 (define_insn_and_split "mul<mode>3"
5454 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5455 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5456 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5458 && can_create_pseudo_p ()"
5463 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5470 if (TARGET_XOP && <MODE>mode == V2DImode)
5472 /* op1: A,B,C,D, op2: E,F,G,H */
5473 op1 = gen_lowpart (V4SImode, op1);
5474 op2 = gen_lowpart (V4SImode, op2);
5476 t1 = gen_reg_rtx (V4SImode);
5477 t2 = gen_reg_rtx (V4SImode);
5478 t3 = gen_reg_rtx (V2DImode);
5479 t4 = gen_reg_rtx (V2DImode);
5482 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5488 /* t2: (B*E),(A*F),(D*G),(C*H) */
5489 emit_insn (gen_mulv4si3 (t2, t1, op2));
5491 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5492 emit_insn (gen_xop_phadddq (t3, t2));
5494 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5495 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5497 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5498 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5502 t1 = gen_reg_rtx (<MODE>mode);
5503 t2 = gen_reg_rtx (<MODE>mode);
5504 t3 = gen_reg_rtx (<MODE>mode);
5505 t4 = gen_reg_rtx (<MODE>mode);
5506 t5 = gen_reg_rtx (<MODE>mode);
5507 t6 = gen_reg_rtx (<MODE>mode);
5508 thirtytwo = GEN_INT (32);
5510 /* Multiply low parts. */
5511 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5512 (t1, gen_lowpart (<ssepackmode>mode, op1),
5513 gen_lowpart (<ssepackmode>mode, op2)));
5515 /* Shift input vectors right 32 bits so we can multiply high parts. */
5516 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5517 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5519 /* Multiply high parts by low parts. */
5520 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5521 (t4, gen_lowpart (<ssepackmode>mode, op1),
5522 gen_lowpart (<ssepackmode>mode, t3)));
5523 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5524 (t5, gen_lowpart (<ssepackmode>mode, op2),
5525 gen_lowpart (<ssepackmode>mode, t2)));
5527 /* Shift them back. */
5528 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5529 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5531 /* Add the three parts together. */
5532 emit_insn (gen_add<mode>3 (t6, t1, t4));
5533 emit_insn (gen_add<mode>3 (op0, t6, t5));
5536 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5537 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for the VI2_AVX2 modes.  The low part
;; (mul<mode>3) and the high part (<s>mul<mode>3_highpart) of every
;; element product are computed into two temporaries and combined with
;; vec_interleave_high<mode> into operand 0 viewed in the narrow mode.
5541 (define_expand "vec_widen_<s>mult_hi_<mode>"
5542 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5543 (any_extend:<sseunpackmode>
5544 (match_operand:VI2_AVX2 1 "register_operand" ""))
5545 (match_operand:VI2_AVX2 2 "register_operand" "")]
5548 rtx op1, op2, t1, t2, dest;
5552 t1 = gen_reg_rtx (<MODE>mode);
5553 t2 = gen_reg_rtx (<MODE>mode);
5554 dest = gen_lowpart (<MODE>mode, operands[0]);
5556 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5557 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5558 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for the VI2_AVX2 modes.  Same scheme as
;; the high-half expander: low and high parts of the element products
;; are computed separately and then merged with
;; vec_interleave_low<mode> into operand 0 viewed in the narrow mode.
5562 (define_expand "vec_widen_<s>mult_lo_<mode>"
5563 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5564 (any_extend:<sseunpackmode>
5565 (match_operand:VI2_AVX2 1 "register_operand" ""))
5566 (match_operand:VI2_AVX2 2 "register_operand" "")]
5569 rtx op1, op2, t1, t2, dest;
5573 t1 = gen_reg_rtx (<MODE>mode);
5574 t2 = gen_reg_rtx (<MODE>mode);
5575 dest = gen_lowpart (<MODE>mode, operands[0]);
5577 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5578 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5579 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening multiply, high half, V8SI -> V4DI.  Each input is first
;; permuted as a V4DI value with quadword selector 0,2,1,3, then
;; shuffled with pshufd using dword selectors 2,2,3,3, and the two
;; shuffled vectors are fed to avx2_<u>mulv4siv4di3.
5583 (define_expand "vec_widen_<s>mult_hi_v8si"
5584 [(match_operand:V4DI 0 "register_operand" "")
5585 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5586 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5591 t1 = gen_reg_rtx (V4DImode);
5592 t2 = gen_reg_rtx (V4DImode);
5593 t3 = gen_reg_rtx (V8SImode);
5594 t4 = gen_reg_rtx (V8SImode);
5595 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5596 const0_rtx, const2_rtx,
5597 const1_rtx, GEN_INT (3)));
5598 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5599 const0_rtx, const2_rtx,
5600 const1_rtx, GEN_INT (3)));
5601 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5602 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5603 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5604 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5605 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Widening multiply, low half, V8SI -> V4DI.  Identical to the
;; high-half expander except that the pshufd step uses dword selectors
;; 0,0,1,1 instead of 2,2,3,3.
5609 (define_expand "vec_widen_<s>mult_lo_v8si"
5610 [(match_operand:V4DI 0 "register_operand" "")
5611 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5612 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5617 t1 = gen_reg_rtx (V4DImode);
5618 t2 = gen_reg_rtx (V4DImode);
5619 t3 = gen_reg_rtx (V8SImode);
5620 t4 = gen_reg_rtx (V8SImode);
5621 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5622 const0_rtx, const2_rtx,
5623 const1_rtx, GEN_INT (3)));
5624 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5625 const0_rtx, const2_rtx,
5626 const1_rtx, GEN_INT (3)));
5627 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5628 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5629 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5630 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5631 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed widening multiply, high half, V4SI -> V2DI.  Two sequences
;; are provided: an XOP one that reorders both inputs with pshufd
;; (selector 0,2,1,3) and uses xop_mulv2div2di3_high, and an SSE4.1 one
;; that interleaves the high elements of each input with itself and
;; uses sse4_1_mulv2siv2di3.
5635 (define_expand "vec_widen_smult_hi_v4si"
5636 [(match_operand:V2DI 0 "register_operand" "")
5637 (match_operand:V4SI 1 "register_operand" "")
5638 (match_operand:V4SI 2 "register_operand" "")]
5641 rtx op1, op2, t1, t2;
5645 t1 = gen_reg_rtx (V4SImode);
5646 t2 = gen_reg_rtx (V4SImode);
5650 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5651 GEN_INT (1), GEN_INT (3)));
5652 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5653 GEN_INT (1), GEN_INT (3)));
5654 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5658 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5659 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5660 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, low half, V4SI -> V2DI.  Mirrors the
;; high-half expander: the XOP sequence uses pshufd (selector 0,2,1,3)
;; followed by xop_mulv2div2di3_low, the SSE4.1 sequence interleaves
;; the low elements of each input with itself before
;; sse4_1_mulv2siv2di3.
5664 (define_expand "vec_widen_smult_lo_v4si"
5665 [(match_operand:V2DI 0 "register_operand" "")
5666 (match_operand:V4SI 1 "register_operand" "")
5667 (match_operand:V4SI 2 "register_operand" "")]
5670 rtx op1, op2, t1, t2;
5674 t1 = gen_reg_rtx (V4SImode);
5675 t2 = gen_reg_rtx (V4SImode);
5679 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5680 GEN_INT (1), GEN_INT (3)));
5681 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5682 GEN_INT (1), GEN_INT (3)));
5683 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5687 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5688 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5689 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half, V4SI -> V2DI.  Each input is
;; interleaved (high) with itself and the results are multiplied with
;; sse2_umulv2siv2di3.
5693 (define_expand "vec_widen_umult_hi_v4si"
5694 [(match_operand:V2DI 0 "register_operand" "")
5695 (match_operand:V4SI 1 "register_operand" "")
5696 (match_operand:V4SI 2 "register_operand" "")]
5699 rtx op1, op2, t1, t2;
5703 t1 = gen_reg_rtx (V4SImode);
5704 t2 = gen_reg_rtx (V4SImode);
5706 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5707 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5708 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half, V4SI -> V2DI.  Each input is
;; interleaved (low) with itself and the results are multiplied with
;; sse2_umulv2siv2di3.
5712 (define_expand "vec_widen_umult_lo_v4si"
5713 [(match_operand:V2DI 0 "register_operand" "")
5714 (match_operand:V4SI 1 "register_operand" "")
5715 (match_operand:V4SI 2 "register_operand" "")]
5718 rtx op1, op2, t1, t2;
5722 t1 = gen_reg_rtx (V4SImode);
5723 t2 = gen_reg_rtx (V4SImode);
5725 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5726 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5727 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for the VI2_AVX2 modes.  pmaddwd of operands 1
;; and 2 is computed into a fresh temporary in the wider mode, and
;; operand 0 is then set to a PLUS in that mode.
;; NOTE(review): the second addend is presumably operand 3 (the
;; accumulator) -- confirm.
5731 (define_expand "sdot_prod<mode>"
5732 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5733 (match_operand:VI2_AVX2 1 "register_operand" "")
5734 (match_operand:VI2_AVX2 2 "register_operand" "")
5735 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5738 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5739 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5740 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5741 gen_rtx_PLUS (<sseunpackmode>mode,
;; Maps an extension code to the ISA prefix used in the name of the
;; matching V2SI -> V2DI widening multiply generator:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".
5746 (define_code_attr sse2_sse4_1
5747 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot product V4SI x V4SI accumulated into V2DI.  The unsigned form
;; needs SSE2, the signed form SSE4.1.  Steps:
;;   t1 = widening multiply of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V1TI values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the V1TI shift count is presumably 32 bits, so that
;; the odd elements land in the even positions -- confirm.
5749 (define_expand "<s>dot_prodv4si"
5750 [(match_operand:V2DI 0 "register_operand" "")
5751 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5752 (match_operand:V4SI 2 "register_operand" "")
5753 (match_operand:V2DI 3 "register_operand" "")]
5754 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5758 t1 = gen_reg_rtx (V2DImode);
5759 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5760 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5762 t2 = gen_reg_rtx (V4SImode);
5763 t3 = gen_reg_rtx (V4SImode);
5764 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5765 gen_lowpart (V1TImode, operands[1]),
5767 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5768 gen_lowpart (V1TImode, operands[2]),
5771 t4 = gen_reg_rtx (V2DImode);
5772 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5774 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot product V8SI x V8SI accumulated into V4DI, the 256-bit
;; counterpart of <s>dot_prodv4si:
;;   t1 = avx2_<u>mulv4siv4di3 of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V2TI values;
;;   t4 = avx2_<u>mulv4siv4di3 of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the V2TI shift count is presumably 32 bits -- confirm.
5778 (define_expand "<s>dot_prodv8si"
5779 [(match_operand:V4DI 0 "register_operand" "")
5780 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5781 (match_operand:V8SI 2 "register_operand" "")
5782 (match_operand:V4DI 3 "register_operand" "")]
5787 t1 = gen_reg_rtx (V4DImode);
5788 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5789 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5791 t2 = gen_reg_rtx (V8SImode);
5792 t3 = gen_reg_rtx (V8SImode);
5793 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5794 gen_lowpart (V2TImode, operands[1]),
5796 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5797 gen_lowpart (V2TImode, operands[2]),
5800 t4 = gen_reg_rtx (V4DImode);
5801 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5803 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Per-element arithmetic right shift (psra / vpsra) for the VI24_AVX2
;; modes.  The SImode count in operand 2 is either an SSE register or
;; an immediate (constraint "xN").  Alternative 0 is the two-operand
;; legacy encoding with the destination tied to operand 1; alternative
;; 1 is the three-operand VEX encoding.  The length_immediate
;; attribute depends on whether operand 2 is a const_int.
5807 (define_insn "ashr<mode>3"
5808 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5810 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5811 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5814 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5815 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5816 [(set_attr "isa" "noavx,avx")
5817 (set_attr "type" "sseishft")
5818 (set (attr "length_immediate")
5819 (if_then_else (match_operand 2 "const_int_operand" "")
5821 (const_string "0")))
5822 (set_attr "prefix_data16" "1,*")
5823 (set_attr "prefix" "orig,vex")
5824 (set_attr "mode" "<sseinsnmode>")])
;; Per-element logical shifts (any_lshift) for the VI248_AVX2 modes,
;; emitted as p<vshift> / vp<vshift>.  The SImode count in operand 2 is
;; either an SSE register or an immediate (constraint "xN").
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; three-operand VEX encoding.  The length_immediate attribute depends
;; on whether operand 2 is a const_int.
5826 (define_insn "<shift_insn><mode>3"
5827 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5828 (any_lshift:VI248_AVX2
5829 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5830 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5833 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5834 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5835 [(set_attr "isa" "noavx,avx")
5836 (set_attr "type" "sseishft")
5837 (set (attr "length_immediate")
5838 (if_then_else (match_operand 2 "const_int_operand" "")
5840 (const_string "0")))
5841 (set_attr "prefix_data16" "1,*")
5842 (set_attr "prefix" "orig,vex")
5843 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift left for the VI_128 modes.  The count in operand
;; 2 must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 in V1TImode.
5845 (define_expand "vec_shl_<mode>"
5846 [(set (match_operand:VI_128 0 "register_operand" "")
5848 (match_operand:VI_128 1 "register_operand" "")
5849 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5852 operands[0] = gen_lowpart (V1TImode, operands[0]);
5853 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width left shift (pslldq / vpslldq) for the VIMAX_AVX2 modes.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing, since the instruction takes
;; its immediate in bytes.
5856 (define_insn "<sse2_avx2>_ashl<mode>3"
5857 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5859 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5860 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5863 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5865 switch (which_alternative)
5868 return "pslldq\t{%2, %0|%0, %2}";
5870 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5875 [(set_attr "isa" "noavx,avx")
5876 (set_attr "type" "sseishft")
5877 (set_attr "length_immediate" "1")
5878 (set_attr "prefix_data16" "1,*")
5879 (set_attr "prefix" "orig,vex")
5880 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift right for the VI_128 modes.  The count in operand
;; 2 must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 in V1TImode.
5882 (define_expand "vec_shr_<mode>"
5883 [(set (match_operand:VI_128 0 "register_operand" "")
5885 (match_operand:VI_128 1 "register_operand" "")
5886 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5889 operands[0] = gen_lowpart (V1TImode, operands[0]);
5890 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width logical right shift (psrldq / vpsrldq) for the
;; VIMAX_AVX2 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, since the instruction takes its immediate in bytes.
5893 (define_insn "<sse2_avx2>_lshr<mode>3"
5894 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5895 (lshiftrt:VIMAX_AVX2
5896 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5897 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5900 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5902 switch (which_alternative)
5905 return "psrldq\t{%2, %0|%0, %2}";
5907 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5912 [(set_attr "isa" "noavx,avx")
5913 (set_attr "type" "sseishft")
5914 (set_attr "length_immediate" "1")
5915 (set_attr "atom_unit" "sishuf")
5916 (set_attr "prefix_data16" "1,*")
5917 (set_attr "prefix" "orig,vex")
5918 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the binary <code> operation on the VI124_256 modes.
;; It only canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy; the pattern itself is emitted
;; unchanged.
5921 (define_expand "<code><mode>3"
5922 [(set (match_operand:VI124_256 0 "register_operand" "")
5924 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5925 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5927 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min insn (vp<maxmin_int>) for the VI124_256 modes.
;; Operands 1 and 2 are commutative ("%"); only operand 2 may be in
;; memory.  Requires TARGET_AVX2 and ix86_binary_operator_ok.
5929 (define_insn "*avx2_<code><mode>3"
5930 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5932 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5933 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5934 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5935 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5936 [(set_attr "type" "sseiadd")
5937 (set_attr "prefix_extra" "1")
5938 (set_attr "prefix" "vex")
5939 (set_attr "mode" "OI")])
;; Max/min for the VI8_AVX2 modes, expanded as a vector conditional
;; move.  The comparison is always "operand 1 > operand 2" (GTU for
;; UMAX/UMIN, GT otherwise).  For SMAX/UMAX the selected values are
;; (operand 1, operand 2); for the min codes they are swapped.  The
;; vcond operand array is handed to ix86_expand_int_vcond.
5941 (define_expand "<code><mode>3"
5942 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5944 (match_operand:VI8_AVX2 1 "register_operand" "")
5945 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5952 xops[0] = operands[0];
5954 if (<CODE> == SMAX || <CODE> == UMAX)
5956 xops[1] = operands[1];
5957 xops[2] = operands[2];
5961 xops[1] = operands[2];
5962 xops[2] = operands[1];
5965 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5967 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5968 xops[4] = operands[1];
5969 xops[5] = operands[2];
5971 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes that falls back to a GT
;; comparison.  With SSE4.1, or for V8HImode, the operands are only
;; canonicalized so a native insn can match.  Otherwise both inputs
;; are forced into registers and the operation is expanded through
;; ix86_expand_int_vcond using "operand 1 > operand 2" (GT) as the
;; condition, with the two selected values in either order.
;; NOTE(review): the test choosing the order is presumably
;; <CODE> == SMAX -- confirm.
5976 (define_expand "<code><mode>3"
5977 [(set (match_operand:VI124_128 0 "register_operand" "")
5979 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5980 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5983 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5984 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5990 xops[0] = operands[0];
5991 operands[1] = force_reg (<MODE>mode, operands[1]);
5992 operands[2] = force_reg (<MODE>mode, operands[2]);
5996 xops[1] = operands[1];
5997 xops[2] = operands[2];
6001 xops[1] = operands[2];
6002 xops[2] = operands[1];
6005 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6006 xops[4] = operands[1];
6007 xops[5] = operands[2];
6009 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn (p<maxmin_int> / vp<maxmin_int>) for the
;; VI14_128 modes.  Operands 1 and 2 are commutative; alternative 0 is
;; the legacy two-operand form, alternative 1 the VEX three-operand
;; form.
6015 (define_insn "*sse4_1_<code><mode>3"
6016 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6018 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6019 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6020 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6022 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6023 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6024 [(set_attr "isa" "noavx,avx")
6025 (set_attr "type" "sseiadd")
6026 (set_attr "prefix_extra" "1,*")
6027 (set_attr "prefix" "orig,vex")
6028 (set_attr "mode" "TI")])
;; SSE2 integer max/min insn on V8HI (p<maxmin_int>w /
;; vp<maxmin_int>w).  Operands 1 and 2 are commutative; alternative 0
;; is the legacy two-operand form, alternative 1 the VEX three-operand
;; form.
6030 (define_insn "*<code>v8hi3"
6031 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6033 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6034 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6035 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6037 p<maxmin_int>w\t{%2, %0|%0, %2}
6038 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6039 [(set_attr "isa" "noavx,avx")
6040 (set_attr "type" "sseiadd")
6041 (set_attr "prefix_data16" "1,*")
6042 (set_attr "prefix_extra" "*,1")
6043 (set_attr "prefix" "orig,vex")
6044 (set_attr "mode" "TI")])
;; Max/min expander for the VI124_128 modes that falls back to a GTU
;; comparison.  Three cases:
;;  - SSE4.1, or V16QImode: only canonicalize operands so a native insn
;;    can match.
;;  - UMAX on V8HImode: computed as (op1 -us op2) + op2 with the
;;    saturating subtract sse2_ussubv8hi3.  When the destination is the
;;    same rtx as operand 2, a fresh temporary holds the difference so
;;    operand 2 is still intact for the add.
;;  - Otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond with "operand 1 >u operand 2" (GTU) as the
;;    condition, with the two selected values in either order.
6046 (define_expand "<code><mode>3"
6047 [(set (match_operand:VI124_128 0 "register_operand" "")
6049 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6050 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6053 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6054 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6055 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6057 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6058 operands[1] = force_reg (<MODE>mode, operands[1]);
6059 if (rtx_equal_p (op3, op2))
6060 op3 = gen_reg_rtx (V8HImode);
6061 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6062 emit_insn (gen_addv8hi3 (op0, op3, op2));
6070 operands[1] = force_reg (<MODE>mode, operands[1]);
6071 operands[2] = force_reg (<MODE>mode, operands[2]);
6073 xops[0] = operands[0];
6077 xops[1] = operands[1];
6078 xops[2] = operands[2];
6082 xops[1] = operands[2];
6083 xops[2] = operands[1];
6086 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6087 xops[4] = operands[1];
6088 xops[5] = operands[2];
6090 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn (p<maxmin_int> / vp<maxmin_int>) for the
;; VI24_128 modes.  Operands 1 and 2 are commutative; alternative 0 is
;; the legacy two-operand form, alternative 1 the VEX three-operand
;; form.
6096 (define_insn "*sse4_1_<code><mode>3"
6097 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6099 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6100 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6101 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6103 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6104 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6105 [(set_attr "isa" "noavx,avx")
6106 (set_attr "type" "sseiadd")
6107 (set_attr "prefix_extra" "1,*")
6108 (set_attr "prefix" "orig,vex")
6109 (set_attr "mode" "TI")])
;; SSE2 integer max/min insn on V16QI (p<maxmin_int>b /
;; vp<maxmin_int>b).  Operands 1 and 2 are commutative; alternative 0
;; is the legacy two-operand form, alternative 1 the VEX three-operand
;; form.
6111 (define_insn "*<code>v16qi3"
6112 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6114 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6115 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6116 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6118 p<maxmin_int>b\t{%2, %0|%0, %2}
6119 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6120 [(set_attr "isa" "noavx,avx")
6121 (set_attr "type" "sseiadd")
6122 (set_attr "prefix_data16" "1,*")
6123 (set_attr "prefix_extra" "*,1")
6124 (set_attr "prefix" "orig,vex")
6125 (set_attr "mode" "TI")])
6127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6129 ;; Parallel integral comparisons
6131 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for element-wise equality on the VI_256 modes.  It
;; only canonicalizes the operands for the EQ code with
;; ix86_fixup_binary_operands_no_copy.
6133 (define_expand "avx2_eq<mode>3"
6134 [(set (match_operand:VI_256 0 "register_operand" "")
6136 (match_operand:VI_256 1 "nonimmediate_operand" "")
6137 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6139 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 element-wise equality compare (vpcmpeq) for the VI_256 modes.
;; Operands 1 and 2 are commutative; only operand 2 may be in memory.
6141 (define_insn "*avx2_eq<mode>3"
6142 [(set (match_operand:VI_256 0 "register_operand" "=x")
6144 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6145 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6146 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6147 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6148 [(set_attr "type" "ssecmp")
6149 (set_attr "prefix_extra" "1")
6150 (set_attr "prefix" "vex")
6151 (set_attr "mode" "OI")])
;; SSE4.1 quadword equality compare on V2DI (pcmpeqq / vpcmpeqq).
;; Operands 1 and 2 are commutative; alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
6153 (define_insn "*sse4_1_eqv2di3"
6154 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6156 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6157 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6158 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6160 pcmpeqq\t{%2, %0|%0, %2}
6161 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6162 [(set_attr "isa" "noavx,avx")
6163 (set_attr "type" "ssecmp")
6164 (set_attr "prefix_extra" "1")
6165 (set_attr "prefix" "orig,vex")
6166 (set_attr "mode" "TI")])
;; SSE2 element-wise equality compare (pcmpeq / vpcmpeq) for the
;; VI124_128 modes.  Disabled when XOP is enabled (!TARGET_XOP).
;; Operands 1 and 2 are commutative; alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
6168 (define_insn "*sse2_eq<mode>3"
6169 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6171 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6172 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6173 "TARGET_SSE2 && !TARGET_XOP
6174 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6176 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6177 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6178 [(set_attr "isa" "noavx,avx")
6179 (set_attr "type" "ssecmp")
6180 (set_attr "prefix_data16" "1,*")
6181 (set_attr "prefix" "orig,vex")
6182 (set_attr "mode" "TI")])
;; Named expander for element-wise equality on the VI124_128 modes,
;; available with SSE2 when XOP is not enabled.  It only canonicalizes
;; the operands for the EQ code.
6184 (define_expand "sse2_eq<mode>3"
6185 [(set (match_operand:VI124_128 0 "register_operand" "")
6187 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6188 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6189 "TARGET_SSE2 && !TARGET_XOP "
6190 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for quadword equality on V2DI.  It only
;; canonicalizes the operands for the EQ code.
6192 (define_expand "sse4_1_eqv2di3"
6193 [(set (match_operand:V2DI 0 "register_operand" "")
6195 (match_operand:V2DI 1 "nonimmediate_operand" "")
6196 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6198 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Quadword greater-than compare on V2DI (pcmpgtq / vpcmpgtq).  The
;; operands are not commutative: operand 1 must be a register, operand
;; 2 may be in memory.
6200 (define_insn "sse4_2_gtv2di3"
6201 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6203 (match_operand:V2DI 1 "register_operand" "0,x")
6204 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6207 pcmpgtq\t{%2, %0|%0, %2}
6208 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6209 [(set_attr "isa" "noavx,avx")
6210 (set_attr "type" "ssecmp")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "prefix" "orig,vex")
6213 (set_attr "mode" "TI")])
;; Element-wise greater-than compare (vpcmpgt) for the VI_256 modes.
;; Operand 1 must be a register, operand 2 may be in memory; VEX
;; three-operand form only.
6215 (define_insn "avx2_gt<mode>3"
6216 [(set (match_operand:VI_256 0 "register_operand" "=x")
6218 (match_operand:VI_256 1 "register_operand" "x")
6219 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6221 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6222 [(set_attr "type" "ssecmp")
6223 (set_attr "prefix_extra" "1")
6224 (set_attr "prefix" "vex")
6225 (set_attr "mode" "OI")])
;; SSE2 element-wise greater-than compare (pcmpgt / vpcmpgt) for the
;; VI124_128 modes.  Disabled when XOP is enabled (!TARGET_XOP).
;; Operand 1 must be a register, operand 2 may be in memory.
6227 (define_insn "sse2_gt<mode>3"
6228 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6230 (match_operand:VI124_128 1 "register_operand" "0,x")
6231 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6232 "TARGET_SSE2 && !TARGET_XOP"
6234 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6235 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6236 [(set_attr "isa" "noavx,avx")
6237 (set_attr "type" "ssecmp")
6238 (set_attr "prefix_data16" "1,*")
6239 (set_attr "prefix" "orig,vex")
6240 (set_attr "mode" "TI")])
;; Vector conditional select with 256-bit data (V_256) and a 256-bit
;; integer comparison (VI_256).  Operand 3 is the comparison operator
;; applied to operands 4 and 5; operands 1 and 2 are the two value
;; operands.  The condition requires the data and comparison modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
6242 (define_expand "vcond<V_256:mode><VI_256:mode>"
6243 [(set (match_operand:V_256 0 "register_operand" "")
6245 (match_operator 3 ""
6246 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6247 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6248 (match_operand:V_256 1 "general_operand" "")
6249 (match_operand:V_256 2 "general_operand" "")))]
6251 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6252 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6254 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select with 128-bit data (V_128) and a
;; VI124_128 integer comparison.  Operand 3 is the comparison operator
;; applied to operands 4 and 5; operands 1 and 2 are the two value
;; operands.  The condition requires the data and comparison modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
6259 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6260 [(set (match_operand:V_128 0 "register_operand" "")
6262 (match_operator 3 ""
6263 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6264 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6265 (match_operand:V_128 1 "general_operand" "")
6266 (match_operand:V_128 2 "general_operand" "")))]
6268 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6269 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6271 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI8F_128 data modes with a V2DI
;; comparison.  Operand 3 is the comparison operator applied to
;; operands 4 and 5; operands 1 and 2 are the if_then_else arms.
;; Expansion is delegated to ix86_expand_int_vcond.
6276 (define_expand "vcond<VI8F_128:mode>v2di"
6277 [(set (match_operand:VI8F_128 0 "register_operand" "")
6278 (if_then_else:VI8F_128
6279 (match_operator 3 ""
6280 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6281 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6282 (match_operand:VI8F_128 1 "general_operand" "")
6283 (match_operand:VI8F_128 2 "general_operand" "")))]
6286 bool ok = ix86_expand_int_vcond (operands);
;; vcondu (the standard pattern name for an unsigned comparison)
;; counterpart of the 256-bit vcond expander.  Same operand layout and
;; the same element-count requirement; expansion is delegated to
;; ix86_expand_int_vcond.
6291 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6292 [(set (match_operand:V_256 0 "register_operand" "")
6294 (match_operator 3 ""
6295 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6296 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6297 (match_operand:V_256 1 "general_operand" "")
6298 (match_operand:V_256 2 "general_operand" "")))]
6300 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6301 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6303 bool ok = ix86_expand_int_vcond (operands);
;; vcondu (the standard pattern name for an unsigned comparison)
;; counterpart of the 128-bit vcond expander.  Same operand layout and
;; the same element-count requirement; expansion is delegated to
;; ix86_expand_int_vcond.
6308 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6309 [(set (match_operand:V_128 0 "register_operand" "")
6311 (match_operator 3 ""
6312 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6313 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6314 (match_operand:V_128 1 "general_operand" "")
6315 (match_operand:V_128 2 "general_operand" "")))]
6317 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6318 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6320 bool ok = ix86_expand_int_vcond (operands);
;; vcondu (the standard pattern name for an unsigned comparison)
;; counterpart of vcond<VI8F_128:mode>v2di: V2DI comparison selecting
;; between two VI8F_128 values.  Expansion is delegated to
;; ix86_expand_int_vcond.
6325 (define_expand "vcondu<VI8F_128:mode>v2di"
6326 [(set (match_operand:VI8F_128 0 "register_operand" "")
6327 (if_then_else:VI8F_128
6328 (match_operator 3 ""
6329 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6330 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6331 (match_operand:VI8F_128 1 "general_operand" "")
6332 (match_operand:VI8F_128 2 "general_operand" "")))]
6335 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable vec_perm expander: every 128-bit
;; vector mode unconditionally, and the 256-bit vector modes only when
;; TARGET_AVX2 is set.
6340 (define_mode_iterator VEC_PERM_AVX2
6341 [V16QI V8HI V4SI V2DI V4SF V2DF
6342 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6343 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6344 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation: operands 1 and 2 are the input vectors,
;; operand 3 is the selector in the integer vector mode of the same
;; shape (<sseintvecmode>).  Available with SSSE3, AVX or XOP; all work
;; is done by ix86_expand_vec_perm.
6346 (define_expand "vec_perm<mode>"
6347 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6348 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6349 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6350 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6351 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6353 ix86_expand_vec_perm (operands);
;; Modes handled by the constant-selector vec_perm_const expander,
;; each with the minimum ISA it requires: SSE for the 4- and 2-element
;; 128-bit modes, SSE2 for V16QI/V8HI, AVX for the 256-bit modes with
;; 32/64-bit elements, and AVX2 for V32QI/V16HI.
6357 (define_mode_iterator VEC_PERM_CONST
6358 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6359 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6360 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6361 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6362 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6363 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation with a selector that has no predicate (operand 3 is
;; matched by the empty predicate, so any rtx is accepted).  The
;; expansion is attempted by ix86_expand_vec_perm_const, whose boolean
;; result decides the outcome.
6365 (define_expand "vec_perm_const<mode>"
6366 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6367 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6368 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6369 (match_operand:<sseintvecmode> 3 "" "")]
6372 if (ix86_expand_vec_perm_const (operands))
6378 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6380 ;; Parallel bitwise logical operations
6382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1
;; with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR whose every element is constm1_rtx and forces it into
;; a register as operand 2.
6384 (define_expand "one_cmpl<mode>2"
6385 [(set (match_operand:VI 0 "register_operand" "")
6386 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6390 int i, n = GET_MODE_NUNITS (<MODE>mode);
6391 rtvec v = rtvec_alloc (n);
6393 for (i = 0; i < n; ++i)
6394 RTVEC_ELT (v, i) = constm1_rtx;
6396 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for the VI_AVX2 modes: operand 1 (register
;; only) is the complemented input, operand 2 may be in memory.
6399 (define_expand "<sse2_avx2>_andnot<mode>3"
6400 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6402 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6403 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for the VI modes.  The output routine picks a mnemonic
;; according to the insn's "mode" attribute (asserting the ISA each
;; choice needs), then formats it into a static buffer with either the
;; two-operand legacy template (alternative 0) or the "v"-prefixed
;; three-operand template (alternative 1).  The "mode" attribute is
;; V8SF for vectors wider than 16 bytes when AVX2 is unavailable, V4SF
;; when SSE2 is unavailable, and <sseinsnmode> otherwise.
6406 (define_insn "*andnot<mode>3"
6407 [(set (match_operand:VI 0 "register_operand" "=x,x")
6409 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6410 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6413 static char buf[32];
6417 switch (get_attr_mode (insn))
6420 gcc_assert (TARGET_AVX2);
6422 gcc_assert (TARGET_SSE2);
6428 gcc_assert (TARGET_AVX);
6430 gcc_assert (TARGET_SSE);
6439 switch (which_alternative)
6442 ops = "%s\t{%%2, %%0|%%0, %%2}";
6445 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6451 snprintf (buf, sizeof (buf), ops, tmp);
6454 [(set_attr "isa" "noavx,avx")
6455 (set_attr "type" "sselog")
6456 (set (attr "prefix_data16")
6458 (and (eq_attr "alternative" "0")
6459 (eq_attr "mode" "TI"))
6461 (const_string "*")))
6462 (set_attr "prefix" "orig,vex")
6464 (cond [(and (not (match_test "TARGET_AVX2"))
6465 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6466 (const_string "V8SF")
6467 (not (match_test "TARGET_SSE2"))
6468 (const_string "V4SF")
6470 (const_string "<sseinsnmode>")))])
;; Expander for the binary <code> operation on the VI modes.  It only
;; canonicalizes the operands with ix86_fixup_binary_operands_no_copy.
6472 (define_expand "<code><mode>3"
6473 [(set (match_operand:VI 0 "register_operand" "")
6475 (match_operand:VI 1 "nonimmediate_operand" "")
6476 (match_operand:VI 2 "nonimmediate_operand" "")))]
6478 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the commutative binary <code> operation on the VI modes.
;; The output routine picks a mnemonic according to the insn's "mode"
;; attribute (asserting the ISA each choice needs), then formats it
;; into a static buffer with either the two-operand legacy template
;; (alternative 0) or the "v"-prefixed three-operand template
;; (alternative 1).  The "mode" attribute is V8SF for vectors wider
;; than 16 bytes when AVX2 is unavailable, V4SF when SSE2 is
;; unavailable, and <sseinsnmode> otherwise.
6480 (define_insn "*<code><mode>3"
6481 [(set (match_operand:VI 0 "register_operand" "=x,x")
6483 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6484 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6486 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6488 static char buf[32];
6492 switch (get_attr_mode (insn))
6495 gcc_assert (TARGET_AVX2);
6497 gcc_assert (TARGET_SSE2);
6503 gcc_assert (TARGET_AVX);
6505 gcc_assert (TARGET_SSE);
6514 switch (which_alternative)
6517 ops = "%s\t{%%2, %%0|%%0, %%2}";
6520 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6526 snprintf (buf, sizeof (buf), ops, tmp);
6529 [(set_attr "isa" "noavx,avx")
6530 (set_attr "type" "sselog")
6531 (set (attr "prefix_data16")
6533 (and (eq_attr "alternative" "0")
6534 (eq_attr "mode" "TI"))
6536 (const_string "*")))
6537 (set_attr "prefix" "orig,vex")
6539 (cond [(and (not (match_test "TARGET_AVX2"))
6540 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6541 (const_string "V8SF")
6542 (not (match_test "TARGET_SSE2"))
6543 (const_string "V4SF")
6545 (const_string "<sseinsnmode>")))])
;; And-not on TFmode values held in SSE registers (pandn / vpandn).
;; Operand 1 (register only) is the complemented input; operand 2 may
;; be in memory.
6547 (define_insn "*andnottf3"
6548 [(set (match_operand:TF 0 "register_operand" "=x,x")
6550 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6551 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6554 pandn\t{%2, %0|%0, %2}
6555 vpandn\t{%2, %1, %0|%0, %1, %2}"
6556 [(set_attr "isa" "noavx,avx")
6557 (set_attr "type" "sselog")
6558 (set_attr "prefix_data16" "1,*")
6559 (set_attr "prefix" "orig,vex")
6560 (set_attr "mode" "TI")])
;; Expander for the binary <code> operation on TFmode.  It only
;; canonicalizes the operands with ix86_fixup_binary_operands_no_copy.
6562 (define_expand "<code>tf3"
6563 [(set (match_operand:TF 0 "register_operand" "")
6565 (match_operand:TF 1 "nonimmediate_operand" "")
6566 (match_operand:TF 2 "nonimmediate_operand" "")))]
6568 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Commutative bitwise logic on TFmode values held in SSE registers
;; (p<logic> / vp<logic>).  Alternative 0 is the legacy two-operand
;; form, alternative 1 the VEX three-operand form.
6570 (define_insn "*<code>tf3"
6571 [(set (match_operand:TF 0 "register_operand" "=x,x")
6573 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6574 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6576 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6578 p<logic>\t{%2, %0|%0, %2}
6579 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6580 [(set_attr "isa" "noavx,avx")
6581 (set_attr "type" "sselog")
6582 (set_attr "prefix_data16" "1,*")
6583 (set_attr "prefix" "orig,vex")
6584 (set_attr "mode" "TI")])
6586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6588 ;; Parallel integral element swizzling
6590 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2 vectors into one <ssepackmode>
;; vector.  Both inputs are re-viewed in the narrower-element mode and
;; passed to ix86_expand_vec_extract_even_odd with a final argument
;; of 0.
;; NOTE(review): 0 presumably selects the even-indexed elements, i.e.
;; the low half of each wide element -- confirm against the helper.
6592 (define_expand "vec_pack_trunc_<mode>"
6593 [(match_operand:<ssepackmode> 0 "register_operand" "")
6594 (match_operand:VI248_AVX2 1 "register_operand" "")
6595 (match_operand:VI248_AVX2 2 "register_operand" "")]
6598 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6599 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6600 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2 into a VI1_AVX2
;; result.  Operand 1 must be a register; operand 2 may be in memory.
6604 (define_insn "<sse2_avx2>_packsswb"
6605 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6606 (vec_concat:VI1_AVX2
6607 (ss_truncate:<ssehalfvecmode>
6608 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6609 (ss_truncate:<ssehalfvecmode>
6610 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6613 packsswb\t{%2, %0|%0, %2}
6614 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6615 [(set_attr "isa" "noavx,avx")
6616 (set_attr "type" "sselog")
6617 (set_attr "prefix_data16" "1,*")
6618 (set_attr "prefix" "orig,vex")
6619 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2 into a VI2_AVX2
;; result.  Operand 1 must be a register; operand 2 may be in memory.
6621 (define_insn "<sse2_avx2>_packssdw"
6622 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6623 (vec_concat:VI2_AVX2
6624 (ss_truncate:<ssehalfvecmode>
6625 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6626 (ss_truncate:<ssehalfvecmode>
6627 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6630 packssdw\t{%2, %0|%0, %2}
6631 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6632 [(set_attr "isa" "noavx,avx")
6633 (set_attr "type" "sselog")
6634 (set_attr "prefix_data16" "1,*")
6635 (set_attr "prefix" "orig,vex")
6636 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2 into a VI1_AVX2
;; result.  Operand 1 must be a register; operand 2 may be in memory.
6638 (define_insn "<sse2_avx2>_packuswb"
6639 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6640 (vec_concat:VI1_AVX2
6641 (us_truncate:<ssehalfvecmode>
6642 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6643 (us_truncate:<ssehalfvecmode>
6644 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6647 packuswb\t{%2, %0|%0, %2}
6648 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6649 [(set_attr "isa" "noavx,avx")
6650 (set_attr "type" "sselog")
6651 (set_attr "prefix_data16" "1,*")
6652 (set_attr "prefix" "orig,vex")
6653 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI operands.  The selector pairs index i with index
;; i + 32 for i = 8..15 and i = 24..31.
;; NOTE(review): presumably indices >= 32 address operand 2 inside a
;; concatenation of operands 1 and 2 -- the enclosing vec_select/vec_concat
;; lines are not visible here; confirm against the full file.
6655 (define_insn "avx2_interleave_highv32qi"
6656 [(set (match_operand:V32QI 0 "register_operand" "=x")
6659 (match_operand:V32QI 1 "register_operand" "x")
6660 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6661 (parallel [(const_int 8) (const_int 40)
6662 (const_int 9) (const_int 41)
6663 (const_int 10) (const_int 42)
6664 (const_int 11) (const_int 43)
6665 (const_int 12) (const_int 44)
6666 (const_int 13) (const_int 45)
6667 (const_int 14) (const_int 46)
6668 (const_int 15) (const_int 47)
6669 (const_int 24) (const_int 56)
6670 (const_int 25) (const_int 57)
6671 (const_int 26) (const_int 58)
6672 (const_int 27) (const_int 59)
6673 (const_int 28) (const_int 60)
6674 (const_int 29) (const_int 61)
6675 (const_int 30) (const_int 62)
6676 (const_int 31) (const_int 63)])))]
6678 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6679 [(set_attr "type" "sselog")
6680 (set_attr "prefix" "vex")
6681 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI operands.  The selector pairs index i
;; with index i + 16 for i = 8..15.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 16 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6683 (define_insn "vec_interleave_highv16qi"
6684 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6687 (match_operand:V16QI 1 "register_operand" "0,x")
6688 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6689 (parallel [(const_int 8) (const_int 24)
6690 (const_int 9) (const_int 25)
6691 (const_int 10) (const_int 26)
6692 (const_int 11) (const_int 27)
6693 (const_int 12) (const_int 28)
6694 (const_int 13) (const_int 29)
6695 (const_int 14) (const_int 30)
6696 (const_int 15) (const_int 31)])))]
6699 punpckhbw\t{%2, %0|%0, %2}
6700 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6701 [(set_attr "isa" "noavx,avx")
6702 (set_attr "type" "sselog")
6703 (set_attr "prefix_data16" "1,*")
6704 (set_attr "prefix" "orig,vex")
6705 (set_attr "mode" "TI")])
;; vpunpcklbw on V32QI operands.  The selector pairs index i with index
;; i + 32 for i = 0..7 and i = 16..23.
;; NOTE(review): presumably indices >= 32 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6707 (define_insn "avx2_interleave_lowv32qi"
6708 [(set (match_operand:V32QI 0 "register_operand" "=x")
6711 (match_operand:V32QI 1 "register_operand" "x")
6712 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6713 (parallel [(const_int 0) (const_int 32)
6714 (const_int 1) (const_int 33)
6715 (const_int 2) (const_int 34)
6716 (const_int 3) (const_int 35)
6717 (const_int 4) (const_int 36)
6718 (const_int 5) (const_int 37)
6719 (const_int 6) (const_int 38)
6720 (const_int 7) (const_int 39)
6721 (const_int 16) (const_int 48)
6722 (const_int 17) (const_int 49)
6723 (const_int 18) (const_int 50)
6724 (const_int 19) (const_int 51)
6725 (const_int 20) (const_int 52)
6726 (const_int 21) (const_int 53)
6727 (const_int 22) (const_int 54)
6728 (const_int 23) (const_int 55)])))]
6730 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6731 [(set_attr "type" "sselog")
6732 (set_attr "prefix" "vex")
6733 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI operands.  The selector pairs index i
;; with index i + 16 for i = 0..7.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 16 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6735 (define_insn "vec_interleave_lowv16qi"
6736 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6739 (match_operand:V16QI 1 "register_operand" "0,x")
6740 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6741 (parallel [(const_int 0) (const_int 16)
6742 (const_int 1) (const_int 17)
6743 (const_int 2) (const_int 18)
6744 (const_int 3) (const_int 19)
6745 (const_int 4) (const_int 20)
6746 (const_int 5) (const_int 21)
6747 (const_int 6) (const_int 22)
6748 (const_int 7) (const_int 23)])))]
6751 punpcklbw\t{%2, %0|%0, %2}
6752 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6753 [(set_attr "isa" "noavx,avx")
6754 (set_attr "type" "sselog")
6755 (set_attr "prefix_data16" "1,*")
6756 (set_attr "prefix" "orig,vex")
6757 (set_attr "mode" "TI")])
;; vpunpckhwd on V16HI operands.  The selector pairs index i with index
;; i + 16 for i = 4..7 and i = 12..15.
;; NOTE(review): presumably indices >= 16 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6759 (define_insn "avx2_interleave_highv16hi"
6760 [(set (match_operand:V16HI 0 "register_operand" "=x")
6763 (match_operand:V16HI 1 "register_operand" "x")
6764 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6765 (parallel [(const_int 4) (const_int 20)
6766 (const_int 5) (const_int 21)
6767 (const_int 6) (const_int 22)
6768 (const_int 7) (const_int 23)
6769 (const_int 12) (const_int 28)
6770 (const_int 13) (const_int 29)
6771 (const_int 14) (const_int 30)
6772 (const_int 15) (const_int 31)])))]
6774 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6775 [(set_attr "type" "sselog")
6776 (set_attr "prefix" "vex")
6777 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI operands.  The selector pairs index i
;; with index i + 8 for i = 4..7.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 8 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6779 (define_insn "vec_interleave_highv8hi"
6780 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6783 (match_operand:V8HI 1 "register_operand" "0,x")
6784 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6785 (parallel [(const_int 4) (const_int 12)
6786 (const_int 5) (const_int 13)
6787 (const_int 6) (const_int 14)
6788 (const_int 7) (const_int 15)])))]
6791 punpckhwd\t{%2, %0|%0, %2}
6792 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6793 [(set_attr "isa" "noavx,avx")
6794 (set_attr "type" "sselog")
6795 (set_attr "prefix_data16" "1,*")
6796 (set_attr "prefix" "orig,vex")
6797 (set_attr "mode" "TI")])
;; vpunpcklwd on V16HI operands.  The selector pairs index i with index
;; i + 16 for i = 0..3 and i = 8..11.
;; NOTE(review): presumably indices >= 16 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6799 (define_insn "avx2_interleave_lowv16hi"
6800 [(set (match_operand:V16HI 0 "register_operand" "=x")
6803 (match_operand:V16HI 1 "register_operand" "x")
6804 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6805 (parallel [(const_int 0) (const_int 16)
6806 (const_int 1) (const_int 17)
6807 (const_int 2) (const_int 18)
6808 (const_int 3) (const_int 19)
6809 (const_int 8) (const_int 24)
6810 (const_int 9) (const_int 25)
6811 (const_int 10) (const_int 26)
6812 (const_int 11) (const_int 27)])))]
6814 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6815 [(set_attr "type" "sselog")
6816 (set_attr "prefix" "vex")
6817 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI operands.  The selector pairs index i
;; with index i + 8 for i = 0..3.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 8 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6819 (define_insn "vec_interleave_lowv8hi"
6820 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6823 (match_operand:V8HI 1 "register_operand" "0,x")
6824 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6825 (parallel [(const_int 0) (const_int 8)
6826 (const_int 1) (const_int 9)
6827 (const_int 2) (const_int 10)
6828 (const_int 3) (const_int 11)])))]
6831 punpcklwd\t{%2, %0|%0, %2}
6832 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6833 [(set_attr "isa" "noavx,avx")
6834 (set_attr "type" "sselog")
6835 (set_attr "prefix_data16" "1,*")
6836 (set_attr "prefix" "orig,vex")
6837 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI operands.  The selector pairs index i with index
;; i + 8 for i = 2, 3, 6 and 7.
;; NOTE(review): presumably indices >= 8 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6839 (define_insn "avx2_interleave_highv8si"
6840 [(set (match_operand:V8SI 0 "register_operand" "=x")
6843 (match_operand:V8SI 1 "register_operand" "x")
6844 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6845 (parallel [(const_int 2) (const_int 10)
6846 (const_int 3) (const_int 11)
6847 (const_int 6) (const_int 14)
6848 (const_int 7) (const_int 15)])))]
6850 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix" "vex")
6853 (set_attr "mode" "OI")])
;; punpckhdq / vpunpckhdq on V4SI operands.  The selector pairs index i
;; with index i + 4 for i = 2 and 3.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 4 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6855 (define_insn "vec_interleave_highv4si"
6856 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6859 (match_operand:V4SI 1 "register_operand" "0,x")
6860 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6861 (parallel [(const_int 2) (const_int 6)
6862 (const_int 3) (const_int 7)])))]
6865 punpckhdq\t{%2, %0|%0, %2}
6866 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6867 [(set_attr "isa" "noavx,avx")
6868 (set_attr "type" "sselog")
6869 (set_attr "prefix_data16" "1,*")
6870 (set_attr "prefix" "orig,vex")
6871 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI operands.  The selector pairs index i with index
;; i + 8 for i = 0, 1, 4 and 5.
;; NOTE(review): presumably indices >= 8 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6873 (define_insn "avx2_interleave_lowv8si"
6874 [(set (match_operand:V8SI 0 "register_operand" "=x")
6877 (match_operand:V8SI 1 "register_operand" "x")
6878 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6879 (parallel [(const_int 0) (const_int 8)
6880 (const_int 1) (const_int 9)
6881 (const_int 4) (const_int 12)
6882 (const_int 5) (const_int 13)])))]
6884 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix" "vex")
6887 (set_attr "mode" "OI")])
;; punpckldq / vpunpckldq on V4SI operands.  The selector pairs index i
;; with index i + 4 for i = 0 and 1.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): presumably indices >= 4 address operand 2 -- the
;; enclosing vec_select/vec_concat lines are not visible here.
6889 (define_insn "vec_interleave_lowv4si"
6890 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6893 (match_operand:V4SI 1 "register_operand" "0,x")
6894 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6895 (parallel [(const_int 0) (const_int 4)
6896 (const_int 1) (const_int 5)])))]
6899 punpckldq\t{%2, %0|%0, %2}
6900 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6901 [(set_attr "isa" "noavx,avx")
6902 (set_attr "type" "sselog")
6903 (set_attr "prefix_data16" "1,*")
6904 (set_attr "prefix" "orig,vex")
6905 (set_attr "mode" "TI")])
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> into
;; temporary t1 and avx2_interleave_high<mode> into temporary t2, then
;; combines the two with avx2_permv2ti on their V4DImode lowparts using the
;; immediate 1 + (3 << 4).
;; NOTE(review): presumably that immediate selects the upper 128-bit half
;; of each temporary -- confirm against the vperm2i128 immediate encoding.
6907 (define_expand "vec_interleave_high<mode>"
6908 [(match_operand:VI_256 0 "register_operand" "=x")
6909 (match_operand:VI_256 1 "register_operand" "x")
6910 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6913 rtx t1 = gen_reg_rtx (<MODE>mode);
6914 rtx t2 = gen_reg_rtx (<MODE>mode);
6915 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6916 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6917 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6918 gen_lowpart (V4DImode, t1),
6919 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> into
;; temporary t1 and avx2_interleave_high<mode> into temporary t2, then
;; combines the two with avx2_permv2ti on their V4DImode lowparts using the
;; immediate 0 + (2 << 4).
;; NOTE(review): presumably that immediate selects the lower 128-bit half
;; of each temporary -- confirm against the vperm2i128 immediate encoding.
6923 (define_expand "vec_interleave_low<mode>"
6924 [(match_operand:VI_256 0 "register_operand" "=x")
6925 (match_operand:VI_256 1 "register_operand" "x")
6926 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6929 rtx t1 = gen_reg_rtx (<MODE>mode);
6930 rtx t2 = gen_reg_rtx (<MODE>mode);
6931 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6932 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6933 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6934 gen_lowpart (V4DImode, t1),
6935 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6939 ;; Modes handled by pinsr patterns.
;; V8HI carries no condition of its own; V16QI and V4SI are enabled only
;; under TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
6940 (define_mode_iterator PINSR_MODE
6941 [(V16QI "TARGET_SSE4_1") V8HI
6942 (V4SI "TARGET_SSE4_1")
6943 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per mode: "sse2" for V8HI, "sse4_1" for V16QI, V4SI
;; and V2DI.  It is substituted into the pinsr pattern name as <sse2p4_1>.
6945 (define_mode_attr sse2p4_1
6946 [(V16QI "sse4_1") (V8HI "sse2")
6947 (V4SI "sse4_1") (V2DI "sse4_1")])
;; Insert scalar operand 2 into vector operand 1: a vec_merge of the
;; vec_duplicate of operand 2 with operand 1, selected by operand 3.
;; The visible part of the condition requires exact_log2 of operand 3,
;; taken as unsigned, to be below the number of units of the mode; the
;; output code then replaces operand 3 with that exact_log2 value before
;; printing.  When the scalar mode is narrower than SImode, operand 2 is
;; printed with the %k modifier.  Alternatives 0-1 are the non-AVX
;; pinsr forms (operand 1 tied to the destination), alternatives 2-3 the
;; three-operand vpinsr forms.
6949 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
6950 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6951 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6952 (vec_merge:PINSR_MODE
6953 (vec_duplicate:PINSR_MODE
6954 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6955 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6956 (match_operand:SI 3 "const_int_operand" "")))]
6958 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6959 < GET_MODE_NUNITS (<MODE>mode))"
6961 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6963 switch (which_alternative)
6966 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6967 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6970 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6972 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6973 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6976 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6981 [(set_attr "isa" "noavx,noavx,avx,avx")
6982 (set_attr "type" "sselog")
6983 (set (attr "prefix_rex")
6985 (and (not (match_test "TARGET_AVX"))
6986 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6988 (const_string "*")))
6989 (set (attr "prefix_data16")
6991 (and (not (match_test "TARGET_AVX"))
6992 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6994 (const_string "*")))
6995 (set (attr "prefix_extra")
6997 (and (not (match_test "TARGET_AVX"))
6998 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7000 (const_string "1")))
7001 (set_attr "length_immediate" "1")
7002 (set_attr "prefix" "orig,orig,vex,vex")
7003 (set_attr "mode" "TI")])
;; %vpextrb from V16QI register operand 1 into a SWI48 general register.
;; Operand 2 is the element index, restricted to 0..15; the destination is
;; printed with the %k modifier.
;; NOTE(review): the wrapping RTL (presumably a zero_extend of a
;; vec_select) is not visible in this listing -- confirm.
7005 (define_insn "*sse4_1_pextrb_<mode>"
7006 [(set (match_operand:SWI48 0 "register_operand" "=r")
7009 (match_operand:V16QI 1 "register_operand" "x")
7010 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7012 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7013 [(set_attr "type" "sselog")
7014 (set_attr "prefix_extra" "1")
7015 (set_attr "length_immediate" "1")
7016 (set_attr "prefix" "maybe_vex")
7017 (set_attr "mode" "TI")])
;; %vpextrb from V16QI register operand 1 directly to a QImode memory
;; destination.  Operand 2 is the element index, restricted to 0..15.
7019 (define_insn "*sse4_1_pextrb_memory"
7020 [(set (match_operand:QI 0 "memory_operand" "=m")
7022 (match_operand:V16QI 1 "register_operand" "x")
7023 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7025 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7026 [(set_attr "type" "sselog")
7027 (set_attr "prefix_extra" "1")
7028 (set_attr "length_immediate" "1")
7029 (set_attr "prefix" "maybe_vex")
7030 (set_attr "mode" "TI")])
;; %vpextrw from V8HI register operand 1 into a SWI48 general register.
;; Operand 2 is the element index, restricted to 0..7; the destination is
;; printed with the %k modifier.
;; NOTE(review): the wrapping RTL (presumably a zero_extend of a
;; vec_select) is not visible in this listing -- confirm.
7032 (define_insn "*sse2_pextrw_<mode>"
7033 [(set (match_operand:SWI48 0 "register_operand" "=r")
7036 (match_operand:V8HI 1 "register_operand" "x")
7037 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7039 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7040 [(set_attr "type" "sselog")
7041 (set_attr "prefix_data16" "1")
7042 (set_attr "length_immediate" "1")
7043 (set_attr "prefix" "maybe_vex")
7044 (set_attr "mode" "TI")])
;; %vpextrw from V8HI register operand 1 directly to an HImode memory
;; destination.  Operand 2 is the element index, restricted to 0..7.
7046 (define_insn "*sse4_1_pextrw_memory"
7047 [(set (match_operand:HI 0 "memory_operand" "=m")
7049 (match_operand:V8HI 1 "register_operand" "x")
7050 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7052 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7053 [(set_attr "type" "sselog")
7054 (set_attr "prefix_extra" "1")
7055 (set_attr "length_immediate" "1")
7056 (set_attr "prefix" "maybe_vex")
7057 (set_attr "mode" "TI")])
;; %vpextrd from V4SI register operand 1 to an SImode register or memory
;; destination ("=rm").  Operand 2 is the element index, restricted to 0..3.
7059 (define_insn "*sse4_1_pextrd"
7060 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7062 (match_operand:V4SI 1 "register_operand" "x")
7063 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7065 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7066 [(set_attr "type" "sselog")
7067 (set_attr "prefix_extra" "1")
7068 (set_attr "length_immediate" "1")
7069 (set_attr "prefix" "maybe_vex")
7070 (set_attr "mode" "TI")])
;; %vpextrd from V4SI register operand 1 into a DImode general register,
;; printed with the %k modifier.  Enabled only when both TARGET_64BIT and
;; TARGET_SSE4_1 hold.  Operand 2 is the element index, restricted to 0..3.
;; NOTE(review): per the pattern name this is the zero-extending form; the
;; zero_extend/vec_select lines are not visible in this listing.
7072 (define_insn "*sse4_1_pextrd_zext"
7073 [(set (match_operand:DI 0 "register_operand" "=r")
7076 (match_operand:V4SI 1 "register_operand" "x")
7077 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7078 "TARGET_64BIT && TARGET_SSE4_1"
7079 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7080 [(set_attr "type" "sselog")
7081 (set_attr "prefix_extra" "1")
7082 (set_attr "length_immediate" "1")
7083 (set_attr "prefix" "maybe_vex")
7084 (set_attr "mode" "TI")])
;; %vpextrq from V2DI register operand 1 to a DImode register or memory
;; destination ("=rm").  Enabled only when both TARGET_SSE4_1 and
;; TARGET_64BIT hold.  Operand 2 is the element index, restricted to 0..1.
7086 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
7087 (define_insn "*sse4_1_pextrq"
7088 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7090 (match_operand:V2DI 1 "register_operand" "x")
7091 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7092 "TARGET_SSE4_1 && TARGET_64BIT"
7093 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7094 [(set_attr "type" "sselog")
7095 (set_attr "prefix_rex" "1")
7096 (set_attr "prefix_extra" "1")
7097 (set_attr "length_immediate" "1")
7098 (set_attr "prefix" "maybe_vex")
7099 (set_attr "mode" "TI")])
;; Expander taking an immediate in 0..255 as operand 2.  It splits the
;; immediate into four 2-bit selectors and passes each one to
;; gen_avx2_pshufd_1 twice: once unchanged and once with 4 added.
7101 (define_expand "avx2_pshufdv3"
7102 [(match_operand:V8SI 0 "register_operand" "")
7103 (match_operand:V8SI 1 "nonimmediate_operand" "")
7104 (match_operand:SI 2 "const_0_to_255_operand" "")]
7107 int mask = INTVAL (operands[2]);
7108 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7109 GEN_INT ((mask >> 0) & 3),
7110 GEN_INT ((mask >> 2) & 3),
7111 GEN_INT ((mask >> 4) & 3),
7112 GEN_INT ((mask >> 6) & 3),
7113 GEN_INT (((mask >> 0) & 3) + 4),
7114 GEN_INT (((mask >> 2) & 3) + 4),
7115 GEN_INT (((mask >> 4) & 3) + 4),
7116 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on a V8SI operand with an explicit eight-entry selector.
;; Operands 2-5 must lie in 0..3 and operands 6-9 in 4..7; the condition
;; further requires operand N + 4 to equal operand N+4 for N = 2..5.
;; The output code packs operands 2-5 into a single immediate, two bits
;; each starting at bit 0, and stores it in operands[2] for printing.
7120 (define_insn "avx2_pshufd_1"
7121 [(set (match_operand:V8SI 0 "register_operand" "=x")
7123 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7124 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7125 (match_operand 3 "const_0_to_3_operand" "")
7126 (match_operand 4 "const_0_to_3_operand" "")
7127 (match_operand 5 "const_0_to_3_operand" "")
7128 (match_operand 6 "const_4_to_7_operand" "")
7129 (match_operand 7 "const_4_to_7_operand" "")
7130 (match_operand 8 "const_4_to_7_operand" "")
7131 (match_operand 9 "const_4_to_7_operand" "")])))]
7133 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7134 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7135 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7136 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7139 mask |= INTVAL (operands[2]) << 0;
7140 mask |= INTVAL (operands[3]) << 2;
7141 mask |= INTVAL (operands[4]) << 4;
7142 mask |= INTVAL (operands[5]) << 6;
7143 operands[2] = GEN_INT (mask);
7145 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7147 [(set_attr "type" "sselog1")
7148 (set_attr "prefix" "vex")
7149 (set_attr "length_immediate" "1")
7150 (set_attr "mode" "OI")])
;; Expander taking a const_int immediate as operand 2.  It splits the
;; immediate into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them to gen_sse2_pshufd_1.
7152 (define_expand "sse2_pshufd"
7153 [(match_operand:V4SI 0 "register_operand" "")
7154 (match_operand:V4SI 1 "nonimmediate_operand" "")
7155 (match_operand:SI 2 "const_int_operand" "")]
7158 int mask = INTVAL (operands[2]);
7159 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7160 GEN_INT ((mask >> 0) & 3),
7161 GEN_INT ((mask >> 2) & 3),
7162 GEN_INT ((mask >> 4) & 3),
7163 GEN_INT ((mask >> 6) & 3)));
;; %vpshufd on a V4SI operand with an explicit four-entry selector.
;; Operands 2-5 must each lie in 0..3.  The output code packs them into a
;; single immediate, two bits each starting at bit 0, and stores it in
;; operands[2] for printing.
7167 (define_insn "sse2_pshufd_1"
7168 [(set (match_operand:V4SI 0 "register_operand" "=x")
7170 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7171 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7172 (match_operand 3 "const_0_to_3_operand" "")
7173 (match_operand 4 "const_0_to_3_operand" "")
7174 (match_operand 5 "const_0_to_3_operand" "")])))]
7178 mask |= INTVAL (operands[2]) << 0;
7179 mask |= INTVAL (operands[3]) << 2;
7180 mask |= INTVAL (operands[4]) << 4;
7181 mask |= INTVAL (operands[5]) << 6;
7182 operands[2] = GEN_INT (mask);
7184 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7186 [(set_attr "type" "sselog1")
7187 (set_attr "prefix_data16" "1")
7188 (set_attr "prefix" "maybe_vex")
7189 (set_attr "length_immediate" "1")
7190 (set_attr "mode" "TI")])
;; Expander taking an immediate in 0..255 as operand 2.  It splits the
;; immediate into four 2-bit selectors and passes each one to
;; gen_avx2_pshuflw_1 twice: once unchanged and once with 8 added.
7192 (define_expand "avx2_pshuflwv3"
7193 [(match_operand:V16HI 0 "register_operand" "")
7194 (match_operand:V16HI 1 "nonimmediate_operand" "")
7195 (match_operand:SI 2 "const_0_to_255_operand" "")]
7198 int mask = INTVAL (operands[2]);
7199 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7200 GEN_INT ((mask >> 0) & 3),
7201 GEN_INT ((mask >> 2) & 3),
7202 GEN_INT ((mask >> 4) & 3),
7203 GEN_INT ((mask >> 6) & 3),
7204 GEN_INT (((mask >> 0) & 3) + 8),
7205 GEN_INT (((mask >> 2) & 3) + 8),
7206 GEN_INT (((mask >> 4) & 3) + 8),
7207 GEN_INT (((mask >> 6) & 3) + 8)));
;; vpshuflw on a V16HI operand.  Operands 2-5 must lie in 0..3 and
;; operands 6-9 in 8..11; the condition further requires operand N + 8 to
;; equal operand N+4 for N = 2..5.  The output code packs operands 2-5
;; into a single immediate, two bits each starting at bit 0, and stores it
;; in operands[2] for printing.
;; NOTE(review): the remaining selector entries between and after the
;; match_operands are not visible in this listing.
7211 (define_insn "avx2_pshuflw_1"
7212 [(set (match_operand:V16HI 0 "register_operand" "=x")
7214 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7215 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7216 (match_operand 3 "const_0_to_3_operand" "")
7217 (match_operand 4 "const_0_to_3_operand" "")
7218 (match_operand 5 "const_0_to_3_operand" "")
7223 (match_operand 6 "const_8_to_11_operand" "")
7224 (match_operand 7 "const_8_to_11_operand" "")
7225 (match_operand 8 "const_8_to_11_operand" "")
7226 (match_operand 9 "const_8_to_11_operand" "")
7232 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7233 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7234 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7235 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7238 mask |= INTVAL (operands[2]) << 0;
7239 mask |= INTVAL (operands[3]) << 2;
7240 mask |= INTVAL (operands[4]) << 4;
7241 mask |= INTVAL (operands[5]) << 6;
7242 operands[2] = GEN_INT (mask);
7244 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7246 [(set_attr "type" "sselog")
7247 (set_attr "prefix" "vex")
7248 (set_attr "length_immediate" "1")
7249 (set_attr "mode" "OI")])
;; Expander taking a const_int immediate as operand 2.  It splits the
;; immediate into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them to gen_sse2_pshuflw_1.
7251 (define_expand "sse2_pshuflw"
7252 [(match_operand:V8HI 0 "register_operand" "")
7253 (match_operand:V8HI 1 "nonimmediate_operand" "")
7254 (match_operand:SI 2 "const_int_operand" "")]
7257 int mask = INTVAL (operands[2]);
7258 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7259 GEN_INT ((mask >> 0) & 3),
7260 GEN_INT ((mask >> 2) & 3),
7261 GEN_INT ((mask >> 4) & 3),
7262 GEN_INT ((mask >> 6) & 3)));
;; %vpshuflw on a V8HI operand.  Operands 2-5 must each lie in 0..3.  The
;; output code packs them into a single immediate, two bits each starting
;; at bit 0, and stores it in operands[2] for printing.  The attributes
;; set prefix_data16 to 0 and prefix_rep to 1.
;; NOTE(review): the selector entries after operand 5 are not visible in
;; this listing.
7266 (define_insn "sse2_pshuflw_1"
7267 [(set (match_operand:V8HI 0 "register_operand" "=x")
7269 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7270 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7271 (match_operand 3 "const_0_to_3_operand" "")
7272 (match_operand 4 "const_0_to_3_operand" "")
7273 (match_operand 5 "const_0_to_3_operand" "")
7281 mask |= INTVAL (operands[2]) << 0;
7282 mask |= INTVAL (operands[3]) << 2;
7283 mask |= INTVAL (operands[4]) << 4;
7284 mask |= INTVAL (operands[5]) << 6;
7285 operands[2] = GEN_INT (mask);
7287 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7289 [(set_attr "type" "sselog")
7290 (set_attr "prefix_data16" "0")
7291 (set_attr "prefix_rep" "1")
7292 (set_attr "prefix" "maybe_vex")
7293 (set_attr "length_immediate" "1")
7294 (set_attr "mode" "TI")])
;; Expander taking an immediate in 0..255 as operand 2.  It splits the
;; immediate into four 2-bit selectors and passes each one to
;; gen_avx2_pshufhw_1 twice: once with 4 added and once with 12 added.
7296 (define_expand "avx2_pshufhwv3"
7297 [(match_operand:V16HI 0 "register_operand" "")
7298 (match_operand:V16HI 1 "nonimmediate_operand" "")
7299 (match_operand:SI 2 "const_0_to_255_operand" "")]
7302 int mask = INTVAL (operands[2]);
7303 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7304 GEN_INT (((mask >> 0) & 3) + 4),
7305 GEN_INT (((mask >> 2) & 3) + 4),
7306 GEN_INT (((mask >> 4) & 3) + 4),
7307 GEN_INT (((mask >> 6) & 3) + 4),
7308 GEN_INT (((mask >> 0) & 3) + 12),
7309 GEN_INT (((mask >> 2) & 3) + 12),
7310 GEN_INT (((mask >> 4) & 3) + 12),
7311 GEN_INT (((mask >> 6) & 3) + 12)));
;; vpshufhw on a V16HI operand.  Operands 2-5 must lie in 4..7 and
;; operands 6-9 in 12..15; the condition further requires operand N + 8 to
;; equal operand N+4 for N = 2..5.  The output code subtracts 4 from each
;; of operands 2-5, packs the results into a single immediate (two bits
;; each starting at bit 0), and stores it in operands[2] for printing.
;; NOTE(review): the fixed selector entries after (const_int 0) and
;; between the operand groups are not visible in this listing.
7315 (define_insn "avx2_pshufhw_1"
7316 [(set (match_operand:V16HI 0 "register_operand" "=x")
7318 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7319 (parallel [(const_int 0)
7323 (match_operand 2 "const_4_to_7_operand" "")
7324 (match_operand 3 "const_4_to_7_operand" "")
7325 (match_operand 4 "const_4_to_7_operand" "")
7326 (match_operand 5 "const_4_to_7_operand" "")
7331 (match_operand 6 "const_12_to_15_operand" "")
7332 (match_operand 7 "const_12_to_15_operand" "")
7333 (match_operand 8 "const_12_to_15_operand" "")
7334 (match_operand 9 "const_12_to_15_operand" "")])))]
7336 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7337 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7338 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7339 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7342 mask |= (INTVAL (operands[2]) - 4) << 0;
7343 mask |= (INTVAL (operands[3]) - 4) << 2;
7344 mask |= (INTVAL (operands[4]) - 4) << 4;
7345 mask |= (INTVAL (operands[5]) - 4) << 6;
7346 operands[2] = GEN_INT (mask);
7348 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7350 [(set_attr "type" "sselog")
7351 (set_attr "prefix" "vex")
7352 (set_attr "length_immediate" "1")
7353 (set_attr "mode" "OI")])
;; Expander taking a const_int immediate as operand 2.  It splits the
;; immediate into four 2-bit selectors, adds 4 to each, and passes them to
;; gen_sse2_pshufhw_1.
7355 (define_expand "sse2_pshufhw"
7356 [(match_operand:V8HI 0 "register_operand" "")
7357 (match_operand:V8HI 1 "nonimmediate_operand" "")
7358 (match_operand:SI 2 "const_int_operand" "")]
7361 int mask = INTVAL (operands[2]);
7362 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7363 GEN_INT (((mask >> 0) & 3) + 4),
7364 GEN_INT (((mask >> 2) & 3) + 4),
7365 GEN_INT (((mask >> 4) & 3) + 4),
7366 GEN_INT (((mask >> 6) & 3) + 4)));
;; %vpshufhw on a V8HI operand.  Operands 2-5 must each lie in 4..7.  The
;; output code subtracts 4 from each, packs the results into a single
;; immediate (two bits each starting at bit 0), and stores it in
;; operands[2] for printing.  The attributes set prefix_rep to 1 and
;; prefix_data16 to 0.
;; NOTE(review): the fixed selector entries following (const_int 0) are
;; not visible in this listing.
7370 (define_insn "sse2_pshufhw_1"
7371 [(set (match_operand:V8HI 0 "register_operand" "=x")
7373 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7374 (parallel [(const_int 0)
7378 (match_operand 2 "const_4_to_7_operand" "")
7379 (match_operand 3 "const_4_to_7_operand" "")
7380 (match_operand 4 "const_4_to_7_operand" "")
7381 (match_operand 5 "const_4_to_7_operand" "")])))]
7385 mask |= (INTVAL (operands[2]) - 4) << 0;
7386 mask |= (INTVAL (operands[3]) - 4) << 2;
7387 mask |= (INTVAL (operands[4]) - 4) << 4;
7388 mask |= (INTVAL (operands[5]) - 4) << 6;
7389 operands[2] = GEN_INT (mask);
7391 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7393 [(set_attr "type" "sselog")
7394 (set_attr "prefix_rep" "1")
7395 (set_attr "prefix_data16" "0")
7396 (set_attr "prefix" "maybe_vex")
7397 (set_attr "length_immediate" "1")
7398 (set_attr "mode" "TI")])
;; Expander producing a V4SI value from SImode operand 1.  The preparation
;; statement sets operands[2] to the V4SImode zero constant.
;; NOTE(review): how operands 1 and 2 are combined is not visible in this
;; listing; presumably a vec_merge as in sse2_loadld -- confirm.
7400 (define_expand "sse2_loadd"
7401 [(set (match_operand:V4SI 0 "register_operand" "")
7404 (match_operand:SI 1 "nonimmediate_operand" ""))
7408 "operands[2] = CONST0_RTX (V4SImode);")
;; Combine SImode operand 2 with V4SI operand 1 (a register or zero).
;; Alternatives 0-2 take the zero constant ("C") as operand 1 and emit
;; %vmovd from memory, %vmovd from a general register, or movss from
;; memory.  Alternatives 3-4 take a vector register as operand 1 and emit
;; movss (operand 1 tied to the destination) or three-operand vmovss.
;; NOTE(review): the combining RTL is not visible in this listing.
7410 (define_insn "sse2_loadld"
7411 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7414 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7415 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7419 %vmovd\t{%2, %0|%0, %2}
7420 %vmovd\t{%2, %0|%0, %2}
7421 movss\t{%2, %0|%0, %2}
7422 movss\t{%2, %0|%0, %2}
7423 vmovss\t{%2, %1, %0|%0, %1, %2}"
7424 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7425 (set_attr "type" "ssemov")
7426 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7427 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI operand 1 to an SImode destination.
;; The split fires after reload when inter-unit moves are enabled, the
;; destination is memory, or the destination is not a general register.
;; It becomes a plain move whose source is operand 1's hard register
;; re-expressed in SImode.
;; NOTE(review): the insn condition and template are not visible here.
7429 (define_insn_and_split "sse2_stored"
7430 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7432 (match_operand:V4SI 1 "register_operand" "x,Yi")
7433 (parallel [(const_int 0)])))]
7436 "&& reload_completed
7437 && (TARGET_INTER_UNIT_MOVES
7438 || MEM_P (operands [0])
7439 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7440 [(set (match_dup 0) (match_dup 1))]
7441 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI memory operand ("o")
;; into a general register.  The split code reads the index as i and emits
;; an SImode move from operand 1 adjusted by i*4 bytes.
;; NOTE(review): the insn condition, template and split condition are not
;; visible in this listing.
7443 (define_insn_and_split "*vec_ext_v4si_mem"
7444 [(set (match_operand:SI 0 "register_operand" "=r")
7446 (match_operand:V4SI 1 "memory_operand" "o")
7447 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7453 int i = INTVAL (operands[2]);
7455 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: set DImode operand 0 from element 0 of V2DI register
;; operand 1 (selector is (parallel [(const_int 0)])).
;; NOTE(review): the enabling condition is not visible in this listing.
7459 (define_expand "sse_storeq"
7460 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7462 (match_operand:V2DI 1 "register_operand" "")
7463 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from V2DI operand 1.  It requires
;; TARGET_64BIT and rejects the case where both operands are memory.
;; Only the third alternative's template is visible: an integer
;; mov{q} (type imov, mode DI) for the "r" <- "o" case.
;; NOTE(review): the first two alternatives' templates are not visible in
;; this listing; their type/mode attributes are "*".
7466 (define_insn "*sse2_storeq_rex64"
7467 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7469 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7470 (parallel [(const_int 0)])))]
7471 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7475 mov{q}\t{%1, %0|%0, %1}"
7476 [(set_attr "type" "*,*,imov")
7477 (set_attr "mode" "*,*,DI")])
;; Element-0 store from V2DI register operand 1 to a DImode destination
;; ("=xm").  The RTL template that follows repeats the same shape with
;; empty constraints.  Its visible condition requires inter-unit moves, a
;; memory destination, or a non-general-register destination.  It
;; rewrites the store as a plain move whose source is operand 1's hard
;; register re-expressed in DImode.
;; NOTE(review): the insn condition/template and the header of the
;; following definition (presumably a define_split) are not visible in
;; this listing -- confirm against the full file.
7479 (define_insn "*sse2_storeq"
7480 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7482 (match_operand:V2DI 1 "register_operand" "x")
7483 (parallel [(const_int 0)])))]
7488 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7490 (match_operand:V2DI 1 "register_operand" "")
7491 (parallel [(const_int 0)])))]
7494 && (TARGET_INTER_UNIT_MOVES
7495 || MEM_P (operands [0])
7496 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7497 [(set (match_dup 0) (match_dup 1))]
7498 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of V2DI operand 1 into a DImode destination on
;; 64-bit targets; both operands being memory is rejected.  Alternatives:
;;   0: %vmovhps store to memory
;;   1: psrldq by 8 in place (isa noavx, operand 1 tied to destination)
;;   2: vpsrldq by 8 (isa avx)
;;   3: %vmovq load from %H1 of an offsettable memory operand
;;   4: integer mov{q} load from %H1 into a general register
7500 (define_insn "*vec_extractv2di_1_rex64"
7501 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7503 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7504 (parallel [(const_int 1)])))]
7505 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7507 %vmovhps\t{%1, %0|%0, %1}
7508 psrldq\t{$8, %0|%0, 8}
7509 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7510 %vmovq\t{%H1, %0|%0, %H1}
7511 mov{q}\t{%H1, %0|%0, %H1}"
7512 [(set_attr "isa" "*,noavx,avx,*,*")
7513 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7514 (set_attr "length_immediate" "*,1,1,*,*")
7515 (set_attr "memory" "*,none,none,*,*")
7516 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7517 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of V2DI operand 1 into a DImode destination on
;; non-64-bit SSE targets; both operands being memory is rejected.
;; Alternatives:
;;   0: %vmovhps store to memory
;;   1: psrldq by 8 in place (isa sse2_noavx)
;;   2: vpsrldq by 8 (isa avx)
;;   3: %vmovq load from %H1 (isa sse2)
;;   4: movhlps register to register (isa noavx)
;;   5: movlps load from %H1 (isa noavx)
7519 (define_insn "*vec_extractv2di_1"
7520 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7522 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7523 (parallel [(const_int 1)])))]
7524 "!TARGET_64BIT && TARGET_SSE
7525 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7527 %vmovhps\t{%1, %0|%0, %1}
7528 psrldq\t{$8, %0|%0, 8}
7529 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7530 %vmovq\t{%H1, %0|%0, %H1}
7531 movhlps\t{%1, %0|%0, %1}
7532 movlps\t{%H1, %0|%0, %H1}"
7533 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7534 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7535 (set_attr "length_immediate" "*,1,1,*,*,*")
7536 (set_attr "memory" "*,none,none,*,*,*")
7537 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7538 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded duplication of SImode operand 1 into a V4SI register.
;; A register source uses vpshufd with immediate 0; a memory source uses
;; vbroadcastss.
;; NOTE(review): the vec_duplicate line and the enabling condition are not
;; visible in this listing.
7540 (define_insn "*vec_dupv4si_avx"
7541 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7543 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7546 vpshufd\t{$0, %1, %0|%0, %1, 0}
7547 vbroadcastss\t{%1, %0|%0, %1}"
7548 [(set_attr "type" "sselog1,ssemov")
7549 (set_attr "length_immediate" "1,0")
7550 (set_attr "prefix_extra" "0,1")
7551 (set_attr "prefix" "vex")
7552 (set_attr "mode" "TI,V4SF")])
;; Duplication of SImode register operand 1 into a V4SI register.
;; Alternative 0 (isa sse2) uses pshufd with immediate 0; alternative 1
;; ties operand 1 to the destination and uses shufps with immediate 0 on
;; the destination itself.
;; NOTE(review): the vec_duplicate line and the enabling condition are not
;; visible in this listing.
7554 (define_insn "*vec_dupv4si"
7555 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7557 (match_operand:SI 1 "register_operand" " x,0")))]
7560 pshufd\t{$0, %1, %0|%0, %1, 0}
7561 shufps\t{$0, %0, %0|%0, %0, 0}"
7562 [(set_attr "isa" "sse2,*")
7563 (set_attr "type" "sselog1")
7564 (set_attr "length_immediate" "1")
7565 (set_attr "mode" "TI,V4SF")])
;; Duplication of DImode operand 1 into a V2DI register, with three
;; alternatives (isa noavx, avx, *).  The visible templates are
;; vpunpcklqdq with operand 1 printed via %d1 (avx alternative) and
;; %vmovddup for the memory-source alternative.
;; NOTE(review): the first alternative's template, the vec_duplicate line
;; and the enabling condition are not visible in this listing.
7567 (define_insn "*vec_dupv2di_sse3"
7568 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7570 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7574 vpunpcklqdq\t{%d1, %0|%0, %d1}
7575 %vmovddup\t{%1, %0|%0, %1}"
7576 [(set_attr "isa" "noavx,avx,*")
7577 (set_attr "type" "sselog1")
7578 (set_attr "prefix" "orig,vex,maybe_vex")
7579 (set_attr "mode" "TI,TI,DF")])
;; Duplication of DImode register operand 1 into a V2DI register.  Both
;; alternatives tie operand 1 to the destination; alternative 0 is isa
;; sse2 with type sselog1 / mode TI, alternative 1 has type ssemov / mode
;; V4SF.
;; NOTE(review): the condition and output templates are not visible in
;; this listing.
7581 (define_insn "*vec_dupv2di"
7582 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7584 (match_operand:DI 1 "register_operand" " 0,0")))]
7589 [(set_attr "isa" "sse2,*")
7590 (set_attr "type" "sselog1,ssemov")
7591 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from SImode operands 1 and 2.  Alternatives:
;;   0/1: pinsrd / vpinsrd of operand 2 at index 1
;;   2/3: punpckldq / vpunpckldq of two SSE registers
;;   4:   %vmovd of operand 1 when operand 2 is the zero constant ("C")
;;   5:   MMX punpckldq
;;   6:   MMX movd of operand 1 when operand 2 is the zero constant
;; NOTE(review): the vec_concat line and the enabling condition are not
;; visible in this listing.
7593 (define_insn "*vec_concatv2si_sse4_1"
7594 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7596 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7597 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7600 pinsrd\t{$1, %2, %0|%0, %2, 1}
7601 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7602 punpckldq\t{%2, %0|%0, %2}
7603 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7604 %vmovd\t{%1, %0|%0, %1}
7605 punpckldq\t{%2, %0|%0, %2}
7606 movd\t{%1, %0|%0, %1}"
7607 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7608 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7609 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7610 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7611 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7612 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
;; Build a V2SI value from SImode operands 1 and 2, where operand 2 is a
;; register or zero.  Alternatives 0-1 use SSE registers (punpckldq, or
;; movd when operand 2 is the zero constant); alternatives 2-3 are the MMX
;; equivalents.
;; NOTE(review): the vec_concat line and the enabling condition are not
;; visible in this listing.
7614 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7615 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7616 ;; alternatives pretty much forces the MMX alternative to be chosen.
7617 (define_insn "*vec_concatv2si_sse2"
7618 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7620 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7621 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7624 punpckldq\t{%2, %0|%0, %2}
7625 movd\t{%1, %0|%0, %1}
7626 punpckldq\t{%2, %0|%0, %2}
7627 movd\t{%1, %0|%0, %1}"
7628 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7629 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from SImode operands 1 and 2, where operand 2 is a
;; register or zero.  The SSE alternatives use unpcklps, or movss from
;; memory when operand 2 is the zero constant; the MMX alternatives use
;; punpckldq and movd.
;; NOTE(review): the vec_concat line and the enabling condition are not
;; visible in this listing.
7631 (define_insn "*vec_concatv2si_sse"
7632 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7634 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7635 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7638 unpcklps\t{%2, %0|%0, %2}
7639 movss\t{%1, %0|%0, %1}
7640 punpckldq\t{%2, %0|%0, %2}
7641 movd\t{%1, %0|%0, %1}"
7642 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7643 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from V2SI operands 1 and 2.  Alternatives:
;;   0/1: punpcklqdq (isa sse2_noavx) / vpunpcklqdq (isa avx)
;;   2:   movlhps register to register (isa noavx)
;;   3/4: movhps / vmovhps with operand 2 in memory
;; NOTE(review): the vec_concat line and the enabling condition are not
;; visible in this listing.
7645 (define_insn "*vec_concatv4si"
7646 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7648 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7649 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7652 punpcklqdq\t{%2, %0|%0, %2}
7653 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7654 movlhps\t{%2, %0|%0, %2}
7655 movhps\t{%2, %0|%0, %2}
7656 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7657 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7658 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7659 (set_attr "prefix" "orig,vex,orig,orig,vex")
7660 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Build a V2DI value from DImode operands 1 and 2.  Alternatives:
;;   0/1: pinsrq / vpinsrq of operand 2 at index 1
;;   2:   %vmovq of operand 1 when operand 2 is the zero constant ("C")
;;   3:   %vmovd from a general register when operand 2 is zero
;;   4:   movq2dq from an MMX register when operand 2 is zero
;;   5/6: punpcklqdq / vpunpcklqdq of two SSE registers
;;   7/8: movhps / vmovhps with operand 2 in memory
;; The type attribute is sselog for alternatives 0, 1, 5 and 6 and ssemov
;; otherwise.
;; NOTE(review): the vec_concat line and the enabling condition are not
;; visible in this listing.
7662 ;; movd instead of movq is required to handle broken assemblers.
7663 (define_insn "*vec_concatv2di_rex64"
7664 [(set (match_operand:V2DI 0 "register_operand"
7665 "=x,x ,x ,Yi,!x,x,x,x,x")
7667 (match_operand:DI 1 "nonimmediate_operand"
7668 " 0,x ,xm,r ,*y,0,x,0,x")
7669 (match_operand:DI 2 "vector_move_operand"
7670 "rm,rm,C ,C ,C ,x,x,m,m")))]
7673 pinsrq\t{$1, %2, %0|%0, %2, 1}
7674 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7675 %vmovq\t{%1, %0|%0, %1}
7676 %vmovd\t{%1, %0|%0, %1}
7677 movq2dq\t{%1, %0|%0, %1}
7678 punpcklqdq\t{%2, %0|%0, %2}
7679 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7680 movhps\t{%2, %0|%0, %2}
7681 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7682 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7685 (eq_attr "alternative" "0,1,5,6")
7686 (const_string "sselog")
7687 (const_string "ssemov")))
7688 (set (attr "prefix_rex")
7690 (and (eq_attr "alternative" "0,3")
7691 (not (match_test "TARGET_AVX")))
7693 (const_string "*")))
7694 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7695 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7696 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7697 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Named V2DI concat pattern for non-64-bit targets with SSE (condition
;; "!TARGET_64BIT && TARGET_SSE").  Seven alternatives, in constraint order:
;;   0 sse2:       %vmovq       (op1 in "xm", op2 is "C")
;;   1 sse2:       movq2dq      (output "?x", op1 in "*y", op2 is "C")
;;   2 sse2_noavx: punpcklqdq   (op1 tied to the output, op2 in "x")
;;   3 avx:        vpunpcklqdq  (three-operand form)
;;   4 noavx:      movlhps      (op1 tied to the output, op2 in "x")
;;   5 noavx:      movhps       (op1 tied to the output, op2 from memory)
;;   6 avx:        vmovhps      (three-operand form, op2 from memory)
;; NOTE(review): the embedded numbering skips 7701 and 7705; the RTX code
;; wrapping operands 1/2 and the template opener are not visible here.
7699 (define_insn "vec_concatv2di"
7700 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7702 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7703 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7704 "!TARGET_64BIT && TARGET_SSE"
7706 %vmovq\t{%1, %0|%0, %1}
7707 movq2dq\t{%1, %0|%0, %1}
7708 punpcklqdq\t{%2, %0|%0, %2}
7709 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7710 movlhps\t{%2, %0|%0, %2}
7711 movhps\t{%2, %0|%0, %2}
7712 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7713 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7714 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7715 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7716 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo over the VI124_AVX2 mode iterator; the
;; result (operand 0) has mode <sseunpackmode>.  Nothing is generated from
;; the operand list itself: the preparation statement passes the operands
;; to ix86_expand_sse_unpack with (false, false) and finishes with DONE.
;; NOTE(review): across the four vec_unpack{s,u}_{lo,hi} expanders the
;; second argument appears to select the unsigned variant and the third the
;; high half; ix86_expand_sse_unpack is defined elsewhere -- confirm there.
;; Embedded line 7721 (between the operands and this code) is not shown.
7718 (define_expand "vec_unpacks_lo_<mode>"
7719 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7720 (match_operand:VI124_AVX2 1 "register_operand" "")]
7722 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for vec_unpacks_hi over the VI124_AVX2 mode iterator; the
;; result (operand 0) has mode <sseunpackmode>.  The preparation statement
;; passes the operands to ix86_expand_sse_unpack with (false, true) and
;; finishes with DONE.
;; NOTE(review): the third argument being true here and false in the _lo
;; variant suggests it selects the high half -- confirm in the helper's
;; definition.  Embedded line 7727 is not shown in this listing.
7724 (define_expand "vec_unpacks_hi_<mode>"
7725 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7726 (match_operand:VI124_AVX2 1 "register_operand" "")]
7728 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for vec_unpacku_lo over the VI124_AVX2 mode iterator; the
;; result (operand 0) has mode <sseunpackmode>.  The preparation statement
;; passes the operands to ix86_expand_sse_unpack with (true, false) and
;; finishes with DONE.
;; NOTE(review): the second argument being true here and false in the
;; vec_unpacks_* variants suggests it selects unsigned extension -- confirm
;; in the helper's definition.  Embedded line 7733 is not shown.
7730 (define_expand "vec_unpacku_lo_<mode>"
7731 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7732 (match_operand:VI124_AVX2 1 "register_operand" "")]
7734 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for vec_unpacku_hi over the VI124_AVX2 mode iterator; the
;; result (operand 0) has mode <sseunpackmode>.  The preparation statement
;; passes the operands to ix86_expand_sse_unpack with (true, true) and
;; finishes with DONE.
;; NOTE(review): argument meaning (unsigned?, high half?) is inferred from
;; the sibling expanders' names -- confirm in the helper's definition.
;; Embedded line 7739 is not shown in this listing.
7736 (define_expand "vec_unpacku_hi_<mode>"
7737 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7738 (match_operand:VI124_AVX2 1 "register_operand" "")]
7740 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7746 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI "uavg" pattern.  Visible pieces: register output
;; (operand 0), two nonimmediate V32QI inputs (operands 1 and 2) and a
;; V32QI const_vector holding 32 copies of (const_int 1).  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy (PLUS, V32QImode,
;; operands) before the pattern is emitted.
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7750-7754, 7756, 7774-7775), so the RTX codes that combine the operands
;; with the constant and the expander condition are not visible here.
7748 (define_expand "avx2_uavgv32qi3"
7749 [(set (match_operand:V32QI 0 "register_operand" "")
7755 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7757 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7758 (const_vector:V32QI [(const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)
7760 (const_int 1) (const_int 1)
7761 (const_int 1) (const_int 1)
7762 (const_int 1) (const_int 1)
7763 (const_int 1) (const_int 1)
7764 (const_int 1) (const_int 1)
7765 (const_int 1) (const_int 1)
7766 (const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)
7768 (const_int 1) (const_int 1)
7769 (const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)]))
7776 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI "uavg" pattern.  Visible pieces: register output
;; (operand 0), two nonimmediate V16QI inputs (operands 1 and 2) and a
;; V16QI const_vector holding 16 copies of (const_int 1).  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy (PLUS, V16QImode,
;; operands) before the pattern is emitted.
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7780-7784, 7786, 7796-7797), so the RTX codes that combine the operands
;; with the constant and the expander condition are not visible here.
7778 (define_expand "sse2_uavgv16qi3"
7779 [(set (match_operand:V16QI 0 "register_operand" "")
7785 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7787 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7788 (const_vector:V16QI [(const_int 1) (const_int 1)
7789 (const_int 1) (const_int 1)
7790 (const_int 1) (const_int 1)
7791 (const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)]))
7798 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI "uavg" pattern, emitted as the three-operand
;; vpavgb.  Requires TARGET_AVX2 and ix86_binary_operator_ok (PLUS,
;; V32QImode, operands).  Operand 1 is constrained "%x" (the "%" marks it
;; commutative with operand 2); operand 2 may be a register or memory.
;; The const_vector holds 32 copies of (const_int 1).
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7802-7806, 7808, 7826); the RTX codes combining the operands with the
;; constant are not visible in this listing.
7800 (define_insn "*avx2_uavgv32qi3"
7801 [(set (match_operand:V32QI 0 "register_operand" "=x")
7807 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7809 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7810 (const_vector:V32QI [(const_int 1) (const_int 1)
7811 (const_int 1) (const_int 1)
7812 (const_int 1) (const_int 1)
7813 (const_int 1) (const_int 1)
7814 (const_int 1) (const_int 1)
7815 (const_int 1) (const_int 1)
7816 (const_int 1) (const_int 1)
7817 (const_int 1) (const_int 1)
7818 (const_int 1) (const_int 1)
7819 (const_int 1) (const_int 1)
7820 (const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)
7823 (const_int 1) (const_int 1)
7824 (const_int 1) (const_int 1)
7825 (const_int 1) (const_int 1)]))
7827 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7828 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7829 [(set_attr "type" "sseiadd")
7830 (set_attr "prefix" "vex")
7831 (set_attr "mode" "OI")])
;; Matching insn for the V16QI "uavg" pattern.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (PLUS, V16QImode, operands).  Two alternatives:
;;   0 noavx: pavgb   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vpavgb  (three-operand VEX form)
;; Operand 1 carries "%" (commutative with operand 2); operand 2 may be a
;; register or memory.  The const_vector holds 16 copies of (const_int 1).
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7835-7839, 7841, 7851, 7853); the RTX codes combining the operands and
;; the template opener are not visible in this listing.
7833 (define_insn "*sse2_uavgv16qi3"
7834 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7840 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7842 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7843 (const_vector:V16QI [(const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)
7847 (const_int 1) (const_int 1)
7848 (const_int 1) (const_int 1)
7849 (const_int 1) (const_int 1)
7850 (const_int 1) (const_int 1)]))
7852 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7854 pavgb\t{%2, %0|%0, %2}
7855 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7856 [(set_attr "isa" "noavx,avx")
7857 (set_attr "type" "sseiadd")
7858 (set_attr "prefix_data16" "1,*")
7859 (set_attr "prefix" "orig,vex")
7860 (set_attr "mode" "TI")])
;; Expander for the V16HI "uavg" pattern.  Visible pieces: register output
;; (operand 0), two nonimmediate V16HI inputs (operands 1 and 2) and a
;; V16HI const_vector holding 16 copies of (const_int 1).  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy (PLUS, V16HImode,
;; operands) before the pattern is emitted.
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7864-7868, 7870, 7880-7881), so the RTX codes that combine the operands
;; with the constant and the expander condition are not visible here.
7862 (define_expand "avx2_uavgv16hi3"
7863 [(set (match_operand:V16HI 0 "register_operand" "")
7869 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7871 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7872 (const_vector:V16HI [(const_int 1) (const_int 1)
7873 (const_int 1) (const_int 1)
7874 (const_int 1) (const_int 1)
7875 (const_int 1) (const_int 1)
7876 (const_int 1) (const_int 1)
7877 (const_int 1) (const_int 1)
7878 (const_int 1) (const_int 1)
7879 (const_int 1) (const_int 1)]))
7882 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI "uavg" pattern.  Visible pieces: register output
;; (operand 0), two nonimmediate V8HI inputs (operands 1 and 2) and a V8HI
;; const_vector holding 8 copies of (const_int 1).  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy (PLUS, V8HImode,
;; operands) before the pattern is emitted.
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7886-7890, 7892, 7898-7899), so the RTX codes that combine the operands
;; with the constant and the expander condition are not visible here.
7884 (define_expand "sse2_uavgv8hi3"
7885 [(set (match_operand:V8HI 0 "register_operand" "")
7891 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7893 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7894 (const_vector:V8HI [(const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)]))
7900 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI "uavg" pattern, emitted as the three-operand
;; vpavgw.  Requires TARGET_AVX2 and ix86_binary_operator_ok (PLUS,
;; V16HImode, operands).  Operand 1 is constrained "%x" (the "%" marks it
;; commutative with operand 2); operand 2 may be a register or memory.
;; The const_vector holds 16 copies of (const_int 1).
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7904-7908, 7910, 7920); the RTX codes combining the operands with the
;; constant are not visible in this listing.
7902 (define_insn "*avx2_uavgv16hi3"
7903 [(set (match_operand:V16HI 0 "register_operand" "=x")
7909 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7911 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7912 (const_vector:V16HI [(const_int 1) (const_int 1)
7913 (const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)
7917 (const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)]))
7921 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7922 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7923 [(set_attr "type" "sseiadd")
7924 (set_attr "prefix" "vex")
7925 (set_attr "mode" "OI")])
;; Matching insn for the V8HI "uavg" pattern.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (PLUS, V8HImode, operands).  Two alternatives:
;;   0 noavx: pavgw   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vpavgw  (three-operand VEX form)
;; Operand 1 carries "%" (commutative with operand 2); operand 2 may be a
;; register or memory.  The const_vector holds 8 copies of (const_int 1).
;; NOTE(review): the embedded numbering has gaps inside this pattern
;; (7929-7933, 7935, 7941, 7943); the RTX codes combining the operands and
;; the template opener are not visible in this listing.
7927 (define_insn "*sse2_uavgv8hi3"
7928 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7934 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7936 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7937 (const_vector:V8HI [(const_int 1) (const_int 1)
7938 (const_int 1) (const_int 1)
7939 (const_int 1) (const_int 1)
7940 (const_int 1) (const_int 1)]))
7942 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7944 pavgw\t{%2, %0|%0, %2}
7945 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7946 [(set_attr "isa" "noavx,avx")
7947 (set_attr "type" "sseiadd")
7948 (set_attr "prefix_data16" "1,*")
7949 (set_attr "prefix" "orig,vex")
7950 (set_attr "mode" "TI")])
7952 ;; The correct representation for this is absolutely enormous, and
7953 ;; surely not generally useful.
;; psadbw is therefore modelled as an opaque unspec rather than spelled-out
;; RTL.  The result uses the VI8_AVX2 iterator mode while both inputs use
;; the matching <ssebytemode>.  Two alternatives:
;;   0 noavx: psadbw   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vpsadbw  (three-operand VEX form)
;; NOTE(review): the embedded numbering skips 7958-7960; the unspec
;; identifier, the insn condition and the template opener are not visible
;; in this listing.
7954 (define_insn "<sse2_avx2>_psadbw"
7955 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7956 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7957 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7961 psadbw\t{%2, %0|%0, %2}
7962 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7963 [(set_attr "isa" "noavx,avx")
7964 (set_attr "type" "sseiadd")
7965 (set_attr "atom_unit" "simul")
7966 (set_attr "prefix_data16" "1,*")
7967 (set_attr "prefix" "orig,vex")
7968 (set_attr "mode" "<sseinsnmode>")])
;; movmsk over the VF (vector float) iterator: takes a vector register
;; (operand 1) and writes an SImode general register (operand 0).  The
;; template uses the "%v" prefix together with prefix "maybe_vex", and the
;; mnemonic suffix comes from <ssemodesuffix>.
;; NOTE(review): the embedded numbering skips 7972 and 7974-7975; the
;; unspec wrapper/identifier and the insn condition are not visible here.
7970 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7971 [(set (match_operand:SI 0 "register_operand" "=r")
7973 [(match_operand:VF 1 "register_operand" "x")]
7976 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7977 [(set_attr "type" "ssemov")
7978 (set_attr "prefix" "maybe_vex")
7979 (set_attr "mode" "<MODE>")])
;; 256-bit byte mask extraction: vpmovmskb reads a V32QI register (operand
;; 1) and writes an SImode general register (operand 0), modelled as an
;; unspec.  Always VEX-encoded (prefix "vex").
;; NOTE(review): the "mode" attribute is "DI" here whereas sse2_pmovmskb
;; uses "SI" although both set an SImode destination -- confirm intended.
;; The embedded numbering skips 7984-7985; the unspec identifier and the
;; insn condition are not visible in this listing.
7981 (define_insn "avx2_pmovmskb"
7982 [(set (match_operand:SI 0 "register_operand" "=r")
7983 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7986 "vpmovmskb\t{%1, %0|%0, %1}"
7987 [(set_attr "type" "ssemov")
7988 (set_attr "prefix" "vex")
7989 (set_attr "mode" "DI")])
;; 128-bit byte mask extraction: pmovmskb reads a V16QI register (operand
;; 1) and writes an SImode general register (operand 0), modelled as an
;; unspec.  The "%v" template prefix with prefix "maybe_vex" lets the same
;; pattern serve the legacy and VEX encodings; prefix_data16 is 1.
;; NOTE(review): the embedded numbering skips 7994-7995; the unspec
;; identifier and the insn condition are not visible in this listing.
7991 (define_insn "sse2_pmovmskb"
7992 [(set (match_operand:SI 0 "register_operand" "=r")
7993 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7996 "%vpmovmskb\t{%1, %0|%0, %1}"
7997 [(set_attr "type" "ssemov")
7998 (set_attr "prefix_data16" "1")
7999 (set_attr "prefix" "maybe_vex")
8000 (set_attr "mode" "SI")])
;; Expander for the masked byte store: operand 0 is a V16QI memory
;; destination, operands 1 and 2 are V16QI registers fed to an unspec.
;; NOTE(review): the embedded numbering skips 8006-8009, so the rest of the
;; unspec vector, its identifier and the expander condition are not
;; visible in this listing -- the pattern is not closed in the lines shown.
8002 (define_expand "sse2_maskmovdqu"
8003 [(set (match_operand:V16QI 0 "memory_operand" "")
8004 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8005 (match_operand:V16QI 2 "register_operand" "")
;; Insn form of the masked byte store.  Operand 0 is the destination
;; address, a Pmode register constrained to "D"; the stored value is an
;; unspec of operands 1 and 2 (both "x" registers) and the previous
;; contents of the same memory, (mem:V16QI (match_dup 0)).  The address
;; register does not appear in the template, only %1 and %2 do.
;; length_vex is computed explicitly as 3 plus
;; REX_SSE_REGNO_P (REGNO (operands[2])).
;; NOTE(review): the embedded numbering skips 8015-8016; the unspec
;; identifier and the insn condition are not visible in this listing.
8010 (define_insn "*sse2_maskmovdqu"
8011 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8012 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8013 (match_operand:V16QI 2 "register_operand" "x")
8014 (mem:V16QI (match_dup 0))]
8017 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8018 [(set_attr "type" "ssemov")
8019 (set_attr "prefix_data16" "1")
8020 ;; The implicit %rdi operand confuses default length_vex computation.
8021 (set (attr "length_vex")
8022 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8023 (set_attr "prefix" "maybe_vex")
8024 (set_attr "mode" "TI")])
;; MXCSR load, modelled as an unspec_volatile whose only input is an SImode
;; memory operand.  Attributes mark it as a memory load with
;; atom_sse_attr "mxcsr" and prefix "maybe_vex".
;; NOTE(review): the embedded numbering skips 8028-8030; the unspec
;; identifier, the insn condition and the output template are not visible
;; in this listing.
8026 (define_insn "sse_ldmxcsr"
8027 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8031 [(set_attr "type" "sse")
8032 (set_attr "atom_sse_attr" "mxcsr")
8033 (set_attr "prefix" "maybe_vex")
8034 (set_attr "memory" "load")])
;; MXCSR store: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) that takes only a dummy (const_int 0) input.
;; Attributes mark it as a memory store with atom_sse_attr "mxcsr" and
;; prefix "maybe_vex".
;; NOTE(review): the embedded numbering skips 8039-8040; the insn
;; condition and the output template are not visible in this listing.
8036 (define_insn "sse_stmxcsr"
8037 [(set (match_operand:SI 0 "memory_operand" "=m")
8038 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8041 [(set_attr "type" "sse")
8042 (set_attr "atom_sse_attr" "mxcsr")
8043 (set_attr "prefix" "maybe_vex")
8044 (set_attr "memory" "store")])
;; Expander for sfence, available when TARGET_SSE || TARGET_3DNOW_A.  The
;; fence is an UNSPEC_SFENCE over operand 0, which the preparation code
;; creates itself: a BLKmode MEM whose address is a Pmode SCRATCH, flagged
;; volatile via MEM_VOLATILE_P.
;; NOTE(review): the embedded numbering skips 8047, 8050 and 8053; the
;; opening of the set and the braces around the preparation code are not
;; visible in this listing.
8046 (define_expand "sse_sfence"
8048 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8049 "TARGET_SSE || TARGET_3DNOW_A"
8051 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8052 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse_sfence expansion: a BLKmode operand 0 (empty
;; predicate and constraint) set to UNSPEC_SFENCE of itself.  Enabled when
;; TARGET_SSE || TARGET_3DNOW_A.  length_address is forced to 0 and the
;; memory attribute is "unknown".
;; NOTE(review): embedded line 8059 (presumably the output template) is
;; not visible in this listing.
8055 (define_insn "*sse_sfence"
8056 [(set (match_operand:BLK 0 "" "")
8057 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8058 "TARGET_SSE || TARGET_3DNOW_A"
8060 [(set_attr "type" "sse")
8061 (set_attr "length_address" "0")
8062 (set_attr "atom_sse_attr" "fence")
8063 (set_attr "memory" "unknown")])
;; clflush, modelled as an unspec_volatile whose input is an address
;; (address_operand, constraint "p") rather than a memory reference.
;; Attributes: type "sse", atom_sse_attr "fence", memory "unknown".
;; NOTE(review): the embedded numbering skips 8067-8069; the unspec
;; identifier, the insn condition and the output template are not visible
;; in this listing.
8065 (define_insn "sse2_clflush"
8066 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8070 [(set_attr "type" "sse")
8071 (set_attr "atom_sse_attr" "fence")
8072 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is an UNSPEC_MFENCE over operand 0,
;; which the preparation code creates itself: a BLKmode MEM whose address
;; is a Pmode SCRATCH, flagged volatile via MEM_VOLATILE_P.
;; NOTE(review): the embedded numbering skips 8075, 8077-8078 and 8081;
;; the opening of the set, the expander condition and the braces around
;; the preparation code are not visible in this listing.
8074 (define_expand "sse2_mfence"
8076 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8079 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8080 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse2_mfence expansion: a BLKmode operand 0 (empty
;; predicate and constraint) set to UNSPEC_MFENCE of itself.  Enabled when
;; TARGET_64BIT || TARGET_SSE2.  length_address is forced to 0 and the
;; memory attribute is "unknown".
;; NOTE(review): embedded line 8087 (presumably the output template) is
;; not visible in this listing.
8083 (define_insn "*sse2_mfence"
8084 [(set (match_operand:BLK 0 "" "")
8085 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8086 "TARGET_64BIT || TARGET_SSE2"
8088 [(set_attr "type" "sse")
8089 (set_attr "length_address" "0")
8090 (set_attr "atom_sse_attr" "fence")
8091 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is an UNSPEC_LFENCE over operand 0,
;; which the preparation code creates itself: a BLKmode MEM whose address
;; is a Pmode SCRATCH, flagged volatile via MEM_VOLATILE_P.
;; NOTE(review): the embedded numbering skips 8094, 8096-8097 and 8100;
;; the opening of the set, the expander condition and the braces around
;; the preparation code are not visible in this listing.
8093 (define_expand "sse2_lfence"
8095 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8098 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8099 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matched by the sse2_lfence expansion: a BLKmode operand 0 (empty
;; predicate and constraint) set to UNSPEC_LFENCE of itself.
;; length_address is forced to 0 and the memory attribute is "unknown".
;; Unlike the sfence/mfence insns, atom_sse_attr here is "lfence".
;; NOTE(review): the embedded numbering skips 8105-8106; the insn
;; condition and the output template are not visible in this listing.
8102 (define_insn "*sse2_lfence"
8103 [(set (match_operand:BLK 0 "" "")
8104 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8107 [(set_attr "type" "sse")
8108 (set_attr "length_address" "0")
8109 (set_attr "atom_sse_attr" "lfence")
8110 (set_attr "memory" "unknown")])
;; mwait, modelled as an unspec_volatile of two SImode register inputs
;; pinned by constraint to "a" (operand 0) and "c" (operand 1).  The
;; length attribute is fixed at 3.  A single SImode pattern is used for
;; both 32-bit and 64-bit targets, for the reason given in the comment
;; inside the pattern.
;; NOTE(review): the embedded numbering skips 8115-8116 and 8120; the
;; unspec identifier, the insn condition and the output template are not
;; visible in this listing.
8112 (define_insn "sse3_mwait"
8113 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8114 (match_operand:SI 1 "register_operand" "c")]
8117 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8118 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8119 ;; we only need to set up 32bit registers.
8121 [(set_attr "length" "3")])
;; monitor for non-64-bit targets (condition "TARGET_SSE3 &&
;; !TARGET_64BIT"), modelled as an unspec_volatile of three SImode register
;; inputs pinned by constraint to "a", "c" and "d".  The template prints
;; all three operands explicitly; the length attribute is fixed at 3.
;; NOTE(review): embedded line 8127 (the unspec identifier) is not visible
;; in this listing.
8123 (define_insn "sse3_monitor"
8124 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8125 (match_operand:SI 1 "register_operand" "c")
8126 (match_operand:SI 2 "register_operand" "d")]
8128 "TARGET_SSE3 && !TARGET_64BIT"
8129 "monitor\t%0, %1, %2"
8130 [(set_attr "length" "3")])
;; monitor for 64-bit targets (condition "TARGET_SSE3 && TARGET_64BIT").
;; Same shape as sse3_monitor except that operand 0, pinned to "a", is
;; DImode; operands 1 ("c") and 2 ("d") stay SImode for the reason given
;; in the comment inside the pattern.  The length attribute is fixed at 3.
;; NOTE(review): the embedded numbering skips 8136 and 8141; the unspec
;; identifier and the output template are not visible in this listing.
8132 (define_insn "sse3_monitor64"
8133 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8134 (match_operand:SI 1 "register_operand" "c")
8135 (match_operand:SI 2 "register_operand" "d")]
8137 "TARGET_SSE3 && TARGET_64BIT"
8138 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8139 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8140 ;; zero extended to 64bit, we only need to set up 32bit registers.
8142 [(set_attr "length" "3")])
8144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8146 ;; SSSE3 instructions
8148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V16HI horizontal pattern emitted as the three-operand VEX instruction
;; vphaddw.  Operand 1 must be a register, operand 2 may be a register or
;; memory.  The visible vec_select expressions pick the adjacent HI
;; elements 2k and 2k+1 (k = 0..7), first from operand 1 and then from
;; operand 2.
;; NOTE(review): the embedded numbering has many gaps inside this pattern;
;; the RTX codes that combine each selected pair and concatenate the
;; results, and the insn condition (line 8219), are not visible here, so
;; the result element order cannot be confirmed from this listing.
8150 (define_insn "avx2_phaddwv16hi3"
8151 [(set (match_operand:V16HI 0 "register_operand" "=x")
8158 (match_operand:V16HI 1 "register_operand" "x")
8159 (parallel [(const_int 0)]))
8160 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8162 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8163 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8166 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8167 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8169 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8170 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8174 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8175 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8177 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8178 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8181 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8182 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8185 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8191 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8192 (parallel [(const_int 0)]))
8193 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8195 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8196 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8199 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8200 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8203 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8207 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8208 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8210 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8211 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8218 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8220 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8221 [(set_attr "type" "sseiadd")
8222 (set_attr "prefix_extra" "1")
8223 (set_attr "prefix" "vex")
8224 (set_attr "mode" "OI")])
;; V8HI horizontal pattern with two alternatives selected by "isa":
;;   0 noavx: phaddw   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vphaddw  (three-operand VEX form)
;; Operand 2 may be a register or memory.  The visible vec_select
;; expressions pick the adjacent HI elements 2k and 2k+1 (k = 0..3), first
;; from operand 1 and then from operand 2.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair, the insn condition and the
;; template opener (lines 8263-8264) are not visible in this listing.
8226 (define_insn "ssse3_phaddwv8hi3"
8227 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8233 (match_operand:V8HI 1 "register_operand" "0,x")
8234 (parallel [(const_int 0)]))
8235 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8237 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8238 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8244 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8250 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8251 (parallel [(const_int 0)]))
8252 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8254 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8255 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8258 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8261 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8262 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8265 phaddw\t{%2, %0|%0, %2}
8266 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8267 [(set_attr "isa" "noavx,avx")
8268 (set_attr "type" "sseiadd")
8269 (set_attr "atom_unit" "complex")
8270 (set_attr "prefix_data16" "1,*")
8271 (set_attr "prefix_extra" "1")
8272 (set_attr "prefix" "orig,vex")
8273 (set_attr "mode" "TI")])
;; V4HI horizontal pattern on "y"-class registers, emitted as the
;; two-operand phaddw with operand 1 tied to the output; operand 2 may be
;; a "y" register or memory.  The visible vec_select expressions pick the
;; adjacent HI elements (0,1) and (2,3) of each input.  prefix_rex is
;; computed at run time by x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair and the insn condition (line
;; 8296) are not visible in this listing.
8275 (define_insn "ssse3_phaddwv4hi3"
8276 [(set (match_operand:V4HI 0 "register_operand" "=y")
8281 (match_operand:V4HI 1 "register_operand" "0")
8282 (parallel [(const_int 0)]))
8283 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8285 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8290 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8291 (parallel [(const_int 0)]))
8292 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8294 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8295 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8297 "phaddw\t{%2, %0|%0, %2}"
8298 [(set_attr "type" "sseiadd")
8299 (set_attr "atom_unit" "complex")
8300 (set_attr "prefix_extra" "1")
8301 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8302 (set_attr "mode" "DI")])
;; V8SI horizontal pattern emitted as the three-operand VEX instruction
;; vphaddd.  Operand 1 must be a register, operand 2 may be a register or
;; memory.  The visible vec_select expressions pick the adjacent SI
;; elements 2k and 2k+1 (k = 0..3), first from operand 1 and then from
;; operand 2.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair and the insn condition (line
;; 8341) are not visible here, so the result element order cannot be
;; confirmed from this listing.
8304 (define_insn "avx2_phadddv8si3"
8305 [(set (match_operand:V8SI 0 "register_operand" "=x")
8311 (match_operand:V8SI 1 "register_operand" "x")
8312 (parallel [(const_int 0)]))
8313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8315 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8316 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8319 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8320 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8322 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8323 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8328 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8329 (parallel [(const_int 0)]))
8330 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8332 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8333 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8336 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8337 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8339 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8340 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8342 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8343 [(set_attr "type" "sseiadd")
8344 (set_attr "prefix_extra" "1")
8345 (set_attr "prefix" "vex")
8346 (set_attr "mode" "OI")])
;; V4SI horizontal pattern with two alternatives selected by "isa":
;;   0 noavx: phaddd   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vphaddd  (three-operand VEX form)
;; Operand 2 may be a register or memory.  The visible vec_select
;; expressions pick the adjacent SI elements (0,1) and (2,3) of each input.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair, the insn condition and the
;; template opener (lines 8369-8370) are not visible in this listing.
8348 (define_insn "ssse3_phadddv4si3"
8349 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8354 (match_operand:V4SI 1 "register_operand" "0,x")
8355 (parallel [(const_int 0)]))
8356 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8358 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8359 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8363 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8364 (parallel [(const_int 0)]))
8365 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8367 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8368 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8371 phaddd\t{%2, %0|%0, %2}
8372 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8373 [(set_attr "isa" "noavx,avx")
8374 (set_attr "type" "sseiadd")
8375 (set_attr "atom_unit" "complex")
8376 (set_attr "prefix_data16" "1,*")
8377 (set_attr "prefix_extra" "1")
8378 (set_attr "prefix" "orig,vex")
8379 (set_attr "mode" "TI")])
;; V2SI horizontal pattern on "y"-class registers, emitted as the
;; two-operand phaddd with operand 1 tied to the output; operand 2 may be
;; a "y" register or memory.  The visible vec_select expressions pick SI
;; elements 0 and 1 of each input.  prefix_rex is computed at run time by
;; x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining the selected elements and the insn condition (line
;; 8394) are not visible in this listing.
8381 (define_insn "ssse3_phadddv2si3"
8382 [(set (match_operand:V2SI 0 "register_operand" "=y")
8386 (match_operand:V2SI 1 "register_operand" "0")
8387 (parallel [(const_int 0)]))
8388 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8391 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8392 (parallel [(const_int 0)]))
8393 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8395 "phaddd\t{%2, %0|%0, %2}"
8396 [(set_attr "type" "sseiadd")
8397 (set_attr "atom_unit" "complex")
8398 (set_attr "prefix_extra" "1")
8399 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8400 (set_attr "mode" "DI")])
;; V16HI horizontal pattern emitted as the three-operand VEX instruction
;; vphaddsw.  Operand 1 must be a register, operand 2 may be a register or
;; memory.  The visible vec_select expressions pick the adjacent HI
;; elements 2k and 2k+1 (k = 0..7), first from operand 1 and then from
;; operand 2.
;; NOTE(review): the embedded numbering has many gaps inside this pattern;
;; the RTX codes that combine each selected pair (which would distinguish
;; this from avx2_phaddwv16hi3) and the insn condition (line 8471) are not
;; visible in this listing.
8402 (define_insn "avx2_phaddswv16hi3"
8403 [(set (match_operand:V16HI 0 "register_operand" "=x")
8410 (match_operand:V16HI 1 "register_operand" "x")
8411 (parallel [(const_int 0)]))
8412 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8414 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8421 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8422 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8426 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8427 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8429 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8430 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8434 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8437 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8443 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8444 (parallel [(const_int 0)]))
8445 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8447 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8448 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8451 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8454 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8459 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8460 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8462 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8463 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8467 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8472 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8473 [(set_attr "type" "sseiadd")
8474 (set_attr "prefix_extra" "1")
8475 (set_attr "prefix" "vex")
8476 (set_attr "mode" "OI")])
;; V8HI horizontal pattern with two alternatives selected by "isa":
;;   0 noavx: phaddsw   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vphaddsw  (three-operand VEX form)
;; Operand 2 may be a register or memory.  The visible vec_select
;; expressions pick the adjacent HI elements 2k and 2k+1 (k = 0..3), first
;; from operand 1 and then from operand 2.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair, the insn condition and the
;; template opener (lines 8515-8516) are not visible in this listing.
8478 (define_insn "ssse3_phaddswv8hi3"
8479 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8485 (match_operand:V8HI 1 "register_operand" "0,x")
8486 (parallel [(const_int 0)]))
8487 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8489 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8490 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8493 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8494 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8496 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8497 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8502 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8503 (parallel [(const_int 0)]))
8504 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8506 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8507 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8510 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8511 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8513 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8514 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8517 phaddsw\t{%2, %0|%0, %2}
8518 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8519 [(set_attr "isa" "noavx,avx")
8520 (set_attr "type" "sseiadd")
8521 (set_attr "atom_unit" "complex")
8522 (set_attr "prefix_data16" "1,*")
8523 (set_attr "prefix_extra" "1")
8524 (set_attr "prefix" "orig,vex")
8525 (set_attr "mode" "TI")])
;; V4HI horizontal pattern on "y"-class registers, emitted as the
;; two-operand phaddsw with operand 1 tied to the output; operand 2 may be
;; a "y" register or memory.  The visible vec_select expressions pick the
;; adjacent HI elements (0,1) and (2,3) of each input.  prefix_rex is
;; computed at run time by x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair and the insn condition (line
;; 8548) are not visible in this listing.
8527 (define_insn "ssse3_phaddswv4hi3"
8528 [(set (match_operand:V4HI 0 "register_operand" "=y")
8533 (match_operand:V4HI 1 "register_operand" "0")
8534 (parallel [(const_int 0)]))
8535 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8537 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8542 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8543 (parallel [(const_int 0)]))
8544 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8549 "phaddsw\t{%2, %0|%0, %2}"
8550 [(set_attr "type" "sseiadd")
8551 (set_attr "atom_unit" "complex")
8552 (set_attr "prefix_extra" "1")
8553 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8554 (set_attr "mode" "DI")])
;; V16HI horizontal pattern emitted as the three-operand VEX instruction
;; vphsubw.  Operand 1 must be a register, operand 2 may be a register or
;; memory.  The visible vec_select expressions pick the adjacent HI
;; elements 2k and 2k+1 (k = 0..7), first from operand 1 and then from
;; operand 2; within each pair the even element is listed first.
;; NOTE(review): the embedded numbering has many gaps inside this pattern;
;; the RTX codes that combine each selected pair and the insn condition
;; (line 8625) are not visible in this listing.
8556 (define_insn "avx2_phsubwv16hi3"
8557 [(set (match_operand:V16HI 0 "register_operand" "=x")
8564 (match_operand:V16HI 1 "register_operand" "x")
8565 (parallel [(const_int 0)]))
8566 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8568 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8569 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8572 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8573 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8575 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8576 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8580 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8581 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8583 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8588 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8591 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8597 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8598 (parallel [(const_int 0)]))
8599 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8601 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8602 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8605 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8606 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8608 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8613 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8614 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8616 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8621 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8624 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8626 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8627 [(set_attr "type" "sseiadd")
8628 (set_attr "prefix_extra" "1")
8629 (set_attr "prefix" "vex")
8630 (set_attr "mode" "OI")])
;; V8HI horizontal pattern with two alternatives selected by "isa":
;;   0 noavx: phsubw   (op1 tied to the output, prefix_data16 = 1)
;;   1 avx:   vphsubw  (three-operand VEX form)
;; Operand 2 may be a register or memory.  The visible vec_select
;; expressions pick the adjacent HI elements 2k and 2k+1 (k = 0..3), first
;; from operand 1 and then from operand 2.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair, the insn condition and the
;; template opener (lines 8669-8670) are not visible in this listing.
8632 (define_insn "ssse3_phsubwv8hi3"
8633 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8639 (match_operand:V8HI 1 "register_operand" "0,x")
8640 (parallel [(const_int 0)]))
8641 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8643 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8644 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8647 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8650 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8651 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8656 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8657 (parallel [(const_int 0)]))
8658 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8660 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8661 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8664 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8665 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8667 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8671 phsubw\t{%2, %0|%0, %2}
8672 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8673 [(set_attr "isa" "noavx,avx")
8674 (set_attr "type" "sseiadd")
8675 (set_attr "atom_unit" "complex")
8676 (set_attr "prefix_data16" "1,*")
8677 (set_attr "prefix_extra" "1")
8678 (set_attr "prefix" "orig,vex")
8679 (set_attr "mode" "TI")])
;; V4HI horizontal pattern on "y"-class registers, emitted as the
;; two-operand phsubw with operand 1 tied to the output; operand 2 may be
;; a "y" register or memory.  The visible vec_select expressions pick the
;; adjacent HI elements (0,1) and (2,3) of each input.  prefix_rex is
;; computed at run time by x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair and the insn condition (line
;; 8702) are not visible in this listing.
8681 (define_insn "ssse3_phsubwv4hi3"
8682 [(set (match_operand:V4HI 0 "register_operand" "=y")
8687 (match_operand:V4HI 1 "register_operand" "0")
8688 (parallel [(const_int 0)]))
8689 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8691 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8692 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8696 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8697 (parallel [(const_int 0)]))
8698 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8700 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8701 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8703 "phsubw\t{%2, %0|%0, %2}"
8704 [(set_attr "type" "sseiadd")
8705 (set_attr "atom_unit" "complex")
8706 (set_attr "prefix_extra" "1")
8707 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8708 (set_attr "mode" "DI")])
;; V8SI horizontal pattern emitted as the three-operand VEX instruction
;; vphsubd.  Operand 1 must be a register, operand 2 may be a register or
;; memory.  The visible vec_select expressions pick the adjacent SI
;; elements 2k and 2k+1 (k = 0..3), first from operand 1 and then from
;; operand 2; within each pair the even element is listed first.
;; NOTE(review): the embedded numbering has gaps inside this pattern; the
;; RTX codes combining each selected pair and the insn condition (line
;; 8747) are not visible in this listing.
8710 (define_insn "avx2_phsubdv8si3"
8711 [(set (match_operand:V8SI 0 "register_operand" "=x")
8717 (match_operand:V8SI 1 "register_operand" "x")
8718 (parallel [(const_int 0)]))
8719 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8721 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8722 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8725 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8726 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8728 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8729 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8734 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8735 (parallel [(const_int 0)]))
8736 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8738 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8739 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8742 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8743 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8745 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8746 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8748 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8749 [(set_attr "type" "sseiadd")
8750 (set_attr "prefix_extra" "1")
8751 (set_attr "prefix" "vex")
8752 (set_attr "mode" "OI")])
;; Horizontal dword subtract on V4SI operands, two alternatives:
;;   0 (isa noavx): two-operand "phsubd", operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vphsubd".
;; Elements 0..3 of each source are selected in adjacent pairs.
8754 (define_insn "ssse3_phsubdv4si3"
8755 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8760 (match_operand:V4SI 1 "register_operand" "0,x")
8761 (parallel [(const_int 0)]))
8762 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8764 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8765 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8769 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8770 (parallel [(const_int 0)]))
8771 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8773 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8774 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8777 phsubd\t{%2, %0|%0, %2}
8778 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8780 [(set_attr "isa" "noavx,avx")
8781 (set_attr "type" "sseiadd")
8782 (set_attr "atom_unit" "complex")
8783 (set_attr "prefix_data16" "1,*")
8784 (set_attr "prefix_extra" "1")
8785 (set_attr "prefix" "orig,vex")
8786 (set_attr "mode" "TI")])
;; Horizontal dword subtract on V2SI operands in "y"-class registers; emits
;; two-operand "phsubd" with operand 1 tied to the destination.  Elements
;; 0 and 1 of each source are selected.
8788 (define_insn "ssse3_phsubdv2si3"
8789 [(set (match_operand:V2SI 0 "register_operand" "=y")
8793 (match_operand:V2SI 1 "register_operand" "0")
8794 (parallel [(const_int 0)]))
8795 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8798 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8799 (parallel [(const_int 0)]))
8800 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8802 "phsubd\t{%2, %0|%0, %2}"
8803 [(set_attr "type" "sseiadd")
8804 (set_attr "atom_unit" "complex")
8805 (set_attr "prefix_extra" "1")
8806 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8807 (set_attr "mode" "DI")])
;; Horizontal word subtract ("vphsubsw" mnemonic) on V16HI operands;
;; three-operand VEX form, insn mode OI.  Elements 0..15 of each source are
;; selected in adjacent pairs.
8809 (define_insn "avx2_phsubswv16hi3"
8810 [(set (match_operand:V16HI 0 "register_operand" "=x")
8817 (match_operand:V16HI 1 "register_operand" "x")
8818 (parallel [(const_int 0)]))
8819 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8821 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8822 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8825 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8826 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8828 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8829 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8833 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8834 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8836 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8837 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8841 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8850 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8851 (parallel [(const_int 0)]))
8852 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8854 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8855 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8858 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8859 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8862 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8866 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8867 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8869 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8870 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8873 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8874 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8876 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8877 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8879 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8880 [(set_attr "type" "sseiadd")
8881 (set_attr "prefix_extra" "1")
8882 (set_attr "prefix" "vex")
8883 (set_attr "mode" "OI")])
;; Horizontal word subtract ("phsubsw" mnemonic) on V8HI operands, two
;; alternatives:
;;   0 (isa noavx): two-operand "phsubsw", operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vphsubsw".
;; Elements 0..7 of each source are selected in adjacent pairs.
8885 (define_insn "ssse3_phsubswv8hi3"
8886 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8892 (match_operand:V8HI 1 "register_operand" "0,x")
8893 (parallel [(const_int 0)]))
8894 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8896 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8897 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8900 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8901 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8903 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8904 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8909 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8910 (parallel [(const_int 0)]))
8911 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8913 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8914 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8917 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8918 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8920 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8921 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8924 phsubsw\t{%2, %0|%0, %2}
8925 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8926 [(set_attr "isa" "noavx,avx")
8927 (set_attr "type" "sseiadd")
8928 (set_attr "atom_unit" "complex")
8929 (set_attr "prefix_data16" "1,*")
8930 (set_attr "prefix_extra" "1")
8931 (set_attr "prefix" "orig,vex")
8932 (set_attr "mode" "TI")])
;; Horizontal word subtract ("phsubsw" mnemonic) on V4HI operands in
;; "y"-class registers; two-operand form with operand 1 tied to the
;; destination.  Elements 0..3 of each source are selected in adjacent pairs.
8934 (define_insn "ssse3_phsubswv4hi3"
8935 [(set (match_operand:V4HI 0 "register_operand" "=y")
8940 (match_operand:V4HI 1 "register_operand" "0")
8941 (parallel [(const_int 0)]))
8942 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8944 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8945 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8949 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8950 (parallel [(const_int 0)]))
8951 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8953 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8954 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8956 "phsubsw\t{%2, %0|%0, %2}"
8957 [(set_attr "type" "sseiadd")
8958 (set_attr "atom_unit" "complex")
8959 (set_attr "prefix_extra" "1")
8960 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8961 (set_attr "mode" "DI")])
;; "vpmaddubsw" on V32QI sources producing a V16HI result (VEX, mode OI).
;; The RTL takes V16QI sub-vectors of each source, one selection starting at
;; index 0 and one starting at index 1 and ending at 31 -- presumably the
;; even- and odd-indexed bytes respectively.
8963 (define_insn "avx2_pmaddubsw256"
8964 [(set (match_operand:V16HI 0 "register_operand" "=x")
8969 (match_operand:V32QI 1 "register_operand" "x")
8970 (parallel [(const_int 0)
8988 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8989 (parallel [(const_int 0)
9007 (vec_select:V16QI (match_dup 1)
9008 (parallel [(const_int 1)
9025 (vec_select:V16QI (match_dup 2)
9026 (parallel [(const_int 1)
9041 (const_int 31)]))))))]
9043 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9044 [(set_attr "type" "sseiadd")
9045 (set_attr "prefix_extra" "1")
9046 (set_attr "prefix" "vex")
9047 (set_attr "mode" "OI")])
;; "pmaddubsw" on V16QI sources producing a V8HI result, two alternatives:
;;   0 (isa noavx): two-operand form, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vpmaddubsw".
;; The RTL takes V8QI sub-vectors of each source, one selection starting at
;; index 0 and one starting at index 1 and ending at 15 -- presumably the
;; even- and odd-indexed bytes respectively.
9049 (define_insn "ssse3_pmaddubsw128"
9050 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9055 (match_operand:V16QI 1 "register_operand" "0,x")
9056 (parallel [(const_int 0)
9066 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9067 (parallel [(const_int 0)
9077 (vec_select:V8QI (match_dup 1)
9078 (parallel [(const_int 1)
9087 (vec_select:V8QI (match_dup 2)
9088 (parallel [(const_int 1)
9095 (const_int 15)]))))))]
9098 pmaddubsw\t{%2, %0|%0, %2}
9099 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9100 [(set_attr "isa" "noavx,avx")
9101 (set_attr "type" "sseiadd")
9102 (set_attr "atom_unit" "simul")
9103 (set_attr "prefix_data16" "1,*")
9104 (set_attr "prefix_extra" "1")
9105 (set_attr "prefix" "orig,vex")
9106 (set_attr "mode" "TI")])
;; "pmaddubsw" on V8QI sources in "y"-class registers producing a V4HI
;; result; two-operand form with operand 1 tied to the destination.
;; The RTL takes V4QI sub-vectors of each source, one selection starting at
;; index 0 and one starting at index 1 and ending at 7 -- presumably the
;; even- and odd-indexed bytes respectively.
9108 (define_insn "ssse3_pmaddubsw"
9109 [(set (match_operand:V4HI 0 "register_operand" "=y")
9114 (match_operand:V8QI 1 "register_operand" "0")
9115 (parallel [(const_int 0)
9121 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9122 (parallel [(const_int 0)
9128 (vec_select:V4QI (match_dup 1)
9129 (parallel [(const_int 1)
9134 (vec_select:V4QI (match_dup 2)
9135 (parallel [(const_int 1)
9138 (const_int 7)]))))))]
9140 "pmaddubsw\t{%2, %0|%0, %2}"
9141 [(set_attr "type" "sseiadd")
9142 (set_attr "atom_unit" "simul")
9143 (set_attr "prefix_extra" "1")
9144 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9145 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply whose matching insn emits "vpmulhrsw".
;; Both inputs are accepted as nonimmediate operands; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, V16HImode,
;; operands) before the pattern is generated.  The RTL includes a
;; const_vector of sixteen 1s.
;; NOTE(review): this name says "umulhrsw" while the insn mnemonic and the
;; SSSE3 sibling patterns say "pmulhrsw"; the name is kept as-is because
;; code outside this file may refer to it -- confirm before renaming.
9147 (define_expand "avx2_umulhrswv16hi3"
9148 [(set (match_operand:V16HI 0 "register_operand" "")
9155 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9157 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9159 (const_vector:V16HI [(const_int 1) (const_int 1)
9160 (const_int 1) (const_int 1)
9161 (const_int 1) (const_int 1)
9162 (const_int 1) (const_int 1)
9163 (const_int 1) (const_int 1)
9164 (const_int 1) (const_int 1)
9165 (const_int 1) (const_int 1)
9166 (const_int 1) (const_int 1)]))
9169 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Insn matched for the V16HI expander above it in the file.  Requires
;; TARGET_AVX2 and ix86_binary_operator_ok (MULT, V16HImode, operands);
;; emits three-operand "vpmulhrsw".  Operand 1 carries the "%" (commutative)
;; constraint modifier; operand 2 may be a register or memory.
9171 (define_insn "*avx2_umulhrswv16hi3"
9172 [(set (match_operand:V16HI 0 "register_operand" "=x")
9179 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9181 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9183 (const_vector:V16HI [(const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)
9185 (const_int 1) (const_int 1)
9186 (const_int 1) (const_int 1)
9187 (const_int 1) (const_int 1)
9188 (const_int 1) (const_int 1)
9189 (const_int 1) (const_int 1)
9190 (const_int 1) (const_int 1)]))
9192 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9193 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9194 [(set_attr "type" "sseimul")
9195 (set_attr "prefix_extra" "1")
9196 (set_attr "prefix" "vex")
9197 (set_attr "mode" "OI")])
;; Expander for the V8HI "pmulhrsw" multiply.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).  The RTL
;; includes a const_vector of eight 1s.
9199 (define_expand "ssse3_pmulhrswv8hi3"
9200 [(set (match_operand:V8HI 0 "register_operand" "")
9207 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9209 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9211 (const_vector:V8HI [(const_int 1) (const_int 1)
9212 (const_int 1) (const_int 1)
9213 (const_int 1) (const_int 1)
9214 (const_int 1) (const_int 1)]))
9217 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI "pmulhrsw" multiply.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok (MULT, V8HImode, operands).  Two alternatives:
;;   0 (isa noavx): two-operand "pmulhrsw", operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vpmulhrsw".
;; Operand 1 carries the "%" (commutative) constraint modifier.
9219 (define_insn "*ssse3_pmulhrswv8hi3"
9220 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9227 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9229 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9231 (const_vector:V8HI [(const_int 1) (const_int 1)
9232 (const_int 1) (const_int 1)
9233 (const_int 1) (const_int 1)
9234 (const_int 1) (const_int 1)]))
9236 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9238 pmulhrsw\t{%2, %0|%0, %2}
9239 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9240 [(set_attr "isa" "noavx,avx")
9241 (set_attr "type" "sseimul")
9242 (set_attr "prefix_data16" "1,*")
9243 (set_attr "prefix_extra" "1")
9244 (set_attr "prefix" "orig,vex")
9245 (set_attr "mode" "TI")])
;; Expander for the V4HI "pmulhrsw" multiply.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands).  The RTL
;; includes a const_vector of four 1s.
9247 (define_expand "ssse3_pmulhrswv4hi3"
9248 [(set (match_operand:V4HI 0 "register_operand" "")
9255 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9257 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9259 (const_vector:V4HI [(const_int 1) (const_int 1)
9260 (const_int 1) (const_int 1)]))
9263 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Insn for the V4HI "pmulhrsw" multiply in "y"-class registers.  Requires
;; TARGET_SSSE3 and ix86_binary_operator_ok (MULT, V4HImode, operands).
;; Two-operand form: operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.
9265 (define_insn "*ssse3_pmulhrswv4hi3"
9266 [(set (match_operand:V4HI 0 "register_operand" "=y")
9273 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9275 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9277 (const_vector:V4HI [(const_int 1) (const_int 1)
9278 (const_int 1) (const_int 1)]))
9280 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9281 "pmulhrsw\t{%2, %0|%0, %2}"
9282 [(set_attr "type" "sseimul")
9283 (set_attr "prefix_extra" "1")
9284 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9285 (set_attr "mode" "DI")])
;; Byte shuffle over the VI1_AVX2 mode iterator, expressed as an unspec of
;; operands 1 and 2.  Two alternatives:
;;   0 (isa noavx): two-operand "pshufb", operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vpshufb".
;; The insn mode attribute follows <sseinsnmode>.
9287 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9288 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9289 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9290 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9294 pshufb\t{%2, %0|%0, %2}
9295 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9296 [(set_attr "isa" "noavx,avx")
9297 (set_attr "type" "sselog1")
9298 (set_attr "prefix_data16" "1,*")
9299 (set_attr "prefix_extra" "1")
9300 (set_attr "prefix" "orig,vex")
9301 (set_attr "mode" "<sseinsnmode>")])
;; Byte shuffle on V8QI operands in "y"-class registers, expressed as an
;; unspec of operands 1 and 2.  Emits two-operand "pshufb" with operand 1
;; tied to the destination; operand 2 may be a register or memory.
9303 (define_insn "ssse3_pshufbv8qi3"
9304 [(set (match_operand:V8QI 0 "register_operand" "=y")
9305 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9306 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9309 "pshufb\t{%2, %0|%0, %2}"
9310 [(set_attr "type" "sselog1")
9311 (set_attr "prefix_extra" "1")
9312 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9313 (set_attr "mode" "DI")])
;; "psign" over the VI124_AVX2 mode iterator; the mnemonic suffix comes from
;; <ssemodesuffix>.  Two alternatives:
;;   0 (isa noavx): two-operand form, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vpsign...".
9315 (define_insn "<ssse3_avx2>_psign<mode>3"
9316 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9318 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9319 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9323 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9324 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9325 [(set_attr "isa" "noavx,avx")
9326 (set_attr "type" "sselog1")
9327 (set_attr "prefix_data16" "1,*")
9328 (set_attr "prefix_extra" "1")
9329 (set_attr "prefix" "orig,vex")
9330 (set_attr "mode" "<sseinsnmode>")])
;; "psign" over the MMXMODEI mode iterator in "y"-class registers; the
;; mnemonic suffix comes from <mmxvecsize>.  Two-operand form with operand 1
;; tied to the destination; operand 2 may be a register or memory.
9332 (define_insn "ssse3_psign<mode>3"
9333 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9335 [(match_operand:MMXMODEI 1 "register_operand" "0")
9336 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9339 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9340 [(set_attr "type" "sselog1")
9341 (set_attr "prefix_extra" "1")
9342 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9343 (set_attr "mode" "DI")])
;; "palignr" over the SSESCALARMODE iterator.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count to the byte count the
;; instruction encodes) and then returns the "palignr" or "vpalignr"
;; template according to which_alternative.
9345 (define_insn "<ssse3_avx2>_palignr<mode>"
9346 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9347 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9348 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9349 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9353 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9355 switch (which_alternative)
9358 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9360 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9365 [(set_attr "isa" "noavx,avx")
9366 (set_attr "type" "sseishft")
9367 (set_attr "atom_unit" "sishuf")
9368 (set_attr "prefix_data16" "1,*")
9369 (set_attr "prefix_extra" "1")
9370 (set_attr "length_immediate" "1")
9371 (set_attr "prefix" "orig,vex")
9372 (set_attr "mode" "<sseinsnmode>")])
;; "palignr" on DImode operands in "y"-class registers.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing (presumably bits to bytes) and returns the two-operand
;; "palignr" template.  Operand 1 is tied to the destination.
9374 (define_insn "ssse3_palignrdi"
9375 [(set (match_operand:DI 0 "register_operand" "=y")
9376 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9377 (match_operand:DI 2 "nonimmediate_operand" "ym")
9378 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9382 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9383 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9385 [(set_attr "type" "sseishft")
9386 (set_attr "atom_unit" "sishuf")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "length_immediate" "1")
9389 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9390 (set_attr "mode" "DI")])
;; abs<mode>2 over the VI124_AVX2 mode iterator.  Emits
;; "%vpabs<ssemodesuffix>" with a register or memory source; the "%v"
;; template prefix goes together with the "maybe_vex" prefix attribute.
9392 (define_insn "abs<mode>2"
9393 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9395 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9397 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9398 [(set_attr "type" "sselog1")
9399 (set_attr "prefix_data16" "1")
9400 (set_attr "prefix_extra" "1")
9401 (set_attr "prefix" "maybe_vex")
9402 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 over the MMXMODEI mode iterator in "y"-class registers.
;; Emits "pabs<mmxvecsize>" with a register or memory source.
9404 (define_insn "abs<mode>2"
9405 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9407 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9409 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9410 [(set_attr "type" "sselog1")
9411 (set_attr "prefix_rep" "0")
9412 (set_attr "prefix_extra" "1")
9413 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9414 (set_attr "mode" "DI")])
9416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9418 ;; AMD SSE4A instructions
9420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store of a MODEF register (operand 1) to memory (operand 0) through an
;; unspec; emits "movnt<ssemodesuffix>".
9422 (define_insn "sse4a_movnt<mode>"
9423 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9425 [(match_operand:MODEF 1 "register_operand" "x")]
9428 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9429 [(set_attr "type" "ssemov")
9430 (set_attr "mode" "<MODE>")])
;; Store of element 0 of a VF_128 register (operand 1) to a scalar-mode
;; memory destination through an unspec; emits
;; "movnt<ssescalarmodesuffix>".
9432 (define_insn "sse4a_vmmovnt<mode>"
9433 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9434 (unspec:<ssescalarmode>
9435 [(vec_select:<ssescalarmode>
9436 (match_operand:VF_128 1 "register_operand" "x")
9437 (parallel [(const_int 0)]))]
9440 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9441 [(set_attr "type" "ssemov")
9442 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of "extrq" on V2DI: operand 1 is tied to the destination
;; and operands 2 and 3 are constants in 0..255 (hence length_immediate 2).
9444 (define_insn "sse4a_extrqi"
9445 [(set (match_operand:V2DI 0 "register_operand" "=x")
9446 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9447 (match_operand 2 "const_0_to_255_operand" "")
9448 (match_operand 3 "const_0_to_255_operand" "")]
9451 "extrq\t{%3, %2, %0|%0, %2, %3}"
9452 [(set_attr "type" "sse")
9453 (set_attr "prefix_data16" "1")
9454 (set_attr "length_immediate" "2")
9455 (set_attr "mode" "TI")])
;; Register form of "extrq" on V2DI: operand 1 is tied to the destination
;; and operand 2 is a V16QI register.
9457 (define_insn "sse4a_extrq"
9458 [(set (match_operand:V2DI 0 "register_operand" "=x")
9459 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9460 (match_operand:V16QI 2 "register_operand" "x")]
9463 "extrq\t{%2, %0|%0, %2}"
9464 [(set_attr "type" "sse")
9465 (set_attr "prefix_data16" "1")
9466 (set_attr "mode" "TI")])
;; Immediate form of "insertq" on V2DI: operand 1 is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; constants in 0..255 (hence length_immediate 2).
9468 (define_insn "sse4a_insertqi"
9469 [(set (match_operand:V2DI 0 "register_operand" "=x")
9470 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9471 (match_operand:V2DI 2 "register_operand" "x")
9472 (match_operand 3 "const_0_to_255_operand" "")
9473 (match_operand 4 "const_0_to_255_operand" "")]
9476 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9477 [(set_attr "type" "sseins")
9478 (set_attr "prefix_data16" "0")
9479 (set_attr "prefix_rep" "1")
9480 (set_attr "length_immediate" "2")
9481 (set_attr "mode" "TI")])
;; Register form of "insertq" on V2DI: operand 1 is tied to the destination
;; and operand 2 is a V2DI register.
9483 (define_insn "sse4a_insertq"
9484 [(set (match_operand:V2DI 0 "register_operand" "=x")
9485 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9486 (match_operand:V2DI 2 "register_operand" "x")]
9489 "insertq\t{%2, %0|%0, %2}"
9490 [(set_attr "type" "sseins")
9491 (set_attr "prefix_data16" "0")
9492 (set_attr "prefix_rep" "1")
9493 (set_attr "mode" "TI")])
9495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9497 ;; Intel SSE4.1 instructions
9499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend over the VF mode iterator.  Note the RTL
;; operand order: operand 2 is listed before operand 1.  Operand 3 is an
;; immediate limited by const_0_to_<blendbits>_operand.  Two alternatives:
;;   0 (isa noavx): "blend<ssemodesuffix>", operand 1 tied to the destination;
;;   1 (isa avx):   three-source-operand "vblend<ssemodesuffix>".
9501 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9502 [(set (match_operand:VF 0 "register_operand" "=x,x")
9504 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9505 (match_operand:VF 1 "register_operand" "0,x")
9506 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9509 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9510 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9511 [(set_attr "isa" "noavx,avx")
9512 (set_attr "type" "ssemov")
9513 (set_attr "length_immediate" "1")
9514 (set_attr "prefix_data16" "1,*")
9515 (set_attr "prefix_extra" "1")
9516 (set_attr "prefix" "orig,vex")
9517 (set_attr "mode" "<MODE>")])
;; Register-controlled blend over the VF mode iterator.  Operand 3 is the
;; selector register: constraint "Yz" in the noavx alternative and any "x"
;; register in the avx alternative.  Operands 0..2 use the
;; *_not_xmm0_*_maybe_avx predicates (presumably keeping them out of xmm0
;; only when AVX is not in use -- confirm against predicates.md).
;;   0 (isa noavx): "blendv<ssemodesuffix>", operand 1 tied to the destination;
;;   1 (isa avx):   "vblendv<ssemodesuffix>" with all operands explicit.
9519 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9520 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9522 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9523 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9524 (match_operand:VF 3 "register_operand" "Yz,x")]
9528 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9529 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9530 [(set_attr "isa" "noavx,avx")
9531 (set_attr "type" "ssemov")
9532 (set_attr "length_immediate" "1")
9533 (set_attr "prefix_data16" "1,*")
9534 (set_attr "prefix_extra" "1")
9535 (set_attr "prefix" "orig,vex")
9536 (set_attr "mode" "<MODE>")])
;; "dp<ssemodesuffix>" over the VF mode iterator with an 8-bit immediate
;; (operand 3, 0..255).  Operand 1 carries the "%" (commutative) modifier.
;;   0 (isa noavx): two-source form, operand 1 tied to the destination;
;;   1 (isa avx):   "vdp<ssemodesuffix>" with a separate destination.
9538 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9539 [(set (match_operand:VF 0 "register_operand" "=x,x")
9541 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9542 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9543 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9547 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9548 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9549 [(set_attr "isa" "noavx,avx")
9550 (set_attr "type" "ssemul")
9551 (set_attr "length_immediate" "1")
9552 (set_attr "prefix_data16" "1,*")
9553 (set_attr "prefix_extra" "1")
9554 (set_attr "prefix" "orig,vex")
9555 (set_attr "mode" "<MODE>")])
;; Load through an unspec from a memory operand into a register, over the
;; VI8_AVX2 mode iterator; emits "%vmovntdqa" (prefix maybe_vex).
9557 (define_insn "<sse4_1_avx2>_movntdqa"
9558 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9559 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9562 "%vmovntdqa\t{%1, %0|%0, %1}"
9563 [(set_attr "type" "ssemov")
9564 (set_attr "prefix_extra" "1")
9565 (set_attr "prefix" "maybe_vex")
9566 (set_attr "mode" "<sseinsnmode>")])
;; "mpsadbw" over the VI1_AVX2 mode iterator with an 8-bit immediate
;; (operand 3, 0..255), expressed as an unspec.
;;   0 (isa noavx): "mpsadbw", operand 1 tied to the destination;
;;   1 (isa avx):   "vmpsadbw" with a separate destination.
9568 (define_insn "<sse4_1_avx2>_mpsadbw"
9569 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9570 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9571 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9572 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9576 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9577 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9578 [(set_attr "isa" "noavx,avx")
9579 (set_attr "type" "sselog1")
9580 (set_attr "length_immediate" "1")
9581 (set_attr "prefix_extra" "1")
9582 (set_attr "prefix" "orig,vex")
9583 (set_attr "mode" "<sseinsnmode>")])
;; "vpackusdw": two V8SI sources (operand 2 may be memory) packed into a
;; V16HI destination; VEX-encoded, insn mode OI.
9585 (define_insn "avx2_packusdw"
9586 [(set (match_operand:V16HI 0 "register_operand" "=x")
9589 (match_operand:V8SI 1 "register_operand" "x"))
9591 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9593 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9594 [(set_attr "type" "sselog")
9595 (set_attr "prefix_extra" "1")
9596 (set_attr "prefix" "vex")
9597 (set_attr "mode" "OI")])
;; "packusdw": two V4SI sources packed into a V8HI destination.
;;   0 (isa noavx): two-operand form, operand 1 tied to the destination;
;;   1 (isa avx):   three-operand "vpackusdw".
9599 (define_insn "sse4_1_packusdw"
9600 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9603 (match_operand:V4SI 1 "register_operand" "0,x"))
9605 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9608 packusdw\t{%2, %0|%0, %2}
9609 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9610 [(set_attr "isa" "noavx,avx")
9611 (set_attr "type" "sselog")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "prefix" "orig,vex")
9614 (set_attr "mode" "TI")])
;; Register-controlled byte blend over the VI1_AVX2 mode iterator.
;; Operand 3 is the selector register: constraint "Yz" in the noavx
;; alternative and any "x" register in the avx alternative.  Operands 0..2
;; all use the *_not_xmm0_*_maybe_avx predicates, the same way the
;; floating-point blendv pattern in this file does; operand 0 is tied to
;; operand 1 in alternative 0, so the two share one predicate.
;;   0 (isa noavx): "pblendvb", no immediate byte (length_immediate "*");
;;   1 (isa avx):   "vpblendvb" with all operands explicit
;;                  (length_immediate "1").
9616 (define_insn "<sse4_1_avx2>_pblendvb"
9617 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9619 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9620 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9621 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9625 pblendvb\t{%3, %2, %0|%0, %2, %3}
9626 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9627 [(set_attr "isa" "noavx,avx")
9628 (set_attr "type" "ssemov")
9629 (set_attr "prefix_extra" "1")
9630 (set_attr "length_immediate" "*,1")
9631 (set_attr "prefix" "orig,vex")
9632 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-controlled word blend on V8HI.  Note the RTL operand order:
;; operand 2 is listed before operand 1.  Operand 3 is an 8-bit immediate.
;;   0 (isa noavx): "pblendw", operand 1 tied to the destination;
;;   1 (isa avx):   "vpblendw" with a separate destination.
9634 (define_insn "sse4_1_pblendw"
9635 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9637 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9638 (match_operand:V8HI 1 "register_operand" "0,x")
9639 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9642 pblendw\t{%3, %2, %0|%0, %2, %3}
9643 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9644 [(set_attr "isa" "noavx,avx")
9645 (set_attr "type" "ssemov")
9646 (set_attr "prefix_extra" "1")
9647 (set_attr "length_immediate" "1")
9648 (set_attr "prefix" "orig,vex")
9649 (set_attr "mode" "TI")])
9651 ;; The builtin passes an 8-bit immediate.  Widen it to 16 bits here by
;; copying the low byte into bits 8..15 (val << 8 | val), so operand 3 has
;; one bit per V16HI element.  The matching insn below masks it back to
;; 8 bits when printing.
9652 (define_expand "avx2_pblendw"
9653 [(set (match_operand:V16HI 0 "register_operand" "")
9655 (match_operand:V16HI 2 "nonimmediate_operand" "")
9656 (match_operand:V16HI 1 "register_operand" "")
9657 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9660 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9661 operands[3] = GEN_INT (val << 8 | val);
;; Insn form of the V16HI word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code keeps only its low 8 bits
;; (& 0xff) before printing the "vpblendw" template.
9664 (define_insn "*avx2_pblendw"
9665 [(set (match_operand:V16HI 0 "register_operand" "=x")
9667 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9668 (match_operand:V16HI 1 "register_operand" "x")
9669 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9672 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9673 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9675 [(set_attr "type" "ssemov")
9676 (set_attr "prefix_extra" "1")
9677 (set_attr "length_immediate" "1")
9678 (set_attr "prefix" "vex")
9679 (set_attr "mode" "OI")])
;; Immediate-controlled dword blend over the VI4_AVX2 mode iterator; emits
;; VEX-only "vpblendd".  Note the RTL operand order: operand 2 is listed
;; before operand 1.  Operand 3 is an 8-bit immediate.
9681 (define_insn "avx2_pblendd<mode>"
9682 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9684 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9685 (match_operand:VI4_AVX2 1 "register_operand" "x")
9686 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9688 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9689 [(set_attr "type" "ssemov")
9690 (set_attr "prefix_extra" "1")
9691 (set_attr "length_immediate" "1")
9692 (set_attr "prefix" "vex")
9693 (set_attr "mode" "<sseinsnmode>")])
;; "phminposuw" on V8HI, expressed as UNSPEC_PHMINPOSUW of a single
;; register-or-memory source; emitted with the "%v" prefix (maybe_vex).
9695 (define_insn "sse4_1_phminposuw"
9696 [(set (match_operand:V8HI 0 "register_operand" "=x")
9697 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9698 UNSPEC_PHMINPOSUW))]
9700 "%vphminposuw\t{%1, %0|%0, %1}"
9701 [(set_attr "type" "sselog1")
9702 (set_attr "prefix_extra" "1")
9703 (set_attr "prefix" "maybe_vex")
9704 (set_attr "mode" "TI")])
;; Byte-to-word extension, V16QI source to V16HI destination; emits
;; "vpmov<extsuffix>bw" (VEX, mode OI).  The whole source vector is used.
9706 (define_insn "avx2_<code>v16qiv16hi2"
9707 [(set (match_operand:V16HI 0 "register_operand" "=x")
9709 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9711 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9712 [(set_attr "type" "ssemov")
9713 (set_attr "prefix_extra" "1")
9714 (set_attr "prefix" "vex")
9715 (set_attr "mode" "OI")])
;; Byte-to-word extension into V8HI from a selection of the V16QI source
;; starting at element 0; emits "%vpmov<extsuffix>bw".  The Intel-syntax
;; source operand is printed with the %q1 modifier.
9717 (define_insn "sse4_1_<code>v8qiv8hi2"
9718 [(set (match_operand:V8HI 0 "register_operand" "=x")
9721 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9722 (parallel [(const_int 0)
9731 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9732 [(set_attr "type" "ssemov")
9733 (set_attr "prefix_extra" "1")
9734 (set_attr "prefix" "maybe_vex")
9735 (set_attr "mode" "TI")])
;; Byte-to-dword extension into V8SI from a selection of the V16QI source
;; starting at element 0; emits "vpmov<extsuffix>bd" (VEX, mode OI).  The
;; Intel-syntax source operand is printed with the %q1 modifier.
9737 (define_insn "avx2_<code>v8qiv8si2"
9738 [(set (match_operand:V8SI 0 "register_operand" "=x")
9741 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9742 (parallel [(const_int 0)
9751 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9752 [(set_attr "type" "ssemov")
9753 (set_attr "prefix_extra" "1")
9754 (set_attr "prefix" "vex")
9755 (set_attr "mode" "OI")])
;; Byte-to-dword extension into V4SI from a selection of the V16QI source
;; starting at element 0; emits "%vpmov<extsuffix>bd".  The Intel-syntax
;; source operand is printed with the %k1 modifier.
9757 (define_insn "sse4_1_<code>v4qiv4si2"
9758 [(set (match_operand:V4SI 0 "register_operand" "=x")
9761 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9762 (parallel [(const_int 0)
9767 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9768 [(set_attr "type" "ssemov")
9769 (set_attr "prefix_extra" "1")
9770 (set_attr "prefix" "maybe_vex")
9771 (set_attr "mode" "TI")])
;; Word-to-dword extension, V8HI source to V8SI destination; emits
;; "vpmov<extsuffix>wd" (VEX, mode OI).  The whole source vector is used.
9773 (define_insn "avx2_<code>v8hiv8si2"
9774 [(set (match_operand:V8SI 0 "register_operand" "=x")
9776 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9778 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9779 [(set_attr "type" "ssemov")
9780 (set_attr "prefix_extra" "1")
9781 (set_attr "prefix" "vex")
9782 (set_attr "mode" "OI")])
;; Word-to-dword extension into V4SI from a selection of the V8HI source
;; starting at element 0; emits "%vpmov<extsuffix>wd".  The Intel-syntax
;; source operand is printed with the %q1 modifier.
9784 (define_insn "sse4_1_<code>v4hiv4si2"
9785 [(set (match_operand:V4SI 0 "register_operand" "=x")
9788 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9789 (parallel [(const_int 0)
9794 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9795 [(set_attr "type" "ssemov")
9796 (set_attr "prefix_extra" "1")
9797 (set_attr "prefix" "maybe_vex")
9798 (set_attr "mode" "TI")])
;; Byte-to-qword extension into V4DI from a selection of the V16QI source
;; starting at element 0; emits "vpmov<extsuffix>bq" (VEX, mode OI).  The
;; Intel-syntax source operand is printed with the %k1 modifier.
9800 (define_insn "avx2_<code>v4qiv4di2"
9801 [(set (match_operand:V4DI 0 "register_operand" "=x")
9804 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9805 (parallel [(const_int 0)
9810 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9811 [(set_attr "type" "ssemov")
9812 (set_attr "prefix_extra" "1")
9813 (set_attr "prefix" "vex")
9814 (set_attr "mode" "OI")])
;; Byte-to-qword extension into V2DI from a selection of the V16QI source
;; starting at element 0; emits "%vpmov<extsuffix>bq".  The Intel-syntax
;; source operand is printed with the %w1 modifier.
9816 (define_insn "sse4_1_<code>v2qiv2di2"
9817 [(set (match_operand:V2DI 0 "register_operand" "=x")
9820 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9821 (parallel [(const_int 0)
9824 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9825 [(set_attr "type" "ssemov")
9826 (set_attr "prefix_extra" "1")
9827 (set_attr "prefix" "maybe_vex")
9828 (set_attr "mode" "TI")])
;; Word-to-qword extension into V4DI from a selection of the V8HI source
;; starting at element 0; emits "vpmov<extsuffix>wq" (VEX, mode OI).  The
;; Intel-syntax source operand is printed with the %q1 modifier.
9830 (define_insn "avx2_<code>v4hiv4di2"
9831 [(set (match_operand:V4DI 0 "register_operand" "=x")
9834 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9835 (parallel [(const_int 0)
9840 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9841 [(set_attr "type" "ssemov")
9842 (set_attr "prefix_extra" "1")
9843 (set_attr "prefix" "vex")
9844 (set_attr "mode" "OI")])
;; Word-to-qword extension into V2DI from a selection of the V8HI source
;; starting at element 0; emits "%vpmov<extsuffix>wq".  The Intel-syntax
;; source operand is printed with the %k1 modifier.
9846 (define_insn "sse4_1_<code>v2hiv2di2"
9847 [(set (match_operand:V2DI 0 "register_operand" "=x")
9850 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9851 (parallel [(const_int 0)
9854 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9855 [(set_attr "type" "ssemov")
9856 (set_attr "prefix_extra" "1")
9857 (set_attr "prefix" "maybe_vex")
9858 (set_attr "mode" "TI")])
;; Dword-to-qword extension, V4SI source to V4DI destination; emits
;; "vpmov<extsuffix>dq" (mode OI).  The whole source vector is used.
;; The VEX prefix is stated explicitly, as in the other avx2_* extension
;; patterns in this file.
9860 (define_insn "avx2_<code>v4siv4di2"
9861 [(set (match_operand:V4DI 0 "register_operand" "=x")
9863 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9865 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9866 [(set_attr "type" "ssemov")
9867 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9868 (set_attr "mode" "OI")])
;; Dword-to-qword extension into V2DI from a selection of the V4SI source
;; starting at element 0; emits "%vpmov<extsuffix>dq".  The Intel-syntax
;; source operand is printed with the %q1 modifier.
9870 (define_insn "sse4_1_<code>v2siv2di2"
9871 [(set (match_operand:V2DI 0 "register_operand" "=x")
9874 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9875 (parallel [(const_int 0)
9878 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9879 [(set_attr "type" "ssemov")
9880 (set_attr "prefix_extra" "1")
9881 (set_attr "prefix" "maybe_vex")
9882 (set_attr "mode" "TI")])
9884 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
9885 ;; set FLAGS_REG, but they are not really compare instructions.
;; The pattern sets (reg:CC FLAGS_REG) from an unspec of two VF operands
;; (operand 1 may be memory) and emits "vtest<ssemodesuffix>".
9886 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9887 [(set (reg:CC FLAGS_REG)
9888 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9889 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9892 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9893 [(set_attr "type" "ssecomi")
9894 (set_attr "prefix_extra" "1")
9895 (set_attr "prefix" "vex")
9896 (set_attr "mode" "<MODE>")])
9898 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
9899 ;; but it is not really a compare instruction.
;; 256-bit form: sets (reg:CC FLAGS_REG) from an unspec of two V4DI
;; operands (operand 1 may be memory) and emits "vptest".
9900 (define_insn "avx_ptest256"
9901 [(set (reg:CC FLAGS_REG)
9902 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9903 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9906 "vptest\t{%1, %0|%0, %1}"
9907 [(set_attr "type" "ssecomi")
9908 (set_attr "prefix_extra" "1")
9909 (set_attr "prefix" "vex")
9910 (set_attr "mode" "OI")])
;; 128-bit "ptest": sets (reg:CC FLAGS_REG) from an unspec of two V2DI
;; operands (operand 1 may be memory); emitted with the "%v" prefix
;; (maybe_vex).
9912 (define_insn "sse4_1_ptest"
9913 [(set (reg:CC FLAGS_REG)
9914 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9915 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9918 "%vptest\t{%1, %0|%0, %1}"
9919 [(set_attr "type" "ssecomi")
9920 (set_attr "prefix_extra" "1")
9921 (set_attr "prefix" "maybe_vex")
9922 (set_attr "mode" "TI")])
;; Packed "round<ssemodesuffix>" over the VF mode iterator.  Operand 1 may
;; be a register or memory; operand 2 is an immediate in 0..15.  Emitted
;; with the "%v" prefix (maybe_vex); the prefix_data16 attribute is
;; computed from a TARGET_AVX test rather than given as a constant.
9924 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9925 [(set (match_operand:VF 0 "register_operand" "=x")
9927 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9928 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9931 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9932 [(set_attr "type" "ssecvt")
9933 (set (attr "prefix_data16")
9935 (match_test "TARGET_AVX")
9937 (const_string "1")))
9938 (set_attr "prefix_extra" "1")
9939 (set_attr "length_immediate" "1")
9940 (set_attr "prefix" "maybe_vex")
9941 (set_attr "mode" "<MODE>")])
;; Scalar round with two alternatives selected by the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); two-operand round.
;;   avx   - operand 1 is a separate register; three-operand vround.
;; Operand 2 is the VF_128 register being rounded and operand 3 is the
;; immediate control (const_0_to_15_operand).
9943 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9944 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9947 [(match_operand:VF_128 2 "register_operand" "x,x")
9948 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9950 (match_operand:VF_128 1 "register_operand" "0,x")
9954 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9955 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9956 [(set_attr "isa" "noavx,avx")
9957 (set_attr "type" "ssecvt")
9958 (set_attr "length_immediate" "1")
9959 (set_attr "prefix_data16" "1,*")
9960 (set_attr "prefix_extra" "1")
9961 (set_attr "prefix" "orig,vex")
9962 (set_attr "mode" "<MODE>")])
;; Expander for vector round, enabled only when TARGET_ROUND holds and
;; -ftrapping-math is off.  The preparation code:
;;   * builds the constant 0.5 - 2**(-p-1) in the element format (the
;;     value the code itself labels nextafter (0.5, 0.0)),
;;   * broadcasts it to a vector and forces it into a register,
;;   * gives it the sign of operand 1 via copysign, result in operands[3],
;;   * allocates operands[4] as a fresh register and sets operands[5] to
;;     ROUND_TRUNC for the final rounding step on (match_dup 4).
;; NOTE(review): the RTL that combines operand 1 with operands[3] into
;; operands[4] is not fully visible here - presumably an addition; confirm.
9964 (define_expand "round<mode>2"
9967 (match_operand:VF 1 "nonimmediate_operand" "")
9969 (set (match_operand:VF 0 "register_operand" "")
9971 [(match_dup 4) (match_dup 5)]
9973 "TARGET_ROUND && !flag_trapping_math"
9975 enum machine_mode scalar_mode;
9976 const struct real_format *fmt;
9977 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9980 scalar_mode = GET_MODE_INNER (<MODE>mode);
9982 /* load nextafter (0.5, 0.0) */
9983 fmt = REAL_MODE_FORMAT (scalar_mode);
9984 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9985 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9986 half = const_double_from_real_value (pred_half, scalar_mode);
9988 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9989 vec_half = force_reg (<MODE>mode, vec_half);
9991 operands[3] = gen_reg_rtx (<MODE>mode);
9992 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9994 operands[4] = gen_reg_rtx (<MODE>mode);
9995 operands[5] = GEN_INT (ROUND_TRUNC);
9998 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10000 ;; Intel SSE4.2 string/text processing instructions
10002 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and
;; FLAGS_REG.  It exists only while pseudos can still be created and is
;; split according to which results are live:
;;   ecx / xmm0 / flags are each nonzero when the corresponding register
;;   has no REG_UNUSED note on the current insn.
;; The split emits pcmpestri for the index result and pcmpestrm for the
;; mask result; if only the flags are live it emits the _cconly form, and
;; if nothing is live it emits a NOTE_INSN_DELETED.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here - presumably "if (ecx)" and "if (xmm0)"; confirm.
10004 (define_insn_and_split "sse4_2_pcmpestr"
10005 [(set (match_operand:SI 0 "register_operand" "=c,c")
10007 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10008 (match_operand:SI 3 "register_operand" "a,a")
10009 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10010 (match_operand:SI 5 "register_operand" "d,d")
10011 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10013 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10021 (set (reg:CC FLAGS_REG)
10030 && can_create_pseudo_p ()"
10035 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10036 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10037 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10040 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10041 operands[3], operands[4],
10042 operands[5], operands[6]));
10044 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10045 operands[3], operands[4],
10046 operands[5], operands[6]));
10047 if (flags && !(ecx || xmm0))
10048 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10049 operands[2], operands[3],
10050 operands[4], operands[5],
10052 if (!(flags || ecx || xmm0))
10053 emit_note (NOTE_INSN_DELETED);
10057 [(set_attr "type" "sselog")
10058 (set_attr "prefix_data16" "1")
10059 (set_attr "prefix_extra" "1")
10060 (set_attr "length_immediate" "1")
10061 (set_attr "memory" "none,load")
10062 (set_attr "mode" "TI")])
;; pcmpestri: explicit-length string compare returning an SI index in
;; operand 0 (constraint "c") and also setting FLAGS_REG.  Inputs are two
;; V16QI vectors (operand 1 a register, operand 3 register or memory),
;; two SI registers (operand 2 "a", operand 4 "d") and an immediate
;; (operand 5, const_0_to_255_operand).  Only operands 1, 3 and 5 appear
;; in the output template.
10064 (define_insn "sse4_2_pcmpestri"
10065 [(set (match_operand:SI 0 "register_operand" "=c,c")
10067 [(match_operand:V16QI 1 "register_operand" "x,x")
10068 (match_operand:SI 2 "register_operand" "a,a")
10069 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10070 (match_operand:SI 4 "register_operand" "d,d")
10071 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10073 (set (reg:CC FLAGS_REG)
10082 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10083 [(set_attr "type" "sselog")
10084 (set_attr "prefix_data16" "1")
10085 (set_attr "prefix_extra" "1")
10086 (set_attr "prefix" "maybe_vex")
10087 (set_attr "length_immediate" "1")
10088 (set_attr "memory" "none,load")
10089 (set_attr "mode" "TI")])
;; pcmpestrm: explicit-length string compare returning a V16QI mask in
;; operand 0 (constraint "Yz") and also setting FLAGS_REG.  The input
;; operands have the same shape as in sse4_2_pcmpestri; only operands 1,
;; 3 and 5 appear in the output template.
10091 (define_insn "sse4_2_pcmpestrm"
10092 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10094 [(match_operand:V16QI 1 "register_operand" "x,x")
10095 (match_operand:SI 2 "register_operand" "a,a")
10096 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10097 (match_operand:SI 4 "register_operand" "d,d")
10098 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10100 (set (reg:CC FLAGS_REG)
10109 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10110 [(set_attr "type" "sselog")
10111 (set_attr "prefix_data16" "1")
10112 (set_attr "prefix_extra" "1")
10113 (set_attr "length_immediate" "1")
10114 (set_attr "prefix" "maybe_vex")
10115 (set_attr "memory" "none,load")
10116 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: FLAGS_REG is the only real result; the
;; mask and index destinations are scratch clobbers.  Four alternatives:
;; the first two clobber the "Yz" vector scratch and emit pcmpestrm, the
;; last two clobber the "c" integer scratch and emit pcmpestri; within
;; each pair operand 4 is a register or a memory operand respectively.
10118 (define_insn "sse4_2_pcmpestr_cconly"
10119 [(set (reg:CC FLAGS_REG)
10121 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10122 (match_operand:SI 3 "register_operand" "a,a,a,a")
10123 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10124 (match_operand:SI 5 "register_operand" "d,d,d,d")
10125 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10127 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10128 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10131 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10132 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10133 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10134 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10135 [(set_attr "type" "sselog")
10136 (set_attr "prefix_data16" "1")
10137 (set_attr "prefix_extra" "1")
10138 (set_attr "length_immediate" "1")
10139 (set_attr "memory" "none,load,none,load")
10140 (set_attr "prefix" "maybe_vex")
10141 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern, the implicit-length counterpart of
;; sse4_2_pcmpestr (no SI length operands).  Results are an SI index
;; (operand 0, "c"), a V16QI mask (operand 1, "Yz") and FLAGS_REG.  The
;; split inspects REG_UNUSED notes on the current insn to decide which of
;; pcmpistri / pcmpistrm / the _cconly form to emit, and emits a
;; NOTE_INSN_DELETED when no result is live.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here - presumably "if (ecx)" and "if (xmm0)"; confirm.
10143 (define_insn_and_split "sse4_2_pcmpistr"
10144 [(set (match_operand:SI 0 "register_operand" "=c,c")
10146 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10147 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10148 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10150 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10156 (set (reg:CC FLAGS_REG)
10163 && can_create_pseudo_p ()"
10168 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10169 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10170 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10173 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10174 operands[3], operands[4]));
10176 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10177 operands[3], operands[4]));
10178 if (flags && !(ecx || xmm0))
10179 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10180 operands[2], operands[3],
10182 if (!(flags || ecx || xmm0))
10183 emit_note (NOTE_INSN_DELETED);
10187 [(set_attr "type" "sselog")
10188 (set_attr "prefix_data16" "1")
10189 (set_attr "prefix_extra" "1")
10190 (set_attr "length_immediate" "1")
10191 (set_attr "memory" "none,load")
10192 (set_attr "mode" "TI")])
;; pcmpistri: implicit-length string compare returning an SI index in
;; operand 0 (constraint "c") and also setting FLAGS_REG.  Operand 1 is a
;; V16QI register, operand 2 a V16QI register or memory operand, and
;; operand 3 an immediate satisfying const_0_to_255_operand.
10194 (define_insn "sse4_2_pcmpistri"
10195 [(set (match_operand:SI 0 "register_operand" "=c,c")
10197 [(match_operand:V16QI 1 "register_operand" "x,x")
10198 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10199 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10201 (set (reg:CC FLAGS_REG)
10208 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10209 [(set_attr "type" "sselog")
10210 (set_attr "prefix_data16" "1")
10211 (set_attr "prefix_extra" "1")
10212 (set_attr "length_immediate" "1")
10213 (set_attr "prefix" "maybe_vex")
10214 (set_attr "memory" "none,load")
10215 (set_attr "mode" "TI")])
;; pcmpistrm: implicit-length string compare returning a V16QI mask in
;; operand 0 (constraint "Yz") and also setting FLAGS_REG.  Input operands
;; have the same shape as in sse4_2_pcmpistri.
10217 (define_insn "sse4_2_pcmpistrm"
10218 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10220 [(match_operand:V16QI 1 "register_operand" "x,x")
10221 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10222 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10224 (set (reg:CC FLAGS_REG)
10231 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10232 [(set_attr "type" "sselog")
10233 (set_attr "prefix_data16" "1")
10234 (set_attr "prefix_extra" "1")
10235 (set_attr "length_immediate" "1")
10236 (set_attr "prefix" "maybe_vex")
10237 (set_attr "memory" "none,load")
10238 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: FLAGS_REG is the only real result; the
;; mask and index destinations are scratch clobbers.  Alternatives 0-1
;; clobber the "Yz" vector scratch and emit pcmpistrm; alternatives 2-3
;; clobber the "c" integer scratch and emit pcmpistri.  Within each pair
;; operand 3 is a register or a memory operand respectively.
10240 (define_insn "sse4_2_pcmpistr_cconly"
10241 [(set (reg:CC FLAGS_REG)
10243 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10244 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10245 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10247 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10248 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10251 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10252 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10253 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10254 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load,none,load")
10260 (set_attr "prefix" "maybe_vex")
10261 (set_attr "mode" "TI")])
10263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10265 ;; XOP instructions
10267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10269 ;; XOP parallel integer multiply/add instructions.
10270 ;; Note the XOP multiply/add instructions
10271 ;; a[i] = b[i] * c[i] + d[i];
10272 ;; do not allow the value being added to be a memory operation.
;; vpmacsww: all operands are V8HI.  Operands 1 and 2 are marked
;; commutative ("%"); only operand 2 may be memory.  Operand 3 is
;; restricted to a register by its "x" constraint.
10273 (define_insn "xop_pmacsww"
10274 [(set (match_operand:V8HI 0 "register_operand" "=x")
10277 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10278 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10279 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10281 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10282 [(set_attr "type" "ssemuladd")
10283 (set_attr "mode" "TI")])
;; vpmacssww: V8HI multiply of operands 1 and 2 combined with operand 3.
;; Operand constraints are the same as for xop_pmacsww.
;; NOTE(review): the outer RTL code is not visible here - presumably a
;; saturating add, going by the "ss" in the mnemonic; confirm.
10285 (define_insn "xop_pmacssww"
10286 [(set (match_operand:V8HI 0 "register_operand" "=x")
10288 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10289 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10290 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10292 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10293 [(set_attr "type" "ssemuladd")
10294 (set_attr "mode" "TI")])
;; vpmacsdd: the V4SI counterpart of xop_pmacsww.  Operands 1 and 2 are
;; commutative, only operand 2 may be memory, and operand 3 must be in a
;; register ("x").
10296 (define_insn "xop_pmacsdd"
10297 [(set (match_operand:V4SI 0 "register_operand" "=x")
10300 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10301 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10302 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10304 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10305 [(set_attr "type" "ssemuladd")
10306 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI multiply of operands 1 and 2 combined with operand 3.
;; Operand constraints are the same as for xop_pmacsdd.
;; NOTE(review): the outer RTL code is not visible here - presumably a
;; saturating add, going by the "ss" in the mnemonic; confirm.
10308 (define_insn "xop_pmacssdd"
10309 [(set (match_operand:V4SI 0 "register_operand" "=x")
10311 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10312 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10313 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10315 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10316 [(set_attr "type" "ssemuladd")
10317 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI inputs (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on both inputs start at index 1.
;; Only operand 2 may be memory.
10319 (define_insn "xop_pmacssdql"
10320 [(set (match_operand:V2DI 0 "register_operand" "=x")
10325 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10326 (parallel [(const_int 1)
10329 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10330 (parallel [(const_int 1)
10332 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10334 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10335 [(set_attr "type" "ssemuladd")
10336 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI inputs (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on both inputs start at index 0.
;; Only operand 2 may be memory.
10338 (define_insn "xop_pmacssdqh"
10339 [(set (match_operand:V2DI 0 "register_operand" "=x")
10344 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10345 (parallel [(const_int 0)
10349 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10350 (parallel [(const_int 0)
10352 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10354 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10355 [(set_attr "type" "ssemuladd")
10356 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI inputs (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on both inputs start at index 1.
;; Only operand 2 may be memory.
10358 (define_insn "xop_pmacsdql"
10359 [(set (match_operand:V2DI 0 "register_operand" "=x")
10364 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10365 (parallel [(const_int 1)
10369 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10370 (parallel [(const_int 1)
10372 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10374 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10375 [(set_attr "type" "ssemuladd")
10376 (set_attr "mode" "TI")])
10378 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10379 ;; fake it with a multiply/add. In general, we expect the define_split to
10380 ;; occur before register allocation, so we have to handle the corner case where
10381 ;; the target is the same as operands 1/2
;; The insn selects elements 1 and 3 of each V4SI input and produces a
;; V2DI result.  The destination is earlyclobber ("=&x"), so it cannot
;; share a register with an input.  The split runs once reload has
;; completed and sets operands[3] to the V2DI zero constant, which serves
;; as the addend of the replacement multiply/add.
10382 (define_insn_and_split "xop_mulv2div2di3_low"
10383 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10387 (match_operand:V4SI 1 "register_operand" "%x")
10388 (parallel [(const_int 1)
10392 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10393 (parallel [(const_int 1)
10394 (const_int 3)])))))]
10397 "&& reload_completed"
10398 [(set (match_dup 0)
10406 (parallel [(const_int 1)
10411 (parallel [(const_int 1)
10415 operands[3] = CONST0_RTX (V2DImode);
10417 [(set_attr "type" "ssemul")
10418 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI inputs (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on both inputs start at index 0.
;; Only operand 2 may be memory.
10420 (define_insn "xop_pmacsdqh"
10421 [(set (match_operand:V2DI 0 "register_operand" "=x")
10426 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10427 (parallel [(const_int 0)
10431 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10432 (parallel [(const_int 0)
10434 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10436 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10437 [(set_attr "type" "ssemuladd")
10438 (set_attr "mode" "TI")])
10440 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10441 ;; fake it with a multiply/add. In general, we expect the define_split to
10442 ;; occur before register allocation, so we have to handle the corner case where
10443 ;; the target is the same as either operands[1] or operands[2]
;; The insn selects elements 0 and 2 of each V4SI input and produces a
;; V2DI result.  The destination is earlyclobber ("=&x").  The split runs
;; once reload has completed and sets operands[3] to the V2DI zero
;; constant, which serves as the addend of the replacement multiply/add.
10444 (define_insn_and_split "xop_mulv2div2di3_high"
10445 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10449 (match_operand:V4SI 1 "register_operand" "%x")
10450 (parallel [(const_int 0)
10454 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10455 (parallel [(const_int 0)
10456 (const_int 2)])))))]
10459 "&& reload_completed"
10460 [(set (match_dup 0)
10468 (parallel [(const_int 0)
10473 (parallel [(const_int 0)
10477 operands[3] = CONST0_RTX (V2DImode);
10479 [(set_attr "type" "ssemul")
10480 (set_attr "mode" "TI")])
10482 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI inputs (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The element selections on both inputs start at index 1.
;; Only operand 2 may be memory.
10483 (define_insn "xop_pmacsswd"
10484 [(set (match_operand:V4SI 0 "register_operand" "=x")
10489 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10490 (parallel [(const_int 1)
10496 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10497 (parallel [(const_int 1)
10501 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10503 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10504 [(set_attr "type" "ssemuladd")
10505 (set_attr "mode" "TI")])
;; vpmacswd: V8HI inputs (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The element selections on both inputs start at index 1.
;; Only operand 2 may be memory.
10507 (define_insn "xop_pmacswd"
10508 [(set (match_operand:V4SI 0 "register_operand" "=x")
10513 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10514 (parallel [(const_int 1)
10520 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10521 (parallel [(const_int 1)
10525 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10527 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10528 [(set_attr "type" "ssemuladd")
10529 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI inputs (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The pattern uses two groups of element selections, one
;; starting at index 0 and one starting at index 1 and ending at 7.
;; Only operand 2 may be memory.
10531 (define_insn "xop_pmadcsswd"
10532 [(set (match_operand:V4SI 0 "register_operand" "=x")
10538 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10539 (parallel [(const_int 0)
10545 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10546 (parallel [(const_int 0)
10554 (parallel [(const_int 1)
10561 (parallel [(const_int 1)
10564 (const_int 7)])))))
10565 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10567 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10568 [(set_attr "type" "ssemuladd")
10569 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI inputs (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The pattern uses two groups of element selections, one
;; starting at index 0 and one starting at index 1 and ending at 7.
;; Only operand 2 may be memory.
10571 (define_insn "xop_pmadcswd"
10572 [(set (match_operand:V4SI 0 "register_operand" "=x")
10578 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10579 (parallel [(const_int 0)
10585 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10586 (parallel [(const_int 0)
10594 (parallel [(const_int 1)
10601 (parallel [(const_int 1)
10604 (const_int 7)])))))
10605 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10607 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10608 [(set_attr "type" "ssemuladd")
10609 (set_attr "mode" "TI")])
10611 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode in the V iterator.  Operand 1 is always a
;; register.  The two alternatives allow either operand 2 or operand 3 to
;; be memory, but never both in the same alternative.
10612 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10613 [(set (match_operand:V 0 "register_operand" "=x,x")
10615 (match_operand:V 3 "nonimmediate_operand" "x,m")
10616 (match_operand:V 1 "register_operand" "x,x")
10617 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10619 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10620 [(set_attr "type" "sse4arg")])
10622 ;; XOP horizontal add/subtract instructions
;; vphaddbw: single V16QI source (operand 1, register or memory), V8HI
;; result.  The two element selections start at indices 0 and 1, and the
;; second ends at 15.
10623 (define_insn "xop_phaddbw"
10624 [(set (match_operand:V8HI 0 "register_operand" "=x")
10628 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10629 (parallel [(const_int 0)
10640 (parallel [(const_int 1)
10647 (const_int 15)])))))]
10649 "vphaddbw\t{%1, %0|%0, %1}"
10650 [(set_attr "type" "sseiadd1")])
;; vphaddbd: single V16QI source (operand 1, register or memory), V4SI
;; result.  Four element selections are combined, starting at indices
;; 0, 1, 2 and 3.
10652 (define_insn "xop_phaddbd"
10653 [(set (match_operand:V4SI 0 "register_operand" "=x")
10658 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10659 (parallel [(const_int 0)
10666 (parallel [(const_int 1)
10669 (const_int 13)]))))
10674 (parallel [(const_int 2)
10681 (parallel [(const_int 3)
10684 (const_int 15)]))))))]
10686 "vphaddbd\t{%1, %0|%0, %1}"
10687 [(set_attr "type" "sseiadd1")])
;; vphaddbq: single V16QI source (operand 1, register or memory), V2DI
;; result.  Eight element selections are combined, starting at indices
;; 0-3 and 8-11.
10689 (define_insn "xop_phaddbq"
10690 [(set (match_operand:V2DI 0 "register_operand" "=x")
10696 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10697 (parallel [(const_int 0)
10702 (parallel [(const_int 1)
10708 (parallel [(const_int 2)
10713 (parallel [(const_int 3)
10714 (const_int 7)])))))
10720 (parallel [(const_int 8)
10725 (parallel [(const_int 9)
10726 (const_int 13)]))))
10731 (parallel [(const_int 10)
10736 (parallel [(const_int 11)
10737 (const_int 15)])))))))]
10739 "vphaddbq\t{%1, %0|%0, %1}"
10740 [(set_attr "type" "sseiadd1")])
;; vphaddwd: single V8HI source (operand 1, register or memory), V4SI
;; result.  The two element selections start at indices 0 and 1, and the
;; second ends at 7.
10742 (define_insn "xop_phaddwd"
10743 [(set (match_operand:V4SI 0 "register_operand" "=x")
10747 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10748 (parallel [(const_int 0)
10755 (parallel [(const_int 1)
10758 (const_int 7)])))))]
10760 "vphaddwd\t{%1, %0|%0, %1}"
10761 [(set_attr "type" "sseiadd1")])
;; vphaddwq: single V8HI source (operand 1, register or memory), V2DI
;; result.  Four element selections are combined, starting at indices
;; 0, 1, 2 and 3.
10763 (define_insn "xop_phaddwq"
10764 [(set (match_operand:V2DI 0 "register_operand" "=x")
10769 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10770 (parallel [(const_int 0)
10775 (parallel [(const_int 1)
10781 (parallel [(const_int 2)
10786 (parallel [(const_int 3)
10787 (const_int 7)]))))))]
10789 "vphaddwq\t{%1, %0|%0, %1}"
10790 [(set_attr "type" "sseiadd1")])
;; vphadddq: single V4SI source (operand 1, register or memory), V2DI
;; result.  The two element selections start at indices 0 and 1, and the
;; second ends at 3.
10792 (define_insn "xop_phadddq"
10793 [(set (match_operand:V2DI 0 "register_operand" "=x")
10797 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10798 (parallel [(const_int 0)
10803 (parallel [(const_int 1)
10804 (const_int 3)])))))]
10806 "vphadddq\t{%1, %0|%0, %1}"
10807 [(set_attr "type" "sseiadd1")])
;; vphaddubw: same operand shape as xop_phaddbw (V16QI source, V8HI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10809 (define_insn "xop_phaddubw"
10810 [(set (match_operand:V8HI 0 "register_operand" "=x")
10814 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10815 (parallel [(const_int 0)
10826 (parallel [(const_int 1)
10833 (const_int 15)])))))]
10835 "vphaddubw\t{%1, %0|%0, %1}"
10836 [(set_attr "type" "sseiadd1")])
;; vphaddubd: same operand shape as xop_phaddbd (V16QI source, V4SI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10838 (define_insn "xop_phaddubd"
10839 [(set (match_operand:V4SI 0 "register_operand" "=x")
10844 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10845 (parallel [(const_int 0)
10852 (parallel [(const_int 1)
10855 (const_int 13)]))))
10860 (parallel [(const_int 2)
10867 (parallel [(const_int 3)
10870 (const_int 15)]))))))]
10872 "vphaddubd\t{%1, %0|%0, %1}"
10873 [(set_attr "type" "sseiadd1")])
;; vphaddubq: same operand shape as xop_phaddbq (V16QI source, V2DI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10875 (define_insn "xop_phaddubq"
10876 [(set (match_operand:V2DI 0 "register_operand" "=x")
10882 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10883 (parallel [(const_int 0)
10888 (parallel [(const_int 1)
10894 (parallel [(const_int 2)
10899 (parallel [(const_int 3)
10900 (const_int 7)])))))
10906 (parallel [(const_int 8)
10911 (parallel [(const_int 9)
10912 (const_int 13)]))))
10917 (parallel [(const_int 10)
10922 (parallel [(const_int 11)
10923 (const_int 15)])))))))]
10925 "vphaddubq\t{%1, %0|%0, %1}"
10926 [(set_attr "type" "sseiadd1")])
;; vphadduwd: same operand shape as xop_phaddwd (V8HI source, V4SI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10928 (define_insn "xop_phadduwd"
10929 [(set (match_operand:V4SI 0 "register_operand" "=x")
10933 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10934 (parallel [(const_int 0)
10941 (parallel [(const_int 1)
10944 (const_int 7)])))))]
10946 "vphadduwd\t{%1, %0|%0, %1}"
10947 [(set_attr "type" "sseiadd1")])
;; vphadduwq: same operand shape as xop_phaddwq (V8HI source, V2DI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10949 (define_insn "xop_phadduwq"
10950 [(set (match_operand:V2DI 0 "register_operand" "=x")
10955 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10956 (parallel [(const_int 0)
10961 (parallel [(const_int 1)
10967 (parallel [(const_int 2)
10972 (parallel [(const_int 3)
10973 (const_int 7)]))))))]
10975 "vphadduwq\t{%1, %0|%0, %1}"
10976 [(set_attr "type" "sseiadd1")])
;; vphaddudq: same operand shape as xop_phadddq (V4SI source, V2DI
;; result) but emitting the "u" mnemonic.
;; NOTE(review): presumably the unsigned (zero-extending) variant - the
;; extension code is not visible here; confirm.
10978 (define_insn "xop_phaddudq"
10979 [(set (match_operand:V2DI 0 "register_operand" "=x")
10983 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10984 (parallel [(const_int 0)
10989 (parallel [(const_int 1)
10990 (const_int 3)])))))]
10992 "vphaddudq\t{%1, %0|%0, %1}"
10993 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract.  Single V16QI source (operand 1,
;; register or memory), V8HI result.  The two element selections start at
;; indices 0 and 1, and the second ends at 15.
10995 (define_insn "xop_phsubbw"
10996 [(set (match_operand:V8HI 0 "register_operand" "=x")
11000 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11001 (parallel [(const_int 0)
11012 (parallel [(const_int 1)
11019 (const_int 15)])))))]
11021 "vphsubbw\t{%1, %0|%0, %1}"
11022 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract.  Single V8HI source (operand 1,
;; register or memory), V4SI result.  The two element selections start at
;; indices 0 and 1, and the second ends at 7.
11024 (define_insn "xop_phsubwd"
11025 [(set (match_operand:V4SI 0 "register_operand" "=x")
11029 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11030 (parallel [(const_int 0)
11037 (parallel [(const_int 1)
11040 (const_int 7)])))))]
11042 "vphsubwd\t{%1, %0|%0, %1}"
11043 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract.  Single V4SI source (operand 1,
;; register or memory), V2DI result.  The two element selections start at
;; indices 0 and 1, and the second ends at 3.
11045 (define_insn "xop_phsubdq"
11046 [(set (match_operand:V2DI 0 "register_operand" "=x")
11050 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11051 (parallel [(const_int 0)
11056 (parallel [(const_int 1)
11057 (const_int 3)])))))]
11059 "vphsubdq\t{%1, %0|%0, %1}"
11060 [(set_attr "type" "sseiadd1")])
11062 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE of three V16QI operands.  Operand 1 is
;; always a register.  Either operand 2 or operand 3 may be memory, but
;; the insn condition rejects the case where both are.
11063 (define_insn "xop_pperm"
11064 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11066 [(match_operand:V16QI 1 "register_operand" "x,x")
11067 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11068 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11069 UNSPEC_XOP_PERMUTE))]
11070 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11071 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11072 [(set_attr "type" "sse4arg")
11073 (set_attr "mode" "TI")])
11075 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is the V16QI selector, referenced through a (use).
;; The insn condition rejects operands 2 and 3 both being memory.
11076 (define_insn "xop_pperm_pack_v2di_v4si"
11077 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11080 (match_operand:V2DI 1 "register_operand" "x,x"))
11082 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11083 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11084 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11085 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11086 [(set_attr "type" "sse4arg")
11087 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is the V16QI selector, referenced through a (use).
;; The insn condition rejects operands 2 and 3 both being memory.
11089 (define_insn "xop_pperm_pack_v4si_v8hi"
11090 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11093 (match_operand:V4SI 1 "register_operand" "x,x"))
11095 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11096 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11097 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11098 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11099 [(set_attr "type" "sse4arg")
11100 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI selector, referenced through a (use).
;; The insn condition rejects operands 2 and 3 both being memory.
11102 (define_insn "xop_pperm_pack_v8hi_v16qi"
11103 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11106 (match_operand:V8HI 1 "register_operand" "x,x"))
11108 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11109 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11110 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11111 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11112 [(set_attr "type" "sse4arg")
11113 (set_attr "mode" "TI")])
11115 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SI count.  When the
;; count is not an in-range constant (const_0_to_<sserotatemax>_operand
;; fails), the scalar count is converted to the element mode if needed,
;; replicated into every element of a PARALLEL, loaded into a vector
;; register with vec_init, and the per-element rotate xop_vrotl<mode>3 is
;; emitted with that vector as the count.
11116 (define_expand "rotl<mode>3"
11117 [(set (match_operand:VI_128 0 "register_operand" "")
11119 (match_operand:VI_128 1 "nonimmediate_operand" "")
11120 (match_operand:SI 2 "general_operand")))]
11123 /* If we were given a scalar, convert it to parallel */
11124 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11126 rtvec vs = rtvec_alloc (<ssescalarnum>);
11127 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11128 rtx reg = gen_reg_rtx (<MODE>mode);
11129 rtx op2 = operands[2];
11132 if (GET_MODE (op2) != <ssescalarmode>mode)
11134 op2 = gen_reg_rtx (<ssescalarmode>mode);
11135 convert_move (op2, operands[2], false);
11138 for (i = 0; i < <ssescalarnum>; i++)
11139 RTVEC_ELT (vs, i) = op2;
11141 emit_insn (gen_vec_init<mode> (reg, par));
11142 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SI count.  When the
;; count is not an in-range constant, the scalar count is converted to
;; the element mode if needed, broadcast into a vector register with
;; vec_init, negated, and passed to the per-element rotate
;; xop_vrotl<mode>3.  A right rotate is thus expressed as a left rotate
;; by the negated count.
11147 (define_expand "rotr<mode>3"
11148 [(set (match_operand:VI_128 0 "register_operand" "")
11150 (match_operand:VI_128 1 "nonimmediate_operand" "")
11151 (match_operand:SI 2 "general_operand")))]
11154 /* If we were given a scalar, convert it to parallel */
11155 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11157 rtvec vs = rtvec_alloc (<ssescalarnum>);
11158 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11159 rtx neg = gen_reg_rtx (<MODE>mode);
11160 rtx reg = gen_reg_rtx (<MODE>mode);
11161 rtx op2 = operands[2];
11164 if (GET_MODE (op2) != <ssescalarmode>mode)
11166 op2 = gen_reg_rtx (<ssescalarmode>mode);
11167 convert_move (op2, operands[2], false);
11170 for (i = 0; i < <ssescalarnum>; i++)
11171 RTVEC_ELT (vs, i) = op2;
11173 emit_insn (gen_vec_init<mode> (reg, par));
11174 emit_insn (gen_neg<mode>2 (neg, reg));
11175 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count.  Operand 1 is a VI_128 register or
;; memory operand; operand 2 is a constant in the range accepted by
;; const_0_to_<sserotatemax>_operand and is emitted directly as the vprot
;; immediate.
11180 (define_insn "xop_rotl<mode>3"
11181 [(set (match_operand:VI_128 0 "register_operand" "=x")
11183 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11184 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11186 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11187 [(set_attr "type" "sseishft")
11188 (set_attr "length_immediate" "1")
11189 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The insn is emitted as a vprot
;; (rotate left) whose immediate, stored in operands[3], is the element
;; width in bits minus the requested count in operand 2.
;; The width must be that of one element (<ssescalarmode>), not the
;; element count times 8: the two only coincide for V4SI, so the former
;; computation produced a wrong immediate for V16QI, V8HI and V2DI.
11191 (define_insn "xop_rotr<mode>3"
11192 [(set (match_operand:VI_128 0 "register_operand" "=x")
11194 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11195 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11198 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11199 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11201 [(set_attr "type" "sseishft")
11202 (set_attr "length_immediate" "1")
11203 (set_attr "mode" "TI")])
;; Per-element rotate right with a vector count (all operands are VI_128
;; registers).  Implemented by negating the count vector into a fresh
;; register and emitting the per-element rotate-left insn
;; xop_vrotl<mode>3.
11205 (define_expand "vrotr<mode>3"
11206 [(match_operand:VI_128 0 "register_operand" "")
11207 (match_operand:VI_128 1 "register_operand" "")
11208 (match_operand:VI_128 2 "register_operand" "")]
11211 rtx reg = gen_reg_rtx (<MODE>mode);
11212 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11213 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element rotate left with a vector count (all operands are VI_128
;; registers).  Forwards the operands unchanged to xop_vrotl<mode>3.
11217 (define_expand "vrotl<mode>3"
11218 [(match_operand:VI_128 0 "register_operand" "")
11219 (match_operand:VI_128 1 "register_operand" "")
11220 (match_operand:VI_128 2 "register_operand" "")]
11223 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate (vprot with a vector count in operand 2).
;; The RTL is an if_then_else on operand 2 whose last arm uses the negated
;; count, (neg (match_dup 2)).  Either operand 1 or operand 2 may be
;; memory, but the insn condition rejects both being memory.
;; NOTE(review): the if_then_else test and the first arm are not visible
;; here - presumably a sign test choosing rotate-left vs rotate-right;
;; confirm.
11227 (define_insn "xop_vrotl<mode>3"
11228 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11229 (if_then_else:VI_128
11231 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11234 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11238 (neg:VI_128 (match_dup 2)))))]
11239 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11240 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11241 [(set_attr "type" "sseishft")
11242 (set_attr "prefix_data16" "0")
11243 (set_attr "prefix_extra" "2")
11244 (set_attr "mode" "TI")])
11246 ;; XOP packed shift instructions.
;; Per-element logical shift right for the VI12_128 modes.  Implemented
;; by negating the count vector into a fresh register and emitting the
;; XOP logical-shift insn xop_lshl<mode>3 with the negated count.
11247 (define_expand "vlshr<mode>3"
11248 [(set (match_operand:VI12_128 0 "register_operand" "")
11250 (match_operand:VI12_128 1 "register_operand" "")
11251 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11254 rtx neg = gen_reg_rtx (<MODE>mode);
11255 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11256 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical shift right for the VI48_128 modes, available with
;; either AVX2 or XOP.  The visible preparation code negates the count
;; vector and emits xop_lshl<mode>3.
;; NOTE(review): the guard around this code is not visible here -
;; presumably it runs only on the XOP path; confirm.
11260 (define_expand "vlshr<mode>3"
11261 [(set (match_operand:VI48_128 0 "register_operand" "")
11263 (match_operand:VI48_128 1 "register_operand" "")
11264 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11265 "TARGET_AVX2 || TARGET_XOP"
11269 rtx neg = gen_reg_rtx (<MODE>mode);
11270 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11271 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical shift right for the VI48_256 modes.  No
;; preparation code is visible for this variant.
;; NOTE(review): the enabling condition is not visible here - confirm.
11276 (define_expand "vlshr<mode>3"
11277 [(set (match_operand:VI48_256 0 "register_operand" "")
11279 (match_operand:VI48_256 1 "register_operand" "")
11280 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Per-element arithmetic shift right for the VI128_128 modes.
;; Implemented by negating the count vector into a fresh register and
;; emitting the XOP arithmetic-shift insn xop_ashl<mode>3 with the negated
;; count.
11283 (define_expand "vashr<mode>3"
11284 [(set (match_operand:VI128_128 0 "register_operand" "")
11285 (ashiftrt:VI128_128
11286 (match_operand:VI128_128 1 "register_operand" "")
11287 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11290 rtx neg = gen_reg_rtx (<MODE>mode);
11291 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11292 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for V4SI, available with either
;; AVX2 or XOP.  The visible preparation code negates the count vector and
;; emits xop_ashlv4si3.
;; NOTE(review): the guard around this code is not visible here -
;; presumably it runs only on the XOP path; confirm.
11296 (define_expand "vashrv4si3"
11297 [(set (match_operand:V4SI 0 "register_operand" "")
11298 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11299 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11300 "TARGET_AVX2 || TARGET_XOP"
11304 rtx neg = gen_reg_rtx (V4SImode);
11305 emit_insn (gen_negv4si2 (neg, operands[2]));
11306 emit_insn (gen_xop_ashlv4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for V8SI.  No preparation code is
;; visible for this variant.
;; NOTE(review): the enabling condition is not visible here - confirm.
11311 (define_expand "vashrv8si3"
11312 [(set (match_operand:V8SI 0 "register_operand" "")
11313 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11314 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; Per-element shift left for the VI12_128 modes.  Forwards the operands
;; unchanged to the XOP insn xop_ashl<mode>3.
11317 (define_expand "vashl<mode>3"
11318 [(set (match_operand:VI12_128 0 "register_operand" "")
11320 (match_operand:VI12_128 1 "register_operand" "")
11321 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11324 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element shift left for the VI48_128 modes, available with either
;; AVX2 or XOP.  The visible preparation code forces the count into a
;; register and emits xop_ashl<mode>3.
;; NOTE(review): the guard around this code is not visible here -
;; presumably it runs only on the XOP path; confirm.
11328 (define_expand "vashl<mode>3"
11329 [(set (match_operand:VI48_128 0 "register_operand" "")
11331 (match_operand:VI48_128 1 "register_operand" "")
11332 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11333 "TARGET_AVX2 || TARGET_XOP"
11337 operands[2] = force_reg (<MODE>mode, operands[2]);
11338 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element shift left for the VI48_256 modes.  No preparation code is
;; visible for this variant.
;; NOTE(review): the enabling condition is not visible here - confirm.
11343 (define_expand "vashl<mode>3"
11344 [(set (match_operand:VI48_256 0 "register_operand" "")
11346 (match_operand:VI48_256 1 "register_operand" "")
11347 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; XOP per-element arithmetic shift (vpsha with a vector count in
;; operand 2).  The RTL is an if_then_else on operand 2 whose last arm
;; uses the negated count, (neg (match_dup 2)); the right-shift expanders
;; in this file rely on this by passing a negated count.  Either operand 1
;; or operand 2 may be memory, but not both.
11350 (define_insn "xop_ashl<mode>3"
11351 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11352 (if_then_else:VI_128
11354 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11357 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11361 (neg:VI_128 (match_dup 2)))))]
11362 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11363 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11364 [(set_attr "type" "sseishft")
11365 (set_attr "prefix_data16" "0")
11366 (set_attr "prefix_extra" "2")
11367 (set_attr "mode" "TI")])
;; XOP per-element logical shift (vpshl with a vector count in
;; operand 2).  The RTL is an if_then_else on operand 2 whose last arm
;; uses the negated count, (neg (match_dup 2)); the logical right-shift
;; expanders in this file rely on this by passing a negated count.  Either
;; operand 1 or operand 2 may be memory, but not both.
11369 (define_insn "xop_lshl<mode>3"
11370 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11371 (if_then_else:VI_128
11373 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11376 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11380 (neg:VI_128 (match_dup 2)))))]
11381 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11382 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11383 [(set_attr "type" "sseishft")
11384 (set_attr "prefix_data16" "0")
11385 (set_attr "prefix_extra" "2")
11386 (set_attr "mode" "TI")])
11388 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander "ashlv16qi3": shift of a V16QI register by a scalar SImode count
;; (register or constant).  The count is replicated into all 16 lanes of a
;; fresh V16QI register through vec_initv16qi, and that vector is then used
;; as the count operand of xop_ashlv16qi3.
11389 (define_expand "ashlv16qi3"
11390 [(set (match_operand:V16QI 0 "register_operand" "")
11392 (match_operand:V16QI 1 "register_operand" "")
11393 (match_operand:SI 2 "nonmemory_operand" "")))]
11396 rtx reg = gen_reg_rtx (V16QImode);
11400 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11401 for (i = 0; i < 16; i++)
11402 XVECEXP (par, 0, i) = operands[2];
11404 emit_insn (gen_vec_initv16qi (reg, par));
11405 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander "<shift_insn>v16qi3": V16QI shift by a scalar SImode count,
;; implemented with the XOP shift insns using a negated count.
;; A CONST_INT count is negated at expand time; the count is then replicated
;; into all 16 lanes via vec_initv16qi.  gen_negv16qi2 negates the vector
;; count in place.
;; NOTE(review): the condition guarding the gen_negv16qi2 call is not visible
;; in this excerpt -- presumably it runs only when the "negate" flag was set
;; for a non-constant count; confirm.
;; LSHIFTRT uses xop_lshlv16qi3; any other code uses xop_ashlv16qi3.
11409 (define_expand "<shift_insn>v16qi3"
11410 [(set (match_operand:V16QI 0 "register_operand" "")
11412 (match_operand:V16QI 1 "register_operand" "")
11413 (match_operand:SI 2 "nonmemory_operand" "")))]
11416 rtx reg = gen_reg_rtx (V16QImode);
11418 bool negate = false;
11419 rtx (*shift_insn)(rtx, rtx, rtx);
11422 if (CONST_INT_P (operands[2]))
11423 operands[2] = GEN_INT (-INTVAL (operands[2]));
11427 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11428 for (i = 0; i < 16; i++)
11429 XVECEXP (par, 0, i) = operands[2];
11431 emit_insn (gen_vec_initv16qi (reg, par));
11434 emit_insn (gen_negv16qi2 (reg, reg));
11436 if (<CODE> == LSHIFTRT)
11437 shift_insn = gen_xop_lshlv16qi3;
11439 shift_insn = gen_xop_ashlv16qi3;
11441 emit_insn (shift_insn (operands[0], operands[1], reg));
;; Expander "ashrv2di3": V2DI shift by a scalar DImode count, implemented
;; with xop_ashlv2di3 using a negated count.  A CONST_INT count is negated at
;; expand time; the count is replicated into both lanes via vec_initv2di.
;; NOTE(review): the condition guarding the gen_negv2di2 call is not visible
;; in this excerpt -- presumably it applies only to non-constant counts;
;; confirm.
11445 (define_expand "ashrv2di3"
11446 [(set (match_operand:V2DI 0 "register_operand" "")
11448 (match_operand:V2DI 1 "register_operand" "")
11449 (match_operand:DI 2 "nonmemory_operand" "")))]
11452 rtx reg = gen_reg_rtx (V2DImode);
11454 bool negate = false;
11457 if (CONST_INT_P (operands[2]))
11458 operands[2] = GEN_INT (-INTVAL (operands[2]));
11462 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11463 for (i = 0; i < 2; i++)
11464 XVECEXP (par, 0, i) = operands[2];
11466 emit_insn (gen_vec_initv2di (reg, par));
11469 emit_insn (gen_negv2di2 (reg, reg));
11471 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11475 ;; XOP FRCZ support
;; "vfrcz" instruction for the FMAMODE modes: one source operand (register
;; or memory) and a register destination.
11476 (define_insn "xop_frcz<mode>2"
11477 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11479 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11482 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11483 [(set_attr "type" "ssecvt1")
11484 (set_attr "mode" "<MODE>")])
;; Expander for the scalar ("vm") form of vfrcz on the VF_128 modes.  The
;; preparation code supplies an all-zero vector of the same mode as
;; operand 3.
11487 (define_expand "xop_vmfrcz<mode>2"
11488 [(set (match_operand:VF_128 0 "register_operand")
11491 [(match_operand:VF_128 1 "nonimmediate_operand")]
11497 operands[3] = CONST0_RTX (<MODE>mode);
;; Matcher for the scalar vfrcz form on the VF_128 modes.  Operand 2 must be
;; a constant-zero vector (const0_operand); it does not appear in the output
;; template, which prints only operands 0 and 1.
11500 (define_insn "*xop_vmfrcz_<mode>"
11501 [(set (match_operand:VF_128 0 "register_operand" "=x")
11504 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11506 (match_operand:VF_128 2 "const0_operand")
11509 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11510 [(set_attr "type" "ssecvt1")
11511 (set_attr "mode" "<MODE>")])
;; "vpcom" comparison for the VI_128 modes.  Operand 1 is the comparison
;; operator itself (any code accepted by ix86_comparison_int_operator) and is
;; printed into the mnemonic via %Y1; operand 2 must be a register, operand 3
;; may be a register or memory.
11513 (define_insn "xop_maskcmp<mode>3"
11514 [(set (match_operand:VI_128 0 "register_operand" "=x")
11515 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11516 [(match_operand:VI_128 2 "register_operand" "x")
11517 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11519 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11520 [(set_attr "type" "sse4arg")
11521 (set_attr "prefix_data16" "0")
11522 (set_attr "prefix_rep" "0")
11523 (set_attr "prefix_extra" "2")
11524 (set_attr "length_immediate" "1")
11525 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the operator must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" after the
;; condition printed by %Y1.
11527 (define_insn "xop_maskcmp_uns<mode>3"
11528 [(set (match_operand:VI_128 0 "register_operand" "=x")
11529 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11530 [(match_operand:VI_128 2 "register_operand" "x")
11531 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11533 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11534 [(set_attr "type" "ssecmp")
11535 (set_attr "prefix_data16" "0")
11536 (set_attr "prefix_rep" "0")
11537 (set_attr "prefix_extra" "2")
11538 (set_attr "length_immediate" "1")
11539 (set_attr "mode" "TI")])
11541 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11542 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11543 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so that the RTL is not a plain
;; comparison expression.
11544 (define_insn "xop_maskcmp_uns2<mode>3"
11545 [(set (match_operand:VI_128 0 "register_operand" "=x")
11547 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11548 [(match_operand:VI_128 2 "register_operand" "x")
11549 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11550 UNSPEC_XOP_UNSIGNED_CMP))]
11552 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11553 [(set_attr "type" "ssecmp")
11554 (set_attr "prefix_data16" "0")
11555 (set_attr "prefix_extra" "2")
11556 (set_attr "length_immediate" "1")
11557 (set_attr "mode" "TI")])
11559 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11560 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse for the VI_128 modes.  Operand 3 is a constant
;; integer selector: a nonzero value emits "vpcomtrue", zero emits
;; "vpcomfalse".  It is not printed as an instruction operand.
11561 (define_insn "xop_pcom_tf<mode>3"
11562 [(set (match_operand:VI_128 0 "register_operand" "=x")
11564 [(match_operand:VI_128 1 "register_operand" "x")
11565 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11566 (match_operand:SI 3 "const_int_operand" "n")]
11567 UNSPEC_XOP_TRUEFALSE))]
11570 return ((INTVAL (operands[3]) != 0)
11571 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11572 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11574 [(set_attr "type" "ssecmp")
11575 (set_attr "prefix_data16" "0")
11576 (set_attr "prefix_extra" "2")
11577 (set_attr "length_immediate" "1")
11578 (set_attr "mode" "TI")])
;; "vpermil2" for the VF modes: two vector sources (operands 1 and 2), an
;; integer-vector operand 3 in <sseintvecmode> (register or memory) and an
;; immediate operand 4 restricted to the range 0..3.
11580 (define_insn "xop_vpermil2<mode>3"
11581 [(set (match_operand:VF 0 "register_operand" "=x")
11583 [(match_operand:VF 1 "register_operand" "x")
11584 (match_operand:VF 2 "nonimmediate_operand" "%x")
11585 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11586 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11589 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11590 [(set_attr "type" "sse4arg")
11591 (set_attr "length_immediate" "1")
11592 (set_attr "mode" "<MODE>")])
11594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES "aesenc" on V2DI.  Two alternatives: the non-AVX form, where operand 1
;; is tied to the destination (constraint "0") and the two-operand mnemonic
;; is used, and the AVX form, which emits the three-operand "vaesenc".
11596 (define_insn "aesenc"
11597 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11598 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11599 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11603 aesenc\t{%2, %0|%0, %2}
11604 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11605 [(set_attr "isa" "noavx,avx")
11606 (set_attr "type" "sselog1")
11607 (set_attr "prefix_extra" "1")
11608 (set_attr "prefix" "orig,vex")
11609 (set_attr "mode" "TI")])
;; AES "aesenclast" on V2DI (UNSPEC_AESENCLAST).  Alternative 0 is the
;; non-AVX two-operand form with operand 1 tied to the destination;
;; alternative 1 is the AVX three-operand "vaesenclast".
11611 (define_insn "aesenclast"
11612 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11613 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11614 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11615 UNSPEC_AESENCLAST))]
11618 aesenclast\t{%2, %0|%0, %2}
11619 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11620 [(set_attr "isa" "noavx,avx")
11621 (set_attr "type" "sselog1")
11622 (set_attr "prefix_extra" "1")
11623 (set_attr "prefix" "orig,vex")
11624 (set_attr "mode" "TI")])
;; AES "aesdec" on V2DI.  Alternative 0 is the non-AVX two-operand form with
;; operand 1 tied to the destination; alternative 1 is the AVX three-operand
;; "vaesdec".
11626 (define_insn "aesdec"
11627 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11628 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11629 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11633 aesdec\t{%2, %0|%0, %2}
11634 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11635 [(set_attr "isa" "noavx,avx")
11636 (set_attr "type" "sselog1")
11637 (set_attr "prefix_extra" "1")
11638 (set_attr "prefix" "orig,vex")
11639 (set_attr "mode" "TI")])
;; AES "aesdeclast" on V2DI (UNSPEC_AESDECLAST).  Alternative 0 is the
;; non-AVX two-operand form with operand 1 tied to the destination;
;; alternative 1 is the AVX three-operand "vaesdeclast".
11641 (define_insn "aesdeclast"
11642 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11643 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11644 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11645 UNSPEC_AESDECLAST))]
11648 aesdeclast\t{%2, %0|%0, %2}
11649 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11650 [(set_attr "isa" "noavx,avx")
11651 (set_attr "type" "sselog1")
11652 (set_attr "prefix_extra" "1")
11653 (set_attr "prefix" "orig,vex")
11654 (set_attr "mode" "TI")])
;; AES "aesimc" on V2DI: a single source operand (register or memory).  One
;; template serves both encodings through the "%v" mnemonic prefix, matching
;; the "maybe_vex" prefix attribute.
11656 (define_insn "aesimc"
11657 [(set (match_operand:V2DI 0 "register_operand" "=x")
11658 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11661 "%vaesimc\t{%1, %0|%0, %1}"
11662 [(set_attr "type" "sselog1")
11663 (set_attr "prefix_extra" "1")
11664 (set_attr "prefix" "maybe_vex")
11665 (set_attr "mode" "TI")])
;; AES "aeskeygenassist" on V2DI (UNSPEC_AESKEYGENASSIST): source operand 1
;; (register or memory) plus an 8-bit immediate operand 2 in the range
;; 0..255.  Uses the "%v" prefix / "maybe_vex" scheme.
11667 (define_insn "aeskeygenassist"
11668 [(set (match_operand:V2DI 0 "register_operand" "=x")
11669 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11670 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11671 UNSPEC_AESKEYGENASSIST))]
11673 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11674 [(set_attr "type" "sselog1")
11675 (set_attr "prefix_extra" "1")
11676 (set_attr "length_immediate" "1")
11677 (set_attr "prefix" "maybe_vex")
11678 (set_attr "mode" "TI")])
;; "pclmulqdq" on V2DI with an 8-bit immediate operand 3 (0..255).
;; Alternative 0 is the non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the AVX form "vpclmulqdq" with separate source operands.
11680 (define_insn "pclmulqdq"
11681 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11682 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11683 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11684 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11688 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11689 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11690 [(set_attr "isa" "noavx,avx")
11691 (set_attr "type" "sselog1")
11692 (set_attr "prefix_extra" "1")
11693 (set_attr "length_immediate" "1")
11694 (set_attr "prefix" "orig,vex")
11695 (set_attr "mode" "TI")])
;; Expander "avx_vzeroall".  Builds operand 0 as a PARALLEL with nregs + 1
;; elements, where nregs is 16 for TARGET_64BIT and 8 otherwise.  Element 0
;; is an unspec_volatile; each following element is a SET that stores
;; CONST0_RTX (V8SImode) into one SSE register viewed in V8SImode.
11697 (define_expand "avx_vzeroall"
11698 [(match_par_dup 0 [(const_int 0)])]
11701 int nregs = TARGET_64BIT ? 16 : 8;
11704 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11706 XVECEXP (operands[0], 0, 0)
11707 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11710 for (regno = 0; regno < nregs; regno++)
11711 XVECEXP (operands[0], 0, regno + 1)
11712 = gen_rtx_SET (VOIDmode,
11713 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11714 CONST0_RTX (V8SImode));
;; Matcher for the PARALLEL built by the avx_vzeroall expander: the whole
;; parallel must satisfy the vzeroall_operation predicate and its first
;; element must be the UNSPECV_VZEROALL unspec_volatile.
11717 (define_insn "*avx_vzeroall"
11718 [(match_parallel 0 "vzeroall_operation"
11719 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11722 [(set_attr "type" "sse")
11723 (set_attr "modrm" "0")
11724 (set_attr "memory" "none")
11725 (set_attr "prefix" "vex")
11726 (set_attr "mode" "OI")])
11728 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11729 ;; if the upper 128bits are unused.
;; "avx_vzeroupper": modelled as an UNSPECV_VZEROUPPER unspec_volatile that
;; carries one constant-integer operand 0.  The attributes mark it as a
;; VEX-encoded insn with no ModRM byte and no memory access.
11730 (define_insn "avx_vzeroupper"
11731 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11732 UNSPECV_VZEROUPPER)]
11735 [(set_attr "type" "sse")
11736 (set_attr "modrm" "0")
11737 (set_attr "memory" "none")
11738 (set_attr "prefix" "vex")
11739 (set_attr "mode" "OI")])
;; Maps an integer vector mode to the 128-bit vector mode with the same
;; element type: each 256-bit mode maps to its half-width counterpart and
;; each 128-bit mode maps to itself.
11741 (define_mode_attr AVXTOSSEMODE
11742 [(V4DI "V2DI") (V2DI "V2DI")
11743 (V8SI "V4SI") (V4SI "V4SI")
11744 (V16HI "V8HI") (V8HI "V8HI")
11745 (V32QI "V16QI") (V16QI "V16QI")])
;; "vpbroadcast" for the VI modes.  The source operand 1 is a vector in
;; <AVXTOSSEMODE> (register or memory); the RTL selects its element 0 as the
;; scalar in <ssescalarmode>.
11747 (define_insn "avx2_pbroadcast<mode>"
11748 [(set (match_operand:VI 0 "register_operand" "=x")
11750 (vec_select:<ssescalarmode>
11751 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11752 (parallel [(const_int 0)]))))]
11754 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11755 [(set_attr "type" "ssemov")
11756 (set_attr "prefix_extra" "1")
11757 (set_attr "prefix" "vex")
11758 (set_attr "mode" "<sseinsnmode>")])
;; "vpermd" on V8SI: operand 1 must be a register, operand 2 may be a
;; register or memory.
;; NOTE(review): which operand is the index vector and which is the data is
;; not determinable from this pattern alone -- check against the builtin that
;; expands to it.
11760 (define_insn "avx2_permvarv8si"
11761 [(set (match_operand:V8SI 0 "register_operand" "=x")
11763 [(match_operand:V8SI 1 "register_operand" "x")
11764 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11767 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11768 [(set_attr "type" "sselog")
11769 (set_attr "prefix" "vex")
11770 (set_attr "mode" "OI")])
;; "vpermpd" with an immediate selector: V4DF source operand 1 and an 8-bit
;; immediate operand 2 in the range 0..255.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with its
;; "xm" constraint (and with avx2_permv4di_1, the V4DI sibling): a memory
;; source can then be matched directly instead of only appearing through
;; reload.
11772 (define_insn "avx2_permv4df"
11773 [(set (match_operand:V4DF 0 "register_operand" "=x")
11775 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11776 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11779 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11780 [(set_attr "type" "sselog")
11781 (set_attr "prefix_extra" "1")
11782 (set_attr "prefix" "vex")
11783 (set_attr "mode" "OI")])
;; "vpermps" on V8SF: operand 1 must be a register, operand 2 may be a
;; register or memory.  Both operands are declared in V8SFmode here.
11785 (define_insn "avx2_permvarv8sf"
11786 [(set (match_operand:V8SF 0 "register_operand" "=x")
11788 [(match_operand:V8SF 1 "register_operand" "x")
11789 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11792 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11793 [(set_attr "type" "sselog")
11794 (set_attr "prefix" "vex")
11795 (set_attr "mode" "OI")])
;; Expander "avx2_permv4di": takes an 8-bit immediate (operand 2) and splits
;; it into four 2-bit element selectors (bits 1:0, 3:2, 5:4, 7:6), which are
;; passed as separate constants to avx2_permv4di_1.
11797 (define_expand "avx2_permv4di"
11798 [(match_operand:V4DI 0 "register_operand" "")
11799 (match_operand:V4DI 1 "nonimmediate_operand" "")
11800 (match_operand:SI 2 "const_0_to_255_operand" "")]
11803 int mask = INTVAL (operands[2]);
11804 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11805 GEN_INT ((mask >> 0) & 3),
11806 GEN_INT ((mask >> 2) & 3),
11807 GEN_INT ((mask >> 4) & 3),
11808 GEN_INT ((mask >> 6) & 3)));
;; "vpermq" expressed with an explicit parallel of four element selectors
;; (operands 2..5, each in 0..3).  At output time the selectors are packed
;; back into one immediate -- operand 2 in bits 1:0, operand 3 in bits 3:2,
;; operand 4 in bits 5:4, operand 5 in bits 7:6 -- and operands[2] is
;; overwritten with that immediate for printing.
11812 (define_insn "avx2_permv4di_1"
11813 [(set (match_operand:V4DI 0 "register_operand" "=x")
11815 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11816 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11817 (match_operand 3 "const_0_to_3_operand" "")
11818 (match_operand 4 "const_0_to_3_operand" "")
11819 (match_operand 5 "const_0_to_3_operand" "")])))]
11823 mask |= INTVAL (operands[2]) << 0;
11824 mask |= INTVAL (operands[3]) << 2;
11825 mask |= INTVAL (operands[4]) << 4;
11826 mask |= INTVAL (operands[5]) << 6;
11827 operands[2] = GEN_INT (mask);
11828 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11830 [(set_attr "type" "sselog")
11831 (set_attr "prefix" "vex")
11832 (set_attr "mode" "OI")])
;; "vperm2i128" on V4DI: register operand 1, register-or-memory operand 2
;; and an 8-bit immediate operand 3 in the range 0..255.
11834 (define_insn "avx2_permv2ti"
11835 [(set (match_operand:V4DI 0 "register_operand" "=x")
11837 [(match_operand:V4DI 1 "register_operand" "x")
11838 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11839 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11842 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11843 [(set_attr "type" "sselog")
11844 (set_attr "prefix" "vex")
11845 (set_attr "mode" "OI")])
;; "vbroadcastsd" from a register: duplicates element 0 of the V2DF register
;; operand 1 into all elements of the V4DF destination.
11847 (define_insn "avx2_vec_dupv4df"
11848 [(set (match_operand:V4DF 0 "register_operand" "=x")
11849 (vec_duplicate:V4DF
11851 (match_operand:V2DF 1 "register_operand" "x")
11852 (parallel [(const_int 0)]))))]
11854 "vbroadcastsd\t{%1, %0|%0, %1}"
11855 [(set_attr "type" "sselog1")
11856 (set_attr "prefix" "vex")
11857 (set_attr "mode" "V4DF")])
11859 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vector modes with 32-bit or 64-bit elements, in both
;; integer and floating-point flavours.
11860 (define_mode_iterator AVX_VEC_DUP_MODE
11861 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into an AVX_VEC_DUP_MODE vector.  Alternative 0
;; takes the scalar from memory and emits vbroadcast<ssescalarmodesuffix>.
;; Alternative 1 takes it from a register and is slightly disparaged ("?").
;; NOTE(review): the output template for alternative 1 is not visible in this
;; excerpt; the post-reload split of a register vec_duplicate elsewhere in
;; this file presumably handles it -- confirm.
11863 (define_insn "vec_dup<mode>"
11864 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11865 (vec_duplicate:AVX_VEC_DUP_MODE
11866 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11869 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11871 [(set_attr "type" "ssemov")
11872 (set_attr "prefix_extra" "1")
11873 (set_attr "prefix" "vex")
11874 (set_attr "mode" "V8SF")])
;; "vbroadcasti128" for the VI_256 modes.  The half-width source
;; (<ssehalfvecmode>) must be a memory operand; no register alternative is
;; provided.
11876 (define_insn "avx2_vbroadcasti128_<mode>"
11877 [(set (match_operand:VI_256 0 "register_operand" "=x")
11879 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11882 "vbroadcasti128\t{%1, %0|%0, %1}"
11883 [(set_attr "type" "ssemov")
11884 (set_attr "prefix_extra" "1")
11885 (set_attr "prefix" "vex")
11886 (set_attr "mode" "OI")])
;; Post-reload split of a vec_duplicate whose scalar source is a register.
;; Step 1 duplicates the scalar into operand 2, which is the destination's
;; hard register viewed in the half-width mode <ssehalfvecmode>.  The last
;; visible RTL line then concatenates operand 2 with itself to form the full
;; vector.
11889 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11890 (vec_duplicate:AVX_VEC_DUP_MODE
11891 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11892 "TARGET_AVX && reload_completed"
11893 [(set (match_dup 2)
11894 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11896 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11897 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit broadcast into a V_256 register, with three alternatives:
;;   0: source in memory            -> vbroadcast<i128>
;;   1: source tied to operand 0    -> vinsert<i128> with immediate 1,
;;      inserting the source into operand 0 itself
;;   2: source in another register  -> vperm2<i128> with immediate 0, using
;;      the %t form of operand 1 for both sources (disparaged with "?")
11899 (define_insn "avx_vbroadcastf128_<mode>"
11900 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11902 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11906 vbroadcast<i128>\t{%1, %0|%0, %1}
11907 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11908 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11909 [(set_attr "type" "ssemov,sselog1,sselog1")
11910 (set_attr "prefix_extra" "1")
11911 (set_attr "length_immediate" "0,1,1")
11912 (set_attr "prefix" "vex")
11913 (set_attr "mode" "<sseinsnmode>")])
11915 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11916 ;; If it so happens that the input is in memory, use vbroadcast.
11917 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast written as a selection from operand 1 with a parallel that
;; satisfies avx_vbroadcast_operand; operand 3 is the selected element index.
;; Memory path: the address is narrowed to the SFmode element at byte offset
;; elt * 4 and "vbroadcastss" is emitted.  Register path: "vpermilps" is
;; emitted with immediate elt * 0x55, i.e. the 2-bit index repeated in all
;; four selector fields.
;; NOTE(review): the case labels mapping alternatives to these two paths are
;; not visible in this excerpt.
11918 (define_insn "*avx_vperm_broadcast_v4sf"
11919 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11921 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11922 (match_parallel 2 "avx_vbroadcast_operand"
11923 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11926 int elt = INTVAL (operands[3]);
11927 switch (which_alternative)
11931 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11932 return "vbroadcastss\t{%1, %0|%0, %1}";
11934 operands[2] = GEN_INT (elt * 0x55);
11935 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11937 gcc_unreachable ();
11940 [(set_attr "type" "ssemov,ssemov,sselog1")
11941 (set_attr "prefix_extra" "1")
11942 (set_attr "length_immediate" "0,0,1")
11943 (set_attr "prefix" "vex")
11944 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) counterpart of *avx_vperm_broadcast_v4sf, split after
;; reload.  Two paths are visible in the split code:
;;  - vpermil shuffles the wanted element across its 128-bit lane (mask 15
;;    or 0 for V4DF depending on the low index bit, (elt & 3) * 0x55
;;    otherwise), then vperm2f128 copies that lane into both lanes.
;;  - operand 1 is re-addressed to the selected scalar element so that the
;;    replacement pattern becomes a vec_duplicate of that element.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- presumably register versus memory source; confirm.
11946 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11947 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11949 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11950 (match_parallel 2 "avx_vbroadcast_operand"
11951 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11954 "&& reload_completed"
11955 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11957 rtx op0 = operands[0], op1 = operands[1];
11958 int elt = INTVAL (operands[3]);
11964 /* Shuffle element we care about into all elements of the 128-bit lane.
11965 The other lane gets shuffled too, but we don't care. */
11966 if (<MODE>mode == V4DFmode)
11967 mask = (elt & 1 ? 15 : 0);
11969 mask = (elt & 3) * 0x55;
11970 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11972 /* Shuffle the lane we care about into both lanes of the dest. */
11973 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11974 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11978 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11979 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander "avx_vpermil<mode>" for the DFmode vector modes (VF2).  Converts
;; the immediate mask (operand 2) into an explicit PARALLEL of element
;; indices: one mask bit per element, selecting within the element's own
;; 128-bit lane.  For V4DF, bits 2 and 3 index the upper lane, hence "+ 2".
11982 (define_expand "avx_vpermil<mode>"
11983 [(set (match_operand:VF2 0 "register_operand" "")
11985 (match_operand:VF2 1 "nonimmediate_operand" "")
11986 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11989 int mask = INTVAL (operands[2]);
11990 rtx perm[<ssescalarnum>];
11992 perm[0] = GEN_INT (mask & 1);
11993 perm[1] = GEN_INT ((mask >> 1) & 1);
11994 if (<MODE>mode == V4DFmode)
11996 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11997 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
12001 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander "avx_vpermil<mode>" for the SFmode vector modes (VF1).  Converts
;; the immediate mask (operand 2) into an explicit PARALLEL of element
;; indices: four 2-bit selectors for elements 0..3.  For V8SF the same four
;; selectors are reused for the upper lane with 4 added to each index.
12004 (define_expand "avx_vpermil<mode>"
12005 [(set (match_operand:VF1 0 "register_operand" "")
12007 (match_operand:VF1 1 "nonimmediate_operand" "")
12008 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12011 int mask = INTVAL (operands[2]);
12012 rtx perm[<ssescalarnum>];
12014 perm[0] = GEN_INT (mask & 3);
12015 perm[1] = GEN_INT ((mask >> 2) & 3);
12016 perm[2] = GEN_INT ((mask >> 4) & 3);
12017 perm[3] = GEN_INT ((mask >> 6) & 3);
12018 if (<MODE>mode == V8SFmode)
12020 perm[4] = GEN_INT ((mask & 3) + 4);
12021 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
12022 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
12023 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
12027 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the immediate form of vpermil on the VF modes.  The selection
;; parallel (operand 2) is validated by avx_vpermilp_parallel, which returns
;; the encoded immediate plus one (nonzero meaning "valid"); the output code
;; subtracts one and replaces operands[2] with the immediate for printing.
12030 (define_insn "*avx_vpermilp<mode>"
12031 [(set (match_operand:VF 0 "register_operand" "=x")
12033 (match_operand:VF 1 "nonimmediate_operand" "xm")
12034 (match_parallel 2 ""
12035 [(match_operand 3 "const_int_operand" "")])))]
12037 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12039 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12040 operands[2] = GEN_INT (mask);
12041 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12043 [(set_attr "type" "sselog")
12044 (set_attr "prefix_extra" "1")
12045 (set_attr "length_immediate" "1")
12046 (set_attr "prefix" "vex")
12047 (set_attr "mode" "<MODE>")])
;; Variable-control form of vpermil on the VF modes: operand 1 is a register
;; and operand 2 is an integer vector in <sseintvecmode> (register or
;; memory).
12049 (define_insn "avx_vpermilvar<mode>3"
12050 [(set (match_operand:VF 0 "register_operand" "=x")
12052 [(match_operand:VF 1 "register_operand" "x")
12053 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12056 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12057 [(set_attr "type" "sselog")
12058 (set_attr "prefix_extra" "1")
12059 (set_attr "prefix" "vex")
12060 (set_attr "mode" "<MODE>")])
;; Expander "avx_vperm2f128<mode>3".  The default RTL is an
;; UNSPEC_VPERMIL2F128.  When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the preparation code instead builds a vec_select
;; over the vec_concat of operands 1 and 2: bits 1:0 of the mask pick the
;; 128-bit half that supplies the low result half, bits 5:4 pick the half
;; for the high result half.
12062 (define_expand "avx_vperm2f128<mode>3"
12063 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12064 (unspec:AVX256MODE2P
12065 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12066 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12067 (match_operand:SI 3 "const_0_to_255_operand" "")]
12068 UNSPEC_VPERMIL2F128))]
12071 int mask = INTVAL (operands[3]);
12072 if ((mask & 0x88) == 0)
12074 rtx perm[<ssescalarnum>], t1, t2;
12075 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12077 base = (mask & 3) * nelt2;
12078 for (i = 0; i < nelt2; ++i)
12079 perm[i] = GEN_INT (base + i);
12081 base = ((mask >> 4) & 3) * nelt2;
12082 for (i = 0; i < nelt2; ++i)
12083 perm[i + nelt2] = GEN_INT (base + i);
12085 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12086 operands[1], operands[2]);
12087 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12088 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12089 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12095 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12096 ;; means that in order to represent this properly in rtl we'd have to
12097 ;; nest *another* vec_concat with a zero operand and do the select from
12098 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: accepts any 8-bit immediate (operand 3,
;; 0..255) and prints it unchanged.
12099 (define_insn "*avx_vperm2f128<mode>_full"
12100 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12101 (unspec:AVX256MODE2P
12102 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12103 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12104 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12105 UNSPEC_VPERMIL2F128))]
12107 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12108 [(set_attr "type" "sselog")
12109 (set_attr "prefix_extra" "1")
12110 (set_attr "length_immediate" "1")
12111 (set_attr "prefix" "vex")
12112 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2<i128>: a selection from the concatenation of
;; operands 1 and 2.  The parallel (operand 3) is validated by
;; avx_vperm2f128_parallel, which returns the immediate plus one; the output
;; code subtracts one and replaces operands[3] with the immediate for
;; printing.
12114 (define_insn "*avx_vperm2f128<mode>_nozero"
12115 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12116 (vec_select:AVX256MODE2P
12117 (vec_concat:<ssedoublevecmode>
12118 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12119 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12120 (match_parallel 3 ""
12121 [(match_operand 4 "const_int_operand" "")])))]
12123 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12125 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12126 operands[3] = GEN_INT (mask);
12127 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12129 [(set_attr "type" "sselog")
12130 (set_attr "prefix_extra" "1")
12131 (set_attr "length_immediate" "1")
12132 (set_attr "prefix" "vex")
12133 (set_attr "mode" "<sseinsnmode>")])
;; Expander "avx_vinsertf128<mode>": inserts the half-width operand 2 into
;; the V_256 operand 1, with operand 3 (0 or 1) choosing which half.  It
;; dispatches to vec_set_lo_<mode> or vec_set_hi_<mode>.
;; NOTE(review): the case labels are not visible in this excerpt --
;; presumably 0 selects the low half and 1 the high half; confirm.
12135 (define_expand "avx_vinsertf128<mode>"
12136 [(match_operand:V_256 0 "register_operand" "")
12137 (match_operand:V_256 1 "register_operand" "")
12138 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12139 (match_operand:SI 3 "const_0_to_1_operand" "")]
12142 rtx (*insn)(rtx, rtx, rtx);
12144 switch (INTVAL (operands[3]))
12147 insn = gen_vec_set_lo_<mode>;
12150 insn = gen_vec_set_hi_<mode>;
12153 gcc_unreachable ();
12156 emit_insn (insn (operands[0], operands[1], operands[2]));
;; "vinserti128" with immediate 0: replaces the low 128 bits of the V4DI
;; operand 1 with the V2DI operand 2.  In the RTL, operand 2 comes first and
;; elements 2 and 3 of operand 1 are selected to be kept.
12160 (define_insn "avx2_vec_set_lo_v4di"
12161 [(set (match_operand:V4DI 0 "register_operand" "=x")
12163 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12165 (match_operand:V4DI 1 "register_operand" "x")
12166 (parallel [(const_int 2) (const_int 3)]))))]
12168 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12169 [(set_attr "type" "sselog")
12170 (set_attr "prefix_extra" "1")
12171 (set_attr "length_immediate" "1")
12172 (set_attr "prefix" "vex")
12173 (set_attr "mode" "OI")])
;; "vinserti128" with immediate 1: replaces the high 128 bits of the V4DI
;; operand 1 with the V2DI operand 2.  In the RTL, elements 0 and 1 of
;; operand 1 are selected to be kept and operand 2 follows them.
12175 (define_insn "avx2_vec_set_hi_v4di"
12176 [(set (match_operand:V4DI 0 "register_operand" "=x")
12179 (match_operand:V4DI 1 "register_operand" "x")
12180 (parallel [(const_int 0) (const_int 1)]))
12181 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12183 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12184 [(set_attr "type" "sselog")
12185 (set_attr "prefix_extra" "1")
12186 (set_attr "length_immediate" "1")
12187 (set_attr "prefix" "vex")
12188 (set_attr "mode" "OI")])
;; Low-half insert for the 4-element 256-bit modes (VI8F_256): the result is
;; operand 2 concatenated with elements 2 and 3 of operand 1.  Emitted as
;; vinsert<i128> with immediate 0.
12190 (define_insn "vec_set_lo_<mode>"
12191 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12192 (vec_concat:VI8F_256
12193 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12194 (vec_select:<ssehalfvecmode>
12195 (match_operand:VI8F_256 1 "register_operand" "x")
12196 (parallel [(const_int 2) (const_int 3)]))))]
12198 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12199 [(set_attr "type" "sselog")
12200 (set_attr "prefix_extra" "1")
12201 (set_attr "length_immediate" "1")
12202 (set_attr "prefix" "vex")
12203 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the 4-element 256-bit modes (VI8F_256): the result is
;; elements 0 and 1 of operand 1 concatenated with operand 2.  Emitted as
;; vinsert<i128> with immediate 1.
12205 (define_insn "vec_set_hi_<mode>"
12206 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12207 (vec_concat:VI8F_256
12208 (vec_select:<ssehalfvecmode>
12209 (match_operand:VI8F_256 1 "register_operand" "x")
12210 (parallel [(const_int 0) (const_int 1)]))
12211 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12213 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12214 [(set_attr "type" "sselog")
12215 (set_attr "prefix_extra" "1")
12216 (set_attr "length_immediate" "1")
12217 (set_attr "prefix" "vex")
12218 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for the 8-element 256-bit modes (VI4F_256): the result is
;; operand 2 concatenated with elements 4..7 of operand 1.  Emitted as
;; vinsert<i128> with immediate 0.
12220 (define_insn "vec_set_lo_<mode>"
12221 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12222 (vec_concat:VI4F_256
12223 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12224 (vec_select:<ssehalfvecmode>
12225 (match_operand:VI4F_256 1 "register_operand" "x")
12226 (parallel [(const_int 4) (const_int 5)
12227 (const_int 6) (const_int 7)]))))]
12229 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12230 [(set_attr "type" "sselog")
12231 (set_attr "prefix_extra" "1")
12232 (set_attr "length_immediate" "1")
12233 (set_attr "prefix" "vex")
12234 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the 8-element 256-bit modes (VI4F_256): the result is
;; elements 0..3 of operand 1 concatenated with operand 2.  Emitted as
;; vinsert<i128> with immediate 1.
12236 (define_insn "vec_set_hi_<mode>"
12237 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12238 (vec_concat:VI4F_256
12239 (vec_select:<ssehalfvecmode>
12240 (match_operand:VI4F_256 1 "register_operand" "x")
12241 (parallel [(const_int 0) (const_int 1)
12242 (const_int 2) (const_int 3)]))
12243 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12245 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12246 [(set_attr "type" "sselog")
12247 (set_attr "prefix_extra" "1")
12248 (set_attr "length_immediate" "1")
12249 (set_attr "prefix" "vex")
12250 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: V8HI operand 2 replaces the low half and
;; elements 8..15 of operand 1 are kept.  The mnemonic is printed as
;; "vinsert%~128" with immediate 0; the %~ modifier picks the mnemonic
;; variant at output time.
12252 (define_insn "vec_set_lo_v16hi"
12253 [(set (match_operand:V16HI 0 "register_operand" "=x")
12255 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12257 (match_operand:V16HI 1 "register_operand" "x")
12258 (parallel [(const_int 8) (const_int 9)
12259 (const_int 10) (const_int 11)
12260 (const_int 12) (const_int 13)
12261 (const_int 14) (const_int 15)]))))]
12263 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12264 [(set_attr "type" "sselog")
12265 (set_attr "prefix_extra" "1")
12266 (set_attr "length_immediate" "1")
12267 (set_attr "prefix" "vex")
12268 (set_attr "mode" "OI")])
;; High-half insert for V16HI: elements 0..7 of operand 1 are kept and V8HI
;; operand 2 replaces the high half.  Printed as "vinsert%~128" with
;; immediate 1.
12270 (define_insn "vec_set_hi_v16hi"
12271 [(set (match_operand:V16HI 0 "register_operand" "=x")
12274 (match_operand:V16HI 1 "register_operand" "x")
12275 (parallel [(const_int 0) (const_int 1)
12276 (const_int 2) (const_int 3)
12277 (const_int 4) (const_int 5)
12278 (const_int 6) (const_int 7)]))
12279 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12281 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12282 [(set_attr "type" "sselog")
12283 (set_attr "prefix_extra" "1")
12284 (set_attr "length_immediate" "1")
12285 (set_attr "prefix" "vex")
12286 (set_attr "mode" "OI")])
;; Low-half insert for V32QI: V16QI operand 2 replaces the low half and
;; elements 16..31 of operand 1 are kept.  Printed as "vinsert%~128" with
;; immediate 0.
12288 (define_insn "vec_set_lo_v32qi"
12289 [(set (match_operand:V32QI 0 "register_operand" "=x")
12291 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12293 (match_operand:V32QI 1 "register_operand" "x")
12294 (parallel [(const_int 16) (const_int 17)
12295 (const_int 18) (const_int 19)
12296 (const_int 20) (const_int 21)
12297 (const_int 22) (const_int 23)
12298 (const_int 24) (const_int 25)
12299 (const_int 26) (const_int 27)
12300 (const_int 28) (const_int 29)
12301 (const_int 30) (const_int 31)]))))]
12303 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12304 [(set_attr "type" "sselog")
12305 (set_attr "prefix_extra" "1")
12306 (set_attr "length_immediate" "1")
12307 (set_attr "prefix" "vex")
12308 (set_attr "mode" "OI")])
;; High-half insert for V32QI: elements 0..15 of operand 1 are kept and
;; V16QI operand 2 replaces the high half.  Printed as "vinsert%~128" with
;; immediate 1.
12310 (define_insn "vec_set_hi_v32qi"
12311 [(set (match_operand:V32QI 0 "register_operand" "=x")
12314 (match_operand:V32QI 1 "register_operand" "x")
12315 (parallel [(const_int 0) (const_int 1)
12316 (const_int 2) (const_int 3)
12317 (const_int 4) (const_int 5)
12318 (const_int 6) (const_int 7)
12319 (const_int 8) (const_int 9)
12320 (const_int 10) (const_int 11)
12321 (const_int 12) (const_int 13)
12322 (const_int 14) (const_int 15)]))
12323 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12325 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12326 [(set_attr "type" "sselog")
12327 (set_attr "prefix_extra" "1")
12328 (set_attr "length_immediate" "1")
12329 (set_attr "prefix" "vex")
12330 (set_attr "mode" "OI")])
;; Masked load for the V48_AVX2 modes.  Operand 1 is the memory source and
;; operand 2 is an integer-vector mask register in <sseintvecmode>.  Note the
;; operand numbering: the mask (operand 2) is printed between the memory
;; source and the destination.
12332 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12333 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12335 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12336 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12339 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12340 [(set_attr "type" "sselog1")
12341 (set_attr "prefix_extra" "1")
12342 (set_attr "prefix" "vex")
12343 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for the V48_AVX2 modes.  Operand 0 is the memory
;; destination, operand 1 is the integer-vector mask register in
;; <sseintvecmode> and operand 2 is the data register.
12345 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12346 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12348 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12349 (match_operand:V48_AVX2 2 "register_operand" "x")
12353 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12354 [(set_attr "type" "sselog1")
12355 (set_attr "prefix_extra" "1")
12356 (set_attr "prefix" "vex")
12357 (set_attr "mode" "<sseinsnmode>")])
;; Cast from a half-width vector (<ssehalfvecmode>) to the 256-bit mode,
;; split after reload into a single move.  To make the modes of the move
;; agree, one side is re-expressed as a hard register in the other side's
;; mode: either op0 in the half-width mode, or op1 in the full mode.
;; NOTE(review): the condition choosing between the two rewrites is not
;; visible in this excerpt -- presumably whether op0 is a register; confirm.
12359 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12360 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12361 (unspec:AVX256MODE2P
12362 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12366 "&& reload_completed"
12369 rtx op0 = operands[0];
12370 rtx op1 = operands[1];
12372 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12374 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12375 emit_move_insn (op0, op1);
;; Expander "vec_init<mode>" for the V_256 modes: delegates to
;; ix86_expand_vector_init with its first argument false, passing the
;; destination (operand 0) and the initializer (operand 1).
12379 (define_expand "vec_init<mode>"
12380 [(match_operand:V_256 0 "register_operand" "")
12381 (match_operand 1 "" "")]
12384 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander "avx2_extracti128": extracts one V2DI half of the V4DI operand 1
;; into operand 0, with operand 2 (0 or 1) choosing which half.  It
;; dispatches to vec_extract_lo_v4di or vec_extract_hi_v4di.
;; NOTE(review): the case labels are not visible in this excerpt --
;; presumably 0 selects the low half and 1 the high half; confirm.
12388 (define_expand "avx2_extracti128"
12389 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12390 (match_operand:V4DI 1 "register_operand" "")
12391 (match_operand:SI 2 "const_0_to_1_operand" "")]
12394 rtx (*insn)(rtx, rtx);
12396 switch (INTVAL (operands[2]))
12399 insn = gen_vec_extract_lo_v4di;
12402 insn = gen_vec_extract_hi_v4di;
12405 gcc_unreachable ();
12408 emit_insn (insn (operands[0], operands[1]));
;; Insert the V2DI operand 2 into one 128-bit half of the V4DI register
;; operand 1, producing the V4DI operand 0.  Operand 3 is a constant
;; accepted by const_0_to_1_operand; its value picks either
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, and any other
;; value is treated as unreachable.  The chosen generator is invoked on
;; operands 0, 1 and 2.
12412 (define_expand "avx2_inserti128"
12413 [(match_operand:V4DI 0 "register_operand" "")
12414 (match_operand:V4DI 1 "register_operand" "")
12415 (match_operand:V2DI 2 "nonimmediate_operand" "")
12416 (match_operand:SI 3 "const_0_to_1_operand" "")]
12419 rtx (*insn)(rtx, rtx, rtx);
12421 switch (INTVAL (operands[3]))
12424 insn = gen_avx2_vec_set_lo_v4di;
12427 insn = gen_avx2_vec_set_hi_v4di;
12430 gcc_unreachable ();
12433 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Variable shift emitted as vpsravd for the VI4_AVX2 modes.  Operand 1
;; (the value) must be in a register; operand 2 (the per-element counts)
;; may be a register or memory.  Operand 0 is the destination register.
12437 (define_insn "avx2_ashrv<mode>"
12438 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12440 (match_operand:VI4_AVX2 1 "register_operand" "x")
12441 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12443 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12444 [(set_attr "type" "sseishft")
12445 (set_attr "prefix" "vex")
12446 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts for every code in the any_lshift iterator, over the
;; VI48_AVX2 modes.  The mnemonic is assembled from <vshift> and the
;; element-size suffix.  Operand 1 (the value) must be in a register;
;; operand 2 (the counts) may be a register or memory.
12448 (define_insn "avx2_<shift_insn>v<mode>"
12449 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12450 (any_lshift:VI48_AVX2
12451 (match_operand:VI48_AVX2 1 "register_operand" "x")
12452 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12454 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12455 [(set_attr "type" "sseishft")
12456 (set_attr "prefix" "vex")
12457 (set_attr "mode" "<sseinsnmode>")])
;; Build a 256-bit vector (operand 0) from two half-width operands.
;; There are two alternatives, distinguished by operand 2: register or
;; memory ("xm"), or a constant accepted by constraint "C".
;; The output code switches on which_alternative.  One form is
;; vinsert<i128> with immediate 0x1, which inserts operand 2 on top of
;; operand 1 printed with the %t modifier.  The other is a plain
;; vmovaps/vmovapd/vmovdqa of operand 1 into operand 0 printed with the
;; %x modifier, the mnemonic being chosen from the insn's mode attribute.
;; The per-alternative attributes ("sselog,ssemov", length_immediate
;; "1,*") indicate the insert form belongs to the first alternative.
12459 (define_insn "avx_vec_concat<mode>"
12460 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12462 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12463 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12466 switch (which_alternative)
12469 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12471 switch (get_attr_mode (insn))
12474 return "vmovaps\t{%1, %x0|%x0, %1}";
12476 return "vmovapd\t{%1, %x0|%x0, %1}";
12478 return "vmovdqa\t{%1, %x0|%x0, %1}";
12481 gcc_unreachable ();
12484 [(set_attr "type" "sselog,ssemov")
12485 (set_attr "prefix_extra" "1,*")
12486 (set_attr "length_immediate" "1,*")
12487 (set_attr "prefix" "vex")
12488 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision to single-precision conversion, register form.
;; The conversion of the whole V8HI register operand 1 is modelled as a
;; V8SF unspec, of which only four lanes form the V4SF result in operand 0.
;; The lane selector must therefore name lanes 0, 1, 2 and 3 in order; a
;; selector that repeats a lane would describe a result that differs from
;; what the instruction actually produces.
12490 (define_insn "vcvtph2ps"
12491 [(set (match_operand:V4SF 0 "register_operand" "=x")
12493 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12495 (parallel [(const_int 0) (const_int 1)
12496 (const_int 2) (const_int 3)])))]
12498 "vcvtph2ps\t{%1, %0|%0, %1}"
12499 [(set_attr "type" "ssecvt")
12500 (set_attr "prefix" "vex")
12501 (set_attr "mode" "V4SF")])
;; Half-precision to single-precision conversion, memory form: four
;; halves are read from the V4HI memory operand 1 and the V4SF result is
;; written to register operand 0, expressed directly as a V4SF unspec.
;; NOTE(review): the "mode" attribute below is V8SF although the result
;; is V4SF -- confirm whether this is intentional.
12503 (define_insn "*vcvtph2ps_load"
12504 [(set (match_operand:V4SF 0 "register_operand" "=x")
12505 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12506 UNSPEC_VCVTPH2PS))]
12508 "vcvtph2ps\t{%1, %0|%0, %1}"
12509 [(set_attr "type" "ssecvt")
12510 (set_attr "prefix" "vex")
12511 (set_attr "mode" "V8SF")])
;; 256-bit half-precision to single-precision conversion: all eight
;; elements of the V8HI operand 1 (register or memory) are converted into
;; the V8SF register operand 0.
12513 (define_insn "vcvtph2ps256"
12514 [(set (match_operand:V8SF 0 "register_operand" "=x")
12515 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12516 UNSPEC_VCVTPH2PS))]
12518 "vcvtph2ps\t{%1, %0|%0, %1}"
12519 [(set_attr "type" "ssecvt")
12520 (set_attr "prefix" "vex")
12521 (set_attr "mode" "V8SF")])
;; Single-precision to half-precision conversion with a V8HI register
;; result.  The conversion itself is a V4HI unspec of the V4SF register
;; operand 1 and the immediate operand 2 (a constant in 0..255).  The
;; preparation statement supplies operand 3 as the V4HImode zero constant.
12523 (define_expand "vcvtps2ph"
12524 [(set (match_operand:V8HI 0 "register_operand" "")
12526 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12527 (match_operand:SI 2 "const_0_to_255_operand" "")]
12531 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of the single-to-half conversion.  Operand 0 is the V8HI
;; destination register, operand 1 the V4SF source register and operand 2
;; the immediate (constraint "N").  Operand 3 must be a V4HI constant
;; accepted by const0_operand; it does not appear in the assembler output.
12533 (define_insn "*vcvtps2ph"
12534 [(set (match_operand:V8HI 0 "register_operand" "=x")
12536 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12537 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12539 (match_operand:V4HI 3 "const0_operand" "")))]
12541 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12542 [(set_attr "type" "ssecvt")
12543 (set_attr "prefix" "vex")
12544 (set_attr "mode" "V4SF")])
;; Memory form of the single-to-half conversion: the V4HI result of
;; converting the V4SF register operand 1, using the immediate operand 2,
;; is stored directly to the V4HI memory operand 0.
12546 (define_insn "*vcvtps2ph_store"
12547 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12548 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12549 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12550 UNSPEC_VCVTPS2PH))]
12552 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12553 [(set_attr "type" "ssecvt")
12554 (set_attr "prefix" "vex")
12555 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: the V8SF register operand 1 is
;; converted, using the immediate operand 2, into the V8HI operand 0,
;; which may be either a register or memory (single "=xm" alternative).
12557 (define_insn "vcvtps2ph256"
12558 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12559 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12560 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12561 UNSPEC_VCVTPS2PH))]
12563 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12564 [(set_attr "type" "ssecvt")
12565 (set_attr "prefix" "vex")
12566 (set_attr "mode" "V8SF")])
12568 ;; For gather* insn patterns
;; Iterator over the data modes handled by the gather patterns.
12569 (define_mode_iterator VEC_GATHER_MODE
12570 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode attribute sharing the iterator's name.  The gathersi patterns use
;; it as <VEC_GATHER_MODE> for the vector register that forms part of the
;; VSIB address.  Every data mode maps to V4SI except the eight-element
;; modes V8SI and V8SF, which map to V8SI.
12571 (define_mode_attr VEC_GATHER_MODE
12572 [(V2DI "V4SI") (V2DF "V4SI")
12573 (V4DI "V4SI") (V4DF "V4SI")
12574 (V4SI "V4SI") (V4SF "V4SI")
12575 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for gathers whose address vector has mode <VEC_GATHER_MODE>.
;; Operand 0 is the destination, operand 1 a register of the data mode and
;; operand 4 another register of the data mode.  The address is described
;; by operand 2 (accepted by vsib_address_operand), operand 3 (the vector
;; register) and operand 5 (a constant accepted by const1248_operand).
;; Operand 6 is a scratch of the data mode that the pattern clobbers.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR unspec.
12577 (define_expand "avx2_gathersi<mode>"
12578 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12579 (unspec:VEC_GATHER_MODE
12580 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12581 (mem:<ssescalarmode>
12583 [(match_operand 2 "vsib_address_operand" "")
12584 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12585 (match_operand:SI 5 "const1248_operand" "")]))
12586 (mem:BLK (scratch))
12587 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12589 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12593 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12594 operands[5]), UNSPEC_VSIBADDR);
;; Gather instruction with a <VEC_GATHER_MODE> address vector.
;; Operand 0 is the earlyclobber destination; operand 2 is tied to it
;; (constraint "0").  Operand 7 is the memory reference, matched by
;; vsib_mem_operator, whose address is made of operand 3 (address, "p"),
;; operand 4 (vector register) and operand 6 (constant accepted by
;; const1248_operand).  Operand 5 is tied (constraint "1") to operand 1,
;; an earlyclobber scratch that the insn clobbers.  The assembler output
;; names only operands 1, 7 and 0.
12597 (define_insn "*avx2_gathersi<mode>"
12598 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12599 (unspec:VEC_GATHER_MODE
12600 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12601 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12603 [(match_operand:P 3 "vsib_address_operand" "p")
12604 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12605 (match_operand:SI 6 "const1248_operand" "n")]
12607 (mem:BLK (scratch))
12608 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12610 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12612 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12613 [(set_attr "type" "ssemov")
12614 (set_attr "prefix" "vex")
12615 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers whose address vector has mode <AVXMODE48P_DI>.
;; Operand 0 is the destination, operand 1 a register of the data mode and
;; operand 4 another register of the data mode.  The address is described
;; by operand 2 (accepted by vsib_address_operand), operand 3 (the vector
;; register) and operand 5 (a constant accepted by const1248_operand).
;; Operand 6 is a scratch of the data mode that the pattern clobbers.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR unspec.
12617 (define_expand "avx2_gatherdi<mode>"
12618 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12619 (unspec:VEC_GATHER_MODE
12620 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12621 (mem:<ssescalarmode>
12623 [(match_operand 2 "vsib_address_operand" "")
12624 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12625 (match_operand:SI 5 "const1248_operand" "")]))
12626 (mem:BLK (scratch))
12627 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12629 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12633 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12634 operands[5]), UNSPEC_VSIBADDR);
;; Gather instruction with an <AVXMODE48P_DI> address vector, iterated
;; over the AVXMODE48P_DI modes.  Operand 0 is the earlyclobber
;; destination; operand 2 is tied to it (constraint "0").  Operand 7 is
;; the memory reference, matched by vsib_mem_operator, whose address is
;; made of operand 3 (address, "p"), operand 4 (vector register) and
;; operand 6 (constant accepted by const1248_operand).  Operand 5 is tied
;; (constraint "1") to operand 1, an earlyclobber scratch that the insn
;; clobbers.  The assembler output names only operands 1, 7 and 0.
12637 (define_insn "*avx2_gatherdi<mode>"
12638 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12639 (unspec:AVXMODE48P_DI
12640 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12641 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12643 [(match_operand:P 3 "vsib_address_operand" "p")
12644 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12645 (match_operand:SI 6 "const1248_operand" "n")]
12647 (mem:BLK (scratch))
12648 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
12650 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12652 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12653 [(set_attr "type" "ssemov")
12654 (set_attr "prefix" "vex")
12655 (set_attr "mode" "<sseinsnmode>")])
12657 ;; Special handling for the VEX.256 form whose address vector is V4DI:
12658 ;; the destination and the other data operands still use the VI4F_128 modes.
;; Operand 0 is the destination, operands 1 and 4 are registers of the
;; data mode, and operand 6 is a scratch that the pattern clobbers.  The
;; address is described by operand 2 (accepted by vsib_address_operand),
;; operand 3 (the V4DI register) and operand 5 (a constant accepted by
;; const1248_operand); the preparation code packs those three into a
;; Pmode UNSPEC_VSIBADDR unspec.
12659 (define_expand "avx2_gatherdi<mode>256"
12660 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12662 [(match_operand:VI4F_128 1 "register_operand" "")
12663 (mem:<ssescalarmode>
12665 [(match_operand 2 "vsib_address_operand" "")
12666 (match_operand:V4DI 3 "register_operand" "")
12667 (match_operand:SI 5 "const1248_operand" "")]))
12668 (mem:BLK (scratch))
12669 (match_operand:VI4F_128 4 "register_operand" "")]
12671 (clobber (match_scratch:VI4F_128 6 ""))])]
12675 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12676 operands[5]), UNSPEC_VSIBADDR);
;; Gather instruction with a V4DI address vector and VI4F_128 data.
;; Operand 0 is the destination; it is marked earlyclobber, like the
;; destinations of the other gather insns, so that it is never allocated
;; to the register of an input that is not explicitly tied to it.
;; Operand 2 is tied to operand 0 (constraint "0").  Operand 7 is the
;; memory reference, matched by vsib_mem_operator, whose address is made
;; of operand 3 (address, "p"), operand 4 (the V4DI register) and
;; operand 6 (constant accepted by const1248_operand).  Operand 5 is tied
;; (constraint "1") to operand 1, an earlyclobber scratch that the insn
;; clobbers.  The assembler output names only operands 1, 7 and 0.
12679 (define_insn "*avx2_gatherdi<mode>256"
12680 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12682 [(match_operand:VI4F_128 2 "register_operand" "0")
12683 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12685 [(match_operand:P 3 "vsib_address_operand" "p")
12686 (match_operand:V4DI 4 "register_operand" "x")
12687 (match_operand:SI 6 "const1248_operand" "n")]
12689 (mem:BLK (scratch))
12690 (match_operand:VI4F_128 5 "register_operand" "1")]
12692 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12694 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12695 [(set_attr "type" "ssemov")
12696 (set_attr "prefix" "vex")
12697 (set_attr "mode" "<sseinsnmode>")])