1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; Every 256-bit mode (V32QI, V16HI, V8SI, V4DI, V2TI, V8SF, V4DF) is
;; enabled only under TARGET_AVX; the 128-bit modes carry no condition here.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Integer and float vector modes, without the V?TImode entries of V16.
;; 256-bit modes require TARGET_AVX and V2DF requires TARGET_SSE2; the
;; remaining 128-bit modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; (integer and float).  Only V2DF is conditional: it needs TARGET_SSE2.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; (integer and float).  No per-mode condition is attached in this
;; iterator, so any target check has to come from the pattern using it.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; (single-precision elements): V8SF under TARGET_AVX, V4SF unconditionally.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; (double-precision elements): V4DF under TARGET_AVX.  Unlike in VF, V2DF
;; carries no TARGET_SSE2 condition here, so patterns using VF2 must check
;; for SSE2 themselves.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; The 256-bit modes are gated on TARGET_AVX (not TARGET_AVX2; see VI_AVX2
;; for the AVX2-gated variant); the 128-bit modes are unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; All vector integer modes, with the 256-bit modes enabled only under
;; TARGET_AVX2.  Same mode list as VI; only the condition differs.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; (byte elements): V32QI under TARGET_AVX, V16QI unconditionally.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; (64-bit elements): V4DI under TARGET_AVX, V2DI unconditionally.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Per-element-size integer vector iterators whose 256-bit mode is enabled
;; only under TARGET_AVX2.  The digit in the name is the element size in
;; bytes: 1 = QI, 2 = HI, 4 = SI, 8 = DI.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; Whole-register integer modes: V2TI (only under TARGET_AVX2) and V1TI.
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
;; Same shape as VIMAX_AVX2, except that the 128-bit entry is scalar TImode
;; rather than V1TI.
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; Combinations of integer vector modes by element size, with the 256-bit
;; modes enabled only under TARGET_AVX2.  The digits in each name list the
;; element sizes in bytes that the iterator covers (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI); e.g. VI248_AVX2 covers HI, SI and DI element vectors.
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI4SD_AVX2
131 (define_mode_iterator V48_AVX2
134 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
135 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps each 128-bit integer vector mode (including V1TI) to "sse2" and each
;; 256-bit one (including V2TI) to "avx2".
;; NOTE(review): presumably substituted into pattern names to select the
;; ISA prefix -- confirm against the patterns that use <sse2_avx2>.
137 (define_mode_attr sse2_avx2
138 [(V16QI "sse2") (V32QI "avx2")
139 (V8HI "sse2") (V16HI "avx2")
140 (V4SI "sse2") (V8SI "avx2")
141 (V2DI "sse2") (V4DI "avx2")
142 (V1TI "sse2") (V2TI "avx2")])
;; Maps each 128-bit integer vector mode to "ssse3" and each 256-bit one to
;; "avx2".  Note that the 128-bit whole-register entry is scalar TI here,
;; whereas sse2_avx2 uses V1TI; this matches the modes of SSESCALARMODE.
144 (define_mode_attr ssse3_avx2
145 [(V16QI "ssse3") (V32QI "avx2")
146 (V8HI "ssse3") (V16HI "avx2")
147 (V4SI "ssse3") (V8SI "avx2")
148 (V2DI "ssse3") (V4DI "avx2")
149 (TI "ssse3") (V2TI "avx2")])
;; Maps each 128-bit integer vector mode to "sse4_1" and each 256-bit one
;; to "avx2".  No TImode/V?TImode entries are provided.
151 (define_mode_attr sse4_1_avx2
152 [(V16QI "sse4_1") (V32QI "avx2")
153 (V8HI "sse4_1") (V16HI "avx2")
154 (V4SI "sse4_1") (V8SI "avx2")
155 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps float vector modes (128- and 256-bit) to "avx" and the SI/DI
;; element integer vector modes (128- and 256-bit) to "avx2".  The split
;; is by element type, not by vector width.
157 (define_mode_attr avx_avx2
158 [(V4SF "avx") (V2DF "avx")
159 (V8SF "avx") (V4DF "avx")
160 (V4SI "avx2") (V2DI "avx2")
161 (V8SI "avx2") (V4DI "avx2")])
;; Maps each 128-bit integer vector mode to "vec" and each 256-bit one to
;; "avx2".
;; NOTE(review): "vec" looks like the prefix of generic vec_* pattern
;; names rather than an ISA name -- confirm at the use sites.
163 (define_mode_attr vec_avx2
164 [(V16QI "vec") (V32QI "avx2")
165 (V8HI "vec") (V16HI "avx2")
166 (V4SI "vec") (V8SI "avx2")
167 (V2DI "vec") (V4DI "avx2")])
169 ;; Mapping of logic-shift operators
;; (logical right shift and left shift).  The code attribute "lshift"
;; defined below shares its name with this code iterator.
170 (define_code_iterator lshift [lshiftrt ashift])
;; NOTE(review): "srl"/"sll" resemble the instruction mnemonic stems, while
;; "lshr"/"lshl" resemble pattern-name stems, so the two "Base name"
;; descriptions below may be attached to the wrong attribute -- confirm
;; against the patterns that use <lshift_insn> and <lshift>.
172 ;; Base name for define_insn
173 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
175 ;; Base name for insn mnemonic
176 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; Maps a vector of HImode elements to the vector with the same number of
;; SImode elements (element width, and so total size, is doubled).
178 (define_mode_attr ssedoublemode
179 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a vector of DImode elements to the QImode-element vector of the
;; same total size (V4DI -> V32QI, V2DI -> V16QI).
181 (define_mode_attr ssebytemode
182 [(V4DI "V32QI") (V2DI "V16QI")])
;; None of the iterators in this group attaches a target condition to its
;; modes; any ISA check must come from the patterns that use them.
184 ;; All 128bit vector integer modes
185 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
187 ;; All 256bit vector integer modes
188 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
190 ;; Random 128bit vector integer mode combinations
;; The digits in each name list the covered element sizes in bytes
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
191 (define_mode_iterator VI12_128 [V16QI V8HI])
192 (define_mode_iterator VI14_128 [V16QI V4SI])
193 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
194 (define_mode_iterator VI24_128 [V8HI V4SI])
195 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
197 ;; Random 256bit vector integer mode combinations
198 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
200 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share both
;; element size (4 or 8 bytes, per the digit in the name) and total vector
;; width (128 or 256 bits, per the suffix).
201 (define_mode_iterator VI4F_128 [V4SI V4SF])
202 (define_mode_iterator VI8F_128 [V2DI V2DF])
203 (define_mode_iterator VI4F_256 [V8SI V8SF])
204 (define_mode_iterator VI8F_256 [V4DI V4DF])
206 ;; Mapping from float mode to required SSE level
;; Covers scalar SF/DF as well as the vector modes: single-precision
;; 128-bit and scalar map to "sse", double-precision to "sse2", and both
;; 256-bit modes to "avx".  Substituted as <sse> into pattern names such as
;; "<sse>_movu<ssemodesuffix><avxsizesuffix>".
207 (define_mode_attr sse
208 [(SF "sse") (DF "sse2")
209 (V4SF "sse") (V2DF "sse2")
210 (V8SF "avx") (V4DF "avx")])
;; ISA-name selectors for patterns that exist in a 128-bit SSEn form and a
;; 256-bit AVX form: the 128-bit mode maps to the SSE level named by the
;; attribute and the 256-bit mode maps to "avx".
;; <sse2> is substituted into names such as "<sse2>_movdqu<avxsizesuffix>"
;; and "<sse2>_movnt<mode>".
212 (define_mode_attr sse2
213 [(V16QI "sse2") (V32QI "avx")
214 (V2DI "sse2") (V4DI "avx")])
;; <sse3> is substituted into "<sse3>_lddqu<avxsizesuffix>".
216 (define_mode_attr sse3
217 [(V16QI "sse3") (V32QI "avx")])
;; Float vector modes only: 128-bit -> "sse4_1", 256-bit -> "avx".
219 (define_mode_attr sse4_1
220 [(V4SF "sse4_1") (V2DF "sse4_1")
221 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix distinguishing vector widths: "256" for every
;; 256-bit mode and the empty string for every 128-bit mode.  Used in names
;; such as "<sse2>_movdqu<avxsizesuffix>".
223 (define_mode_attr avxsizesuffix
224 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
225 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
226 (V8SF "256") (V4DF "256")
227 (V4SF "") (V2DF "")])
229 ;; SSE instruction mode
230 (define_mode_attr sseinsnmode
231 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
232 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
233 (V8SF "V8SF") (V4DF "V4DF")
234 (V4SF "V4SF") (V2DF "V2DF")
237 ;; Mapping of vector modes to an integer vector mode of the same size.
;; Float vector modes map to the integer vector mode with the same element
;; count and element width; integer vector modes map to themselves.
;; Each source mode is listed exactly once.
238 (define_mode_attr sseintvecmode
239 [(V8SF "V8SI") (V4DF "V4DI")
240 (V4SF "V4SI") (V2DF "V2DI")
242 (V8SI "V8SI") (V4DI "V4DI")
243 (V4SI "V4SI") (V2DI "V2DI")
244 (V16HI "V16HI") (V8HI "V8HI")
245 (V32QI "V32QI") (V16QI "V16QI")])
247 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled, so 128-bit
;; modes map to 256-bit modes and 256-bit modes map to 512-bit mode names
;; (V64QI, V32HI, V16SI, V8DI, V16SF, V8DF).
248 (define_mode_attr ssedoublevecmode
249 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
250 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
251 (V8SF "V16SF") (V4DF "V8DF")
252 (V4SF "V8SF") (V2DF "V4DF")])
254 ;; Mapping of vector modes to a vector mode of half size
255 (define_mode_attr ssehalfvecmode
256 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
257 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
258 (V8SF "V4SF") (V4DF "V2DF")
261 ;; Mapping of vector modes back to the scalar modes
;; i.e. each vector mode maps to the mode of a single element.  Used, for
;; example, as the "mode" attribute of the scalar <sse>_vm* patterns.
262 (define_mode_attr ssescalarmode
263 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
264 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
265 (V8SF "SF") (V4DF "DF")
266 (V4SF "SF") (V2DF "DF")])
268 ;; Number of scalar elements in each vector type
;; The values are decimal strings (e.g. "32"), not integers, because mode
;; attributes are substituted textually.
269 (define_mode_attr ssescalarnum
270 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
271 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
272 (V8SF "8") (V4DF "4")
273 (V4SF "4") (V2DF "2")])
275 ;; SSE prefix for integer vector modes
276 (define_mode_attr sseintprefix
277 [(V2DI "p") (V2DF "")
280 (V8SI "p") (V8SF "")])
282 ;; SSE scalar suffix for vector modes
283 (define_mode_attr ssescalarmodesuffix
285 (V8SF "ss") (V4DF "sd")
286 (V4SF "ss") (V2DF "sd")
287 (V8SI "ss") (V4DI "sd")
290 ;; Pack/unpack vector modes
;; sseunpackmode maps an integer vector mode to the mode of the same total
;; size with half as many elements of twice the width.
291 (define_mode_attr sseunpackmode
292 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
293 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; ssepackmode is the exact inverse table: same total size, twice as many
;; elements of half the width.
295 (define_mode_attr ssepackmode
296 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
297 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
299 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1,
;; 64-1).  Only 128-bit integer vector modes are listed.
300 (define_mode_attr sserotatemax
301 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
303 ;; Mapping of mode to cast intrinsic name
;; Only three 256-bit modes are listed: V8SI ("si"), V8SF ("ps") and
;; V4DF ("pd").
304 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
306 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code, not on a mode).
307 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
309 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes map to the literal "f128"; integer modes map to "%~128",
;; i.e. the choice is deferred to the "%~" output escape.
;; NOTE(review): presumably "%~" prints "i" under TARGET_AVX2 and "f"
;; otherwise -- confirm in the backend's operand-printing code.
;; Only 256-bit modes are listed.
310 (define_mode_attr i128
311 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
312 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes: V8SI, V8SF and V4DF.
;; NOTE(review): this is the same mode set that the castmode attribute
;; covers, so it is presumably meant for the cast patterns -- confirm.
315 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; 128- and 256-bit vectors with 8-byte elements, plus the 128-bit vectors
;; with 4-byte elements.  No target conditions are attached.
317 (define_mode_iterator AVXMODE48P_DI
318 [V2DI V2DF V4DI V4DF V4SF V4SI])
;; Mode attribute sharing the iterator's name: maps each mode to the
;; DImode-element integer vector of the same total size (128-bit -> V2DI,
;; 256-bit -> V4DI).  It also lists V8SI and V8SF, which the iterator
;; above does not contain.
319 (define_mode_attr AVXMODE48P_DI
320 [(V2DI "V2DI") (V2DF "V2DI")
321 (V4DI "V4DI") (V4DF "V4DI")
322 (V4SI "V2DI") (V4SF "V2DI")
323 (V8SI "V4DI") (V8SF "V4DI")])
;; Scalar (SF, DF) and vector (128- and 256-bit) float modes.
;; NOTE(review): by its name, used by the FMA patterns -- they are not
;; visible in this part of the file.
325 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
327 ;; Mapping of immediate bits for blend instructions
;; Each value is an all-ones mask with one bit per vector element, i.e.
;; 2^N - 1 for an N-element vector (8 -> 255, 4 -> 15, 2 -> 3).
328 (define_mode_attr blendbits
329 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
331 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
333 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
339 ;; All of these patterns are enabled for SSE1 as well as SSE2.
340 ;; This is essential for maintaining stable calling conventions.
342 (define_expand "mov<mode>"
343 [(set (match_operand:V16 0 "nonimmediate_operand" "")
344 (match_operand:V16 1 "nonimmediate_operand" ""))]
347 ix86_expand_vector_move (<MODE>mode, operands);
351 (define_insn "*mov<mode>_internal"
352 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
353 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
355 && (register_operand (operands[0], <MODE>mode)
356 || register_operand (operands[1], <MODE>mode))"
358 switch (which_alternative)
361 return standard_sse_constant_opcode (insn, operands[1]);
364 switch (get_attr_mode (insn))
369 && (misaligned_operand (operands[0], <MODE>mode)
370 || misaligned_operand (operands[1], <MODE>mode)))
371 return "vmovups\t{%1, %0|%0, %1}";
373 return "%vmovaps\t{%1, %0|%0, %1}";
378 && (misaligned_operand (operands[0], <MODE>mode)
379 || misaligned_operand (operands[1], <MODE>mode)))
380 return "vmovupd\t{%1, %0|%0, %1}";
381 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
382 return "%vmovaps\t{%1, %0|%0, %1}";
384 return "%vmovapd\t{%1, %0|%0, %1}";
389 && (misaligned_operand (operands[0], <MODE>mode)
390 || misaligned_operand (operands[1], <MODE>mode)))
391 return "vmovdqu\t{%1, %0|%0, %1}";
392 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
393 return "%vmovaps\t{%1, %0|%0, %1}";
395 return "%vmovdqa\t{%1, %0|%0, %1}";
404 [(set_attr "type" "sselog1,ssemov,ssemov")
405 (set_attr "prefix" "maybe_vex")
407 (cond [(match_test "TARGET_AVX")
408 (const_string "<sseinsnmode>")
409 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
410 (not (match_test "TARGET_SSE2")))
411 (and (eq_attr "alternative" "2")
412 (match_test "TARGET_SSE_TYPELESS_STORES")))
413 (const_string "V4SF")
414 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
415 (const_string "V4SF")
416 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
417 (const_string "V2DF")
419 (const_string "TI")))])
421 (define_insn "sse2_movq128"
422 [(set (match_operand:V2DI 0 "register_operand" "=x")
425 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
426 (parallel [(const_int 0)]))
429 "%vmovq\t{%1, %0|%0, %1}"
430 [(set_attr "type" "ssemov")
431 (set_attr "prefix" "maybe_vex")
432 (set_attr "mode" "TI")])
434 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
435 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
436 ;; from memory, we'd prefer to load the memory directly into the %xmm
437 ;; register. To facilitate this happy circumstance, this pattern won't
438 ;; split until after register allocation. If the 64-bit value didn't
439 ;; come from memory, this is the best we can do. This is much better
440 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
443 (define_insn_and_split "movdi_to_sse"
445 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
446 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
447 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
448 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
450 "&& reload_completed"
453 if (register_operand (operands[1], DImode))
455 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
456 Assemble the 64-bit DImode value in an xmm register. */
457 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
458 gen_rtx_SUBREG (SImode, operands[1], 0)));
459 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
460 gen_rtx_SUBREG (SImode, operands[1], 4)));
461 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
464 else if (memory_operand (operands[1], DImode))
465 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
466 operands[1], const0_rtx));
472 [(set (match_operand:V4SF 0 "register_operand" "")
473 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
474 "TARGET_SSE && reload_completed"
477 (vec_duplicate:V4SF (match_dup 1))
481 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
482 operands[2] = CONST0_RTX (V4SFmode);
486 [(set (match_operand:V2DF 0 "register_operand" "")
487 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
488 "TARGET_SSE2 && reload_completed"
489 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
491 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
492 operands[2] = CONST0_RTX (DFmode);
495 (define_expand "push<mode>1"
496 [(match_operand:V16 0 "register_operand" "")]
499 ix86_expand_push (<MODE>mode, operands[0]);
503 (define_expand "movmisalign<mode>"
504 [(set (match_operand:V16 0 "nonimmediate_operand" "")
505 (match_operand:V16 1 "nonimmediate_operand" ""))]
508 ix86_expand_vector_move_misalign (<MODE>mode, operands);
512 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
513 [(set (match_operand:VF 0 "nonimmediate_operand" "")
515 [(match_operand:VF 1 "nonimmediate_operand" "")]
519 if (MEM_P (operands[0]) && MEM_P (operands[1]))
520 operands[1] = force_reg (<MODE>mode, operands[1]);
523 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
524 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
526 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
528 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
529 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
530 [(set_attr "type" "ssemov")
531 (set_attr "movu" "1")
532 (set_attr "prefix" "maybe_vex")
533 (set_attr "mode" "<MODE>")])
535 (define_expand "<sse2>_movdqu<avxsizesuffix>"
536 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
537 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
541 if (MEM_P (operands[0]) && MEM_P (operands[1]))
542 operands[1] = force_reg (<MODE>mode, operands[1]);
545 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
546 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
547 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
550 "%vmovdqu\t{%1, %0|%0, %1}"
551 [(set_attr "type" "ssemov")
552 (set_attr "movu" "1")
553 (set (attr "prefix_data16")
555 (match_test "TARGET_AVX")
558 (set_attr "prefix" "maybe_vex")
559 (set_attr "mode" "<sseinsnmode>")])
561 (define_insn "<sse3>_lddqu<avxsizesuffix>"
562 [(set (match_operand:VI1 0 "register_operand" "=x")
563 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
566 "%vlddqu\t{%1, %0|%0, %1}"
567 [(set_attr "type" "ssemov")
568 (set_attr "movu" "1")
569 (set (attr "prefix_data16")
571 (match_test "TARGET_AVX")
574 (set (attr "prefix_rep")
576 (match_test "TARGET_AVX")
579 (set_attr "prefix" "maybe_vex")
580 (set_attr "mode" "<sseinsnmode>")])
582 (define_insn "sse2_movntsi"
583 [(set (match_operand:SI 0 "memory_operand" "=m")
584 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
587 "movnti\t{%1, %0|%0, %1}"
588 [(set_attr "type" "ssemov")
589 (set_attr "prefix_data16" "0")
590 (set_attr "mode" "V2DF")])
592 (define_insn "<sse>_movnt<mode>"
593 [(set (match_operand:VF 0 "memory_operand" "=m")
594 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
597 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
598 [(set_attr "type" "ssemov")
599 (set_attr "prefix" "maybe_vex")
600 (set_attr "mode" "<MODE>")])
602 (define_insn "<sse2>_movnt<mode>"
603 [(set (match_operand:VI8 0 "memory_operand" "=m")
604 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
607 "%vmovntdq\t{%1, %0|%0, %1}"
608 [(set_attr "type" "ssecvt")
609 (set (attr "prefix_data16")
611 (match_test "TARGET_AVX")
614 (set_attr "prefix" "maybe_vex")
615 (set_attr "mode" "<sseinsnmode>")])
617 ; Expand patterns for non-temporal stores. At the moment, only those
618 ; that directly map to insns are defined; it would be possible to
619 ; define patterns for other modes that would expand to several insns.
621 ;; Modes handled by storent patterns.
622 (define_mode_iterator STORENT_MODE
623 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
625 (V8SF "TARGET_AVX") V4SF
626 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
628 (define_expand "storent<mode>"
629 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
631 [(match_operand:STORENT_MODE 1 "register_operand" "")]
635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
637 ;; Parallel floating point arithmetic
639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
641 (define_expand "<code><mode>2"
642 [(set (match_operand:VF 0 "register_operand" "")
644 (match_operand:VF 1 "register_operand" "")))]
646 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
648 (define_insn_and_split "*absneg<mode>2"
649 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
650 (match_operator:VF 3 "absneg_operator"
651 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
652 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
655 "&& reload_completed"
658 enum rtx_code absneg_op;
664 if (MEM_P (operands[1]))
665 op1 = operands[2], op2 = operands[1];
667 op1 = operands[1], op2 = operands[2];
672 if (rtx_equal_p (operands[0], operands[1]))
678 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
679 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
680 t = gen_rtx_SET (VOIDmode, operands[0], t);
684 [(set_attr "isa" "noavx,noavx,avx,avx")])
686 (define_expand "<plusminus_insn><mode>3"
687 [(set (match_operand:VF 0 "register_operand" "")
689 (match_operand:VF 1 "nonimmediate_operand" "")
690 (match_operand:VF 2 "nonimmediate_operand" "")))]
692 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
694 (define_insn "*<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "=x,x")
697 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
698 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
699 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
701 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
702 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
703 [(set_attr "isa" "noavx,avx")
704 (set_attr "type" "sseadd")
705 (set_attr "prefix" "orig,vex")
706 (set_attr "mode" "<MODE>")])
708 (define_insn "<sse>_vm<plusminus_insn><mode>3"
709 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
712 (match_operand:VF_128 1 "register_operand" "0,x")
713 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
718 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
719 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
720 [(set_attr "isa" "noavx,avx")
721 (set_attr "type" "sseadd")
722 (set_attr "prefix" "orig,vex")
723 (set_attr "mode" "<ssescalarmode>")])
725 (define_expand "mul<mode>3"
726 [(set (match_operand:VF 0 "register_operand" "")
728 (match_operand:VF 1 "nonimmediate_operand" "")
729 (match_operand:VF 2 "nonimmediate_operand" "")))]
731 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
733 (define_insn "*mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "=x,x")
736 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
737 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
738 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
740 mul<ssemodesuffix>\t{%2, %0|%0, %2}
741 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "isa" "noavx,avx")
743 (set_attr "type" "ssemul")
744 (set_attr "prefix" "orig,vex")
745 (set_attr "mode" "<MODE>")])
747 (define_insn "<sse>_vmmul<mode>3"
748 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
751 (match_operand:VF_128 1 "register_operand" "0,x")
752 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
757 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
758 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
759 [(set_attr "isa" "noavx,avx")
760 (set_attr "type" "ssemul")
761 (set_attr "prefix" "orig,vex")
762 (set_attr "mode" "<ssescalarmode>")])
764 (define_expand "div<mode>3"
765 [(set (match_operand:VF2 0 "register_operand" "")
766 (div:VF2 (match_operand:VF2 1 "register_operand" "")
767 (match_operand:VF2 2 "nonimmediate_operand" "")))]
769 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
771 (define_expand "div<mode>3"
772 [(set (match_operand:VF1 0 "register_operand" "")
773 (div:VF1 (match_operand:VF1 1 "register_operand" "")
774 (match_operand:VF1 2 "nonimmediate_operand" "")))]
777 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
780 && TARGET_RECIP_VEC_DIV
781 && !optimize_insn_for_size_p ()
782 && flag_finite_math_only && !flag_trapping_math
783 && flag_unsafe_math_optimizations)
785 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
790 (define_insn "<sse>_div<mode>3"
791 [(set (match_operand:VF 0 "register_operand" "=x,x")
793 (match_operand:VF 1 "register_operand" "0,x")
794 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
797 div<ssemodesuffix>\t{%2, %0|%0, %2}
798 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
799 [(set_attr "isa" "noavx,avx")
800 (set_attr "type" "ssediv")
801 (set_attr "prefix" "orig,vex")
802 (set_attr "mode" "<MODE>")])
804 (define_insn "<sse>_vmdiv<mode>3"
805 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
808 (match_operand:VF_128 1 "register_operand" "0,x")
809 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
814 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
815 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssediv")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<ssescalarmode>")])
821 (define_insn "<sse>_rcp<mode>2"
822 [(set (match_operand:VF1 0 "register_operand" "=x")
824 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
826 "%vrcpps\t{%1, %0|%0, %1}"
827 [(set_attr "type" "sse")
828 (set_attr "atom_sse_attr" "rcp")
829 (set_attr "prefix" "maybe_vex")
830 (set_attr "mode" "<MODE>")])
832 (define_insn "sse_vmrcpv4sf2"
833 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
835 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
837 (match_operand:V4SF 2 "register_operand" "0,x")
841 rcpss\t{%1, %0|%0, %1}
842 vrcpss\t{%1, %2, %0|%0, %2, %1}"
843 [(set_attr "isa" "noavx,avx")
844 (set_attr "type" "sse")
845 (set_attr "atom_sse_attr" "rcp")
846 (set_attr "prefix" "orig,vex")
847 (set_attr "mode" "SF")])
849 (define_expand "sqrt<mode>2"
850 [(set (match_operand:VF2 0 "register_operand" "")
851 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
854 (define_expand "sqrt<mode>2"
855 [(set (match_operand:VF1 0 "register_operand" "")
856 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
860 && TARGET_RECIP_VEC_SQRT
861 && !optimize_insn_for_size_p ()
862 && flag_finite_math_only && !flag_trapping_math
863 && flag_unsafe_math_optimizations)
865 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
870 (define_insn "<sse>_sqrt<mode>2"
871 [(set (match_operand:VF 0 "register_operand" "=x")
872 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
874 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "atom_sse_attr" "sqrt")
877 (set_attr "prefix" "maybe_vex")
878 (set_attr "mode" "<MODE>")])
880 (define_insn "<sse>_vmsqrt<mode>2"
881 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
884 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
885 (match_operand:VF_128 2 "register_operand" "0,x")
889 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
890 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
891 [(set_attr "isa" "noavx,avx")
892 (set_attr "type" "sse")
893 (set_attr "atom_sse_attr" "sqrt")
894 (set_attr "prefix" "orig,vex")
895 (set_attr "mode" "<ssescalarmode>")])
897 (define_expand "rsqrt<mode>2"
898 [(set (match_operand:VF1 0 "register_operand" "")
900 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
903 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
907 (define_insn "<sse>_rsqrt<mode>2"
908 [(set (match_operand:VF1 0 "register_operand" "=x")
910 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
912 "%vrsqrtps\t{%1, %0|%0, %1}"
913 [(set_attr "type" "sse")
914 (set_attr "prefix" "maybe_vex")
915 (set_attr "mode" "<MODE>")])
917 (define_insn "sse_vmrsqrtv4sf2"
918 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
920 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
922 (match_operand:V4SF 2 "register_operand" "0,x")
926 rsqrtss\t{%1, %0|%0, %1}
927 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
928 [(set_attr "isa" "noavx,avx")
929 (set_attr "type" "sse")
930 (set_attr "prefix" "orig,vex")
931 (set_attr "mode" "SF")])
933 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
934 ;; isn't really correct, as those rtl operators aren't defined when
935 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
937 (define_expand "<code><mode>3"
938 [(set (match_operand:VF 0 "register_operand" "")
940 (match_operand:VF 1 "nonimmediate_operand" "")
941 (match_operand:VF 2 "nonimmediate_operand" "")))]
944 if (!flag_finite_math_only)
945 operands[1] = force_reg (<MODE>mode, operands[1]);
946 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
949 (define_insn "*<code><mode>3_finite"
950 [(set (match_operand:VF 0 "register_operand" "=x,x")
952 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
953 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
954 "TARGET_SSE && flag_finite_math_only
955 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
957 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
958 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
959 [(set_attr "isa" "noavx,avx")
960 (set_attr "type" "sseadd")
961 (set_attr "prefix" "orig,vex")
962 (set_attr "mode" "<MODE>")])
964 (define_insn "*<code><mode>3"
965 [(set (match_operand:VF 0 "register_operand" "=x,x")
967 (match_operand:VF 1 "register_operand" "0,x")
968 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
969 "TARGET_SSE && !flag_finite_math_only"
971 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
972 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
973 [(set_attr "isa" "noavx,avx")
974 (set_attr "type" "sseadd")
975 (set_attr "prefix" "orig,vex")
976 (set_attr "mode" "<MODE>")])
978 (define_insn "<sse>_vm<code><mode>3"
979 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
982 (match_operand:VF_128 1 "register_operand" "0,x")
983 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
988 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
989 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
990 [(set_attr "isa" "noavx,avx")
991 (set_attr "type" "sse")
992 (set_attr "prefix" "orig,vex")
993 (set_attr "mode" "<ssescalarmode>")])
995 ;; These versions of the min/max patterns implement exactly the operations
996 ;; min = (op1 < op2 ? op1 : op2)
997 ;; max = (!(op1 < op2) ? op1 : op2)
998 ;; Their operands are not commutative, and thus they may be used in the
999 ;; presence of -0.0 and NaN.
1001 (define_insn "*ieee_smin<mode>3"
1002 [(set (match_operand:VF 0 "register_operand" "=x,x")
1004 [(match_operand:VF 1 "register_operand" "0,x")
1005 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1009 min<ssemodesuffix>\t{%2, %0|%0, %2}
1010 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1011 [(set_attr "isa" "noavx,avx")
1012 (set_attr "type" "sseadd")
1013 (set_attr "prefix" "orig,vex")
1014 (set_attr "mode" "<MODE>")])
1016 (define_insn "*ieee_smax<mode>3"
1017 [(set (match_operand:VF 0 "register_operand" "=x,x")
1019 [(match_operand:VF 1 "register_operand" "0,x")
1020 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1024 max<ssemodesuffix>\t{%2, %0|%0, %2}
1025 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1026 [(set_attr "isa" "noavx,avx")
1027 (set_attr "type" "sseadd")
1028 (set_attr "prefix" "orig,vex")
1029 (set_attr "mode" "<MODE>")])
1031 (define_insn "avx_addsubv4df3"
1032 [(set (match_operand:V4DF 0 "register_operand" "=x")
1035 (match_operand:V4DF 1 "register_operand" "x")
1036 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1037 (minus:V4DF (match_dup 1) (match_dup 2))
1040 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "V4DF")])
1045 (define_insn "sse3_addsubv2df3"
1046 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1049 (match_operand:V2DF 1 "register_operand" "0,x")
1050 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1051 (minus:V2DF (match_dup 1) (match_dup 2))
1055 addsubpd\t{%2, %0|%0, %2}
1056 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1057 [(set_attr "isa" "noavx,avx")
1058 (set_attr "type" "sseadd")
1059 (set_attr "atom_unit" "complex")
1060 (set_attr "prefix" "orig,vex")
1061 (set_attr "mode" "V2DF")])
1063 (define_insn "avx_addsubv8sf3"
1064 [(set (match_operand:V8SF 0 "register_operand" "=x")
1067 (match_operand:V8SF 1 "register_operand" "x")
1068 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1069 (minus:V8SF (match_dup 1) (match_dup 2))
1072 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1073 [(set_attr "type" "sseadd")
1074 (set_attr "prefix" "vex")
1075 (set_attr "mode" "V8SF")])
1077 (define_insn "sse3_addsubv4sf3"
1078 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1081 (match_operand:V4SF 1 "register_operand" "0,x")
1082 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1083 (minus:V4SF (match_dup 1) (match_dup 2))
1087 addsubps\t{%2, %0|%0, %2}
1088 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1089 [(set_attr "isa" "noavx,avx")
1090 (set_attr "type" "sseadd")
1091 (set_attr "prefix" "orig,vex")
1092 (set_attr "prefix_rep" "1,*")
1093 (set_attr "mode" "V4SF")])
;; V4DF pattern whose name and mnemonic are expanded from the
;; <plusminus_insn> / <plusminus_mnemonic> iterator attributes.  The RTL picks
;; DF elements 0..3 of operand 1 and DF elements 0..3 of operand 2 one at a
;; time with vec_select.  Single alternative, emitted as the three-operand
;; vh<plusminus_mnemonic>pd.
1095 (define_insn "avx_h<plusminus_insn>v4df3"
1096 [(set (match_operand:V4DF 0 "register_operand" "=x")
1101 (match_operand:V4DF 1 "register_operand" "x")
1102 (parallel [(const_int 0)]))
1103 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1105 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1106 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1111 (parallel [(const_int 0)]))
1112 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1114 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1115 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1117 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1118 [(set_attr "type" "sseadd")
1119 (set_attr "prefix" "vex")
1120 (set_attr "mode" "V4DF")])
;; V2DF counterpart of the pattern family named by <plusminus_insn>.  The RTL
;; selects DF elements 0 and 1 of operand 1 and of operand 2.  Alternative 0
;; (noavx) ties operand 1 to the destination and emits the two-operand
;; h<plusminus_mnemonic>pd; alternative 1 (avx) emits the three-operand
;; vh<plusminus_mnemonic>pd.
1122 (define_insn "sse3_h<plusminus_insn>v2df3"
1123 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1127 (match_operand:V2DF 1 "register_operand" "0,x")
1128 (parallel [(const_int 0)]))
1129 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1132 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1133 (parallel [(const_int 0)]))
1134 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1137 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1138 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "isa" "noavx,avx")
1140 (set_attr "type" "sseadd")
1141 (set_attr "prefix" "orig,vex")
1142 (set_attr "mode" "V2DF")])
;; V8SF pattern of the <plusminus_insn> family.  The vec_select groups appear
;; in this order: SF elements 0..3 of operand 1, elements 0..3 of operand 2,
;; elements 4..7 of operand 1, then elements 4..7 of operand 2.  Single
;; alternative, emitted as the three-operand vh<plusminus_mnemonic>ps.
1144 (define_insn "avx_h<plusminus_insn>v8sf3"
1145 [(set (match_operand:V8SF 0 "register_operand" "=x")
1151 (match_operand:V8SF 1 "register_operand" "x")
1152 (parallel [(const_int 0)]))
1153 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1155 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1156 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1160 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1161 (parallel [(const_int 0)]))
1162 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1164 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1169 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1170 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1172 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1173 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1176 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1177 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1179 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1180 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1182 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1183 [(set_attr "type" "sseadd")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "V8SF")])
;; V4SF pattern of the <plusminus_insn> family.  The RTL selects SF elements
;; 0..3 of operand 1 and then of operand 2.  Alternative 0 (noavx) ties
;; operand 1 to the destination and emits h<plusminus_mnemonic>ps with
;; prefix_rep 1; alternative 1 (avx) emits the three-operand
;; vh<plusminus_mnemonic>ps.
1187 (define_insn "sse3_h<plusminus_insn>v4sf3"
1188 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1193 (match_operand:V4SF 1 "register_operand" "0,x")
1194 (parallel [(const_int 0)]))
1195 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1197 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1198 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1202 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1203 (parallel [(const_int 0)]))
1204 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1206 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1207 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1210 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1211 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "isa" "noavx,avx")
1213 (set_attr "type" "sseadd")
1214 (set_attr "atom_unit" "complex")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "prefix_rep" "1,*")
1217 (set_attr "mode" "V4SF")])
;; Expander for reduc_splus on V4DF.  Emits three insns using two fresh
;; pseudos: avx_haddv4df3 of operand 1 with itself into tmp,
;; avx_vperm2f128v4df3 of tmp with itself and immediate 1 into tmp2, and
;; finally addv4df3 of tmp and tmp2 into operand 0.
1219 (define_expand "reduc_splus_v4df"
1220 [(match_operand:V4DF 0 "register_operand" "")
1221 (match_operand:V4DF 1 "register_operand" "")]
1224 rtx tmp = gen_reg_rtx (V4DFmode);
1225 rtx tmp2 = gen_reg_rtx (V4DFmode);
1226 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1227 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1228 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for reduc_splus on V2DF: a single sse3_haddv2df3 of operand 1
;; with itself, written directly to operand 0.
1232 (define_expand "reduc_splus_v2df"
1233 [(match_operand:V2DF 0 "register_operand" "")
1234 (match_operand:V2DF 1 "register_operand" "")]
1237 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for reduc_splus on V8SF.  Emits avx_haddv8sf3 twice (operand 1
;; with itself into tmp, then tmp with itself into tmp2), reuses tmp as the
;; destination of avx_vperm2f128v8sf3 applied to tmp2 with immediate 1, and
;; adds tmp and tmp2 into operand 0.
1241 (define_expand "reduc_splus_v8sf"
1242 [(match_operand:V8SF 0 "register_operand" "")
1243 (match_operand:V8SF 1 "register_operand" "")]
1246 rtx tmp = gen_reg_rtx (V8SFmode);
1247 rtx tmp2 = gen_reg_rtx (V8SFmode);
1248 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1249 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1250 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1251 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for reduc_splus on V4SF.  Two strategies are visible: two
;; chained sse3_haddv4sf3 insns through a fresh pseudo, or a call to
;; ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the condition choosing between the two is not visible
;; here -- confirm against the full definition.
1255 (define_expand "reduc_splus_v4sf"
1256 [(match_operand:V4SF 0 "register_operand" "")
1257 (match_operand:V4SF 1 "register_operand" "")]
1262 rtx tmp = gen_reg_rtx (V4SFmode);
1263 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1264 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1267 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1271 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Per-mode enabling conditions: the integer modes V32QI, V16HI, V8SI and
;; V4DI require TARGET_AVX2; V8SF and V4DF require TARGET_AVX; V4SF requires
;; TARGET_SSE.
1272 (define_mode_iterator REDUC_SMINMAX_MODE
1273 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1274 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1275 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1276 (V4SF "TARGET_SSE")])
;; Reduction expander instantiated for every code in the smaxmin iterator
;; and every mode in REDUC_SMINMAX_MODE.  All work is delegated to
;; ix86_expand_reduc, which receives the generator of the matching
;; element-wise <code><mode>3 pattern plus the destination and source.
1278 (define_expand "reduc_<code>_<mode>"
1279 [(smaxmin:REDUC_SMINMAX_MODE
1280 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1281 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1284 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes; delegates to ix86_expand_reduc
;; with the generator of the matching <code><mode>3 pattern.
;; NOTE(review): the code iterator and enabling condition are not visible
;; here -- confirm against the full definition.
1288 (define_expand "reduc_<code>_<mode>"
1290 (match_operand:VI_256 0 "register_operand" "")
1291 (match_operand:VI_256 1 "register_operand" ""))]
1294 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Expander for reduc_umin on V8HI; delegates to ix86_expand_reduc with
;; gen_uminv8hi3 as the element-wise operation.
1298 (define_expand "reduc_umin_v8hi"
1300 (match_operand:V8HI 0 "register_operand" "")
1301 (match_operand:V8HI 1 "register_operand" ""))]
1304 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1310 ;; Parallel floating point comparisons
1312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare over the VF modes with an explicit immediate: operand 3 is
;; an SImode constant accepted by const_0_to_31_operand.  Emits the
;; VEX-prefixed vcmp<ssemodesuffix> with the immediate as %3;
;; length_immediate is 1.
1314 (define_insn "avx_cmp<mode>3"
1315 [(set (match_operand:VF 0 "register_operand" "=x")
1317 [(match_operand:VF 1 "register_operand" "x")
1318 (match_operand:VF 2 "nonimmediate_operand" "xm")
1319 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1322 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1323 [(set_attr "type" "ssecmp")
1324 (set_attr "length_immediate" "1")
1325 (set_attr "prefix" "vex")
1326 (set_attr "mode" "<MODE>")])
;; Variant of the immediate-predicate compare restricted to the VF_128 modes.
;; It uses the scalar mnemonic suffix (<ssescalarmodesuffix>) and reports
;; <ssescalarmode> as its "mode" attribute.  Operand 3 is an SImode constant
;; accepted by const_0_to_31_operand.
1328 (define_insn "avx_vmcmp<mode>3"
1329 [(set (match_operand:VF_128 0 "register_operand" "=x")
1332 [(match_operand:VF_128 1 "register_operand" "x")
1333 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1334 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1339 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1340 [(set_attr "type" "ssecmp")
1341 (set_attr "length_immediate" "1")
1342 (set_attr "prefix" "vex")
1343 (set_attr "mode" "<ssescalarmode>")])
;; Commutative form of the mask compare: operand 1 carries the "%"
;; constraint modifier, and the insn condition requires the code of the
;; comparison in operand 3 to be in class RTX_COMM_COMPARE.  The mnemonic is
;; built from operand 3 through the %D3 output modifier.  Alternative 0
;; (noavx) is two-operand with operand 1 tied to the destination;
;; alternative 1 (avx) is three-operand.
1345 (define_insn "*<sse>_maskcmp<mode>3_comm"
1346 [(set (match_operand:VF 0 "register_operand" "=x,x")
1347 (match_operator:VF 3 "sse_comparison_operator"
1348 [(match_operand:VF 1 "register_operand" "%0,x")
1349 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1351 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1353 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1354 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1355 [(set_attr "isa" "noavx,avx")
1356 (set_attr "type" "ssecmp")
1357 (set_attr "length_immediate" "1")
1358 (set_attr "prefix" "orig,vex")
1359 (set_attr "mode" "<MODE>")])
;; Mask compare over the VF modes for any operator accepted by
;; sse_comparison_operator (matched as operand 3).  Operand 1 is not marked
;; commutative here.  Alternative 0 (noavx) ties operand 1 to the destination
;; and emits cmp%D3<ssemodesuffix>; alternative 1 (avx) emits the
;; three-operand vcmp%D3<ssemodesuffix>.
1361 (define_insn "<sse>_maskcmp<mode>3"
1362 [(set (match_operand:VF 0 "register_operand" "=x,x")
1363 (match_operator:VF 3 "sse_comparison_operator"
1364 [(match_operand:VF 1 "register_operand" "0,x")
1365 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1368 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1369 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1370 [(set_attr "isa" "noavx,avx")
1371 (set_attr "type" "ssecmp")
1372 (set_attr "length_immediate" "1")
1373 (set_attr "prefix" "orig,vex")
1374 (set_attr "mode" "<MODE>")])
;; Mask compare restricted to the VF_128 modes, printed with the scalar
;; mnemonic suffix (<ssescalarmodesuffix>) and reporting <ssescalarmode> as
;; its "mode" attribute.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is three-operand.  length_immediate is
;; given explicitly only for alternative 0 ("1,*").
1376 (define_insn "<sse>_vmmaskcmp<mode>3"
1377 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1379 (match_operator:VF_128 3 "sse_comparison_operator"
1380 [(match_operand:VF_128 1 "register_operand" "0,x")
1381 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1386 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1387 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1388 [(set_attr "isa" "noavx,avx")
1389 (set_attr "type" "ssecmp")
1390 (set_attr "length_immediate" "1,*")
1391 (set_attr "prefix" "orig,vex")
1392 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 (register) and
;; element 0 of operand 1 (register or memory), both of mode <ssevecmode>.
;; Enabled when SSE_FLOAT_MODE_P (<MODE>mode).  Emits comi<ssemodesuffix>
;; through the %v output prefix (prefix attribute "maybe_vex").
;; prefix_data16 is selected by whether the "mode" attribute is DF.
1394 (define_insn "<sse>_comi"
1395 [(set (reg:CCFP FLAGS_REG)
1398 (match_operand:<ssevecmode> 0 "register_operand" "x")
1399 (parallel [(const_int 0)]))
1401 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1402 (parallel [(const_int 0)]))))]
1403 "SSE_FLOAT_MODE_P (<MODE>mode)"
1404 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1405 [(set_attr "type" "ssecomi")
1406 (set_attr "prefix" "maybe_vex")
1407 (set_attr "prefix_rep" "0")
1408 (set (attr "prefix_data16")
1409 (if_then_else (eq_attr "mode" "DF")
1411 (const_string "0")))
1412 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern but targeting FLAGS_REG in CCFPU mode and
;; emitting ucomi<ssemodesuffix>.  Operands are element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).  Enabled when
;; SSE_FLOAT_MODE_P (<MODE>mode); prefix_data16 is selected by whether the
;; "mode" attribute is DF.
1414 (define_insn "<sse>_ucomi"
1415 [(set (reg:CCFPU FLAGS_REG)
1418 (match_operand:<ssevecmode> 0 "register_operand" "x")
1419 (parallel [(const_int 0)]))
1421 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1422 (parallel [(const_int 0)]))))]
1423 "SSE_FLOAT_MODE_P (<MODE>mode)"
1424 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1425 [(set_attr "type" "ssecomi")
1426 (set_attr "prefix" "maybe_vex")
1427 (set_attr "prefix_rep" "0")
1428 (set (attr "prefix_data16")
1429 (if_then_else (eq_attr "mode" "DF")
1431 (const_string "0")))
1432 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: the data operands (0, 1, 2) are in a V_256
;; mode and the compared operands (4, 5) in a VF_256 mode; operand 3 is the
;; comparison operator.  The condition requires both modes to have the same
;; number of units.  Expansion is delegated to ix86_expand_fp_vcond, whose
;; boolean result is captured in "ok".
1434 (define_expand "vcond<V_256:mode><VF_256:mode>"
1435 [(set (match_operand:V_256 0 "register_operand" "")
1437 (match_operator 3 ""
1438 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1439 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1440 (match_operand:V_256 1 "general_operand" "")
1441 (match_operand:V_256 2 "general_operand" "")))]
1443 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1444 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1446 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vector conditional expander: data operands
;; (0, 1, 2) are in a V_128 mode, compared operands (4, 5) in a VF_128 mode,
;; and operand 3 is the comparison operator.  The condition requires equal
;; unit counts; expansion is delegated to ix86_expand_fp_vcond.
1451 (define_expand "vcond<V_128:mode><VF_128:mode>"
1452 [(set (match_operand:V_128 0 "register_operand" "")
1454 (match_operator 3 ""
1455 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1456 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1457 (match_operand:V_128 1 "general_operand" "")
1458 (match_operand:V_128 2 "general_operand" "")))]
1460 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1461 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1463 bool ok = ix86_expand_fp_vcond (operands);
1468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1470 ;; Parallel floating point logical operations
1472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; And-not over the VF modes.  The assembler text is formatted at output time
;; into a static 32-byte buffer: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and which_alternative chooses between the two-operand
;; "andn%s" template and the three-operand "vandn%s" template.
1474 (define_insn "<sse>_andnot<mode>3"
1475 [(set (match_operand:VF 0 "register_operand" "=x,x")
1478 (match_operand:VF 1 "register_operand" "0,x"))
1479 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1482 static char buf[32];
1485 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1487 switch (which_alternative)
1490 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1493 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1499 snprintf (buf, sizeof (buf), insn, suffix);
1502 [(set_attr "isa" "noavx,avx")
1503 (set_attr "type" "sselog")
1504 (set_attr "prefix" "orig,vex")
1505 (set_attr "mode" "<MODE>")])
;; Expander for the <code><mode>3 operations on VF modes.  Both inputs may
;; be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with the operation code and mode.
1507 (define_expand "<code><mode>3"
1508 [(set (match_operand:VF 0 "register_operand" "")
1510 (match_operand:VF 1 "nonimmediate_operand" "")
1511 (match_operand:VF 2 "nonimmediate_operand" "")))]
1513 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the <code><mode>3 operations on VF modes, enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is marked commutative ("%").  The assembler text is formatted
;; into a static buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <ssemodesuffix>;
;; which_alternative chooses the two-operand "<logic>%s" or three-operand
;; "v<logic>%s" template.
1515 (define_insn "*<code><mode>3"
1516 [(set (match_operand:VF 0 "register_operand" "=x,x")
1518 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1519 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1520 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1522 static char buf[32];
1525 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1527 switch (which_alternative)
1530 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1533 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1539 snprintf (buf, sizeof (buf), insn, suffix);
1542 [(set_attr "isa" "noavx,avx")
1543 (set_attr "type" "sselog")
1544 (set_attr "prefix" "orig,vex")
1545 (set_attr "mode" "<MODE>")])
;; copysign expander for the VF modes.  The preparation code sets operand 3
;; to the mask returned by ix86_build_signbit_mask (<MODE>mode, 1, 0) and
;; allocates fresh pseudos for operands 4 and 5.  The visible template pairs
;; (not operand 3) with operand 1, ANDs operand 3 with operand 2, and sets
;; operand 0 to the IOR of operands 4 and 5.
1547 (define_expand "copysign<mode>3"
1550 (not:VF (match_dup 3))
1551 (match_operand:VF 1 "nonimmediate_operand" "")))
1553 (and:VF (match_dup 3)
1554 (match_operand:VF 2 "nonimmediate_operand" "")))
1555 (set (match_operand:VF 0 "register_operand" "")
1556 (ior:VF (match_dup 4) (match_dup 5)))]
1559 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1561 operands[4] = gen_reg_rtx (<MODE>mode);
1562 operands[5] = gen_reg_rtx (<MODE>mode);
1565 ;; Also define scalar versions. These are used for abs, neg, and
1566 ;; conditional move. Using subregs into vector modes causes register
1567 ;; allocation lossage. These patterns do not allow memory operands
1568 ;; because the native instructions read the full 128-bits.
;; And-not on the MODEF modes with register-only operands.  Enabled when
;; SSE_FLOAT_MODE_P (<MODE>mode).  The emitted text uses a packed suffix:
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else
;; <ssevecmodesuffix>; the "mode" attribute is <ssevecmode>.
;; which_alternative chooses the two-operand "andn%s" or three-operand
;; "vandn%s" template.
1570 (define_insn "*andnot<mode>3"
1571 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1574 (match_operand:MODEF 1 "register_operand" "0,x"))
1575 (match_operand:MODEF 2 "register_operand" "x,x")))]
1576 "SSE_FLOAT_MODE_P (<MODE>mode)"
1578 static char buf[32];
1581 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1583 switch (which_alternative)
1586 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1589 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1595 snprintf (buf, sizeof (buf), insn, suffix);
1598 [(set_attr "isa" "noavx,avx")
1599 (set_attr "type" "sselog")
1600 (set_attr "prefix" "orig,vex")
1601 (set_attr "mode" "<ssevecmode>")])
;; <code><mode>3 operations on the MODEF modes with register-only
;; operands; operand 1 is marked commutative ("%").  Enabled when
;; SSE_FLOAT_MODE_P (<MODE>mode).  The emitted text uses a packed suffix:
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else
;; <ssevecmodesuffix>; the "mode" attribute is <ssevecmode>.
1603 (define_insn "*<code><mode>3"
1604 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1606 (match_operand:MODEF 1 "register_operand" "%0,x")
1607 (match_operand:MODEF 2 "register_operand" "x,x")))]
1608 "SSE_FLOAT_MODE_P (<MODE>mode)"
1610 static char buf[32];
1613 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1615 switch (which_alternative)
1618 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1621 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1627 snprintf (buf, sizeof (buf), insn, suffix);
1630 [(set_attr "isa" "noavx,avx")
1631 (set_attr "type" "sselog")
1632 (set_attr "prefix" "orig,vex")
1633 (set_attr "mode" "<ssevecmode>")])
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; FMA4 floating point multiply/accumulate instructions. This
1638 ;; includes the scalar version of the instructions as well as the
1641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1643 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1644 ;; combine to generate a multiply/add with two memory references. We then
1645 ;; split this insn, into loading up the destination register with one of the
1646 ;; memory operations. If we don't manage to split the insn, reload will
1647 ;; generate the appropriate moves. The reason this is needed, is that combine
1648 ;; has already folded one of the memory references into both the multiply and
1649 ;; add insns, and it can't generate a new pseudo. I.e.:
1650 ;; (set (reg1) (mem (addr1)))
1651 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1652 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1654 ;; ??? This is historic, pre-dating the gimple fma transformation.
1655 ;; We could now properly represent that only one memory operand is
1656 ;; allowed and not be penalized during optimization.
1658 ;; Intrinsic FMA operations.
1660 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander fma<mode>4 over FMAMODE.  All three inputs are
;; taken as-is (register or memory).  Enabled when either TARGET_FMA or
;; TARGET_FMA4 is set and TARGET_SSE_MATH is also set.
1661 (define_expand "fma<mode>4"
1662 [(set (match_operand:FMAMODE 0 "register_operand")
1664 (match_operand:FMAMODE 1 "nonimmediate_operand")
1665 (match_operand:FMAMODE 2 "nonimmediate_operand")
1666 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1667 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fms<mode>4 over FMAMODE: same shape as fma<mode>4
;; except that operand 3 is wrapped in (neg ...).  Enabled when either
;; TARGET_FMA or TARGET_FMA4 is set and TARGET_SSE_MATH is also set.
1669 (define_expand "fms<mode>4"
1670 [(set (match_operand:FMAMODE 0 "register_operand")
1672 (match_operand:FMAMODE 1 "nonimmediate_operand")
1673 (match_operand:FMAMODE 2 "nonimmediate_operand")
1674 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1675 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnma<mode>4 over FMAMODE: operand 1 is wrapped in
;; (neg ...); operands 2 and 3 are taken as-is.  Enabled when either
;; TARGET_FMA or TARGET_FMA4 is set and TARGET_SSE_MATH is also set.
1677 (define_expand "fnma<mode>4"
1678 [(set (match_operand:FMAMODE 0 "register_operand")
1680 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1681 (match_operand:FMAMODE 2 "nonimmediate_operand")
1682 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1683 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnms<mode>4 over FMAMODE: both operand 1 and
;; operand 3 are wrapped in (neg ...).  Enabled when either TARGET_FMA or
;; TARGET_FMA4 is set and TARGET_SSE_MATH is also set.
1685 (define_expand "fnms<mode>4"
1686 [(set (match_operand:FMAMODE 0 "register_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1689 (match_operand:FMAMODE 2 "nonimmediate_operand")
1690 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1691 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1693 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Named expander fma4i_fmadd_<mode> over FMAMODE.  Its condition is only
;; "TARGET_FMA || TARGET_FMA4"; unlike the fma<mode>4 standard name it does
;; not test TARGET_SSE_MATH.
1694 (define_expand "fma4i_fmadd_<mode>"
1695 [(set (match_operand:FMAMODE 0 "register_operand")
1697 (match_operand:FMAMODE 1 "nonimmediate_operand")
1698 (match_operand:FMAMODE 2 "nonimmediate_operand")
1699 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1700 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> over FMAMODE.  Operand 1 is a register
;; in both alternatives and is marked commutative ("%").  Alternative 0
;; allows memory in operand 3 with operand 2 in a register; alternative 1
;; takes operand 2 from memory with operand 3 in a register.
1702 (define_insn "*fma4i_fmadd_<mode>"
1703 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1705 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1706 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1707 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1709 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1710 [(set_attr "type" "ssemuladd")
1711 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> over FMAMODE.  Operand 1 is a register
;; in both alternatives and is marked commutative ("%").  Alternative 0
;; allows memory in operand 3 with operand 2 in a register; alternative 1
;; takes operand 2 from memory with operand 3 in a register.
1713 (define_insn "*fma4i_fmsub_<mode>"
1714 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1716 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1717 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1719 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1721 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1722 [(set_attr "type" "ssemuladd")
1723 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> over FMAMODE.  Operand 1 is a
;; register in both alternatives and is marked commutative ("%").
;; Alternative 0 allows memory in operand 3 with operand 2 in a register;
;; alternative 1 takes operand 2 from memory with operand 3 in a register.
1725 (define_insn "*fma4i_fnmadd_<mode>"
1726 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1729 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1730 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1731 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1733 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> over FMAMODE.  Operand 1 is a
;; register in both alternatives and is marked commutative ("%").
;; Alternative 0 allows memory in operand 3 with operand 2 in a register;
;; alternative 1 takes operand 2 from memory with operand 3 in a register.
1737 (define_insn "*fma4i_fnmsub_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1741 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1742 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1744 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1746 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1751 ;; entire destination register, with the high-order elements zeroed.
;; Named expander fma4i_vmfmadd_<mode> over the VF_128 modes.  The
;; preparation code supplies operand 4 as CONST0_RTX (<MODE>mode), i.e. the
;; all-zero constant of the vector mode.
1753 (define_expand "fma4i_vmfmadd_<mode>"
1754 [(set (match_operand:VF_128 0 "register_operand")
1757 (match_operand:VF_128 1 "nonimmediate_operand")
1758 (match_operand:VF_128 2 "nonimmediate_operand")
1759 (match_operand:VF_128 3 "nonimmediate_operand"))
1764 operands[4] = CONST0_RTX (<MODE>mode);
;; Named expander fmai_vmfmadd_<mode> over the VF_128 modes: register
;; destination, three inputs that may each be a register or memory.
;; NOTE(review): the enabling condition is not visible here -- confirm
;; against the full definition.
1767 (define_expand "fmai_vmfmadd_<mode>"
1768 [(set (match_operand:VF_128 0 "register_operand")
1771 (match_operand:VF_128 1 "nonimmediate_operand")
1772 (match_operand:VF_128 2 "nonimmediate_operand")
1773 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Three-operand vfmadd with the scalar mnemonic suffix, over VF_128.
;; The alternative fixes the 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1778 (define_insn "*fmai_fmadd_<mode>"
1779 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1782 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1783 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1784 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1789 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1790 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1791 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub with the scalar mnemonic suffix, over VF_128.
;; The alternative fixes the 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1795 (define_insn "*fmai_fmsub_<mode>"
1796 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1799 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1800 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1802 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1807 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1808 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1809 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1810 [(set_attr "type" "ssemuladd")
1811 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd with the scalar mnemonic suffix, over VF_128.
;; The alternative fixes the 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1813 (define_insn "*fmai_fnmadd_<mode>"
1814 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1818 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1819 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1820 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1825 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1826 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1827 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1828 [(set_attr "type" "ssemuladd")
1829 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub with the scalar mnemonic suffix, over VF_128.
;; The alternative fixes the 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1831 (define_insn "*fmai_fnmsub_<mode>"
1832 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1836 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1837 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1839 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1844 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1845 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1846 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
;; Four-operand vfmadd with the scalar mnemonic suffix, over VF_128.
;; Operand 4 must satisfy const0_operand.  Operand 1 is a register in both
;; alternatives; alternative 0 allows memory in operand 3, alternative 1
;; takes operand 2 from memory.
1850 (define_insn "*fma4i_vmfmadd_<mode>"
1851 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1854 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1855 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1856 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1857 (match_operand:VF_128 4 "const0_operand" "")
1860 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1861 [(set_attr "type" "ssemuladd")
1862 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub with the scalar mnemonic suffix, over VF_128.
;; Operand 4 must satisfy const0_operand.  Operand 1 is a register in both
;; alternatives; alternative 0 allows memory in operand 3, alternative 1
;; takes operand 2 from memory.
1864 (define_insn "*fma4i_vmfmsub_<mode>"
1865 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1868 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1869 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1871 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1872 (match_operand:VF_128 4 "const0_operand" "")
1875 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876 [(set_attr "type" "ssemuladd")
1877 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd with the scalar mnemonic suffix, over VF_128.
;; Operand 4 must satisfy const0_operand.  Operand 1 is a register in both
;; alternatives; alternative 0 allows memory in operand 3, alternative 1
;; takes operand 2 from memory.
1879 (define_insn "*fma4i_vmfnmadd_<mode>"
1880 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1884 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1885 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1886 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1887 (match_operand:VF_128 4 "const0_operand" "")
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub with the scalar mnemonic suffix, over VF_128.
;; Operand 4 must satisfy const0_operand.  Operand 1 is a register in both
;; alternatives; alternative 0 allows memory in operand 3, alternative 1
;; takes operand 2 from memory.
1894 (define_insn "*fma4i_vmfnmsub_<mode>"
1895 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1899 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1900 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1902 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1903 (match_operand:VF_128 4 "const0_operand" "")
1906 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1907 [(set_attr "type" "ssemuladd")
1908 (set_attr "mode" "<MODE>")])
1910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1912 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1916 ;; It would be possible to represent these without the UNSPEC as
1919 ;; (fma op1 op2 op3)
1920 ;; (fma op1 op2 (neg op3))
1923 ;; But this doesn't seem useful in practice.
;; Named expander fmaddsub_<mode> over the VF modes: register destination
;; and three inputs that may each be a register or memory.  Enabled when
;; TARGET_FMA or TARGET_FMA4 is set.
1925 (define_expand "fmaddsub_<mode>"
1926 [(set (match_operand:VF 0 "register_operand")
1928 [(match_operand:VF 1 "nonimmediate_operand")
1929 (match_operand:VF 2 "nonimmediate_operand")
1930 (match_operand:VF 3 "nonimmediate_operand")]
1932 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> over the VF modes.  Operand 1 is a
;; register in both alternatives and is marked commutative ("%").
;; Alternative 0 allows memory in operand 3; alternative 1 takes operand 2
;; from memory.
1934 (define_insn "*fma4_fmaddsub_<mode>"
1935 [(set (match_operand:VF 0 "register_operand" "=x,x")
1937 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1938 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1939 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1942 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1943 [(set_attr "type" "ssemuladd")
1944 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> over the VF modes.  Operand 1 is a
;; register in both alternatives and is marked commutative ("%").
;; Alternative 0 allows memory in operand 3; alternative 1 takes operand 2
;; from memory.
1946 (define_insn "*fma4_fmsubadd_<mode>"
1947 [(set (match_operand:VF 0 "register_operand" "=x,x")
1949 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1950 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1952 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1955 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1956 [(set_attr "type" "ssemuladd")
1957 (set_attr "mode" "<MODE>")])
1959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1961 ;; FMA3 floating point multiply/accumulate instructions.
1963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1965 (define_insn "*fma_fmadd_<mode>"
1966 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1968 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1969 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1970 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1973 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1974 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1975 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1976 [(set_attr "type" "ssemuladd")
1977 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1979 (define_insn "*fma_fmsub_<mode>"
1980 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1982 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1983 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1985 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1988 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1989 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1990 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1991 [(set_attr "type" "ssemuladd")
1992 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
1994 (define_insn "*fma_fnmadd_<mode>"
1995 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1998 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1999 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2000 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2003 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2004 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2005 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2006 [(set_attr "type" "ssemuladd")
2007 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
2009 (define_insn "*fma_fnmsub_<mode>"
2010 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2013 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2014 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2016 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2019 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2020 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2021 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2022 [(set_attr "type" "ssemuladd")
2023 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over the VF modes.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
2025 (define_insn "*fma_fmaddsub_<mode>"
2026 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2028 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2029 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2030 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2034 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2035 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2036 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2037 [(set_attr "type" "ssemuladd")
2038 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over the VF modes.  The alternative fixes the
;; 132/213/231 form:
;;   0: operand 1 tied to the destination, memory allowed in operand 2 -> 132
;;   1: operand 1 tied to the destination, memory allowed in operand 3 -> 213
;;   2: operand 3 tied to the destination, memory allowed in operand 2 -> 231
2040 (define_insn "*fma_fmsubadd_<mode>"
2041 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2043 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2044 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2046 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2050 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2051 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2052 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<MODE>")])
2056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2058 ;; Parallel single-precision floating point conversion operations
2060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: applies (float:V2SF ...) to V2SI operand 2 (constraint "ym").
;; V4SF operand 1 is tied to the destination.  Emitted as the two-operand
;; cvtpi2ps.
2062 (define_insn "sse_cvtpi2ps"
2063 [(set (match_operand:V4SF 0 "register_operand" "=x")
2066 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2067 (match_operand:V4SF 1 "register_operand" "0")
2070 "cvtpi2ps\t{%2, %0|%0, %2}"
2071 [(set_attr "type" "ssecvt")
2072 (set_attr "mode" "V4SF")])
;; cvtps2pi: wraps V4SF operand 1 (register or memory) in a V4SImode unspec
;; and selects elements 0 and 1 into the V2SI destination (constraint "=y").
;; Attributes mark the unit as "mmx" and the mode as DI.
2074 (define_insn "sse_cvtps2pi"
2075 [(set (match_operand:V2SI 0 "register_operand" "=y")
2077 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2079 (parallel [(const_int 0) (const_int 1)])))]
2081 "cvtps2pi\t{%1, %0|%0, %1}"
2082 [(set_attr "type" "ssecvt")
2083 (set_attr "unit" "mmx")
2084 (set_attr "mode" "DI")])
;; cvttps2pi: applies (fix:V4SI ...) to V4SF operand 1 (register or memory)
;; and selects elements 0 and 1 into the V2SI destination (constraint "=y").
;; Attributes mark the unit as "mmx", prefix_rep as 0 and the mode as SF.
2086 (define_insn "sse_cvttps2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2089 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2090 (parallel [(const_int 0) (const_int 1)])))]
2092 "cvttps2pi\t{%1, %0|%0, %1}"
2093 [(set_attr "type" "ssecvt")
2094 (set_attr "unit" "mmx")
2095 (set_attr "prefix_rep" "0")
2096 (set_attr "mode" "SF")])
;; cvtsi2ss: applies (float:SF ...) to SImode operand 2.  Three alternatives:
;; 0 and 1 (noavx) tie operand 1 to the destination and take operand 2 from
;; a general register ("r") or memory ("m") respectively -- split so that
;; the per-CPU decode attributes can differ; 2 (avx) emits the three-operand
;; vcvtsi2ss with operand 2 in a register or memory.
2098 (define_insn "sse_cvtsi2ss"
2099 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2102 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2103 (match_operand:V4SF 1 "register_operand" "0,0,x")
2107 cvtsi2ss\t{%2, %0|%0, %2}
2108 cvtsi2ss\t{%2, %0|%0, %2}
2109 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2110 [(set_attr "isa" "noavx,noavx,avx")
2111 (set_attr "type" "sseicvt")
2112 (set_attr "athlon_decode" "vector,double,*")
2113 (set_attr "amdfam10_decode" "vector,double,*")
2114 (set_attr "bdver1_decode" "double,direct,*")
2115 (set_attr "prefix" "orig,orig,vex")
2116 (set_attr "mode" "SF")])
;; 64-bit-source variant of cvtsi2ss: applies (float:SF ...) to DImode
;; operand 2 and is enabled only when TARGET_SSE && TARGET_64BIT.  The
;; alternatives mirror sse_cvtsi2ss (two noavx, one avx).  prefix_rex is 1
;; for the noavx alternatives and length_vex is 4 for the avx alternative.
2118 (define_insn "sse_cvtsi2ssq"
2119 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2122 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2123 (match_operand:V4SF 1 "register_operand" "0,0,x")
2125 "TARGET_SSE && TARGET_64BIT"
2127 cvtsi2ssq\t{%2, %0|%0, %2}
2128 cvtsi2ssq\t{%2, %0|%0, %2}
2129 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2130 [(set_attr "isa" "noavx,noavx,avx")
2131 (set_attr "type" "sseicvt")
2132 (set_attr "athlon_decode" "vector,double,*")
2133 (set_attr "amdfam10_decode" "vector,double,*")
2134 (set_attr "bdver1_decode" "double,direct,*")
2135 (set_attr "length_vex" "*,*,4")
2136 (set_attr "prefix_rex" "1,1,*")
2137 (set_attr "prefix" "orig,orig,vex")
2138 (set_attr "mode" "SF")])
;; cvtss2si from a vector source: element 0 of V4SF operand 1 is wrapped in
;; UNSPEC_FIX_NOTRUNC and stored to an SImode general register.  Two
;; alternatives take the source from an SSE register ("x") or memory ("m").
;; Emitted through the %v output prefix (prefix attribute "maybe_vex").
2140 (define_insn "sse_cvtss2si"
2141 [(set (match_operand:SI 0 "register_operand" "=r,r")
2144 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2145 (parallel [(const_int 0)]))]
2146 UNSPEC_FIX_NOTRUNC))]
2148 "%vcvtss2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "athlon_decode" "double,vector")
2151 (set_attr "bdver1_decode" "double,double")
2152 (set_attr "prefix_rep" "1")
2153 (set_attr "prefix" "maybe_vex")
2154 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a scalar SFmode operand
;; (register or memory) instead of element 0 of a V4SF vector.  Emits the
;; same cvtss2si instruction.
2156 (define_insn "sse_cvtss2si_2"
2157 [(set (match_operand:SI 0 "register_operand" "=r,r")
2158 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2159 UNSPEC_FIX_NOTRUNC))]
2161 "%vcvtss2si\t{%1, %0|%0, %1}"
2162 [(set_attr "type" "sseicvt")
2163 (set_attr "athlon_decode" "double,vector")
2164 (set_attr "amdfam10_decode" "double,double")
2165 (set_attr "bdver1_decode" "double,double")
2166 (set_attr "prefix_rep" "1")
2167 (set_attr "prefix" "maybe_vex")
2168 (set_attr "mode" "SI")])
;; cvtss2si{q}: 64-bit counterpart of sse_cvtss2si.  Converts element 0 of
;; the V4SF operand 1 to a DImode value in a general register; requires
;; TARGET_64BIT.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode attribute is
;; set here -- confirm this is intentional.
2170 (define_insn "sse_cvtss2siq"
2171 [(set (match_operand:DI 0 "register_operand" "=r,r")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2175 (parallel [(const_int 0)]))]
2176 UNSPEC_FIX_NOTRUNC))]
2177 "TARGET_SSE && TARGET_64BIT"
2178 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "sseicvt")
2180 (set_attr "athlon_decode" "double,vector")
2181 (set_attr "bdver1_decode" "double,double")
2182 (set_attr "prefix_rep" "1")
2183 (set_attr "prefix" "maybe_vex")
2184 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose input is a scalar SFmode operand
;; (register or memory) instead of element 0 of a V4SF vector.  Produces a
;; DImode result and requires TARGET_64BIT.
2186 (define_insn "sse_cvtss2siq_2"
2187 [(set (match_operand:DI 0 "register_operand" "=r,r")
2188 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2189 UNSPEC_FIX_NOTRUNC))]
2190 "TARGET_SSE && TARGET_64BIT"
2191 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2192 [(set_attr "type" "sseicvt")
2193 (set_attr "athlon_decode" "double,vector")
2194 (set_attr "amdfam10_decode" "double,double")
2195 (set_attr "bdver1_decode" "double,double")
2196 (set_attr "prefix_rep" "1")
2197 (set_attr "prefix" "maybe_vex")
2198 (set_attr "mode" "DI")])
;; cvttss2si: emits the truncating scalar float-to-int instruction on
;; element 0 of the V4SF operand 1 (register or memory); the SImode result
;; lands in a general register.
2200 (define_insn "sse_cvttss2si"
2201 [(set (match_operand:SI 0 "register_operand" "=r,r")
2204 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2205 (parallel [(const_int 0)]))))]
2207 "%vcvttss2si\t{%1, %0|%0, %1}"
2208 [(set_attr "type" "sseicvt")
2209 (set_attr "athlon_decode" "double,vector")
2210 (set_attr "amdfam10_decode" "double,double")
2211 (set_attr "bdver1_decode" "double,double")
2212 (set_attr "prefix_rep" "1")
2213 (set_attr "prefix" "maybe_vex")
2214 (set_attr "mode" "SI")])
;; cvttss2si{q}: 64-bit counterpart of sse_cvttss2si.  Element 0 of the
;; V4SF operand 1 is converted to a DImode value in a general register;
;; requires TARGET_64BIT.
2216 (define_insn "sse_cvttss2siq"
2217 [(set (match_operand:DI 0 "register_operand" "=r,r")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2221 (parallel [(const_int 0)]))))]
2222 "TARGET_SSE && TARGET_64BIT"
2223 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2224 [(set_attr "type" "sseicvt")
2225 (set_attr "athlon_decode" "double,vector")
2226 (set_attr "amdfam10_decode" "double,double")
2227 (set_attr "bdver1_decode" "double,double")
2228 (set_attr "prefix_rep" "1")
2229 (set_attr "prefix" "maybe_vex")
2230 (set_attr "mode" "DI")])
;; vcvtdq2ps (256-bit): signed integer to float conversion of all eight
;; elements, V8SI operand 1 (register or memory) -> V8SF operand 0.
2232 (define_insn "avx_cvtdq2ps256"
2233 [(set (match_operand:V8SF 0 "register_operand" "=x")
2234 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2236 "vcvtdq2ps\t{%1, %0|%0, %1}"
2237 [(set_attr "type" "ssecvt")
2238 (set_attr "prefix" "vex")
2239 (set_attr "mode" "V8SF")])
;; cvtdq2ps (128-bit): signed integer to float conversion of all four
;; elements, V4SI operand 1 (register or memory) -> V4SF operand 0.
;; The %v template prefix and "maybe_vex" allow a VEX-encoded form.
2241 (define_insn "sse2_cvtdq2ps"
2242 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2245 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2246 [(set_attr "type" "ssecvt")
2247 (set_attr "prefix" "maybe_vex")
2248 (set_attr "mode" "V4SF")])
;; Unsigned V4SI -> V4SF conversion built from the signed one:
;;   1. convert operand 1 with a signed float conversion,
;;   2. compare that result (operand 5) against zero (operand 3) with lt,
;;   3. and the comparison result (operand 6) with a vector built from
;;      2**32 (operand 4),
;;   4. add the masked correction (operand 7) to the signed result.
;; The preparation code builds the zero and 2**32 constants and allocates
;; V4SF temporaries for operands 5..7.
;; NOTE(review): the destinations of steps 1-3 are not visible in this
;; listing; presumably they are match_dup 5, 6 and 7 in that order.
2250 (define_expand "sse2_cvtudq2ps"
2252 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2254 (lt:V4SF (match_dup 5) (match_dup 3)))
2256 (and:V4SF (match_dup 6) (match_dup 4)))
2257 (set (match_operand:V4SF 0 "register_operand" "")
2258 (plus:V4SF (match_dup 5) (match_dup 7)))]
2261 REAL_VALUE_TYPE TWO32r;
2265 real_ldexp (&TWO32r, &dconst1, 32);
2266 x = const_double_from_real_value (TWO32r, SFmode);
2268 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2269 operands[4] = force_reg (V4SFmode,
2270 ix86_build_const_vector (V4SFmode, 1, x));
2272 for (i = 5; i < 8; i++)
2273 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq (256-bit): V8SF operand 1 (register or memory) -> V8SI
;; operand 0, modelled as UNSPEC_FIX_NOTRUNC rather than a plain fix.
2276 (define_insn "avx_cvtps2dq256"
2277 [(set (match_operand:V8SI 0 "register_operand" "=x")
2278 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2279 UNSPEC_FIX_NOTRUNC))]
2281 "vcvtps2dq\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix" "vex")
2284 (set_attr "mode" "OI")])
;; cvtps2dq (128-bit): V4SF operand 1 (register or memory) -> V4SI
;; operand 0, modelled as UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute
;; is computed from TARGET_AVX; the visible fallback value is "1".
2286 (define_insn "sse2_cvtps2dq"
2287 [(set (match_operand:V4SI 0 "register_operand" "=x")
2288 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2289 UNSPEC_FIX_NOTRUNC))]
2291 "%vcvtps2dq\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "ssecvt")
2293 (set (attr "prefix_data16")
2295 (match_test "TARGET_AVX")
2297 (const_string "1")))
2298 (set_attr "prefix" "maybe_vex")
2299 (set_attr "mode" "TI")])
;; vcvttps2dq (256-bit): float to signed integer conversion expressed as
;; an rtx fix, V8SF operand 1 (register or memory) -> V8SI operand 0.
2301 (define_insn "avx_cvttps2dq256"
2302 [(set (match_operand:V8SI 0 "register_operand" "=x")
2303 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2305 "vcvttps2dq\t{%1, %0|%0, %1}"
2306 [(set_attr "type" "ssecvt")
2307 (set_attr "prefix" "vex")
2308 (set_attr "mode" "OI")])
;; cvttps2dq (128-bit): float to signed integer conversion expressed as an
;; rtx fix, V4SF operand 1 (register or memory) -> V4SI operand 0.
;; prefix_rep and prefix_data16 are both computed from TARGET_AVX.
;; NOTE(review): prefix_data16 is set twice in the attribute list below,
;; once conditionally on TARGET_AVX and once unconditionally to "0".
;; One of the two looks redundant -- confirm which value is intended and
;; drop the other.
2310 (define_insn "sse2_cvttps2dq"
2311 [(set (match_operand:V4SI 0 "register_operand" "=x")
2312 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2314 "%vcvttps2dq\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "ssecvt")
2316 (set (attr "prefix_rep")
2318 (match_test "TARGET_AVX")
2320 (const_string "1")))
2321 (set (attr "prefix_data16")
2323 (match_test "TARGET_AVX")
2325 (const_string "0")))
2326 (set_attr "prefix_data16" "0")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "TI")])
2330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2332 ;; Parallel double-precision floating point conversion operations
2334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: signed integer to double conversion, V2SI operand 1 -> V2DF
;; operand 0.  Alternative 0 takes the source from a "y" register and is
;; tagged unit "mmx"; alternative 1 takes it from memory.
2336 (define_insn "sse2_cvtpi2pd"
2337 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2338 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2340 "cvtpi2pd\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set_attr "unit" "mmx,*")
2343 (set_attr "prefix_data16" "1,*")
2344 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (SSE register or memory) -> V2SI operand 0 in
;; a "y" register, modelled as UNSPEC_FIX_NOTRUNC.
2346 (define_insn "sse2_cvtpd2pi"
2347 [(set (match_operand:V2SI 0 "register_operand" "=y")
2348 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2349 UNSPEC_FIX_NOTRUNC))]
2351 "cvtpd2pi\t{%1, %0|%0, %1}"
2352 [(set_attr "type" "ssecvt")
2353 (set_attr "unit" "mmx")
2354 (set_attr "bdver1_decode" "double")
2355 (set_attr "prefix_data16" "1")
2356 (set_attr "mode" "DI")])
;; cvttpd2pi: double to signed integer conversion expressed as an rtx fix,
;; V2DF operand 1 (SSE register or memory) -> V2SI operand 0 in a "y"
;; register.
;; NOTE(review): mode is "TI" here but "DI" in sse2_cvtpd2pi -- confirm
;; the difference is intentional.
2358 (define_insn "sse2_cvttpd2pi"
2359 [(set (match_operand:V2SI 0 "register_operand" "=y")
2360 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2362 "cvttpd2pi\t{%1, %0|%0, %1}"
2363 [(set_attr "type" "ssecvt")
2364 (set_attr "unit" "mmx")
2365 (set_attr "bdver1_decode" "double")
2366 (set_attr "prefix_data16" "1")
2367 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SImode integer in operand 2 (general register or
;; memory) to DFmode.  Operand 1 is a second V2DF input; it is tied to the
;; destination in the two non-AVX alternatives and is a separate source
;; register in the three-operand AVX alternative.
;; NOTE(review): the rtx wrapper that combines the converted scalar with
;; operand 1 is not visible in this listing.
2369 (define_insn "sse2_cvtsi2sd"
2370 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2373 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2374 (match_operand:V2DF 1 "register_operand" "0,0,x")
2378 cvtsi2sd\t{%2, %0|%0, %2}
2379 cvtsi2sd\t{%2, %0|%0, %2}
2380 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2381 [(set_attr "isa" "noavx,noavx,avx")
2382 (set_attr "type" "sseicvt")
2383 (set_attr "athlon_decode" "double,direct,*")
2384 (set_attr "amdfam10_decode" "vector,double,*")
2385 (set_attr "bdver1_decode" "double,direct,*")
2386 (set_attr "prefix" "orig,orig,vex")
2387 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit counterpart of sse2_cvtsi2sd.  Converts the DImode
;; integer in operand 2 to DFmode; only enabled when TARGET_64BIT holds.
;; The two non-AVX alternatives set prefix_rex, the AVX alternative sets
;; length_vex instead.
2389 (define_insn "sse2_cvtsi2sdq"
2390 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2393 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2394 (match_operand:V2DF 1 "register_operand" "0,0,x")
2396 "TARGET_SSE2 && TARGET_64BIT"
2398 cvtsi2sdq\t{%2, %0|%0, %2}
2399 cvtsi2sdq\t{%2, %0|%0, %2}
2400 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2401 [(set_attr "isa" "noavx,noavx,avx")
2402 (set_attr "type" "sseicvt")
2403 (set_attr "athlon_decode" "double,direct,*")
2404 (set_attr "amdfam10_decode" "vector,double,*")
2405 (set_attr "bdver1_decode" "double,direct,*")
2406 (set_attr "length_vex" "*,*,4")
2407 (set_attr "prefix_rex" "1,1,*")
2408 (set_attr "prefix" "orig,orig,vex")
2409 (set_attr "mode" "DF")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 (register or memory)
;; to an SImode value in a general register, modelled as
;; UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is
;; set here -- confirm this is intentional.
2411 (define_insn "sse2_cvtsd2si"
2412 [(set (match_operand:SI 0 "register_operand" "=r,r")
2415 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2416 (parallel [(const_int 0)]))]
2417 UNSPEC_FIX_NOTRUNC))]
2419 "%vcvtsd2si\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "sseicvt")
2421 (set_attr "athlon_decode" "double,vector")
2422 (set_attr "bdver1_decode" "double,double")
2423 (set_attr "prefix_rep" "1")
2424 (set_attr "prefix" "maybe_vex")
2425 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose input is a scalar DFmode operand
;; (register or memory) instead of element 0 of a V2DF vector.  Emits the
;; same cvtsd2si instruction.
2427 (define_insn "sse2_cvtsd2si_2"
2428 [(set (match_operand:SI 0 "register_operand" "=r,r")
2429 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2430 UNSPEC_FIX_NOTRUNC))]
2432 "%vcvtsd2si\t{%1, %0|%0, %1}"
2433 [(set_attr "type" "sseicvt")
2434 (set_attr "athlon_decode" "double,vector")
2435 (set_attr "amdfam10_decode" "double,double")
2436 (set_attr "bdver1_decode" "double,double")
2437 (set_attr "prefix_rep" "1")
2438 (set_attr "prefix" "maybe_vex")
2439 (set_attr "mode" "SI")])
;; cvtsd2si{q}: 64-bit counterpart of sse2_cvtsd2si.  Converts element 0
;; of the V2DF operand 1 to a DImode value in a general register; requires
;; TARGET_64BIT.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode attribute is
;; set here -- confirm this is intentional.
2441 (define_insn "sse2_cvtsd2siq"
2442 [(set (match_operand:DI 0 "register_operand" "=r,r")
2445 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2446 (parallel [(const_int 0)]))]
2447 UNSPEC_FIX_NOTRUNC))]
2448 "TARGET_SSE2 && TARGET_64BIT"
2449 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2450 [(set_attr "type" "sseicvt")
2451 (set_attr "athlon_decode" "double,vector")
2452 (set_attr "bdver1_decode" "double,double")
2453 (set_attr "prefix_rep" "1")
2454 (set_attr "prefix" "maybe_vex")
2455 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose input is a scalar DFmode operand
;; (register or memory) instead of element 0 of a V2DF vector.  Produces a
;; DImode result and requires TARGET_64BIT.
2457 (define_insn "sse2_cvtsd2siq_2"
2458 [(set (match_operand:DI 0 "register_operand" "=r,r")
2459 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2460 UNSPEC_FIX_NOTRUNC))]
2461 "TARGET_SSE2 && TARGET_64BIT"
2462 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "athlon_decode" "double,vector")
2465 (set_attr "amdfam10_decode" "double,double")
2466 (set_attr "bdver1_decode" "double,double")
2467 (set_attr "prefix_rep" "1")
2468 (set_attr "prefix" "maybe_vex")
2469 (set_attr "mode" "DI")])
;; cvttsd2si: emits the truncating scalar double-to-int instruction on
;; element 0 of the V2DF operand 1 (register or memory); the SImode result
;; lands in a general register.
2471 (define_insn "sse2_cvttsd2si"
2472 [(set (match_operand:SI 0 "register_operand" "=r,r")
2475 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2476 (parallel [(const_int 0)]))))]
2478 "%vcvttsd2si\t{%1, %0|%0, %1}"
2479 [(set_attr "type" "sseicvt")
2480 (set_attr "athlon_decode" "double,vector")
2481 (set_attr "amdfam10_decode" "double,double")
2482 (set_attr "bdver1_decode" "double,double")
2483 (set_attr "prefix_rep" "1")
2484 (set_attr "prefix" "maybe_vex")
2485 (set_attr "mode" "SI")])
;; cvttsd2si{q}: 64-bit counterpart of sse2_cvttsd2si.  Element 0 of the
;; V2DF operand 1 is converted to a DImode value in a general register;
;; requires TARGET_64BIT.
2487 (define_insn "sse2_cvttsd2siq"
2488 [(set (match_operand:DI 0 "register_operand" "=r,r")
2491 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2492 (parallel [(const_int 0)]))))]
2493 "TARGET_SSE2 && TARGET_64BIT"
2494 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): signed integer to double conversion of all
;; four elements, V4SI operand 1 (register or memory) -> V4DF operand 0.
2503 (define_insn "avx_cvtdq2pd256"
2504 [(set (match_operand:V4DF 0 "register_operand" "=x")
2505 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2507 "vcvtdq2pd\t{%1, %0|%0, %1}"
2508 [(set_attr "type" "ssecvt")
2509 (set_attr "prefix" "vex")
2510 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtdq2pd256 that takes a V8SI operand and uses only
;; elements 0..3 of it for the V4DF result.  The template prints the
;; source with the %x modifier (presumably the 128-bit register name --
;; confirm against the operand-printing code).
2512 (define_insn "avx_cvtdq2pd256_2"
2513 [(set (match_operand:V4DF 0 "register_operand" "=x")
2516 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2517 (parallel [(const_int 0) (const_int 1)
2518 (const_int 2) (const_int 3)]))))]
2520 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2521 [(set_attr "type" "ssecvt")
2522 (set_attr "prefix" "vex")
2523 (set_attr "mode" "V4DF")])
;; cvtdq2pd (128-bit): uses elements 0 and 1 of the V4SI operand 1
;; (register or memory) to produce the V2DF operand 0.
2525 (define_insn "sse2_cvtdq2pd"
2526 [(set (match_operand:V2DF 0 "register_operand" "=x")
2529 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2530 (parallel [(const_int 0) (const_int 1)]))))]
2532 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2533 [(set_attr "type" "ssecvt")
2534 (set_attr "prefix" "maybe_vex")
2535 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF operand 1 (register or memory) -> V4SI operand 0,
;; modelled as UNSPEC_FIX_NOTRUNC.  The {y} suffix is emitted only in the
;; AT&T dialect of the template.
2537 (define_insn "avx_cvtpd2dq256"
2538 [(set (match_operand:V4SI 0 "register_operand" "=x")
2539 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2540 UNSPEC_FIX_NOTRUNC))]
2542 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2543 [(set_attr "type" "ssecvt")
2544 (set_attr "prefix" "vex")
2545 (set_attr "mode" "OI")])
;; Expander for the 128-bit cvtpd2dq.  The V2DF operand 1 yields a V2SI
;; unspec result inside a V4SI destination; operand 2 is filled in with
;; the V2SI zero constant.
;; NOTE(review): how the V2SI result and the zero are combined is not
;; visible in this listing (presumably zero fills the other half).
2547 (define_expand "sse2_cvtpd2dq"
2548 [(set (match_operand:V4SI 0 "register_operand" "")
2550 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2554 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander.  Operand 2 must be a V2SI
;; zero constant.  The output code returns either the VEX mnemonic
;; vcvtpd2dq{x} or the legacy cvtpd2dq.
;; NOTE(review): the test choosing between the two templates is not
;; visible in this listing.
2556 (define_insn "*sse2_cvtpd2dq"
2557 [(set (match_operand:V4SI 0 "register_operand" "=x")
2559 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2561 (match_operand:V2SI 2 "const0_operand" "")))]
2565 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2567 return "cvtpd2dq\t{%1, %0|%0, %1}";
2569 [(set_attr "type" "ssecvt")
2570 (set_attr "prefix_rep" "1")
2571 (set_attr "prefix_data16" "0")
2572 (set_attr "prefix" "maybe_vex")
2573 (set_attr "mode" "TI")
2574 (set_attr "amdfam10_decode" "double")
2575 (set_attr "athlon_decode" "vector")
2576 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: double to signed integer conversion expressed as an rtx
;; fix, V4DF operand 1 (register or memory) -> V4SI operand 0.
2578 (define_insn "avx_cvttpd2dq256"
2579 [(set (match_operand:V4SI 0 "register_operand" "=x")
2580 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2582 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2583 [(set_attr "type" "ssecvt")
2584 (set_attr "prefix" "vex")
2585 (set_attr "mode" "OI")])
;; Expander for the 128-bit cvttpd2dq.  The V2DF operand 1 is converted
;; with fix:V2SI inside a V4SI destination; operand 2 is filled in with
;; the V2SI zero constant.
;; NOTE(review): how the V2SI result and the zero are combined is not
;; visible in this listing (presumably zero fills the other half).
2587 (define_expand "sse2_cvttpd2dq"
2588 [(set (match_operand:V4SI 0 "register_operand" "")
2590 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2593 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander.  Operand 2 must be a V2SI
;; zero constant.  The output code returns either the VEX mnemonic
;; vcvttpd2dq{x} or the legacy cvttpd2dq.
;; NOTE(review): the test choosing between the two templates is not
;; visible in this listing.
2595 (define_insn "*sse2_cvttpd2dq"
2596 [(set (match_operand:V4SI 0 "register_operand" "=x")
2598 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2599 (match_operand:V2SI 2 "const0_operand" "")))]
2603 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2605 return "cvttpd2dq\t{%1, %0|%0, %1}";
2607 [(set_attr "type" "ssecvt")
2608 (set_attr "amdfam10_decode" "double")
2609 (set_attr "athlon_decode" "vector")
2610 (set_attr "bdver1_decode" "double")
2611 (set_attr "prefix" "maybe_vex")
2612 (set_attr "mode" "TI")])
;; cvtsd2ss: narrows the V2DF operand 2 (register or memory) with
;; float_truncate:V2SF.  Operand 1 is a second V4SF input; it is tied to
;; the destination in the two non-AVX alternatives and is a separate
;; source register in the three-operand AVX alternative.
;; NOTE(review): the rtx wrapper that combines the truncated value with
;; operand 1 is not visible in this listing.
2614 (define_insn "sse2_cvtsd2ss"
2615 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2618 (float_truncate:V2SF
2619 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2620 (match_operand:V4SF 1 "register_operand" "0,0,x")
2624 cvtsd2ss\t{%2, %0|%0, %2}
2625 cvtsd2ss\t{%2, %0|%0, %2}
2626 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2627 [(set_attr "isa" "noavx,noavx,avx")
2628 (set_attr "type" "ssecvt")
2629 (set_attr "athlon_decode" "vector,double,*")
2630 (set_attr "amdfam10_decode" "vector,double,*")
2631 (set_attr "bdver1_decode" "direct,direct,*")
2632 (set_attr "prefix" "orig,orig,vex")
2633 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2 (register or
;; memory) as the conversion source.  Operand 1 is a second V2DF input; it
;; is tied to the destination in the two non-AVX alternatives and is a
;; separate source register in the three-operand AVX alternative.
;; NOTE(review): the widening operation and the wrapper that combines it
;; with operand 1 are not visible in this listing.
2635 (define_insn "sse2_cvtss2sd"
2636 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2640 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2641 (parallel [(const_int 0) (const_int 1)])))
2642 (match_operand:V2DF 1 "register_operand" "0,0,x")
2646 cvtss2sd\t{%2, %0|%0, %2}
2647 cvtss2sd\t{%2, %0|%0, %2}
2648 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,noavx,avx")
2650 (set_attr "type" "ssecvt")
2651 (set_attr "amdfam10_decode" "vector,double,*")
2652 (set_attr "athlon_decode" "direct,direct,*")
2653 (set_attr "bdver1_decode" "direct,direct,*")
2654 (set_attr "prefix" "orig,orig,vex")
2655 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: narrows all four doubles of the V4DF operand 1 (register
;; or memory) to floats in the V4SF operand 0.
2657 (define_insn "avx_cvtpd2ps256"
2658 [(set (match_operand:V4SF 0 "register_operand" "=x")
2659 (float_truncate:V4SF
2660 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2662 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2663 [(set_attr "type" "ssecvt")
2664 (set_attr "prefix" "vex")
2665 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit cvtpd2ps.  The V2DF operand 1 is narrowed with
;; float_truncate:V2SF inside a V4SF destination; operand 2 is filled in
;; with the V2SF zero constant.
;; NOTE(review): how the V2SF result and the zero are combined is not
;; visible in this listing (presumably zero fills the other half).
2667 (define_expand "sse2_cvtpd2ps"
2668 [(set (match_operand:V4SF 0 "register_operand" "")
2670 (float_truncate:V2SF
2671 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2674 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander.  Operand 2 must be a V2SF
;; zero constant.  The output code returns either the VEX mnemonic
;; vcvtpd2ps{x} or the legacy cvtpd2ps.
;; NOTE(review): the test choosing between the two templates is not
;; visible in this listing.
2676 (define_insn "*sse2_cvtpd2ps"
2677 [(set (match_operand:V4SF 0 "register_operand" "=x")
2679 (float_truncate:V2SF
2680 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2681 (match_operand:V2SF 2 "const0_operand" "")))]
2685 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2687 return "cvtpd2ps\t{%1, %0|%0, %1}";
2689 [(set_attr "type" "ssecvt")
2690 (set_attr "amdfam10_decode" "double")
2691 (set_attr "athlon_decode" "vector")
2692 (set_attr "bdver1_decode" "double")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "prefix" "maybe_vex")
2695 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4SF operand 1 (register or memory) ->
;; V4DF operand 0.
2697 (define_insn "avx_cvtps2pd256"
2698 [(set (match_operand:V4DF 0 "register_operand" "=x")
2700 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2702 "vcvtps2pd\t{%1, %0|%0, %1}"
2703 [(set_attr "type" "ssecvt")
2704 (set_attr "prefix" "vex")
2705 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtps2pd256 that takes a V8SF operand and uses only
;; elements 0..3 of it for the V4DF result.  The template prints the
;; source with the %x modifier (presumably the 128-bit register name --
;; confirm against the operand-printing code).
2707 (define_insn "*avx_cvtps2pd256_2"
2708 [(set (match_operand:V4DF 0 "register_operand" "=x")
2711 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2712 (parallel [(const_int 0) (const_int 1)
2713 (const_int 2) (const_int 3)]))))]
2715 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2716 [(set_attr "type" "ssecvt")
2717 (set_attr "prefix" "vex")
2718 (set_attr "mode" "V4DF")])
;; cvtps2pd (128-bit): uses elements 0 and 1 of the V4SF operand 1
;; (register or memory) to produce the V2DF operand 0.
2720 (define_insn "sse2_cvtps2pd"
2721 [(set (match_operand:V2DF 0 "register_operand" "=x")
2724 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2725 (parallel [(const_int 0) (const_int 1)]))))]
2727 "%vcvtps2pd\t{%1, %0|%0, %1}"
2728 [(set_attr "type" "ssecvt")
2729 (set_attr "amdfam10_decode" "direct")
2730 (set_attr "athlon_decode" "double")
2731 (set_attr "bdver1_decode" "double")
2732 (set_attr "prefix_data16" "0")
2733 (set_attr "prefix" "maybe_vex")
2734 (set_attr "mode" "V2DF")])
;; Widen the high half of a V4SF vector to V2DF in two steps: a shuffle
;; selecting elements 6, 7, 2, 3 of a combination involving operand 1 into
;; a fresh V4SF temporary (operand 2), then a conversion of elements 0 and
;; 1 of a source to the V2DF operand 0.
;; NOTE(review): the rtx lines naming the shuffle's other input and the
;; second step's source are not visible here (presumably operand 1 paired
;; with itself, and the temporary).
2736 (define_expand "vec_unpacks_hi_v4sf"
2741 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2742 (parallel [(const_int 6) (const_int 7)
2743 (const_int 2) (const_int 3)])))
2744 (set (match_operand:V2DF 0 "register_operand" "")
2748 (parallel [(const_int 0) (const_int 1)]))))]
2750 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF vector to V4DF: elements 4..7 of operand
;; 1 are selected (a fresh V4SF temporary is allocated as operand 2), and
;; the V4DF operand 0 is then set.
;; NOTE(review): the source expression of the final set is not visible in
;; this listing (presumably a widening of the temporary).
2752 (define_expand "vec_unpacks_hi_v8sf"
2755 (match_operand:V8SF 1 "nonimmediate_operand" "")
2756 (parallel [(const_int 4) (const_int 5)
2757 (const_int 6) (const_int 7)])))
2758 (set (match_operand:V4DF 0 "register_operand" "")
2762 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF vector: elements 0 and 1 of operand 1
;; produce the V2DF operand 0.
2764 (define_expand "vec_unpacks_lo_v4sf"
2765 [(set (match_operand:V2DF 0 "register_operand" "")
2768 (match_operand:V4SF 1 "nonimmediate_operand" "")
2769 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF vector: elements 0..3 of operand 1 produce
;; the V4DF operand 0.
2772 (define_expand "vec_unpacks_lo_v8sf"
2773 [(set (match_operand:V4DF 0 "register_operand" "")
2776 (match_operand:V8SF 1 "nonimmediate_operand" "")
2777 (parallel [(const_int 0) (const_int 1)
2778 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode with half as many
;; elements, each twice as wide (e.g. V8HI -> V4SF, V4SI -> V2DF).  Used as
;; the result mode of the vec_unpack*_float_* expanders.
2781 (define_mode_attr sseunpackfltmode
2782 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Signed unpack-and-convert of the high half: first expand
;; vec_unpacks_hi_<mode> of operand 1 into a temporary of mode
;; <sseunpackmode>, then set operand 0 to the FLOAT conversion of that
;; temporary in mode <sseunpackfltmode>.
2784 (define_expand "vec_unpacks_float_hi_<mode>"
2785 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2786 (match_operand:VI2_AVX2 1 "register_operand" "")]
2789 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2791 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2792 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2793 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-and-convert of the low half: first expand
;; vec_unpacks_lo_<mode> of operand 1 into a temporary of mode
;; <sseunpackmode>, then set operand 0 to the FLOAT conversion of that
;; temporary in mode <sseunpackfltmode>.
2797 (define_expand "vec_unpacks_float_lo_<mode>"
2798 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2799 (match_operand:VI2_AVX2 1 "register_operand" "")]
2802 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2804 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2805 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2806 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the high half: first expand
;; vec_unpacku_hi_<mode> of operand 1 into a temporary of mode
;; <sseunpackmode>, then set operand 0 to the FLOAT conversion of that
;; temporary in mode <sseunpackfltmode>.
2810 (define_expand "vec_unpacku_float_hi_<mode>"
2811 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2812 (match_operand:VI2_AVX2 1 "register_operand" "")]
2815 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2817 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2818 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2819 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the low half: first expand
;; vec_unpacku_lo_<mode> of operand 1 into a temporary of mode
;; <sseunpackmode>, then set operand 0 to the FLOAT conversion of that
;; temporary in mode <sseunpackfltmode>.
2823 (define_expand "vec_unpacku_float_lo_<mode>"
2824 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2825 (match_operand:VI2_AVX2 1 "register_operand" "")]
2828 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2830 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2831 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2832 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed V4SI -> V2DF conversion of the high half in two steps: a shuffle
;; selecting elements 2, 3, 2, 3 of operand 1 into a fresh V4SI temporary
;; (operand 2), then a conversion of elements 0 and 1 of a source to the
;; V2DF operand 0.
;; NOTE(review): the source of the second step is not visible in this
;; listing (presumably the temporary).
2836 (define_expand "vec_unpacks_float_hi_v4si"
2839 (match_operand:V4SI 1 "nonimmediate_operand" "")
2840 (parallel [(const_int 2) (const_int 3)
2841 (const_int 2) (const_int 3)])))
2842 (set (match_operand:V2DF 0 "register_operand" "")
2846 (parallel [(const_int 0) (const_int 1)]))))]
2848 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V4SI -> V2DF conversion of the low half: elements 0 and 1 of
;; operand 1 produce the V2DF operand 0.
2850 (define_expand "vec_unpacks_float_lo_v4si"
2851 [(set (match_operand:V2DF 0 "register_operand" "")
2854 (match_operand:V4SI 1 "nonimmediate_operand" "")
2855 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed V8SI -> V4DF conversion of the high half: elements 4..7 of
;; operand 1 are selected (a fresh V4SI temporary is allocated as operand
;; 2), and the V4DF operand 0 is then set.
;; NOTE(review): the source expression of the final set is not visible in
;; this listing (presumably a float conversion of the temporary).
2858 (define_expand "vec_unpacks_float_hi_v8si"
2861 (match_operand:V8SI 1 "nonimmediate_operand" "")
2862 (parallel [(const_int 4) (const_int 5)
2863 (const_int 6) (const_int 7)])))
2864 (set (match_operand:V4DF 0 "register_operand" "")
2868 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V8SI -> V4DF conversion of the low half: elements 0..3 of
;; operand 1 produce the V4DF operand 0.
2870 (define_expand "vec_unpacks_float_lo_v8si"
2871 [(set (match_operand:V4DF 0 "register_operand" "")
2874 (match_operand:V8SI 1 "nonimmediate_operand" "")
2875 (parallel [(const_int 0) (const_int 1)
2876 (const_int 2) (const_int 3)]))))]
;; Unsigned V4SI -> V2DF conversion of the high half:
;;   1. shuffle elements 2, 3, 2, 3 of operand 1 (V4SI temporary,
;;      operand 5),
;;   2. convert elements 0 and 1 of a source to V2DF,
;;   3. compare that result (operand 6) against zero (operand 3) with lt,
;;   4. and the comparison result (operand 7) with a vector built from
;;      2**32 (operand 4),
;;   5. add the masked correction (operand 8) to the converted result.
;; The preparation code builds the constants and allocates temporaries
;; for operands 5..8.
;; NOTE(review): the destinations of steps 1-4 and the source of step 2
;; are not visible in this listing.
2879 (define_expand "vec_unpacku_float_hi_v4si"
2882 (match_operand:V4SI 1 "nonimmediate_operand" "")
2883 (parallel [(const_int 2) (const_int 3)
2884 (const_int 2) (const_int 3)])))
2889 (parallel [(const_int 0) (const_int 1)]))))
2891 (lt:V2DF (match_dup 6) (match_dup 3)))
2893 (and:V2DF (match_dup 7) (match_dup 4)))
2894 (set (match_operand:V2DF 0 "register_operand" "")
2895 (plus:V2DF (match_dup 6) (match_dup 8)))]
2898 REAL_VALUE_TYPE TWO32r;
2902 real_ldexp (&TWO32r, &dconst1, 32);
2903 x = const_double_from_real_value (TWO32r, DFmode);
2905 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2906 operands[4] = force_reg (V2DFmode,
2907 ix86_build_const_vector (V2DFmode, 1, x));
2909 operands[5] = gen_reg_rtx (V4SImode);
2911 for (i = 6; i < 9; i++)
2912 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI -> V2DF conversion of the low half:
;;   1. convert elements 0 and 1 of operand 1 to V2DF,
;;   2. compare that result (operand 5) against zero (operand 3) with lt,
;;   3. and the comparison result (operand 6) with a vector built from
;;      2**32 (operand 4),
;;   4. add the masked correction (operand 7) to the converted result.
;; The preparation code builds the constants and allocates V2DF
;; temporaries for operands 5..7.
;; NOTE(review): the destinations of steps 1-3 are not visible in this
;; listing.
2915 (define_expand "vec_unpacku_float_lo_v4si"
2919 (match_operand:V4SI 1 "nonimmediate_operand" "")
2920 (parallel [(const_int 0) (const_int 1)]))))
2922 (lt:V2DF (match_dup 5) (match_dup 3)))
2924 (and:V2DF (match_dup 6) (match_dup 4)))
2925 (set (match_operand:V2DF 0 "register_operand" "")
2926 (plus:V2DF (match_dup 5) (match_dup 7)))]
2929 REAL_VALUE_TYPE TWO32r;
2933 real_ldexp (&TWO32r, &dconst1, 32);
2934 x = const_double_from_real_value (TWO32r, DFmode);
2936 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2937 operands[4] = force_reg (V2DFmode,
2938 ix86_build_const_vector (V2DFmode, 1, x));
2940 for (i = 5; i < 8; i++)
2941 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V8SI -> V4DF conversion of the high half, emitted entirely
;; from the preparation code:
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed conversion of tmp[5] (gen_avx_cvtdq2pd256)
;;   tmp[3] = LT (tmp[2], zero vector tmp[0])
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is built from 2**32
;;   operand 0 = tmp[2] + tmp[4]
2944 (define_expand "vec_unpacku_float_hi_v8si"
2945 [(match_operand:V4DF 0 "register_operand" "")
2946 (match_operand:V8SI 1 "register_operand" "")]
2949 REAL_VALUE_TYPE TWO32r;
2953 real_ldexp (&TWO32r, &dconst1, 32);
2954 x = const_double_from_real_value (TWO32r, DFmode);
2956 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2957 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2958 tmp[5] = gen_reg_rtx (V4SImode);
2960 for (i = 2; i < 5; i++)
2961 tmp[i] = gen_reg_rtx (V4DFmode);
2962 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2963 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
2964 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2965 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2966 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2967 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V8SI -> V4DF conversion of the low half, emitted entirely from
;; the preparation code:
;;   tmp[2] = signed conversion of operand 1 via gen_avx_cvtdq2pd256_2
;;   tmp[3] = LT (tmp[2], zero vector tmp[0])
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is built from 2**32
;;   operand 0 = tmp[2] + tmp[4]
2971 (define_expand "vec_unpacku_float_lo_v8si"
2972 [(match_operand:V4DF 0 "register_operand" "")
2973 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2976 REAL_VALUE_TYPE TWO32r;
2980 real_ldexp (&TWO32r, &dconst1, 32);
2981 x = const_double_from_real_value (TWO32r, DFmode);
2983 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2984 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2986 for (i = 2; i < 5; i++)
2987 tmp[i] = gen_reg_rtx (V4DFmode);
2988 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
2989 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2990 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2991 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2992 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Pack two V4DF vectors into one V8SF: operands 1 and 2 are each narrowed
;; with float_truncate:V4SF, then the V8SF operand 0 is set.  Operands 3
;; and 4 are fresh V4SF temporaries.
;; NOTE(review): the destinations of the two truncations and the source of
;; the final set are not visible in this listing (presumably the
;; temporaries and their concatenation).
2996 (define_expand "vec_pack_trunc_v4df"
2998 (float_truncate:V4SF
2999 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3001 (float_truncate:V4SF
3002 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3003 (set (match_operand:V8SF 0 "register_operand" "")
3009 operands[3] = gen_reg_rtx (V4SFmode);
3010 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF vectors into one V4SF: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
3013 (define_expand "vec_pack_trunc_v2df"
3014 [(match_operand:V4SF 0 "register_operand" "")
3015 (match_operand:V2DF 1 "nonimmediate_operand" "")
3016 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3021 r1 = gen_reg_rtx (V4SFmode);
3022 r2 = gen_reg_rtx (V4SFmode);
3024 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3025 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3026 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI: each input is converted with
;; gen_sse2_cvttpd2dq into its own V4SI temporary (r1, r2), and the two
;; temporaries are combined with gen_vec_interleave_lowv2di, applied to
;; the V2DI lowparts of operand 0, r1 and r2.
3030 (define_expand "vec_pack_sfix_trunc_v2df"
3031 [(match_operand:V4SI 0 "register_operand" "")
3032 (match_operand:V2DF 1 "nonimmediate_operand" "")
3033 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3038 r1 = gen_reg_rtx (V4SImode);
3039 r2 = gen_reg_rtx (V4SImode);
3041 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3042 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3043 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3044 gen_lowpart (V2DImode, r1),
3045 gen_lowpart (V2DImode, r2)));
;; Same structure as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.  The
;; two V4SI temporaries are then combined with
;; gen_vec_interleave_lowv2di on V2DI lowparts.
3049 (define_expand "vec_pack_sfix_v2df"
3050 [(match_operand:V4SI 0 "register_operand" "")
3051 (match_operand:V2DF 1 "nonimmediate_operand" "")
3052 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3057 r1 = gen_reg_rtx (V4SImode);
3058 r2 = gen_reg_rtx (V4SImode);
3060 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3061 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3062 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3063 gen_lowpart (V2DImode, r1),
3064 gen_lowpart (V2DImode, r2)));
3068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3070 ;; Parallel single-precision floating point element swizzling
3072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands
;; everywhere.  ix86_fixup_binary_operands picks the destination actually
;; used for the insn; when that differs from operand 0, the result is
;; copied to operand 0 afterwards.
3074 (define_expand "sse_movhlps_exp"
3075 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3078 (match_operand:V4SF 1 "nonimmediate_operand" "")
3079 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3080 (parallel [(const_int 6)
3086 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3088 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3090 /* Fix up the destination if needed. */
3091 if (dst != operands[0])
3092 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms.  Five alternatives:
;;   0/1  register-register movhlps / vmovhlps
;;   2/3  operand 2 in offsettable memory: movlps / vmovlps, with the
;;        address printed via the %H modifier
;;   4    destination in memory: %vmovhps from register operand 2
;; The insn condition rejects operands 1 and 2 both being memory.
3097 (define_insn "sse_movhlps"
3098 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3101 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3102 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3103 (parallel [(const_int 6)
3107 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3109 movhlps\t{%2, %0|%0, %2}
3110 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3111 movlps\t{%H2, %0|%0, %H2}
3112 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3113 %vmovhps\t{%2, %0|%0, %2}"
3114 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3115 (set_attr "type" "ssemov")
3116 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3117 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands
;; everywhere.  ix86_fixup_binary_operands picks the destination actually
;; used for the insn; when that differs from operand 0, the result is
;; copied to operand 0 afterwards.
3119 (define_expand "sse_movlhps_exp"
3120 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3123 (match_operand:V4SF 1 "nonimmediate_operand" "")
3124 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3125 (parallel [(const_int 0)
3131 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3133 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3135 /* Fix up the destination if needed. */
3136 if (dst != operands[0])
3137 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms.  Five alternatives:
;;   0/1  register-register movlhps / vmovlhps
;;   2/3  operand 2 in memory: movhps / vmovhps
;;   4    destination in offsettable memory: %vmovlps of register
;;        operand 2, with the address printed via the %H modifier
;; Alternative 3 emits vmovhps, whose source operand must be memory, so
;; operand 2 uses the "m" constraint there (mirroring the non-AVX
;; alternative 2 and the o,o pairing in sse_movhlps).  With "x" the
;; alternative duplicated alternative 1 and paired a register with a
;; memory-only instruction form.
3142 (define_insn "sse_movlhps"
3143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3146 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3147 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3148 (parallel [(const_int 0)
3152 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3154 movlhps\t{%2, %0|%0, %2}
3155 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3156 movhps\t{%2, %0|%0, %2}
3157 vmovhps\t{%2, %1, %0|%0, %1, %2}
3158 %vmovlps\t{%2, %H0|%H0, %2}"
3159 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3160 (set_attr "type" "ssemov")
3161 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3162 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3164 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps (256-bit): interleaves V8SF operands 1 and 2.  With indices
;; 0..7 naming operand 1 and 8..15 naming operand 2, the selected elements
;; are 2,10,3,11 followed by 6,14,7,15.
3165 (define_insn "avx_unpckhps256"
3166 [(set (match_operand:V8SF 0 "register_operand" "=x")
3169 (match_operand:V8SF 1 "register_operand" "x")
3170 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3171 (parallel [(const_int 2) (const_int 10)
3172 (const_int 3) (const_int 11)
3173 (const_int 6) (const_int 14)
3174 (const_int 7) (const_int 15)])))]
3176 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "prefix" "vex")
3179 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF vectors, built in three steps:
;;   1. a selection with indices 0,8,1,9,4,12,5,13 of operands 1 and 2,
;;   2. a selection with indices 2,10,3,11,6,14,7,15,
;;   3. a final selection with indices 4..7 and 12..15 into operand 0.
;; Operands 3 and 4 are fresh V8SF temporaries.
;; NOTE(review): the destinations of steps 1-2 and the inputs of steps 2-3
;; are not visible in this listing (presumably the temporaries).
3181 (define_expand "vec_interleave_highv8sf"
3185 (match_operand:V8SF 1 "register_operand" "x")
3186 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3187 (parallel [(const_int 0) (const_int 8)
3188 (const_int 1) (const_int 9)
3189 (const_int 4) (const_int 12)
3190 (const_int 5) (const_int 13)])))
3196 (parallel [(const_int 2) (const_int 10)
3197 (const_int 3) (const_int 11)
3198 (const_int 6) (const_int 14)
3199 (const_int 7) (const_int 15)])))
3200 (set (match_operand:V8SF 0 "register_operand" "")
3205 (parallel [(const_int 4) (const_int 5)
3206 (const_int 6) (const_int 7)
3207 (const_int 12) (const_int 13)
3208 (const_int 14) (const_int 15)])))]
3211 operands[3] = gen_reg_rtx (V8SFmode);
3212 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps (128-bit): interleaves V4SF operands 1 and 2.
;; With indices 0..3 naming operand 1 and 4..7 naming operand 2, the
;; selected elements are 2,6,3,7.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
3215 (define_insn "vec_interleave_highv4sf"
3216 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3219 (match_operand:V4SF 1 "register_operand" "0,x")
3220 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3221 (parallel [(const_int 2) (const_int 6)
3222 (const_int 3) (const_int 7)])))]
3225 unpckhps\t{%2, %0|%0, %2}
3226 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3227 [(set_attr "isa" "noavx,avx")
3228 (set_attr "type" "sselog")
3229 (set_attr "prefix" "orig,vex")
3230 (set_attr "mode" "V4SF")])
3232 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps (256-bit): interleaves V8SF operands 1 and 2.  With indices
;; 0..7 naming operand 1 and 8..15 naming operand 2, the selected elements
;; are 0,8,1,9 followed by 4,12,5,13.
3233 (define_insn "avx_unpcklps256"
3234 [(set (match_operand:V8SF 0 "register_operand" "=x")
3237 (match_operand:V8SF 1 "register_operand" "x")
3238 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3239 (parallel [(const_int 0) (const_int 8)
3240 (const_int 1) (const_int 9)
3241 (const_int 4) (const_int 12)
3242 (const_int 5) (const_int 13)])))]
3244 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3245 [(set_attr "type" "sselog")
3246 (set_attr "prefix" "vex")
3247 (set_attr "mode" "V8SF")])
;; Full-width low interleave of two V8SF vectors, built in three steps:
;;   1. a selection with indices 0,8,1,9,4,12,5,13 of operands 1 and 2,
;;   2. a selection with indices 2,10,3,11,6,14,7,15,
;;   3. a final selection with indices 0..3 and 8..11 into operand 0.
;; Operands 3 and 4 are fresh V8SF temporaries.
;; NOTE(review): the destinations of steps 1-2 and the inputs of steps 2-3
;; are not visible in this listing (presumably the temporaries).
3249 (define_expand "vec_interleave_lowv8sf"
3253 (match_operand:V8SF 1 "register_operand" "x")
3254 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3255 (parallel [(const_int 0) (const_int 8)
3256 (const_int 1) (const_int 9)
3257 (const_int 4) (const_int 12)
3258 (const_int 5) (const_int 13)])))
3264 (parallel [(const_int 2) (const_int 10)
3265 (const_int 3) (const_int 11)
3266 (const_int 6) (const_int 14)
3267 (const_int 7) (const_int 15)])))
3268 (set (match_operand:V8SF 0 "register_operand" "")
3273 (parallel [(const_int 0) (const_int 1)
3274 (const_int 2) (const_int 3)
3275 (const_int 8) (const_int 9)
3276 (const_int 10) (const_int 11)])))]
3279 operands[3] = gen_reg_rtx (V8SFmode);
3280 operands[4] = gen_reg_rtx (V8SFmode);
;; Interleave the low elements of two V4SF vectors.  The selector picks
;; elements 0, 4, 1, 5 (indices 4-7 presumably address operand 2; the
;; concatenation is not visible here).  Alternative 0 is the two-operand
;; SSE unpcklps with the destination tied to operand 1; alternative 1 is
;; the three-operand AVX vunpcklps.
3283 (define_insn "vec_interleave_lowv4sf"
3284 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3287 (match_operand:V4SF 1 "register_operand" "0,x")
3288 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3289 (parallel [(const_int 0) (const_int 4)
3290 (const_int 1) (const_int 5)])))]
3293 unpcklps\t{%2, %0|%0, %2}
3294 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3295 [(set_attr "isa" "noavx,avx")
3296 (set_attr "type" "sselog")
3297 (set_attr "prefix" "orig,vex")
3298 (set_attr "mode" "V4SF")])
3300 ;; These are modeled with the same vec_concat as the others so that we
3301 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector 1, 1, 3, 3, 5, 5, 7, 7 duplicates each
;; odd-numbered element of operand 1 into the even slot below it.  The
;; single source operand may be a register or memory.
3302 (define_insn "avx_movshdup256"
3303 [(set (match_operand:V8SF 0 "register_operand" "=x")
3306 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3308 (parallel [(const_int 1) (const_int 1)
3309 (const_int 3) (const_int 3)
3310 (const_int 5) (const_int 5)
3311 (const_int 7) (const_int 7)])))]
3313 "vmovshdup\t{%1, %0|%0, %1}"
3314 [(set_attr "type" "sse")
3315 (set_attr "prefix" "vex")
3316 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  Only the first selector index (element 1) is
;; visible here; the rest of the selector and the insn condition are not
;; shown.  The "%v" in the template together with prefix "maybe_vex"
;; indicates the VEX-encoded form is emitted when available.
3318 (define_insn "sse3_movshdup"
3319 [(set (match_operand:V4SF 0 "register_operand" "=x")
3322 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3324 (parallel [(const_int 1)
3329 "%vmovshdup\t{%1, %0|%0, %1}"
3330 [(set_attr "type" "sse")
3331 (set_attr "prefix_rep" "1")
3332 (set_attr "prefix" "maybe_vex")
3333 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selector 0, 0, 2, 2, 4, 4, 6, 6 duplicates each
;; even-numbered element of operand 1 into the odd slot above it.  The
;; single source operand may be a register or memory.
3335 (define_insn "avx_movsldup256"
3336 [(set (match_operand:V8SF 0 "register_operand" "=x")
3339 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3341 (parallel [(const_int 0) (const_int 0)
3342 (const_int 2) (const_int 2)
3343 (const_int 4) (const_int 4)
3344 (const_int 6) (const_int 6)])))]
3346 "vmovsldup\t{%1, %0|%0, %1}"
3347 [(set_attr "type" "sse")
3348 (set_attr "prefix" "vex")
3349 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  Only the first selector index (element 0) is
;; visible here; the rest of the selector and the insn condition are not
;; shown.  The "%v" in the template together with prefix "maybe_vex"
;; indicates the VEX-encoded form is emitted when available.
3351 (define_insn "sse3_movsldup"
3352 [(set (match_operand:V4SF 0 "register_operand" "=x")
3355 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3357 (parallel [(const_int 0)
3362 "%vmovsldup\t{%1, %0|%0, %1}"
3363 [(set_attr "type" "sse")
3364 (set_attr "prefix_rep" "1")
3365 (set_attr "prefix" "maybe_vex")
3366 (set_attr "mode" "V4SF")])
;; Expander that takes the 8-bit shuffle immediate in operand 3 and decodes
;; it into eight explicit element indices for avx_shufps256_1.  Each 2-bit
;; field of the mask is used twice: once as-is (or +8) for the first four
;; result elements and once offset by a further 4 (+4 / +12) for the last
;; four.  Fields 0 and 1 yield indices in 0-7, fields 2 and 3 yield indices
;; in 8-15 (presumably addressing operand 2).
3368 (define_expand "avx_shufps256"
3369 [(match_operand:V8SF 0 "register_operand" "")
3370 (match_operand:V8SF 1 "register_operand" "")
3371 (match_operand:V8SF 2 "nonimmediate_operand" "")
3372 (match_operand:SI 3 "const_int_operand" "")]
3375 int mask = INTVAL (operands[3]);
3376 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3377 GEN_INT ((mask >> 0) & 3),
3378 GEN_INT ((mask >> 2) & 3),
3379 GEN_INT (((mask >> 4) & 3) + 8),
3380 GEN_INT (((mask >> 6) & 3) + 8),
3381 GEN_INT (((mask >> 0) & 3) + 4),
3382 GEN_INT (((mask >> 2) & 3) + 4),
3383 GEN_INT (((mask >> 4) & 3) + 12),
3384 GEN_INT (((mask >> 6) & 3) + 12)));
3388 ;; Each 2-bit field in the mask selects 2 elements, one per 128-bit lane.
;; Matching insn for the 256-bit vshufps.  Operands 3-10 are the eight
;; selected element indices.  The condition requires operands 7-10 to equal
;; operands 3-6 plus 4, so the last four selections mirror the first four.
;; The output code rebuilds the 8-bit immediate from operands 3-6 (two bits
;; each, with the +8 bias removed from operands 5 and 6) and overwrites
;; operands[3] with it before emitting vshufps.
3389 (define_insn "avx_shufps256_1"
3390 [(set (match_operand:V8SF 0 "register_operand" "=x")
3393 (match_operand:V8SF 1 "register_operand" "x")
3394 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3395 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3396 (match_operand 4 "const_0_to_3_operand" "")
3397 (match_operand 5 "const_8_to_11_operand" "")
3398 (match_operand 6 "const_8_to_11_operand" "")
3399 (match_operand 7 "const_4_to_7_operand" "")
3400 (match_operand 8 "const_4_to_7_operand" "")
3401 (match_operand 9 "const_12_to_15_operand" "")
3402 (match_operand 10 "const_12_to_15_operand" "")])))]
3404 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3405 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3406 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3407 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3410 mask = INTVAL (operands[3]);
3411 mask |= INTVAL (operands[4]) << 2;
3412 mask |= (INTVAL (operands[5]) - 8) << 4;
3413 mask |= (INTVAL (operands[6]) - 8) << 6;
3414 operands[3] = GEN_INT (mask);
3416 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3418 [(set_attr "type" "sselog")
3419 (set_attr "length_immediate" "1")
3420 (set_attr "prefix" "vex")
3421 (set_attr "mode" "V8SF")])
;; Expander that takes the 8-bit shuffle immediate in operand 3 and decodes
;; it into four explicit element indices for sse_shufps_v4sf.  Mask fields
;; 0 and 1 yield indices in 0-3; fields 2 and 3 are biased by 4 to yield
;; indices in 4-7 (presumably addressing operand 2).
3423 (define_expand "sse_shufps"
3424 [(match_operand:V4SF 0 "register_operand" "")
3425 (match_operand:V4SF 1 "register_operand" "")
3426 (match_operand:V4SF 2 "nonimmediate_operand" "")
3427 (match_operand:SI 3 "const_int_operand" "")]
3430 int mask = INTVAL (operands[3]);
3431 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3432 GEN_INT ((mask >> 0) & 3),
3433 GEN_INT ((mask >> 2) & 3),
3434 GEN_INT (((mask >> 4) & 3) + 4),
3435 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn for the 128-bit shufps, instantiated over the VI4F_128
;; modes.  The result selects four elements from the concatenation of
;; operands 1 and 2: operands 3 and 4 index 0-3, operands 5 and 6 index
;; 4-7.  The output code packs those indices back into the 8-bit immediate
;; (removing the +4 bias from operands 5 and 6), stores it in operands[3],
;; and returns either the two-operand shufps or the three-operand vshufps
;; template depending on which_alternative (the case labels are not
;; visible here).
3439 (define_insn "sse_shufps_<mode>"
3440 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3441 (vec_select:VI4F_128
3442 (vec_concat:<ssedoublevecmode>
3443 (match_operand:VI4F_128 1 "register_operand" "0,x")
3444 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3445 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3446 (match_operand 4 "const_0_to_3_operand" "")
3447 (match_operand 5 "const_4_to_7_operand" "")
3448 (match_operand 6 "const_4_to_7_operand" "")])))]
3452 mask |= INTVAL (operands[3]) << 0;
3453 mask |= INTVAL (operands[4]) << 2;
3454 mask |= (INTVAL (operands[5]) - 4) << 4;
3455 mask |= (INTVAL (operands[6]) - 4) << 6;
3456 operands[3] = GEN_INT (mask);
3458 switch (which_alternative)
3461 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3463 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3468 [(set_attr "isa" "noavx,avx")
3469 (set_attr "type" "sselog")
3470 (set_attr "length_immediate" "1")
3471 (set_attr "prefix" "orig,vex")
3472 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF destination.
;; Alternative 0 stores from a register to memory with movhps, alternative
;; 1 is register-to-register via movhlps, and alternative 2 reads from an
;; offsettable memory source with movlps using %H1 (presumably the upper
;; 8 bytes of the memory operand).
3474 (define_insn "sse_storehps"
3475 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3477 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3478 (parallel [(const_int 2) (const_int 3)])))]
3481 %vmovhps\t{%1, %0|%0, %1}
3482 %vmovhlps\t{%1, %d0|%d0, %1}
3483 %vmovlps\t{%H1, %d0|%d0, %H1}"
3484 [(set_attr "type" "ssemov")
3485 (set_attr "prefix" "maybe_vex")
3486 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  It lets
;; ix86_fixup_binary_operands choose a destination (dst) for the V4SF
;; operation, emits sse_loadhps into it, and copies dst back to
;; operands[0] when the two differ.
3488 (define_expand "sse_loadhps_exp"
3489 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3492 (match_operand:V4SF 1 "nonimmediate_operand" "")
3493 (parallel [(const_int 0) (const_int 1)]))
3494 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3497 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3499 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3501 /* Fix up the destination if needed. */
3502 if (dst != operands[0])
3503 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2
;; (presumably as the new high half; the combining rtx is not visible
;; here).  Alternatives 0/1 load operand 2 from memory with movhps /
;; vmovhps, alternatives 2/3 take it from a register with movlhps /
;; vmovlhps, and alternative 4 handles an offsettable memory destination
;; by storing operand 2 to %H0 with movlps.
3508 (define_insn "sse_loadhps"
3509 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3512 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3513 (parallel [(const_int 0) (const_int 1)]))
3514 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3517 movhps\t{%2, %0|%0, %2}
3518 vmovhps\t{%2, %1, %0|%0, %1, %2}
3519 movlhps\t{%2, %0|%0, %2}
3520 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3521 %vmovlps\t{%2, %H0|%H0, %2}"
3522 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3523 (set_attr "type" "ssemov")
3524 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3525 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF into a V2SF destination.
;; Alternative 0 stores from a register to memory with movlps, alternative
;; 1 is a register-to-register movaps, and alternative 2 loads from a
;; memory source with movlps.
3527 (define_insn "sse_storelps"
3528 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3530 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3531 (parallel [(const_int 0) (const_int 1)])))]
3534 %vmovlps\t{%1, %0|%0, %1}
3535 %vmovaps\t{%1, %0|%0, %1}
3536 %vmovlps\t{%1, %d0|%d0, %1}"
3537 [(set_attr "type" "ssemov")
3538 (set_attr "prefix" "maybe_vex")
3539 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  It lets
;; ix86_fixup_binary_operands choose a destination (dst) for the V4SF
;; operation, emits sse_loadlps into it, and copies dst back to
;; operands[0] when the two differ.
3541 (define_expand "sse_loadlps_exp"
3542 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3544 (match_operand:V2SF 2 "nonimmediate_operand" "")
3546 (match_operand:V4SF 1 "nonimmediate_operand" "")
3547 (parallel [(const_int 2) (const_int 3)]))))]
3550 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3552 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3554 /* Fix up the destination if needed. */
3555 if (dst != operands[0])
3556 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1
;; (presumably operand 2 becomes the new low half; the combining rtx is
;; not visible here).  Alternatives 0/1 take operand 2 from a register and
;; use shufps / vshufps with immediate 0xe4; alternatives 2/3 load it from
;; memory (or a register for alternative 3) with movlps / vmovlps;
;; alternative 4 stores operand 2 into a memory destination with movlps.
3561 (define_insn "sse_loadlps"
3562 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3564 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3566 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3567 (parallel [(const_int 2) (const_int 3)]))))]
3570 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3571 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3572 movlps\t{%2, %0|%0, %2}
3573 vmovlps\t{%2, %1, %0|%0, %1, %2}
3574 %vmovlps\t{%2, %0|%0, %2}"
3575 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3576 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3577 (set_attr "length_immediate" "1,1,*,*,*")
3578 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3579 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss / vmovss on V4SF operands.  The rtx combining
;; operands 2 and 1 is not visible here; the scalar "SF" mode attribute
;; suggests only a single element is moved.  Alternative 0 ties the
;; destination to operand 1 (SSE form); alternative 1 is the
;; three-operand AVX form.
3581 (define_insn "sse_movss"
3582 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3584 (match_operand:V4SF 2 "register_operand" " x,x")
3585 (match_operand:V4SF 1 "register_operand" " 0,x")
3589 movss\t{%2, %0|%0, %2}
3590 vmovss\t{%2, %1, %0|%0, %1, %2}"
3591 [(set_attr "isa" "noavx,avx")
3592 (set_attr "type" "ssemov")
3593 (set_attr "prefix" "orig,vex")
3594 (set_attr "mode" "SF")])
;; Expander producing a V4SF from an SF operand (name suggests a
;; broadcast; the wrapping rtx is not visible here).  The visible
;; preparation statement forces operand 1 into an SFmode register; any
;; guard around that statement is not visible in this excerpt.
3596 (define_expand "vec_dupv4sf"
3597 [(set (match_operand:V4SF 0 "register_operand" "")
3599 (match_operand:SF 1 "nonimmediate_operand" "")))]
3603 operands[1] = force_reg (SFmode, operands[1]);
;; vbroadcastss with a register source: element 0 of V4SF operand 1 is
;; selected and used to form the V4SF result.
3606 (define_insn "avx2_vec_dupv4sf"
3607 [(set (match_operand:V4SF 0 "register_operand" "=x")
3610 (match_operand:V4SF 1 "register_operand" "x")
3611 (parallel [(const_int 0)]))))]
3613 "vbroadcastss\t{%1, %0|%0, %1}"
3614 [(set_attr "type" "sselog1")
3615 (set_attr "prefix" "vex")
3616 (set_attr "mode" "V4SF")])
;; VEX-encoded V4SF pattern with a scalar SF source.  A register source
;; (alternative 0) uses vshufps with immediate 0 and operand 1 as both
;; inputs; a memory source (alternative 1) uses vbroadcastss.
3618 (define_insn "*vec_dupv4sf_avx"
3619 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3621 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3624 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3625 vbroadcastss\t{%1, %0|%0, %1}"
3626 [(set_attr "type" "sselog1,ssemov")
3627 (set_attr "length_immediate" "1,0")
3628 (set_attr "prefix_extra" "0,1")
3629 (set_attr "prefix" "vex")
3630 (set_attr "mode" "V4SF")])
;; vbroadcastss producing a V8SF result from a register source: element 0
;; of V4SF operand 1 is selected and used to form the 256-bit result.
3632 (define_insn "avx2_vec_dupv8sf"
3633 [(set (match_operand:V8SF 0 "register_operand" "=x")
3636 (match_operand:V4SF 1 "register_operand" "x")
3637 (parallel [(const_int 0)]))))]
3639 "vbroadcastss\t{%1, %0|%0, %1}"
3640 [(set_attr "type" "sselog1")
3641 (set_attr "prefix" "vex")
3642 (set_attr "mode" "V8SF")])
;; Non-VEX V4SF pattern with a scalar SF source held in the destination
;; register (constraint "0"): emitted as shufps with immediate 0 applied to
;; the destination itself.
3644 (define_insn "*vec_dupv4sf"
3645 [(set (match_operand:V4SF 0 "register_operand" "=x")
3647 (match_operand:SF 1 "register_operand" "0")))]
3649 "shufps\t{$0, %0, %0|%0, %0, 0}"
3650 [(set_attr "type" "sselog1")
3651 (set_attr "length_immediate" "1")
3652 (set_attr "mode" "V4SF")])
3654 ;; Although insertps takes register source, we prefer
3655 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF operands.  Seven alternatives:
;;   0/1  operand 2 in a register: unpcklps / vunpcklps
;;   2/3  operand 2 in memory: insertps / vinsertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 matching "C" (presumably zero):
;;        movss load
;;   5/6  MMX-register alternatives ("*y"): punpckldq, or movd when
;;        operand 2 matches "C"
3656 (define_insn "*vec_concatv2sf_sse4_1"
3657 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3659 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3660 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3663 unpcklps\t{%2, %0|%0, %2}
3664 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3665 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3666 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3667 %vmovss\t{%1, %0|%0, %1}
3668 punpckldq\t{%2, %0|%0, %2}
3669 movd\t{%1, %0|%0, %1}"
3670 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3671 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3672 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3673 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3674 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3675 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3676 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3678 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3679 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3680 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands without insertps.  Alternatives:
;;   0  both in SSE registers: unpcklps
;;   1  operand 1 in memory, operand 2 matching "C" (presumably zero): movss
;;   2  MMX registers: punpckldq
;;   3  MMX destination, operand 1 in memory, operand 2 "C": movd
3681 (define_insn "*vec_concatv2sf_sse"
3682 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3684 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3685 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3688 unpcklps\t{%2, %0|%0, %2}
3689 movss\t{%1, %0|%0, %1}
3690 punpckldq\t{%2, %0|%0, %2}
3691 movd\t{%1, %0|%0, %1}"
3692 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3693 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  When operand 2 is in a register
;; the pattern emits movlhps / vmovlhps; when it is in memory it emits
;; movhps / vmovhps.  The non-VEX alternatives tie the destination to
;; operand 1.
3695 (define_insn "*vec_concatv4sf"
3696 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3698 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3699 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3702 movlhps\t{%2, %0|%0, %2}
3703 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3704 movhps\t{%2, %0|%0, %2}
3705 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3706 [(set_attr "isa" "noavx,avx,noavx,avx")
3707 (set_attr "type" "ssemov")
3708 (set_attr "prefix" "orig,vex,orig,vex")
3709 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation expander for every mode in V_128.  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination register (operand 0) and the initialiser
;; (operand 1).
3711 (define_expand "vec_init<mode>"
3712 [(match_operand:V_128 0 "register_operand" "")
3713 (match_operand 1 "" "")]
3716 ix86_expand_vector_init (false, operands[0], operands[1]);
3720 ;; Avoid combining registers from different units in a single alternative,
3721 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven-alternative pattern over VI4F_128 combining a duplicated scalar
;; (operand 2) with vector operand 1; the merge rtx and its selector are
;; not visible here (the name suggests element 0 is replaced -- confirm).
;; Alternatives 0-3 take "C" for operand 1 (presumably zero), 4-7 keep an
;; existing register vector, and 8-10 have a memory destination.  Only the
;; first eight output templates are visible in this excerpt.  The "type"
;; attribute is sselog for alternatives 0, 6, 7, fmov for 9, imov for 10,
;; and ssemov otherwise.
3722 (define_insn "vec_set<mode>_0"
3723 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3724 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3726 (vec_duplicate:VI4F_128
3727 (match_operand:<ssescalarmode> 2 "general_operand"
3728 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3729 (match_operand:VI4F_128 1 "vector_move_operand"
3730 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3734 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3735 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3736 %vmovd\t{%2, %0|%0, %2}
3737 movss\t{%2, %0|%0, %2}
3738 movss\t{%2, %0|%0, %2}
3739 vmovss\t{%2, %1, %0|%0, %1, %2}
3740 pinsrd\t{$0, %2, %0|%0, %2, 0}
3741 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3745 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3747 (cond [(eq_attr "alternative" "0,6,7")
3748 (const_string "sselog")
3749 (eq_attr "alternative" "9")
3750 (const_string "fmov")
3751 (eq_attr "alternative" "10")
3752 (const_string "imov")
3754 (const_string "ssemov")))
3755 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3756 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3757 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3758 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3760 ;; A subset is vec_setv4sf.
;; Insert scalar operand 2 into V4SF operand 1 using insertps / vinsertps.
;; Operand 3 must be a power of two whose log2 is below the number of
;; V4SF elements (see the condition), i.e. a single-bit element mask.  The
;; output code replaces operands[3] with that bit index shifted left by 4
;; to form the instruction immediate, then picks the template by
;; which_alternative (case labels are not visible here).
3761 (define_insn "*vec_setv4sf_sse4_1"
3762 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3765 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3766 (match_operand:V4SF 1 "register_operand" "0,x")
3767 (match_operand:SI 3 "const_int_operand" "")))]
3769 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3770 < GET_MODE_NUNITS (V4SFmode))"
3772 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3773 switch (which_alternative)
3776 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3778 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3783 [(set_attr "isa" "noavx,avx")
3784 (set_attr "type" "sselog")
3785 (set_attr "prefix_data16" "1,*")
3786 (set_attr "prefix_extra" "1")
3787 (set_attr "length_immediate" "1")
3788 (set_attr "prefix" "orig,vex")
3789 (set_attr "mode" "V4SF")])
;; insertps / vinsertps with an explicit 8-bit immediate (operand 3),
;; modelled as an unspec.  When operand 2 is in memory, the output code
;; takes the top two bits of the immediate (count_s), folds them into the
;; address as an SFmode access at byte offset count_s * 4, and clears
;; them from the immediate (mask 0x3f) before printing.
3791 (define_insn "sse4_1_insertps"
3792 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3793 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3794 (match_operand:V4SF 1 "register_operand" "0,x")
3795 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3799 if (MEM_P (operands[2]))
3801 unsigned count_s = INTVAL (operands[3]) >> 6;
3803 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3804 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3806 switch (which_alternative)
3809 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3811 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3816 [(set_attr "isa" "noavx,avx")
3817 (set_attr "type" "sselog")
3818 (set_attr "prefix_data16" "1,*")
3819 (set_attr "prefix_extra" "1")
3820 (set_attr "length_immediate" "1")
3821 (set_attr "prefix" "orig,vex")
3822 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the defining header line is not visible here) for
;; a VI4F_128 memory destination whose source involves a duplicated
;; non-memory scalar (operand 1).  The visible code emits a move into the
;; scalar-mode view of operand 0 at byte offset 0; the moved value is not
;; visible in this excerpt.
3825 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3827 (vec_duplicate:VI4F_128
3828 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3831 "TARGET_SSE && reload_completed"
3834 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for every mode in V.  All work is delegated to
;; ix86_expand_vector_set, called with false, the vector register
;; (operand 0), the scalar value (operand 1) and the constant element
;; index taken from operand 2.
3839 (define_expand "vec_set<mode>"
3840 [(match_operand:V 0 "register_operand" "")
3841 (match_operand:<ssescalarmode> 1 "register_operand" "")
3842 (match_operand 2 "const_int_operand" "")]
3845 ix86_expand_vector_set (false, operands[0], operands[1],
3846 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF value.  The insn is valid unless
;; both operands are memory, and is split after reload into a plain SF
;; move: operand 1 is re-expressed in SFmode, either as a REG with the
;; same register number or via gen_lowpart (the test choosing between the
;; two is not visible here), and moved into operand 0.
3850 (define_insn_and_split "*vec_extractv4sf_0"
3851 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3853 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3854 (parallel [(const_int 0)])))]
3855 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3857 "&& reload_completed"
3860 rtx op1 = operands[1];
3862 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3864 op1 = gen_lowpart (SFmode, op1);
3865 emit_move_insn (operands[0], op1);
;; Extract element operand 2 (0-3) of a V4SF register as an SF value.
;; Alternative 0 (general register or memory destination) emits
;; extractps.  When the destination is an SSE register the insn is split
;; after reload: the destination is viewed as V4SF and filled either by a
;; shufps that replicates the selected element or by
;; vec_interleave_highv4sf of operand 1 with itself (the case labels
;; choosing between them are not visible here).
3869 (define_insn_and_split "*sse4_1_extractps"
3870 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3872 (match_operand:V4SF 1 "register_operand" "x,0,x")
3873 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3876 %vextractps\t{%2, %1, %0|%0, %1, %2}
3879 "&& reload_completed && SSE_REG_P (operands[0])"
3882 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3883 switch (INTVAL (operands[2]))
3887 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3888 operands[2], operands[2],
3889 GEN_INT (INTVAL (operands[2]) + 4),
3890 GEN_INT (INTVAL (operands[2]) + 4)));
3893 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3896 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
3901 [(set_attr "isa" "*,noavx,avx")
3902 (set_attr "type" "sselog,*,*")
3903 (set_attr "prefix_data16" "1,*,*")
3904 (set_attr "prefix_extra" "1,*,*")
3905 (set_attr "length_immediate" "1,*,*")
3906 (set_attr "prefix" "maybe_vex,*,*")
3907 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 (0-3) from a V4SF held in offsettable memory.
;; After reload the insn is split into a scalar SF load from the source
;; address advanced by 4 bytes per element index.
3909 (define_insn_and_split "*vec_extract_v4sf_mem"
3910 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3912 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3913 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3916 "&& reload_completed"
3919 int i = INTVAL (operands[2]);
3921 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting one 128-bit half of a 256-bit vector (any mode
;; in V_256).  Operand 2 is a constant 0 or 1; the switch on it picks
;; either the vec_extract_lo_<mode> or the vec_extract_hi_<mode>
;; generator (the case labels are not visible here; presumably 0 -> lo,
;; 1 -> hi), which is then invoked with the destination and the source.
3925 (define_expand "avx_vextractf128<mode>"
3926 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3927 (match_operand:V_256 1 "register_operand" "")
3928 (match_operand:SI 2 "const_0_to_1_operand" "")]
3931 rtx (*insn)(rtx, rtx);
3933 switch (INTVAL (operands[2]))
3936 insn = gen_vec_extract_lo_<mode>;
3939 insn = gen_vec_extract_hi_<mode>;
3945 emit_insn (insn (operands[0], operands[1]));
;; Extract the low half (elements 0 and 1) of a four-element 256-bit
;; vector (VI8F_256 modes).  Valid under AVX unless both operands are
;; memory.  After reload it is split into a plain half-width move:
;; operand 1 is re-expressed in the half vector mode, either as a REG with
;; the same register number or via gen_lowpart (the test choosing between
;; the two is not visible here).
3949 (define_insn_and_split "vec_extract_lo_<mode>"
3950 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3951 (vec_select:<ssehalfvecmode>
3952 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3953 (parallel [(const_int 0) (const_int 1)])))]
3954 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3956 "&& reload_completed"
3959 rtx op1 = operands[1];
3961 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3963 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3964 emit_move_insn (operands[0], op1);
;; Extract the high half (elements 2 and 3) of a four-element 256-bit
;; vector (VI8F_256 modes) with vextract<i128> and immediate 1.  The
;; source must be a register; the destination may be a register or memory
;; (see the "memory" attribute: none,store).
3968 (define_insn "vec_extract_hi_<mode>"
3969 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3970 (vec_select:<ssehalfvecmode>
3971 (match_operand:VI8F_256 1 "register_operand" "x,x")
3972 (parallel [(const_int 2) (const_int 3)])))]
3974 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3975 [(set_attr "type" "sselog")
3976 (set_attr "prefix_extra" "1")
3977 (set_attr "length_immediate" "1")
3978 (set_attr "memory" "none,store")
3979 (set_attr "prefix" "vex")
3980 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-3) of an eight-element 256-bit vector
;; (VI4F_256 modes).  Valid under AVX unless both operands are memory.
;; After reload it is split into a plain half-width move: operand 1 is
;; re-expressed in the half vector mode, either as a REG with the same
;; register number or via gen_lowpart (the test choosing between the two
;; is not visible here).
3982 (define_insn_and_split "vec_extract_lo_<mode>"
3983 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3984 (vec_select:<ssehalfvecmode>
3985 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3986 (parallel [(const_int 0) (const_int 1)
3987 (const_int 2) (const_int 3)])))]
3988 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3990 "&& reload_completed"
3993 rtx op1 = operands[1];
3995 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3997 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3998 emit_move_insn (operands[0], op1);
;; Extract the high half (elements 4-7) of an eight-element 256-bit vector
;; (VI4F_256 modes) with vextract<i128> and immediate 1.  The source must
;; be a register; the destination may be a register or memory (see the
;; "memory" attribute: none,store).
4002 (define_insn "vec_extract_hi_<mode>"
4003 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4004 (vec_select:<ssehalfvecmode>
4005 (match_operand:VI4F_256 1 "register_operand" "x,x")
4006 (parallel [(const_int 4) (const_int 5)
4007 (const_int 6) (const_int 7)])))]
4009 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4010 [(set_attr "type" "sselog")
4011 (set_attr "prefix_extra" "1")
4012 (set_attr "length_immediate" "1")
4013 (set_attr "memory" "none,store")
4014 (set_attr "prefix" "vex")
4015 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-7) of a V16HI as a V8HI.  Valid under
;; AVX unless both operands are memory.  After reload it is split into a
;; plain V8HI move: operand 1 is re-expressed in V8HImode, either as a REG
;; with the same register number or via gen_lowpart (the test choosing
;; between the two is not visible here).
4017 (define_insn_and_split "vec_extract_lo_v16hi"
4018 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4020 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4021 (parallel [(const_int 0) (const_int 1)
4022 (const_int 2) (const_int 3)
4023 (const_int 4) (const_int 5)
4024 (const_int 6) (const_int 7)])))]
4025 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4027 "&& reload_completed"
4030 rtx op1 = operands[1];
4032 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4034 op1 = gen_lowpart (V8HImode, op1);
4035 emit_move_insn (operands[0], op1);
;; Extract the high half (elements 8-15) of a V16HI register as a V8HI
;; with a 128-bit vextract instruction and immediate 1 (the "%~" in the
;; mnemonic is expanded when the template is printed).  The destination
;; may be a register or memory.
4039 (define_insn "vec_extract_hi_v16hi"
4040 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4042 (match_operand:V16HI 1 "register_operand" "x,x")
4043 (parallel [(const_int 8) (const_int 9)
4044 (const_int 10) (const_int 11)
4045 (const_int 12) (const_int 13)
4046 (const_int 14) (const_int 15)])))]
4048 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4049 [(set_attr "type" "sselog")
4050 (set_attr "prefix_extra" "1")
4051 (set_attr "length_immediate" "1")
4052 (set_attr "memory" "none,store")
4053 (set_attr "prefix" "vex")
4054 (set_attr "mode" "OI")])
;; Extract the low half (elements 0-15) of a V32QI as a V16QI.  Valid
;; under AVX unless both operands are memory.  After reload it is split
;; into a plain V16QI move: operand 1 is re-expressed in V16QImode, either
;; as a REG with the same register number or via gen_lowpart (the test
;; choosing between the two is not visible here).
4056 (define_insn_and_split "vec_extract_lo_v32qi"
4057 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4059 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4060 (parallel [(const_int 0) (const_int 1)
4061 (const_int 2) (const_int 3)
4062 (const_int 4) (const_int 5)
4063 (const_int 6) (const_int 7)
4064 (const_int 8) (const_int 9)
4065 (const_int 10) (const_int 11)
4066 (const_int 12) (const_int 13)
4067 (const_int 14) (const_int 15)])))]
4068 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4070 "&& reload_completed"
4073 rtx op1 = operands[1];
4075 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4077 op1 = gen_lowpart (V16QImode, op1);
4078 emit_move_insn (operands[0], op1);
;; Extract the high half (elements 16-31) of a V32QI register as a V16QI
;; with a 128-bit vextract instruction and immediate 1 (the "%~" in the
;; mnemonic is expanded when the template is printed).  The destination
;; may be a register or memory.
4082 (define_insn "vec_extract_hi_v32qi"
4083 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4085 (match_operand:V32QI 1 "register_operand" "x,x")
4086 (parallel [(const_int 16) (const_int 17)
4087 (const_int 18) (const_int 19)
4088 (const_int 20) (const_int 21)
4089 (const_int 22) (const_int 23)
4090 (const_int 24) (const_int 25)
4091 (const_int 26) (const_int 27)
4092 (const_int 28) (const_int 29)
4093 (const_int 30) (const_int 31)])))]
4095 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4096 [(set_attr "type" "sselog")
4097 (set_attr "prefix_extra" "1")
4098 (set_attr "length_immediate" "1")
4099 (set_attr "memory" "none,store")
4100 (set_attr "prefix" "vex")
4101 (set_attr "mode" "OI")])
4103 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only when TARGET_AVX holds, with
;; the unconditional 128-bit mode of the same element type.
4104 (define_mode_iterator VEC_EXTRACT_MODE
4105 [(V32QI "TARGET_AVX") V16QI
4106 (V16HI "TARGET_AVX") V8HI
4107 (V8SI "TARGET_AVX") V4SI
4108 (V4DI "TARGET_AVX") V2DI
4109 (V8SF "TARGET_AVX") V4SF
4110 (V4DF "TARGET_AVX") V2DF])
;; Element-extract expander for every mode in VEC_EXTRACT_MODE.  All work
;; is delegated to ix86_expand_vector_extract, called with false, the
;; scalar destination (operand 0), the source vector (operand 1) and the
;; constant element index taken from operand 2.
4112 (define_expand "vec_extract<mode>"
4113 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4114 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4115 (match_operand 2 "const_int_operand" "")]
4118 ix86_expand_vector_extract (false, operands[0], operands[1],
4119 INTVAL (operands[2]));
4123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4125 ;; Parallel double-precision floating point element swizzling
4127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4129 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  The selector 1, 5, 3, 7 takes the odd-numbered
;; element of each two-element lane, alternating between the two inputs
;; (indices 4-7 presumably address operand 2; the concatenation is not
;; visible here).  Operand 2 may be a register or memory.
4130 (define_insn "avx_unpckhpd256"
4131 [(set (match_operand:V4DF 0 "register_operand" "=x")
4134 (match_operand:V4DF 1 "register_operand" "x")
4135 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4136 (parallel [(const_int 1) (const_int 5)
4137 (const_int 3) (const_int 7)])))]
4139 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4140 [(set_attr "type" "sselog")
4141 (set_attr "prefix" "vex")
4142 (set_attr "mode" "V4DF")])
;; Expander for a high interleave of two V4DF vectors, built from three
;; selections: a per-lane low unpack (0, 4, 2, 6), a per-lane high unpack
;; (1, 5, 3, 7), and a final selection of elements 2, 3, 6, 7 that is
;; stored into operand 0.  operands[3] and operands[4] are fresh V4DF
;; pseudos; presumably they receive the two intermediate results (their
;; set destinations are not visible here).
4144 (define_expand "vec_interleave_highv4df"
4148 (match_operand:V4DF 1 "register_operand" "x")
4149 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4150 (parallel [(const_int 0) (const_int 4)
4151 (const_int 2) (const_int 6)])))
4157 (parallel [(const_int 1) (const_int 5)
4158 (const_int 3) (const_int 7)])))
4159 (set (match_operand:V4DF 0 "register_operand" "")
4164 (parallel [(const_int 2) (const_int 3)
4165 (const_int 6) (const_int 7)])))]
4168 operands[3] = gen_reg_rtx (V4DFmode);
4169 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  Both inputs may initially be
;; registers or memory; if ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination (called with 1 as its second argument),
;; operand 2 is forced into a register.  Only the first selector index
;; (1) is visible here.
4173 (define_expand "vec_interleave_highv2df"
4174 [(set (match_operand:V2DF 0 "register_operand" "")
4177 (match_operand:V2DF 1 "nonimmediate_operand" "")
4178 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4179 (parallel [(const_int 1)
4183 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4184 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for the V2DF high interleave; requires SSE2 and an
;; operand combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives:
;;   0/1  register inputs: unpckhpd / vunpckhpd
;;   2    both inputs the same offsettable memory (operand 2 tied to 1):
;;        movddup from %H1 (SSE3)
;;   3/4  operand 1 in memory: movlpd / vmovlpd loading %H1
;;   5    memory destination tied to operand 2: movhpd store of operand 1
;; Only the first selector index (1) is visible here.
4187 (define_insn "*vec_interleave_highv2df"
4188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4191 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4192 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4193 (parallel [(const_int 1)
4195 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4197 unpckhpd\t{%2, %0|%0, %2}
4198 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4199 %vmovddup\t{%H1, %0|%0, %H1}
4200 movlpd\t{%H1, %0|%0, %H1}
4201 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4202 %vmovhpd\t{%1, %0|%0, %1}"
4203 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4204 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4205 (set_attr "prefix_data16" "*,*,*,1,*,1")
4206 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4207 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4209 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF source (register or memory) and the
;; selector 0, 4, 2, 6.  The expression being selected from is not fully
;; visible here; presumably operand 1 is paired with itself so that the
;; result duplicates its even-numbered elements.
4210 (define_expand "avx_movddup256"
4211 [(set (match_operand:V4DF 0 "register_operand" "")
4214 (match_operand:V4DF 1 "nonimmediate_operand" "")
4216 (parallel [(const_int 0) (const_int 4)
4217 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit low unpack of two V4DF vectors with the
;; selector 0, 4, 2, 6.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  No preparation code is visible here.
4220 (define_expand "avx_unpcklpd256"
4221 [(set (match_operand:V4DF 0 "register_operand" "")
4224 (match_operand:V4DF 1 "register_operand" "")
4225 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4226 (parallel [(const_int 0) (const_int 4)
4227 (const_int 2) (const_int 6)])))]
;; Matching insn for the selector 0, 4, 2, 6 on two V4DF inputs.
;; Alternative 0 (operand 1 in a register) emits vunpcklpd.  Alternative 1
;; requires operand 1 in memory with operand 2 tied to it (constraint
;; "1"), i.e. both inputs are the same value, and emits vmovddup.
4230 (define_insn "*avx_unpcklpd256"
4231 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4234 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4235 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4236 (parallel [(const_int 0) (const_int 4)
4237 (const_int 2) (const_int 6)])))]
4240 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4241 vmovddup\t{%1, %0|%0, %1}"
4242 [(set_attr "type" "sselog")
4243 (set_attr "prefix" "vex")
4244 (set_attr "mode" "V4DF")])
;; Expander for a low interleave of two V4DF vectors, built from three
;; selections: a per-lane low unpack (0, 4, 2, 6), a per-lane high unpack
;; (1, 5, 3, 7), and a final selection of elements 0, 1, 4, 5 that is
;; stored into operand 0.  operands[3] and operands[4] are fresh V4DF
;; pseudos; presumably they receive the two intermediate results (their
;; set destinations are not visible here).
4246 (define_expand "vec_interleave_lowv4df"
4250 (match_operand:V4DF 1 "register_operand" "x")
4251 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4252 (parallel [(const_int 0) (const_int 4)
4253 (const_int 2) (const_int 6)])))
4259 (parallel [(const_int 1) (const_int 5)
4260 (const_int 3) (const_int 7)])))
4261 (set (match_operand:V4DF 0 "register_operand" "")
4266 (parallel [(const_int 0) (const_int 1)
4267 (const_int 4) (const_int 5)])))]
4270 operands[3] = gen_reg_rtx (V4DFmode);
4271 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  Both inputs may initially be
;; registers or memory; if ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination (called with 0 as its second argument),
;; operand 1 is forced into a register.  Only the first selector index
;; (0) is visible here.
4274 (define_expand "vec_interleave_lowv2df"
4275 [(set (match_operand:V2DF 0 "register_operand" "")
4278 (match_operand:V2DF 1 "nonimmediate_operand" "")
4279 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4280 (parallel [(const_int 0)
4284 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4285 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF low interleave; requires SSE2 and an
;; operand combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives:
;;   0/1  register inputs: unpcklpd / vunpcklpd
;;   2    both inputs the same memory (operand 2 tied to 1): movddup (SSE3)
;;   3/4  operand 2 in memory: movhpd / vmovhpd
;;   5    offsettable memory destination tied to operand 1: movlpd store
;;        of operand 2 to %H0
;; Only the first selector index (0) is visible here.
4288 (define_insn "*vec_interleave_lowv2df"
4289 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4292 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4293 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4294 (parallel [(const_int 0)
4296 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4298 unpcklpd\t{%2, %0|%0, %2}
4299 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4300 %vmovddup\t{%1, %0|%0, %1}
4301 movhpd\t{%2, %0|%0, %2}
4302 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4303 %vmovlpd\t{%2, %H0|%H0, %2}"
4304 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4305 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4306 (set_attr "prefix_data16" "*,*,*,1,*,1")
4307 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4308 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (the defining header line is not visible here),
;; enabled with SSE3, for a V2DF memory destination written from a
;; register source whose selector starts at element 0.  The low DFmode
;; part of the source register is stored twice, at byte offsets 0 and 8
;; of the destination.
4311 [(set (match_operand:V2DF 0 "memory_operand" "")
4314 (match_operand:V2DF 1 "register_operand" "")
4316 (parallel [(const_int 0)
4318 "TARGET_SSE3 && reload_completed"
4321 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4322 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4323 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (the defining header line is not visible here), enabled with
;; SSE3, for a V2DF register destination selecting two elements from a
;; memory source.  It applies when operand 3 equals operand 2 + 2
;; (operand 2 being 0 or 1) and replaces the selection with a
;; vec_duplicate of the DFmode value at byte offset operand 2 * 8 of the
;; memory operand.
4328 [(set (match_operand:V2DF 0 "register_operand" "")
4331 (match_operand:V2DF 1 "memory_operand" "")
4333 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4334 (match_operand:SI 3 "const_int_operand" "")])))]
4335 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4336 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4338 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that takes the shuffle immediate in operand 3 and decodes it
;; into explicit element indices for avx_shufpd256_1.  Each mask bit
;; chooses between two adjacent indices: bit 1 -> 5 or 4, bit 2 -> 3 or 2,
;; bit 3 -> 7 or 6.  The argument derived from bit 0 is not visible in
;; this excerpt.
4341 (define_expand "avx_shufpd256"
4342 [(match_operand:V4DF 0 "register_operand" "")
4343 (match_operand:V4DF 1 "register_operand" "")
4344 (match_operand:V4DF 2 "nonimmediate_operand" "")
4345 (match_operand:SI 3 "const_int_operand" "")]
4348 int mask = INTVAL (operands[3]);
4349 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4351 GEN_INT (mask & 2 ? 5 : 4),
4352 GEN_INT (mask & 4 ? 3 : 2),
4353 GEN_INT (mask & 8 ? 7 : 6)));
;; Matching insn for the 256-bit vshufpd.  Operands 3-6 are the four
;; selected element indices, constrained to the ranges 0-1, 4-5, 2-3 and
;; 6-7 respectively.  The output code converts them back into a 4-bit
;; immediate (one bit per index, after subtracting each range's base),
;; stores it in operands[3], and emits vshufpd.
4357 (define_insn "avx_shufpd256_1"
4358 [(set (match_operand:V4DF 0 "register_operand" "=x")
4361 (match_operand:V4DF 1 "register_operand" "x")
4362 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4363 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4364 (match_operand 4 "const_4_to_5_operand" "")
4365 (match_operand 5 "const_2_to_3_operand" "")
4366 (match_operand 6 "const_6_to_7_operand" "")])))]
4370 mask = INTVAL (operands[3]);
4371 mask |= (INTVAL (operands[4]) - 4) << 1;
4372 mask |= (INTVAL (operands[5]) - 2) << 2;
4373 mask |= (INTVAL (operands[6]) - 6) << 3;
4374 operands[3] = GEN_INT (mask);
4376 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4378 [(set_attr "type" "sselog")
4379 (set_attr "length_immediate" "1")
4380 (set_attr "prefix" "vex")
4381 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate in operand 3 and forwarding to
;; gen_sse2_shufpd_v2df with explicit selectors; bit 1 of the mask picks
;; selector 3 or 2.
4383 (define_expand "sse2_shufpd"
4384 [(match_operand:V2DF 0 "register_operand" "")
4385 (match_operand:V2DF 1 "register_operand" "")
4386 (match_operand:V2DF 2 "nonimmediate_operand" "")
4387 (match_operand:SI 3 "const_int_operand" "")]
4390 int mask = INTVAL (operands[3]);
4391 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4393 GEN_INT (mask & 2 ? 3 : 2)));
4397 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI selection from operands 1 and 2 whose selector list starts at
;; element 1; emitted as a three-operand VEX vpunpckhqdq.
4398 (define_insn "avx2_interleave_highv4di"
4399 [(set (match_operand:V4DI 0 "register_operand" "=x")
4402 (match_operand:V4DI 1 "register_operand" "x")
4403 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4404 (parallel [(const_int 1)
4409 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4410 [(set_attr "type" "sselog")
4411 (set_attr "prefix" "vex")
4412 (set_attr "mode" "OI")])
;; V2DI counterpart with two alternatives: the non-AVX form ties operand 1
;; to the destination (punpckhqdq), the AVX form is three-operand
;; (vpunpckhqdq).
4414 (define_insn "vec_interleave_highv2di"
4415 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4418 (match_operand:V2DI 1 "register_operand" "0,x")
4419 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4420 (parallel [(const_int 1)
4424 punpckhqdq\t{%2, %0|%0, %2}
4425 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4426 [(set_attr "isa" "noavx,avx")
4427 (set_attr "type" "sselog")
4428 (set_attr "prefix_data16" "1,*")
4429 (set_attr "prefix" "orig,vex")
4430 (set_attr "mode" "TI")])
;; V4DI selection from operands 1 and 2 whose selector list starts at
;; element 0; emitted as a three-operand VEX vpunpcklqdq.
4432 (define_insn "avx2_interleave_lowv4di"
4433 [(set (match_operand:V4DI 0 "register_operand" "=x")
4436 (match_operand:V4DI 1 "register_operand" "x")
4437 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4438 (parallel [(const_int 0)
4443 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4444 [(set_attr "type" "sselog")
4445 (set_attr "prefix" "vex")
4446 (set_attr "mode" "OI")])
;; V2DI counterpart with two alternatives: the non-AVX form ties operand 1
;; to the destination (punpcklqdq), the AVX form is three-operand
;; (vpunpcklqdq).
4448 (define_insn "vec_interleave_lowv2di"
4449 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4452 (match_operand:V2DI 1 "register_operand" "0,x")
4453 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4454 (parallel [(const_int 0)
4458 punpcklqdq\t{%2, %0|%0, %2}
4459 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4460 [(set_attr "isa" "noavx,avx")
4461 (set_attr "type" "sselog")
4462 (set_attr "prefix_data16" "1,*")
4463 (set_attr "prefix" "orig,vex")
4464 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes with two explicit selectors: operand 3
;; (0..1) picks from operand 1 and operand 4 (2..3) picks from operand 2.
;; The output code rebuilds the 2-bit immediate in operand 3 and returns
;; the two-operand or three-operand (VEX) template by alternative.
4466 (define_insn "sse2_shufpd_<mode>"
4467 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4468 (vec_select:VI8F_128
4469 (vec_concat:<ssedoublevecmode>
4470 (match_operand:VI8F_128 1 "register_operand" "0,x")
4471 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4472 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4473 (match_operand 4 "const_2_to_3_operand" "")])))]
4477 mask = INTVAL (operands[3]);
4478 mask |= (INTVAL (operands[4]) - 2) << 1;
4479 operands[3] = GEN_INT (mask);
4481 switch (which_alternative)
4484 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4486 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4491 [(set_attr "isa" "noavx,avx")
4492 (set_attr "type" "sselog")
4493 (set_attr "length_immediate" "1")
4494 (set_attr "prefix" "orig,vex")
4495 (set_attr "mode" "V2DF")])
4497 ;; Avoid combining registers from different units in a single alternative,
4498 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF operand into a DF destination.  Six
;; alternatives are described; the condition rejects the case where both
;; operands are memory.  prefix_data16 is computed from alternative 0
;; together with !TARGET_AVX.
4499 (define_insn "sse2_storehpd"
4500 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4502 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4503 (parallel [(const_int 1)])))]
4504 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4506 %vmovhpd\t{%1, %0|%0, %1}
4508 vunpckhpd\t{%d1, %0|%0, %d1}
4512 [(set_attr "isa" "*,noavx,avx,*,*,*")
4513 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4514 (set (attr "prefix_data16")
4516 (and (eq_attr "alternative" "0")
4517 (not (match_test "TARGET_AVX")))
4519 (const_string "*")))
4520 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4521 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload: extracting element 1 from a V2DF memory operand into a DF
;; register becomes a plain DFmode load from byte offset 8 of that memory.
4524 [(set (match_operand:DF 0 "register_operand" "")
4526 (match_operand:V2DF 1 "memory_operand" "")
4527 (parallel [(const_int 1)])))]
4528 "TARGET_SSE2 && reload_completed"
4529 [(set (match_dup 0) (match_dup 1))]
4530 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction for SSE without SSE2, using single-precision
;; moves: movhps, movhlps, or movlps on the %H1 form of a memory source.
;; The condition rejects memory-to-memory.
4532 (define_insn "*vec_extractv2df_1_sse"
4533 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4535 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4536 (parallel [(const_int 1)])))]
4537 "!TARGET_SSE2 && TARGET_SSE
4538 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4540 movhps\t{%1, %0|%0, %1}
4541 movhlps\t{%1, %0|%0, %1}
4542 movlps\t{%H1, %0|%0, %H1}"
4543 [(set_attr "type" "ssemov")
4544 (set_attr "mode" "V2SF,V4SF,V2SF")])
4546 ;; Avoid combining registers from different units in a single alternative,
4547 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF operand into a DF destination.  Five
;; alternatives are described; the condition rejects the case where both
;; operands are memory.
4548 (define_insn "sse2_storelpd"
4549 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4551 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4552 (parallel [(const_int 0)])))]
4553 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4555 %vmovlpd\t{%1, %0|%0, %1}
4560 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4561 (set_attr "prefix_data16" "1,*,*,*,*")
4562 (set_attr "prefix" "maybe_vex")
4563 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload: extracting element 0 into a DF register becomes a plain
;; DFmode move.  The source is the DFmode view of operand 1, built either
;; as a DFmode REG with the same register number or with gen_lowpart.
4566 [(set (match_operand:DF 0 "register_operand" "")
4568 (match_operand:V2DF 1 "nonimmediate_operand" "")
4569 (parallel [(const_int 0)])))]
4570 "TARGET_SSE2 && reload_completed"
4573 rtx op1 = operands[1];
4575 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4577 op1 = gen_lowpart (DFmode, op1);
4578 emit_move_insn (operands[0], op1);
;; Element-0 extraction for SSE without SSE2, using single-precision
;; moves: movlps for the memory alternatives and movaps for
;; register-to-register.  The condition rejects memory-to-memory.
4582 (define_insn "*vec_extractv2df_0_sse"
4583 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4585 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4586 (parallel [(const_int 0)])))]
4587 "!TARGET_SSE2 && TARGET_SSE
4588 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4590 movlps\t{%1, %0|%0, %1}
4591 movaps\t{%1, %0|%0, %1}
4592 movlps\t{%1, %0|%0, %1}"
4593 [(set_attr "type" "ssemov")
4594 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse2_loadhpd.  The operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the destination it
;; returns, and that destination is copied to operand 0 if it differs.
4596 (define_expand "sse2_loadhpd_exp"
4597 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4600 (match_operand:V2DF 1 "nonimmediate_operand" "")
4601 (parallel [(const_int 0)]))
4602 (match_operand:DF 2 "nonimmediate_operand" "")))]
4605 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4607 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4609 /* Fix up the destination if needed. */
4610 if (dst != operands[0])
4611 emit_move_insn (operands[0], dst);
4616 ;; Avoid combining registers from different units in a single alternative,
4617 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine element 0 of V2DF operand 1 with DF operand 2.  Seven
;; alternatives are described; the first four emit (v)movhpd or
;; (v)unpcklpd.  The condition rejects operands 1 and 2 both in memory.
4618 (define_insn "sse2_loadhpd"
4619 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4623 (match_operand:V2DF 1 "nonimmediate_operand"
4625 (parallel [(const_int 0)]))
4626 (match_operand:DF 2 "nonimmediate_operand"
4627 " m,m,x,x,x,*f,r")))]
4628 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4630 movhpd\t{%2, %0|%0, %2}
4631 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4632 unpcklpd\t{%2, %0|%0, %2}
4633 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4637 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4638 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4639 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4640 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4641 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload, memory destination: element 0 of the destination is kept
;; (it is selected from operand 0 itself), so only the new DF value is
;; stored, at byte offset 8.
4644 [(set (match_operand:V2DF 0 "memory_operand" "")
4646 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4647 (match_operand:DF 1 "register_operand" "")))]
4648 "TARGET_SSE2 && reload_completed"
4649 [(set (match_dup 0) (match_dup 1))]
4650 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper for sse2_loadlpd.  The operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the destination it
;; returns, and that destination is copied to operand 0 if it differs.
4652 (define_expand "sse2_loadlpd_exp"
4653 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4655 (match_operand:DF 2 "nonimmediate_operand" "")
4657 (match_operand:V2DF 1 "nonimmediate_operand" "")
4658 (parallel [(const_int 1)]))))]
4661 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4663 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4665 /* Fix up the destination if needed. */
4666 if (dst != operands[0])
4667 emit_move_insn (operands[0], dst);
4672 ;; Avoid combining registers from different units in a single alternative,
4673 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine DF operand 2 with element 1 of V2DF operand 1.  Eleven
;; alternatives are described (see the "isa" list).  The "type" attribute
;; is sselog for alternative 5, fmov for 9, imov for 10 and ssemov for the
;; rest.  The condition rejects operands 1 and 2 both in memory.
4674 (define_insn "sse2_loadlpd"
4675 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4676 "=x,x,x,x,x,x,x,x,m,m ,m")
4678 (match_operand:DF 2 "nonimmediate_operand"
4679 " m,m,m,x,x,0,0,x,x,*f,r")
4681 (match_operand:V2DF 1 "vector_move_operand"
4682 " C,0,x,0,x,x,o,o,0,0 ,0")
4683 (parallel [(const_int 1)]))))]
4684 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4686 %vmovsd\t{%2, %0|%0, %2}
4687 movlpd\t{%2, %0|%0, %2}
4688 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4689 movsd\t{%2, %0|%0, %2}
4690 vmovsd\t{%2, %1, %0|%0, %1, %2}
4691 shufpd\t{$2, %1, %0|%0, %1, 2}
4692 movhpd\t{%H1, %0|%0, %H1}
4693 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4697 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4699 (cond [(eq_attr "alternative" "5")
4700 (const_string "sselog")
4701 (eq_attr "alternative" "9")
4702 (const_string "fmov")
4703 (eq_attr "alternative" "10")
4704 (const_string "imov")
4706 (const_string "ssemov")))
4707 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4708 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4709 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4710 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload, memory destination: the new value is the first vec_concat
;; operand (element 0) and element 1 is selected from the destination
;; itself, so only the low DF has to be written, at byte offset 0.
;; Storing at offset 8 would overwrite the element that must be preserved
;; and leave the low element stale.
4713 [(set (match_operand:V2DF 0 "memory_operand" "")
4715 (match_operand:DF 1 "register_operand" "")
4716 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4717 "TARGET_SSE2 && reload_completed"
4718 [(set (match_dup 0) (match_dup 1))]
4719 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; V2DF pattern over operands 2 and 1 with nine alternatives (see the
;; "isa" list).  The templates cover (v)movsd, (v)movlpd, shufpd with
;; immediate 2, and (v)movhps on the %H halves.  prefix_data16 is computed
;; from alternatives 2 and 4 together with !TARGET_AVX.
4721 (define_insn "sse2_movsd"
4722 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4724 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4725 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4729 movsd\t{%2, %0|%0, %2}
4730 vmovsd\t{%2, %1, %0|%0, %1, %2}
4731 movlpd\t{%2, %0|%0, %2}
4732 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4733 %vmovlpd\t{%2, %0|%0, %2}
4734 shufpd\t{$2, %1, %0|%0, %1, 2}
4735 movhps\t{%H1, %0|%0, %H1}
4736 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4737 %vmovhps\t{%1, %H0|%H0, %1}"
4738 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4741 (eq_attr "alternative" "5")
4742 (const_string "sselog")
4743 (const_string "ssemov")))
4744 (set (attr "prefix_data16")
4746 (and (eq_attr "alternative" "2,4")
4747 (not (match_test "TARGET_AVX")))
4749 (const_string "*")))
4750 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4751 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4752 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander producing a V2DF from DF operand 1; the preparation code can
;; force operand 1 into a DFmode register.
4754 (define_expand "vec_dupv2df"
4755 [(set (match_operand:V2DF 0 "register_operand" "")
4757 (match_operand:DF 1 "nonimmediate_operand" "")))]
4761 operands[1] = force_reg (DFmode, operands[1]);
;; Single (v)movddup from a register or memory DF source into a V2DF
;; register.
4764 (define_insn "*vec_dupv2df_sse3"
4765 [(set (match_operand:V2DF 0 "register_operand" "=x")
4767 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4769 "%vmovddup\t{%1, %0|%0, %1}"
4770 [(set_attr "type" "sselog1")
4771 (set_attr "prefix" "maybe_vex")
4772 (set_attr "mode" "DF")])
;; Register-only variant: DF operand 1 is tied to the destination
;; register (constraint "0").
4774 (define_insn "*vec_dupv2df"
4775 [(set (match_operand:V2DF 0 "register_operand" "=x")
4777 (match_operand:DF 1 "register_operand" "0")))]
4780 [(set_attr "type" "sselog1")
4781 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 (register or memory), emitted as a single
;; (v)movddup.
4783 (define_insn "*vec_concatv2df_sse3"
4784 [(set (match_operand:V2DF 0 "register_operand" "=x")
4786 (match_operand:DF 1 "nonimmediate_operand" "xm")
4789 "%vmovddup\t{%1, %0|%0, %1}"
4790 [(set_attr "type" "sselog1")
4791 (set_attr "prefix" "maybe_vex")
4792 (set_attr "mode" "DF")])
;; V2DF built from DF operands 1 and 2, with seven alternatives.  The
;; "isa" list marks the first five as SSE2/AVX; the last two (movlhps,
;; movhps) are "noavx" and carry V4SF/V2SF modes.  Alternative 4 emits
;; (v)movsd of operand 1 only (operand 2 constraint "C").
4794 (define_insn "*vec_concatv2df"
4795 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4797 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4798 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4801 unpcklpd\t{%2, %0|%0, %2}
4802 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4803 movhpd\t{%2, %0|%0, %2}
4804 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4805 %vmovsd\t{%1, %0|%0, %1}
4806 movlhps\t{%2, %0|%0, %2}
4807 movhps\t{%2, %0|%0, %2}"
4808 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4811 (eq_attr "alternative" "0,1")
4812 (const_string "sselog")
4813 (const_string "ssemov")))
4814 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4815 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4816 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4818 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4820 ;; Parallel integral arithmetic
4822 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander: operand 2 is created as a register
;; holding the zero vector of the same mode.
4824 (define_expand "neg<mode>2"
4825 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4828 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4830 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/sub expander; the operands are adjusted with
;; ix86_fixup_binary_operands_no_copy before matching.
4832 (define_expand "<plusminus_insn><mode>3"
4833 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4835 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4836 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4838 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/sub insn: two-operand legacy form or three-operand
;; VEX form, with the mnemonic assembled from <plusminus_mnemonic> and
;; <ssemodesuffix>.  The mode attribute follows <sseinsnmode>.
4840 (define_insn "*<plusminus_insn><mode>3"
4841 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4843 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4844 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4845 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4847 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4848 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4849 [(set_attr "isa" "noavx,avx")
4850 (set_attr "type" "sseiadd")
4851 (set_attr "prefix_data16" "1,*")
4852 (set_attr "prefix" "orig,vex")
4853 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub expander (sat_plusminus codes) over VI12_AVX2; the
;; operands are adjusted with ix86_fixup_binary_operands_no_copy.
4855 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4856 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4857 (sat_plusminus:VI12_AVX2
4858 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4859 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4861 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/sub insn: two-operand legacy form or three-operand VEX
;; form.
;; NOTE(review): the mode attribute is a fixed "TI" here, while the plain
;; add/sub insn above uses <sseinsnmode>; confirm whether that is intended
;; for every mode in VI12_AVX2.
4863 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4864 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4865 (sat_plusminus:VI12_AVX2
4866 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4867 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4868 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4870 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4871 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4872 [(set_attr "isa" "noavx,avx")
4873 (set_attr "type" "sseiadd")
4874 (set_attr "prefix_data16" "1,*")
4875 (set_attr "prefix" "orig,vex")
4876 (set_attr "mode" "TI")])
;; Byte-element vector multiply (VI1_AVX2), split while pseudos can still
;; be created.  Steps: interleave the high and low halves of each input
;; into t[0]..t[3], multiply pairwise in <sseunpackmode> into t[4] and
;; t[5], then extract the even bytes of t[5]/t[4] into operand 0.  A
;; REG_EQUAL note recording the original MULT is put on the last insn.
4878 (define_insn_and_split "mul<mode>3"
4879 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4880 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4881 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4883 && can_create_pseudo_p ()"
4890 enum machine_mode mulmode = <sseunpackmode>mode;
4892 for (i = 0; i < 6; ++i)
4893 t[i] = gen_reg_rtx (<MODE>mode);
4895 /* Unpack data such that we've got a source byte in each low byte of
4896 each word. We don't care what goes into the high byte of each word.
4897 Rather than trying to get zero in there, most convenient is to let
4898 it be a copy of the low byte. */
4899 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4901 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4903 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4905 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4908 /* Multiply words. The end-of-line annotations here give a picture of what
4909 the output of that instruction looks like. Dot means don't care; the
4910 letters are the bytes of the result with A being the most significant. */
4911 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4912 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4913 gen_lowpart (mulmode, t[0]),
4914 gen_lowpart (mulmode, t[1]))));
4915 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4916 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4917 gen_lowpart (mulmode, t[2]),
4918 gen_lowpart (mulmode, t[3]))));
4920 /* Extract the even bytes and merge them back together. */
4921 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4923 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4924 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Multiply expander for VI2_AVX2; the operands are adjusted with
;; ix86_fixup_binary_operands_no_copy.
4928 (define_expand "mul<mode>3"
4929 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4930 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4931 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4933 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for VI2_AVX2: pmullw (operand 1 tied to the destination)
;; or three-operand vpmullw.  Operand 1 is marked commutative ("%").
4935 (define_insn "*mul<mode>3"
4936 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4937 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4938 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4939 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4941 pmullw\t{%2, %0|%0, %2}
4942 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4943 [(set_attr "isa" "noavx,avx")
4944 (set_attr "type" "sseimul")
4945 (set_attr "prefix_data16" "1,*")
4946 (set_attr "prefix" "orig,vex")
4947 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2: both inputs are extended
;; (any_extend) to <ssedoublemode>, multiplied, and the product is shifted
;; right logically.  Operands are adjusted with
;; ix86_fixup_binary_operands_no_copy.
4949 (define_expand "<s>mul<mode>3_highpart"
4950 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4952 (lshiftrt:<ssedoublemode>
4953 (mult:<ssedoublemode>
4954 (any_extend:<ssedoublemode>
4955 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4956 (any_extend:<ssedoublemode>
4957 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4960 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for VI2_AVX2: pmulh<u>w (operand 1 tied to the
;; destination) or three-operand vpmulh<u>w.  Operand 1 is marked
;; commutative ("%").
4962 (define_insn "*<s>mul<mode>3_highpart"
4963 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4965 (lshiftrt:<ssedoublemode>
4966 (mult:<ssedoublemode>
4967 (any_extend:<ssedoublemode>
4968 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4969 (any_extend:<ssedoublemode>
4970 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4972 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4974 pmulh<u>w\t{%2, %0|%0, %2}
4975 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4976 [(set_attr "isa" "noavx,avx")
4977 (set_attr "type" "sseimul")
4978 (set_attr "prefix_data16" "1,*")
4979 (set_attr "prefix" "orig,vex")
4980 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V4DI from elements 0, 2, 4 and 6 of two V8SI
;; inputs; the operands are adjusted with
;; ix86_fixup_binary_operands_no_copy.
4982 (define_expand "avx2_umulv4siv4di3"
4983 [(set (match_operand:V4DI 0 "register_operand" "")
4987 (match_operand:V8SI 1 "nonimmediate_operand" "")
4988 (parallel [(const_int 0) (const_int 2)
4989 (const_int 4) (const_int 6)])))
4992 (match_operand:V8SI 2 "nonimmediate_operand" "")
4993 (parallel [(const_int 0) (const_int 2)
4994 (const_int 4) (const_int 6)])))))]
4996 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the pattern above: three-operand vpmuludq on elements 0, 2, 4
;; and 6 of the V8SI inputs, requiring TARGET_AVX2.  Operand 1 is marked
;; commutative ("%").
4998 (define_insn "*avx_umulv4siv4di3"
4999 [(set (match_operand:V4DI 0 "register_operand" "=x")
5003 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5004 (parallel [(const_int 0) (const_int 2)
5005 (const_int 4) (const_int 6)])))
5008 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5009 (parallel [(const_int 0) (const_int 2)
5010 (const_int 4) (const_int 6)])))))]
5011 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5012 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5013 [(set_attr "type" "sseimul")
5014 (set_attr "prefix" "vex")
5015 (set_attr "mode" "OI")])
;; Expander producing a V2DI from elements 0 and 2 of two V4SI inputs;
;; the operands are adjusted with ix86_fixup_binary_operands_no_copy.
5017 (define_expand "sse2_umulv2siv2di3"
5018 [(set (match_operand:V2DI 0 "register_operand" "")
5022 (match_operand:V4SI 1 "nonimmediate_operand" "")
5023 (parallel [(const_int 0) (const_int 2)])))
5026 (match_operand:V4SI 2 "nonimmediate_operand" "")
5027 (parallel [(const_int 0) (const_int 2)])))))]
5029 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the pattern above: pmuludq (operand 1 tied to the
;; destination) or three-operand vpmuludq on elements 0 and 2 of the V4SI
;; inputs.  Operand 1 is marked commutative ("%").
5031 (define_insn "*sse2_umulv2siv2di3"
5032 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5036 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5037 (parallel [(const_int 0) (const_int 2)])))
5040 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5041 (parallel [(const_int 0) (const_int 2)])))))]
5042 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5044 pmuludq\t{%2, %0|%0, %2}
5045 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5046 [(set_attr "isa" "noavx,avx")
5047 (set_attr "type" "sseimul")
5048 (set_attr "prefix_data16" "1,*")
5049 (set_attr "prefix" "orig,vex")
5050 (set_attr "mode" "TI")])
;; Expander producing a V4DI from elements 0, 2, 4 and 6 of two V8SI
;; inputs; the operands are adjusted with
;; ix86_fixup_binary_operands_no_copy.
5052 (define_expand "avx2_mulv4siv4di3"
5053 [(set (match_operand:V4DI 0 "register_operand" "")
5057 (match_operand:V8SI 1 "nonimmediate_operand" "")
5058 (parallel [(const_int 0) (const_int 2)
5059 (const_int 4) (const_int 6)])))
5062 (match_operand:V8SI 2 "nonimmediate_operand" "")
5063 (parallel [(const_int 0) (const_int 2)
5064 (const_int 4) (const_int 6)])))))]
5066 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn for the pattern above: three-operand vpmuldq on elements 0, 2, 4
;; and 6 of the V8SI inputs, requiring TARGET_AVX2.
;; Both inputs go through the same element selection, so the multiply is
;; commutative.  Operand 1 carries "%" (as in the other multiply insns in
;; this file) so that a memory operand 1 accepted by
;; ix86_binary_operator_ok can be swapped into the "xm" slot instead of
;; being reloaded into a register.
5068 (define_insn "*avx2_mulv4siv4di3"
5069 [(set (match_operand:V4DI 0 "register_operand" "=x")
5073 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5074 (parallel [(const_int 0) (const_int 2)
5075 (const_int 4) (const_int 6)])))
5078 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5079 (parallel [(const_int 0) (const_int 2)
5080 (const_int 4) (const_int 6)])))))]
5081 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5082 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5083 [(set_attr "isa" "avx")
5084 (set_attr "type" "sseimul")
5085 (set_attr "prefix_extra" "1")
5086 (set_attr "prefix" "vex")
5087 (set_attr "mode" "OI")])
;; Expander producing a V2DI from elements 0 and 2 of two V4SI inputs;
;; the operands are adjusted with ix86_fixup_binary_operands_no_copy.
5089 (define_expand "sse4_1_mulv2siv2di3"
5090 [(set (match_operand:V2DI 0 "register_operand" "")
5094 (match_operand:V4SI 1 "nonimmediate_operand" "")
5095 (parallel [(const_int 0) (const_int 2)])))
5098 (match_operand:V4SI 2 "nonimmediate_operand" "")
5099 (parallel [(const_int 0) (const_int 2)])))))]
5101 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the pattern above, requiring TARGET_SSE4_1: pmuldq (operand 1
;; tied to the destination) or three-operand vpmuldq.  Operand 1 is marked
;; commutative ("%").
5103 (define_insn "*sse4_1_mulv2siv2di3"
5104 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5108 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5109 (parallel [(const_int 0) (const_int 2)])))
5112 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5113 (parallel [(const_int 0) (const_int 2)])))))]
5114 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5116 pmuldq\t{%2, %0|%0, %2}
5117 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5118 [(set_attr "isa" "noavx,avx")
5119 (set_attr "type" "sseimul")
5120 (set_attr "prefix_data16" "1,*")
5121 (set_attr "prefix_extra" "1")
5122 (set_attr "prefix" "orig,vex")
5123 (set_attr "mode" "TI")])
;; Expander producing a V8SI from V8HI selections of the two V16HI inputs:
;; one pair of selections whose selector list starts at element 0 and one
;; whose list starts at element 1 (ending at 15).  Operands are adjusted
;; with ix86_fixup_binary_operands_no_copy.
5125 (define_expand "avx2_pmaddwd"
5126 [(set (match_operand:V8SI 0 "register_operand" "")
5131 (match_operand:V16HI 1 "nonimmediate_operand" "")
5132 (parallel [(const_int 0)
5142 (match_operand:V16HI 2 "nonimmediate_operand" "")
5143 (parallel [(const_int 0)
5153 (vec_select:V8HI (match_dup 1)
5154 (parallel [(const_int 1)
5163 (vec_select:V8HI (match_dup 2)
5164 (parallel [(const_int 1)
5171 (const_int 15)]))))))]
5173 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander producing a V4SI from V4HI selections of the two V8HI inputs:
;; one pair of selections whose selector list starts at element 0 and one
;; whose list starts at element 1 (ending at 7).  Operands are adjusted
;; with ix86_fixup_binary_operands_no_copy.
5175 (define_expand "sse2_pmaddwd"
5176 [(set (match_operand:V4SI 0 "register_operand" "")
5181 (match_operand:V8HI 1 "nonimmediate_operand" "")
5182 (parallel [(const_int 0)
5188 (match_operand:V8HI 2 "nonimmediate_operand" "")
5189 (parallel [(const_int 0)
5195 (vec_select:V4HI (match_dup 1)
5196 (parallel [(const_int 1)
5201 (vec_select:V4HI (match_dup 2)
5202 (parallel [(const_int 1)
5205 (const_int 7)]))))))]
5207 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for avx2_pmaddwd: three-operand vpmaddwd, requiring TARGET_AVX2.
;; Operand 1 is marked commutative ("%").
5209 (define_insn "*avx2_pmaddwd"
5210 [(set (match_operand:V8SI 0 "register_operand" "=x")
5215 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5216 (parallel [(const_int 0)
5226 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5227 (parallel [(const_int 0)
5237 (vec_select:V8HI (match_dup 1)
5238 (parallel [(const_int 1)
5247 (vec_select:V8HI (match_dup 2)
5248 (parallel [(const_int 1)
5255 (const_int 15)]))))))]
5256 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5257 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5258 [(set_attr "type" "sseiadd")
5259 (set_attr "prefix" "vex")
5260 (set_attr "mode" "OI")])
;; Insn for sse2_pmaddwd: pmaddwd (operand 1 tied to the destination) or
;; three-operand vpmaddwd.  Operand 1 is marked commutative ("%").
5262 (define_insn "*sse2_pmaddwd"
5263 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5268 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5269 (parallel [(const_int 0)
5275 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5276 (parallel [(const_int 0)
5282 (vec_select:V4HI (match_dup 1)
5283 (parallel [(const_int 1)
5288 (vec_select:V4HI (match_dup 2)
5289 (parallel [(const_int 1)
5292 (const_int 7)]))))))]
5293 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5295 pmaddwd\t{%2, %0|%0, %2}
5296 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5297 [(set_attr "isa" "noavx,avx")
5298 (set_attr "type" "sseiadd")
5299 (set_attr "atom_unit" "simul")
5300 (set_attr "prefix_data16" "1,*")
5301 (set_attr "prefix" "orig,vex")
5302 (set_attr "mode" "TI")])
;; Multiply expander for VI4_AVX2.  The binary-operand fixup is applied
;; only when TARGET_SSE4_1 or TARGET_AVX is set.
5304 (define_expand "mul<mode>3"
5305 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5306 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5307 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5310 if (TARGET_SSE4_1 || TARGET_AVX)
5311 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Multiply insn for VI4_AVX2, requiring TARGET_SSE4_1: pmulld (operand 1
;; tied to the destination) or three-operand vpmulld.  Operand 1 is marked
;; commutative ("%").
5314 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5315 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5316 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5317 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5318 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5320 pmulld\t{%2, %0|%0, %2}
5321 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5322 [(set_attr "isa" "noavx,avx")
5323 (set_attr "type" "sseimul")
5324 (set_attr "prefix_extra" "1")
5325 (set_attr "prefix" "orig,vex")
5326 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 without SSE4.1 or AVX, split while pseudos can
;; still be created.  It multiplies elements 2 and 0 with
;; sse2_umulv2siv2di3, shifts both inputs down as V1TI and multiplies
;; elements 3 and 1, moves each product's element 2 down to element 1 with
;; pshufd, and merges the two with vec_interleave_lowv4si.  A REG_EQUAL
;; note recording the original MULT is put on the last insn.
5328 (define_insn_and_split "*sse2_mulv4si3"
5329 [(set (match_operand:V4SI 0 "register_operand" "")
5330 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5331 (match_operand:V4SI 2 "register_operand" "")))]
5332 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5333 && can_create_pseudo_p ()"
5338 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5344 t1 = gen_reg_rtx (V4SImode);
5345 t2 = gen_reg_rtx (V4SImode);
5346 t3 = gen_reg_rtx (V4SImode);
5347 t4 = gen_reg_rtx (V4SImode);
5348 t5 = gen_reg_rtx (V4SImode);
5349 t6 = gen_reg_rtx (V4SImode);
5350 thirtytwo = GEN_INT (32);
5352 /* Multiply elements 2 and 0. */
5353 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5356 /* Shift both input vectors down one element, so that elements 3
5357 and 1 are now in the slots for elements 2 and 0. For K8, at
5358 least, this is faster than using a shuffle. */
5359 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5360 gen_lowpart (V1TImode, op1),
5362 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5363 gen_lowpart (V1TImode, op2),
5365 /* Multiply elements 3 and 1. */
5366 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5369 /* Move the results in element 2 down to element 1; we don't care
5370 what goes in elements 2 and 3. */
5371 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5372 const0_rtx, const0_rtx));
5373 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5374 const0_rtx, const0_rtx));
5376 /* Merge the parts back together. */
5377 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5379 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5380 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; 64-bit element multiply (VI8_AVX2), split while pseudos can still be
;; created.  With TARGET_XOP and V2DImode it uses the XOP sequence: pshufd,
;; V4SI multiply, phadddq, shift left by 32, pmacsdql.  Otherwise it
;; multiplies the low 32-bit parts, multiplies each low part by the other
;; operand's high part (obtained by a logical right shift of 32), shifts
;; those two products left by 32 and adds the three terms.  A REG_EQUAL
;; note recording the original MULT is put on the last insn.
5384 (define_insn_and_split "mul<mode>3"
5385 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5386 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5387 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5389 && can_create_pseudo_p ()"
5394 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5401 if (TARGET_XOP && <MODE>mode == V2DImode)
5403 /* op1: A,B,C,D, op2: E,F,G,H */
5404 op1 = gen_lowpart (V4SImode, op1);
5405 op2 = gen_lowpart (V4SImode, op2);
5407 t1 = gen_reg_rtx (V4SImode);
5408 t2 = gen_reg_rtx (V4SImode);
5409 t3 = gen_reg_rtx (V2DImode);
5410 t4 = gen_reg_rtx (V2DImode);
5413 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5419 /* t2: (B*E),(A*F),(D*G),(C*H) */
5420 emit_insn (gen_mulv4si3 (t2, t1, op2));
5422 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5423 emit_insn (gen_xop_phadddq (t3, t2));
5425 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5426 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5428 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5429 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5433 t1 = gen_reg_rtx (<MODE>mode);
5434 t2 = gen_reg_rtx (<MODE>mode);
5435 t3 = gen_reg_rtx (<MODE>mode);
5436 t4 = gen_reg_rtx (<MODE>mode);
5437 t5 = gen_reg_rtx (<MODE>mode);
5438 t6 = gen_reg_rtx (<MODE>mode);
5439 thirtytwo = GEN_INT (32);
5441 /* Multiply low parts. */
5442 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5443 (t1, gen_lowpart (<ssepackmode>mode, op1),
5444 gen_lowpart (<ssepackmode>mode, op2)));
5446 /* Shift input vectors right 32 bits so we can multiply high parts. */
5447 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5448 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5450 /* Multiply high parts by low parts. */
5451 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5452 (t4, gen_lowpart (<ssepackmode>mode, op1),
5453 gen_lowpart (<ssepackmode>mode, t3)));
5454 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5455 (t5, gen_lowpart (<ssepackmode>mode, op2),
5456 gen_lowpart (<ssepackmode>mode, t2)));
5458 /* Shift them back. */
5459 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5460 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5462 /* Add the three parts together. */
5463 emit_insn (gen_add<mode>3 (t6, t1, t4));
5464 emit_insn (gen_add<mode>3 (op0, t6, t5));
5467 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5468 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for VI2_AVX2: computes the ordinary
;; product (t1) and the high-part product (t2) and combines them with
;; vec_interleave_high into the <MODE>mode view of operand 0.
5472 (define_expand "vec_widen_<s>mult_hi_<mode>"
5473 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5474 (any_extend:<sseunpackmode>
5475 (match_operand:VI2_AVX2 1 "register_operand" ""))
5476 (match_operand:VI2_AVX2 2 "register_operand" "")]
5479 rtx op1, op2, t1, t2, dest;
5483 t1 = gen_reg_rtx (<MODE>mode);
5484 t2 = gen_reg_rtx (<MODE>mode);
5485 dest = gen_lowpart (<MODE>mode, operands[0]);
5487 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5488 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5489 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for VI2_AVX2: computes the ordinary
;; product (t1) and the high-part product (t2) and combines them with
;; vec_interleave_low into the <MODE>mode view of operand 0.
5493 (define_expand "vec_widen_<s>mult_lo_<mode>"
5494 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5495 (any_extend:<sseunpackmode>
5496 (match_operand:VI2_AVX2 1 "register_operand" ""))
5497 (match_operand:VI2_AVX2 2 "register_operand" "")]
5500 rtx op1, op2, t1, t2, dest;
5504 t1 = gen_reg_rtx (<MODE>mode);
5505 t2 = gen_reg_rtx (<MODE>mode);
5506 dest = gen_lowpart (<MODE>mode, operands[0]);
5508 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5509 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5510 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening V8SI -> V4DI multiply, high half.  Each input is permuted as
;; V4DI with selectors (0, 2, 1, 3), then shuffled with pshufd selectors
;; (2, 2, 3, 3), and the results are fed to avx2_<u>mulv4siv4di3.
5514 (define_expand "vec_widen_<s>mult_hi_v8si"
5515 [(match_operand:V4DI 0 "register_operand" "")
5516 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5517 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5522 t1 = gen_reg_rtx (V4DImode);
5523 t2 = gen_reg_rtx (V4DImode);
5524 t3 = gen_reg_rtx (V8SImode);
5525 t4 = gen_reg_rtx (V8SImode);
5526 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5527 const0_rtx, const2_rtx,
5528 const1_rtx, GEN_INT (3)));
5529 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5530 const0_rtx, const2_rtx,
5531 const1_rtx, GEN_INT (3)));
5532 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5533 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5534 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5535 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5536 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Widening multiply, low part, V8SI x V8SI -> V4DI.
;; Identical to the "hi" expander except for the pshufd selector, which is
;; (0, 0, 1, 1) here: inputs are permuted as V4DI with selector (0, 2, 1, 3),
;; shuffled as V8SI, and multiplied by gen_avx2_<u>mulv4siv4di3 into
;; operand 0.
5540 (define_expand "vec_widen_<s>mult_lo_v8si"
5541 [(match_operand:V4DI 0 "register_operand" "")
5542 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5543 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5548 t1 = gen_reg_rtx (V4DImode);
5549 t2 = gen_reg_rtx (V4DImode);
5550 t3 = gen_reg_rtx (V8SImode);
5551 t4 = gen_reg_rtx (V8SImode);
5552 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5553 const0_rtx, const2_rtx,
5554 const1_rtx, GEN_INT (3)));
5555 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5556 const0_rtx, const2_rtx,
5557 const1_rtx, GEN_INT (3)));
5558 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5559 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5560 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5561 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5562 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed widening multiply, high part, V4SI x V4SI -> V2DI.
;; Two instruction sequences are emitted by the body:
;;  - pshufd both inputs with selector (0, 2, 1, 3), then
;;    gen_xop_mulv2div2di3_high;
;;  - interleave the high elements of each input with itself
;;    (gen_vec_interleave_highv4si (t, op, op)), then
;;    gen_sse4_1_mulv2siv2di3.
;; NOTE(review): the first sequence is presumably the TARGET_XOP path --
;; confirm against the guarding condition in the body.
5566 (define_expand "vec_widen_smult_hi_v4si"
5567 [(match_operand:V2DI 0 "register_operand" "")
5568 (match_operand:V4SI 1 "register_operand" "")
5569 (match_operand:V4SI 2 "register_operand" "")]
5572 rtx op1, op2, t1, t2;
5576 t1 = gen_reg_rtx (V4SImode);
5577 t2 = gen_reg_rtx (V4SImode);
5581 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5582 GEN_INT (1), GEN_INT (3)));
5583 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5584 GEN_INT (1), GEN_INT (3)));
5585 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5589 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5590 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5591 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, low part, V4SI x V4SI -> V2DI.
;; Two instruction sequences are emitted by the body:
;;  - pshufd both inputs with selector (0, 2, 1, 3), then
;;    gen_xop_mulv2div2di3_low;
;;  - interleave the low elements of each input with itself
;;    (gen_vec_interleave_lowv4si (t, op, op)), then
;;    gen_sse4_1_mulv2siv2di3.
;; NOTE(review): the first sequence is presumably the TARGET_XOP path --
;; confirm against the guarding condition in the body.
5595 (define_expand "vec_widen_smult_lo_v4si"
5596 [(match_operand:V2DI 0 "register_operand" "")
5597 (match_operand:V4SI 1 "register_operand" "")
5598 (match_operand:V4SI 2 "register_operand" "")]
5601 rtx op1, op2, t1, t2;
5605 t1 = gen_reg_rtx (V4SImode);
5606 t2 = gen_reg_rtx (V4SImode);
5610 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5611 GEN_INT (1), GEN_INT (3)));
5612 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5613 GEN_INT (1), GEN_INT (3)));
5614 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5618 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5619 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5620 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high part, V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself using gen_vec_interleave_highv4si,
;; and the two temporaries are multiplied by gen_sse2_umulv2siv2di3 into
;; operand 0.
5624 (define_expand "vec_widen_umult_hi_v4si"
5625 [(match_operand:V2DI 0 "register_operand" "")
5626 (match_operand:V4SI 1 "register_operand" "")
5627 (match_operand:V4SI 2 "register_operand" "")]
5630 rtx op1, op2, t1, t2;
5634 t1 = gen_reg_rtx (V4SImode);
5635 t2 = gen_reg_rtx (V4SImode);
5637 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5638 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5639 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low part, V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself using gen_vec_interleave_lowv4si,
;; and the two temporaries are multiplied by gen_sse2_umulv2siv2di3 into
;; operand 0.
5643 (define_expand "vec_widen_umult_lo_v4si"
5644 [(match_operand:V2DI 0 "register_operand" "")
5645 (match_operand:V4SI 1 "register_operand" "")
5646 (match_operand:V4SI 2 "register_operand" "")]
5649 rtx op1, op2, t1, t2;
5653 t1 = gen_reg_rtx (V4SImode);
5654 t2 = gen_reg_rtx (V4SImode);
5656 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5657 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5658 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for the VI2_AVX2 modes.  Operands 1 and 2 are the
;; multiplicands; operands 0 and 3 are in the wider <sseunpackmode>.
;; gen_<sse2_avx2>_pmaddwd multiplies operands 1 and 2 into the temporary t,
;; and operand 0 is then set to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS presumably adds t to the accumulator operand 3 --
;; confirm against the PLUS arguments in the body.
5662 (define_expand "sdot_prod<mode>"
5663 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5664 (match_operand:VI2_AVX2 1 "register_operand" "")
5665 (match_operand:VI2_AVX2 2 "register_operand" "")
5666 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5669 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5670 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5671 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5672 gen_rtx_PLUS (<sseunpackmode>mode,
;; Maps an extension code to the ISA prefix used in generator names:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".
5677 (define_code_attr sse2_sse4_1
5678 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot product V4SI x V4SI accumulated into V2DI.  Enabled for TARGET_SSE2
;; when the code is ZERO_EXTEND and for TARGET_SSE4_1 otherwise.
;; Steps emitted:
;;   t1 = widening multiply of operands 1 and 2; t1 += operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V1TI values
;;            (gen_sse2_lshrv1ti3);
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; The widening multiply generator name is built from the sse2_sse4_1 code
;; attribute and <u>.
5680 (define_expand "<s>dot_prodv4si"
5681 [(match_operand:V2DI 0 "register_operand" "")
5682 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5683 (match_operand:V4SI 2 "register_operand" "")
5684 (match_operand:V2DI 3 "register_operand" "")]
5685 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5689 t1 = gen_reg_rtx (V2DImode);
5690 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5691 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5693 t2 = gen_reg_rtx (V4SImode);
5694 t3 = gen_reg_rtx (V4SImode);
5695 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5696 gen_lowpart (V1TImode, operands[1]),
5698 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5699 gen_lowpart (V1TImode, operands[2]),
5702 t4 = gen_reg_rtx (V2DImode);
5703 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5705 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot product V8SI x V8SI accumulated into V4DI, using AVX2 generators.
;; Steps emitted:
;;   t1 = gen_avx2_<u>mulv4siv4di3 (operands 1, 2); t1 += operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V2TI values
;;            (gen_avx2_lshrv2ti3);
;;   t4 = gen_avx2_<u>mulv4siv4di3 (t2, t3);
;;   operand 0 = t1 + t4.
5709 (define_expand "<s>dot_prodv8si"
5710 [(match_operand:V4DI 0 "register_operand" "")
5711 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5712 (match_operand:V8SI 2 "register_operand" "")
5713 (match_operand:V4DI 3 "register_operand" "")]
5718 t1 = gen_reg_rtx (V4DImode);
5719 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5720 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5722 t2 = gen_reg_rtx (V8SImode);
5723 t3 = gen_reg_rtx (V8SImode);
5724 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5725 gen_lowpart (V2TImode, operands[1]),
5727 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5728 gen_lowpart (V2TImode, operands[2]),
5731 t4 = gen_reg_rtx (V4DImode);
5732 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5734 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Vector shift insn "ashr" for the VI24_AVX2 modes; emits
;; psra<ssemodesuffix> / vpsra<ssemodesuffix>.
;; Operand 2 is the SImode shift count (constraints "xN").
;; Alternative 0 (isa noavx) is the two-operand form with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand VEX form.
;; length_immediate depends on whether operand 2 is a const_int.
5738 (define_insn "ashr<mode>3"
5739 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5741 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5742 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5745 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5746 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5747 [(set_attr "isa" "noavx,avx")
5748 (set_attr "type" "sseishft")
5749 (set (attr "length_immediate")
5750 (if_then_else (match_operand 2 "const_int_operand" "")
5752 (const_string "0")))
5753 (set_attr "prefix_data16" "1,*")
5754 (set_attr "prefix" "orig,vex")
5755 (set_attr "mode" "<sseinsnmode>")])
;; Logical right shift (lshiftrt) insn for the VI248_AVX2 modes; emits
;; psrl<ssemodesuffix> / vpsrl<ssemodesuffix>.
;; Operand 2 is the SImode shift count (constraints "xN").
;; Alternative 0 (isa noavx) is the two-operand form with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand VEX form.
;; length_immediate depends on whether operand 2 is a const_int.
5757 (define_insn "lshr<mode>3"
5758 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5759 (lshiftrt:VI248_AVX2
5760 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5761 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5764 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5765 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5766 [(set_attr "isa" "noavx,avx")
5767 (set_attr "type" "sseishft")
5768 (set (attr "length_immediate")
5769 (if_then_else (match_operand 2 "const_int_operand" "")
5771 (const_string "0")))
5772 (set_attr "prefix_data16" "1,*")
5773 (set_attr "prefix" "orig,vex")
5774 (set_attr "mode" "<sseinsnmode>")])
;; Vector shift insn "ashl" for the VI248_AVX2 modes; emits
;; psll<ssemodesuffix> / vpsll<ssemodesuffix>.
;; Operand 2 is the SImode shift count (constraints "xN").
;; Alternative 0 (isa noavx) is the two-operand form with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand VEX form.
;; length_immediate depends on whether operand 2 is a const_int.
5776 (define_insn "ashl<mode>3"
5777 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5779 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5780 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5783 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5784 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5785 [(set_attr "isa" "noavx,avx")
5786 (set_attr "type" "sseishft")
5787 (set (attr "length_immediate")
5788 (if_then_else (match_operand 2 "const_int_operand" "")
5790 (const_string "0")))
5791 (set_attr "prefix_data16" "1,*")
5792 (set_attr "prefix" "orig,vex")
5793 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift "vec_shl" for the VI_128 modes.  The shift amount
;; (operand 2) must satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1
;; are re-viewed as V1TImode with gen_lowpart before the pattern is emitted.
5795 (define_expand "vec_shl_<mode>"
5796 [(set (match_operand:VI_128 0 "register_operand" "")
5798 (match_operand:VI_128 1 "register_operand" "")
5799 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5802 operands[0] = gen_lowpart (V1TImode, operands[0]);
5803 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-register left shift for the VIMAX_AVX2 modes; emits pslldq (noavx
;; alternative, destination tied to operand 1) or vpslldq (avx alternative).
;; Operand 2 satisfies const_0_to_255_mul_8_operand; the output code divides
;; it by 8 before printing, so the instruction immediate is operand 2 / 8.
5806 (define_insn "<sse2_avx2>_ashl<mode>3"
5807 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5809 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5810 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5813 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5815 switch (which_alternative)
5818 return "pslldq\t{%2, %0|%0, %2}";
5820 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5825 [(set_attr "isa" "noavx,avx")
5826 (set_attr "type" "sseishft")
5827 (set_attr "length_immediate" "1")
5828 (set_attr "prefix_data16" "1,*")
5829 (set_attr "prefix" "orig,vex")
5830 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift "vec_shr" for the VI_128 modes.  The shift amount
;; (operand 2) must satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1
;; are re-viewed as V1TImode with gen_lowpart before the pattern is emitted.
5832 (define_expand "vec_shr_<mode>"
5833 [(set (match_operand:VI_128 0 "register_operand" "")
5835 (match_operand:VI_128 1 "register_operand" "")
5836 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5839 operands[0] = gen_lowpart (V1TImode, operands[0]);
5840 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-register logical right shift (lshiftrt) for the VIMAX_AVX2 modes;
;; emits psrldq (noavx alternative, destination tied to operand 1) or
;; vpsrldq (avx alternative).
;; Operand 2 satisfies const_0_to_255_mul_8_operand; the output code divides
;; it by 8 before printing, so the instruction immediate is operand 2 / 8.
5843 (define_insn "<sse2_avx2>_lshr<mode>3"
5844 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5845 (lshiftrt:VIMAX_AVX2
5846 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5847 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5850 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5852 switch (which_alternative)
5855 return "psrldq\t{%2, %0|%0, %2}";
5857 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5862 [(set_attr "isa" "noavx,avx")
5863 (set_attr "type" "sseishft")
5864 (set_attr "length_immediate" "1")
5865 (set_attr "atom_unit" "sishuf")
5866 (set_attr "prefix_data16" "1,*")
5867 (set_attr "prefix" "orig,vex")
5868 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a binary operation on the VI124_256 modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy, called with the
;; rtx code, the mode and the operand array.
;; NOTE(review): presumably the 256-bit integer max/min expander (the insn
;; that follows emits vp<maxmin_int>) -- confirm against the code iterator.
5871 (define_expand "<code><mode>3"
5872 [(set (match_operand:VI124_256 0 "register_operand" "")
5874 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5875 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5877 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min insn for the VI124_256 modes; emits
;; vp<maxmin_int><ssemodesuffix> in three-operand form.
;; Requires TARGET_AVX2 and ix86_binary_operator_ok.  Operand 1 carries the
;; "%" constraint modifier, so operands 1 and 2 may be swapped; operand 2 may
;; be a register or memory.
5879 (define_insn "*avx2_<code><mode>3"
5880 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5882 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5883 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5884 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5885 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5886 [(set_attr "type" "sseiadd")
5887 (set_attr "prefix_extra" "1")
5888 (set_attr "prefix" "vex")
5889 (set_attr "mode" "OI")])
;; Max/min (maxmin) for the VI8_AVX2 modes, expanded as a vector conditional
;; through ix86_expand_int_vcond.
;; xops[] is laid out as: [0] destination, [1]/[2] the two values selected
;; between, [3] the comparison, [4]/[5] the compared operands.
;; The comparison is always operand 1 > operand 2 (GTU for UMAX/UMIN, GT
;; otherwise).  For SMAX/UMAX the selected values are (operand 1, operand 2);
;; for the min codes they are swapped.
5891 (define_expand "<code><mode>3"
5892 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5893 (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5894 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5901 xops[0] = operands[0];
5903 if (<CODE> == SMAX || <CODE> == UMAX)
5905 xops[1] = operands[1];
5906 xops[2] = operands[2];
5910 xops[1] = operands[2];
5911 xops[2] = operands[1];
5914 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5916 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5917 xops[4] = operands[1];
5918 xops[5] = operands[2];
5920 ok = ix86_expand_int_vcond (xops);
;; Signed max/min (smaxmin) for the VI124_128 modes.
;; When TARGET_SSE4_1 is enabled or the mode is V8HImode, the operands are
;; only fixed up with ix86_fixup_binary_operands_no_copy.
;; Otherwise both inputs are forced into registers and the operation is
;; expanded through ix86_expand_int_vcond with the comparison
;; operand 1 > operand 2 (GT); xops[1]/xops[2] hold operands 1 and 2 in
;; direct or swapped order.
;; NOTE(review): the order is presumably direct for SMAX and swapped for
;; SMIN -- confirm against the test on <CODE> in the body.
5925 (define_expand "<code><mode>3"
5926 [(set (match_operand:VI124_128 0 "register_operand" "")
5927 (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5928 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5931 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5932 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5938 xops[0] = operands[0];
5939 operands[1] = force_reg (<MODE>mode, operands[1]);
5940 operands[2] = force_reg (<MODE>mode, operands[2]);
5944 xops[1] = operands[1];
5945 xops[2] = operands[2];
5949 xops[1] = operands[2];
5950 xops[2] = operands[1];
5953 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5954 xops[4] = operands[1];
5955 xops[5] = operands[2];
5957 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for the VI14_128 modes; emits
;; p<maxmin_int><ssemodesuffix> (noavx, destination tied to operand 1) or
;; vp<maxmin_int><ssemodesuffix> (avx, three operands).
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok; operand 1 carries the
;; "%" constraint modifier.
5963 (define_insn "*sse4_1_<code><mode>3"
5964 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5966 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5967 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5968 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5970 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5971 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5972 [(set_attr "isa" "noavx,avx")
5973 (set_attr "type" "sseiadd")
5974 (set_attr "prefix_extra" "1,*")
5975 (set_attr "prefix" "orig,vex")
5976 (set_attr "mode" "TI")])
;; Integer max/min insn for V8HI available with TARGET_SSE2; emits
;; p<maxmin_int>w (noavx, destination tied to operand 1) or vp<maxmin_int>w
;; (avx, three operands).  Requires ix86_binary_operator_ok; operand 1
;; carries the "%" constraint modifier.
5978 (define_insn "*<code>v8hi3"
5979 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5981 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5982 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5983 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5985 p<maxmin_int>w\t{%2, %0|%0, %2}
5986 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5987 [(set_attr "isa" "noavx,avx")
5988 (set_attr "type" "sseiadd")
5989 (set_attr "prefix_data16" "1,*")
5990 (set_attr "prefix_extra" "*,1")
5991 (set_attr "prefix" "orig,vex")
5992 (set_attr "mode" "TI")])
;; Unsigned max/min (umaxmin) for the VI124_128 modes.  Three strategies:
;;  1. TARGET_SSE4_1 enabled, or mode V16QImode: only fix up the operands
;;     with ix86_fixup_binary_operands_no_copy.
;;  2. UMAX on V8HImode: emit a gen_sse2_ussubv8hi3 of operand 1 and
;;     operand 2 into op3, then add operand 2 back (gen_addv8hi3) into
;;     operand 0.  op3 is operand 0 itself unless that is the same rtx as
;;     operand 2, in which case a fresh register is used.
;;  3. Otherwise: force both inputs into registers and expand through
;;     ix86_expand_int_vcond with the comparison operand 1 > operand 2
;;     (GTU); xops[1]/xops[2] hold operands 1 and 2 in direct or swapped
;;     order.
;; NOTE(review): the order in case 3 is presumably direct for UMAX and
;; swapped for UMIN -- confirm against the test on <CODE> in the body.
5994 (define_expand "<code><mode>3"
5995 [(set (match_operand:VI124_128 0 "register_operand" "")
5996 (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5997 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6000 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6001 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6002 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6004 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6005 operands[1] = force_reg (<MODE>mode, operands[1]);
6006 if (rtx_equal_p (op3, op2))
6007 op3 = gen_reg_rtx (V8HImode);
6008 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6009 emit_insn (gen_addv8hi3 (op0, op3, op2));
6017 operands[1] = force_reg (<MODE>mode, operands[1]);
6018 operands[2] = force_reg (<MODE>mode, operands[2]);
6020 xops[0] = operands[0];
6024 xops[1] = operands[1];
6025 xops[2] = operands[2];
6029 xops[1] = operands[2];
6030 xops[2] = operands[1];
6033 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6034 xops[4] = operands[1];
6035 xops[5] = operands[2];
6037 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for the VI24_128 modes; emits
;; p<maxmin_int><ssemodesuffix> (noavx, destination tied to operand 1) or
;; vp<maxmin_int><ssemodesuffix> (avx, three operands).
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok; operand 1 carries the
;; "%" constraint modifier.
6043 (define_insn "*sse4_1_<code><mode>3"
6044 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6046 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6047 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6048 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6050 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6051 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6052 [(set_attr "isa" "noavx,avx")
6053 (set_attr "type" "sseiadd")
6054 (set_attr "prefix_extra" "1,*")
6055 (set_attr "prefix" "orig,vex")
6056 (set_attr "mode" "TI")])
;; Integer max/min insn for V16QI available with TARGET_SSE2; emits
;; p<maxmin_int>b (noavx, destination tied to operand 1) or vp<maxmin_int>b
;; (avx, three operands).  Requires ix86_binary_operator_ok; operand 1
;; carries the "%" constraint modifier.
6058 (define_insn "*<code>v16qi3"
6059 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6061 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6062 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6063 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6065 p<maxmin_int>b\t{%2, %0|%0, %2}
6066 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6067 [(set_attr "isa" "noavx,avx")
6068 (set_attr "type" "sseiadd")
6069 (set_attr "prefix_data16" "1,*")
6070 (set_attr "prefix_extra" "*,1")
6071 (set_attr "prefix" "orig,vex")
6072 (set_attr "mode" "TI")])
6074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6076 ;; Parallel integral comparisons
6078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for element-wise equality comparison on the VI_256 modes.
;; Its only preparation step is ix86_fixup_binary_operands_no_copy with
;; code EQ.
6080 (define_expand "avx2_eq<mode>3"
6081 [(set (match_operand:VI_256 0 "register_operand" "")
6083 (match_operand:VI_256 1 "nonimmediate_operand" "")
6084 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6086 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality comparison insn for the VI_256 modes; emits
;; vpcmpeq<ssemodesuffix> in three-operand form.  Requires TARGET_AVX2 and
;; ix86_binary_operator_ok (EQ, ...); operand 1 carries the "%" constraint
;; modifier and operand 2 may be a register or memory.
6088 (define_insn "*avx2_eq<mode>3"
6089 [(set (match_operand:VI_256 0 "register_operand" "=x")
6091 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6092 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6093 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6094 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6095 [(set_attr "type" "ssecmp")
6096 (set_attr "prefix_extra" "1")
6097 (set_attr "prefix" "vex")
6098 (set_attr "mode" "OI")])
;; SSE4.1 equality comparison insn for V2DI; emits pcmpeqq (noavx,
;; destination tied to operand 1) or vpcmpeqq (avx, three operands).
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok (EQ, ...); operand 1
;; carries the "%" constraint modifier.
6100 (define_insn "*sse4_1_eqv2di3"
6101 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6103 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6104 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6105 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6107 pcmpeqq\t{%2, %0|%0, %2}
6108 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6109 [(set_attr "isa" "noavx,avx")
6110 (set_attr "type" "ssecmp")
6111 (set_attr "prefix_extra" "1")
6112 (set_attr "prefix" "orig,vex")
6113 (set_attr "mode" "TI")])
;; SSE2 equality comparison insn for the VI124_128 modes; emits
;; pcmpeq<ssemodesuffix> (noavx, destination tied to operand 1) or
;; vpcmpeq<ssemodesuffix> (avx, three operands).
;; Enabled only when TARGET_SSE2 is set and TARGET_XOP is not, and
;; ix86_binary_operator_ok (EQ, ...) holds; operand 1 carries the "%"
;; constraint modifier.
6115 (define_insn "*sse2_eq<mode>3"
6116 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6118 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6119 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6120 "TARGET_SSE2 && !TARGET_XOP
6121 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6123 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6124 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6125 [(set_attr "isa" "noavx,avx")
6126 (set_attr "type" "ssecmp")
6127 (set_attr "prefix_data16" "1,*")
6128 (set_attr "prefix" "orig,vex")
6129 (set_attr "mode" "TI")])
;; Named expander for equality comparison on the VI124_128 modes, enabled
;; when TARGET_SSE2 is set and TARGET_XOP is not.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy with code EQ.
6131 (define_expand "sse2_eq<mode>3"
6132 [(set (match_operand:VI124_128 0 "register_operand" "")
6134 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6135 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6136 "TARGET_SSE2 && !TARGET_XOP "
6137 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for equality comparison on V2DI.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy with code EQ.
6139 (define_expand "sse4_1_eqv2di3"
6140 [(set (match_operand:V2DI 0 "register_operand" "")
6142 (match_operand:V2DI 1 "nonimmediate_operand" "")
6143 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6145 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than comparison insn for V2DI; emits pcmpgtq (noavx, destination
;; tied to operand 1) or vpcmpgtq (avx, three operands).
;; Operand 1 must be a register and has no "%" modifier, so the operands are
;; not interchangeable; operand 2 may be a register or memory.
6147 (define_insn "sse4_2_gtv2di3"
6148 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6150 (match_operand:V2DI 1 "register_operand" "0,x")
6151 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6154 pcmpgtq\t{%2, %0|%0, %2}
6155 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6156 [(set_attr "isa" "noavx,avx")
6157 (set_attr "type" "ssecmp")
6158 (set_attr "prefix_extra" "1")
6159 (set_attr "prefix" "orig,vex")
6160 (set_attr "mode" "TI")])
;; Greater-than comparison insn for the VI_256 modes; emits
;; vpcmpgt<ssemodesuffix> in three-operand VEX form.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
6162 (define_insn "avx2_gt<mode>3"
6163 [(set (match_operand:VI_256 0 "register_operand" "=x")
6165 (match_operand:VI_256 1 "register_operand" "x")
6166 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6168 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6169 [(set_attr "type" "ssecmp")
6170 (set_attr "prefix_extra" "1")
6171 (set_attr "prefix" "vex")
6172 (set_attr "mode" "OI")])
;; Greater-than comparison insn for the VI124_128 modes; emits
;; pcmpgt<ssemodesuffix> (noavx, destination tied to operand 1) or
;; vpcmpgt<ssemodesuffix> (avx, three operands).
;; Enabled when TARGET_SSE2 is set and TARGET_XOP is not.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
6174 (define_insn "sse2_gt<mode>3"
6175 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6177 (match_operand:VI124_128 1 "register_operand" "0,x")
6178 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6179 "TARGET_SSE2 && !TARGET_XOP"
6181 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6182 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6183 [(set_attr "isa" "noavx,avx")
6184 (set_attr "type" "ssecmp")
6185 (set_attr "prefix_data16" "1,*")
6186 (set_attr "prefix" "orig,vex")
6187 (set_attr "mode" "TI")])
;; Vector conditional "vcond": data operands 0/1/2 are in a V_256 mode, the
;; compared operands 4/5 are in a VI_256 mode, and operand 3 is the
;; comparison operator.  The condition requires both modes to have the same
;; number of elements (GET_MODE_NUNITS).  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is stored in ok.
6189 (define_expand "vcond<V_256:mode><VI_256:mode>"
6190 [(set (match_operand:V_256 0 "register_operand" "")
6192 (match_operator 3 ""
6193 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6194 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6195 (match_operand:V_256 1 "general_operand" "")
6196 (match_operand:V_256 2 "general_operand" "")))]
6198 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6199 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6201 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional "vcond": data operands 0/1/2 are in a V_128 mode, the
;; compared operands 4/5 are in a VI124_128 mode, and operand 3 is the
;; comparison operator.  The condition requires both modes to have the same
;; number of elements (GET_MODE_NUNITS).  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is stored in ok.
6206 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6207 [(set (match_operand:V_128 0 "register_operand" "")
6209 (match_operator 3 ""
6210 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6211 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6212 (match_operand:V_128 1 "general_operand" "")
6213 (match_operand:V_128 2 "general_operand" "")))]
6215 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6216 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6218 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional "vcond" with a V2DI comparison: operand 0 is set to an
;; if_then_else in a VI8F_128 mode whose condition is operator 3 applied to
;; the V2DI operands 4 and 5, selecting between operands 1 and 2.
;; Expansion is delegated to ix86_expand_int_vcond, whose result is stored
;; in ok.
6223 (define_expand "vcond<VI8F_128:mode>v2di"
6224 [(set (match_operand:VI8F_128 0 "register_operand" "")
6225 (if_then_else:VI8F_128
6226 (match_operator 3 ""
6227 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6228 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6229 (match_operand:VI8F_128 1 "general_operand" "")
6230 (match_operand:VI8F_128 2 "general_operand" "")))]
6233 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional "vcondu": data operands 0/1/2 are in a V_256 mode, the
;; compared operands 4/5 are in a VI_256 mode, and operand 3 is the
;; comparison operator.  The condition requires both modes to have the same
;; number of elements (GET_MODE_NUNITS).  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is stored in ok.
6238 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6239 [(set (match_operand:V_256 0 "register_operand" "")
6241 (match_operator 3 ""
6242 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6243 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6244 (match_operand:V_256 1 "general_operand" "")
6245 (match_operand:V_256 2 "general_operand" "")))]
6247 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6248 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6250 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional "vcondu": data operands 0/1/2 are in a V_128 mode, the
;; compared operands 4/5 are in a VI124_128 mode, and operand 3 is the
;; comparison operator.  The condition requires both modes to have the same
;; number of elements (GET_MODE_NUNITS).  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is stored in ok.
6255 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6256 [(set (match_operand:V_128 0 "register_operand" "")
6258 (match_operator 3 ""
6259 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6260 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6261 (match_operand:V_128 1 "general_operand" "")
6262 (match_operand:V_128 2 "general_operand" "")))]
6264 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6265 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6267 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional "vcondu" with a V2DI comparison: operand 0 is set to
;; an if_then_else in a VI8F_128 mode whose condition is operator 3 applied
;; to the V2DI operands 4 and 5, selecting between operands 1 and 2.
;; Expansion is delegated to ix86_expand_int_vcond, whose result is stored
;; in ok.
6272 (define_expand "vcondu<VI8F_128:mode>v2di"
6273 [(set (match_operand:VI8F_128 0 "register_operand" "")
6274 (if_then_else:VI8F_128
6275 (match_operator 3 ""
6276 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6277 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6278 (match_operand:VI8F_128 1 "general_operand" "")
6279 (match_operand:VI8F_128 2 "general_operand" "")))]
6282 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable vec_perm expander: the six 128-bit modes
;; unconditionally, and the six 256-bit modes only when TARGET_AVX2.
6287 (define_mode_iterator VEC_PERM_AVX2
6288 [V16QI V8HI V4SI V2DI V4SF V2DF
6289 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6290 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6291 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation "vec_perm" for the VEC_PERM_AVX2 modes.  Operands 1
;; and 2 are the input vectors and operand 3, in <sseintvecmode>, is a
;; register operand.  Enabled for TARGET_SSSE3, TARGET_AVX or TARGET_XOP;
;; all work is delegated to ix86_expand_vec_perm.
6293 (define_expand "vec_perm<mode>"
6294 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6295 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6296 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6297 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6298 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6300 ix86_expand_vec_perm (operands);
;; Modes handled by the constant-permutation expander, each with the target
;; flag it requires: TARGET_SSE for V4SF/V4SI/V2DF/V2DI, TARGET_SSE2 for
;; V16QI/V8HI, TARGET_AVX for V8SF/V4DF/V8SI/V4DI, and TARGET_AVX2 for
;; V32QI/V16HI.
6304 (define_mode_iterator VEC_PERM_CONST
6305 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6306 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6307 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6308 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6309 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6310 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Constant permutation "vec_perm_const" for the VEC_PERM_CONST modes.
;; Operand 3, in <sseintvecmode>, has no predicate.  The body branches on
;; the return value of ix86_expand_vec_perm_const (operands).
6312 (define_expand "vec_perm_const<mode>"
6313 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6314 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6315 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6316 (match_operand:<sseintvecmode> 3 "" "")]
6319 if (ix86_expand_vec_perm_const (operands))
6325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6327 ;; Parallel bitwise logical operations
6329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR.  The preparation
;; code builds a CONST_VECTOR whose elements are all constm1_rtx (one per
;; element of <MODE>mode), forces it into a register, and stores it in
;; operands[2].
6331 (define_expand "one_cmpl<mode>2"
6332 [(set (match_operand:VI 0 "register_operand" "")
6333 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6337 int i, n = GET_MODE_NUNITS (<MODE>mode);
6338 rtvec v = rtvec_alloc (n);
6340 for (i = 0; i < n; ++i)
6341 RTVEC_ELT (v, i) = constm1_rtx;
6343 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the "andnot" operation on the VI_AVX2 modes: operand 1
;; (a register) is complemented with NOT and combined with operand 2, which
;; may be a register or memory.
6346 (define_expand "<sse2_avx2>_andnot<mode>3"
6347 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6349 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6350 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; "andnot" insn for the VI modes: operand 1 (register) is complemented and
;; combined with operand 2 (register or memory).
;; The output code switches on get_attr_mode (insn) and asserts the target
;; feature needed for the selected insn mode (TARGET_AVX2, TARGET_SSE2,
;; TARGET_AVX or TARGET_SSE).  It then formats the mnemonic in tmp into the
;; static buffer buf: alternative 0 uses the two-operand template,
;; alternative 1 the "v"-prefixed three-operand template.
;; The trailing cond yields "V8SF" when TARGET_AVX2 is off and the mode is
;; wider than 16 bytes, "V4SF" when TARGET_SSE2 is off, and <sseinsnmode>
;; otherwise.
;; NOTE(review): that cond presumably sets the "mode" attribute -- confirm.
6353 (define_insn "*andnot<mode>3"
6354 [(set (match_operand:VI 0 "register_operand" "=x,x")
6356 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6357 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6360 static char buf[32];
6364 switch (get_attr_mode (insn))
6367 gcc_assert (TARGET_AVX2);
6369 gcc_assert (TARGET_SSE2);
6375 gcc_assert (TARGET_AVX);
6377 gcc_assert (TARGET_SSE);
6386 switch (which_alternative)
6389 ops = "%s\t{%%2, %%0|%%0, %%2}";
6392 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6398 snprintf (buf, sizeof (buf), ops, tmp);
6401 [(set_attr "isa" "noavx,avx")
6402 (set_attr "type" "sselog")
6403 (set (attr "prefix_data16")
6405 (and (eq_attr "alternative" "0")
6406 (eq_attr "mode" "TI"))
6408 (const_string "*")))
6409 (set_attr "prefix" "orig,vex")
6411 (cond [(and (not (match_test "TARGET_AVX2"))
6412 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6413 (const_string "V8SF")
6414 (not (match_test "TARGET_SSE2"))
6415 (const_string "V4SF")
6417 (const_string "<sseinsnmode>")))])
;; Expander for a binary operation on the VI modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy, called with the rtx code, the
;; mode and the operand array.
;; NOTE(review): this sits in the "Parallel bitwise logical operations"
;; section, so the code iterator is presumably and/ior/xor -- confirm.
6419 (define_expand "<code><mode>3"
6420 [(set (match_operand:VI 0 "register_operand" "")
6422 (match_operand:VI 1 "nonimmediate_operand" "")
6423 (match_operand:VI 2 "nonimmediate_operand" "")))]
6425 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary-operation insn for the VI modes; operand 1 carries the "%"
;; constraint modifier and the condition includes ix86_binary_operator_ok.
;; The output code switches on get_attr_mode (insn) and asserts the target
;; feature needed for the selected insn mode (TARGET_AVX2, TARGET_SSE2,
;; TARGET_AVX or TARGET_SSE).  It then formats the mnemonic in tmp into the
;; static buffer buf: alternative 0 uses the two-operand template,
;; alternative 1 the "v"-prefixed three-operand template.
;; The trailing cond yields "V8SF" when TARGET_AVX2 is off and the mode is
;; wider than 16 bytes, "V4SF" when TARGET_SSE2 is off, and <sseinsnmode>
;; otherwise.
;; NOTE(review): that cond presumably sets the "mode" attribute -- confirm.
6427 (define_insn "*<code><mode>3"
6428 [(set (match_operand:VI 0 "register_operand" "=x,x")
6430 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6431 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6433 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6435 static char buf[32];
6439 switch (get_attr_mode (insn))
6442 gcc_assert (TARGET_AVX2);
6444 gcc_assert (TARGET_SSE2);
6450 gcc_assert (TARGET_AVX);
6452 gcc_assert (TARGET_SSE);
6461 switch (which_alternative)
6464 ops = "%s\t{%%2, %%0|%%0, %%2}";
6467 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6473 snprintf (buf, sizeof (buf), ops, tmp);
6476 [(set_attr "isa" "noavx,avx")
6477 (set_attr "type" "sselog")
6478 (set (attr "prefix_data16")
6480 (and (eq_attr "alternative" "0")
6481 (eq_attr "mode" "TI"))
6483 (const_string "*")))
6484 (set_attr "prefix" "orig,vex")
6486 (cond [(and (not (match_test "TARGET_AVX2"))
6487 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6488 (const_string "V8SF")
6489 (not (match_test "TARGET_SSE2"))
6490 (const_string "V4SF")
6492 (const_string "<sseinsnmode>")))])
;; "andnot" insn on TFmode values held in SSE registers: operand 1
;; (register) is complemented and combined with operand 2 (register or
;; memory).  Emits pandn (noavx, destination tied to operand 1) or vpandn
;; (avx, three operands).
6494 (define_insn "*andnottf3"
6495 [(set (match_operand:TF 0 "register_operand" "=x,x")
6497 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6498 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6501 pandn\t{%2, %0|%0, %2}
6502 vpandn\t{%2, %1, %0|%0, %1, %2}"
6503 [(set_attr "isa" "noavx,avx")
6504 (set_attr "type" "sselog")
6505 (set_attr "prefix_data16" "1,*")
6506 (set_attr "prefix" "orig,vex")
6507 (set_attr "mode" "TI")])
;; Expander for a binary operation on TFmode operands.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy with TFmode.
6509 (define_expand "<code>tf3"
6510 [(set (match_operand:TF 0 "register_operand" "")
6512 (match_operand:TF 1 "nonimmediate_operand" "")
6513 (match_operand:TF 2 "nonimmediate_operand" "")))]
6515 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logic insn on TFmode values; emits p<logic> (noavx, destination
;; tied to operand 1) or vp<logic> (avx, three operands).  The condition
;; includes ix86_binary_operator_ok; operand 1 carries the "%" constraint
;; modifier.
6517 (define_insn "*<code>tf3"
6518 [(set (match_operand:TF 0 "register_operand" "=x,x")
6520 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6521 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6523 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6525 p<logic>\t{%2, %0|%0, %2}
6526 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6527 [(set_attr "isa" "noavx,avx")
6528 (set_attr "type" "sselog")
6529 (set_attr "prefix_data16" "1,*")
6530 (set_attr "prefix" "orig,vex")
6531 (set_attr "mode" "TI")])
6533 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6535 ;; Parallel integral element swizzling
6537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack "vec_pack_trunc" for the VI248_AVX2 modes.  Both inputs
;; are re-viewed in the narrower-element <ssepackmode> with gen_lowpart and
;; handed to ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements, i.e.
;; the low part of each wide element -- confirm against the helper.
6539 (define_expand "vec_pack_trunc_<mode>"
6540 [(match_operand:<ssepackmode> 0 "register_operand" "")
6541 (match_operand:VI248_AVX2 1 "register_operand" "")
6542 (match_operand:VI248_AVX2 2 "register_operand" "")]
6545 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6546 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6547 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb for the VI1_AVX2 modes: the result is the
;; vec_concat of the signed-saturating truncations (ss_truncate) of
;; operand 1 and operand 2, both in <sseunpackmode>.
;; Alternative 0 (noavx) ties the destination to operand 1; alternative 1
;; (avx) is the three-operand form.
6551 (define_insn "<sse2_avx2>_packsswb"
6552 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6553 (vec_concat:VI1_AVX2
6554 (ss_truncate:<ssehalfvecmode>
6555 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6556 (ss_truncate:<ssehalfvecmode>
6557 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6560 packsswb\t{%2, %0|%0, %2}
6561 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6562 [(set_attr "isa" "noavx,avx")
6563 (set_attr "type" "sselog")
6564 (set_attr "prefix_data16" "1,*")
6565 (set_attr "prefix" "orig,vex")
6566 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw for the VI2_AVX2 modes: the result is the
;; vec_concat of the signed-saturating truncations (ss_truncate) of
;; operand 1 and operand 2, both in <sseunpackmode>.
;; Alternative 0 (noavx) ties the destination to operand 1; alternative 1
;; (avx) is the three-operand form.
6568 (define_insn "<sse2_avx2>_packssdw"
6569 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6570 (vec_concat:VI2_AVX2
6571 (ss_truncate:<ssehalfvecmode>
6572 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6573 (ss_truncate:<ssehalfvecmode>
6574 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6577 packssdw\t{%2, %0|%0, %2}
6578 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6579 [(set_attr "isa" "noavx,avx")
6580 (set_attr "type" "sselog")
6581 (set_attr "prefix_data16" "1,*")
6582 (set_attr "prefix" "orig,vex")
6583 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb for the VI1_AVX2 modes: the result is the
;; vec_concat of the unsigned-saturating truncations (us_truncate) of
;; operand 1 and operand 2, both in <sseunpackmode>.
;; Alternative 0 (noavx) ties the destination to operand 1; alternative 1
;; (avx) is the three-operand form.
6585 (define_insn "<sse2_avx2>_packuswb"
6586 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6587 (vec_concat:VI1_AVX2
6588 (us_truncate:<ssehalfvecmode>
6589 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6590 (us_truncate:<ssehalfvecmode>
6591 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6594 packuswb\t{%2, %0|%0, %2}
6595 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6596 [(set_attr "isa" "noavx,avx")
6597 (set_attr "type" "sselog")
6598 (set_attr "prefix_data16" "1,*")
6599 (set_attr "prefix" "orig,vex")
6600 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI.  The selection list alternates an index into
;; operand 1 with the matching index into operand 2 (offset by 32).
;; It takes indices 8..15 first and 24..31 second, i.e. the upper eight
;; bytes of each 16-byte half of the inputs.
6602 (define_insn "avx2_interleave_highv32qi"
6603 [(set (match_operand:V32QI 0 "register_operand" "=x")
6606 (match_operand:V32QI 1 "register_operand" "x")
6607 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6608 (parallel [(const_int 8) (const_int 40)
6609 (const_int 9) (const_int 41)
6610 (const_int 10) (const_int 42)
6611 (const_int 11) (const_int 43)
6612 (const_int 12) (const_int 44)
6613 (const_int 13) (const_int 45)
6614 (const_int 14) (const_int 46)
6615 (const_int 15) (const_int 47)
6616 (const_int 24) (const_int 56)
6617 (const_int 25) (const_int 57)
6618 (const_int 26) (const_int 58)
6619 (const_int 27) (const_int 59)
6620 (const_int 28) (const_int 60)
6621 (const_int 29) (const_int 61)
6622 (const_int 30) (const_int 62)
6623 (const_int 31) (const_int 63)])))]
6625 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6626 [(set_attr "type" "sselog")
6627 (set_attr "prefix" "vex")
6628 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The selection list alternates indices
;; 8..15 of operand 1 with the matching indices of operand 2 (16 higher),
;; i.e. the upper eight bytes of both inputs are interleaved.
;; Alternative 0 (noavx) ties the destination to operand 1; alternative 1
;; (avx) is the three-operand form.
6630 (define_insn "vec_interleave_highv16qi"
6631 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6634 (match_operand:V16QI 1 "register_operand" "0,x")
6635 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6636 (parallel [(const_int 8) (const_int 24)
6637 (const_int 9) (const_int 25)
6638 (const_int 10) (const_int 26)
6639 (const_int 11) (const_int 27)
6640 (const_int 12) (const_int 28)
6641 (const_int 13) (const_int 29)
6642 (const_int 14) (const_int 30)
6643 (const_int 15) (const_int 31)])))]
6646 punpckhbw\t{%2, %0|%0, %2}
6647 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6648 [(set_attr "isa" "noavx,avx")
6649 (set_attr "type" "sselog")
6650 (set_attr "prefix_data16" "1,*")
6651 (set_attr "prefix" "orig,vex")
6652 (set_attr "mode" "TI")])
;; V32QI low interleave, emitted as vpunpcklbw.
;; The selection list pairs index i with index i + 32, first for
;; i = 0..7 and then for i = 16..23, i.e. the lower eight bytes of each
;; 16-byte half are interleaved separately.
6654 (define_insn "avx2_interleave_lowv32qi"
6655 [(set (match_operand:V32QI 0 "register_operand" "=x")
6658 (match_operand:V32QI 1 "register_operand" "x")
6659 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6660 (parallel [(const_int 0) (const_int 32)
6661 (const_int 1) (const_int 33)
6662 (const_int 2) (const_int 34)
6663 (const_int 3) (const_int 35)
6664 (const_int 4) (const_int 36)
6665 (const_int 5) (const_int 37)
6666 (const_int 6) (const_int 38)
6667 (const_int 7) (const_int 39)
6668 (const_int 16) (const_int 48)
6669 (const_int 17) (const_int 49)
6670 (const_int 18) (const_int 50)
6671 (const_int 19) (const_int 51)
6672 (const_int 20) (const_int 52)
6673 (const_int 21) (const_int 53)
6674 (const_int 22) (const_int 54)
6675 (const_int 23) (const_int 55)])))]
6677 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6678 [(set_attr "type" "sselog")
6679 (set_attr "prefix" "vex")
6680 (set_attr "mode" "OI")])
;; V16QI low interleave: the selection list pairs index i with index
;; i + 16 for i = 0..7.
;; Alternative 0 is the two-operand punpcklbw form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpunpcklbw form.
6682 (define_insn "vec_interleave_lowv16qi"
6683 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6686 (match_operand:V16QI 1 "register_operand" "0,x")
6687 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6688 (parallel [(const_int 0) (const_int 16)
6689 (const_int 1) (const_int 17)
6690 (const_int 2) (const_int 18)
6691 (const_int 3) (const_int 19)
6692 (const_int 4) (const_int 20)
6693 (const_int 5) (const_int 21)
6694 (const_int 6) (const_int 22)
6695 (const_int 7) (const_int 23)])))]
6698 punpcklbw\t{%2, %0|%0, %2}
6699 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6700 [(set_attr "isa" "noavx,avx")
6701 (set_attr "type" "sselog")
6702 (set_attr "prefix_data16" "1,*")
6703 (set_attr "prefix" "orig,vex")
6704 (set_attr "mode" "TI")])
;; V16HI high interleave, emitted as vpunpckhwd.
;; The selection list pairs index i with index i + 16, first for
;; i = 4..7 and then for i = 12..15.
6706 (define_insn "avx2_interleave_highv16hi"
6707 [(set (match_operand:V16HI 0 "register_operand" "=x")
6710 (match_operand:V16HI 1 "register_operand" "x")
6711 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6712 (parallel [(const_int 4) (const_int 20)
6713 (const_int 5) (const_int 21)
6714 (const_int 6) (const_int 22)
6715 (const_int 7) (const_int 23)
6716 (const_int 12) (const_int 28)
6717 (const_int 13) (const_int 29)
6718 (const_int 14) (const_int 30)
6719 (const_int 15) (const_int 31)])))]
6721 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6722 [(set_attr "type" "sselog")
6723 (set_attr "prefix" "vex")
6724 (set_attr "mode" "OI")])
;; V8HI high interleave: the selection list pairs index i with index
;; i + 8 for i = 4..7.
;; Alternative 0 is the two-operand punpckhwd form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpunpckhwd form.
6726 (define_insn "vec_interleave_highv8hi"
6727 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6730 (match_operand:V8HI 1 "register_operand" "0,x")
6731 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6732 (parallel [(const_int 4) (const_int 12)
6733 (const_int 5) (const_int 13)
6734 (const_int 6) (const_int 14)
6735 (const_int 7) (const_int 15)])))]
6738 punpckhwd\t{%2, %0|%0, %2}
6739 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6740 [(set_attr "isa" "noavx,avx")
6741 (set_attr "type" "sselog")
6742 (set_attr "prefix_data16" "1,*")
6743 (set_attr "prefix" "orig,vex")
6744 (set_attr "mode" "TI")])
;; V16HI low interleave, emitted as vpunpcklwd.
;; The selection list pairs index i with index i + 16, first for
;; i = 0..3 and then for i = 8..11.
6746 (define_insn "avx2_interleave_lowv16hi"
6747 [(set (match_operand:V16HI 0 "register_operand" "=x")
6750 (match_operand:V16HI 1 "register_operand" "x")
6751 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6752 (parallel [(const_int 0) (const_int 16)
6753 (const_int 1) (const_int 17)
6754 (const_int 2) (const_int 18)
6755 (const_int 3) (const_int 19)
6756 (const_int 8) (const_int 24)
6757 (const_int 9) (const_int 25)
6758 (const_int 10) (const_int 26)
6759 (const_int 11) (const_int 27)])))]
6761 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6762 [(set_attr "type" "sselog")
6763 (set_attr "prefix" "vex")
6764 (set_attr "mode" "OI")])
;; V8HI low interleave: the selection list pairs index i with index
;; i + 8 for i = 0..3.
;; Alternative 0 is the two-operand punpcklwd form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpunpcklwd form.
6766 (define_insn "vec_interleave_lowv8hi"
6767 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6770 (match_operand:V8HI 1 "register_operand" "0,x")
6771 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6772 (parallel [(const_int 0) (const_int 8)
6773 (const_int 1) (const_int 9)
6774 (const_int 2) (const_int 10)
6775 (const_int 3) (const_int 11)])))]
6778 punpcklwd\t{%2, %0|%0, %2}
6779 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6780 [(set_attr "isa" "noavx,avx")
6781 (set_attr "type" "sselog")
6782 (set_attr "prefix_data16" "1,*")
6783 (set_attr "prefix" "orig,vex")
6784 (set_attr "mode" "TI")])
;; V8SI high interleave, emitted as vpunpckhdq.
;; The selection list pairs index i with index i + 8 for i = 2, 3 and
;; then for i = 6, 7.
6786 (define_insn "avx2_interleave_highv8si"
6787 [(set (match_operand:V8SI 0 "register_operand" "=x")
6790 (match_operand:V8SI 1 "register_operand" "x")
6791 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6792 (parallel [(const_int 2) (const_int 10)
6793 (const_int 3) (const_int 11)
6794 (const_int 6) (const_int 14)
6795 (const_int 7) (const_int 15)])))]
6797 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6798 [(set_attr "type" "sselog")
6799 (set_attr "prefix" "vex")
6800 (set_attr "mode" "OI")])
;; V4SI high interleave: the selection list pairs index i with index
;; i + 4 for i = 2, 3.
;; Alternative 0 is the two-operand punpckhdq form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpunpckhdq form.
6802 (define_insn "vec_interleave_highv4si"
6803 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6806 (match_operand:V4SI 1 "register_operand" "0,x")
6807 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6808 (parallel [(const_int 2) (const_int 6)
6809 (const_int 3) (const_int 7)])))]
6812 punpckhdq\t{%2, %0|%0, %2}
6813 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6814 [(set_attr "isa" "noavx,avx")
6815 (set_attr "type" "sselog")
6816 (set_attr "prefix_data16" "1,*")
6817 (set_attr "prefix" "orig,vex")
6818 (set_attr "mode" "TI")])
;; V8SI low interleave, emitted as vpunpckldq.
;; The selection list pairs index i with index i + 8 for i = 0, 1 and
;; then for i = 4, 5.
6820 (define_insn "avx2_interleave_lowv8si"
6821 [(set (match_operand:V8SI 0 "register_operand" "=x")
6824 (match_operand:V8SI 1 "register_operand" "x")
6825 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6826 (parallel [(const_int 0) (const_int 8)
6827 (const_int 1) (const_int 9)
6828 (const_int 4) (const_int 12)
6829 (const_int 5) (const_int 13)])))]
6831 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6832 [(set_attr "type" "sselog")
6833 (set_attr "prefix" "vex")
6834 (set_attr "mode" "OI")])
;; V4SI low interleave: the selection list pairs index i with index
;; i + 4 for i = 0, 1.
;; Alternative 0 is the two-operand punpckldq form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpunpckldq form.
6836 (define_insn "vec_interleave_lowv4si"
6837 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6840 (match_operand:V4SI 1 "register_operand" "0,x")
6841 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6842 (parallel [(const_int 0) (const_int 4)
6843 (const_int 1) (const_int 5)])))]
6846 punpckldq\t{%2, %0|%0, %2}
6847 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6848 [(set_attr "isa" "noavx,avx")
6849 (set_attr "type" "sselog")
6850 (set_attr "prefix_data16" "1,*")
6851 (set_attr "prefix" "orig,vex")
6852 (set_attr "mode" "TI")])
;; Generic high interleave for the VI_256 modes.
;; Computes avx2_interleave_low<mode> into t1 and
;; avx2_interleave_high<mode> into t2, then combines the two temporaries
;; (viewed as V4DImode) with avx2_permv2ti and immediate 1 + (3 << 4).
;; NOTE(review): the immediate presumably picks the upper 128-bit half of
;; t1 and of t2 -- confirm against the VPERM2I128 immediate encoding.
6854 (define_expand "vec_interleave_high<mode>"
6855 [(match_operand:VI_256 0 "register_operand" "=x")
6856 (match_operand:VI_256 1 "register_operand" "x")
6857 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6860 rtx t1 = gen_reg_rtx (<MODE>mode);
6861 rtx t2 = gen_reg_rtx (<MODE>mode);
6862 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6863 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6864 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6865 gen_lowpart (V4DImode, t1),
6866 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Generic low interleave for the VI_256 modes.
;; Computes avx2_interleave_low<mode> into t1 and
;; avx2_interleave_high<mode> into t2, then combines the two temporaries
;; (viewed as V4DImode) with avx2_permv2ti and immediate 0 + (2 << 4).
;; NOTE(review): the immediate presumably picks the lower 128-bit half of
;; t1 and of t2 -- confirm against the VPERM2I128 immediate encoding.
6870 (define_expand "vec_interleave_low<mode>"
6871 [(match_operand:VI_256 0 "register_operand" "=x")
6872 (match_operand:VI_256 1 "register_operand" "x")
6873 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6876 rtx t1 = gen_reg_rtx (<MODE>mode);
6877 rtx t2 = gen_reg_rtx (<MODE>mode);
6878 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6879 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6880 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6881 gen_lowpart (V4DImode, t1),
6882 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6886 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1; V2DI
;; requires TARGET_SSE4_1 and TARGET_64BIT.
6887 (define_mode_iterator PINSR_MODE
6888 [(V16QI "TARGET_SSE4_1") V8HI
6889 (V4SI "TARGET_SSE4_1")
6890 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix for the pinsr pattern: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
6892 (define_mode_attr sse2p4_1
6893 [(V16QI "sse4_1") (V8HI "sse2")
6894 (V4SI "sse4_1") (V2DI "sse4_1")])
6896 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into vector operand 1: a vec_merge of
;; (vec_duplicate operand 2) with operand 1 under const_int operand 3.
;; The condition requires exact_log2 (operand 3) to be a valid element
;; index (below GET_MODE_NUNITS), and the output code replaces operand 3
;; by that index before printing.
;; When the scalar mode is narrower than SImode, operand 2 is printed
;; with the %k modifier.  Alternatives 0-1 (isa noavx) use the pinsr
;; templates, alternatives 2-3 (isa avx) the vpinsr templates.
6897 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6898 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6899 (vec_merge:PINSR_MODE
6900 (vec_duplicate:PINSR_MODE
6901 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6902 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6903 (match_operand:SI 3 "const_int_operand" "")))]
6905 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6906 < GET_MODE_NUNITS (<MODE>mode))"
6908 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6910 switch (which_alternative)
6913 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6914 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6917 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6919 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6920 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6923 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6928 [(set_attr "isa" "noavx,noavx,avx,avx")
6929 (set_attr "type" "sselog")
6930 (set (attr "prefix_rex")
6932 (and (not (match_test "TARGET_AVX"))
6933 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6935 (const_string "*")))
6936 (set (attr "prefix_data16")
6938 (and (not (match_test "TARGET_AVX"))
6939 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6941 (const_string "*")))
6942 (set (attr "prefix_extra")
6944 (and (not (match_test "TARGET_AVX"))
6945 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6947 (const_string "1")))
6948 (set_attr "length_immediate" "1")
6949 (set_attr "prefix" "orig,orig,vex,vex")
6950 (set_attr "mode" "TI")])
;; Extract the byte of V16QI operand 1 selected by constant operand 2
;; (0..15) into an SWI48 general register.  The destination is printed
;; with the %k modifier.
6952 (define_insn "*sse4_1_pextrb_<mode>"
6953 [(set (match_operand:SWI48 0 "register_operand" "=r")
6956 (match_operand:V16QI 1 "register_operand" "x")
6957 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6959 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6960 [(set_attr "type" "sselog")
6961 (set_attr "prefix_extra" "1")
6962 (set_attr "length_immediate" "1")
6963 (set_attr "prefix" "maybe_vex")
6964 (set_attr "mode" "TI")])
;; Store form of pextrb: the byte of V16QI operand 1 selected by constant
;; operand 2 (0..15) is written to QImode memory operand 0.
6966 (define_insn "*sse4_1_pextrb_memory"
6967 [(set (match_operand:QI 0 "memory_operand" "=m")
6969 (match_operand:V16QI 1 "register_operand" "x")
6970 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6972 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6973 [(set_attr "type" "sselog")
6974 (set_attr "prefix_extra" "1")
6975 (set_attr "length_immediate" "1")
6976 (set_attr "prefix" "maybe_vex")
6977 (set_attr "mode" "TI")])
;; Extract the word of V8HI operand 1 selected by constant operand 2
;; (0..7) into an SWI48 general register.  The destination is printed
;; with the %k modifier.
6979 (define_insn "*sse2_pextrw_<mode>"
6980 [(set (match_operand:SWI48 0 "register_operand" "=r")
6983 (match_operand:V8HI 1 "register_operand" "x")
6984 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6986 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6987 [(set_attr "type" "sselog")
6988 (set_attr "prefix_data16" "1")
6989 (set_attr "length_immediate" "1")
6990 (set_attr "prefix" "maybe_vex")
6991 (set_attr "mode" "TI")])
;; Store form of pextrw: the word of V8HI operand 1 selected by constant
;; operand 2 (0..7) is written to HImode memory operand 0.
6993 (define_insn "*sse4_1_pextrw_memory"
6994 [(set (match_operand:HI 0 "memory_operand" "=m")
6996 (match_operand:V8HI 1 "register_operand" "x")
6997 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6999 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7000 [(set_attr "type" "sselog")
7001 (set_attr "prefix_extra" "1")
7002 (set_attr "length_immediate" "1")
7003 (set_attr "prefix" "maybe_vex")
7004 (set_attr "mode" "TI")])
;; Extract the doubleword of V4SI operand 1 selected by constant
;; operand 2 (0..3) into SImode operand 0, which may be a general
;; register or memory ("rm").
7006 (define_insn "*sse4_1_pextrd"
7007 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7009 (match_operand:V4SI 1 "register_operand" "x")
7010 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7012 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7013 [(set_attr "type" "sselog")
7014 (set_attr "prefix_extra" "1")
7015 (set_attr "length_immediate" "1")
7016 (set_attr "prefix" "maybe_vex")
7017 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd (TARGET_64BIT && TARGET_SSE4_1) with a
;; DImode register destination: the doubleword of V4SI operand 1 selected
;; by constant operand 2 (0..3) is extracted, and the destination is
;; printed with the %k modifier.
7019 (define_insn "*sse4_1_pextrd_zext"
7020 [(set (match_operand:DI 0 "register_operand" "=r")
7023 (match_operand:V4SI 1 "register_operand" "x")
7024 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7025 "TARGET_64BIT && TARGET_SSE4_1"
7026 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7027 [(set_attr "type" "sselog")
7028 (set_attr "prefix_extra" "1")
7029 (set_attr "length_immediate" "1")
7030 (set_attr "prefix" "maybe_vex")
7031 (set_attr "mode" "TI")])
7033 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract the quadword of V2DI operand 1 selected by constant operand 2
;; (0..1) into DImode operand 0 (general register or memory).  Requires
;; TARGET_SSE4_1 && TARGET_64BIT.
7034 (define_insn "*sse4_1_pextrq"
7035 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7037 (match_operand:V2DI 1 "register_operand" "x")
7038 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7039 "TARGET_SSE4_1 && TARGET_64BIT"
7040 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7041 [(set_attr "type" "sselog")
7042 (set_attr "prefix_rex" "1")
7043 (set_attr "prefix_extra" "1")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "maybe_vex")
7046 (set_attr "mode" "TI")])
;; Expander taking a V8SI source and an 8-bit shuffle immediate
;; (operand 2, 0..255).  The immediate is split into four 2-bit
;; selectors; avx2_pshufd_1 receives them once as-is and once with 4
;; added to each, giving eight selection indices in total.
7048 (define_expand "avx2_pshufdv3"
7049 [(match_operand:V8SI 0 "register_operand" "")
7050 (match_operand:V8SI 1 "nonimmediate_operand" "")
7051 (match_operand:SI 2 "const_0_to_255_operand" "")]
7054 int mask = INTVAL (operands[2]);
7055 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7056 GEN_INT ((mask >> 0) & 3),
7057 GEN_INT ((mask >> 2) & 3),
7058 GEN_INT ((mask >> 4) & 3),
7059 GEN_INT ((mask >> 6) & 3),
7060 GEN_INT (((mask >> 0) & 3) + 4),
7061 GEN_INT (((mask >> 2) & 3) + 4),
7062 GEN_INT (((mask >> 4) & 3) + 4),
7063 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle with eight explicit selection indices.
;; Operands 2-5 must be in 0..3 and operands 6-9 in 4..7; the condition
;; further requires operand N + 4 to equal operand N plus 4 (N = 2..5),
;; so both groups of four use the same relative selectors.
;; The output code packs operands 2-5 into one immediate (two bits each,
;; operand 2 in the lowest bits), stores it in operands[2] and emits
;; vpshufd.
7067 (define_insn "avx2_pshufd_1"
7068 [(set (match_operand:V8SI 0 "register_operand" "=x")
7070 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7071 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7072 (match_operand 3 "const_0_to_3_operand" "")
7073 (match_operand 4 "const_0_to_3_operand" "")
7074 (match_operand 5 "const_0_to_3_operand" "")
7075 (match_operand 6 "const_4_to_7_operand" "")
7076 (match_operand 7 "const_4_to_7_operand" "")
7077 (match_operand 8 "const_4_to_7_operand" "")
7078 (match_operand 9 "const_4_to_7_operand" "")])))]
7080 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7081 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7082 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7083 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7086 mask |= INTVAL (operands[2]) << 0;
7087 mask |= INTVAL (operands[3]) << 2;
7088 mask |= INTVAL (operands[4]) << 4;
7089 mask |= INTVAL (operands[5]) << 6;
7090 operands[2] = GEN_INT (mask);
7092 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7094 [(set_attr "type" "sselog1")
7095 (set_attr "prefix" "vex")
7096 (set_attr "length_immediate" "1")
7097 (set_attr "mode" "OI")])
;; Expander taking a V4SI source and a const_int shuffle immediate
;; (operand 2).  The low eight bits of the immediate are split into four
;; 2-bit selectors, which are passed to sse2_pshufd_1.
7099 (define_expand "sse2_pshufd"
7100 [(match_operand:V4SI 0 "register_operand" "")
7101 (match_operand:V4SI 1 "nonimmediate_operand" "")
7102 (match_operand:SI 2 "const_int_operand" "")]
7105 int mask = INTVAL (operands[2]);
7106 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7107 GEN_INT ((mask >> 0) & 3),
7108 GEN_INT ((mask >> 2) & 3),
7109 GEN_INT ((mask >> 4) & 3),
7110 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit selection indices (operands 2-5, each
;; in 0..3).  The output code packs them into one immediate (two bits
;; each, operand 2 in the lowest bits), stores it in operands[2] and
;; emits %vpshufd.
7114 (define_insn "sse2_pshufd_1"
7115 [(set (match_operand:V4SI 0 "register_operand" "=x")
7117 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7118 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7119 (match_operand 3 "const_0_to_3_operand" "")
7120 (match_operand 4 "const_0_to_3_operand" "")
7121 (match_operand 5 "const_0_to_3_operand" "")])))]
7125 mask |= INTVAL (operands[2]) << 0;
7126 mask |= INTVAL (operands[3]) << 2;
7127 mask |= INTVAL (operands[4]) << 4;
7128 mask |= INTVAL (operands[5]) << 6;
7129 operands[2] = GEN_INT (mask);
7131 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7133 [(set_attr "type" "sselog1")
7134 (set_attr "prefix_data16" "1")
7135 (set_attr "prefix" "maybe_vex")
7136 (set_attr "length_immediate" "1")
7137 (set_attr "mode" "TI")])
;; Expander taking a V16HI source and an 8-bit shuffle immediate
;; (operand 2, 0..255).  The immediate is split into four 2-bit
;; selectors; avx2_pshuflw_1 receives them once as-is and once with 8
;; added to each.
7139 (define_expand "avx2_pshuflwv3"
7140 [(match_operand:V16HI 0 "register_operand" "")
7141 (match_operand:V16HI 1 "nonimmediate_operand" "")
7142 (match_operand:SI 2 "const_0_to_255_operand" "")]
7145 int mask = INTVAL (operands[2]);
7146 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7147 GEN_INT ((mask >> 0) & 3),
7148 GEN_INT ((mask >> 2) & 3),
7149 GEN_INT ((mask >> 4) & 3),
7150 GEN_INT ((mask >> 6) & 3),
7151 GEN_INT (((mask >> 0) & 3) + 8),
7152 GEN_INT (((mask >> 2) & 3) + 8),
7153 GEN_INT (((mask >> 4) & 3) + 8),
7154 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI low-word shuffle with explicit selection indices.
;; Operands 2-5 must be in 0..3 and operands 6-9 in 8..11; the condition
;; further requires operand N + 4 to equal operand N plus 8 (N = 2..5).
;; The output code packs operands 2-5 into one immediate (two bits each,
;; operand 2 in the lowest bits), stores it in operands[2] and emits
;; vpshuflw.
7158 (define_insn "avx2_pshuflw_1"
7159 [(set (match_operand:V16HI 0 "register_operand" "=x")
7161 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7162 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7163 (match_operand 3 "const_0_to_3_operand" "")
7164 (match_operand 4 "const_0_to_3_operand" "")
7165 (match_operand 5 "const_0_to_3_operand" "")
7170 (match_operand 6 "const_8_to_11_operand" "")
7171 (match_operand 7 "const_8_to_11_operand" "")
7172 (match_operand 8 "const_8_to_11_operand" "")
7173 (match_operand 9 "const_8_to_11_operand" "")
7179 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7180 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7181 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7182 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7185 mask |= INTVAL (operands[2]) << 0;
7186 mask |= INTVAL (operands[3]) << 2;
7187 mask |= INTVAL (operands[4]) << 4;
7188 mask |= INTVAL (operands[5]) << 6;
7189 operands[2] = GEN_INT (mask);
7191 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7193 [(set_attr "type" "sselog")
7194 (set_attr "prefix" "vex")
7195 (set_attr "length_immediate" "1")
7196 (set_attr "mode" "OI")])
;; Expander taking a V8HI source and a const_int shuffle immediate
;; (operand 2).  The low eight bits of the immediate are split into four
;; 2-bit selectors, which are passed to sse2_pshuflw_1.
7198 (define_expand "sse2_pshuflw"
7199 [(match_operand:V8HI 0 "register_operand" "")
7200 (match_operand:V8HI 1 "nonimmediate_operand" "")
7201 (match_operand:SI 2 "const_int_operand" "")]
7204 int mask = INTVAL (operands[2]);
7205 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7206 GEN_INT ((mask >> 0) & 3),
7207 GEN_INT ((mask >> 2) & 3),
7208 GEN_INT ((mask >> 4) & 3),
7209 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word shuffle with four explicit selection indices
;; (operands 2-5, each in 0..3).  The output code packs them into one
;; immediate (two bits each, operand 2 in the lowest bits), stores it in
;; operands[2] and emits %vpshuflw.
7213 (define_insn "sse2_pshuflw_1"
7214 [(set (match_operand:V8HI 0 "register_operand" "=x")
7216 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7217 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7218 (match_operand 3 "const_0_to_3_operand" "")
7219 (match_operand 4 "const_0_to_3_operand" "")
7220 (match_operand 5 "const_0_to_3_operand" "")
7228 mask |= INTVAL (operands[2]) << 0;
7229 mask |= INTVAL (operands[3]) << 2;
7230 mask |= INTVAL (operands[4]) << 4;
7231 mask |= INTVAL (operands[5]) << 6;
7232 operands[2] = GEN_INT (mask);
7234 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7236 [(set_attr "type" "sselog")
7237 (set_attr "prefix_data16" "0")
7238 (set_attr "prefix_rep" "1")
7239 (set_attr "prefix" "maybe_vex")
7240 (set_attr "length_immediate" "1")
7241 (set_attr "mode" "TI")])
;; Expander taking a V16HI source and an 8-bit shuffle immediate
;; (operand 2, 0..255).  The immediate is split into four 2-bit
;; selectors; avx2_pshufhw_1 receives them once with 4 added to each and
;; once with 12 added to each.
7243 (define_expand "avx2_pshufhwv3"
7244 [(match_operand:V16HI 0 "register_operand" "")
7245 (match_operand:V16HI 1 "nonimmediate_operand" "")
7246 (match_operand:SI 2 "const_0_to_255_operand" "")]
7249 int mask = INTVAL (operands[2]);
7250 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7251 GEN_INT (((mask >> 0) & 3) + 4),
7252 GEN_INT (((mask >> 2) & 3) + 4),
7253 GEN_INT (((mask >> 4) & 3) + 4),
7254 GEN_INT (((mask >> 6) & 3) + 4),
7255 GEN_INT (((mask >> 0) & 3) + 12),
7256 GEN_INT (((mask >> 2) & 3) + 12),
7257 GEN_INT (((mask >> 4) & 3) + 12),
7258 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI high-word shuffle with explicit selection indices.
;; Operands 2-5 must be in 4..7 and operands 6-9 in 12..15; the condition
;; further requires operand N + 4 to equal operand N plus 8 (N = 2..5).
;; The output code subtracts 4 from each of operands 2-5, packs the
;; results into one immediate (two bits each, operand 2 in the lowest
;; bits), stores it in operands[2] and emits vpshufhw.
7262 (define_insn "avx2_pshufhw_1"
7263 [(set (match_operand:V16HI 0 "register_operand" "=x")
7265 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7266 (parallel [(const_int 0)
7270 (match_operand 2 "const_4_to_7_operand" "")
7271 (match_operand 3 "const_4_to_7_operand" "")
7272 (match_operand 4 "const_4_to_7_operand" "")
7273 (match_operand 5 "const_4_to_7_operand" "")
7278 (match_operand 6 "const_12_to_15_operand" "")
7279 (match_operand 7 "const_12_to_15_operand" "")
7280 (match_operand 8 "const_12_to_15_operand" "")
7281 (match_operand 9 "const_12_to_15_operand" "")])))]
7283 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7284 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7285 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7286 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7289 mask |= (INTVAL (operands[2]) - 4) << 0;
7290 mask |= (INTVAL (operands[3]) - 4) << 2;
7291 mask |= (INTVAL (operands[4]) - 4) << 4;
7292 mask |= (INTVAL (operands[5]) - 4) << 6;
7293 operands[2] = GEN_INT (mask);
7295 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7297 [(set_attr "type" "sselog")
7298 (set_attr "prefix" "vex")
7299 (set_attr "length_immediate" "1")
7300 (set_attr "mode" "OI")])
;; Expander taking a V8HI source and a const_int shuffle immediate
;; (operand 2).  The low eight bits of the immediate are split into four
;; 2-bit selectors; 4 is added to each before they are passed to
;; sse2_pshufhw_1.
7302 (define_expand "sse2_pshufhw"
7303 [(match_operand:V8HI 0 "register_operand" "")
7304 (match_operand:V8HI 1 "nonimmediate_operand" "")
7305 (match_operand:SI 2 "const_int_operand" "")]
7308 int mask = INTVAL (operands[2]);
7309 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7310 GEN_INT (((mask >> 0) & 3) + 4),
7311 GEN_INT (((mask >> 2) & 3) + 4),
7312 GEN_INT (((mask >> 4) & 3) + 4),
7313 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word shuffle with four explicit selection indices
;; (operands 2-5, each in 4..7).  The output code subtracts 4 from each,
;; packs the results into one immediate (two bits each, operand 2 in the
;; lowest bits), stores it in operands[2] and emits %vpshufhw.
7317 (define_insn "sse2_pshufhw_1"
7318 [(set (match_operand:V8HI 0 "register_operand" "=x")
7320 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7321 (parallel [(const_int 0)
7325 (match_operand 2 "const_4_to_7_operand" "")
7326 (match_operand 3 "const_4_to_7_operand" "")
7327 (match_operand 4 "const_4_to_7_operand" "")
7328 (match_operand 5 "const_4_to_7_operand" "")])))]
7332 mask |= (INTVAL (operands[2]) - 4) << 0;
7333 mask |= (INTVAL (operands[3]) - 4) << 2;
7334 mask |= (INTVAL (operands[4]) - 4) << 4;
7335 mask |= (INTVAL (operands[5]) - 4) << 6;
7336 operands[2] = GEN_INT (mask);
7338 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7340 [(set_attr "type" "sselog")
7341 (set_attr "prefix_rep" "1")
7342 (set_attr "prefix_data16" "0")
7343 (set_attr "prefix" "maybe_vex")
7344 (set_attr "length_immediate" "1")
7345 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from SImode operand 1.  The
;; preparation statement sets operands[2] to the V4SImode zero vector.
7347 (define_expand "sse2_loadd"
7348 [(set (match_operand:V4SI 0 "register_operand" "")
7351 (match_operand:SI 1 "nonimmediate_operand" ""))
7355 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI register, combined with operand 1,
;; which is either the zero constant ("C") or a register.
;; Alternatives 0-1 (operand 1 is zero) emit %vmovd from memory or from a
;; general register; alternative 2 (operand 1 is zero) emits movss from
;; memory; alternatives 3-4 take operand 2 in an SSE register and emit
;; movss (operand 1 tied to the destination) or three-operand vmovss.
7357 (define_insn "sse2_loadld"
7358 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7361 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7362 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7366 %vmovd\t{%2, %0|%0, %2}
7367 %vmovd\t{%2, %0|%0, %2}
7368 movss\t{%2, %0|%0, %2}
7369 movss\t{%2, %0|%0, %2}
7370 vmovss\t{%2, %1, %0|%0, %1, %2}"
7371 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7372 (set_attr "type" "ssemov")
7373 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7374 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI operand 1 into SImode operand 0.
;; After reload the insn is split into a plain SImode move, with
;; operand 1 re-expressed as the same hard register in SImode.  The split
;; only applies when inter-unit moves are enabled, the destination is
;; memory, or the destination is not a general register.
7376 (define_insn_and_split "sse2_stored"
7377 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7379 (match_operand:V4SI 1 "register_operand" "x,Yi")
7380 (parallel [(const_int 0)])))]
7383 "&& reload_completed
7384 && (TARGET_INTER_UNIT_MOVES
7385 || MEM_P (operands [0])
7386 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7387 [(set (match_dup 0) (match_dup 1))]
7388 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI value held in offsettable
;; memory into a general register.  The split code turns this into an
;; SImode load from the vector's address plus i * 4 bytes.
7390 (define_insn_and_split "*vec_ext_v4si_mem"
7391 [(set (match_operand:SI 0 "register_operand" "=r")
7393 (match_operand:V4SI 1 "memory_operand" "o")
7394 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7400 int i = INTVAL (operands[2]);
7402 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of V2DI operand 1 into DImode
;; operand 0 (register or memory).
7406 (define_expand "sse_storeq"
7407 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7409 (match_operand:V2DI 1 "register_operand" "")
7410 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from V2DI into DImode.
;; Operands 0 and 1 may not both be memory.  The third alternative reads
;; the vector from offsettable memory into a general register with an
;; integer mov{q} (type imov, mode DI).
7413 (define_insn "*sse2_storeq_rex64"
7414 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7416 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7417 (parallel [(const_int 0)])))]
7418 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7422 mov{q}\t{%1, %0|%0, %1}"
7423 [(set_attr "type" "*,*,imov")
7424 (set_attr "mode" "*,*,DI")])
;; Element-0 store from V2DI register operand 1 into DImode operand 0
;; (SSE register or memory).
;; The same RTL shape appears a second time below without constraints,
;; followed by a replacement (set (match_dup 0) (match_dup 1)) in which
;; operand 1 is re-expressed as the same hard register in DImode; the
;; guard applies when inter-unit moves are enabled, the destination is
;; memory, or the destination is not a general register.
;; NOTE(review): the second pattern is presumably the split companion of
;; the insn -- confirm.
7426 (define_insn "*sse2_storeq"
7427 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7429 (match_operand:V2DI 1 "register_operand" "x")
7430 (parallel [(const_int 0)])))]
7435 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7437 (match_operand:V2DI 1 "register_operand" "")
7438 (parallel [(const_int 0)])))]
7441 && (TARGET_INTER_UNIT_MOVES
7442 || MEM_P (operands [0])
7443 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7444 [(set (match_dup 0) (match_dup 1))]
7445 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 of V2DI operand 1 into DImode
;; operand 0; the operands may not both be memory.
;; Alternatives: 0 stores to memory with %vmovhps; 1 and 2 shift the
;; register right by 8 bytes (psrldq in place, or three-operand vpsrldq);
;; 3 loads an SSE register with %vmovq and 4 a general register with
;; mov{q}, both from a memory source printed as %H1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-print code.
7447 (define_insn "*vec_extractv2di_1_rex64"
7448 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7450 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7451 (parallel [(const_int 1)])))]
7452 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7454 %vmovhps\t{%1, %0|%0, %1}
7455 psrldq\t{$8, %0|%0, 8}
7456 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7457 %vmovq\t{%H1, %0|%0, %H1}
7458 mov{q}\t{%H1, %0|%0, %H1}"
7459 [(set_attr "isa" "*,noavx,avx,*,*")
7460 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7461 (set_attr "length_immediate" "*,1,1,*,*")
7462 (set_attr "memory" "*,none,none,*,*")
7463 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7464 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) extraction of element 1 of V2DI
;; operand 1 into DImode operand 0; the operands may not both be memory.
;; Alternatives: 0 stores to memory with %vmovhps; 1 and 2 shift the
;; register right by 8 bytes (psrldq / vpsrldq); 3 loads with %vmovq from
;; a memory source printed as %H1; 4 uses movhlps between registers;
;; 5 uses movlps from a memory source printed as %H1.
7466 (define_insn "*vec_extractv2di_1"
7467 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7469 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7470 (parallel [(const_int 1)])))]
7471 "!TARGET_64BIT && TARGET_SSE
7472 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7474 %vmovhps\t{%1, %0|%0, %1}
7475 psrldq\t{$8, %0|%0, 8}
7476 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7477 %vmovq\t{%H1, %0|%0, %H1}
7478 movhlps\t{%1, %0|%0, %1}
7479 movlps\t{%H1, %0|%0, %H1}"
7480 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7481 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7482 (set_attr "length_immediate" "*,1,1,*,*,*")
7483 (set_attr "memory" "*,none,none,*,*,*")
7484 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7485 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI duplicate of SImode operand 1.
;; Alternative 0 (source in an SSE register) emits vpshufd with
;; immediate 0; alternative 1 (source in memory) emits vbroadcastss.
7487 (define_insn "*vec_dupv4si_avx"
7488 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7490 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7493 vpshufd\t{$0, %1, %0|%0, %1, 0}
7494 vbroadcastss\t{%1, %0|%0, %1}"
7495 [(set_attr "type" "sselog1,ssemov")
7496 (set_attr "length_immediate" "1,0")
7497 (set_attr "prefix_extra" "0,1")
7498 (set_attr "prefix" "vex")
7499 (set_attr "mode" "TI,V4SF")])
;; V4SI duplicate of SImode register operand 1.
;; Alternative 0 (isa sse2) emits pshufd with immediate 0; alternative 1
;; ties the source to the destination and emits shufps with immediate 0
;; on that single register.
7501 (define_insn "*vec_dupv4si"
7502 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7504 (match_operand:SI 1 "register_operand" " x,0")))]
7507 pshufd\t{$0, %1, %0|%0, %1, 0}
7508 shufps\t{$0, %0, %0|%0, %0, 0}"
7509 [(set_attr "isa" "sse2,*")
7510 (set_attr "type" "sselog1")
7511 (set_attr "length_immediate" "1")
7512 (set_attr "mode" "TI,V4SF")])
;; V2DI duplicate of DImode operand 1, with three alternatives: source
;; tied to the destination (isa noavx), source in another SSE register
;; (isa avx, vpunpcklqdq), and source in memory (%vmovddup).
;; NOTE(review): %d1 presumably prints operand 1 twice to fill both
;; source slots of the VEX form -- confirm against the i386
;; operand-print code.
7514 (define_insn "*vec_dupv2di_sse3"
7515 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7517 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7521 vpunpcklqdq\t{%d1, %0|%0, %d1}
7522 %vmovddup\t{%1, %0|%0, %1}"
7523 [(set_attr "isa" "noavx,avx,*")
7524 (set_attr "type" "sselog1")
7525 (set_attr "prefix" "orig,vex,maybe_vex")
7526 (set_attr "mode" "TI,TI,DF")])
;; V2DI duplicate of DImode register operand 1.  Both alternatives tie
;; the source to the destination register; the first is limited to isa
;; sse2 (type sselog1, mode TI), the second is unrestricted (type ssemov,
;; mode V4SF).
7528 (define_insn "*vec_dupv2di"
7529 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7531 (match_operand:DI 1 "register_operand" " 0,0")))]
7536 [(set_attr "isa" "sse2,*")
7537 (set_attr "type" "sselog1,ssemov")
7538 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from SImode operands 1 and 2.
;; Alternatives 0-1 insert operand 2 at index 1 with pinsrd / vpinsrd;
;; 2-3 combine two SSE registers with punpckldq / vpunpckldq; 4 handles a
;; zero operand 2 with %vmovd of operand 1; 5-6 are the MMX register
;; forms (punpckldq, and movd when operand 2 is zero).
7540 (define_insn "*vec_concatv2si_sse4_1"
7541 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7543 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7544 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7547 pinsrd\t{$1, %2, %0|%0, %2, 1}
7548 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7549 punpckldq\t{%2, %0|%0, %2}
7550 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7551 %vmovd\t{%1, %0|%0, %1}
7552 punpckldq\t{%2, %0|%0, %2}
7553 movd\t{%1, %0|%0, %1}"
7554 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7555 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7556 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7557 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7558 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7559 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7561 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7562 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7563 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from SImode operands 1 and 2, where operand 2 is a
;; register or zero.  Alternatives 0 and 2 combine two registers with
;; punpckldq (SSE and MMX respectively); alternatives 1 and 3 handle a
;; zero operand 2 with movd of operand 1.
7564 (define_insn "*vec_concatv2si_sse2"
7565 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7567 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7568 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7571 punpckldq\t{%2, %0|%0, %2}
7572 movd\t{%1, %0|%0, %1}
7573 punpckldq\t{%2, %0|%0, %2}
7574 movd\t{%1, %0|%0, %1}"
7575 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7576 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from SImode operands 1 and 2, where operand 2 is a
;; register or zero.  Alternative 0 uses unpcklps on SSE registers and
;; alternative 1 uses movss from memory when operand 2 is zero;
;; alternatives 2-3 are the MMX forms (punpckldq, and movd when operand 2
;; is zero).
7578 (define_insn "*vec_concatv2si_sse"
7579 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7581 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7582 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7585 unpcklps\t{%2, %0|%0, %2}
7586 movss\t{%1, %0|%0, %1}
7587 punpckldq\t{%2, %0|%0, %2}
7588 movd\t{%1, %0|%0, %1}"
7589 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7590 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI operands.
;; Alternatives 0-1 use punpcklqdq / vpunpcklqdq on registers;
;; alternative 2 uses movlhps on registers; alternatives 3-4 take
;; operand 2 from memory with movhps / vmovhps.
7592 (define_insn "*vec_concatv4si"
7593 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7595 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7596 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7599 punpcklqdq\t{%2, %0|%0, %2}
7600 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7601 movlhps\t{%2, %0|%0, %2}
7602 movhps\t{%2, %0|%0, %2}
7603 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7604 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7605 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7606 (set_attr "prefix" "orig,vex,orig,orig,vex")
7607 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7609 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from DImode operands 1 and 2, nine alternatives:
;;   0-1  pinsrq / vpinsrq of operand 2 at index 1;
;;   2    %vmovq of operand 1 when operand 2 is zero;
;;   3    %vmovd from a general register when operand 2 is zero;
;;   4    movq2dq from an MMX register when operand 2 is zero;
;;   5-6  punpcklqdq / vpunpcklqdq on SSE registers;
;;   7-8  movhps / vmovhps with operand 2 in memory.
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise.
7610 (define_insn "*vec_concatv2di_rex64"
7611 [(set (match_operand:V2DI 0 "register_operand"
7612 "=x,x ,x ,Yi,!x,x,x,x,x")
7614 (match_operand:DI 1 "nonimmediate_operand"
7615 " 0,x ,xm,r ,*y,0,x,0,x")
7616 (match_operand:DI 2 "vector_move_operand"
7617 "rm,rm,C ,C ,C ,x,x,m,m")))]
7620 pinsrq\t{$1, %2, %0|%0, %2, 1}
7621 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7622 %vmovq\t{%1, %0|%0, %1}
7623 %vmovd\t{%1, %0|%0, %1}
7624 movq2dq\t{%1, %0|%0, %1}
7625 punpcklqdq\t{%2, %0|%0, %2}
7626 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7627 movhps\t{%2, %0|%0, %2}
7628 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7629 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7632 (eq_attr "alternative" "0,1,5,6")
7633 (const_string "sselog")
7634 (const_string "ssemov")))
7635 (set (attr "prefix_rex")
7637 (and (eq_attr "alternative" "0,3")
7638 (not (match_test "TARGET_AVX")))
7640 (const_string "*")))
7641 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7642 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7643 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7644 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) construction of a V2DI value from
;; DImode operands 1 and 2, seven alternatives:
;;   0    %vmovq of operand 1 when operand 2 is zero;
;;   1    movq2dq from an MMX register when operand 2 is zero;
;;   2-3  punpcklqdq / vpunpcklqdq on SSE registers;
;;   4    movlhps on SSE registers;
;;   5-6  movhps / vmovhps with operand 2 in memory.
7646 (define_insn "vec_concatv2di"
7647 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7649 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7650 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7651 "!TARGET_64BIT && TARGET_SSE"
7653 %vmovq\t{%1, %0|%0, %1}
7654 movq2dq\t{%1, %0|%0, %1}
7655 punpcklqdq\t{%2, %0|%0, %2}
7656 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7657 movlhps\t{%2, %0|%0, %2}
7658 movhps\t{%2, %0|%0, %2}
7659 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7660 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7661 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7662 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7663 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander vec_unpacks_lo_<mode>.  Operand 1 is a register in a
;; VI124_AVX2 mode and operand 0 is a register in the corresponding
;; <sseunpackmode>.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, false, false), followed by DONE, so
;; nothing is emitted from the operand list itself.
;; NOTE(review): judging from how the four vec_unpack expanders vary the
;; flags, the first presumably selects unsigned extension and the second
;; the high half -- confirm against ix86_expand_sse_unpack.  The insn
;; condition is not among the lines shown here.
7665 (define_expand "vec_unpacks_lo_<mode>"
7666 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7667 (match_operand:VI124_AVX2 1 "register_operand" "")]
7669 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander vec_unpacks_hi_<mode>.  Operand 1 is a register in a
;; VI124_AVX2 mode and operand 0 is a register in the corresponding
;; <sseunpackmode>.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, false, true), followed by DONE.
;; NOTE(review): the first flag presumably selects unsigned extension and
;; the second the high half -- confirm against ix86_expand_sse_unpack.
;; The insn condition is not among the lines shown here.
7671 (define_expand "vec_unpacks_hi_<mode>"
7672 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7673 (match_operand:VI124_AVX2 1 "register_operand" "")]
7675 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander vec_unpacku_lo_<mode>.  Operand 1 is a register in a
;; VI124_AVX2 mode and operand 0 is a register in the corresponding
;; <sseunpackmode>.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, true, false), followed by DONE.
;; NOTE(review): the first flag presumably selects unsigned extension and
;; the second the high half -- confirm against ix86_expand_sse_unpack.
;; The insn condition is not among the lines shown here.
7677 (define_expand "vec_unpacku_lo_<mode>"
7678 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7679 (match_operand:VI124_AVX2 1 "register_operand" "")]
7681 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander vec_unpacku_hi_<mode>.  Operand 1 is a register in a
;; VI124_AVX2 mode and operand 0 is a register in the corresponding
;; <sseunpackmode>.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, true, true), followed by DONE.
;; NOTE(review): the first flag presumably selects unsigned extension and
;; the second the high half -- confirm against ix86_expand_sse_unpack.
;; The insn condition is not among the lines shown here.
7683 (define_expand "vec_unpacku_hi_<mode>"
7684 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7685 (match_operand:VI124_AVX2 1 "register_operand" "")]
7687 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7689 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7693 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI uavg pattern.  Operand 0 is a V32QI register,
;; operands 1 and 2 are V32QI nonimmediate operands, and the template
;; also carries a V32QI constant vector of 32 ones.  The preparation
;; statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands).
;; NOTE(review): the operator lines wrapping the operands and the
;; constant, and the insn condition, are not among the lines shown here;
;; presumably this is the rounding average (a + b + 1) >> 1 computed in a
;; wider element mode -- confirm.
7695 (define_expand "avx2_uavgv32qi3"
7696 [(set (match_operand:V32QI 0 "register_operand" "")
7702 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7704 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7705 (const_vector:V32QI [(const_int 1) (const_int 1)
7706 (const_int 1) (const_int 1)
7707 (const_int 1) (const_int 1)
7708 (const_int 1) (const_int 1)
7709 (const_int 1) (const_int 1)
7710 (const_int 1) (const_int 1)
7711 (const_int 1) (const_int 1)
7712 (const_int 1) (const_int 1)
7713 (const_int 1) (const_int 1)
7714 (const_int 1) (const_int 1)
7715 (const_int 1) (const_int 1)
7716 (const_int 1) (const_int 1)
7717 (const_int 1) (const_int 1)
7718 (const_int 1) (const_int 1)
7719 (const_int 1) (const_int 1)
7720 (const_int 1) (const_int 1)]))
7723 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI uavg pattern.  Operand 0 is a V16QI register,
;; operands 1 and 2 are V16QI nonimmediate operands, and the template
;; also carries a V16QI constant vector of 16 ones.  The preparation
;; statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
;; NOTE(review): the operator lines wrapping the operands and the
;; constant, and the insn condition, are not among the lines shown here;
;; presumably this is the rounding average (a + b + 1) >> 1 computed in a
;; wider element mode -- confirm.
7725 (define_expand "sse2_uavgv16qi3"
7726 [(set (match_operand:V16QI 0 "register_operand" "")
7732 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7734 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7735 (const_vector:V16QI [(const_int 1) (const_int 1)
7736 (const_int 1) (const_int 1)
7737 (const_int 1) (const_int 1)
7738 (const_int 1) (const_int 1)
7739 (const_int 1) (const_int 1)
7740 (const_int 1) (const_int 1)
7741 (const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)]))
7745 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI uavg pattern; emits the three-operand
;; vpavgb.  Enabled when TARGET_AVX2 holds and
;; ix86_binary_operator_ok (PLUS, V32QImode, operands) accepts the
;; operands.  Operand 1 carries the % (commutative) constraint modifier;
;; operand 2 may be a register or memory (xm).  The RTL also contains a
;; V32QI constant vector of 32 ones.
;; Attributes: type sseiadd, prefix vex, mode OI.
;; NOTE(review): the operator lines wrapping the operands and the
;; constant are not among the lines shown here.
7747 (define_insn "*avx2_uavgv32qi3"
7748 [(set (match_operand:V32QI 0 "register_operand" "=x")
7754 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7756 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7757 (const_vector:V32QI [(const_int 1) (const_int 1)
7758 (const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)
7760 (const_int 1) (const_int 1)
7761 (const_int 1) (const_int 1)
7762 (const_int 1) (const_int 1)
7763 (const_int 1) (const_int 1)
7764 (const_int 1) (const_int 1)
7765 (const_int 1) (const_int 1)
7766 (const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)
7768 (const_int 1) (const_int 1)
7769 (const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)]))
7774 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7775 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7776 [(set_attr "type" "sseiadd")
7777 (set_attr "prefix" "vex")
7778 (set_attr "mode" "OI")])
;; Matching insn for the V16QI uavg pattern.  Enabled when TARGET_SSE2
;; holds and ix86_binary_operator_ok (PLUS, V16QImode, operands) accepts
;; the operands.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits pavgb;
;;   1 (isa avx):   separate source register, emits three-operand vpavgb.
;; Operand 1 carries the % (commutative) constraint modifier; operand 2
;; may be a register or memory (xm).  prefix_data16 is 1 only for the
;; non-AVX alternative.  The RTL also contains a V16QI constant vector of
;; 16 ones.
;; NOTE(review): the operator lines wrapping the operands and the
;; constant are not among the lines shown here.
7780 (define_insn "*sse2_uavgv16qi3"
7781 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7787 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7789 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7790 (const_vector:V16QI [(const_int 1) (const_int 1)
7791 (const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)]))
7799 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7801 pavgb\t{%2, %0|%0, %2}
7802 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7803 [(set_attr "isa" "noavx,avx")
7804 (set_attr "type" "sseiadd")
7805 (set_attr "prefix_data16" "1,*")
7806 (set_attr "prefix" "orig,vex")
7807 (set_attr "mode" "TI")])
;; Expander for the V16HI uavg pattern.  Operand 0 is a V16HI register,
;; operands 1 and 2 are V16HI nonimmediate operands, and the template
;; also carries a V16HI constant vector of 16 ones.  The preparation
;; statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands).
;; NOTE(review): the operator lines wrapping the operands and the
;; constant, and the insn condition, are not among the lines shown here;
;; presumably this is the rounding average (a + b + 1) >> 1 computed in a
;; wider element mode -- confirm.
7809 (define_expand "avx2_uavgv16hi3"
7810 [(set (match_operand:V16HI 0 "register_operand" "")
7816 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7818 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7819 (const_vector:V16HI [(const_int 1) (const_int 1)
7820 (const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)
7823 (const_int 1) (const_int 1)
7824 (const_int 1) (const_int 1)
7825 (const_int 1) (const_int 1)
7826 (const_int 1) (const_int 1)]))
7829 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI uavg pattern.  Operand 0 is a V8HI register,
;; operands 1 and 2 are V8HI nonimmediate operands, and the template also
;; carries a V8HI constant vector of 8 ones.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
;; NOTE(review): the operator lines wrapping the operands and the
;; constant, and the insn condition, are not among the lines shown here;
;; presumably this is the rounding average (a + b + 1) >> 1 computed in a
;; wider element mode -- confirm.
7831 (define_expand "sse2_uavgv8hi3"
7832 [(set (match_operand:V8HI 0 "register_operand" "")
7838 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7840 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7841 (const_vector:V8HI [(const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)]))
7847 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI uavg pattern; emits the three-operand
;; vpavgw.  Enabled when TARGET_AVX2 holds and
;; ix86_binary_operator_ok (PLUS, V16HImode, operands) accepts the
;; operands.  Operand 1 carries the % (commutative) constraint modifier;
;; operand 2 may be a register or memory (xm).  The RTL also contains a
;; V16HI constant vector of 16 ones.
;; Attributes: type sseiadd, prefix vex, mode OI.
;; NOTE(review): the operator lines wrapping the operands and the
;; constant are not among the lines shown here.
7849 (define_insn "*avx2_uavgv16hi3"
7850 [(set (match_operand:V16HI 0 "register_operand" "=x")
7856 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7858 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7859 (const_vector:V16HI [(const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)
7862 (const_int 1) (const_int 1)
7863 (const_int 1) (const_int 1)
7864 (const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)]))
7868 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7869 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7870 [(set_attr "type" "sseiadd")
7871 (set_attr "prefix" "vex")
7872 (set_attr "mode" "OI")])
;; Matching insn for the V8HI uavg pattern.  Enabled when TARGET_SSE2
;; holds and ix86_binary_operator_ok (PLUS, V8HImode, operands) accepts
;; the operands.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits pavgw;
;;   1 (isa avx):   separate source register, emits three-operand vpavgw.
;; Operand 1 carries the % (commutative) constraint modifier; operand 2
;; may be a register or memory (xm).  prefix_data16 is 1 only for the
;; non-AVX alternative.  The RTL also contains a V8HI constant vector of
;; 8 ones.
;; NOTE(review): the operator lines wrapping the operands and the
;; constant are not among the lines shown here.
7874 (define_insn "*sse2_uavgv8hi3"
7875 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7881 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7883 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7884 (const_vector:V8HI [(const_int 1) (const_int 1)
7885 (const_int 1) (const_int 1)
7886 (const_int 1) (const_int 1)
7887 (const_int 1) (const_int 1)]))
7889 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7891 pavgw\t{%2, %0|%0, %2}
7892 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7893 [(set_attr "isa" "noavx,avx")
7894 (set_attr "type" "sseiadd")
7895 (set_attr "prefix_data16" "1,*")
7896 (set_attr "prefix" "orig,vex")
7897 (set_attr "mode" "TI")])
7899 ;; The correct representation for this is absolutely enormous, and
7900 ;; surely not generally useful.
;; Insn for psadbw / vpsadbw, modelled as an unspec over its two inputs
;; rather than as explicit arithmetic.  The result (operand 0) has a
;; VI8_AVX2 mode while both inputs use the corresponding <ssebytemode>.
;; Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits psadbw;
;;   1 (isa avx):   separate source register, emits three-operand vpsadbw.
;; Operand 2 may be a register or memory (xm).
;; NOTE(review): the unspec code and the insn condition are not among the
;; lines shown here.
7901 (define_insn "<sse2_avx2>_psadbw"
7902 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7903 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7904 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7908 psadbw\t{%2, %0|%0, %2}
7909 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7910 [(set_attr "isa" "noavx,avx")
7911 (set_attr "type" "sseiadd")
7912 (set_attr "atom_unit" "simul")
7913 (set_attr "prefix_data16" "1,*")
7914 (set_attr "prefix" "orig,vex")
7915 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting %vmovmsk<ssemodesuffix>.  The source (operand 1) is a
;; vector register in a VF mode (constraint x); the destination
;; (operand 0) is an SImode register (constraint r).
;; Attributes: type ssemov, prefix maybe_vex, mode <MODE>.
;; NOTE(review): the unspec wrapper around operand 1 and the insn
;; condition are not among the lines shown here.
7917 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7918 [(set (match_operand:SI 0 "register_operand" "=r")
7920 [(match_operand:VF 1 "register_operand" "x")]
7923 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7924 [(set_attr "type" "ssemov")
7925 (set_attr "prefix" "maybe_vex")
7926 (set_attr "mode" "<MODE>")])
;; Insn emitting vpmovmskb: an SImode unspec of a V32QI register
;; (operand 1, constraint x) is stored in an SImode register (operand 0,
;; constraint r).  Attributes: type ssemov, prefix vex, mode DI.
;; NOTE(review): the mode attribute is DI here whereas sse2_pmovmskb uses
;; SI -- confirm this difference is intentional.  The unspec code and the
;; insn condition are not among the lines shown here.
7928 (define_insn "avx2_pmovmskb"
7929 [(set (match_operand:SI 0 "register_operand" "=r")
7930 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7933 "vpmovmskb\t{%1, %0|%0, %1}"
7934 [(set_attr "type" "ssemov")
7935 (set_attr "prefix" "vex")
7936 (set_attr "mode" "DI")])
;; Insn emitting %vpmovmskb: an SImode unspec of a V16QI register
;; (operand 1, constraint x) is stored in an SImode register (operand 0,
;; constraint r).
;; Attributes: type ssemov, prefix_data16 1, prefix maybe_vex, mode SI.
;; NOTE(review): the unspec code and the insn condition are not among the
;; lines shown here.
7938 (define_insn "sse2_pmovmskb"
7939 [(set (match_operand:SI 0 "register_operand" "=r")
7940 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7943 "%vpmovmskb\t{%1, %0|%0, %1}"
7944 [(set_attr "type" "ssemov")
7945 (set_attr "prefix_data16" "1")
7946 (set_attr "prefix" "maybe_vex")
7947 (set_attr "mode" "SI")])
;; Expander for maskmovdqu.  Operand 0 is a V16QI memory destination;
;; operands 1 and 2 are V16QI registers that feed a V16QI unspec.
;; NOTE(review): the rest of the unspec (any further input, the unspec
;; code) and the insn condition are not among the lines shown here.
7949 (define_expand "sse2_maskmovdqu"
7950 [(set (match_operand:V16QI 0 "memory_operand" "")
7951 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7952 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn; emits %vmaskmovdqu naming only operands 1 and 2.  The
;; store destination is a V16QI memory reference whose address is
;; operand 0, a register in mode P with the D constraint; the same memory
;; also appears as an input of the unspec through match_dup 0.  That
;; address register is implicit in the instruction, which is why
;; length_vex is computed by hand as
;; 3 + REX_SSE_REGNO_P (REGNO (operands[2])).
;; Other attributes: type ssemov, prefix_data16 1, prefix maybe_vex,
;; mode TI.
;; NOTE(review): the unspec code and the insn condition are not among the
;; lines shown here.
7957 (define_insn "*sse2_maskmovdqu"
7958 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7959 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7960 (match_operand:V16QI 2 "register_operand" "x")
7961 (mem:V16QI (match_dup 0))]
7964 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7965 [(set_attr "type" "ssemov")
7966 (set_attr "prefix_data16" "1")
7967 ;; The implicit %rdi operand confuses default length_vex computation.
7968 (set (attr "length_vex")
7969 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7970 (set_attr "prefix" "maybe_vex")
7971 (set_attr "mode" "TI")])
;; Insn whose only operand is an SImode memory operand (constraint m)
;; wrapped in an unspec_volatile.
;; Attributes: type sse, atom_sse_attr mxcsr, prefix maybe_vex,
;; memory load.
;; NOTE(review): the unspec code, insn condition and output template are
;; not among the lines shown here; by its name this loads MXCSR from the
;; memory operand -- confirm.
7973 (define_insn "sse_ldmxcsr"
7974 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7978 [(set_attr "type" "sse")
7979 (set_attr "atom_sse_attr" "mxcsr")
7980 (set_attr "prefix" "maybe_vex")
7981 (set_attr "memory" "load")])
;; Insn that sets an SImode memory operand (operand 0, constraint m) from
;; unspec_volatile UNSPECV_STMXCSR applied to a dummy (const_int 0).
;; Attributes: type sse, atom_sse_attr mxcsr, prefix maybe_vex,
;; memory store.
;; NOTE(review): the insn condition and output template are not among the
;; lines shown here; by its name this stores MXCSR to the memory operand
;; -- confirm.
7983 (define_insn "sse_stmxcsr"
7984 [(set (match_operand:SI 0 "memory_operand" "=m")
7985 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7988 [(set_attr "type" "sse")
7989 (set_attr "atom_sse_attr" "mxcsr")
7990 (set_attr "prefix" "maybe_vex")
7991 (set_attr "memory" "store")])
;; Expander enabled when TARGET_SSE || TARGET_3DNOW_A.  The preparation
;; code makes operands[0] a BLKmode MEM whose address is a Pmode SCRATCH
;; and marks that MEM volatile; the pattern applies
;; unspec:BLK UNSPEC_SFENCE to that operand via match_dup 0.
;; NOTE(review): the first line of the RTL template and the braces around
;; the preparation code are not among the lines shown here.
7993 (define_expand "sse_sfence"
7995 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7996 "TARGET_SSE || TARGET_3DNOW_A"
7998 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7999 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn: sets a BLKmode operand (empty predicate and constraint)
;; to unspec:BLK UNSPEC_SFENCE of itself.  Enabled when
;; TARGET_SSE || TARGET_3DNOW_A.
;; Attributes: type sse, length_address 0, atom_sse_attr fence,
;; memory unknown.
;; NOTE(review): the output template is not among the lines shown here.
8002 (define_insn "*sse_sfence"
8003 [(set (match_operand:BLK 0 "" "")
8004 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8005 "TARGET_SSE || TARGET_3DNOW_A"
8007 [(set_attr "type" "sse")
8008 (set_attr "length_address" "0")
8009 (set_attr "atom_sse_attr" "fence")
8010 (set_attr "memory" "unknown")])
;; Insn whose only operand is an address_operand (constraint p) wrapped
;; in an unspec_volatile.
;; Attributes: type sse, atom_sse_attr fence, memory unknown.
;; NOTE(review): the unspec code, insn condition and output template are
;; not among the lines shown here.
8012 (define_insn "sse2_clflush"
8013 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8017 [(set_attr "type" "sse")
8018 (set_attr "atom_sse_attr" "fence")
8019 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code makes operands[0] a BLKmode
;; MEM whose address is a Pmode SCRATCH and marks that MEM volatile; the
;; pattern applies unspec:BLK UNSPEC_MFENCE to that operand via
;; match_dup 0.
;; NOTE(review): the first line of the RTL template, the insn condition
;; and the braces around the preparation code are not among the lines
;; shown here.
8021 (define_expand "sse2_mfence"
8023 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8026 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8027 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn: sets a BLKmode operand (empty predicate and constraint)
;; to unspec:BLK UNSPEC_MFENCE of itself.  Enabled when
;; TARGET_64BIT || TARGET_SSE2.
;; Attributes: type sse, length_address 0, atom_sse_attr fence,
;; memory unknown.
;; NOTE(review): the output template is not among the lines shown here.
8030 (define_insn "*sse2_mfence"
8031 [(set (match_operand:BLK 0 "" "")
8032 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8033 "TARGET_64BIT || TARGET_SSE2"
8035 [(set_attr "type" "sse")
8036 (set_attr "length_address" "0")
8037 (set_attr "atom_sse_attr" "fence")
8038 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code makes operands[0] a BLKmode
;; MEM whose address is a Pmode SCRATCH and marks that MEM volatile; the
;; pattern applies unspec:BLK UNSPEC_LFENCE to that operand via
;; match_dup 0.
;; NOTE(review): the first line of the RTL template, the insn condition
;; and the braces around the preparation code are not among the lines
;; shown here.
8040 (define_expand "sse2_lfence"
8042 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8045 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8046 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn: sets a BLKmode operand (empty predicate and constraint)
;; to unspec:BLK UNSPEC_LFENCE of itself.
;; Attributes: type sse, length_address 0, atom_sse_attr lfence (a
;; distinct value from the fence used by the sfence/mfence insns),
;; memory unknown.
;; NOTE(review): the insn condition and output template are not among the
;; lines shown here.
8049 (define_insn "*sse2_lfence"
8050 [(set (match_operand:BLK 0 "" "")
8051 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8054 [(set_attr "type" "sse")
8055 (set_attr "length_address" "0")
8056 (set_attr "atom_sse_attr" "lfence")
8057 (set_attr "memory" "unknown")])
;; Insn for mwait.  Both inputs are SImode registers pinned by their
;; constraints: operand 0 uses constraint a and operand 1 constraint c.
;; The length attribute is fixed at 3.
;; NOTE(review): the unspec code, insn condition and output template are
;; not among the lines shown here.
8059 (define_insn "sse3_mwait"
8060 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8061 (match_operand:SI 1 "register_operand" "c")]
8064 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8065 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8066 ;; we only need to set up 32bit registers.
8068 [(set_attr "length" "3")])
;; Insn for monitor on 32-bit targets (TARGET_SSE3 && !TARGET_64BIT).
;; All three inputs are SImode registers pinned by their constraints:
;; operand 0 uses constraint a, operand 1 constraint c and operand 2
;; constraint d.  The template prints them as monitor %0, %1, %2 and the
;; length attribute is fixed at 3.
;; NOTE(review): the unspec code is not among the lines shown here.
8070 (define_insn "sse3_monitor"
8071 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8072 (match_operand:SI 1 "register_operand" "c")
8073 (match_operand:SI 2 "register_operand" "d")]
8075 "TARGET_SSE3 && !TARGET_64BIT"
8076 "monitor\t%0, %1, %2"
8077 [(set_attr "length" "3")])
;; Insn for monitor on 64-bit targets (TARGET_SSE3 && TARGET_64BIT).
;; Operand 0 is a DImode register with constraint a; operands 1 and 2
;; stay SImode registers with constraints c and d.  The length attribute
;; is fixed at 3.
;; NOTE(review): the unspec code and output template are not among the
;; lines shown here.
8079 (define_insn "sse3_monitor64"
8080 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8081 (match_operand:SI 1 "register_operand" "c")
8082 (match_operand:SI 2 "register_operand" "d")]
8084 "TARGET_SSE3 && TARGET_64BIT"
8085 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8086 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8087 ;; zero extended to 64bit, we only need to set up 32bit registers.
8089 [(set_attr "length" "3")])
8091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8093 ;; SSSE3 instructions
8095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn emitting the three-operand vphaddw on V16HI.  Operand 0 is the
;; destination register, operand 1 a register and operand 2 a register or
;; memory (xm).  The RTL selects HImode elements 0..15 of operand 1 and
;; then elements 0..15 of operand 2, grouped in adjacent pairs
;; (0,1), (2,3), ..., (14,15).
;; Attributes: type sseiadd, prefix_extra 1, prefix vex, mode OI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
;; NOTE(review): per the Intel SDM, 256-bit VPHADDW works within each
;; 128-bit lane, while the order here lists all sixteen elements of
;; operand 1 before any of operand 2 -- confirm the RTL matches the
;; instruction's result layout.
8097 (define_insn "avx2_phaddwv16hi3"
8098 [(set (match_operand:V16HI 0 "register_operand" "=x")
8105 (match_operand:V16HI 1 "register_operand" "x")
8106 (parallel [(const_int 0)]))
8107 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8110 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8113 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8114 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8116 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8121 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8122 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8124 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8125 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8128 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8131 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8132 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8138 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8139 (parallel [(const_int 0)]))
8140 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8142 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8146 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8147 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8149 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8150 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8154 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8155 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8157 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8158 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8161 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8164 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8165 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8167 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8168 [(set_attr "type" "sseiadd")
8169 (set_attr "prefix_extra" "1")
8170 (set_attr "prefix" "vex")
8171 (set_attr "mode" "OI")])
;; Insn for phaddw / vphaddw on V8HI.  The RTL selects HImode elements
;; 0..7 of operand 1 and then elements 0..7 of operand 2, grouped in
;; adjacent pairs.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits phaddw;
;;   1 (isa avx):   separate source register, emits three-operand vphaddw.
;; Operand 2 may be a register or memory (xm).  prefix_data16 is 1 only
;; for the non-AVX alternative.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
8173 (define_insn "ssse3_phaddwv8hi3"
8174 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8180 (match_operand:V8HI 1 "register_operand" "0,x")
8181 (parallel [(const_int 0)]))
8182 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8185 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8189 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8197 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8198 (parallel [(const_int 0)]))
8199 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8201 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8205 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8206 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8208 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8209 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8212 phaddw\t{%2, %0|%0, %2}
8213 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8214 [(set_attr "isa" "noavx,avx")
8215 (set_attr "type" "sseiadd")
8216 (set_attr "atom_unit" "complex")
8217 (set_attr "prefix_data16" "1,*")
8218 (set_attr "prefix_extra" "1")
8219 (set_attr "prefix" "orig,vex")
8220 (set_attr "mode" "TI")])
;; Insn emitting the two-operand phaddw on V4HI, using the y register
;; constraint.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory (ym).  The RTL selects HImode elements 0..3 of
;; operand 1 and then of operand 2, grouped in adjacent pairs.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn);
;; the mode attribute is DI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
8222 (define_insn "ssse3_phaddwv4hi3"
8223 [(set (match_operand:V4HI 0 "register_operand" "=y")
8228 (match_operand:V4HI 1 "register_operand" "0")
8229 (parallel [(const_int 0)]))
8230 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8232 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8233 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8237 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8238 (parallel [(const_int 0)]))
8239 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8241 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8242 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8244 "phaddw\t{%2, %0|%0, %2}"
8245 [(set_attr "type" "sseiadd")
8246 (set_attr "atom_unit" "complex")
8247 (set_attr "prefix_extra" "1")
8248 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8249 (set_attr "mode" "DI")])
;; Insn emitting the three-operand vphaddd on V8SI.  Operand 0 is the
;; destination register, operand 1 a register and operand 2 a register or
;; memory (xm).  The RTL selects SImode elements 0..7 of operand 1 and
;; then elements 0..7 of operand 2, grouped in adjacent pairs.
;; Attributes: type sseiadd, prefix_extra 1, prefix vex, mode OI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
;; NOTE(review): per the Intel SDM, 256-bit VPHADDD works within each
;; 128-bit lane, while the order here lists all eight elements of
;; operand 1 before any of operand 2 -- confirm the RTL matches the
;; instruction's result layout.
8251 (define_insn "avx2_phadddv8si3"
8252 [(set (match_operand:V8SI 0 "register_operand" "=x")
8258 (match_operand:V8SI 1 "register_operand" "x")
8259 (parallel [(const_int 0)]))
8260 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8262 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8263 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8266 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8267 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8269 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8270 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8275 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8276 (parallel [(const_int 0)]))
8277 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8279 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8280 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8283 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8284 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8286 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8287 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8289 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8290 [(set_attr "type" "sseiadd")
8291 (set_attr "prefix_extra" "1")
8292 (set_attr "prefix" "vex")
8293 (set_attr "mode" "OI")])
;; Insn for phaddd / vphaddd on V4SI.  The RTL selects SImode elements
;; 0..3 of operand 1 and then elements 0..3 of operand 2, grouped in
;; adjacent pairs.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits phaddd;
;;   1 (isa avx):   separate source register, emits three-operand vphaddd.
;; Operand 2 may be a register or memory (xm).  prefix_data16 is 1 only
;; for the non-AVX alternative.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
8295 (define_insn "ssse3_phadddv4si3"
8296 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8301 (match_operand:V4SI 1 "register_operand" "0,x")
8302 (parallel [(const_int 0)]))
8303 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8305 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8306 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8310 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8311 (parallel [(const_int 0)]))
8312 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8314 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8315 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8318 phaddd\t{%2, %0|%0, %2}
8319 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8320 [(set_attr "isa" "noavx,avx")
8321 (set_attr "type" "sseiadd")
8322 (set_attr "atom_unit" "complex")
8323 (set_attr "prefix_data16" "1,*")
8324 (set_attr "prefix_extra" "1")
8325 (set_attr "prefix" "orig,vex")
8326 (set_attr "mode" "TI")])
;; Insn emitting the two-operand phaddd on V2SI, using the y register
;; constraint.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory (ym).  The RTL selects SImode elements 0 and 1 of
;; operand 1 and then of operand 2.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn);
;; the mode attribute is DI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is summed -- confirm.
8328 (define_insn "ssse3_phadddv2si3"
8329 [(set (match_operand:V2SI 0 "register_operand" "=y")
8333 (match_operand:V2SI 1 "register_operand" "0")
8334 (parallel [(const_int 0)]))
8335 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8338 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8339 (parallel [(const_int 0)]))
8340 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8342 "phaddd\t{%2, %0|%0, %2}"
8343 [(set_attr "type" "sseiadd")
8344 (set_attr "atom_unit" "complex")
8345 (set_attr "prefix_extra" "1")
8346 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8347 (set_attr "mode" "DI")])
;; Insn emitting the three-operand vphaddsw on V16HI.  Operand 0 is the
;; destination register, operand 1 a register and operand 2 a register or
;; memory (xm).  The RTL selects HImode elements 0..15 of operand 1 and
;; then elements 0..15 of operand 2, grouped in adjacent pairs.
;; Attributes: type sseiadd, prefix_extra 1, prefix vex, mode OI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is added with signed
;; saturation -- confirm.
;; NOTE(review): per the Intel SDM, 256-bit VPHADDSW works within each
;; 128-bit lane, while the order here lists all sixteen elements of
;; operand 1 before any of operand 2 -- confirm the RTL matches the
;; instruction's result layout.
8349 (define_insn "avx2_phaddswv16hi3"
8350 [(set (match_operand:V16HI 0 "register_operand" "=x")
8357 (match_operand:V16HI 1 "register_operand" "x")
8358 (parallel [(const_int 0)]))
8359 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8361 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8365 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8368 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8369 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8373 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8374 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8376 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8377 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8380 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8383 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8390 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8391 (parallel [(const_int 0)]))
8392 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8395 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8406 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8407 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8410 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8413 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8416 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8419 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8420 [(set_attr "type" "sseiadd")
8421 (set_attr "prefix_extra" "1")
8422 (set_attr "prefix" "vex")
8423 (set_attr "mode" "OI")])
;; Insn for phaddsw / vphaddsw on V8HI.  The RTL selects HImode elements
;; 0..7 of operand 1 and then elements 0..7 of operand 2, grouped in
;; adjacent pairs.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits phaddsw;
;;   1 (isa avx):   separate source register, emits three-operand
;;                  vphaddsw.
;; Operand 2 may be a register or memory (xm).  prefix_data16 is 1 only
;; for the non-AVX alternative.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is added with signed
;; saturation -- confirm.
8425 (define_insn "ssse3_phaddswv8hi3"
8426 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8432 (match_operand:V8HI 1 "register_operand" "0,x")
8433 (parallel [(const_int 0)]))
8434 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8437 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8449 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8450 (parallel [(const_int 0)]))
8451 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8454 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8457 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8458 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8460 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8461 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8464 phaddsw\t{%2, %0|%0, %2}
8465 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8466 [(set_attr "isa" "noavx,avx")
8467 (set_attr "type" "sseiadd")
8468 (set_attr "atom_unit" "complex")
8469 (set_attr "prefix_data16" "1,*")
8470 (set_attr "prefix_extra" "1")
8471 (set_attr "prefix" "orig,vex")
8472 (set_attr "mode" "TI")])
;; Insn emitting the two-operand phaddsw on V4HI, using the y register
;; constraint.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory (ym).  The RTL selects HImode elements 0..3 of
;; operand 1 and then of operand 2, grouped in adjacent pairs.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn);
;; the mode attribute is DI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably each pair is added with signed
;; saturation -- confirm.
8474 (define_insn "ssse3_phaddswv4hi3"
8475 [(set (match_operand:V4HI 0 "register_operand" "=y")
8480 (match_operand:V4HI 1 "register_operand" "0")
8481 (parallel [(const_int 0)]))
8482 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8484 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8485 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8489 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8490 (parallel [(const_int 0)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8493 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8494 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8496 "phaddsw\t{%2, %0|%0, %2}"
8497 [(set_attr "type" "sseiadd")
8498 (set_attr "atom_unit" "complex")
8499 (set_attr "prefix_extra" "1")
8500 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8501 (set_attr "mode" "DI")])
;; Insn emitting the three-operand vphsubw on V16HI.  Operand 0 is the
;; destination register, operand 1 a register and operand 2 a register or
;; memory (xm).  The RTL selects HImode elements 0..15 of operand 1 and
;; then elements 0..15 of operand 2, grouped in adjacent pairs.
;; Attributes: type sseiadd, prefix_extra 1, prefix vex, mode OI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably the second element of each pair is
;; subtracted from the first -- confirm.
;; NOTE(review): per the Intel SDM, 256-bit VPHSUBW works within each
;; 128-bit lane, while the order here lists all sixteen elements of
;; operand 1 before any of operand 2 -- confirm the RTL matches the
;; instruction's result layout.
8503 (define_insn "avx2_phsubwv16hi3"
8504 [(set (match_operand:V16HI 0 "register_operand" "=x")
8511 (match_operand:V16HI 1 "register_operand" "x")
8512 (parallel [(const_int 0)]))
8513 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8519 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8522 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8527 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8528 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8531 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8534 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8535 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8537 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8544 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8545 (parallel [(const_int 0)]))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8548 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8552 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8553 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8555 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8556 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8560 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8561 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8563 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8564 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8570 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8571 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8573 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8574 [(set_attr "type" "sseiadd")
8575 (set_attr "prefix_extra" "1")
8576 (set_attr "prefix" "vex")
8577 (set_attr "mode" "OI")])
;; Insn for phsubw / vphsubw on V8HI.  The RTL selects HImode elements
;; 0..7 of operand 1 and then elements 0..7 of operand 2, grouped in
;; adjacent pairs.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits phsubw;
;;   1 (isa avx):   separate source register, emits three-operand vphsubw.
;; Operand 2 may be a register or memory (xm).  prefix_data16 is 1 only
;; for the non-AVX alternative.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably the second element of each pair is
;; subtracted from the first -- confirm.
8579 (define_insn "ssse3_phsubwv8hi3"
8580 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8586 (match_operand:V8HI 1 "register_operand" "0,x")
8587 (parallel [(const_int 0)]))
8588 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8591 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8594 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8595 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8598 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8603 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8604 (parallel [(const_int 0)]))
8605 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8607 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8608 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8611 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8612 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8614 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8615 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8618 phsubw\t{%2, %0|%0, %2}
8619 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8620 [(set_attr "isa" "noavx,avx")
8621 (set_attr "type" "sseiadd")
8622 (set_attr "atom_unit" "complex")
8623 (set_attr "prefix_data16" "1,*")
8624 (set_attr "prefix_extra" "1")
8625 (set_attr "prefix" "orig,vex")
8626 (set_attr "mode" "TI")])
;; Insn emitting the two-operand phsubw on V4HI, using the y register
;; constraint.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory (ym).  The RTL selects HImode elements 0..3 of
;; operand 1 and then of operand 2, grouped in adjacent pairs.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn);
;; the mode attribute is DI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably the second element of each pair is
;; subtracted from the first -- confirm.
8628 (define_insn "ssse3_phsubwv4hi3"
8629 [(set (match_operand:V4HI 0 "register_operand" "=y")
8634 (match_operand:V4HI 1 "register_operand" "0")
8635 (parallel [(const_int 0)]))
8636 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8638 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8639 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8643 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8644 (parallel [(const_int 0)]))
8645 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8647 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8648 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8650 "phsubw\t{%2, %0|%0, %2}"
8651 [(set_attr "type" "sseiadd")
8652 (set_attr "atom_unit" "complex")
8653 (set_attr "prefix_extra" "1")
8654 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8655 (set_attr "mode" "DI")])
;; Insn emitting the three-operand vphsubd on V8SI.  Operand 0 is the
;; destination register, operand 1 a register and operand 2 a register or
;; memory (xm).  The RTL selects SImode elements 0..7 of operand 1 and
;; then elements 0..7 of operand 2, grouped in adjacent pairs.
;; Attributes: type sseiadd, prefix_extra 1, prefix vex, mode OI.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably the second element of each pair is
;; subtracted from the first -- confirm.
;; NOTE(review): per the Intel SDM, 256-bit VPHSUBD works within each
;; 128-bit lane, while the order here lists all eight elements of
;; operand 1 before any of operand 2 -- confirm the RTL matches the
;; instruction's result layout.
8657 (define_insn "avx2_phsubdv8si3"
8658 [(set (match_operand:V8SI 0 "register_operand" "=x")
8664 (match_operand:V8SI 1 "register_operand" "x")
8665 (parallel [(const_int 0)]))
8666 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8668 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8669 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8672 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8673 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8675 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8676 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8681 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8682 (parallel [(const_int 0)]))
8683 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8685 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8686 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8689 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8690 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8692 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8693 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8695 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8696 [(set_attr "type" "sseiadd")
8697 (set_attr "prefix_extra" "1")
8698 (set_attr "prefix" "vex")
8699 (set_attr "mode" "OI")])
;; Insn for phsubd / vphsubd on V4SI.  The RTL selects SImode elements
;; 0..3 of operand 1 and then elements 0..3 of operand 2, grouped in
;; adjacent pairs.  Two alternatives:
;;   0 (isa noavx): operand 1 tied to the destination, emits phsubd;
;;   1 (isa avx):   separate source register, emits three-operand vphsubd.
;; Operand 2 may be a register or memory (xm).  prefix_data16 is 1 only
;; for the non-AVX alternative.
;; NOTE(review): the operator lines that combine each pair and
;; concatenate the results, and the insn condition, are not among the
;; lines shown here; presumably the second element of each pair is
;; subtracted from the first -- confirm.
8701 (define_insn "ssse3_phsubdv4si3"
8702 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8707 (match_operand:V4SI 1 "register_operand" "0,x")
8708 (parallel [(const_int 0)]))
8709 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8711 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8712 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8716 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8717 (parallel [(const_int 0)]))
8718 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8720 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8721 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8724 phsubd\t{%2, %0|%0, %2}
8725 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8727 [(set_attr "isa" "noavx,avx")
8728 (set_attr "type" "sseiadd")
8729 (set_attr "atom_unit" "complex")
8730 (set_attr "prefix_data16" "1,*")
8731 (set_attr "prefix_extra" "1")
8732 (set_attr "prefix" "orig,vex")
8733 (set_attr "mode" "TI")])
;; 64-bit phsubd on V2SI operands held in "y"-constraint registers.
;; The destination is tied to operand 1; operand 2 may be memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8735 (define_insn "ssse3_phsubdv2si3"
8736 [(set (match_operand:V2SI 0 "register_operand" "=y")
8740 (match_operand:V2SI 1 "register_operand" "0")
8741 (parallel [(const_int 0)]))
8742 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8745 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8746 (parallel [(const_int 0)]))
8747 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8749 "phsubd\t{%2, %0|%0, %2}"
8750 [(set_attr "type" "sseiadd")
8751 (set_attr "atom_unit" "complex")
8752 (set_attr "prefix_extra" "1")
8753 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8754 (set_attr "mode" "DI")])
;; 256-bit AVX2 form of phsubsw: emits the three-operand vphsubsw on
;; V16HI operands.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  The RTL selects HImode elements 0..15 of
;; operand 1 and then elements 0..15 of operand 2, one vec_select each.
8756 (define_insn "avx2_phsubswv16hi3"
8757 [(set (match_operand:V16HI 0 "register_operand" "=x")
8764 (match_operand:V16HI 1 "register_operand" "x")
8765 (parallel [(const_int 0)]))
8766 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8768 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8769 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8772 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8776 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8780 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8781 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8783 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8784 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8787 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8788 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8790 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8791 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8797 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8798 (parallel [(const_int 0)]))
8799 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8801 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8805 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8806 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8808 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8809 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8813 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8814 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8817 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8821 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8823 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8824 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8826 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8827 [(set_attr "type" "sseiadd")
8828 (set_attr "prefix_extra" "1")
8829 (set_attr "prefix" "vex")
8830 (set_attr "mode" "OI")])
;; 128-bit phsubsw on V8HI operands, with two alternatives selected by
;; the "isa" attribute:
;;   noavx - destination tied to operand 1 ("0"), two-operand phsubsw;
;;   avx   - independent destination, three-operand vphsubsw.
;; Elements 0..7 of each input are selected one by one in the RTL.
8832 (define_insn "ssse3_phsubswv8hi3"
8833 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8839 (match_operand:V8HI 1 "register_operand" "0,x")
8840 (parallel [(const_int 0)]))
8841 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8848 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8850 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8851 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8856 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8857 (parallel [(const_int 0)]))
8858 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8860 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8864 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8865 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8867 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8868 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8871 phsubsw\t{%2, %0|%0, %2}
8872 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8873 [(set_attr "isa" "noavx,avx")
8874 (set_attr "type" "sseiadd")
8875 (set_attr "atom_unit" "complex")
8876 (set_attr "prefix_data16" "1,*")
8877 (set_attr "prefix_extra" "1")
8878 (set_attr "prefix" "orig,vex")
8879 (set_attr "mode" "TI")])
;; 64-bit phsubsw on V4HI operands held in "y"-constraint registers.
;; The destination is tied to operand 1; operand 2 may be memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8881 (define_insn "ssse3_phsubswv4hi3"
8882 [(set (match_operand:V4HI 0 "register_operand" "=y")
8887 (match_operand:V4HI 1 "register_operand" "0")
8888 (parallel [(const_int 0)]))
8889 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8891 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8892 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8896 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8897 (parallel [(const_int 0)]))
8898 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8900 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8901 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8903 "phsubsw\t{%2, %0|%0, %2}"
8904 [(set_attr "type" "sseiadd")
8905 (set_attr "atom_unit" "complex")
8906 (set_attr "prefix_extra" "1")
8907 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8908 (set_attr "mode" "DI")])
;; 256-bit AVX2 vpmaddubsw: two V32QI inputs produce a V16HI result.
;; Each input is split with vec_select into V16QI sub-vectors, one
;; index list starting at element 0 and one starting at element 1.
;; Operand 1 must be a register; operand 2 may be a register or memory.
8910 (define_insn "avx2_pmaddubsw256"
8911 [(set (match_operand:V16HI 0 "register_operand" "=x")
8916 (match_operand:V32QI 1 "register_operand" "x")
8917 (parallel [(const_int 0)
8935 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8936 (parallel [(const_int 0)
8954 (vec_select:V16QI (match_dup 1)
8955 (parallel [(const_int 1)
8972 (vec_select:V16QI (match_dup 2)
8973 (parallel [(const_int 1)
8988 (const_int 31)]))))))]
8990 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8991 [(set_attr "type" "sseiadd")
8992 (set_attr "prefix_extra" "1")
8993 (set_attr "prefix" "vex")
8994 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: two V16QI inputs produce a V8HI result.  Each
;; input is split with vec_select into V8QI sub-vectors, one index list
;; starting at element 0 and one starting at element 1.
;; Alternatives: noavx (destination tied to operand 1, pmaddubsw) and
;; avx (three-operand vpmaddubsw).
8996 (define_insn "ssse3_pmaddubsw128"
8997 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9002 (match_operand:V16QI 1 "register_operand" "0,x")
9003 (parallel [(const_int 0)
9013 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9014 (parallel [(const_int 0)
9024 (vec_select:V8QI (match_dup 1)
9025 (parallel [(const_int 1)
9034 (vec_select:V8QI (match_dup 2)
9035 (parallel [(const_int 1)
9042 (const_int 15)]))))))]
9045 pmaddubsw\t{%2, %0|%0, %2}
9046 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9047 [(set_attr "isa" "noavx,avx")
9048 (set_attr "type" "sseiadd")
9049 (set_attr "atom_unit" "simul")
9050 (set_attr "prefix_data16" "1,*")
9051 (set_attr "prefix_extra" "1")
9052 (set_attr "prefix" "orig,vex")
9053 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw: two V8QI inputs in "y"-constraint registers (the
;; second may be memory) produce a V4HI result.  Each input is split
;; with vec_select into V4QI sub-vectors, one index list starting at
;; element 0 and one starting at element 1.  The destination is tied to
;; operand 1.
9055 (define_insn "ssse3_pmaddubsw"
9056 [(set (match_operand:V4HI 0 "register_operand" "=y")
9061 (match_operand:V8QI 1 "register_operand" "0")
9062 (parallel [(const_int 0)
9068 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9069 (parallel [(const_int 0)
9075 (vec_select:V4QI (match_dup 1)
9076 (parallel [(const_int 1)
9081 (vec_select:V4QI (match_dup 2)
9082 (parallel [(const_int 1)
9085 (const_int 7)]))))))]
9087 "pmaddubsw\t{%2, %0|%0, %2}"
9088 [(set_attr "type" "sseiadd")
9089 (set_attr "atom_unit" "simul")
9090 (set_attr "prefix_extra" "1")
9091 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9092 (set_attr "mode" "DI")])
;; Named expander for the V16HI pattern matched by "*avx2_umulhrswv16hi3".
;; Both inputs may be registers or memory here; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the
;; operands before the RTL is generated.  The template includes an
;; all-ones V16HI constant vector.
9094 (define_expand "avx2_umulhrswv16hi3"
9095 [(set (match_operand:V16HI 0 "register_operand" "")
9102 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9104 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9106 (const_vector:V16HI [(const_int 1) (const_int 1)
9107 (const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)
9109 (const_int 1) (const_int 1)
9110 (const_int 1) (const_int 1)
9111 (const_int 1) (const_int 1)
9112 (const_int 1) (const_int 1)
9113 (const_int 1) (const_int 1)]))
9116 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matcher for the V16HI pattern; emits the three-operand vpmulhrsw.
;; Enabled only when TARGET_AVX2 holds and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 carries the "%" (commutative)
;; constraint modifier, so operands 1 and 2 may be swapped.
;; NOTE(review): the pattern is named "umulhrsw" while the mnemonic and
;; the 128/64-bit siblings use "pmulhrsw" - confirm the name is intended.
9118 (define_insn "*avx2_umulhrswv16hi3"
9119 [(set (match_operand:V16HI 0 "register_operand" "=x")
9126 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9128 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9130 (const_vector:V16HI [(const_int 1) (const_int 1)
9131 (const_int 1) (const_int 1)
9132 (const_int 1) (const_int 1)
9133 (const_int 1) (const_int 1)
9134 (const_int 1) (const_int 1)
9135 (const_int 1) (const_int 1)
9136 (const_int 1) (const_int 1)
9137 (const_int 1) (const_int 1)]))
9139 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9140 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9141 [(set_attr "type" "sseimul")
9142 (set_attr "prefix_extra" "1")
9143 (set_attr "prefix" "vex")
9144 (set_attr "mode" "OI")])
;; Named expander for the V8HI pattern matched by "*ssse3_pmulhrswv8hi3".
;; Both inputs may be registers or memory; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the operands.
;; The template includes an all-ones V8HI constant vector.
9146 (define_expand "ssse3_pmulhrswv8hi3"
9147 [(set (match_operand:V8HI 0 "register_operand" "")
9154 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9156 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9158 (const_vector:V8HI [(const_int 1) (const_int 1)
9159 (const_int 1) (const_int 1)
9160 (const_int 1) (const_int 1)
9161 (const_int 1) (const_int 1)]))
9164 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the V8HI pattern.  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is marked
;; commutative ("%").  Alternatives: noavx (destination tied to
;; operand 1, pmulhrsw) and avx (three-operand vpmulhrsw).
9166 (define_insn "*ssse3_pmulhrswv8hi3"
9167 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9174 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9176 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9178 (const_vector:V8HI [(const_int 1) (const_int 1)
9179 (const_int 1) (const_int 1)
9180 (const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)]))
9183 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9185 pmulhrsw\t{%2, %0|%0, %2}
9186 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9187 [(set_attr "isa" "noavx,avx")
9188 (set_attr "type" "sseimul")
9189 (set_attr "prefix_data16" "1,*")
9190 (set_attr "prefix_extra" "1")
9191 (set_attr "prefix" "orig,vex")
9192 (set_attr "mode" "TI")])
;; Named expander for the V4HI pattern matched by "*ssse3_pmulhrswv4hi3".
;; Both inputs may be registers or memory; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the operands.
;; The template includes an all-ones V4HI constant vector.
9194 (define_expand "ssse3_pmulhrswv4hi3"
9195 [(set (match_operand:V4HI 0 "register_operand" "")
9202 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9204 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9206 (const_vector:V4HI [(const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)]))
9210 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher for the V4HI pattern on "y"-constraint registers; emits the
;; two-operand pmulhrsw with the destination tied to operand 1.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9212 (define_insn "*ssse3_pmulhrswv4hi3"
9213 [(set (match_operand:V4HI 0 "register_operand" "=y")
9220 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9222 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9224 (const_vector:V4HI [(const_int 1) (const_int 1)
9225 (const_int 1) (const_int 1)]))
9227 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9228 "pmulhrsw\t{%2, %0|%0, %2}"
9229 [(set_attr "type" "sseimul")
9230 (set_attr "prefix_extra" "1")
9231 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9232 (set_attr "mode" "DI")])
;; pshufb for every mode of the VI1_AVX2 iterator, modelled as an
;; unspec of operands 1 and 2.  Alternatives: noavx (destination tied
;; to operand 1, pshufb) and avx (three-operand vpshufb).  The insn
;; mode attribute follows <sseinsnmode>.
9234 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9235 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9236 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9237 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9241 pshufb\t{%2, %0|%0, %2}
9242 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9243 [(set_attr "isa" "noavx,avx")
9244 (set_attr "type" "sselog1")
9245 (set_attr "prefix_data16" "1,*")
9246 (set_attr "prefix_extra" "1")
9247 (set_attr "prefix" "orig,vex")
9248 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit pshufb on V8QI operands in "y"-constraint registers, modelled
;; as an unspec of operands 1 and 2.  The destination is tied to
;; operand 1; operand 2 may be memory.  prefix_rex is computed per insn
;; by x86_extended_reg_mentioned_p.
9250 (define_insn "ssse3_pshufbv8qi3"
9251 [(set (match_operand:V8QI 0 "register_operand" "=y")
9252 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9253 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9256 "pshufb\t{%2, %0|%0, %2}"
9257 [(set_attr "type" "sselog1")
9258 (set_attr "prefix_extra" "1")
9259 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9260 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> for every mode of the VI124_AVX2 iterator.
;; Alternatives: noavx (destination tied to operand 1, two-operand
;; form) and avx (three-operand vpsign<ssemodesuffix>).  Operand 2 may
;; be a register or memory.
9262 (define_insn "<ssse3_avx2>_psign<mode>3"
9263 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9265 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9266 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9270 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9271 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9272 [(set_attr "isa" "noavx,avx")
9273 (set_attr "type" "sselog1")
9274 (set_attr "prefix_data16" "1,*")
9275 (set_attr "prefix_extra" "1")
9276 (set_attr "prefix" "orig,vex")
9277 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> for every mode of the MMXMODEI iterator, on
;; "y"-constraint registers.  The destination is tied to operand 1;
;; operand 2 may be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9279 (define_insn "ssse3_psign<mode>3"
9280 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9282 [(match_operand:MMXMODEI 1 "register_operand" "0")
9283 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9286 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9287 [(set_attr "type" "sselog1")
9288 (set_attr "prefix_extra" "1")
9289 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9290 (set_attr "mode" "DI")])
;; palignr for every mode of the SSESCALARMODE iterator, modelled as an
;; unspec.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing (presumably converting a
;; bit count into the byte count the instruction takes - confirm).
;; The template is then chosen by which_alternative: two-operand
;; palignr or three-operand vpalignr.
9292 (define_insn "<ssse3_avx2>_palignr<mode>"
9293 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9294 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9295 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9296 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9300 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9302 switch (which_alternative)
9305 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9307 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9312 [(set_attr "isa" "noavx,avx")
9313 (set_attr "type" "sseishft")
9314 (set_attr "atom_unit" "sishuf")
9315 (set_attr "prefix_data16" "1,*")
9316 (set_attr "prefix_extra" "1")
9317 (set_attr "length_immediate" "1")
9318 (set_attr "prefix" "orig,vex")
9319 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit palignr on DImode operands in "y"-constraint registers,
;; modelled as an unspec.  The destination is tied to operand 1.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand and is divided
;; by 8 by the output code before the instruction is printed.
9321 (define_insn "ssse3_palignrdi"
9322 [(set (match_operand:DI 0 "register_operand" "=y")
9323 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9324 (match_operand:DI 2 "nonimmediate_operand" "ym")
9325 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9329 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9330 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9332 [(set_attr "type" "sseishft")
9333 (set_attr "atom_unit" "sishuf")
9334 (set_attr "prefix_extra" "1")
9335 (set_attr "length_immediate" "1")
9336 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9337 (set_attr "mode" "DI")])
;; abs<mode>2 for every mode of the VI124_AVX2 iterator.  Emits
;; pabs<ssemodesuffix>, with the "%v" template prefix paired with the
;; "maybe_vex" prefix attribute.  The source may be a register or
;; memory; the destination is an independent register.
9339 (define_insn "abs<mode>2"
9340 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9342 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9344 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9345 [(set_attr "type" "sselog1")
9346 (set_attr "prefix_data16" "1")
9347 (set_attr "prefix_extra" "1")
9348 (set_attr "prefix" "maybe_vex")
9349 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 for every mode of the MMXMODEI iterator, on
;; "y"-constraint registers.  Emits pabs<mmxvecsize>; the source may be
;; a register or memory.  prefix_rep is forced to 0 and prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
9351 (define_insn "abs<mode>2"
9352 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9354 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9356 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9357 [(set_attr "type" "sselog1")
9358 (set_attr "prefix_rep" "0")
9359 (set_attr "prefix_extra" "1")
9360 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9361 (set_attr "mode" "DI")])
9363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9365 ;; AMD SSE4A instructions
9367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<ssemodesuffix> for the MODEF scalar modes: stores SSE
;; register operand 1 to memory operand 0.  Modelled as an unspec.
9369 (define_insn "sse4a_movnt<mode>"
9370 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9372 [(match_operand:MODEF 1 "register_operand" "x")]
9375 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9376 [(set_attr "type" "ssemov")
9377 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A movnt store: element 0 of a
;; VF_128 register (operand 1) is selected and stored to a scalar-mode
;; memory operand.  Emits movnt<ssescalarmodesuffix>.
9379 (define_insn "sse4a_vmmovnt<mode>"
9380 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9381 (unspec:<ssescalarmode>
9382 [(vec_select:<ssescalarmode>
9383 (match_operand:VF_128 1 "register_operand" "x")
9384 (parallel [(const_int 0)]))]
9387 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9388 [(set_attr "type" "ssemov")
9389 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on V2DI.  The destination is tied to
;; operand 1; operands 2 and 3 are constants accepted by
;; const_0_to_255_operand, hence length_immediate is 2.
9391 (define_insn "sse4a_extrqi"
9392 [(set (match_operand:V2DI 0 "register_operand" "=x")
9393 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9394 (match_operand 2 "const_0_to_255_operand" "")
9395 (match_operand 3 "const_0_to_255_operand" "")]
9398 "extrq\t{%3, %2, %0|%0, %2, %3}"
9399 [(set_attr "type" "sse")
9400 (set_attr "prefix_data16" "1")
9401 (set_attr "length_immediate" "2")
9402 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq on V2DI.  The destination is tied to
;; operand 1; operand 2 is a V16QI register.
9404 (define_insn "sse4a_extrq"
9405 [(set (match_operand:V2DI 0 "register_operand" "=x")
9406 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9407 (match_operand:V16QI 2 "register_operand" "x")]
9410 "extrq\t{%2, %0|%0, %2}"
9411 [(set_attr "type" "sse")
9412 (set_attr "prefix_data16" "1")
9413 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI.  The destination is tied to
;; operand 1, operand 2 is a second V2DI register, and operands 3 and 4
;; are constants accepted by const_0_to_255_operand (length_immediate
;; is 2).  The attributes select a rep prefix and no data16 prefix.
9415 (define_insn "sse4a_insertqi"
9416 [(set (match_operand:V2DI 0 "register_operand" "=x")
9417 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9418 (match_operand:V2DI 2 "register_operand" "x")
9419 (match_operand 3 "const_0_to_255_operand" "")
9420 (match_operand 4 "const_0_to_255_operand" "")]
9423 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9424 [(set_attr "type" "sseins")
9425 (set_attr "prefix_data16" "0")
9426 (set_attr "prefix_rep" "1")
9427 (set_attr "length_immediate" "2")
9428 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI.  The destination is tied to
;; operand 1; operand 2 is a second V2DI register.  The attributes
;; select a rep prefix and no data16 prefix.
9430 (define_insn "sse4a_insertq"
9431 [(set (match_operand:V2DI 0 "register_operand" "=x")
9432 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9433 (match_operand:V2DI 2 "register_operand" "x")]
9436 "insertq\t{%2, %0|%0, %2}"
9437 [(set_attr "type" "sseins")
9438 (set_attr "prefix_data16" "0")
9439 (set_attr "prefix_rep" "1")
9440 (set_attr "mode" "TI")])
9442 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9444 ;; Intel SSE4.1 instructions
9446 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend for every mode of the VF iterator.
;; Note that operand 2 is listed before operand 1 in the RTL.  Operand 3
;; is a constant whose allowed range depends on the mode
;; (const_0_to_<blendbits>_operand).  Alternatives: noavx (destination
;; tied to operand 1, blend<ssemodesuffix>) and avx (three-operand
;; vblend<ssemodesuffix>).
9448 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9449 [(set (match_operand:VF 0 "register_operand" "=x,x")
9451 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9452 (match_operand:VF 1 "register_operand" "0,x")
9453 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9456 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9457 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9458 [(set_attr "isa" "noavx,avx")
9459 (set_attr "type" "ssemov")
9460 (set_attr "length_immediate" "1")
9461 (set_attr "prefix_data16" "1,*")
9462 (set_attr "prefix_extra" "1")
9463 (set_attr "prefix" "orig,vex")
9464 (set_attr "mode" "<MODE>")])
;; Register-controlled blend for every mode of the VF iterator.
;; Operand 3 supplies the selector: the noavx alternative constrains it
;; with "Yz", the avx alternative accepts any "x" register.  The other
;; operands use the *_not_xmm0_*_maybe_avx predicates.
;; NOTE(review): length_immediate is "1" for both alternatives, whereas
;; the pblendvb pattern uses "*,1" - confirm whether the noavx form
;; should really count an immediate byte.
9466 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9467 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9469 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9470 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9471 (match_operand:VF 3 "register_operand" "Yz,x")]
9475 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9476 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9477 [(set_attr "isa" "noavx,avx")
9478 (set_attr "type" "ssemov")
9479 (set_attr "length_immediate" "1")
9480 (set_attr "prefix_data16" "1,*")
9481 (set_attr "prefix_extra" "1")
9482 (set_attr "prefix" "orig,vex")
9483 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> for every mode of the VF iterator, with an 8-bit
;; immediate control (operand 3, const_0_to_255_operand).  Operand 1 is
;; marked commutative ("%").  Alternatives: noavx (destination tied to
;; operand 1) and avx (three-operand vdp<ssemodesuffix>).
9485 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9486 [(set (match_operand:VF 0 "register_operand" "=x,x")
9488 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9489 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9490 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9494 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9495 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9496 [(set_attr "isa" "noavx,avx")
9497 (set_attr "type" "ssemul")
9498 (set_attr "length_immediate" "1")
9499 (set_attr "prefix_data16" "1,*")
9500 (set_attr "prefix_extra" "1")
9501 (set_attr "prefix" "orig,vex")
9502 (set_attr "mode" "<MODE>")])
;; movntdqa for every mode of the VI8_AVX2 iterator: loads memory
;; operand 1 into register operand 0, modelled as an unspec.  The "%v"
;; template prefix is paired with the "maybe_vex" prefix attribute.
9504 (define_insn "<sse4_1_avx2>_movntdqa"
9505 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9506 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9509 "%vmovntdqa\t{%1, %0|%0, %1}"
9510 [(set_attr "type" "ssemov")
9511 (set_attr "prefix_extra" "1")
9512 (set_attr "prefix" "maybe_vex")
9513 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw for every mode of the VI1_AVX2 iterator, modelled as an
;; unspec with an 8-bit immediate control (operand 3).  Alternatives:
;; noavx (destination tied to operand 1, mpsadbw) and avx
;; (three-operand vmpsadbw).
9515 (define_insn "<sse4_1_avx2>_mpsadbw"
9516 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9517 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9518 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9519 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9523 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9524 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9525 [(set_attr "isa" "noavx,avx")
9526 (set_attr "type" "sselog1")
9527 (set_attr "length_immediate" "1")
9528 (set_attr "prefix_extra" "1")
9529 (set_attr "prefix" "orig,vex")
9530 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit AVX2 vpackusdw: two V8SI inputs produce one V16HI result.
;; Operand 1 must be a register; operand 2 may be a register or memory.
9532 (define_insn "avx2_packusdw"
9533 [(set (match_operand:V16HI 0 "register_operand" "=x")
9536 (match_operand:V8SI 1 "register_operand" "x"))
9538 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9540 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9541 [(set_attr "type" "sselog")
9542 (set_attr "prefix_extra" "1")
9543 (set_attr "prefix" "vex")
9544 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce one V8HI result.
;; Alternatives: noavx (destination tied to operand 1, packusdw) and
;; avx (three-operand vpackusdw).
9546 (define_insn "sse4_1_packusdw"
9547 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9550 (match_operand:V4SI 1 "register_operand" "0,x"))
9552 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9555 packusdw\t{%2, %0|%0, %2}
9556 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9557 [(set_attr "isa" "noavx,avx")
9558 (set_attr "type" "sselog")
9559 (set_attr "prefix_extra" "1")
9560 (set_attr "prefix" "orig,vex")
9561 (set_attr "mode" "TI")])
;; pblendvb for every mode of the VI1_AVX2 iterator.  Operand 3
;; supplies the selector: the noavx alternative constrains it with
;; "Yz", the avx alternative accepts any "x" register.
;; length_immediate is "*" for the noavx form and "1" for the avx form.
;; NOTE(review): operand 0 uses reg_not_xmm0_operand, while operand 1
;; here and operand 0 of the blendv pattern use the *_maybe_avx variant
;; - confirm the stricter predicate on the destination is intended.
9563 (define_insn "<sse4_1_avx2>_pblendvb"
9564 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
9566 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9567 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9568 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9572 pblendvb\t{%3, %2, %0|%0, %2, %3}
9573 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9574 [(set_attr "isa" "noavx,avx")
9575 (set_attr "type" "ssemov")
9576 (set_attr "prefix_extra" "1")
9577 (set_attr "length_immediate" "*,1")
9578 (set_attr "prefix" "orig,vex")
9579 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw on V8HI with an 8-bit immediate selector
;; (operand 3).  Operand 2 is listed before operand 1 in the RTL.
;; Alternatives: noavx (destination tied to operand 1, pblendw) and avx
;; (three-operand vpblendw).
9581 (define_insn "sse4_1_pblendw"
9582 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9584 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9585 (match_operand:V8HI 1 "register_operand" "0,x")
9586 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9589 pblendw\t{%3, %2, %0|%0, %2, %3}
9590 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9591 [(set_attr "isa" "noavx,avx")
9592 (set_attr "type" "ssemov")
9593 (set_attr "prefix_extra" "1")
9594 (set_attr "length_immediate" "1")
9595 (set_attr "prefix" "orig,vex")
9596 (set_attr "mode" "TI")])
9598 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code masks operand 3 to its low 8 bits and copies
;; that byte into bits 8..15 as well (val << 8 | val), so the same
;; 8-bit selector appears twice in the resulting 16-bit constant.
;; The "*avx2_pblendw" insn masks the value back to 8 bits on output.
9599 (define_expand "avx2_pblendw"
9600 [(set (match_operand:V16HI 0 "register_operand" "")
9602 (match_operand:V16HI 2 "nonimmediate_operand" "")
9603 (match_operand:V16HI 1 "register_operand" "")
9604 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9607 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9608 operands[3] = GEN_INT (val << 8 | val);
;; Matcher for the V16HI pblendw pattern.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low 8 bits
;; before printing the three-operand vpblendw.  Operand 2 is listed
;; before operand 1 in the RTL.
9611 (define_insn "*avx2_pblendw"
9612 [(set (match_operand:V16HI 0 "register_operand" "=x")
9614 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9615 (match_operand:V16HI 1 "register_operand" "x")
9616 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9619 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9620 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9622 [(set_attr "type" "ssemov")
9623 (set_attr "prefix_extra" "1")
9624 (set_attr "length_immediate" "1")
9625 (set_attr "prefix" "vex")
9626 (set_attr "mode" "OI")])
;; AVX2 vpblendd for every mode of the VI4_AVX2 iterator, with an 8-bit
;; immediate selector (operand 3).  Operand 2 is listed before
;; operand 1 in the RTL.  Only the three-operand VEX form exists.
9628 (define_insn "avx2_pblendd<mode>"
9629 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9631 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9632 (match_operand:VI4_AVX2 1 "register_operand" "x")
9633 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9635 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9636 [(set_attr "type" "ssemov")
9637 (set_attr "prefix_extra" "1")
9638 (set_attr "length_immediate" "1")
9639 (set_attr "prefix" "vex")
9640 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1
;; (register or memory).  The "%v" template prefix is paired with the
;; "maybe_vex" prefix attribute.
9642 (define_insn "sse4_1_phminposuw"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9644 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9645 UNSPEC_PHMINPOSUW))]
9647 "%vphminposuw\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "sselog1")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "maybe_vex")
9651 (set_attr "mode" "TI")])
;; AVX2 byte-to-word extension: a V16QI source (register or memory) is
;; widened to a V16HI result.  Emits vpmov<extsuffix>bw, where <code>
;; and <extsuffix> are filled in per extension kind.
9653 (define_insn "avx2_<code>v16qiv16hi2"
9654 [(set (match_operand:V16HI 0 "register_operand" "=x")
9656 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9658 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9659 [(set_attr "type" "ssemov")
9660 (set_attr "prefix_extra" "1")
9661 (set_attr "prefix" "vex")
9662 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-word extension producing V8HI.  The source operand is
;; a full V16QI (register or memory) from which elements are selected,
;; with the index list starting at element 0.  Emits
;; pmov<extsuffix>bw with the optional "%v" VEX prefix.
9664 (define_insn "sse4_1_<code>v8qiv8hi2"
9665 [(set (match_operand:V8HI 0 "register_operand" "=x")
9668 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9669 (parallel [(const_int 0)
9678 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9679 [(set_attr "type" "ssemov")
9680 (set_attr "prefix_extra" "1")
9681 (set_attr "prefix" "maybe_vex")
9682 (set_attr "mode" "TI")])
;; AVX2 byte-to-doubleword extension producing V8SI.  The source
;; operand is a V16QI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; vpmov<extsuffix>bd.
9684 (define_insn "avx2_<code>v8qiv8si2"
9685 [(set (match_operand:V8SI 0 "register_operand" "=x")
9688 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9689 (parallel [(const_int 0)
9698 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9699 [(set_attr "type" "ssemov")
9700 (set_attr "prefix_extra" "1")
9701 (set_attr "prefix" "vex")
9702 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-doubleword extension producing V4SI.  The source
;; operand is a V16QI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; pmov<extsuffix>bd with the optional "%v" VEX prefix.
9704 (define_insn "sse4_1_<code>v4qiv4si2"
9705 [(set (match_operand:V4SI 0 "register_operand" "=x")
9708 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9709 (parallel [(const_int 0)
9714 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9715 [(set_attr "type" "ssemov")
9716 (set_attr "prefix_extra" "1")
9717 (set_attr "prefix" "maybe_vex")
9718 (set_attr "mode" "TI")])
;; AVX2 word-to-doubleword extension: a V8HI source (register or
;; memory) is widened to a V8SI result.  Emits vpmov<extsuffix>wd.
9720 (define_insn "avx2_<code>v8hiv8si2"
9721 [(set (match_operand:V8SI 0 "register_operand" "=x")
9723 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9725 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9726 [(set_attr "type" "ssemov")
9727 (set_attr "prefix_extra" "1")
9728 (set_attr "prefix" "vex")
9729 (set_attr "mode" "OI")])
;; SSE4.1 word-to-doubleword extension producing V4SI.  The source
;; operand is a V8HI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; pmov<extsuffix>wd with the optional "%v" VEX prefix.
9731 (define_insn "sse4_1_<code>v4hiv4si2"
9732 [(set (match_operand:V4SI 0 "register_operand" "=x")
9735 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9736 (parallel [(const_int 0)
9741 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9742 [(set_attr "type" "ssemov")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "prefix" "maybe_vex")
9745 (set_attr "mode" "TI")])
;; AVX2 byte-to-quadword extension producing V4DI.  The source operand
;; is a V16QI (register or memory) from which elements are selected,
;; with the index list starting at element 0.  Emits
;; vpmov<extsuffix>bq.
9747 (define_insn "avx2_<code>v4qiv4di2"
9748 [(set (match_operand:V4DI 0 "register_operand" "=x")
9751 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9752 (parallel [(const_int 0)
9757 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9758 [(set_attr "type" "ssemov")
9759 (set_attr "prefix_extra" "1")
9760 (set_attr "prefix" "vex")
9761 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-quadword extension producing V2DI.  The source
;; operand is a V16QI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; pmov<extsuffix>bq with the optional "%v" VEX prefix.
9763 (define_insn "sse4_1_<code>v2qiv2di2"
9764 [(set (match_operand:V2DI 0 "register_operand" "=x")
9767 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9768 (parallel [(const_int 0)
9771 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9772 [(set_attr "type" "ssemov")
9773 (set_attr "prefix_extra" "1")
9774 (set_attr "prefix" "maybe_vex")
9775 (set_attr "mode" "TI")])
;; AVX2 word-to-quadword extension producing V4DI.  The source operand
;; is a V8HI (register or memory) from which elements are selected,
;; with the index list starting at element 0.  Emits
;; vpmov<extsuffix>wq.
9777 (define_insn "avx2_<code>v4hiv4di2"
9778 [(set (match_operand:V4DI 0 "register_operand" "=x")
9781 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9782 (parallel [(const_int 0)
9787 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9788 [(set_attr "type" "ssemov")
9789 (set_attr "prefix_extra" "1")
9790 (set_attr "prefix" "vex")
9791 (set_attr "mode" "OI")])
;; SSE4.1 word-to-quadword extension producing V2DI.  The source
;; operand is a V8HI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; pmov<extsuffix>wq with the optional "%v" VEX prefix.
9793 (define_insn "sse4_1_<code>v2hiv2di2"
9794 [(set (match_operand:V2DI 0 "register_operand" "=x")
9797 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9798 (parallel [(const_int 0)
9801 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9802 [(set_attr "type" "ssemov")
9803 (set_attr "prefix_extra" "1")
9804 (set_attr "prefix" "maybe_vex")
9805 (set_attr "mode" "TI")])
;; AVX2 doubleword-to-quadword extension: a V4SI source (register or
;; memory) is widened to a V4DI result.  Emits vpmov<extsuffix>dq.
;; The VEX prefix attribute is stated explicitly, as in every other
;; avx2_<code>* extension pattern in this group.
9807 (define_insn "avx2_<code>v4siv4di2"
9808 [(set (match_operand:V4DI 0 "register_operand" "=x")
9810 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9812 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9815 (set_attr "mode" "OI")])
;; SSE4.1 doubleword-to-quadword extension producing V2DI.  The source
;; operand is a V4SI (register or memory) from which elements are
;; selected, with the index list starting at element 0.  Emits
;; pmov<extsuffix>dq with the optional "%v" VEX prefix.
9817 (define_insn "sse4_1_<code>v2siv2di2"
9818 [(set (match_operand:V2DI 0 "register_operand" "=x")
9821 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9822 (parallel [(const_int 0)
9825 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9826 [(set_attr "type" "ssemov")
9827 (set_attr "prefix_extra" "1")
9828 (set_attr "prefix" "maybe_vex")
9829 (set_attr "mode" "TI")])
9831 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9832 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern sets FLAGS_REG in CCmode from an unspec of the two VF
;; operands; operand 0 must be a register, operand 1 may be memory.
9833 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9834 [(set (reg:CC FLAGS_REG)
9835 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9836 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9839 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9840 [(set_attr "type" "ssecomi")
9841 (set_attr "prefix_extra" "1")
9842 (set_attr "prefix" "vex")
9843 (set_attr "mode" "<MODE>")])
9845 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9846 ;; but it is not really a compare instruction.
;; 256-bit form: sets FLAGS_REG in CCmode from an unspec of two V4DI
;; operands and emits vptest.  Operand 1 may be memory.
9847 (define_insn "avx_ptest256"
9848 [(set (reg:CC FLAGS_REG)
9849 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9850 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9853 "vptest\t{%1, %0|%0, %1}"
9854 [(set_attr "type" "ssecomi")
9855 (set_attr "prefix_extra" "1")
9856 (set_attr "prefix" "vex")
9857 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG in CCmode from an unspec of two V2DI
;; operands.  Operand 1 may be memory.  The "%v" template prefix is
;; paired with the "maybe_vex" prefix attribute.
9859 (define_insn "sse4_1_ptest"
9860 [(set (reg:CC FLAGS_REG)
9861 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9862 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9865 "%vptest\t{%1, %0|%0, %1}"
9866 [(set_attr "type" "ssecomi")
9867 (set_attr "prefix_extra" "1")
9868 (set_attr "prefix" "maybe_vex")
9869 (set_attr "mode" "TI")])
;; Packed round<ssemodesuffix> for every mode of the VF iterator.
;; Operand 1 is the source (register or memory) and operand 2 the
;; rounding-control immediate (const_0_to_15_operand).  The
;; prefix_data16 attribute is computed from a TARGET_AVX test rather
;; than given as a constant.
9871 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9872 [(set (match_operand:VF 0 "register_operand" "=x")
9874 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9875 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9878 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9879 [(set_attr "type" "ssecvt")
9880 (set (attr "prefix_data16")
9882 (match_test "TARGET_AVX")
9884 (const_string "1")))
9885 (set_attr "prefix_extra" "1")
9886 (set_attr "length_immediate" "1")
9887 (set_attr "prefix" "maybe_vex")
9888 (set_attr "mode" "<MODE>")])
;; Scalar round<ssescalarmodesuffix> on VF_128 registers.  Operand 2 is
;; the value being rounded, operand 3 the rounding-control immediate
;; (const_0_to_15_operand), and operand 1 a second vector input that is
;; tied to the destination in the noavx alternative.  The avx
;; alternative emits the four-operand vround form.
9890 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9891 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9894 [(match_operand:VF_128 2 "register_operand" "x,x")
9895 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9897 (match_operand:VF_128 1 "register_operand" "0,x")
9901 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9902 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9903 [(set_attr "isa" "noavx,avx")
9904 (set_attr "type" "ssecvt")
9905 (set_attr "length_immediate" "1")
9906 (set_attr "prefix_data16" "1,*")
9907 (set_attr "prefix_extra" "1")
9908 (set_attr "prefix" "orig,vex")
9909 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 on the VF modes, enabled only when
;; TARGET_ROUND holds and -ftrapping-math is off.
;; The preparation code:
;;   - builds the scalar constant 0.5 minus 2^(-p-1), where p comes from
;;     the mode's real format (described below as nextafter (0.5, 0.0));
;;   - broadcasts it into a vector constant and forces it into a register;
;;   - emits copysign<mode>3 to give that vector the sign of operand 1,
;;     storing the result in operands[3];
;;   - allocates a fresh register for operands[4] and sets operands[5]
;;     to ROUND_TRUNC.
;; Operands 4 and 5 are then consumed by the second set of the template.
9911 (define_expand "round<mode>2"
9914 (match_operand:VF 1 "nonimmediate_operand" "")
9916 (set (match_operand:VF 0 "register_operand" "")
9918 [(match_dup 4) (match_dup 5)]
9920 "TARGET_ROUND && !flag_trapping_math"
9922 enum machine_mode scalar_mode;
9923 const struct real_format *fmt;
9924 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9927 scalar_mode = GET_MODE_INNER (<MODE>mode);
9929 /* load nextafter (0.5, 0.0) */
9930 fmt = REAL_MODE_FORMAT (scalar_mode);
9931 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9932 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9933 half = const_double_from_real_value (pred_half, scalar_mode);
9935 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9936 vec_half = force_reg (<MODE>mode, vec_half);
9938 operands[3] = gen_reg_rtx (<MODE>mode);
9939 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9941 operands[4] = gen_reg_rtx (<MODE>mode);
9942 operands[5] = GEN_INT (ROUND_TRUNC);
9945 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9947 ;; Intel SSE4.2 string/text processing instructions
9949 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern with three results: SI operand 0 ("c"),
;; V16QI operand 1 ("Yz") and FLAGS_REG.  Inputs are V16QI operands 2 and 4
;; (4 may be memory in alternative 1), SI operands 3 ("a") and 5 ("d"), and
;; the immediate operand 6 (const_0_to_255_operand).
;; The split runs while pseudos can still be created.  It derives three
;; liveness flags (ecx, xmm0, flags) from the absence of REG_UNUSED notes on
;; the current insn and then emits from gen_sse4_2_pcmpestri,
;; gen_sse4_2_pcmpestrm, gen_sse4_2_pcmpestr_cconly (when only the flags are
;; live) or a NOTE_INSN_DELETED (when nothing is live).
9951 (define_insn_and_split "sse4_2_pcmpestr"
9952 [(set (match_operand:SI 0 "register_operand" "=c,c")
9954 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9955 (match_operand:SI 3 "register_operand" "a,a")
9956 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9957 (match_operand:SI 5 "register_operand" "d,d")
9958 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9960 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9968 (set (reg:CC FLAGS_REG)
9977 && can_create_pseudo_p ()"
9982 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9983 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9984 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9987 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9988 operands[3], operands[4],
9989 operands[5], operands[6]));
9991 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9992 operands[3], operands[4],
9993 operands[5], operands[6]));
9994 if (flags && !(ecx || xmm0))
9995 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9996 operands[2], operands[3],
9997 operands[4], operands[5],
9999 if (!(flags || ecx || xmm0))
10000 emit_note (NOTE_INSN_DELETED);
10004 [(set_attr "type" "sselog")
10005 (set_attr "prefix_data16" "1")
10006 (set_attr "prefix_extra" "1")
10007 (set_attr "length_immediate" "1")
10008 (set_attr "memory" "none,load")
10009 (set_attr "mode" "TI")])
;; PCMPESTRI: SI result in operand 0 ("c") plus a set of FLAGS_REG.
;; V16QI inputs are operands 1 and 3 (3 is memory in alternative 1); SI
;; operands 2 and 4 are constrained to "a" and "d"; operand 5 is the
;; immediate.  Only operands 1, 3 and 5 appear in the output template.
;; The "memory" attribute is none/load per alternative.
10011 (define_insn "sse4_2_pcmpestri"
10012 [(set (match_operand:SI 0 "register_operand" "=c,c")
10014 [(match_operand:V16QI 1 "register_operand" "x,x")
10015 (match_operand:SI 2 "register_operand" "a,a")
10016 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10017 (match_operand:SI 4 "register_operand" "d,d")
10018 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10020 (set (reg:CC FLAGS_REG)
10029 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10030 [(set_attr "type" "sselog")
10031 (set_attr "prefix_data16" "1")
10032 (set_attr "prefix_extra" "1")
10033 (set_attr "prefix" "maybe_vex")
10034 (set_attr "length_immediate" "1")
10035 (set_attr "memory" "none,load")
10036 (set_attr "mode" "TI")])
;; PCMPESTRM: V16QI result in operand 0 ("Yz") plus a set of FLAGS_REG.
;; Input operands mirror sse4_2_pcmpestri: V16QI operands 1 and 3 (3 is
;; memory in alternative 1), SI operands 2 ("a") and 4 ("d"), immediate
;; operand 5.  Only operands 1, 3 and 5 appear in the output template.
10038 (define_insn "sse4_2_pcmpestrm"
10039 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10041 [(match_operand:V16QI 1 "register_operand" "x,x")
10042 (match_operand:SI 2 "register_operand" "a,a")
10043 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10044 (match_operand:SI 4 "register_operand" "d,d")
10045 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10047 (set (reg:CC FLAGS_REG)
10056 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10057 [(set_attr "type" "sselog")
10058 (set_attr "prefix_data16" "1")
10059 (set_attr "prefix_extra" "1")
10060 (set_attr "length_immediate" "1")
10061 (set_attr "prefix" "maybe_vex")
10062 (set_attr "memory" "none,load")
10063 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: sets FLAGS_REG and clobbers two scratches, of which
;; exactly one is a real register in each alternative:
;;   alts 0-1: V16QI scratch in "Yz", SI scratch "X"  -> %vpcmpestrm
;;   alts 2-3: V16QI scratch "X", SI scratch in "c"   -> %vpcmpestri
;; Odd alternatives take operand 4 from memory ("memory" attr = load).
10065 (define_insn "sse4_2_pcmpestr_cconly"
10066 [(set (reg:CC FLAGS_REG)
10068 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10069 (match_operand:SI 3 "register_operand" "a,a,a,a")
10070 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10071 (match_operand:SI 5 "register_operand" "d,d,d,d")
10072 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10074 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10075 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10078 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10079 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10080 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10081 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10082 [(set_attr "type" "sselog")
10083 (set_attr "prefix_data16" "1")
10084 (set_attr "prefix_extra" "1")
10085 (set_attr "length_immediate" "1")
10086 (set_attr "memory" "none,load,none,load")
10087 (set_attr "prefix" "maybe_vex")
10088 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern with three results: SI operand 0 ("c"),
;; V16QI operand 1 ("Yz") and FLAGS_REG.  Inputs are V16QI operands 2 and 3
;; (3 may be memory in alternative 1) and the immediate operand 4.
;; The split runs while pseudos can still be created.  It derives three
;; liveness flags (ecx, xmm0, flags) from the absence of REG_UNUSED notes on
;; the current insn and then emits from gen_sse4_2_pcmpistri,
;; gen_sse4_2_pcmpistrm, gen_sse4_2_pcmpistr_cconly (when only the flags are
;; live) or a NOTE_INSN_DELETED (when nothing is live).
10090 (define_insn_and_split "sse4_2_pcmpistr"
10091 [(set (match_operand:SI 0 "register_operand" "=c,c")
10093 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10094 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10095 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10097 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10103 (set (reg:CC FLAGS_REG)
10110 && can_create_pseudo_p ()"
10115 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10116 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10117 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10120 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10121 operands[3], operands[4]));
10123 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10124 operands[3], operands[4]));
10125 if (flags && !(ecx || xmm0))
10126 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10127 operands[2], operands[3],
10129 if (!(flags || ecx || xmm0))
10130 emit_note (NOTE_INSN_DELETED);
10134 [(set_attr "type" "sselog")
10135 (set_attr "prefix_data16" "1")
10136 (set_attr "prefix_extra" "1")
10137 (set_attr "length_immediate" "1")
10138 (set_attr "memory" "none,load")
10139 (set_attr "mode" "TI")])
;; PCMPISTRI: SI result in operand 0 ("c") plus a set of FLAGS_REG.
;; V16QI inputs are operands 1 (register) and 2 (register or, in
;; alternative 1, memory); operand 3 is the immediate.
10141 (define_insn "sse4_2_pcmpistri"
10142 [(set (match_operand:SI 0 "register_operand" "=c,c")
10144 [(match_operand:V16QI 1 "register_operand" "x,x")
10145 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10146 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10148 (set (reg:CC FLAGS_REG)
10155 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10156 [(set_attr "type" "sselog")
10157 (set_attr "prefix_data16" "1")
10158 (set_attr "prefix_extra" "1")
10159 (set_attr "length_immediate" "1")
10160 (set_attr "prefix" "maybe_vex")
10161 (set_attr "memory" "none,load")
10162 (set_attr "mode" "TI")])
;; PCMPISTRM: V16QI result in operand 0 ("Yz") plus a set of FLAGS_REG.
;; V16QI inputs are operands 1 (register) and 2 (register or, in
;; alternative 1, memory); operand 3 is the immediate.
10164 (define_insn "sse4_2_pcmpistrm"
10165 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10167 [(match_operand:V16QI 1 "register_operand" "x,x")
10168 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10169 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10171 (set (reg:CC FLAGS_REG)
10178 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10179 [(set_attr "type" "sselog")
10180 (set_attr "prefix_data16" "1")
10181 (set_attr "prefix_extra" "1")
10182 (set_attr "length_immediate" "1")
10183 (set_attr "prefix" "maybe_vex")
10184 (set_attr "memory" "none,load")
10185 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: sets FLAGS_REG and clobbers two scratches, of which
;; exactly one is a real register in each alternative:
;;   alts 0-1: V16QI scratch in "Yz", SI scratch "X"  -> %vpcmpistrm
;;   alts 2-3: V16QI scratch "X", SI scratch in "c"   -> %vpcmpistri
;; Odd alternatives take operand 3 from memory ("memory" attr = load).
10187 (define_insn "sse4_2_pcmpistr_cconly"
10188 [(set (reg:CC FLAGS_REG)
10190 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10191 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10192 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10194 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10195 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10198 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10199 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10200 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10201 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10202 [(set_attr "type" "sselog")
10203 (set_attr "prefix_data16" "1")
10204 (set_attr "prefix_extra" "1")
10205 (set_attr "length_immediate" "1")
10206 (set_attr "memory" "none,load,none,load")
10207 (set_attr "prefix" "maybe_vex")
10208 (set_attr "mode" "TI")])
10210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10212 ;; XOP instructions
10214 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10216 ;; XOP parallel integer multiply/add instructions.
10217 ;; Note the XOP multiply/add instructions
10218 ;; a[i] = b[i] * c[i] + d[i];
10219 ;; do not allow the value being added to be a memory operand.
;; VPMACSWW on V8HI.  Operands 1 and 2 are the commutative pair ("%"); only
;; operand 2 may be in memory.  Operand 3 has a nonimmediate_operand
;; predicate but its constraint ("x") restricts it to a register.
10220 (define_insn "xop_pmacsww"
10221 [(set (match_operand:V8HI 0 "register_operand" "=x")
10224 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10225 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10226 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10228 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10229 [(set_attr "type" "ssemuladd")
10230 (set_attr "mode" "TI")])
;; VPMACSSWW on V8HI: a (mult:V8HI op1 op2) combined with operand 3.
;; Operands 1 and 2 are the commutative pair ("%"); only operand 2 may be
;; in memory, and operand 3 is constrained to a register ("x").
10232 (define_insn "xop_pmacssww"
10233 [(set (match_operand:V8HI 0 "register_operand" "=x")
10235 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10236 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10237 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10239 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10240 [(set_attr "type" "ssemuladd")
10241 (set_attr "mode" "TI")])
;; VPMACSDD on V4SI.  Operands 1 and 2 are the commutative pair ("%"); only
;; operand 2 may be in memory.  Operand 3 is constrained to a register
;; ("x") despite its nonimmediate_operand predicate.
10243 (define_insn "xop_pmacsdd"
10244 [(set (match_operand:V4SI 0 "register_operand" "=x")
10247 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10248 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10249 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10251 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10252 [(set_attr "type" "ssemuladd")
10253 (set_attr "mode" "TI")])
;; VPMACSSDD on V4SI: a (mult:V4SI op1 op2) combined with operand 3.
;; Operands 1 and 2 are the commutative pair ("%"); only operand 2 may be
;; in memory, and operand 3 is constrained to a register ("x").
10255 (define_insn "xop_pmacssdd"
10256 [(set (match_operand:V4SI 0 "register_operand" "=x")
10258 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10259 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10260 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10262 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10263 [(set_attr "type" "ssemuladd")
10264 (set_attr "mode" "TI")])
;; VPMACSSDQL: V4SI operands 1 and 2 (element lists starting at index 1)
;; and V2DI operand 3 produce a V2DI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10266 (define_insn "xop_pmacssdql"
10267 [(set (match_operand:V2DI 0 "register_operand" "=x")
10272 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10273 (parallel [(const_int 1)
10276 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10277 (parallel [(const_int 1)
10279 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10281 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10282 [(set_attr "type" "ssemuladd")
10283 (set_attr "mode" "TI")])
;; VPMACSSDQH: V4SI operands 1 and 2 (element lists starting at index 0)
;; and V2DI operand 3 produce a V2DI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10285 (define_insn "xop_pmacssdqh"
10286 [(set (match_operand:V2DI 0 "register_operand" "=x")
10291 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10292 (parallel [(const_int 0)
10296 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10297 (parallel [(const_int 0)
10299 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10301 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10302 [(set_attr "type" "ssemuladd")
10303 (set_attr "mode" "TI")])
;; VPMACSDQL: V4SI operands 1 and 2 (element lists starting at index 1)
;; and V2DI operand 3 produce a V2DI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10305 (define_insn "xop_pmacsdql"
10306 [(set (match_operand:V2DI 0 "register_operand" "=x")
10311 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10312 (parallel [(const_int 1)
10316 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10317 (parallel [(const_int 1)
10319 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10321 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10322 [(set_attr "type" "ssemuladd")
10323 (set_attr "mode" "TI")])
10325 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10326 ;; fake it with a multiply/add. In general, we expect the define_split to
10327 ;; occur before register allocation, so we have to handle the corner case where
10328 ;; the target is the same as operands 1/2
;; Insn-and-split over two V4SI inputs whose element lists start at index 1
;; (operand 2's list is 1, 3).  The V2DI destination is earlyclobber
;; ("=&x") so it cannot share a register with an input.  The split
;; condition is "&& reload_completed"; its preparation code sets
;; operands[3] to CONST0_RTX (V2DImode) for the replacement pattern.
10329 (define_insn_and_split "xop_mulv2div2di3_low"
10330 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10334 (match_operand:V4SI 1 "register_operand" "%x")
10335 (parallel [(const_int 1)
10339 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10340 (parallel [(const_int 1)
10341 (const_int 3)])))))]
10344 "&& reload_completed"
10345 [(set (match_dup 0)
10353 (parallel [(const_int 1)
10358 (parallel [(const_int 1)
10362 operands[3] = CONST0_RTX (V2DImode);
10364 [(set_attr "type" "ssemul")
10365 (set_attr "mode" "TI")])
;; VPMACSDQH: V4SI operands 1 and 2 (element lists starting at index 0)
;; and V2DI operand 3 produce a V2DI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10367 (define_insn "xop_pmacsdqh"
10368 [(set (match_operand:V2DI 0 "register_operand" "=x")
10373 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10374 (parallel [(const_int 0)
10378 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10379 (parallel [(const_int 0)
10381 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10383 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10384 [(set_attr "type" "ssemuladd")
10385 (set_attr "mode" "TI")])
10387 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10388 ;; fake it with a multiply/add. In general, we expect the define_split to
10389 ;; occur before register allocation, so we have to handle the corner case where
10390 ;; the target is the same as either operands[1] or operands[2]
;; Insn-and-split over two V4SI inputs whose element lists start at index 0
;; (operand 2's list is 0, 2).  The V2DI destination is earlyclobber
;; ("=&x") so it cannot share a register with an input.  The split
;; condition is "&& reload_completed"; its preparation code sets
;; operands[3] to CONST0_RTX (V2DImode) for the replacement pattern.
10391 (define_insn_and_split "xop_mulv2div2di3_high"
10392 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10396 (match_operand:V4SI 1 "register_operand" "%x")
10397 (parallel [(const_int 0)
10401 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10402 (parallel [(const_int 0)
10403 (const_int 2)])))))]
10406 "&& reload_completed"
10407 [(set (match_dup 0)
10415 (parallel [(const_int 0)
10420 (parallel [(const_int 0)
10424 operands[3] = CONST0_RTX (V2DImode);
10426 [(set_attr "type" "ssemul")
10427 (set_attr "mode" "TI")])
10429 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; VPMACSSWD: V8HI operands 1 and 2 (element lists starting at index 1)
;; and V4SI operand 3 produce a V4SI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10430 (define_insn "xop_pmacsswd"
10431 [(set (match_operand:V4SI 0 "register_operand" "=x")
10436 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10437 (parallel [(const_int 1)
10443 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10444 (parallel [(const_int 1)
10448 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10450 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10451 [(set_attr "type" "ssemuladd")
10452 (set_attr "mode" "TI")])
;; VPMACSWD: V8HI operands 1 and 2 (element lists starting at index 1)
;; and V4SI operand 3 produce a V4SI result.  Only operand 2 may be in
;; memory; operand 3 is constrained to a register ("x").
10454 (define_insn "xop_pmacswd"
10455 [(set (match_operand:V4SI 0 "register_operand" "=x")
10460 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10461 (parallel [(const_int 1)
10467 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10468 (parallel [(const_int 1)
10472 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10474 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10475 [(set_attr "type" "ssemuladd")
10476 (set_attr "mode" "TI")])
;; VPMADCSSWD: V8HI operands 1 and 2 are each referenced through two
;; element lists (one starting at index 0, one starting at index 1 and
;; ending at 7); V4SI operand 3 is the remaining input of the V4SI result.
;; Only operand 2 may be in memory; operand 3 is constrained to "x".
10478 (define_insn "xop_pmadcsswd"
10479 [(set (match_operand:V4SI 0 "register_operand" "=x")
10485 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10486 (parallel [(const_int 0)
10492 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10493 (parallel [(const_int 0)
10501 (parallel [(const_int 1)
10508 (parallel [(const_int 1)
10511 (const_int 7)])))))
10512 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10514 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10515 [(set_attr "type" "ssemuladd")
10516 (set_attr "mode" "TI")])
;; VPMADCSWD: V8HI operands 1 and 2 are each referenced through two
;; element lists (one starting at index 0, one starting at index 1 and
;; ending at 7); V4SI operand 3 is the remaining input of the V4SI result.
;; Only operand 2 may be in memory; operand 3 is constrained to "x".
10518 (define_insn "xop_pmadcswd"
10519 [(set (match_operand:V4SI 0 "register_operand" "=x")
10525 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10526 (parallel [(const_int 0)
10532 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10533 (parallel [(const_int 0)
10541 (parallel [(const_int 1)
10548 (parallel [(const_int 1)
10551 (const_int 7)])))))
10552 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10554 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10555 [(set_attr "type" "ssemuladd")
10556 (set_attr "mode" "TI")])
10558 ;; XOP parallel XMM conditional moves
;; VPCMOV on every mode of the V iterator.  Operand 1 is always a register.
;; The two alternatives allow at most one memory operand between operands 2
;; and 3: alt 0 has op3 "x" / op2 "xm", alt 1 has op3 "m" / op2 "x".
;; Only the "type" attribute (sse4arg) is set here.
10559 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10560 [(set (match_operand:V 0 "register_operand" "=x,x")
10562 (match_operand:V 3 "nonimmediate_operand" "x,m")
10563 (match_operand:V 1 "register_operand" "x,x")
10564 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10566 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10567 [(set_attr "type" "sse4arg")])
10569 ;; XOP horizontal add/subtract instructions
;; VPHADDBW: single V16QI input (register or memory) -> V8HI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10570 (define_insn "xop_phaddbw"
10571 [(set (match_operand:V8HI 0 "register_operand" "=x")
10575 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10576 (parallel [(const_int 0)
10587 (parallel [(const_int 1)
10594 (const_int 15)])))))]
10596 "vphaddbw\t{%1, %0|%0, %1}"
10597 [(set_attr "type" "sseiadd1")])
;; VPHADDBD: single V16QI input (register or memory) -> V4SI register.
;; The input is referenced through four element lists, starting at indices
;; 0, 1, 2 and 3.  Only the "type" attribute (sseiadd1) is set.
10599 (define_insn "xop_phaddbd"
10600 [(set (match_operand:V4SI 0 "register_operand" "=x")
10605 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10606 (parallel [(const_int 0)
10613 (parallel [(const_int 1)
10616 (const_int 13)]))))
10621 (parallel [(const_int 2)
10628 (parallel [(const_int 3)
10631 (const_int 15)]))))))]
10633 "vphaddbd\t{%1, %0|%0, %1}"
10634 [(set_attr "type" "sseiadd1")])
;; VPHADDBQ: single V16QI input (register or memory) -> V2DI register.
;; The input is referenced through eight element lists, starting at
;; indices 0, 1, 2, 3 and 8, 9, 10, 11.  Only the "type" attribute
;; (sseiadd1) is set.
10636 (define_insn "xop_phaddbq"
10637 [(set (match_operand:V2DI 0 "register_operand" "=x")
10643 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10644 (parallel [(const_int 0)
10649 (parallel [(const_int 1)
10655 (parallel [(const_int 2)
10660 (parallel [(const_int 3)
10661 (const_int 7)])))))
10667 (parallel [(const_int 8)
10672 (parallel [(const_int 9)
10673 (const_int 13)]))))
10678 (parallel [(const_int 10)
10683 (parallel [(const_int 11)
10684 (const_int 15)])))))))]
10686 "vphaddbq\t{%1, %0|%0, %1}"
10687 [(set_attr "type" "sseiadd1")])
;; VPHADDWD: single V8HI input (register or memory) -> V4SI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10689 (define_insn "xop_phaddwd"
10690 [(set (match_operand:V4SI 0 "register_operand" "=x")
10694 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10695 (parallel [(const_int 0)
10702 (parallel [(const_int 1)
10705 (const_int 7)])))))]
10707 "vphaddwd\t{%1, %0|%0, %1}"
10708 [(set_attr "type" "sseiadd1")])
;; VPHADDWQ: single V8HI input (register or memory) -> V2DI register.
;; The input is referenced through four element lists, starting at indices
;; 0, 1, 2 and 3.  Only the "type" attribute (sseiadd1) is set.
10710 (define_insn "xop_phaddwq"
10711 [(set (match_operand:V2DI 0 "register_operand" "=x")
10716 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10717 (parallel [(const_int 0)
10722 (parallel [(const_int 1)
10728 (parallel [(const_int 2)
10733 (parallel [(const_int 3)
10734 (const_int 7)]))))))]
10736 "vphaddwq\t{%1, %0|%0, %1}"
10737 [(set_attr "type" "sseiadd1")])
;; VPHADDDQ: single V4SI input (register or memory) -> V2DI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10739 (define_insn "xop_phadddq"
10740 [(set (match_operand:V2DI 0 "register_operand" "=x")
10744 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10745 (parallel [(const_int 0)
10750 (parallel [(const_int 1)
10751 (const_int 3)])))))]
10753 "vphadddq\t{%1, %0|%0, %1}"
10754 [(set_attr "type" "sseiadd1")])
;; VPHADDUBW: single V16QI input (register or memory) -> V8HI register.
;; Same operand layout as xop_phaddbw; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10756 (define_insn "xop_phaddubw"
10757 [(set (match_operand:V8HI 0 "register_operand" "=x")
10761 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10762 (parallel [(const_int 0)
10773 (parallel [(const_int 1)
10780 (const_int 15)])))))]
10782 "vphaddubw\t{%1, %0|%0, %1}"
10783 [(set_attr "type" "sseiadd1")])
;; VPHADDUBD: single V16QI input (register or memory) -> V4SI register.
;; Same operand layout as xop_phaddbd; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10785 (define_insn "xop_phaddubd"
10786 [(set (match_operand:V4SI 0 "register_operand" "=x")
10791 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10792 (parallel [(const_int 0)
10799 (parallel [(const_int 1)
10802 (const_int 13)]))))
10807 (parallel [(const_int 2)
10814 (parallel [(const_int 3)
10817 (const_int 15)]))))))]
10819 "vphaddubd\t{%1, %0|%0, %1}"
10820 [(set_attr "type" "sseiadd1")])
;; VPHADDUBQ: single V16QI input (register or memory) -> V2DI register.
;; Same operand layout as xop_phaddbq; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10822 (define_insn "xop_phaddubq"
10823 [(set (match_operand:V2DI 0 "register_operand" "=x")
10829 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10830 (parallel [(const_int 0)
10835 (parallel [(const_int 1)
10841 (parallel [(const_int 2)
10846 (parallel [(const_int 3)
10847 (const_int 7)])))))
10853 (parallel [(const_int 8)
10858 (parallel [(const_int 9)
10859 (const_int 13)]))))
10864 (parallel [(const_int 10)
10869 (parallel [(const_int 11)
10870 (const_int 15)])))))))]
10872 "vphaddubq\t{%1, %0|%0, %1}"
10873 [(set_attr "type" "sseiadd1")])
;; VPHADDUWD: single V8HI input (register or memory) -> V4SI register.
;; Same operand layout as xop_phaddwd; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10875 (define_insn "xop_phadduwd"
10876 [(set (match_operand:V4SI 0 "register_operand" "=x")
10880 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10881 (parallel [(const_int 0)
10888 (parallel [(const_int 1)
10891 (const_int 7)])))))]
10893 "vphadduwd\t{%1, %0|%0, %1}"
10894 [(set_attr "type" "sseiadd1")])
;; VPHADDUWQ: single V8HI input (register or memory) -> V2DI register.
;; Same operand layout as xop_phaddwq; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10896 (define_insn "xop_phadduwq"
10897 [(set (match_operand:V2DI 0 "register_operand" "=x")
10902 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10903 (parallel [(const_int 0)
10908 (parallel [(const_int 1)
10914 (parallel [(const_int 2)
10919 (parallel [(const_int 3)
10920 (const_int 7)]))))))]
10922 "vphadduwq\t{%1, %0|%0, %1}"
10923 [(set_attr "type" "sseiadd1")])
;; VPHADDUDQ: single V4SI input (register or memory) -> V2DI register.
;; Same operand layout as xop_phadddq; the mnemonic carries the "u" infix.
;; Only the "type" attribute (sseiadd1) is set.
10925 (define_insn "xop_phaddudq"
10926 [(set (match_operand:V2DI 0 "register_operand" "=x")
10930 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10931 (parallel [(const_int 0)
10936 (parallel [(const_int 1)
10937 (const_int 3)])))))]
10939 "vphaddudq\t{%1, %0|%0, %1}"
10940 [(set_attr "type" "sseiadd1")])
;; VPHSUBBW: single V16QI input (register or memory) -> V8HI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10942 (define_insn "xop_phsubbw"
10943 [(set (match_operand:V8HI 0 "register_operand" "=x")
10947 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10948 (parallel [(const_int 0)
10959 (parallel [(const_int 1)
10966 (const_int 15)])))))]
10968 "vphsubbw\t{%1, %0|%0, %1}"
10969 [(set_attr "type" "sseiadd1")])
;; VPHSUBWD: single V8HI input (register or memory) -> V4SI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10971 (define_insn "xop_phsubwd"
10972 [(set (match_operand:V4SI 0 "register_operand" "=x")
10976 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10977 (parallel [(const_int 0)
10984 (parallel [(const_int 1)
10987 (const_int 7)])))))]
10989 "vphsubwd\t{%1, %0|%0, %1}"
10990 [(set_attr "type" "sseiadd1")])
;; VPHSUBDQ: single V4SI input (register or memory) -> V2DI register.
;; The input is referenced through element lists starting at indices 0 and
;; 1.  Only the "type" attribute (sseiadd1) is set.
10992 (define_insn "xop_phsubdq"
10993 [(set (match_operand:V2DI 0 "register_operand" "=x")
10997 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10998 (parallel [(const_int 0)
11003 (parallel [(const_int 1)
11004 (const_int 3)])))))]
11006 "vphsubdq\t{%1, %0|%0, %1}"
11007 [(set_attr "type" "sseiadd1")])
11009 ;; XOP permute instructions
;; VPPERM as UNSPEC_XOP_PERMUTE over three V16QI inputs.  Operand 1 is
;; always a register.  At most one of operands 2 and 3 may be in memory:
;; the insn condition rejects MEM/MEM and the alternatives are
;; op2 "x" / op3 "xm" and op2 "m" / op3 "x".
11010 (define_insn "xop_pperm"
11011 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11013 [(match_operand:V16QI 1 "register_operand" "x,x")
11014 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11015 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11016 UNSPEC_XOP_PERMUTE))]
11017 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11018 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11019 [(set_attr "type" "sse4arg")
11020 (set_attr "mode" "TI")])
11022 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack of two V2DI inputs (operands 1 and 2) into a V4SI result, emitted
;; as vpperm.  Operand 3 is a V16QI value attached with (use ...) and
;; passed as the first source in the AT&T template.  At most one of
;; operands 2 and 3 may be in memory (insn condition and alternatives).
11023 (define_insn "xop_pperm_pack_v2di_v4si"
11024 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11027 (match_operand:V2DI 1 "register_operand" "x,x"))
11029 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11030 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11031 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11032 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11033 [(set_attr "type" "sse4arg")
11034 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs (operands 1 and 2) into a V8HI result, emitted
;; as vpperm.  Operand 3 is a V16QI value attached with (use ...) and
;; passed as the first source in the AT&T template.  At most one of
;; operands 2 and 3 may be in memory (insn condition and alternatives).
11036 (define_insn "xop_pperm_pack_v4si_v8hi"
11037 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11040 (match_operand:V4SI 1 "register_operand" "x,x"))
11042 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11043 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11044 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11045 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11046 [(set_attr "type" "sse4arg")
11047 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs (operands 1 and 2) into a V16QI result, emitted
;; as vpperm.  Operand 3 is a V16QI value attached with (use ...) and
;; passed as the first source in the AT&T template.  At most one of
;; operands 2 and 3 may be in memory (insn condition and alternatives).
11049 (define_insn "xop_pperm_pack_v8hi_v16qi"
11050 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11053 (match_operand:V8HI 1 "register_operand" "x,x"))
11055 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11056 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11057 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11058 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11059 [(set_attr "type" "sse4arg")
11060 (set_attr "mode" "TI")])
11062 ;; XOP packed rotate instructions
;; Expander for rotl<mode>3 on the VI_128 modes with an SI count.
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; count is converted to <ssescalarmode> if its mode differs, placed in all
;; <ssescalarnum> slots of a PARALLEL, loaded into a fresh vector register
;; with vec_init<mode>, and xop_vrotl<mode>3 is emitted with that vector as
;; the per-element count.
11063 (define_expand "rotl<mode>3"
11064 [(set (match_operand:VI_128 0 "register_operand" "")
11066 (match_operand:VI_128 1 "nonimmediate_operand" "")
11067 (match_operand:SI 2 "general_operand")))]
11070 /* If we were given a scalar, convert it to parallel */
11071 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11073 rtvec vs = rtvec_alloc (<ssescalarnum>);
11074 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11075 rtx reg = gen_reg_rtx (<MODE>mode);
11076 rtx op2 = operands[2];
11079 if (GET_MODE (op2) != <ssescalarmode>mode)
11081 op2 = gen_reg_rtx (<ssescalarmode>mode);
11082 convert_move (op2, operands[2], false);
11085 for (i = 0; i < <ssescalarnum>; i++)
11086 RTVEC_ELT (vs, i) = op2;
11088 emit_insn (gen_vec_init<mode> (reg, par));
11089 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotr<mode>3 on the VI_128 modes with an SI count.
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; count is converted to <ssescalarmode> if its mode differs, replicated
;; into a vector register with vec_init<mode>, negated with neg<mode>2, and
;; xop_vrotl<mode>3 is emitted with the negated vector as the count.
11094 (define_expand "rotr<mode>3"
11095 [(set (match_operand:VI_128 0 "register_operand" "")
11097 (match_operand:VI_128 1 "nonimmediate_operand" "")
11098 (match_operand:SI 2 "general_operand")))]
11101 /* If we were given a scalar, convert it to parallel */
11102 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11104 rtvec vs = rtvec_alloc (<ssescalarnum>);
11105 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11106 rtx neg = gen_reg_rtx (<MODE>mode);
11107 rtx reg = gen_reg_rtx (<MODE>mode);
11108 rtx op2 = operands[2];
11111 if (GET_MODE (op2) != <ssescalarmode>mode)
11113 op2 = gen_reg_rtx (<ssescalarmode>mode);
11114 convert_move (op2, operands[2], false);
11117 for (i = 0; i < <ssescalarnum>; i++)
11118 RTVEC_ELT (vs, i) = op2;
11120 emit_insn (gen_vec_init<mode> (reg, par));
11121 emit_insn (gen_neg<mode>2 (neg, reg));
11122 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count form of vprot on the VI_128 modes: operand 1 (register
;; or memory) is the source and operand 2 an immediate accepted by
;; const_0_to_<sserotatemax>_operand, passed to the instruction unchanged.
11127 (define_insn "xop_rotl<mode>3"
11128 [(set (match_operand:VI_128 0 "register_operand" "=x")
11130 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11131 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11133 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11134 [(set_attr "type" "sseishft")
11135 (set_attr "length_immediate" "1")
11136 (set_attr "mode" "TI")])
;; Rotate right by an immediate on the VI_128 modes.  There is no separate
;; mnemonic: the insn is emitted as vprot with the complementary count
;; operands[3] = (element width in bits) - operand 2.
;; The width must come from the element mode, not from the element count:
;; "<ssescalarnum> * 8" equals the element width only for V4SI (4 * 8 = 32)
;; and is wrong elsewhere, e.g. 2 * 8 = 16 instead of 64 for V2DI.
11138 (define_insn "xop_rotr<mode>3"
11139 [(set (match_operand:VI_128 0 "register_operand" "=x")
11141 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11142 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11145 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11146 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11148 [(set_attr "type" "sseishft")
11149 (set_attr "length_immediate" "1")
11150 (set_attr "mode" "TI")])
;; Expander for a per-element variable rotate right on the VI_128 modes:
;; negates the count vector (operand 2) into a fresh register with
;; neg<mode>2 and emits xop_vrotl<mode>3 with the negated counts.
11152 (define_expand "vrotr<mode>3"
11153 [(match_operand:VI_128 0 "register_operand" "")
11154 (match_operand:VI_128 1 "register_operand" "")
11155 (match_operand:VI_128 2 "register_operand" "")]
11158 rtx reg = gen_reg_rtx (<MODE>mode);
11159 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11160 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a per-element variable rotate left on the VI_128 modes:
;; forwards all three operands unchanged to xop_vrotl<mode>3.
11164 (define_expand "vrotl<mode>3"
11165 [(match_operand:VI_128 0 "register_operand" "")
11166 (match_operand:VI_128 1 "register_operand" "")
11167 (match_operand:VI_128 2 "register_operand" "")]
11170 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Vector-count form of vprot on the VI_128 modes.  The pattern is an
;; if_then_else keyed on operand 2 (the count vector), one arm of which
;; uses (neg (match_dup 2)).  At most one of operands 1 and 2 may be in
;; memory: the insn condition rejects MEM/MEM and the alternatives are
;; op2 "x" / op1 "xm" and op2 "m" / op1 "x".
11174 (define_insn "xop_vrotl<mode>3"
11175 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11176 (if_then_else:VI_128
11178 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11181 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11185 (neg:VI_128 (match_dup 2)))))]
11186 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11187 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11188 [(set_attr "type" "sseishft")
11189 (set_attr "prefix_data16" "0")
11190 (set_attr "prefix_extra" "2")
11191 (set_attr "mode" "TI")])
11193 ;; XOP packed shift instructions.
11194 ;; FIXME: add V2DI back in
;; Expander for a per-element variable logical shift right on the
;; VI124_128 modes: negates the count vector (operand 2) with neg<mode>2
;; and emits xop_lshl<mode>3 with the negated counts.
11195 (define_expand "vlshr<mode>3"
11196 [(match_operand:VI124_128 0 "register_operand" "")
11197 (match_operand:VI124_128 1 "register_operand" "")
11198 (match_operand:VI124_128 2 "register_operand" "")]
11201 rtx neg = gen_reg_rtx (<MODE>mode);
11202 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11203 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element variable arithmetic shift right on the
;; VI124_128 modes: negates the count vector (operand 2) with neg<mode>2
;; and emits xop_ashl<mode>3 with the negated counts.
11207 (define_expand "vashr<mode>3"
11208 [(match_operand:VI124_128 0 "register_operand" "")
11209 (match_operand:VI124_128 1 "register_operand" "")
11210 (match_operand:VI124_128 2 "register_operand" "")]
11213 rtx neg = gen_reg_rtx (<MODE>mode);
11214 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11215 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element variable shift left on the VI124_128 modes:
;; forwards all three operands unchanged to xop_ashl<mode>3.
11219 (define_expand "vashl<mode>3"
11220 [(match_operand:VI124_128 0 "register_operand" "")
11221 (match_operand:VI124_128 1 "register_operand" "")
11222 (match_operand:VI124_128 2 "register_operand" "")]
11225 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha on the VI_128 modes with a vector count.  The pattern is an
;; if_then_else keyed on operand 2 (the count vector), one arm of which
;; uses (neg (match_dup 2)).  At most one of operands 1 and 2 may be in
;; memory: the insn condition rejects MEM/MEM and the alternatives are
;; op2 "x" / op1 "xm" and op2 "m" / op1 "x".
11229 (define_insn "xop_ashl<mode>3"
11230 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11231 (if_then_else:VI_128
11233 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11236 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11240 (neg:VI_128 (match_dup 2)))))]
11241 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11242 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11243 [(set_attr "type" "sseishft")
11244 (set_attr "prefix_data16" "0")
11245 (set_attr "prefix_extra" "2")
11246 (set_attr "mode" "TI")])
;; vpshl on the VI_128 modes with a vector count.  The pattern is an
;; if_then_else keyed on operand 2 (the count vector), one arm of which
;; uses (neg (match_dup 2)).  At most one of operands 1 and 2 may be in
;; memory: the insn condition rejects MEM/MEM and the alternatives are
;; op2 "x" / op1 "xm" and op2 "m" / op1 "x".
11248 (define_insn "xop_lshl<mode>3"
11249 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11250 (if_then_else:VI_128
11252 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11255 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11259 (neg:VI_128 (match_dup 2)))))]
11260 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11261 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11262 [(set_attr "type" "sseishft")
11263 (set_attr "prefix_data16" "0")
11264 (set_attr "prefix_extra" "2")
11265 (set_attr "mode" "TI")])
11267 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI shift left by a scalar SI count (register or
;; constant).  The count is stored in all 16 slots of a PARALLEL, loaded
;; into a fresh V16QI register with vec_initv16qi, and xop_ashlv16qi3 is
;; emitted with that vector as the per-element count.
11268 (define_expand "ashlv16qi3"
11269 [(match_operand:V16QI 0 "register_operand" "")
11270 (match_operand:V16QI 1 "register_operand" "")
11271 (match_operand:SI 2 "nonmemory_operand" "")]
11274 rtvec vs = rtvec_alloc (16);
11275 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11276 rtx reg = gen_reg_rtx (V16QImode);
11278 for (i = 0; i < 16; i++)
11279 RTVEC_ELT (vs, i) = operands[2];
11281 emit_insn (gen_vec_initv16qi (reg, par));
11282 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI shift by a scalar SI count (register or constant)
;; through xop_lshlv16qi3.  The count is stored in all 16 slots of a
;; PARALLEL and loaded into a fresh V16QI register with vec_initv16qi
;; before the insn is emitted.
11286 (define_expand "lshlv16qi3"
11287 [(match_operand:V16QI 0 "register_operand" "")
11288 (match_operand:V16QI 1 "register_operand" "")
11289 (match_operand:SI 2 "nonmemory_operand" "")]
11292 rtvec vs = rtvec_alloc (16);
11293 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11294 rtx reg = gen_reg_rtx (V16QImode);
11296 for (i = 0; i < 16; i++)
11297 RTVEC_ELT (vs, i) = operands[2];
11299 emit_insn (gen_vec_initv16qi (reg, par));
11300 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar SI count,
;; implemented as xop_ashlv16qi3 with a negated count vector.
;; A CONST_INT count is negated at expand time (GEN_INT (- INTVAL ...))
;; before being replicated into the 16 vector slots.  For a non-constant
;; count the replicated vector is negated with negv16qi2 and the shift is
;; emitted on the negated register.
11304 (define_expand "ashrv16qi3"
11305 [(match_operand:V16QI 0 "register_operand" "")
11306 (match_operand:V16QI 1 "register_operand" "")
11307 (match_operand:SI 2 "nonmemory_operand" "")]
11310 rtvec vs = rtvec_alloc (16);
11311 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11312 rtx reg = gen_reg_rtx (V16QImode);
11314 rtx ele = ((CONST_INT_P (operands[2]))
11315 ? GEN_INT (- INTVAL (operands[2]))
11318 for (i = 0; i < 16; i++)
11319 RTVEC_ELT (vs, i) = ele;
11321 emit_insn (gen_vec_initv16qi (reg, par));
11323 if (!CONST_INT_P (operands[2]))
11325 rtx neg = gen_reg_rtx (V16QImode);
11326 emit_insn (gen_negv16qi2 (neg, reg));
11327 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11330 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a scalar DI count,
;; implemented as xop_ashlv2di3 with a negated count in both elements.
;; The negated scalar "ele" is obtained in one of three ways:
;;   - CONST_INT count: negated at expand time;
;;   - count whose mode is not DImode: converted into a DImode register
;;     with convert_move, then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; "ele" is then placed in both slots of the PARALLEL and loaded with
;; vec_initv2di.
11335 (define_expand "ashrv2di3"
11336 [(match_operand:V2DI 0 "register_operand" "")
11337 (match_operand:V2DI 1 "register_operand" "")
11338 (match_operand:DI 2 "nonmemory_operand" "")]
11341 rtvec vs = rtvec_alloc (2);
11342 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11343 rtx reg = gen_reg_rtx (V2DImode);
11346 if (CONST_INT_P (operands[2]))
11347 ele = GEN_INT (- INTVAL (operands[2]));
11348 else if (GET_MODE (operands[2]) != DImode)
11350 rtx move = gen_reg_rtx (DImode);
11351 ele = gen_reg_rtx (DImode);
11352 convert_move (move, operands[2], false);
11353 emit_insn (gen_negdi2 (ele, move));
11357 ele = gen_reg_rtx (DImode);
11358 emit_insn (gen_negdi2 (ele, operands[2]));
11361 RTVEC_ELT (vs, 0) = ele;
11362 RTVEC_ELT (vs, 1) = ele;
11363 emit_insn (gen_vec_initv2di (reg, par));
11364 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11368 ;; XOP FRCZ support
;; Form over every FMAMODE mode: operand 1 may be a register or memory,
;; the result goes to an SSE register, and vfrcz<ssemodesuffix> is emitted.
11369 (define_insn "xop_frcz<mode>2"
11370 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11372 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11375 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11376 [(set_attr "type" "ssecvt1")
11377 (set_attr "mode" "<MODE>")])
;; Expander for the FRCZ form on the 128-bit float vector modes (VF_128).
;; The preparation statement supplies operands[3] as the all-zero constant
;; of the vector mode.
11380 (define_expand "xop_vmfrcz<mode>2"
11381 [(set (match_operand:VF_128 0 "register_operand")
11384 [(match_operand:VF_128 1 "nonimmediate_operand")]
11390 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the FRCZ form on VF_128 modes.  Operand 2 must be a zero
;; constant (const0_operand); the instruction printed uses the scalar
;; suffix, vfrcz<ssescalarmodesuffix>.
11393 (define_insn "*xop_vmfrcz_<mode>"
11394 [(set (match_operand:VF_128 0 "register_operand" "=x")
11397 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11399 (match_operand:VF_128 2 "const0_operand")
11402 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11403 [(set_attr "type" "ssecvt1")
11404 (set_attr "mode" "<MODE>")])
;; Integer vector comparison on 128-bit integer modes (VI_128).  The
;; comparison code is operand 1, accepted by ix86_comparison_int_operator,
;; and is printed through the %Y1 output modifier to form vpcom<cond><suffix>.
;; NOTE(review): %Y presumably prints the XOP condition mnemonic -- confirm
;; in the i386 operand-printing code.
11406 (define_insn "xop_maskcmp<mode>3"
11407 [(set (match_operand:VI_128 0 "register_operand" "=x")
11408 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11409 [(match_operand:VI_128 2 "register_operand" "x")
11410 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11412 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11413 [(set_attr "type" "sse4arg")
11414 (set_attr "prefix_data16" "0")
11415 (set_attr "prefix_rep" "0")
11416 (set_attr "prefix_extra" "2")
11417 (set_attr "length_immediate" "1")
11418 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the comparison code must be
;; accepted by ix86_comparison_uns_operator and the mnemonic carries a "u"
;; between the condition and the element-size suffix.
11420 (define_insn "xop_maskcmp_uns<mode>3"
11421 [(set (match_operand:VI_128 0 "register_operand" "=x")
11422 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11423 [(match_operand:VI_128 2 "register_operand" "x")
11424 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11426 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11427 [(set_attr "type" "ssecmp")
11428 (set_attr "prefix_data16" "0")
11429 (set_attr "prefix_rep" "0")
11430 (set_attr "prefix_extra" "2")
11431 (set_attr "length_immediate" "1")
11432 (set_attr "mode" "TI")])
11434 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11435 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11436 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, so it is opaque to
;; the RTL optimizers; the printed instruction is the same as for
;; xop_maskcmp_uns<mode>3.
11437 (define_insn "xop_maskcmp_uns2<mode>3"
11438 [(set (match_operand:VI_128 0 "register_operand" "=x")
11440 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11441 [(match_operand:VI_128 2 "register_operand" "x")
11442 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11443 UNSPEC_XOP_UNSIGNED_CMP))]
11445 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11446 [(set_attr "type" "ssecmp")
11447 (set_attr "prefix_data16" "0")
11448 (set_attr "prefix_extra" "2")
11449 (set_attr "length_immediate" "1")
11450 (set_attr "mode" "TI")])
11452 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11453 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selecting the mnemonic: non-zero
;; prints vpcomtrue, zero prints vpcomfalse.
11454 (define_insn "xop_pcom_tf<mode>3"
11455 [(set (match_operand:VI_128 0 "register_operand" "=x")
11457 [(match_operand:VI_128 1 "register_operand" "x")
11458 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11459 (match_operand:SI 3 "const_int_operand" "n")]
11460 UNSPEC_XOP_TRUEFALSE))]
11463 return ((INTVAL (operands[3]) != 0)
11464 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11465 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11467 [(set_attr "type" "ssecmp")
11468 (set_attr "prefix_data16" "0")
11469 (set_attr "prefix_extra" "2")
11470 (set_attr "length_immediate" "1")
11471 (set_attr "mode" "TI")])
;; XOP two-source permute: operands 1 and 2 are the float data vectors,
;; operand 3 is the integer selector vector (<sseintvecmode>) and operand 4
;; is a 0..3 immediate.  Emits vpermil2<ssemodesuffix>.
;; Operand 2 must not carry the `%' (commutative) constraint modifier: the
;; operand that follows it is the selector, which has a different mode and
;; a different role, so the two may never be interchanged.
11473 (define_insn "xop_vpermil2<mode>3"
11474 [(set (match_operand:VF 0 "register_operand" "=x")
11476 [(match_operand:VF 1 "register_operand" "x")
11477 (match_operand:VF 2 "nonimmediate_operand" "x")
11478 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11479 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11482 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11483 [(set_attr "type" "sse4arg")
11484 (set_attr "length_immediate" "1")
11485 (set_attr "mode" "<MODE>")])
11487 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AESENC on V2DI.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and prints the two-operand aesenc; alternative 1 (isa avx)
;; prints the three-operand VEX form vaesenc.
11489 (define_insn "aesenc"
11490 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11491 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11492 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11496 aesenc\t{%2, %0|%0, %2}
11497 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11498 [(set_attr "isa" "noavx,avx")
11499 (set_attr "type" "sselog1")
11500 (set_attr "prefix_extra" "1")
11501 (set_attr "prefix" "orig,vex")
11502 (set_attr "mode" "TI")])
;; AESENCLAST on V2DI, modelled as UNSPEC_AESENCLAST.  Alternative 0 (isa
;; noavx) ties operand 1 to the destination and prints aesenclast;
;; alternative 1 (isa avx) prints the three-operand vaesenclast.
11504 (define_insn "aesenclast"
11505 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11506 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11507 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11508 UNSPEC_AESENCLAST))]
11511 aesenclast\t{%2, %0|%0, %2}
11512 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11513 [(set_attr "isa" "noavx,avx")
11514 (set_attr "type" "sselog1")
11515 (set_attr "prefix_extra" "1")
11516 (set_attr "prefix" "orig,vex")
11517 (set_attr "mode" "TI")])
;; AESDEC on V2DI.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and prints the two-operand aesdec; alternative 1 (isa avx)
;; prints the three-operand VEX form vaesdec.
11519 (define_insn "aesdec"
11520 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11521 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11522 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11526 aesdec\t{%2, %0|%0, %2}
11527 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11528 [(set_attr "isa" "noavx,avx")
11529 (set_attr "type" "sselog1")
11530 (set_attr "prefix_extra" "1")
11531 (set_attr "prefix" "orig,vex")
11532 (set_attr "mode" "TI")])
;; AESDECLAST on V2DI, modelled as UNSPEC_AESDECLAST.  Alternative 0 (isa
;; noavx) ties operand 1 to the destination and prints aesdeclast;
;; alternative 1 (isa avx) prints the three-operand vaesdeclast.
11534 (define_insn "aesdeclast"
11535 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11536 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11537 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11538 UNSPEC_AESDECLAST))]
11541 aesdeclast\t{%2, %0|%0, %2}
11542 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11543 [(set_attr "isa" "noavx,avx")
11544 (set_attr "type" "sselog1")
11545 (set_attr "prefix_extra" "1")
11546 (set_attr "prefix" "orig,vex")
11547 (set_attr "mode" "TI")])
;; AESIMC on V2DI: single source operand (register or memory), single
;; alternative.  The %v prefix in the template and the maybe_vex prefix
;; attribute let the same pattern serve the legacy and VEX encodings.
11549 (define_insn "aesimc"
11550 [(set (match_operand:V2DI 0 "register_operand" "=x")
11551 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11554 "%vaesimc\t{%1, %0|%0, %1}"
11555 [(set_attr "type" "sselog1")
11556 (set_attr "prefix_extra" "1")
11557 (set_attr "prefix" "maybe_vex")
11558 (set_attr "mode" "TI")])
;; AESKEYGENASSIST on V2DI, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is a register or memory source and operand 2 an 8-bit immediate (0..255).
;; Uses the %v / maybe_vex mechanism for legacy vs. VEX encoding.
11560 (define_insn "aeskeygenassist"
11561 [(set (match_operand:V2DI 0 "register_operand" "=x")
11562 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11563 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11564 UNSPEC_AESKEYGENASSIST))]
11566 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11567 [(set_attr "type" "sselog1")
11568 (set_attr "prefix_extra" "1")
11569 (set_attr "length_immediate" "1")
11570 (set_attr "prefix" "maybe_vex")
11571 (set_attr "mode" "TI")])
;; PCLMULQDQ on V2DI with an 8-bit immediate (operand 3).  Alternative 0
;; (isa noavx) ties operand 1 to the destination and prints pclmulqdq;
;; alternative 1 (isa avx) prints the non-destructive vpclmulqdq.
11573 (define_insn "pclmulqdq"
11574 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11575 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11576 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11577 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11581 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11582 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11583 [(set_attr "isa" "noavx,avx")
11584 (set_attr "type" "sselog1")
11585 (set_attr "prefix_extra" "1")
11586 (set_attr "length_immediate" "1")
11587 (set_attr "prefix" "orig,vex")
11588 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile marker; it is followed by one SET per SSE register
;; (16 registers when TARGET_64BIT, otherwise 8) that stores the V8SImode
;; zero constant into that register, so the RTL states that every register
;; is cleared.
11590 (define_expand "avx_vzeroall"
11591 [(match_par_dup 0 [(const_int 0)])]
11594 int nregs = TARGET_64BIT ? 16 : 8;
11597 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11599 XVECEXP (operands[0], 0, 0)
11600 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11603 for (regno = 0; regno < nregs; regno++)
11604 XVECEXP (operands[0], 0, regno + 1)
11605 = gen_rtx_SET (VOIDmode,
11606 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11607 CONST0_RTX (V8SImode));
;; Insn matching a PARALLEL accepted by the vzeroall_operation predicate
;; whose first element is the UNSPECV_VZEROALL unspec_volatile.  The
;; attributes mark it as having no ModRM byte and no memory access.
11610 (define_insn "*avx_vzeroall"
11611 [(match_parallel 0 "vzeroall_operation"
11612 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11615 [(set_attr "type" "sse")
11616 (set_attr "modrm" "0")
11617 (set_attr "memory" "none")
11618 (set_attr "prefix" "vex")
11619 (set_attr "mode" "OI")])
11621 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11622 ;; if the upper 128bits are unused.
;; Represented as an unspec_volatile (UNSPECV_VZEROUPPER) carrying one
;; const_int operand; the attributes mark it as having no ModRM byte and no
;; memory access.
11623 (define_insn "avx_vzeroupper"
11624 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11625 UNSPECV_VZEROUPPER)]
11628 [(set_attr "type" "sse")
11629 (set_attr "modrm" "0")
11630 (set_attr "memory" "none")
11631 (set_attr "prefix" "vex")
11632 (set_attr "mode" "OI")])
;; Maps an integer vector mode to the 128-bit vector mode with the same
;; element type: 256-bit modes map to their half-width mode, 128-bit modes
;; map to themselves.
11634 (define_mode_attr AVXTOSSEMODE
11635 [(V4DI "V2DI") (V2DI "V2DI")
11636 (V8SI "V4SI") (V4SI "V4SI")
11637 (V16HI "V8HI") (V8HI "V8HI")
11638 (V32QI "V16QI") (V16QI "V16QI")])
;; AVX2 integer broadcast.  The source (operand 1, register or memory) is
;; always a 128-bit vector in <AVXTOSSEMODE>; element 0 of it is selected
;; and vpbroadcast<ssemodesuffix> is emitted into the destination.
11640 (define_insn "avx2_pbroadcast<mode>"
11641 [(set (match_operand:VI 0 "register_operand" "=x")
11643 (vec_select:<ssescalarmode>
11644 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11645 (parallel [(const_int 0)]))))]
11647 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11648 [(set_attr "type" "ssemov")
11649 (set_attr "prefix_extra" "1")
11650 (set_attr "prefix" "vex")
11651 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute of a V8SI vector: emits vpermd with operand 1 in a
;; register and operand 2 in a register or memory.
11653 (define_insn "avx2_permvarv8si"
11654 [(set (match_operand:V8SI 0 "register_operand" "=x")
11656 [(match_operand:V8SI 1 "register_operand" "x")
11657 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11660 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11661 [(set_attr "type" "sselog")
11662 (set_attr "prefix" "vex")
11663 (set_attr "mode" "OI")])
;; Immediate-controlled permute of a V4DF vector: emits vpermpd with an
;; 8-bit immediate (operand 2).  Operand 1 may be a register or memory; the
;; predicate is nonimmediate_operand so that it agrees with the "xm"
;; constraint and a memory source can be matched directly instead of being
;; forced into a register first.
11665 (define_insn "avx2_permv4df"
11666 [(set (match_operand:V4DF 0 "register_operand" "=x")
11668 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11669 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11672 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11673 [(set_attr "type" "sselog")
11674 (set_attr "prefix_extra" "1")
11675 (set_attr "prefix" "vex")
11676 (set_attr "mode" "OI")])
;; Variable permute of a V8SF vector: emits vpermps with operand 1 in a
;; register and operand 2 in a register or memory.
11678 (define_insn "avx2_permvarv8sf"
11679 [(set (match_operand:V8SF 0 "register_operand" "=x")
11681 [(match_operand:V8SF 1 "register_operand" "x")
11682 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11685 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11686 [(set_attr "type" "sselog")
11687 (set_attr "prefix" "vex")
11688 (set_attr "mode" "OI")])
;; Expander taking the permute control as one 8-bit immediate.  It splits
;; the immediate into four 2-bit element selectors (bits 1:0, 3:2, 5:4, 7:6)
;; and emits avx2_permv4di_1 with them as separate operands.
11690 (define_expand "avx2_permv4di"
11691 [(match_operand:V4DI 0 "register_operand" "")
11692 (match_operand:V4DI 1 "nonimmediate_operand" "")
11693 (match_operand:SI 2 "const_0_to_255_operand" "")]
11696 int mask = INTVAL (operands[2]);
11697 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11698 GEN_INT ((mask >> 0) & 3),
11699 GEN_INT ((mask >> 2) & 3),
11700 GEN_INT ((mask >> 4) & 3),
11701 GEN_INT ((mask >> 6) & 3)));
;; V4DI permute with the four element selectors (each 0..3) as operands
;; 2..5 of a PARALLEL.  At output time the selectors are packed back into a
;; single immediate (2 bits each, operand 2 in the low bits), which replaces
;; operands[2] for printing vpermq.
11705 (define_insn "avx2_permv4di_1"
11706 [(set (match_operand:V4DI 0 "register_operand" "=x")
11708 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11709 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11710 (match_operand 3 "const_0_to_3_operand" "")
11711 (match_operand 4 "const_0_to_3_operand" "")
11712 (match_operand 5 "const_0_to_3_operand" "")])))]
11716 mask |= INTVAL (operands[2]) << 0;
11717 mask |= INTVAL (operands[3]) << 2;
11718 mask |= INTVAL (operands[4]) << 4;
11719 mask |= INTVAL (operands[5]) << 6;
11720 operands[2] = GEN_INT (mask);
11721 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11723 [(set_attr "type" "sselog")
11724 (set_attr "prefix" "vex")
11725 (set_attr "mode" "OI")])
;; Two-source 128-bit-lane permute on V4DI: emits vperm2i128 with an 8-bit
;; immediate (operand 3); operand 2 may be a register or memory.
11727 (define_insn "avx2_permv2ti"
11728 [(set (match_operand:V4DI 0 "register_operand" "=x")
11730 [(match_operand:V4DI 1 "register_operand" "x")
11731 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11732 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11735 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11736 [(set_attr "type" "sselog")
11737 (set_attr "prefix" "vex")
11738 (set_attr "mode" "OI")])
;; Broadcast element 0 of a V2DF register to all four lanes of a V4DF
;; destination; emits vbroadcastsd with a register source.
11740 (define_insn "avx2_vec_dupv4df"
11741 [(set (match_operand:V4DF 0 "register_operand" "=x")
11742 (vec_duplicate:V4DF
11744 (match_operand:V2DF 1 "register_operand" "x")
11745 (parallel [(const_int 0)]))))]
11747 "vbroadcastsd\t{%1, %0|%0, %1}"
11748 [(set_attr "type" "sselog1")
11749 (set_attr "prefix" "vex")
11750 (set_attr "mode" "V4DF")])
11752 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit modes with 32-bit or 64-bit elements.
11753 (define_mode_iterator AVX_VEC_DUP_MODE
11754 [V8SI V8SF V4DI V4DF])
;; Duplicate a scalar into every element of a 256-bit vector.  Alternative 0
;; takes the scalar from memory and emits vbroadcast<ssescalarmodesuffix>;
;; alternative 1 takes it from an SSE register and is disparaged with `?'.
11756 (define_insn "vec_dup<mode>"
11757 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11758 (vec_duplicate:AVX_VEC_DUP_MODE
11759 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11762 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11764 [(set_attr "type" "ssemov")
11765 (set_attr "prefix_extra" "1")
11766 (set_attr "prefix" "vex")
11767 (set_attr "mode" "V8SF")])
;; Broadcast a 128-bit half-vector from memory into a 256-bit integer
;; vector (VI_256); emits vbroadcasti128.  Only a memory source is accepted.
11769 (define_insn "avx2_vbroadcasti128_<mode>"
11770 [(set (match_operand:VI_256 0 "register_operand" "=x")
11772 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11775 "vbroadcasti128\t{%1, %0|%0, %1}"
11776 [(set_attr "type" "ssemov")
11777 (set_attr "prefix_extra" "1")
11778 (set_attr "prefix" "vex")
11779 (set_attr "mode" "OI")])
;; Post-reload split (TARGET_AVX) of a 256-bit vec_duplicate whose scalar
;; source is a register.  The scalar is first duplicated into the
;; half-width vector that shares the destination's hard register
;; (operands[2]), then that half is concatenated with itself.
11782 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11783 (vec_duplicate:AVX_VEC_DUP_MODE
11784 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11785 "TARGET_AVX && reload_completed"
11786 [(set (match_dup 2)
11787 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11789 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11790 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast a 128-bit half-vector into both halves of a 256-bit vector.
;; Alternative 0: memory source, vbroadcast<i128>.
;; Alternative 1: source tied to the destination register, vinsert<i128>
;;   with immediate 1.
;; Alternative 2: other register source (disparaged), vperm2<i128> with
;;   immediate 0 on the 256-bit view (%t1) of the source.
11792 (define_insn "avx_vbroadcastf128_<mode>"
11793 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11795 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11799 vbroadcast<i128>\t{%1, %0|%0, %1}
11800 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11801 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11802 [(set_attr "type" "ssemov,sselog1,sselog1")
11803 (set_attr "prefix_extra" "1")
11804 (set_attr "length_immediate" "0,1,1")
11805 (set_attr "prefix" "vex")
11806 (set_attr "mode" "<sseinsnmode>")])
11808 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11809 ;; If it so happens that the input is in memory, use vbroadcast.
11810 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant.  Operand 3 is the index ELT of the element to broadcast.
;; On the vbroadcastss path the memory address is advanced by ELT * 4 bytes
;; so that it points at the chosen SFmode element; on the vpermilps path
;; the immediate is ELT * 0x55, i.e. the 2-bit index repeated in all four
;; selector fields.
11811 (define_insn "*avx_vperm_broadcast_v4sf"
11812 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11814 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11815 (match_parallel 2 "avx_vbroadcast_operand"
11816 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11819 int elt = INTVAL (operands[3]);
11820 switch (which_alternative)
11824 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11825 return "vbroadcastss\t{%1, %0|%0, %1}";
11827 operands[2] = GEN_INT (elt * 0x55);
11828 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11830 gcc_unreachable ();
11833 [(set_attr "type" "ssemov,ssemov,sselog1")
11834 (set_attr "prefix_extra" "1")
11835 (set_attr "length_immediate" "0,0,1")
11836 (set_attr "prefix" "vex")
11837 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float variant of the broadcast-as-vec_select pattern, split
;; after reload.  The preparation code has two paths:
;;  - it emits avx_vpermil<mode> to replicate element ELT within its 128-bit
;;    lane, then avx_vperm2f128<mode>3 to copy that lane to both halves of
;;    the destination;
;;  - it rewrites operands[1] to address the single scalar element (offset
;;    ELT * element size) so the replacement vec_duplicate pattern applies.
;; NOTE(review): presumably the first path is for a register source and the
;; second for a memory source -- the selecting condition is not visible here.
11839 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11840 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11842 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11843 (match_parallel 2 "avx_vbroadcast_operand"
11844 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11847 "&& reload_completed"
11848 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11850 rtx op0 = operands[0], op1 = operands[1];
11851 int elt = INTVAL (operands[3]);
11857 /* Shuffle element we care about into all elements of the 128-bit lane.
11858 The other lane gets shuffled too, but we don't care. */
11859 if (<MODE>mode == V4DFmode)
11860 mask = (elt & 1 ? 15 : 0);
11862 mask = (elt & 3) * 0x55;
11863 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11865 /* Shuffle the lane we care about into both lanes of the dest. */
11866 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11867 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11871 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11872 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate vpermilpd (DFmode vectors, VF2).  The immediate
;; is decoded into explicit element indices, one control bit per element:
;; bits 0 and 1 choose between elements 0/1, and for V4DF bits 2 and 3
;; choose between elements 2/3 (hence the "+ 2").  The indices are wrapped
;; in a PARALLEL.
11875 (define_expand "avx_vpermil<mode>"
11876 [(set (match_operand:VF2 0 "register_operand" "")
11878 (match_operand:VF2 1 "nonimmediate_operand" "")
11879 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11882 int mask = INTVAL (operands[2]);
11883 rtx perm[<ssescalarnum>];
11885 perm[0] = GEN_INT (mask & 1);
11886 perm[1] = GEN_INT ((mask >> 1) & 1);
11887 if (<MODE>mode == V4DFmode)
11889 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11890 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11894 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate vpermilps (SFmode vectors, VF1).  The immediate
;; holds four 2-bit selectors, decoded into element indices 0..3.  For V8SF
;; the same four selectors are reused for the upper 128-bit lane with 4
;; added to each index.  The indices are wrapped in a PARALLEL.
11897 (define_expand "avx_vpermil<mode>"
11898 [(set (match_operand:VF1 0 "register_operand" "")
11900 (match_operand:VF1 1 "nonimmediate_operand" "")
11901 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11904 int mask = INTVAL (operands[2]);
11905 rtx perm[<ssescalarnum>];
11907 perm[0] = GEN_INT (mask & 3);
11908 perm[1] = GEN_INT ((mask >> 2) & 3);
11909 perm[2] = GEN_INT ((mask >> 4) & 3);
11910 perm[3] = GEN_INT ((mask >> 6) & 3);
11911 if (<MODE>mode == V8SFmode)
11913 perm[4] = GEN_INT ((mask & 3) + 4);
11914 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11915 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11916 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11920 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate vpermilps/vpermilpd written with an explicit index PARALLEL.
;; avx_vpermilp_parallel is used twice: in the insn condition, where a
;; non-zero result accepts the PARALLEL, and in the output code, where its
;; result minus one is the immediate to print.
11923 (define_insn "*avx_vpermilp<mode>"
11924 [(set (match_operand:VF 0 "register_operand" "=x")
11926 (match_operand:VF 1 "nonimmediate_operand" "xm")
11927 (match_parallel 2 ""
11928 [(match_operand 3 "const_int_operand" "")])))]
11930 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11932 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11933 operands[2] = GEN_INT (mask);
11934 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11936 [(set_attr "type" "sselog")
11937 (set_attr "prefix_extra" "1")
11938 (set_attr "length_immediate" "1")
11939 (set_attr "prefix" "vex")
11940 (set_attr "mode" "<MODE>")])
;; Variable-control vpermilps/vpermilpd: operand 1 is the float data
;; vector, operand 2 the integer control vector (<sseintvecmode>) in a
;; register or memory.
11942 (define_insn "avx_vpermilvar<mode>3"
11943 [(set (match_operand:VF 0 "register_operand" "=x")
11945 [(match_operand:VF 1 "register_operand" "x")
11946 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11949 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11950 [(set_attr "type" "sselog")
11951 (set_attr "prefix_extra" "1")
11952 (set_attr "prefix" "vex")
11953 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128/vperm2i128 with an 8-bit immediate.  The default
;; expansion is the UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7
;; of the immediate is set (mask & 0x88 == 0) the preparation code instead
;; builds a vec_select from the vec_concat of the two sources: the low half
;; of the result takes the 128-bit lane numbered by bits 1:0, the high half
;; the lane numbered by bits 5:4.
11955 (define_expand "avx_vperm2f128<mode>3"
11956 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11957 (unspec:AVX256MODE2P
11958 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11959 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11960 (match_operand:SI 3 "const_0_to_255_operand" "")]
11961 UNSPEC_VPERMIL2F128))]
11964 int mask = INTVAL (operands[3]);
11965 if ((mask & 0x88) == 0)
11967 rtx perm[<ssescalarnum>], t1, t2;
11968 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11970 base = (mask & 3) * nelt2;
11971 for (i = 0; i < nelt2; ++i)
11972 perm[i] = GEN_INT (base + i);
11974 base = ((mask >> 4) & 3) * nelt2;
11975 for (i = 0; i < nelt2; ++i)
11976 perm[i + nelt2] = GEN_INT (base + i);
11978 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11979 operands[1], operands[2]);
11980 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11981 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11982 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11988 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11989 ;; means that in order to represent this properly in rtl we'd have to
11990 ;; nest *another* vec_concat with a zero operand and do the select from
11991 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this UNSPEC_VPERMIL2F128 form, which accepts any 8-bit immediate.
11992 (define_insn "*avx_vperm2f128<mode>_full"
11993 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11994 (unspec:AVX256MODE2P
11995 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11996 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11997 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11998 UNSPEC_VPERMIL2F128))]
12000 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12001 [(set_attr "type" "sselog")
12002 (set_attr "prefix_extra" "1")
12003 (set_attr "length_immediate" "1")
12004 (set_attr "prefix" "vex")
12005 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128/vperm2i128 expressed as a vec_select from the vec_concat of
;; both sources.  avx_vperm2f128_parallel is used twice: in the insn
;; condition, where a non-zero result accepts the PARALLEL, and in the
;; output code, where its result minus one is the immediate to print.
12007 (define_insn "*avx_vperm2f128<mode>_nozero"
12008 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12009 (vec_select:AVX256MODE2P
12010 (vec_concat:<ssedoublevecmode>
12011 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12012 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12013 (match_parallel 3 ""
12014 [(match_operand 4 "const_int_operand" "")])))]
12016 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12018 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12019 operands[3] = GEN_INT (mask);
12020 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12022 [(set_attr "type" "sselog")
12023 (set_attr "prefix_extra" "1")
12024 (set_attr "length_immediate" "1")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a 128-bit half (operand 2) into a 256-bit vector
;; (operand 1).  Operand 3 is a 0/1 immediate; the switch on it picks
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, which is then emitted.
;; NOTE(review): presumably 0 selects the low half and 1 the high half --
;; the case labels are not visible in this excerpt.
12028 (define_expand "avx_vinsertf128<mode>"
12029 [(match_operand:V_256 0 "register_operand" "")
12030 (match_operand:V_256 1 "register_operand" "")
12031 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12032 (match_operand:SI 3 "const_0_to_1_operand" "")]
12035 rtx (*insn)(rtx, rtx, rtx);
12037 switch (INTVAL (operands[3]))
12040 insn = gen_vec_set_lo_<mode>;
12043 insn = gen_vec_set_hi_<mode>;
12046 gcc_unreachable ();
12049 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128 bits of a V4DI vector: the result combines operand 2
;; (V2DI) with elements 2 and 3 of operand 1.  Emits vinserti128 with
;; immediate 0.
12053 (define_insn "avx2_vec_set_lo_v4di"
12054 [(set (match_operand:V4DI 0 "register_operand" "=x")
12056 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12058 (match_operand:V4DI 1 "register_operand" "x")
12059 (parallel [(const_int 2) (const_int 3)]))))]
12061 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12062 [(set_attr "type" "sselog")
12063 (set_attr "prefix_extra" "1")
12064 (set_attr "length_immediate" "1")
12065 (set_attr "prefix" "vex")
12066 (set_attr "mode" "OI")])
;; Replace the high 128 bits of a V4DI vector: the result combines elements
;; 0 and 1 of operand 1 with operand 2 (V2DI).  Emits vinserti128 with
;; immediate 1.
12068 (define_insn "avx2_vec_set_hi_v4di"
12069 [(set (match_operand:V4DI 0 "register_operand" "=x")
12072 (match_operand:V4DI 1 "register_operand" "x")
12073 (parallel [(const_int 0) (const_int 1)]))
12074 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12076 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12077 [(set_attr "type" "sselog")
12078 (set_attr "prefix_extra" "1")
12079 (set_attr "length_immediate" "1")
12080 (set_attr "prefix" "vex")
12081 (set_attr "mode" "OI")])
;; Replace the low half of a 256-bit vector with four-element modes
;; (VI8F_256): operand 2 becomes the low half and elements 2..3 of operand 1
;; are kept as the high half.  Emits vinsert<i128> with immediate 0.
12083 (define_insn "vec_set_lo_<mode>"
12084 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12085 (vec_concat:VI8F_256
12086 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12087 (vec_select:<ssehalfvecmode>
12088 (match_operand:VI8F_256 1 "register_operand" "x")
12089 (parallel [(const_int 2) (const_int 3)]))))]
12091 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12092 [(set_attr "type" "sselog")
12093 (set_attr "prefix_extra" "1")
12094 (set_attr "length_immediate" "1")
12095 (set_attr "prefix" "vex")
12096 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a 256-bit vector with four-element modes
;; (VI8F_256): elements 0..1 of operand 1 are kept as the low half and
;; operand 2 becomes the high half.  Emits vinsert<i128> with immediate 1.
12098 (define_insn "vec_set_hi_<mode>"
12099 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12100 (vec_concat:VI8F_256
12101 (vec_select:<ssehalfvecmode>
12102 (match_operand:VI8F_256 1 "register_operand" "x")
12103 (parallel [(const_int 0) (const_int 1)]))
12104 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12106 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12107 [(set_attr "type" "sselog")
12108 (set_attr "prefix_extra" "1")
12109 (set_attr "length_immediate" "1")
12110 (set_attr "prefix" "vex")
12111 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a 256-bit vector with eight-element modes
;; (VI4F_256): operand 2 becomes the low half and elements 4..7 of operand 1
;; are kept as the high half.  Emits vinsert<i128> with immediate 0.
12113 (define_insn "vec_set_lo_<mode>"
12114 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12115 (vec_concat:VI4F_256
12116 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12117 (vec_select:<ssehalfvecmode>
12118 (match_operand:VI4F_256 1 "register_operand" "x")
12119 (parallel [(const_int 4) (const_int 5)
12120 (const_int 6) (const_int 7)]))))]
12122 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12123 [(set_attr "type" "sselog")
12124 (set_attr "prefix_extra" "1")
12125 (set_attr "length_immediate" "1")
12126 (set_attr "prefix" "vex")
12127 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a 256-bit vector with eight-element modes
;; (VI4F_256): elements 0..3 of operand 1 are kept as the low half and
;; operand 2 becomes the high half.  Emits vinsert<i128> with immediate 1.
12129 (define_insn "vec_set_hi_<mode>"
12130 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12131 (vec_concat:VI4F_256
12132 (vec_select:<ssehalfvecmode>
12133 (match_operand:VI4F_256 1 "register_operand" "x")
12134 (parallel [(const_int 0) (const_int 1)
12135 (const_int 2) (const_int 3)]))
12136 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12138 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12139 [(set_attr "type" "sselog")
12140 (set_attr "prefix_extra" "1")
12141 (set_attr "length_immediate" "1")
12142 (set_attr "prefix" "vex")
12143 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI) is combined
;; with elements 8..15 of operand 1.  Emits vinsert%~128 with immediate 0.
;; NOTE(review): %~ presumably prints "i" or "f" depending on AVX2
;; availability -- confirm in the i386 operand-printing code.
12145 (define_insn "vec_set_lo_v16hi"
12146 [(set (match_operand:V16HI 0 "register_operand" "=x")
12148 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12150 (match_operand:V16HI 1 "register_operand" "x")
12151 (parallel [(const_int 8) (const_int 9)
12152 (const_int 10) (const_int 11)
12153 (const_int 12) (const_int 13)
12154 (const_int 14) (const_int 15)]))))]
12156 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12157 [(set_attr "type" "sselog")
12158 (set_attr "prefix_extra" "1")
12159 (set_attr "length_immediate" "1")
12160 (set_attr "prefix" "vex")
12161 (set_attr "mode" "OI")])
;; Replace the high half of a V16HI vector: elements 0..7 of operand 1 are
;; combined with operand 2 (V8HI).  Emits vinsert%~128 with immediate 1.
12163 (define_insn "vec_set_hi_v16hi"
12164 [(set (match_operand:V16HI 0 "register_operand" "=x")
12167 (match_operand:V16HI 1 "register_operand" "x")
12168 (parallel [(const_int 0) (const_int 1)
12169 (const_int 2) (const_int 3)
12170 (const_int 4) (const_int 5)
12171 (const_int 6) (const_int 7)]))
12172 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12174 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12175 [(set_attr "type" "sselog")
12176 (set_attr "prefix_extra" "1")
12177 (set_attr "length_immediate" "1")
12178 (set_attr "prefix" "vex")
12179 (set_attr "mode" "OI")])
;; Replace the low half of a V32QI vector: operand 2 (V16QI) is combined
;; with elements 16..31 of operand 1.  Emits vinsert%~128 with immediate 0.
12181 (define_insn "vec_set_lo_v32qi"
12182 [(set (match_operand:V32QI 0 "register_operand" "=x")
12184 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12186 (match_operand:V32QI 1 "register_operand" "x")
12187 (parallel [(const_int 16) (const_int 17)
12188 (const_int 18) (const_int 19)
12189 (const_int 20) (const_int 21)
12190 (const_int 22) (const_int 23)
12191 (const_int 24) (const_int 25)
12192 (const_int 26) (const_int 27)
12193 (const_int 28) (const_int 29)
12194 (const_int 30) (const_int 31)]))))]
12196 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12197 [(set_attr "type" "sselog")
12198 (set_attr "prefix_extra" "1")
12199 (set_attr "length_immediate" "1")
12200 (set_attr "prefix" "vex")
12201 (set_attr "mode" "OI")])
;; Replace the high half of a V32QI vector: elements 0..15 of operand 1 are
;; combined with operand 2 (V16QI).  Emits vinsert%~128 with immediate 1.
12203 (define_insn "vec_set_hi_v32qi"
12204 [(set (match_operand:V32QI 0 "register_operand" "=x")
12207 (match_operand:V32QI 1 "register_operand" "x")
12208 (parallel [(const_int 0) (const_int 1)
12209 (const_int 2) (const_int 3)
12210 (const_int 4) (const_int 5)
12211 (const_int 6) (const_int 7)
12212 (const_int 8) (const_int 9)
12213 (const_int 10) (const_int 11)
12214 (const_int 12) (const_int 13)
12215 (const_int 14) (const_int 15)]))
12216 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12218 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12219 [(set_attr "type" "sselog")
12220 (set_attr "prefix_extra" "1")
12221 (set_attr "length_immediate" "1")
12222 (set_attr "prefix" "vex")
12223 (set_attr "mode" "OI")])
;; Masked vector load: operand 1 is the memory source, operand 2 the
;; integer mask vector in a register.  Note the operand numbering -- the
;; mask is operand 2 but is printed before the memory operand in AT&T order.
;; Emits v<sseintprefix>maskmov<ssemodesuffix>.
12225 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12226 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12228 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12229 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12232 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12233 [(set_attr "type" "sselog1")
12234 (set_attr "prefix_extra" "1")
12235 (set_attr "prefix" "vex")
12236 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store: operand 0 is the memory destination, operand 1 the
;; integer mask vector and operand 2 the data vector, both in registers.
;; Emits v<sseintprefix>maskmov<ssemodesuffix>.
12238 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12239 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12241 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12242 (match_operand:V48_AVX2 2 "register_operand" "x")
12246 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12247 [(set_attr "type" "sselog1")
12248 (set_attr "prefix_extra" "1")
12249 (set_attr "prefix" "vex")
12250 (set_attr "mode" "<sseinsnmode>")])
;; Cast of a 128-bit vector to the 256-bit vector of the same element type,
;; represented as an unspec and split after reload into a plain move.  The
;; preparation code re-views one side in the other side's mode using the
;; same hard register number (op0 in the half mode, or op1 in the full
;; mode) and then emits emit_move_insn (op0, op1).
;; NOTE(review): the condition choosing between the two re-views is not
;; visible in this excerpt.
12252 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12253 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12254 (unspec:AVX256MODE2P
12255 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12259 "&& reload_completed"
12262 rtx op0 = operands[0];
12263 rtx op1 = operands[1];
12265 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12267 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12268 emit_move_insn (op0, op1);
;; Standard-name expander for initialising a 256-bit vector (V_256) from
;; operand 1; all work is delegated to ix86_expand_vector_init, called with
;; false as its first argument.
12272 (define_expand "vec_init<mode>"
12273 [(match_operand:V_256 0 "register_operand" "")
12274 (match_operand 1 "" "")]
12277 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for extracting a 128-bit half (V2DI) from a V4DI register.
;; Operand 2 is a 0/1 immediate; the switch on it picks
;; gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di, which is then emitted.
;; NOTE(review): presumably 0 selects the low half and 1 the high half --
;; the case labels are not visible in this excerpt.
12281 (define_expand "avx2_extracti128"
12282 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12283 (match_operand:V4DI 1 "register_operand" "")
12284 (match_operand:SI 2 "const_0_to_1_operand" "")]
12287 rtx (*insn)(rtx, rtx);
12289 switch (INTVAL (operands[2]))
12292 insn = gen_vec_extract_lo_v4di;
12295 insn = gen_vec_extract_hi_v4di;
12298 gcc_unreachable ();
12301 emit_insn (insn (operands[0], operands[1]));
;; Expander for inserting a 128-bit half (operand 2, V2DI) into a V4DI
;; vector (operand 1).  Operand 3 is a 0/1 immediate; the switch on it picks
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, which is then
;; emitted.
;; NOTE(review): presumably 0 selects the low half and 1 the high half --
;; the case labels are not visible in this excerpt.
12305 (define_expand "avx2_inserti128"
12306 [(match_operand:V4DI 0 "register_operand" "")
12307 (match_operand:V4DI 1 "register_operand" "")
12308 (match_operand:V2DI 2 "nonimmediate_operand" "")
12309 (match_operand:SI 3 "const_0_to_1_operand" "")]
12312 rtx (*insn)(rtx, rtx, rtx);
12314 switch (INTVAL (operands[3]))
12317 insn = gen_avx2_vec_set_lo_v4di;
12320 insn = gen_avx2_vec_set_hi_v4di;
12323 gcc_unreachable ();
12326 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable shift of a V8SI vector; emits vpsravd.  The RTL
;; spells the operation out lane by lane: result element i is built from
;; element i of operand 1 and element i of operand 2.  The upper four
;; lanes must therefore select elements 4..7; selecting 0..3 again would
;; describe a different operation from the one the instruction performs and
;; could mislead RTL simplification.
12330 (define_insn "avx2_ashrvv8si"
12331 [(set (match_operand:V8SI 0 "register_operand" "=x")
12337 (match_operand:V8SI 1 "register_operand" "x")
12338 (parallel [(const_int 0)]))
12340 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12341 (parallel [(const_int 0)])))
12345 (parallel [(const_int 1)]))
12348 (parallel [(const_int 1)]))))
12353 (parallel [(const_int 2)]))
12356 (parallel [(const_int 2)])))
12360 (parallel [(const_int 3)]))
12363 (parallel [(const_int 3)])))))
12369 (parallel [(const_int 4)]))
12372 (parallel [(const_int 4)])))
12376 (parallel [(const_int 5)]))
12379 (parallel [(const_int 5)]))))
12384 (parallel [(const_int 6)]))
12387 (parallel [(const_int 6)])))
12391 (parallel [(const_int 7)]))
12394 (parallel [(const_int 7)])))))))]
12396 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12397 [(set_attr "type" "sseishft")
12398 (set_attr "prefix" "vex")
12399 (set_attr "mode" "OI")])
;; Per-element variable shift of a V4SI vector; emits vpsravd.  The RTL
;; spells the operation out lane by lane, pairing element i of operand 1
;; with element i of operand 2 for i = 0..3.
12401 (define_insn "avx2_ashrvv4si"
12402 [(set (match_operand:V4SI 0 "register_operand" "=x")
12407 (match_operand:V4SI 1 "register_operand" "x")
12408 (parallel [(const_int 0)]))
12410 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12411 (parallel [(const_int 0)])))
12415 (parallel [(const_int 1)]))
12418 (parallel [(const_int 1)]))))
12423 (parallel [(const_int 2)]))
12426 (parallel [(const_int 2)])))
12430 (parallel [(const_int 3)]))
12433 (parallel [(const_int 3)]))))))]
12435 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12436 [(set_attr "type" "sseishft")
12437 (set_attr "prefix" "vex")
12438 (set_attr "mode" "TI")])
;; Per-element variable logical shift of a V8SI vector, iterated over the
;; <lshift> codes; emits vp<lshift_insn>vd.  The RTL spells the operation
;; out lane by lane: result element i is built from element i of operand 1
;; and element i of operand 2.  The upper four lanes must therefore select
;; elements 4..7; selecting 0..3 again would describe a different operation
;; from the one the instruction performs.
12440 (define_insn "avx2_<lshift>vv8si"
12441 [(set (match_operand:V8SI 0 "register_operand" "=x")
12447 (match_operand:V8SI 1 "register_operand" "x")
12448 (parallel [(const_int 0)]))
12450 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12451 (parallel [(const_int 0)])))
12455 (parallel [(const_int 1)]))
12458 (parallel [(const_int 1)]))))
12463 (parallel [(const_int 2)]))
12466 (parallel [(const_int 2)])))
12470 (parallel [(const_int 3)]))
12473 (parallel [(const_int 3)])))))
12479 (parallel [(const_int 4)]))
12482 (parallel [(const_int 4)])))
12486 (parallel [(const_int 5)]))
12489 (parallel [(const_int 5)]))))
12494 (parallel [(const_int 6)]))
12497 (parallel [(const_int 6)])))
12501 (parallel [(const_int 7)]))
12504 (parallel [(const_int 7)])))))))]
12506 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12507 [(set_attr "type" "sseishft")
12508 (set_attr "prefix" "vex")
12509 (set_attr "mode" "OI")])
;; avx2_<lshift>v<mode>: per-element shift for each mode of the VI4SD_AVX2
;; iterator and each code of the <lshift> iterator, emitted as
;; vp<lshift_insn>v<ssemodesuffix>.
;; Operand 0 is the destination register, operand 1 a register source and
;; operand 2 a register-or-memory source.
;; The result is written as a vec_concat of two <ssehalfvecmode> halves,
;; each of which is a vec_concat of two <ssescalarmode> shifts; every shift
;; takes element N of operand 1 and element N of operand 2, for N = 0..3.
12511 (define_insn "avx2_<lshift>v<mode>"
12512 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12513 (vec_concat:VI4SD_AVX2
12514 (vec_concat:<ssehalfvecmode>
12515 (lshift:<ssescalarmode>
12516 (vec_select:<ssescalarmode>
12517 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12518 (parallel [(const_int 0)]))
12519 (vec_select:<ssescalarmode>
12520 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12521 (parallel [(const_int 0)])))
12522 (lshift:<ssescalarmode>
12523 (vec_select:<ssescalarmode>
12525 (parallel [(const_int 1)]))
12526 (vec_select:<ssescalarmode>
12528 (parallel [(const_int 1)]))))
12529 (vec_concat:<ssehalfvecmode>
12530 (lshift:<ssescalarmode>
12531 (vec_select:<ssescalarmode>
12533 (parallel [(const_int 2)]))
12534 (vec_select:<ssescalarmode>
12536 (parallel [(const_int 2)])))
12537 (lshift:<ssescalarmode>
12538 (vec_select:<ssescalarmode>
12540 (parallel [(const_int 3)]))
12541 (vec_select:<ssescalarmode>
12543 (parallel [(const_int 3)]))))))]
12545 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12546 [(set_attr "type" "sseishft")
12547 (set_attr "prefix" "vex")
12548 (set_attr "mode" "<sseinsnmode>")])
;; avx2_<lshift>vv2di: per-element shift on V2DI for each code of the
;; <lshift> iterator, emitted as vp<lshift_insn>vq.
;; Operand 0 is the destination register, operand 1 a register source and
;; operand 2 a register-or-memory source.  The two selector pairs below pick
;; elements 0 and 1 out of operand 1 and operand 2.
12550 (define_insn "avx2_<lshift>vv2di"
12551 [(set (match_operand:V2DI 0 "register_operand" "=x")
12555 (match_operand:V2DI 1 "register_operand" "x")
12556 (parallel [(const_int 0)]))
12558 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12559 (parallel [(const_int 0)])))
12563 (parallel [(const_int 1)]))
12566 (parallel [(const_int 1)])))))]
12568 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12569 [(set_attr "type" "sseishft")
12570 (set_attr "prefix" "vex")
12571 (set_attr "mode" "TI")])
;; avx_vec_concat<mode>: build a 256-bit vector (any V_256 mode) in operand 0
;; from two <ssehalfvecmode> halves, operand 1 (register) and operand 2.
;; There are two alternatives, distinguished by operand 2's constraint:
;;   - "xm": emits vinsert<i128> with immediate 0x1, inserting operand 2
;;     alongside %t1.
;;   - "C":  emits a vmovaps/vmovapd/vmovdqa of operand 1 into %x0; the
;;     mnemonic is chosen from the insn's "mode" attribute.
;; NOTE(review): "C" presumably matches an all-zero constant, so that the
;; plain 128-bit move leaves the upper half zero -- confirm against the
;; constraint definitions.
12573 (define_insn "avx_vec_concat<mode>"
12574 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12576 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12577 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
;; Output selection: dispatch on the matched alternative first.
12580 switch (which_alternative)
12583 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
;; For the move alternative, pick the move flavour from the mode attribute.
12585 switch (get_attr_mode (insn))
12588 return "vmovaps\t{%1, %x0|%x0, %1}";
12590 return "vmovapd\t{%1, %x0|%x0, %1}";
12592 return "vmovdqa\t{%1, %x0|%x0, %1}";
12595 gcc_unreachable ();
;; Per-alternative attributes: the first value belongs to the vinsert
;; alternative, the second to the move alternative.
12598 [(set_attr "type" "sselog,ssemov")
12599 (set_attr "prefix_extra" "1,*")
12600 (set_attr "length_immediate" "1,*")
12601 (set_attr "prefix" "vex")
12602 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps: register form of the half-float to single-float conversion,
;; emitted as vcvtph2ps.  Operand 1 is a V8HI register, operand 0 the V4SF
;; destination register.
;; The conversion is modelled as a V8SF unspec of the whole V8HI source; the
;; selector below keeps its four low elements, 0 through 3, in order.
;; (The selector previously read 0, 1, 1, 2, which duplicated element 1 and
;; dropped element 3.)
12604 (define_insn "vcvtph2ps"
12605 [(set (match_operand:V4SF 0 "register_operand" "=x")
12607 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12609 (parallel [(const_int 0) (const_int 1)
12610 (const_int 2) (const_int 3)])))]
12612 "vcvtph2ps\t{%1, %0|%0, %1}"
12613 [(set_attr "type" "ssecvt")
12614 (set_attr "prefix" "vex")
12615 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: memory-source form of the half-float to single-float
;; conversion.  Operand 1 is a V4HI memory operand, operand 0 the V4SF
;; destination register; the conversion is an UNSPEC_VCVTPH2PS unspec and is
;; emitted as vcvtph2ps.
;; NOTE(review): the "mode" attribute is V8SF although the destination is
;; V4SF (the register-source pattern "vcvtph2ps" uses V4SF) -- confirm
;; whether this is intentional.
12617 (define_insn "*vcvtph2ps_load"
12618 [(set (match_operand:V4SF 0 "register_operand" "=x")
12619 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12620 UNSPEC_VCVTPH2PS))]
12622 "vcvtph2ps\t{%1, %0|%0, %1}"
12623 [(set_attr "type" "ssecvt")
12624 (set_attr "prefix" "vex")
12625 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: 256-bit half-float to single-float conversion.  Operand 1
;; is a V8HI register or memory operand, operand 0 the V8SF destination
;; register; the conversion is an UNSPEC_VCVTPH2PS unspec and is emitted as
;; vcvtph2ps.
12627 (define_insn "vcvtph2ps256"
12628 [(set (match_operand:V8SF 0 "register_operand" "=x")
12629 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12630 UNSPEC_VCVTPH2PS))]
12632 "vcvtph2ps\t{%1, %0|%0, %1}"
12633 [(set_attr "type" "ssecvt")
12634 (set_attr "prefix" "vex")
12635 (set_attr "mode" "V8SF")])
;; vcvtps2ph: expander for the 128-bit single-float to half-float
;; conversion.  Operand 0 is the V8HI destination register, operand 1 the
;; V4SF source register and operand 2 an immediate in the range 0..255.
;; The preparation statement sets operands[3] to the V4HImode zero constant.
;; NOTE(review): operand 3 presumably fills the upper half of the V8HI
;; result next to the V4HI conversion unspec -- confirm against the full
;; pattern.
12637 (define_expand "vcvtps2ph"
12638 [(set (match_operand:V8HI 0 "register_operand" "")
12640 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12641 (match_operand:SI 2 "const_0_to_255_operand" "")]
12645 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: register-destination form of the single-float to half-float
;; conversion, emitted as vcvtps2ph.  Operand 0 is the V8HI destination
;; register, operand 1 the V4SF source register, operand 2 an immediate
;; (0..255, constraint "N") and operand 3 a V4HI operand that must satisfy
;; const0_operand.
12647 (define_insn "*vcvtps2ph"
12648 [(set (match_operand:V8HI 0 "register_operand" "=x")
12650 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12651 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12653 (match_operand:V4HI 3 "const0_operand" "")))]
12655 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12656 [(set_attr "type" "ssecvt")
12657 (set_attr "prefix" "vex")
12658 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: memory-destination form of the single-float to
;; half-float conversion, emitted as vcvtps2ph.  Operand 0 is a V4HI memory
;; operand, operand 1 the V4SF source register and operand 2 an immediate
;; (0..255, constraint "N"); the conversion is an UNSPEC_VCVTPS2PH unspec.
12660 (define_insn "*vcvtps2ph_store"
12661 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12662 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12663 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12664 UNSPEC_VCVTPS2PH))]
12666 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12667 [(set_attr "type" "ssecvt")
12668 (set_attr "prefix" "vex")
12669 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: 256-bit single-float to half-float conversion, emitted as
;; vcvtps2ph.  Operand 0 is the V8HI destination (register or memory),
;; operand 1 the V8SF source register and operand 2 an immediate (0..255,
;; constraint "N"); the conversion is an UNSPEC_VCVTPS2PH unspec.
12671 (define_insn "vcvtps2ph256"
12672 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12673 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12674 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12675 UNSPEC_VCVTPS2PH))]
12677 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12678 [(set_attr "type" "ssecvt")
12679 (set_attr "prefix" "vex")
12680 (set_attr "mode" "V8SF")])
12682 ;; For gather* insn patterns
;; VEC_GATHER_MODE (iterator): the data vector modes the gather patterns
;; below are instantiated for.
12683 (define_mode_iterator VEC_GATHER_MODE
12684 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; VEC_GATHER_MODE (attribute): maps each data mode to V4SI or V8SI.  It is
;; used below as the mode of the index-vector register operand of the
;; avx2_gathersi<mode> expander and the *avx2_gathersi<mode> insn.
12685 (define_mode_attr VEC_GATHER_MODE
12686 [(V2DI "V4SI") (V2DF "V4SI")
12687 (V4DI "V4SI") (V4DF "V4SI")
12688 (V4SI "V4SI") (V4SF "V4SI")
12689 (V8SI "V8SI") (V8SF "V8SI")])
;; avx2_gathersi<mode>: expander for the gather patterns whose index vector
;; has mode <VEC_GATHER_MODE>, for every data mode in VEC_GATHER_MODE.
;; Operands:
;;   0 - destination register (data mode)
;;   1 - register operand in the data mode
;;   2 - address operand accepted by vsib_address_operand
;;   3 - index vector register, mode <VEC_GATHER_MODE>
;;   4 - register operand in the data mode
;;   5 - SImode constant accepted by const1248_operand
;;   6 - scratch in the data mode, clobbered by the pattern
;; NOTE(review): operand 1 is presumably the pass-through source, operand 4
;; the mask and operand 5 the scale -- confirm against the builtin expansion.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR unspec that serves as the address of the gathered memory.
;; The predicate name of operand 5 is spelled without the stray trailing
;; blank it used to carry.
12691 (define_expand "avx2_gathersi<mode>"
12692 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12693 (unspec:VEC_GATHER_MODE
12694 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12695 (mem:<ssescalarmode>
12697 [(match_operand 2 "vsib_address_operand" "")
12698 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12699 (match_operand:SI 5 "const1248_operand" "")]))
12700 (mem:BLK (scratch))
12701 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12703 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12707 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12708 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gathersi<mode>: the insn matched for the gather patterns whose
;; index vector has mode <VEC_GATHER_MODE>; emitted as
;; v<sseintprefix>gatherd<ssemodesuffix>.
;; Operands:
;;   0 - destination register, earlyclobber ("=&x")
;;   1 - scratch register, earlyclobber, clobbered by the insn
;;   2 - data-mode input tied to operand 0 (constraint "0")
;;   3 - address operand (vsib_address_operand), mode P
;;   4 - index vector register, mode <VEC_GATHER_MODE>
;;   5 - data-mode input tied to scratch operand 1 (constraint "1")
;;   6 - SImode constant accepted by const1248_operand
;;   7 - the memory operator (vsib_mem_operator) wrapping operands 3, 4, 6
;; The template prints the register of operand 1, the memory operand 7 and
;; the destination 0.
12711 (define_insn "*avx2_gathersi<mode>"
12712 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12713 (unspec:VEC_GATHER_MODE
12714 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12715 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12717 [(match_operand:P 3 "vsib_address_operand" "p")
12718 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12719 (match_operand:SI 6 "const1248_operand" "n")]
12721 (mem:BLK (scratch))
12722 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12724 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12726 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12727 [(set_attr "type" "ssemov")
12728 (set_attr "prefix" "vex")
12729 (set_attr "mode" "<sseinsnmode>")])
;; avx2_gatherdi<mode>: expander for the gather patterns whose index vector
;; has mode <AVXMODE48P_DI>.
;; Operands:
;;   0 - destination register (data mode)
;;   1 - register operand in the data mode
;;   2 - address operand accepted by vsib_address_operand
;;   3 - index vector register, mode <AVXMODE48P_DI>
;;   4 - register operand in the data mode
;;   5 - SImode constant accepted by const1248_operand
;;   6 - scratch in the data mode, clobbered by the pattern
;; NOTE(review): operand 1 is presumably the pass-through source, operand 4
;; the mask and operand 5 the scale -- confirm against the builtin expansion.
;; NOTE(review): this expander iterates over VEC_GATHER_MODE while the
;; matching insn iterates over AVXMODE48P_DI; the definition of the
;; <AVXMODE48P_DI> attribute is not visible here -- confirm the two agree.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR unspec that serves as the address of the gathered memory.
;; The predicate name of operand 5 is spelled without the stray trailing
;; blank it used to carry.
12731 (define_expand "avx2_gatherdi<mode>"
12732 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12733 (unspec:VEC_GATHER_MODE
12734 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12735 (mem:<ssescalarmode>
12737 [(match_operand 2 "vsib_address_operand" "")
12738 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12739 (match_operand:SI 5 "const1248_operand" "")]))
12740 (mem:BLK (scratch))
12741 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12743 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12747 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12748 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gatherdi<mode>: the insn matched for the gather patterns whose
;; index vector has mode <AVXMODE48P_DI>, for every data mode in the
;; AVXMODE48P_DI iterator; emitted as v<sseintprefix>gatherq<ssemodesuffix>.
;; Operands:
;;   0 - destination register, earlyclobber ("=&x")
;;   1 - scratch register, earlyclobber, clobbered by the insn
;;   2 - data-mode input tied to operand 0 (constraint "0")
;;   3 - address operand (vsib_address_operand), mode P
;;   4 - index vector register, mode <AVXMODE48P_DI>
;;   5 - data-mode input tied to scratch operand 1 (constraint "1")
;;   6 - SImode constant accepted by const1248_operand
;;   7 - the memory operator (vsib_mem_operator) wrapping operands 3, 4, 6
;; The template prints the register of operand 1, the memory operand 7 and
;; the destination 0.
12751 (define_insn "*avx2_gatherdi<mode>"
12752 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12753 (unspec:AVXMODE48P_DI
12754 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12755 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12757 [(match_operand:P 3 "vsib_address_operand" "p")
12758 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12759 (match_operand:SI 6 "const1248_operand" "n")]
12761 (mem:BLK (scratch))
12762 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
12764 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12766 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12767 [(set_attr "type" "ssemov")
12768 (set_attr "prefix" "vex")
12769 (set_attr "mode" "<sseinsnmode>")])
12771 ;; Special handling for VEX.256 with float arguments
12772 ;; since there're still xmms as operands
;; avx2_gatherdi<mode>256: expander for the gather patterns that combine a
;; V4DI index vector with 128-bit data (the modes of VI4F_128).
;; Operands:
;;   0 - destination register (data mode)
;;   1 - register operand in the data mode
;;   2 - address operand accepted by vsib_address_operand
;;   3 - V4DI index vector register
;;   4 - register operand in the data mode
;;   5 - SImode constant accepted by const1248_operand
;;   6 - scratch in the data mode, clobbered by the pattern
;; NOTE(review): operand 1 is presumably the pass-through source, operand 4
;; the mask and operand 5 the scale -- confirm against the builtin expansion.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR unspec that serves as the address of the gathered memory.
;; The predicate name of operand 5 is spelled without the stray trailing
;; blank it used to carry.
12773 (define_expand "avx2_gatherdi<mode>256"
12774 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12776 [(match_operand:VI4F_128 1 "register_operand" "")
12777 (mem:<ssescalarmode>
12779 [(match_operand 2 "vsib_address_operand" "")
12780 (match_operand:V4DI 3 "register_operand" "")
12781 (match_operand:SI 5 "const1248_operand" "")]))
12782 (mem:BLK (scratch))
12783 (match_operand:VI4F_128 4 "register_operand" "")]
12785 (clobber (match_scratch:VI4F_128 6 ""))])]
12789 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12790 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gatherdi<mode>256: the insn matched for the gather patterns that
;; combine a V4DI index vector with 128-bit data (the modes of VI4F_128);
;; emitted as v<sseintprefix>gatherq<ssemodesuffix>.
;; Operands:
;;   0 - destination register, earlyclobber ("=&x")
;;   1 - scratch register, earlyclobber, clobbered by the insn
;;   2 - data-mode input tied to operand 0 (constraint "0")
;;   3 - address operand (vsib_address_operand), mode P
;;   4 - V4DI index vector register
;;   5 - data-mode input tied to scratch operand 1 (constraint "1")
;;   6 - SImode constant accepted by const1248_operand
;;   7 - the memory operator (vsib_mem_operator) wrapping operands 3, 4, 6
;; Operand 0 is marked earlyclobber, as in the other two gather insns, so
;; that the register allocator cannot give the index vector (operand 4) the
;; same register as the destination; the gather instructions require the
;; destination, index and mask registers to be distinct.
12793 (define_insn "*avx2_gatherdi<mode>256"
12794 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12796 [(match_operand:VI4F_128 2 "register_operand" "0")
12797 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12799 [(match_operand:P 3 "vsib_address_operand" "p")
12800 (match_operand:V4DI 4 "register_operand" "x")
12801 (match_operand:SI 6 "const1248_operand" "n")]
12803 (mem:BLK (scratch))
12804 (match_operand:VI4F_128 5 "register_operand" "1")]
12806 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12808 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12809 [(set_attr "type" "ssemov")
12810 (set_attr "prefix" "vex")
12811 (set_attr "mode" "<sseinsnmode>")])