1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; A parenthesized entry pairs a mode with the C condition under which that
;; mode is enabled; every such entry here requires TARGET_AVX.  Bare entries
;; carry no additional condition.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V?TImode entries: the same list as V16 minus
;; V2TI and V1TI.  Unlike in V16, V2DF is additionally gated on TARGET_SSE2.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the other modes are listed
;; unconditionally.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; Unlike the mixed-width iterator V, the entries here carry no per-mode
;; TARGET_AVX condition.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; has no additional condition.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF requires TARGET_AVX; V4SF has no additional condition.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF requires TARGET_AVX.  Note that V2DF carries no TARGET_SSE2
;; condition here, unlike in VF and VF_128.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V2DF is enabled only with TARGET_SSE2.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; Each line pairs a 256bit mode, enabled only with TARGET_AVX, with the
;; 128bit mode of the same element type.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; All vector integer modes, with the 256bit modes gated on TARGET_AVX2
;; instead of TARGET_AVX (compare VI, which lists the same modes).
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI requires TARGET_AVX; V16QI has no additional condition.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI requires TARGET_AVX; V2DI has no additional condition.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size integer vector iterators whose 256bit member requires
;; TARGET_AVX2.  The digit in the name matches the element mode used:
;; 1 = QImode, 2 = HImode, 4 = SImode, 8 = DImode.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; TImode-element vector modes: V2TI (only with TARGET_AVX2) and V1TI.
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
;; Like VIMAX_AVX2, except that the second member is scalar TI rather than
;; the vector mode V1TI.
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; QImode and HImode element integer vectors; the 256bit forms require
;; TARGET_AVX2.
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
;; HImode and SImode element integer vectors; the 256bit forms require
;; TARGET_AVX2.
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
;; QImode, HImode and SImode element integer vectors; the 256bit forms
;; require TARGET_AVX2.
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
;; HImode, SImode and DImode element integer vectors; the 256bit forms
;; require TARGET_AVX2.
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI4SD_AVX2
131 (define_mode_iterator V48_AVX2
134 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
135 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps an integer vector mode to an ISA name string: "sse2" for the modes
;; in the left column (128bit), "avx2" for those in the right column
;; (256bit).  Substituted wherever <sse2_avx2> appears.
137 (define_mode_attr sse2_avx2
138 [(V16QI "sse2") (V32QI "avx2")
139 (V8HI "sse2") (V16HI "avx2")
140 (V4SI "sse2") (V8SI "avx2")
141 (V2DI "sse2") (V4DI "avx2")
142 (V1TI "sse2") (V2TI "avx2")])
;; Maps a mode to "ssse3" (left column) or "avx2" (right column).
;; Note the last row is keyed on scalar TI rather than V1TI, matching the
;; members of the SSESCALARMODE iterator.
144 (define_mode_attr ssse3_avx2
145 [(V16QI "ssse3") (V32QI "avx2")
146 (V8HI "ssse3") (V16HI "avx2")
147 (V4SI "ssse3") (V8SI "avx2")
148 (V2DI "ssse3") (V4DI "avx2")
149 (TI "ssse3") (V2TI "avx2")])
;; Maps an integer vector mode to "sse4_1" (left column, 128bit modes) or
;; "avx2" (right column, 256bit modes).
151 (define_mode_attr sse4_1_avx2
152 [(V16QI "sse4_1") (V32QI "avx2")
153 (V8HI "sse4_1") (V16HI "avx2")
154 (V4SI "sse4_1") (V8SI "avx2")
155 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps float vector modes to "avx" and SImode/DImode integer vector modes
;; to "avx2".  The result depends on the element type only, not on the
;; vector width.
157 (define_mode_attr avx_avx2
158 [(V4SF "avx") (V2DF "avx")
159 (V8SF "avx") (V4DF "avx")
160 (V4SI "avx2") (V2DI "avx2")
161 (V8SI "avx2") (V4DI "avx2")])
;; Maps an integer vector mode to "vec" (left column, 128bit modes) or
;; "avx2" (right column, 256bit modes).
163 (define_mode_attr vec_avx2
164 [(V16QI "vec") (V32QI "avx2")
165 (V8HI "vec") (V16HI "avx2")
166 (V4SI "vec") (V8SI "avx2")
167 (V2DI "vec") (V4DI "avx2")])
169 ;; Mapping of logic-shift operators
;; Iterates over the two RTL codes lshiftrt and ashift.  The code attributes
;; below supply a per-code string for each of them.
170 (define_code_iterator lshift [lshiftrt ashift])
172 ;; Base name for define_insn
173 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
175 ;; Base name for insn mnemonic
;; NOTE(review): the two "Base name" comments look swapped -- "srl"/"sll"
;; read like mnemonic fragments and "lshr"/"lshl" like pattern-name
;; fragments.  Confirm against the patterns that use <lshift_insn> and
;; <lshift> before relying on either comment.
176 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; Maps an HImode-element vector to the SImode-element vector with the same
;; number of elements (element width doubled).
178 (define_mode_attr ssedoublemode
179 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DImode-element vector to the QImode-element (byte) vector of the
;; same overall width.
181 (define_mode_attr ssebytemode
182 [(V4DI "V32QI") (V2DI "V16QI")])
184 ;; All 128bit vector integer modes
;; No per-mode conditions are attached.
185 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
187 ;; All 256bit vector integer modes
;; No per-mode conditions are attached.
188 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
190 ;; Random 128bit vector integer mode combinations
;; The digits in each name match the element modes included:
;; 1 = QImode (V16QI), 2 = HImode (V8HI), 4 = SImode (V4SI), 8 = DImode (V2DI).
191 (define_mode_iterator VI12_128 [V16QI V8HI])
192 (define_mode_iterator VI14_128 [V16QI V4SI])
193 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
194 (define_mode_iterator VI24_128 [V8HI V4SI])
195 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
197 ;; Random 256bit vector integer mode combinations
;; QImode, HImode and SImode element vectors; V4DI is not included.
198 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
200 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share the
;; same element count: SI with SF, DI with DF.
201 (define_mode_iterator VI4F_128 [V4SI V4SF])
202 (define_mode_iterator VI8F_128 [V2DI V2DF])
203 (define_mode_iterator VI4F_256 [V8SI V8SF])
204 (define_mode_iterator VI8F_256 [V4DI V4DF])
206 ;; Mapping from float mode to required SSE level
;; Covers the scalar modes SF and DF as well as the float vector modes.
;; Used as <sse> in pattern names such as "<sse>_movnt<mode>".
207 (define_mode_attr sse
208 [(SF "sse") (DF "sse2")
209 (V4SF "sse") (V2DF "sse2")
210 (V8SF "avx") (V4DF "avx")])
;; Name prefix for QImode/DImode integer vectors: "sse2" for the 128bit
;; mode, "avx" for the 256bit mode.  Used as <sse2> in pattern names such
;; as "<sse2>_movdqu<avxsizesuffix>".
212 (define_mode_attr sse2
213 [(V16QI "sse2") (V32QI "avx")
214 (V2DI "sse2") (V4DI "avx")])
;; Name prefix for QImode vectors: "sse3" for V16QI, "avx" for V32QI.
;; Used as <sse3> in the pattern name "<sse3>_lddqu<avxsizesuffix>".
216 (define_mode_attr sse3
217 [(V16QI "sse3") (V32QI "avx")])
;; Name prefix for float vectors: "sse4_1" for the 128bit modes, "avx" for
;; the 256bit modes.
219 (define_mode_attr sse4_1
220 [(V4SF "sse4_1") (V2DF "sse4_1")
221 (V8SF "avx") (V4DF "avx")])
;; Size suffix: "256" for the 256bit modes and the empty string for the
;; 128bit modes.  Used in pattern names such as
;; "<sse2>_movdqu<avxsizesuffix>".
223 (define_mode_attr avxsizesuffix
224 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
225 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
226 (V8SF "256") (V4DF "256")
227 (V4SF "") (V2DF "")])
229 ;; SSE instruction mode
230 (define_mode_attr sseinsnmode
231 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
232 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
233 (V8SF "V8SF") (V4DF "V4DF")
234 (V4SF "V4SF") (V2DF "V2DF")
237 ;; Mapping of vector modes to an integer vector mode of the same size
;; and element count.  Float vector modes map to the matching integer
;; vector mode (SF -> SI, DF -> DI elements); integer vector modes map to
;; themselves.  Each mode is listed exactly once.
238 (define_mode_attr sseintvecmode
239 [(V8SF "V8SI") (V4DF "V4DI")
240 (V4SF "V4SI") (V2DF "V2DI")
242 (V8SI "V8SI") (V4DI "V4DI")
243 (V4SI "V4SI") (V2DI "V2DI")
244 (V16HI "V16HI") (V8HI "V8HI")
245 (V32QI "V32QI") (V16QI "V16QI")])
247 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled
;; (e.g. V4SI -> V8SI).
248 (define_mode_attr ssedoublevecmode
249 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
250 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
251 (V8SF "V16SF") (V4DF "V8DF")
252 (V4SF "V8SF") (V2DF "V4DF")])
254 ;; Mapping of vector modes to a vector mode of half size
255 (define_mode_attr ssehalfvecmode
256 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
257 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
258 (V8SF "V4SF") (V4DF "V2DF")
261 ;; Mapping of vector modes back to the scalar modes
;; Yields the element mode of the vector.  Used for the "mode" attribute of
;; the scalar (vm) patterns, e.g. (set_attr "mode" "<ssescalarmode>").
262 (define_mode_attr ssescalarmode
263 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
264 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
265 (V8SF "SF") (V4DF "DF")
266 (V4SF "SF") (V2DF "DF")])
268 ;; Number of scalar elements in each vector type
;; The value is a decimal string equal to the element count in the mode
;; name (e.g. V8SI -> "8").
269 (define_mode_attr ssescalarnum
270 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
271 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
272 (V8SF "8") (V4DF "4")
273 (V4SF "4") (V2DF "2")])
275 ;; SSE prefix for integer vector modes
276 (define_mode_attr sseintprefix
277 [(V2DI "p") (V2DF "")
280 (V8SI "p") (V8SF "")])
282 ;; SSE scalar suffix for vector modes
283 (define_mode_attr ssescalarmodesuffix
285 (V8SF "ss") (V4DF "sd")
286 (V4SF "ss") (V2DF "sd")
287 (V8SI "ss") (V4DI "sd")
290 ;; Pack/unpack vector modes
;; Unpack direction: maps an integer vector mode to the mode with half as
;; many elements of the next wider element mode (e.g. V16QI -> V8HI).
291 (define_mode_attr sseunpackmode
292 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
293 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Pack direction: maps an integer vector mode to the mode with twice as
;; many elements of the next narrower element mode (e.g. V8HI -> V16QI).
;; This is the exact inverse of the sseunpackmode table.
295 (define_mode_attr ssepackmode
296 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
297 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
299 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (QI 7, HI 15, SI 31,
;; DI 63).  Only the 128bit integer modes have entries.
300 (define_mode_attr sserotatemax
301 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
303 ;; Mapping of mode to cast intrinsic name
;; Only V8SI, V8SF and V4DF have entries.
304 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
306 ;; Instruction suffix for sign and zero extensions.
;; Keyed on the RTL codes sign_extend ("sx") and zero_extend ("zx").
307 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
309 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes map to the literal "f128".  Integer modes map to "%~128";
;; presumably "%~" is an output-template escape that prints "i" or "f"
;; depending on TARGET_AVX2 -- TODO confirm against the operand printer.
310 (define_mode_attr i128
311 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
312 (V8SI "%~128") (V4DI "%~128")])
;; The three 256bit modes V8SI, V8SF and V4DF, with no per-mode conditions.
315 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; SImode/SFmode and DImode/DFmode element vector modes, as an iterator and
;; as a mode attribute of the same name.
317 (define_mode_iterator AVXMODE48P_DI
318 [V2DI V2DF V4DI V4DF V4SF V4SI])
;; The attribute maps each mode to the DImode-element integer vector of the
;; same overall width (128bit modes -> V2DI, 256bit modes -> V4DI).  It also
;; has entries for V8SI and V8SF, which the iterator above does not list.
319 (define_mode_attr AVXMODE48P_DI
320 [(V2DI "V2DI") (V2DF "V2DI")
321 (V4DI "V4DI") (V4DF "V4DI")
322 (V4SI "V2DI") (V4SF "V2DI")
323 (V8SI "V4DI") (V8SF "V4DI")])
;; Scalar SF and DF plus the four float vector modes that VF lists, here
;; without any per-mode target conditions.
325 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
327 ;; Mapping of immediate bits for blend instructions
;; Each value is the all-ones mask with one bit per vector element:
;; 8 elements -> 255, 4 elements -> 15, 2 elements -> 3.
328 (define_mode_attr blendbits
329 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
331 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
333 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
339 ;; All of these patterns are enabled for SSE1 as well as SSE2.
340 ;; This is essential for maintaining stable calling conventions.
342 (define_expand "mov<mode>"
343 [(set (match_operand:V16 0 "nonimmediate_operand" "")
344 (match_operand:V16 1 "nonimmediate_operand" ""))]
347 ix86_expand_vector_move (<MODE>mode, operands);
351 (define_insn "*mov<mode>_internal"
352 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
353 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
355 && (register_operand (operands[0], <MODE>mode)
356 || register_operand (operands[1], <MODE>mode))"
358 switch (which_alternative)
361 return standard_sse_constant_opcode (insn, operands[1]);
364 switch (get_attr_mode (insn))
369 && (misaligned_operand (operands[0], <MODE>mode)
370 || misaligned_operand (operands[1], <MODE>mode)))
371 return "vmovups\t{%1, %0|%0, %1}";
373 return "%vmovaps\t{%1, %0|%0, %1}";
378 && (misaligned_operand (operands[0], <MODE>mode)
379 || misaligned_operand (operands[1], <MODE>mode)))
380 return "vmovupd\t{%1, %0|%0, %1}";
381 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
382 return "%vmovaps\t{%1, %0|%0, %1}";
384 return "%vmovapd\t{%1, %0|%0, %1}";
389 && (misaligned_operand (operands[0], <MODE>mode)
390 || misaligned_operand (operands[1], <MODE>mode)))
391 return "vmovdqu\t{%1, %0|%0, %1}";
392 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
393 return "%vmovaps\t{%1, %0|%0, %1}";
395 return "%vmovdqa\t{%1, %0|%0, %1}";
404 [(set_attr "type" "sselog1,ssemov,ssemov")
405 (set_attr "prefix" "maybe_vex")
407 (cond [(match_test "TARGET_AVX")
408 (const_string "<sseinsnmode>")
409 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
410 (not (match_test "TARGET_SSE2")))
411 (and (eq_attr "alternative" "2")
412 (match_test "TARGET_SSE_TYPELESS_STORES")))
413 (const_string "V4SF")
414 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
415 (const_string "V4SF")
416 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
417 (const_string "V2DF")
419 (const_string "TI")))])
421 (define_insn "sse2_movq128"
422 [(set (match_operand:V2DI 0 "register_operand" "=x")
425 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
426 (parallel [(const_int 0)]))
429 "%vmovq\t{%1, %0|%0, %1}"
430 [(set_attr "type" "ssemov")
431 (set_attr "prefix" "maybe_vex")
432 (set_attr "mode" "TI")])
434 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
435 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
436 ;; from memory, we'd prefer to load the memory directly into the %xmm
437 ;; register. To facilitate this happy circumstance, this pattern won't
438 ;; split until after register allocation. If the 64-bit value didn't
439 ;; come from memory, this is the best we can do. This is much better
440 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
443 (define_insn_and_split "movdi_to_sse"
445 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
446 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
447 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
448 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
450 "&& reload_completed"
453 if (register_operand (operands[1], DImode))
455 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
456 Assemble the 64-bit DImode value in an xmm register. */
457 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
458 gen_rtx_SUBREG (SImode, operands[1], 0)));
459 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
460 gen_rtx_SUBREG (SImode, operands[1], 4)));
461 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
464 else if (memory_operand (operands[1], DImode))
465 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
466 operands[1], const0_rtx));
472 [(set (match_operand:V4SF 0 "register_operand" "")
473 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
474 "TARGET_SSE && reload_completed"
477 (vec_duplicate:V4SF (match_dup 1))
481 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
482 operands[2] = CONST0_RTX (V4SFmode);
486 [(set (match_operand:V2DF 0 "register_operand" "")
487 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
488 "TARGET_SSE2 && reload_completed"
489 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
491 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
492 operands[2] = CONST0_RTX (DFmode);
495 (define_expand "push<mode>1"
496 [(match_operand:V16 0 "register_operand" "")]
499 ix86_expand_push (<MODE>mode, operands[0]);
503 (define_expand "movmisalign<mode>"
504 [(set (match_operand:V16 0 "nonimmediate_operand" "")
505 (match_operand:V16 1 "nonimmediate_operand" ""))]
508 ix86_expand_vector_move_misalign (<MODE>mode, operands);
512 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
513 [(set (match_operand:VF 0 "nonimmediate_operand" "")
515 [(match_operand:VF 1 "nonimmediate_operand" "")]
519 if (MEM_P (operands[0]) && MEM_P (operands[1]))
520 operands[1] = force_reg (<MODE>mode, operands[1]);
523 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
524 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
526 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
528 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
529 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
530 [(set_attr "type" "ssemov")
531 (set_attr "movu" "1")
532 (set_attr "prefix" "maybe_vex")
533 (set_attr "mode" "<MODE>")])
535 (define_expand "<sse2>_movdqu<avxsizesuffix>"
536 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
537 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
541 if (MEM_P (operands[0]) && MEM_P (operands[1]))
542 operands[1] = force_reg (<MODE>mode, operands[1]);
545 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
546 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
547 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
550 "%vmovdqu\t{%1, %0|%0, %1}"
551 [(set_attr "type" "ssemov")
552 (set_attr "movu" "1")
553 (set (attr "prefix_data16")
555 (match_test "TARGET_AVX")
558 (set_attr "prefix" "maybe_vex")
559 (set_attr "mode" "<sseinsnmode>")])
561 (define_insn "<sse3>_lddqu<avxsizesuffix>"
562 [(set (match_operand:VI1 0 "register_operand" "=x")
563 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
566 "%vlddqu\t{%1, %0|%0, %1}"
567 [(set_attr "type" "ssemov")
568 (set_attr "movu" "1")
569 (set (attr "prefix_data16")
571 (match_test "TARGET_AVX")
574 (set (attr "prefix_rep")
576 (match_test "TARGET_AVX")
579 (set_attr "prefix" "maybe_vex")
580 (set_attr "mode" "<sseinsnmode>")])
582 (define_insn "sse2_movntsi"
583 [(set (match_operand:SI 0 "memory_operand" "=m")
584 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
587 "movnti\t{%1, %0|%0, %1}"
588 [(set_attr "type" "ssemov")
589 (set_attr "prefix_data16" "0")
590 (set_attr "mode" "V2DF")])
592 (define_insn "<sse>_movnt<mode>"
593 [(set (match_operand:VF 0 "memory_operand" "=m")
594 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
597 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
598 [(set_attr "type" "ssemov")
599 (set_attr "prefix" "maybe_vex")
600 (set_attr "mode" "<MODE>")])
602 (define_insn "<sse2>_movnt<mode>"
603 [(set (match_operand:VI8 0 "memory_operand" "=m")
604 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
607 "%vmovntdq\t{%1, %0|%0, %1}"
608 [(set_attr "type" "ssecvt")
609 (set (attr "prefix_data16")
611 (match_test "TARGET_AVX")
614 (set_attr "prefix" "maybe_vex")
615 (set_attr "mode" "<sseinsnmode>")])
617 ; Expand patterns for non-temporal stores. At the moment, only those
618 ; that directly map to insns are defined; it would be possible to
619 ; define patterns for other modes that would expand to several insns.
621 ;; Modes handled by storent patterns.
622 (define_mode_iterator STORENT_MODE
623 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
625 (V8SF "TARGET_AVX") V4SF
626 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
628 (define_expand "storent<mode>"
629 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
631 [(match_operand:STORENT_MODE 1 "register_operand" "")]
635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
637 ;; Parallel floating point arithmetic
639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
641 (define_expand "<code><mode>2"
642 [(set (match_operand:VF 0 "register_operand" "")
644 (match_operand:VF 1 "register_operand" "")))]
646 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
648 (define_insn_and_split "*absneg<mode>2"
649 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
650 (match_operator:VF 3 "absneg_operator"
651 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
652 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
655 "&& reload_completed"
658 enum rtx_code absneg_op;
664 if (MEM_P (operands[1]))
665 op1 = operands[2], op2 = operands[1];
667 op1 = operands[1], op2 = operands[2];
672 if (rtx_equal_p (operands[0], operands[1]))
678 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
679 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
680 t = gen_rtx_SET (VOIDmode, operands[0], t);
684 [(set_attr "isa" "noavx,noavx,avx,avx")])
686 (define_expand "<plusminus_insn><mode>3"
687 [(set (match_operand:VF 0 "register_operand" "")
689 (match_operand:VF 1 "nonimmediate_operand" "")
690 (match_operand:VF 2 "nonimmediate_operand" "")))]
692 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
694 (define_insn "*<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "=x,x")
697 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
698 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
699 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
701 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
702 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
703 [(set_attr "isa" "noavx,avx")
704 (set_attr "type" "sseadd")
705 (set_attr "prefix" "orig,vex")
706 (set_attr "mode" "<MODE>")])
708 (define_insn "<sse>_vm<plusminus_insn><mode>3"
709 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
712 (match_operand:VF_128 1 "register_operand" "0,x")
713 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
718 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
719 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
720 [(set_attr "isa" "noavx,avx")
721 (set_attr "type" "sseadd")
722 (set_attr "prefix" "orig,vex")
723 (set_attr "mode" "<ssescalarmode>")])
725 (define_expand "mul<mode>3"
726 [(set (match_operand:VF 0 "register_operand" "")
728 (match_operand:VF 1 "nonimmediate_operand" "")
729 (match_operand:VF 2 "nonimmediate_operand" "")))]
731 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
733 (define_insn "*mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "=x,x")
736 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
737 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
738 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
740 mul<ssemodesuffix>\t{%2, %0|%0, %2}
741 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "isa" "noavx,avx")
743 (set_attr "type" "ssemul")
744 (set_attr "prefix" "orig,vex")
745 (set_attr "mode" "<MODE>")])
747 (define_insn "<sse>_vmmul<mode>3"
748 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
751 (match_operand:VF_128 1 "register_operand" "0,x")
752 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
757 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
758 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
759 [(set_attr "isa" "noavx,avx")
760 (set_attr "type" "ssemul")
761 (set_attr "prefix" "orig,vex")
762 (set_attr "mode" "<ssescalarmode>")])
764 (define_expand "div<mode>3"
765 [(set (match_operand:VF2 0 "register_operand" "")
766 (div:VF2 (match_operand:VF2 1 "register_operand" "")
767 (match_operand:VF2 2 "nonimmediate_operand" "")))]
769 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
771 (define_expand "div<mode>3"
772 [(set (match_operand:VF1 0 "register_operand" "")
773 (div:VF1 (match_operand:VF1 1 "register_operand" "")
774 (match_operand:VF1 2 "nonimmediate_operand" "")))]
777 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
780 && TARGET_RECIP_VEC_DIV
781 && !optimize_insn_for_size_p ()
782 && flag_finite_math_only && !flag_trapping_math
783 && flag_unsafe_math_optimizations)
785 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
790 (define_insn "<sse>_div<mode>3"
791 [(set (match_operand:VF 0 "register_operand" "=x,x")
793 (match_operand:VF 1 "register_operand" "0,x")
794 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
797 div<ssemodesuffix>\t{%2, %0|%0, %2}
798 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
799 [(set_attr "isa" "noavx,avx")
800 (set_attr "type" "ssediv")
801 (set_attr "prefix" "orig,vex")
802 (set_attr "mode" "<MODE>")])
804 (define_insn "<sse>_vmdiv<mode>3"
805 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
808 (match_operand:VF_128 1 "register_operand" "0,x")
809 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
814 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
815 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssediv")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<ssescalarmode>")])
821 (define_insn "<sse>_rcp<mode>2"
822 [(set (match_operand:VF1 0 "register_operand" "=x")
824 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
826 "%vrcpps\t{%1, %0|%0, %1}"
827 [(set_attr "type" "sse")
828 (set_attr "atom_sse_attr" "rcp")
829 (set_attr "prefix" "maybe_vex")
830 (set_attr "mode" "<MODE>")])
832 (define_insn "sse_vmrcpv4sf2"
833 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
835 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
837 (match_operand:V4SF 2 "register_operand" "0,x")
841 rcpss\t{%1, %0|%0, %1}
842 vrcpss\t{%1, %2, %0|%0, %2, %1}"
843 [(set_attr "isa" "noavx,avx")
844 (set_attr "type" "sse")
845 (set_attr "atom_sse_attr" "rcp")
846 (set_attr "prefix" "orig,vex")
847 (set_attr "mode" "SF")])
849 (define_expand "sqrt<mode>2"
850 [(set (match_operand:VF2 0 "register_operand" "")
851 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
854 (define_expand "sqrt<mode>2"
855 [(set (match_operand:VF1 0 "register_operand" "")
856 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
860 && TARGET_RECIP_VEC_SQRT
861 && !optimize_insn_for_size_p ()
862 && flag_finite_math_only && !flag_trapping_math
863 && flag_unsafe_math_optimizations)
865 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
870 (define_insn "<sse>_sqrt<mode>2"
871 [(set (match_operand:VF 0 "register_operand" "=x")
872 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
874 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "atom_sse_attr" "sqrt")
877 (set_attr "prefix" "maybe_vex")
878 (set_attr "mode" "<MODE>")])
880 (define_insn "<sse>_vmsqrt<mode>2"
881 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
884 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
885 (match_operand:VF_128 2 "register_operand" "0,x")
889 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
890 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
891 [(set_attr "isa" "noavx,avx")
892 (set_attr "type" "sse")
893 (set_attr "atom_sse_attr" "sqrt")
894 (set_attr "prefix" "orig,vex")
895 (set_attr "mode" "<ssescalarmode>")])
897 (define_expand "rsqrt<mode>2"
898 [(set (match_operand:VF1 0 "register_operand" "")
900 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
903 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
907 (define_insn "<sse>_rsqrt<mode>2"
908 [(set (match_operand:VF1 0 "register_operand" "=x")
910 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
912 "%vrsqrtps\t{%1, %0|%0, %1}"
913 [(set_attr "type" "sse")
914 (set_attr "prefix" "maybe_vex")
915 (set_attr "mode" "<MODE>")])
917 (define_insn "sse_vmrsqrtv4sf2"
918 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
920 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
922 (match_operand:V4SF 2 "register_operand" "0,x")
926 rsqrtss\t{%1, %0|%0, %1}
927 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
928 [(set_attr "isa" "noavx,avx")
929 (set_attr "type" "sse")
930 (set_attr "prefix" "orig,vex")
931 (set_attr "mode" "SF")])
933 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
934 ;; isn't really correct, as those rtl operators aren't defined when
935 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
937 (define_expand "<code><mode>3"
938 [(set (match_operand:VF 0 "register_operand" "")
940 (match_operand:VF 1 "nonimmediate_operand" "")
941 (match_operand:VF 2 "nonimmediate_operand" "")))]
944 if (!flag_finite_math_only)
945 operands[1] = force_reg (<MODE>mode, operands[1]);
946 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
949 (define_insn "*<code><mode>3_finite"
950 [(set (match_operand:VF 0 "register_operand" "=x,x")
952 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
953 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
954 "TARGET_SSE && flag_finite_math_only
955 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
957 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
958 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
959 [(set_attr "isa" "noavx,avx")
960 (set_attr "type" "sseadd")
961 (set_attr "prefix" "orig,vex")
962 (set_attr "mode" "<MODE>")])
964 (define_insn "*<code><mode>3"
965 [(set (match_operand:VF 0 "register_operand" "=x,x")
967 (match_operand:VF 1 "register_operand" "0,x")
968 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
969 "TARGET_SSE && !flag_finite_math_only"
971 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
972 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
973 [(set_attr "isa" "noavx,avx")
974 (set_attr "type" "sseadd")
975 (set_attr "prefix" "orig,vex")
976 (set_attr "mode" "<MODE>")])
978 (define_insn "<sse>_vm<code><mode>3"
979 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
982 (match_operand:VF_128 1 "register_operand" "0,x")
983 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
988 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
989 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
990 [(set_attr "isa" "noavx,avx")
991 (set_attr "type" "sse")
992 (set_attr "prefix" "orig,vex")
993 (set_attr "mode" "<ssescalarmode>")])
995 ;; These versions of the min/max patterns implement exactly the operations
996 ;; min = (op1 < op2 ? op1 : op2)
997 ;; max = (!(op1 < op2) ? op1 : op2)
998 ;; Their operands are not commutative, and thus they may be used in the
999 ;; presence of -0.0 and NaN.
1001 (define_insn "*ieee_smin<mode>3"
1002 [(set (match_operand:VF 0 "register_operand" "=x,x")
1004 [(match_operand:VF 1 "register_operand" "0,x")
1005 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1009 min<ssemodesuffix>\t{%2, %0|%0, %2}
1010 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1011 [(set_attr "isa" "noavx,avx")
1012 (set_attr "type" "sseadd")
1013 (set_attr "prefix" "orig,vex")
1014 (set_attr "mode" "<MODE>")])
1016 (define_insn "*ieee_smax<mode>3"
1017 [(set (match_operand:VF 0 "register_operand" "=x,x")
1019 [(match_operand:VF 1 "register_operand" "0,x")
1020 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1024 max<ssemodesuffix>\t{%2, %0|%0, %2}
1025 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1026 [(set_attr "isa" "noavx,avx")
1027 (set_attr "type" "sseadd")
1028 (set_attr "prefix" "orig,vex")
1029 (set_attr "mode" "<MODE>")])
1031 (define_insn "avx_addsubv4df3"
1032 [(set (match_operand:V4DF 0 "register_operand" "=x")
1035 (match_operand:V4DF 1 "register_operand" "x")
1036 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1037 (minus:V4DF (match_dup 1) (match_dup 2))
1040 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "V4DF")])
1045 (define_insn "sse3_addsubv2df3"
1046 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1049 (match_operand:V2DF 1 "register_operand" "0,x")
1050 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1051 (minus:V2DF (match_dup 1) (match_dup 2))
1055 addsubpd\t{%2, %0|%0, %2}
1056 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1057 [(set_attr "isa" "noavx,avx")
1058 (set_attr "type" "sseadd")
1059 (set_attr "atom_unit" "complex")
1060 (set_attr "prefix" "orig,vex")
1061 (set_attr "mode" "V2DF")])
1063 (define_insn "avx_addsubv8sf3"
1064 [(set (match_operand:V8SF 0 "register_operand" "=x")
1067 (match_operand:V8SF 1 "register_operand" "x")
1068 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1069 (minus:V8SF (match_dup 1) (match_dup 2))
1072 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1073 [(set_attr "type" "sseadd")
1074 (set_attr "prefix" "vex")
1075 (set_attr "mode" "V8SF")])
1077 (define_insn "sse3_addsubv4sf3"
1078 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1081 (match_operand:V4SF 1 "register_operand" "0,x")
1082 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1083 (minus:V4SF (match_dup 1) (match_dup 2))
1087 addsubps\t{%2, %0|%0, %2}
1088 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1089 [(set_attr "isa" "noavx,avx")
1090 (set_attr "type" "sseadd")
1091 (set_attr "prefix" "orig,vex")
1092 (set_attr "prefix_rep" "1,*")
1093 (set_attr "mode" "V4SF")])
;; 256-bit horizontal add/subtract of doubles, emitted as
;; vh<plusminus_mnemonic>pd.  The RTL selects elements 0..3 of operand 1
;; and elements 0..3 of operand 2 individually with vec_select.
;; Operand 2 may be a register or memory; VEX encoding only.
1095 (define_insn "avx_h<plusminus_insn>v4df3"
1096 [(set (match_operand:V4DF 0 "register_operand" "=x")
1101 (match_operand:V4DF 1 "register_operand" "x")
1102 (parallel [(const_int 0)]))
1103 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1105 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1106 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1111 (parallel [(const_int 0)]))
1112 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1114 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1115 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1117 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1118 [(set_attr "type" "sseadd")
1119 (set_attr "prefix" "vex")
1120 (set_attr "mode" "V4DF")])
;; 128-bit horizontal add/subtract of doubles.  The RTL selects elements
;; 0 and 1 of operand 1 and elements 0 and 1 of operand 2.
;; Alternative 0 emits the legacy h<plusminus_mnemonic>pd with operand 1
;; tied to the destination; alternative 1 emits the three-operand VEX
;; form.
1122 (define_insn "sse3_h<plusminus_insn>v2df3"
1123 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1127 (match_operand:V2DF 1 "register_operand" "0,x")
1128 (parallel [(const_int 0)]))
1129 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1132 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1133 (parallel [(const_int 0)]))
1134 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1137 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1138 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "isa" "noavx,avx")
1140 (set_attr "type" "sseadd")
1141 (set_attr "prefix" "orig,vex")
1142 (set_attr "mode" "V2DF")])
;; 256-bit horizontal add/subtract of floats, emitted as
;; vh<plusminus_mnemonic>ps.  The RTL first selects elements 0..3 of
;; operand 1 and of operand 2, then elements 4..7 of operand 1 and of
;; operand 2, in that order.  VEX encoding only.
1144 (define_insn "avx_h<plusminus_insn>v8sf3"
1145 [(set (match_operand:V8SF 0 "register_operand" "=x")
1151 (match_operand:V8SF 1 "register_operand" "x")
1152 (parallel [(const_int 0)]))
1153 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1155 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1156 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1160 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1161 (parallel [(const_int 0)]))
1162 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1164 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1169 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1170 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1172 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1173 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1176 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1177 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1179 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1180 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1182 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1183 [(set_attr "type" "sseadd")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "V8SF")])
;; 128-bit horizontal add/subtract of floats.  The RTL selects elements
;; 0..3 of operand 1 and elements 0..3 of operand 2.  Alternative 0 is
;; the legacy h<plusminus_mnemonic>ps (operand 1 tied to the destination,
;; prefix_rep "1"); alternative 1 is the three-operand VEX form.
1187 (define_insn "sse3_h<plusminus_insn>v4sf3"
1188 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1193 (match_operand:V4SF 1 "register_operand" "0,x")
1194 (parallel [(const_int 0)]))
1195 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1197 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1198 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1202 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1203 (parallel [(const_int 0)]))
1204 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1206 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1207 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1210 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1211 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "isa" "noavx,avx")
1213 (set_attr "type" "sseadd")
1214 (set_attr "atom_unit" "complex")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "prefix_rep" "1,*")
1217 (set_attr "mode" "V4SF")])
;; Expander for a V4DF plus-reduction of operand 1 into operand 0.
;; Emits three insns using two fresh V4DF pseudos:
;;   tmp  = avx_haddv4df3 (op1, op1)
;;   tmp2 = avx_vperm2f128v4df3 (tmp, tmp, 1)
;;   op0  = addv4df3 (tmp, tmp2)
1219 (define_expand "reduc_splus_v4df"
1220 [(match_operand:V4DF 0 "register_operand" "")
1221 (match_operand:V4DF 1 "register_operand" "")]
1224 rtx tmp = gen_reg_rtx (V4DFmode);
1225 rtx tmp2 = gen_reg_rtx (V4DFmode);
1226 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1227 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1228 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for a V2DF plus-reduction: emits a single sse3_haddv2df3
;; with operand 1 supplied as both sources and operand 0 as destination.
1232 (define_expand "reduc_splus_v2df"
1233 [(match_operand:V2DF 0 "register_operand" "")
1234 (match_operand:V2DF 1 "register_operand" "")]
1237 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for a V8SF plus-reduction of operand 1 into operand 0.
;; Emits two avx_haddv8sf3 passes (op1 with itself into tmp, then tmp
;; with itself into tmp2), an avx_vperm2f128v8sf3 of tmp2 with immediate 1
;; (written back into tmp, which is reused), and a final addv8sf3 of tmp
;; and tmp2.
1241 (define_expand "reduc_splus_v8sf"
1242 [(match_operand:V8SF 0 "register_operand" "")
1243 (match_operand:V8SF 1 "register_operand" "")]
1246 rtx tmp = gen_reg_rtx (V8SFmode);
1247 rtx tmp2 = gen_reg_rtx (V8SFmode);
1248 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1249 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1250 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1251 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for a V4SF plus-reduction of operand 1 into operand 0.
;; Two sequences are visible: two chained sse3_haddv4sf3 insns through a
;; fresh pseudo, or a call to ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the condition choosing between the two sequences is not
;; visible in this listing -- confirm against the full pattern.
1255 (define_expand "reduc_splus_v4sf"
1256 [(match_operand:V4SF 0 "register_operand" "")
1257 (match_operand:V4SF 1 "register_operand" "")]
1262 rtx tmp = gen_reg_rtx (V4SFmode);
1263 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1264 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1267 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1271 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry is enabled by its own target condition: the 256-bit integer
;; modes require TARGET_AVX2, V8SF and V4DF require TARGET_AVX, and V4SF
;; requires TARGET_SSE.
1272 (define_mode_iterator REDUC_SMINMAX_MODE
1273 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1274 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1275 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1276 (V4SF "TARGET_SSE")])
;; Signed min/max reduction expander, instantiated over the smaxmin code
;; iterator and REDUC_SMINMAX_MODE.  The expansion is delegated to
;; ix86_expand_reduc, which receives gen_<code><mode>3 as the generator
;; together with the destination (operand 0) and source (operand 1).
1278 (define_expand "reduc_<code>_<mode>"
1279 [(smaxmin:REDUC_SMINMAX_MODE
1280 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1281 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1284 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes; like the pattern above it
;; hands gen_<code><mode>3 plus operands 0 and 1 to ix86_expand_reduc.
;; NOTE(review): the code iterator and the insn condition are not visible
;; in this listing -- confirm against the full pattern.
1288 (define_expand "reduc_<code>_<mode>"
1290 (match_operand:VI_256 0 "register_operand" "")
1291 (match_operand:VI_256 1 "register_operand" ""))]
1294 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; V8HI unsigned-minimum reduction expander: delegates to
;; ix86_expand_reduc with gen_uminv8hi3 as the generator.
1298 (define_expand "reduc_umin_v8hi"
1300 (match_operand:V8HI 0 "register_operand" "")
1301 (match_operand:V8HI 1 "register_operand" ""))]
1304 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1310 ;; Parallel floating point comparisons
1312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare with an explicit predicate immediate, for all VF modes.
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand and
;; is printed as the first (AT&T) operand of vcmp<ssemodesuffix>.
;; VEX encoding only; the immediate contributes one byte to the length.
1314 (define_insn "avx_cmp<mode>3"
1315 [(set (match_operand:VF 0 "register_operand" "=x")
1317 [(match_operand:VF 1 "register_operand" "x")
1318 (match_operand:VF 2 "nonimmediate_operand" "xm")
1319 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1322 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1323 [(set_attr "type" "ssecmp")
1324 (set_attr "length_immediate" "1")
1325 (set_attr "prefix" "vex")
1326 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector variant of the immediate-predicate compare for the
;; 128-bit float modes: emits vcmp<ssescalarmodesuffix> and reports the
;; scalar mode in the "mode" attribute.  Operand 3 is the 0..31 predicate
;; immediate.
1328 (define_insn "avx_vmcmp<mode>3"
1329 [(set (match_operand:VF_128 0 "register_operand" "=x")
1332 [(match_operand:VF_128 1 "register_operand" "x")
1333 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1334 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1339 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1340 [(set_attr "type" "ssecmp")
1341 (set_attr "length_immediate" "1")
1342 (set_attr "prefix" "vex")
1343 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare for commutative comparison codes.
;; Operand 3 is the comparison itself (sse_comparison_operator); the insn
;; condition additionally requires its code to be of class
;; RTX_COMM_COMPARE, and operand 1 carries the "%" commutative marker.
;; The predicate is printed through the %D3 operand modifier.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
1345 (define_insn "*<sse>_maskcmp<mode>3_comm"
1346 [(set (match_operand:VF 0 "register_operand" "=x,x")
1347 (match_operator:VF 3 "sse_comparison_operator"
1348 [(match_operand:VF 1 "register_operand" "%0,x")
1349 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1351 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1353 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1354 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1355 [(set_attr "isa" "noavx,avx")
1356 (set_attr "type" "ssecmp")
1357 (set_attr "length_immediate" "1")
1358 (set_attr "prefix" "orig,vex")
1359 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any code accepted by
;; sse_comparison_operator (operand 3).  Unlike the "_comm" variant,
;; operand 1 has no commutative marker.  Alternative 0 is the legacy
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the VEX three-operand form.
1361 (define_insn "<sse>_maskcmp<mode>3"
1362 [(set (match_operand:VF 0 "register_operand" "=x,x")
1363 (match_operator:VF 3 "sse_comparison_operator"
1364 [(match_operand:VF 1 "register_operand" "0,x")
1365 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1368 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1369 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1370 [(set_attr "isa" "noavx,avx")
1371 (set_attr "type" "ssecmp")
1372 (set_attr "length_immediate" "1")
1373 (set_attr "prefix" "orig,vex")
1374 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector mask compare for the 128-bit float modes: emits
;; cmp%D3<ssescalarmodesuffix> (legacy) or its VEX form and reports the
;; scalar mode.  Note that length_immediate is "1" only for the legacy
;; alternative and "*" (default) for the VEX alternative.
1376 (define_insn "<sse>_vmmaskcmp<mode>3"
1377 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1379 (match_operator:VF_128 3 "sse_comparison_operator"
1380 [(match_operand:VF_128 1 "register_operand" "0,x")
1381 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1386 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1387 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1388 [(set_attr "isa" "noavx,avx")
1389 (set_attr "type" "ssecmp")
1390 (set_attr "length_immediate" "1,*")
1391 (set_attr "prefix" "orig,vex")
1392 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFPmode from element 0 of two
;; <ssevecmode> operands (operand 1 may be memory).  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.  "%v" in the template
;; pairs with prefix "maybe_vex"; prefix_data16 is chosen by whether the
;; mode attribute is DF.
1394 (define_insn "<sse>_comi"
1395 [(set (reg:CCFP FLAGS_REG)
1398 (match_operand:<ssevecmode> 0 "register_operand" "x")
1399 (parallel [(const_int 0)]))
1401 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1402 (parallel [(const_int 0)]))))]
1403 "SSE_FLOAT_MODE_P (<MODE>mode)"
1404 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1405 [(set_attr "type" "ssecomi")
1406 (set_attr "prefix" "maybe_vex")
1407 (set_attr "prefix_rep" "0")
1408 (set (attr "prefix_data16")
1409 (if_then_else (eq_attr "mode" "DF")
1411 (const_string "0")))
1412 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but sets FLAGS_REG in CCFPUmode and
;; emits %vucomi<ssemodesuffix>.  Compares element 0 of the two
;; <ssevecmode> operands; prefix_data16 is chosen by whether the mode
;; attribute is DF.
1414 (define_insn "<sse>_ucomi"
1415 [(set (reg:CCFPU FLAGS_REG)
1418 (match_operand:<ssevecmode> 0 "register_operand" "x")
1419 (parallel [(const_int 0)]))
1421 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1422 (parallel [(const_int 0)]))))]
1423 "SSE_FLOAT_MODE_P (<MODE>mode)"
1424 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1425 [(set_attr "type" "ssecomi")
1426 (set_attr "prefix" "maybe_vex")
1427 (set_attr "prefix_rep" "0")
1428 (set (attr "prefix_data16")
1429 (if_then_else (eq_attr "mode" "DF")
1431 (const_string "0")))
1432 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit modes with a floating-point
;; comparison.  Operand 3 is the comparison of operands 4 and 5 (VF_256);
;; operands 1 and 2 are the V_256 values being selected between.  The
;; condition requires the data mode and the compare mode to have the same
;; number of elements.  Expansion is done by ix86_expand_fp_vcond.
1434 (define_expand "vcond<V_256:mode><VF_256:mode>"
1435 [(set (match_operand:V_256 0 "register_operand" "")
1437 (match_operator 3 ""
1438 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1439 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1440 (match_operand:V_256 1 "general_operand" "")
1441 (match_operand:V_256 2 "general_operand" "")))]
1443 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1444 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1446 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander: operand 3 compares operands
;; 4 and 5 (VF_128), operands 1 and 2 are the V_128 values selected
;; between, and the element counts of the two modes must match.
;; Expansion is done by ix86_expand_fp_vcond.
1451 (define_expand "vcond<V_128:mode><VF_128:mode>"
1452 [(set (match_operand:V_128 0 "register_operand" "")
1454 (match_operator 3 ""
1455 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1456 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1457 (match_operand:V_128 1 "general_operand" "")
1458 (match_operand:V_128 2 "general_operand" "")))]
1460 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1461 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1463 bool ok = ix86_expand_fp_vcond (operands);
1468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1470 ;; Parallel floating point logical operations
1472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not for all VF modes.  The output template is built in C:
;; the suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set
;; and <ssemodesuffix> otherwise, and which_alternative selects the
;; legacy two-operand "andn%s" or the VEX three-operand "vandn%s" format.
;; The result is formatted into a static 32-byte buffer with snprintf.
1474 (define_insn "<sse>_andnot<mode>3"
1475 [(set (match_operand:VF 0 "register_operand" "=x,x")
1478 (match_operand:VF 1 "register_operand" "0,x"))
1479 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1482 static char buf[32];
1485 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1487 switch (which_alternative)
1490 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1493 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1499 snprintf (buf, sizeof (buf), insn, suffix);
1502 [(set_attr "isa" "noavx,avx")
1503 (set_attr "type" "sselog")
1504 (set_attr "prefix" "orig,vex")
1505 (set_attr "mode" "<MODE>")])
;; Expander for the packed float logical operations named <code><mode>3.
;; Both inputs may be register or memory; the preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
1507 (define_expand "<code><mode>3"
1508 [(set (match_operand:VF 0 "register_operand" "")
1510 (match_operand:VF 1 "nonimmediate_operand" "")
1511 (match_operand:VF 2 "nonimmediate_operand" "")))]
1513 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed float logical operations.  Requires TARGET_SSE and
;; ix86_binary_operator_ok; operand 1 is marked commutative ("%").  As in
;; the and-not pattern, the template is built in C: the suffix is "ps"
;; under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (else <ssemodesuffix>) and
;; which_alternative picks the legacy or the VEX "<logic>%s" format.
1515 (define_insn "*<code><mode>3"
1516 [(set (match_operand:VF 0 "register_operand" "=x,x")
1518 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1519 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1520 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1522 static char buf[32];
1525 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1527 switch (which_alternative)
1530 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1533 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1539 snprintf (buf, sizeof (buf), insn, suffix);
1542 [(set_attr "isa" "noavx,avx")
1543 (set_attr "type" "sselog")
1544 (set_attr "prefix" "orig,vex")
1545 (set_attr "mode" "<MODE>")])
;; Vector copysign expander built from logical operations.  Operand 3 is
;; a mask from ix86_build_signbit_mask (<MODE>mode, 1, 0); operands 4 and
;; 5 are fresh pseudos.  The RTL combines operand 1 with the complement
;; of the mask, combines operand 2 with the mask itself, and IORs the two
;; temporaries (operands 4 and 5) into operand 0.
1547 (define_expand "copysign<mode>3"
1550 (not:VF (match_dup 3))
1551 (match_operand:VF 1 "nonimmediate_operand" "")))
1553 (and:VF (match_dup 3)
1554 (match_operand:VF 2 "nonimmediate_operand" "")))
1555 (set (match_operand:VF 0 "register_operand" "")
1556 (ior:VF (match_dup 4) (match_dup 5)))]
1559 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1561 operands[4] = gen_reg_rtx (<MODE>mode);
1562 operands[5] = gen_reg_rtx (<MODE>mode);
1565 ;; Also define scalar versions. These are used for abs, neg, and
1566 ;; conditional move. Using subregs into vector modes causes register
1567 ;; allocation lossage. These patterns do not allow memory operands
1568 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All three operands must be registers.  The
;; instruction emitted is the packed form: the suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix> otherwise,
;; and the "mode" attribute is the vector mode <ssevecmode>.
1570 (define_insn "*andnot<mode>3"
1571 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1574 (match_operand:MODEF 1 "register_operand" "0,x"))
1575 (match_operand:MODEF 2 "register_operand" "x,x")))]
1576 "SSE_FLOAT_MODE_P (<MODE>mode)"
1578 static char buf[32];
1581 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1583 switch (which_alternative)
1586 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1589 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1595 snprintf (buf, sizeof (buf), insn, suffix);
1598 [(set_attr "isa" "noavx,avx")
1599 (set_attr "type" "sselog")
1600 (set_attr "prefix" "orig,vex")
1601 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations, register operands only, operand 1
;; marked commutative.  Emits the packed "<logic>%s" instruction with the
;; same suffix selection as the scalar and-not pattern and reports the
;; vector mode <ssevecmode> in the "mode" attribute.
1603 (define_insn "*<code><mode>3"
1604 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1606 (match_operand:MODEF 1 "register_operand" "%0,x")
1607 (match_operand:MODEF 2 "register_operand" "x,x")))]
1608 "SSE_FLOAT_MODE_P (<MODE>mode)"
1610 static char buf[32];
1613 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1615 switch (which_alternative)
1618 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1621 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1627 snprintf (buf, sizeof (buf), insn, suffix);
1630 [(set_attr "isa" "noavx,avx")
1631 (set_attr "type" "sselog")
1632 (set_attr "prefix" "orig,vex")
1633 (set_attr "mode" "<ssevecmode>")])
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; FMA4 floating point multiply/accumulate instructions. This
1638 ;; includes the scalar version of the instructions as well as the vector.
1641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1643 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1644 ;; combine to generate a multiply/add with two memory references. We then
1645 ;; split this insn, into loading up the destination register with one of the
1646 ;; memory operations. If we don't manage to split the insn, reload will
1647 ;; generate the appropriate moves. The reason this is needed, is that combine
1648 ;; has already folded one of the memory references into both the multiply and
1649 ;; add insns, and it can't generate a new pseudo. I.e.:
1650 ;; (set (reg1) (mem (addr1)))
1651 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1652 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1654 ;; ??? This is historic, pre-dating the gimple fma transformation.
1655 ;; We could now properly represent that only one memory operand is
1656 ;; allowed and not be penalized during optimization.
1658 ;; Intrinsic FMA operations.
1660 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander fma<mode>4 over FMAMODE: three register-or-
;; memory inputs, register output.  Enabled when either TARGET_FMA or
;; TARGET_FMA4 is set and TARGET_SSE_MATH is also set.
1661 (define_expand "fma<mode>4"
1662 [(set (match_operand:FMAMODE 0 "register_operand")
1664 (match_operand:FMAMODE 1 "nonimmediate_operand")
1665 (match_operand:FMAMODE 2 "nonimmediate_operand")
1666 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1667 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fms<mode>4: like fma<mode>4 but operand 3 (the
;; addend) is wrapped in neg.  Same enabling condition.
1669 (define_expand "fms<mode>4"
1670 [(set (match_operand:FMAMODE 0 "register_operand")
1672 (match_operand:FMAMODE 1 "nonimmediate_operand")
1673 (match_operand:FMAMODE 2 "nonimmediate_operand")
1674 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1675 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnma<mode>4: like fma<mode>4 but operand 1 (a
;; multiplicand) is wrapped in neg.  Same enabling condition.
1677 (define_expand "fnma<mode>4"
1678 [(set (match_operand:FMAMODE 0 "register_operand")
1680 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1681 (match_operand:FMAMODE 2 "nonimmediate_operand")
1682 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1683 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnms<mode>4: both operand 1 and operand 3 are
;; wrapped in neg.  Same enabling condition as fma<mode>4.
1685 (define_expand "fnms<mode>4"
1686 [(set (match_operand:FMAMODE 0 "register_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1689 (match_operand:FMAMODE 2 "nonimmediate_operand")
1690 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1691 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1693 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin expander with the same operand shape as fma<mode>4, but its
;; condition is only TARGET_FMA || TARGET_FMA4 (no TARGET_SSE_MATH test).
1694 (define_expand "fma4i_fmadd_<mode>"
1695 [(set (match_operand:FMAMODE 0 "register_operand")
1697 (match_operand:FMAMODE 1 "nonimmediate_operand")
1698 (match_operand:FMAMODE 2 "nonimmediate_operand")
1699 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1700 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> with a separate destination.
;; Operand 1 is commutative with operand 2.  The two alternatives allow
;; at most one memory input: operand 3 in alternative 0, operand 2 in
;; alternative 1.
1702 (define_insn "*fma4i_fmadd_<mode>"
1703 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1705 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1706 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1707 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1709 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1710 [(set_attr "type" "ssemuladd")
1711 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix>; same operand constraints as the
;; fmadd pattern (memory permitted in operand 3 or in operand 2, never
;; both).
1713 (define_insn "*fma4i_fmsub_<mode>"
1714 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1716 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1717 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1719 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1721 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1722 [(set_attr "type" "ssemuladd")
1723 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix>; same operand constraints as the
;; fmadd pattern (memory permitted in operand 3 or in operand 2, never
;; both).
1725 (define_insn "*fma4i_fnmadd_<mode>"
1726 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1729 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1730 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1731 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1733 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix>; same operand constraints as the
;; fmadd pattern (memory permitted in operand 3 or in operand 2, never
;; both).
1737 (define_insn "*fma4i_fnmsub_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1741 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1742 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1744 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1746 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1751 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar-in-vector FMA4 multiply-add over the 128-bit
;; float modes.  The preparation statement sets operand 4 to the zero
;; constant of the vector mode (CONST0_RTX).
1753 (define_expand "fma4i_vmfmadd_<mode>"
1754 [(set (match_operand:VF_128 0 "register_operand")
1757 (match_operand:VF_128 1 "nonimmediate_operand")
1758 (match_operand:VF_128 2 "nonimmediate_operand")
1759 (match_operand:VF_128 3 "nonimmediate_operand"))
1764 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar-in-vector multiply-add over the 128-bit float
;; modes, taking three register-or-memory inputs.
;; NOTE(review): the remainder of the RTL template and the enabling
;; condition are not visible in this listing -- confirm against the full
;; pattern.
1767 (define_expand "fmai_vmfmadd_<mode>"
1768 [(set (match_operand:VF_128 0 "register_operand")
1771 (match_operand:VF_128 1 "nonimmediate_operand")
1772 (match_operand:VF_128 2 "nonimmediate_operand")
1773 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand vfmadd with the destination tied to an input.
;; The alternative determines the operand-order form:
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
1778 (define_insn "*fmai_fmadd_<mode>"
1779 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1782 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1783 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1784 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1789 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1790 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1791 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfmsub.  Alternatives 0/1/2 select the 132/213/231
;; forms according to which input is tied to the destination (operand 1
;; in alternatives 0 and 1, operand 3 in alternative 2).
1795 (define_insn "*fmai_fmsub_<mode>"
1796 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1799 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1800 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1802 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1807 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1808 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1809 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1810 [(set_attr "type" "ssemuladd")
1811 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmadd.  Alternatives 0/1/2 select the
;; 132/213/231 forms according to which input is tied to the destination
;; (operand 1 in alternatives 0 and 1, operand 3 in alternative 2).
1813 (define_insn "*fmai_fnmadd_<mode>"
1814 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1818 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1819 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1820 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1825 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1826 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1827 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1828 [(set_attr "type" "ssemuladd")
1829 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmsub.  Alternatives 0/1/2 select the
;; 132/213/231 forms according to which input is tied to the destination
;; (operand 1 in alternatives 0 and 1, operand 3 in alternative 2).
1831 (define_insn "*fmai_fnmsub_<mode>"
1832 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1836 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1837 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1839 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1844 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1845 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1846 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd<ssescalarmodesuffix>.  Operand 4 must be a
;; zero constant (const0_operand) and does not appear in the output
;; template.  Memory is permitted in operand 3 (alternative 0) or in
;; operand 2 (alternative 1).
1850 (define_insn "*fma4i_vmfmadd_<mode>"
1851 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1854 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1855 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1856 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1857 (match_operand:VF_128 4 "const0_operand" "")
1860 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1861 [(set_attr "type" "ssemuladd")
1862 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix>.  Operand 4 must be a
;; zero constant (const0_operand) and does not appear in the output
;; template; operand constraints match the vmfmadd pattern.
1864 (define_insn "*fma4i_vmfmsub_<mode>"
1865 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1868 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1869 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1871 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1872 (match_operand:VF_128 4 "const0_operand" "")
1875 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876 [(set_attr "type" "ssemuladd")
1877 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix>.  Operand 4 must be a
;; zero constant (const0_operand) and does not appear in the output
;; template; operand constraints match the vmfmadd pattern.
1879 (define_insn "*fma4i_vmfnmadd_<mode>"
1880 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1884 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1885 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1886 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1887 (match_operand:VF_128 4 "const0_operand" "")
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix>.  Operand 4 must be a
;; zero constant (const0_operand) and does not appear in the output
;; template; operand constraints match the vmfmadd pattern.
1894 (define_insn "*fma4i_vmfnmsub_<mode>"
1895 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1899 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1900 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1902 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1903 (match_operand:VF_128 4 "const0_operand" "")
1906 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1907 [(set_attr "type" "ssemuladd")
1908 (set_attr "mode" "<MODE>")])
1910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1912 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1916 ;; It would be possible to represent these without the UNSPEC as a vec_merge of
1919 ;; (fma op1 op2 op3)
1920 ;; (fma op1 op2 (neg op3))
1923 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply with alternating add/subtract over
;; all VF modes.  The three inputs are grouped in a single operand vector
;; and may each be register or memory.  Enabled when TARGET_FMA or
;; TARGET_FMA4 is set.
1925 (define_expand "fmaddsub_<mode>"
1926 [(set (match_operand:VF 0 "register_operand")
1928 [(match_operand:VF 1 "nonimmediate_operand")
1929 (match_operand:VF 2 "nonimmediate_operand")
1930 (match_operand:VF 3 "nonimmediate_operand")]
1932 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> with a separate destination.
;; Operand 1 is commutative with operand 2; memory is permitted in
;; operand 3 (alternative 0) or in operand 2 (alternative 1).
1934 (define_insn "*fma4_fmaddsub_<mode>"
1935 [(set (match_operand:VF 0 "register_operand" "=x,x")
1937 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1938 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1939 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1942 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1943 [(set_attr "type" "ssemuladd")
1944 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> with a separate destination;
;; operand constraints match the fmaddsub pattern.
1946 (define_insn "*fma4_fmsubadd_<mode>"
1947 [(set (match_operand:VF 0 "register_operand" "=x,x")
1949 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1950 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1952 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1955 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1956 [(set_attr "type" "ssemuladd")
1957 (set_attr "mode" "<MODE>")])
1959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1961 ;; FMA3 floating point multiply/accumulate instructions.
1963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE with the destination tied to an
;; input.  The alternative determines the operand-order form:
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
1965 (define_insn "*fma_fmadd_<mode>"
1966 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1968 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1969 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1970 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1973 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1974 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1975 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1976 [(set_attr "type" "ssemuladd")
1977 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE.  Alternatives 0/1/2 select the
;; 132/213/231 forms according to which input is tied to the destination
;; (operand 1 in alternatives 0 and 1, operand 3 in alternative 2).
1979 (define_insn "*fma_fmsub_<mode>"
1980 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1982 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1983 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1985 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1988 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1989 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1990 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1991 [(set_attr "type" "ssemuladd")
1992 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE.  Alternatives 0/1/2 select the
;; 132/213/231 forms according to which input is tied to the destination
;; (operand 1 in alternatives 0 and 1, operand 3 in alternative 2).
1994 (define_insn "*fma_fnmadd_<mode>"
1995 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1998 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1999 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2000 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2003 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2004 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2005 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2006 [(set_attr "type" "ssemuladd")
2007 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE.  Alternatives 0/1/2 select the
;; 132/213/231 forms according to which input is tied to the destination
;; (operand 1 in alternatives 0 and 1, operand 3 in alternative 2).
2009 (define_insn "*fma_fnmsub_<mode>"
2010 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2013 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2014 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2016 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2019 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2020 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2021 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2022 [(set_attr "type" "ssemuladd")
2023 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over all VF modes.  Alternatives 0/1/2 select
;; the 132/213/231 forms according to which input is tied to the
;; destination (operand 1 in alternatives 0 and 1, operand 3 in
;; alternative 2).
2025 (define_insn "*fma_fmaddsub_<mode>"
2026 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2028 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2029 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2030 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2034 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2035 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2036 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2037 [(set_attr "type" "ssemuladd")
2038 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over all VF modes.  Alternatives 0/1/2 select
;; the 132/213/231 forms according to which input is tied to the
;; destination (operand 1 in alternatives 0 and 1, operand 3 in
;; alternative 2).
2040 (define_insn "*fma_fmsubadd_<mode>"
2041 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2043 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2044 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2046 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2050 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2051 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2052 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<MODE>")])
2056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2058 ;; Parallel single-precision floating point conversion operations
2060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CVTPI2PS: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; float and writes a V4SF destination that is tied to operand 1.
;; Legacy two-operand encoding only.
2062 (define_insn "sse_cvtpi2ps"
2063 [(set (match_operand:V4SF 0 "register_operand" "=x")
2066 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2067 (match_operand:V4SF 1 "register_operand" "0")
2070 "cvtpi2ps\t{%2, %0|%0, %2}"
2071 [(set_attr "type" "ssecvt")
2072 (set_attr "mode" "V4SF")])
;; CVTPS2PI: the V2SI result (constraint "y") is elements 0 and 1 of a
;; V4SI unspec applied to the V4SF operand 1, which may be a register or
;; memory.  Tagged with unit "mmx" and mode "DI".
2074 (define_insn "sse_cvtps2pi"
2075 [(set (match_operand:V2SI 0 "register_operand" "=y")
2077 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2079 (parallel [(const_int 0) (const_int 1)])))]
2081 "cvtps2pi\t{%1, %0|%0, %1}"
2082 [(set_attr "type" "ssecvt")
2083 (set_attr "unit" "mmx")
2084 (set_attr "mode" "DI")])
;; CVTTPS2PI: the V2SI result (constraint "y") is elements 0 and 1 of
;; (fix:V4SI operand 1), i.e. the conversion is expressed with the RTL
;; fix code rather than an unspec.  Tagged with unit "mmx", prefix_rep
;; "0" and mode "SF".
2086 (define_insn "sse_cvttps2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2089 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2090 (parallel [(const_int 0) (const_int 1)])))]
2092 "cvttps2pi\t{%1, %0|%0, %1}"
2093 [(set_attr "type" "ssecvt")
2094 (set_attr "unit" "mmx")
2095 (set_attr "prefix_rep" "0")
2096 (set_attr "mode" "SF")])
;; CVTSI2SS: converts the SImode operand 2 to SF and combines it with the
;; V4SF operand 1 into a V4SF destination.  Three alternatives:
;;   alt 0: legacy, source in a general register, operand 1 tied
;;   alt 1: legacy, source in memory, operand 1 tied
;;   alt 2: VEX three-operand form, source register or memory
;; The two legacy alternatives are split so that the per-CPU decode
;; attributes can differ between the register and memory source.
2098 (define_insn "sse_cvtsi2ss"
2099 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2102 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2103 (match_operand:V4SF 1 "register_operand" "0,0,x")
2107 cvtsi2ss\t{%2, %0|%0, %2}
2108 cvtsi2ss\t{%2, %0|%0, %2}
2109 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2110 [(set_attr "isa" "noavx,noavx,avx")
2111 (set_attr "type" "sseicvt")
2112 (set_attr "athlon_decode" "vector,double,*")
2113 (set_attr "amdfam10_decode" "vector,double,*")
2114 (set_attr "bdver1_decode" "double,direct,*")
2115 (set_attr "prefix" "orig,orig,vex")
2116 (set_attr "mode" "SF")])
;; 64-bit-source variant of cvtsi2ss: operand 2 is DImode and the insn is
;; enabled only when TARGET_SSE && TARGET_64BIT.  Same three alternatives
;; as the SImode pattern; the legacy alternatives set prefix_rex to 1 and
;; the VEX alternative sets length_vex to 4.
2118 (define_insn "sse_cvtsi2ssq"
2119 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2122 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2123 (match_operand:V4SF 1 "register_operand" "0,0,x")
2125 "TARGET_SSE && TARGET_64BIT"
2127 cvtsi2ssq\t{%2, %0|%0, %2}
2128 cvtsi2ssq\t{%2, %0|%0, %2}
2129 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2130 [(set_attr "isa" "noavx,noavx,avx")
2131 (set_attr "type" "sseicvt")
2132 (set_attr "athlon_decode" "vector,double,*")
2133 (set_attr "amdfam10_decode" "vector,double,*")
2134 (set_attr "bdver1_decode" "double,direct,*")
2135 (set_attr "length_vex" "*,*,4")
2136 (set_attr "prefix_rex" "1,1,*")
2137 (set_attr "prefix" "orig,orig,vex")
2138 (set_attr "mode" "SF")])
;; CVTSS2SI from a vector operand: the SImode result is an
;; UNSPEC_FIX_NOTRUNC of element 0 of the V4SF operand 1 (register in
;; alternative 0, memory in alternative 1).  "%v" in the template pairs
;; with prefix "maybe_vex".
2140 (define_insn "sse_cvtss2si"
2141 [(set (match_operand:SI 0 "register_operand" "=r,r")
2144 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2145 (parallel [(const_int 0)]))]
2146 UNSPEC_FIX_NOTRUNC))]
2148 "%vcvtss2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "athlon_decode" "double,vector")
2151 (set_attr "bdver1_decode" "double,double")
2152 (set_attr "prefix_rep" "1")
2153 (set_attr "prefix" "maybe_vex")
2154 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a scalar SFmode operand rather
;; than an element selected from a V4SF vector; same cvtss2si template.
2156 (define_insn "sse_cvtss2si_2"
2157 [(set (match_operand:SI 0 "register_operand" "=r,r")
2158 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2159 UNSPEC_FIX_NOTRUNC))]
2161 "%vcvtss2si\t{%1, %0|%0, %1}"
2162 [(set_attr "type" "sseicvt")
2163 (set_attr "athlon_decode" "double,vector")
2164 (set_attr "amdfam10_decode" "double,double")
2165 (set_attr "bdver1_decode" "double,double")
2166 (set_attr "prefix_rep" "1")
2167 (set_attr "prefix" "maybe_vex")
2168 (set_attr "mode" "SI")])
;; cvtss2si{q}: DImode-result form of sse_cvtss2si, reading element 0 of the
;; V4SF operand 1.  Enabled only for TARGET_SSE && TARGET_64BIT.
2170 (define_insn "sse_cvtss2siq"
2171 [(set (match_operand:DI 0 "register_operand" "=r,r")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2175 (parallel [(const_int 0)]))]
2176 UNSPEC_FIX_NOTRUNC))]
2177 "TARGET_SSE && TARGET_64BIT"
2178 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "sseicvt")
2180 (set_attr "athlon_decode" "double,vector")
2181 (set_attr "bdver1_decode" "double,double")
2182 (set_attr "prefix_rep" "1")
2183 (set_attr "prefix" "maybe_vex")
2184 (set_attr "mode" "DI")])
;; Scalar-source variant of sse_cvtss2siq: operand 1 is an SFmode operand
;; and the result is DImode.  Enabled only for TARGET_SSE && TARGET_64BIT.
2186 (define_insn "sse_cvtss2siq_2"
2187 [(set (match_operand:DI 0 "register_operand" "=r,r")
2188 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2189 UNSPEC_FIX_NOTRUNC))]
2190 "TARGET_SSE && TARGET_64BIT"
2191 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2192 [(set_attr "type" "sseicvt")
2193 (set_attr "athlon_decode" "double,vector")
2194 (set_attr "amdfam10_decode" "double,double")
2195 (set_attr "bdver1_decode" "double,double")
2196 (set_attr "prefix_rep" "1")
2197 (set_attr "prefix" "maybe_vex")
2198 (set_attr "mode" "DI")])
;; cvttss2si: SImode result in a general register computed from element 0 of
;; the V4SF operand 1 (SSE register or memory).
2200 (define_insn "sse_cvttss2si"
2201 [(set (match_operand:SI 0 "register_operand" "=r,r")
2204 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2205 (parallel [(const_int 0)]))))]
2207 "%vcvttss2si\t{%1, %0|%0, %1}"
2208 [(set_attr "type" "sseicvt")
2209 (set_attr "athlon_decode" "double,vector")
2210 (set_attr "amdfam10_decode" "double,double")
2211 (set_attr "bdver1_decode" "double,double")
2212 (set_attr "prefix_rep" "1")
2213 (set_attr "prefix" "maybe_vex")
2214 (set_attr "mode" "SI")])
;; cvttss2si{q}: DImode-result form of sse_cvttss2si.
;; Enabled only for TARGET_SSE && TARGET_64BIT.
2216 (define_insn "sse_cvttss2siq"
2217 [(set (match_operand:DI 0 "register_operand" "=r,r")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2221 (parallel [(const_int 0)]))))]
2222 "TARGET_SSE && TARGET_64BIT"
2223 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2224 [(set_attr "type" "sseicvt")
2225 (set_attr "athlon_decode" "double,vector")
2226 (set_attr "amdfam10_decode" "double,double")
2227 (set_attr "bdver1_decode" "double,double")
2228 (set_attr "prefix_rep" "1")
2229 (set_attr "prefix" "maybe_vex")
2230 (set_attr "mode" "DI")])
;; vcvtdq2ps (256-bit): signed integer-to-float conversion of the V8SI
;; operand 1 into the V8SF register operand 0.
2232 (define_insn "avx_cvtdq2ps256"
2233 [(set (match_operand:V8SF 0 "register_operand" "=x")
2234 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2236 "vcvtdq2ps\t{%1, %0|%0, %1}"
2237 [(set_attr "type" "ssecvt")
2238 (set_attr "prefix" "vex")
2239 (set_attr "mode" "V8SF")])
;; cvtdq2ps (128-bit): signed integer-to-float conversion of the V4SI
;; operand 1 into the V4SF register operand 0.
2241 (define_insn "sse2_cvtdq2ps"
2242 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2245 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2246 [(set_attr "type" "ssecvt")
2247 (set_attr "prefix" "maybe_vex")
2248 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result in operand 0 from the V4SI operand 1.
;; The visible sequence is: a `float' conversion, an `lt' comparison of
;; operand 5 against operand 3 (a zero vector), an `and' of operand 6 with
;; operand 4 (a vector built from the constant 2**32), and finally
;; operand 5 plus operand 7.  Operands 5..7 are fresh V4SF pseudos.
2250 (define_expand "sse2_cvtudq2ps"
2252 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2254 (lt:V4SF (match_dup 5) (match_dup 3)))
2256 (and:V4SF (match_dup 6) (match_dup 4)))
2257 (set (match_operand:V4SF 0 "register_operand" "")
2258 (plus:V4SF (match_dup 5) (match_dup 7)))]
2261 REAL_VALUE_TYPE TWO32r;
2265 real_ldexp (&TWO32r, &dconst1, 32);
2266 x = const_double_from_real_value (TWO32r, SFmode);
2268 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2269 operands[4] = force_reg (V4SFmode,
2270 ix86_build_const_vector (V4SFmode, 1, x));
2272 for (i = 5; i < 8; i++)
2273 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq (256-bit): V8SF operand 1 to V8SI operand 0, expressed with
;; UNSPEC_FIX_NOTRUNC (contrast avx_cvttps2dq256, which uses plain `fix').
2276 (define_insn "avx_cvtps2dq256"
2277 [(set (match_operand:V8SI 0 "register_operand" "=x")
2278 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2279 UNSPEC_FIX_NOTRUNC))]
2281 "vcvtps2dq\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix" "vex")
2284 (set_attr "mode" "OI")])
;; cvtps2dq (128-bit): V4SF operand 1 to V4SI operand 0, expressed with
;; UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute is computed from
;; TARGET_AVX rather than being a fixed string.
2286 (define_insn "sse2_cvtps2dq"
2287 [(set (match_operand:V4SI 0 "register_operand" "=x")
2288 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2289 UNSPEC_FIX_NOTRUNC))]
2291 "%vcvtps2dq\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "ssecvt")
2293 (set (attr "prefix_data16")
2295 (match_test "TARGET_AVX")
2297 (const_string "1")))
2298 (set_attr "prefix" "maybe_vex")
2299 (set_attr "mode" "TI")])
;; vcvttps2dq (256-bit): `fix' conversion of the V8SF operand 1 into the
;; V8SI register operand 0.
2301 (define_insn "avx_cvttps2dq256"
2302 [(set (match_operand:V8SI 0 "register_operand" "=x")
2303 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2305 "vcvttps2dq\t{%1, %0|%0, %1}"
2306 [(set_attr "type" "ssecvt")
2307 (set_attr "prefix" "vex")
2308 (set_attr "mode" "OI")])
;; cvttps2dq (128-bit): `fix' conversion of the V4SF operand 1 into the
;; V4SI register operand 0.  prefix_rep and prefix_data16 are computed
;; from TARGET_AVX.
;; NOTE(review): prefix_data16 is specified twice below -- once as a
;; TARGET_AVX-dependent expression and once as the fixed string "0".
;; One of the two looks redundant; confirm which is intended.
2310 (define_insn "sse2_cvttps2dq"
2311 [(set (match_operand:V4SI 0 "register_operand" "=x")
2312 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2314 "%vcvttps2dq\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "ssecvt")
2316 (set (attr "prefix_rep")
2318 (match_test "TARGET_AVX")
2320 (const_string "1")))
2321 (set (attr "prefix_data16")
2323 (match_test "TARGET_AVX")
2325 (const_string "0")))
2326 (set_attr "prefix_data16" "0")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "TI")])
2330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2332 ;; Parallel double-precision floating point conversion operations
2334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: integer-to-float conversion of the V2SI operand 1 into the V2DF
;; register operand 0.  Alternative 0 (constraint "y") is tagged unit "mmx"
;; and prefix_data16 "1"; the memory alternative leaves both at "*".
2336 (define_insn "sse2_cvtpi2pd"
2337 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2338 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2340 "cvtpi2pd\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set_attr "unit" "mmx,*")
2343 (set_attr "prefix_data16" "1,*")
2344 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to the V2SI register operand 0 (constraint "y"),
;; expressed with UNSPEC_FIX_NOTRUNC.  Tagged unit "mmx".
2346 (define_insn "sse2_cvtpd2pi"
2347 [(set (match_operand:V2SI 0 "register_operand" "=y")
2348 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2349 UNSPEC_FIX_NOTRUNC))]
2351 "cvtpd2pi\t{%1, %0|%0, %1}"
2352 [(set_attr "type" "ssecvt")
2353 (set_attr "unit" "mmx")
2354 (set_attr "bdver1_decode" "double")
2355 (set_attr "prefix_data16" "1")
2356 (set_attr "mode" "DI")])
;; cvttpd2pi: `fix' conversion of the V2DF operand 1 into the V2SI register
;; operand 0 (constraint "y").  Tagged unit "mmx".
;; NOTE(review): mode attribute is "TI" here but "DI" in sse2_cvtpd2pi --
;; confirm the difference is intended.
2358 (define_insn "sse2_cvttpd2pi"
2359 [(set (match_operand:V2SI 0 "register_operand" "=y")
2360 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2362 "cvttpd2pi\t{%1, %0|%0, %1}"
2363 [(set_attr "type" "ssecvt")
2364 (set_attr "unit" "mmx")
2365 (set_attr "bdver1_decode" "double")
2366 (set_attr "prefix_data16" "1")
2367 (set_attr "mode" "TI")])
;; cvtsi2sd / vcvtsi2sd: convert the SImode operand 2 (register or memory)
;; to DFmode as part of the value stored to V2DF operand 0.
;; Alternatives 0 and 1 are the non-AVX forms with operand 1 tied to the
;; destination; alternative 2 is the AVX three-operand form.
2369 (define_insn "sse2_cvtsi2sd"
2370 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2373 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2374 (match_operand:V2DF 1 "register_operand" "0,0,x")
2378 cvtsi2sd\t{%2, %0|%0, %2}
2379 cvtsi2sd\t{%2, %0|%0, %2}
2380 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2381 [(set_attr "isa" "noavx,noavx,avx")
2382 (set_attr "type" "sseicvt")
2383 (set_attr "athlon_decode" "double,direct,*")
2384 (set_attr "amdfam10_decode" "vector,double,*")
2385 (set_attr "bdver1_decode" "double,direct,*")
2386 (set_attr "prefix" "orig,orig,vex")
2387 (set_attr "mode" "DF")])
;; cvtsi2sdq / vcvtsi2sdq: 64-bit counterpart of sse2_cvtsi2sd taking a
;; DImode operand 2.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
;; The non-AVX alternatives are marked prefix_rex "1"; the AVX alternative
;; carries an explicit length_vex of 4.
2389 (define_insn "sse2_cvtsi2sdq"
2390 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2393 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2394 (match_operand:V2DF 1 "register_operand" "0,0,x")
2396 "TARGET_SSE2 && TARGET_64BIT"
2398 cvtsi2sdq\t{%2, %0|%0, %2}
2399 cvtsi2sdq\t{%2, %0|%0, %2}
2400 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2401 [(set_attr "isa" "noavx,noavx,avx")
2402 (set_attr "type" "sseicvt")
2403 (set_attr "athlon_decode" "double,direct,*")
2404 (set_attr "amdfam10_decode" "vector,double,*")
2405 (set_attr "bdver1_decode" "double,direct,*")
2406 (set_attr "length_vex" "*,*,4")
2407 (set_attr "prefix_rex" "1,1,*")
2408 (set_attr "prefix" "orig,orig,vex")
2409 (set_attr "mode" "DF")])
;; cvtsd2si: SImode result in a general register from element 0 of the V2DF
;; operand 1 (SSE register or memory), expressed with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no amdfam10_decode
;; attribute -- confirm that is intentional.
2411 (define_insn "sse2_cvtsd2si"
2412 [(set (match_operand:SI 0 "register_operand" "=r,r")
2415 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2416 (parallel [(const_int 0)]))]
2417 UNSPEC_FIX_NOTRUNC))]
2419 "%vcvtsd2si\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "sseicvt")
2421 (set_attr "athlon_decode" "double,vector")
2422 (set_attr "bdver1_decode" "double,double")
2423 (set_attr "prefix_rep" "1")
2424 (set_attr "prefix" "maybe_vex")
2425 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand rather
;; than an element selected from a V2DF vector; same cvtsd2si template.
2427 (define_insn "sse2_cvtsd2si_2"
2428 [(set (match_operand:SI 0 "register_operand" "=r,r")
2429 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2430 UNSPEC_FIX_NOTRUNC))]
2432 "%vcvtsd2si\t{%1, %0|%0, %1}"
2433 [(set_attr "type" "sseicvt")
2434 (set_attr "athlon_decode" "double,vector")
2435 (set_attr "amdfam10_decode" "double,double")
2436 (set_attr "bdver1_decode" "double,double")
2437 (set_attr "prefix_rep" "1")
2438 (set_attr "prefix" "maybe_vex")
2439 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode-result form of sse2_cvtsd2si, reading element 0 of
;; the V2DF operand 1.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2441 (define_insn "sse2_cvtsd2siq"
2442 [(set (match_operand:DI 0 "register_operand" "=r,r")
2445 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2446 (parallel [(const_int 0)]))]
2447 UNSPEC_FIX_NOTRUNC))]
2448 "TARGET_SSE2 && TARGET_64BIT"
2449 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2450 [(set_attr "type" "sseicvt")
2451 (set_attr "athlon_decode" "double,vector")
2452 (set_attr "bdver1_decode" "double,double")
2453 (set_attr "prefix_rep" "1")
2454 (set_attr "prefix" "maybe_vex")
2455 (set_attr "mode" "DI")])
;; Scalar-source variant of sse2_cvtsd2siq: operand 1 is a DFmode operand
;; and the result is DImode.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2457 (define_insn "sse2_cvtsd2siq_2"
2458 [(set (match_operand:DI 0 "register_operand" "=r,r")
2459 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2460 UNSPEC_FIX_NOTRUNC))]
2461 "TARGET_SSE2 && TARGET_64BIT"
2462 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "athlon_decode" "double,vector")
2465 (set_attr "amdfam10_decode" "double,double")
2466 (set_attr "bdver1_decode" "double,double")
2467 (set_attr "prefix_rep" "1")
2468 (set_attr "prefix" "maybe_vex")
2469 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register computed from element 0
;; of the V2DF operand 1 (SSE register or memory).
2471 (define_insn "sse2_cvttsd2si"
2472 [(set (match_operand:SI 0 "register_operand" "=r,r")
2475 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2476 (parallel [(const_int 0)]))))]
2478 "%vcvttsd2si\t{%1, %0|%0, %1}"
2479 [(set_attr "type" "sseicvt")
2480 (set_attr "athlon_decode" "double,vector")
2481 (set_attr "amdfam10_decode" "double,double")
2482 (set_attr "bdver1_decode" "double,double")
2483 (set_attr "prefix_rep" "1")
2484 (set_attr "prefix" "maybe_vex")
2485 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode-result form of sse2_cvttsd2si.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2487 (define_insn "sse2_cvttsd2siq"
2488 [(set (match_operand:DI 0 "register_operand" "=r,r")
2491 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2492 (parallel [(const_int 0)]))))]
2493 "TARGET_SSE2 && TARGET_64BIT"
2494 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): integer-to-float conversion of the V4SI
;; operand 1 into the V4DF register operand 0.
2503 (define_insn "avx_cvtdq2pd256"
2504 [(set (match_operand:V4DF 0 "register_operand" "=x")
2505 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2507 "vcvtdq2pd\t{%1, %0|%0, %1}"
2508 [(set_attr "type" "ssecvt")
2509 (set_attr "prefix" "vex")
2510 (set_attr "mode" "V4DF")])
;; vcvtdq2pd taking a V8SI operand 1 and using elements 0..3 of it to
;; produce the V4DF operand 0.  The template prints the source as %x1.
2512 (define_insn "avx_cvtdq2pd256_2"
2513 [(set (match_operand:V4DF 0 "register_operand" "=x")
2516 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2517 (parallel [(const_int 0) (const_int 1)
2518 (const_int 2) (const_int 3)]))))]
2520 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2521 [(set_attr "type" "ssecvt")
2522 (set_attr "prefix" "vex")
2523 (set_attr "mode" "V4DF")])
;; cvtdq2pd (128-bit): uses elements 0 and 1 of the V4SI operand 1 to
;; produce the V2DF operand 0.  The Intel-syntax half of the template
;; prints the source with the %q modifier.
2525 (define_insn "sse2_cvtdq2pd"
2526 [(set (match_operand:V2DF 0 "register_operand" "=x")
2529 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2530 (parallel [(const_int 0) (const_int 1)]))))]
2532 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2533 [(set_attr "type" "ssecvt")
2534 (set_attr "prefix" "maybe_vex")
2535 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF operand 1 to V4SI operand 0, expressed with
;; UNSPEC_FIX_NOTRUNC (contrast avx_cvttpd2dq256, which uses plain `fix').
2537 (define_insn "avx_cvtpd2dq256"
2538 [(set (match_operand:V4SI 0 "register_operand" "=x")
2539 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2540 UNSPEC_FIX_NOTRUNC))]
2542 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2543 [(set_attr "type" "ssecvt")
2544 (set_attr "prefix" "vex")
2545 (set_attr "mode" "OI")])
;; Expander with a V8SI destination whose value involves the V4SI unspec
;; conversion of the V4DF operand 1 together with operands[2], which the
;; preparation statement sets to the V4SI zero vector.
2547 (define_expand "avx_cvtpd2dq256_2"
2548 [(set (match_operand:V8SI 0 "register_operand" "")
2550 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2554 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvtpd2dq256_2 expansion: vcvtpd2dq{y} writing the
;; V8SI operand 0 (printed as %x0), with operand 2 required to be a V4SI
;; zero constant (const0_operand).
2556 (define_insn "*avx_cvtpd2dq256_2"
2557 [(set (match_operand:V8SI 0 "register_operand" "=x")
2559 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2561 (match_operand:V4SI 2 "const0_operand" "")))]
2563 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2564 [(set_attr "type" "ssecvt")
2565 (set_attr "prefix" "vex")
2566 (set_attr "mode" "OI")])
;; Expander with a V4SI destination whose value involves the V2SI unspec
;; conversion of the V2DF operand 1 together with operands[2], which the
;; preparation statement sets to the V2SI zero vector.
2568 (define_expand "sse2_cvtpd2dq"
2569 [(set (match_operand:V4SI 0 "register_operand" "")
2571 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2575 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expansion.  The output code returns
;; either the "vcvtpd2dq{x}" or the plain "cvtpd2dq" template; operand 2
;; must be a V2SI zero constant (const0_operand).
2577 (define_insn "*sse2_cvtpd2dq"
2578 [(set (match_operand:V4SI 0 "register_operand" "=x")
2580 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2582 (match_operand:V2SI 2 "const0_operand" "")))]
2586 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2588 return "cvtpd2dq\t{%1, %0|%0, %1}";
2590 [(set_attr "type" "ssecvt")
2591 (set_attr "prefix_rep" "1")
2592 (set_attr "prefix_data16" "0")
2593 (set_attr "prefix" "maybe_vex")
2594 (set_attr "mode" "TI")
2595 (set_attr "amdfam10_decode" "double")
2596 (set_attr "athlon_decode" "vector")
2597 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: `fix' conversion of the V4DF operand 1 into the V4SI
;; register operand 0.
2599 (define_insn "avx_cvttpd2dq256"
2600 [(set (match_operand:V4SI 0 "register_operand" "=x")
2601 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2603 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2604 [(set_attr "type" "ssecvt")
2605 (set_attr "prefix" "vex")
2606 (set_attr "mode" "OI")])
;; Expander with a V8SI destination whose value involves the V4SI `fix'
;; conversion of the V4DF operand 1 together with operands[2], which the
;; preparation statement sets to the V4SI zero vector.
2608 (define_expand "avx_cvttpd2dq256_2"
2609 [(set (match_operand:V8SI 0 "register_operand" "")
2611 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2614 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvttpd2dq256_2 expansion: vcvttpd2dq{y} writing
;; the V8SI operand 0 (printed as %x0), with operand 2 required to be a
;; V4SI zero constant (const0_operand).
2616 (define_insn "*avx_cvttpd2dq256_2"
2617 [(set (match_operand:V8SI 0 "register_operand" "=x")
2619 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2620 (match_operand:V4SI 2 "const0_operand" "")))]
2622 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2623 [(set_attr "type" "ssecvt")
2624 (set_attr "prefix" "vex")
2625 (set_attr "mode" "OI")])
;; Expander with a V4SI destination whose value involves the V2SI `fix'
;; conversion of the V2DF operand 1 together with operands[2], which the
;; preparation statement sets to the V2SI zero vector.
2627 (define_expand "sse2_cvttpd2dq"
2628 [(set (match_operand:V4SI 0 "register_operand" "")
2630 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2633 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expansion.  The output code returns
;; either the "vcvttpd2dq{x}" or the plain "cvttpd2dq" template; operand 2
;; must be a V2SI zero constant (const0_operand).
2635 (define_insn "*sse2_cvttpd2dq"
2636 [(set (match_operand:V4SI 0 "register_operand" "=x")
2638 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2639 (match_operand:V2SI 2 "const0_operand" "")))]
2643 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2645 return "cvttpd2dq\t{%1, %0|%0, %1}";
2647 [(set_attr "type" "ssecvt")
2648 (set_attr "amdfam10_decode" "double")
2649 (set_attr "athlon_decode" "vector")
2650 (set_attr "bdver1_decode" "double")
2651 (set_attr "prefix" "maybe_vex")
2652 (set_attr "mode" "TI")])
;; cvtsd2ss / vcvtsd2ss: float_truncate of the V2DF operand 2 contributes to
;; the V4SF result in operand 0.  Alternatives 0 and 1 are the non-AVX forms
;; with operand 1 tied to the destination; alternative 2 is the AVX
;; three-operand form.
2654 (define_insn "sse2_cvtsd2ss"
2655 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2658 (float_truncate:V2SF
2659 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2660 (match_operand:V4SF 1 "register_operand" "0,0,x")
2664 cvtsd2ss\t{%2, %0|%0, %2}
2665 cvtsd2ss\t{%2, %0|%0, %2}
2666 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2667 [(set_attr "isa" "noavx,noavx,avx")
2668 (set_attr "type" "ssecvt")
2669 (set_attr "athlon_decode" "vector,double,*")
2670 (set_attr "amdfam10_decode" "vector,double,*")
2671 (set_attr "bdver1_decode" "direct,direct,*")
2672 (set_attr "prefix" "orig,orig,vex")
2673 (set_attr "mode" "SF")])
;; cvtss2sd / vcvtss2sd: elements 0 and 1 of the V4SF operand 2 contribute
;; to the V2DF result in operand 0.  Alternatives 0 and 1 are the non-AVX
;; forms with operand 1 tied to the destination; alternative 2 is the AVX
;; three-operand form.
2675 (define_insn "sse2_cvtss2sd"
2676 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2680 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2681 (parallel [(const_int 0) (const_int 1)])))
2682 (match_operand:V2DF 1 "register_operand" "0,0,x")
2686 cvtss2sd\t{%2, %0|%0, %2}
2687 cvtss2sd\t{%2, %0|%0, %2}
2688 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2689 [(set_attr "isa" "noavx,noavx,avx")
2690 (set_attr "type" "ssecvt")
2691 (set_attr "amdfam10_decode" "vector,double,*")
2692 (set_attr "athlon_decode" "direct,direct,*")
2693 (set_attr "bdver1_decode" "direct,direct,*")
2694 (set_attr "prefix" "orig,orig,vex")
2695 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: float_truncate of the V4DF operand 1 into the V4SF register
;; operand 0.
2697 (define_insn "avx_cvtpd2ps256"
2698 [(set (match_operand:V4SF 0 "register_operand" "=x")
2699 (float_truncate:V4SF
2700 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2702 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2703 [(set_attr "type" "ssecvt")
2704 (set_attr "prefix" "vex")
2705 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination whose value involves the V2SF
;; float_truncate of the V2DF operand 1 together with operands[2], which
;; the preparation statement sets to the V2SF zero vector.
2707 (define_expand "sse2_cvtpd2ps"
2708 [(set (match_operand:V4SF 0 "register_operand" "")
2710 (float_truncate:V2SF
2711 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2714 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expansion.  The output code returns
;; either the "vcvtpd2ps{x}" or the plain "cvtpd2ps" template; operand 2
;; must be a V2SF zero constant (const0_operand).
2716 (define_insn "*sse2_cvtpd2ps"
2717 [(set (match_operand:V4SF 0 "register_operand" "=x")
2719 (float_truncate:V2SF
2720 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2721 (match_operand:V2SF 2 "const0_operand" "")))]
2725 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2727 return "cvtpd2ps\t{%1, %0|%0, %1}";
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "amdfam10_decode" "double")
2731 (set_attr "athlon_decode" "vector")
2732 (set_attr "bdver1_decode" "double")
2733 (set_attr "prefix_data16" "1")
2734 (set_attr "prefix" "maybe_vex")
2735 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4SF operand 1 to the V4DF register
;; operand 0.
2737 (define_insn "avx_cvtps2pd256"
2738 [(set (match_operand:V4DF 0 "register_operand" "=x")
2740 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2742 "vcvtps2pd\t{%1, %0|%0, %1}"
2743 [(set_attr "type" "ssecvt")
2744 (set_attr "prefix" "vex")
2745 (set_attr "mode" "V4DF")])
;; vcvtps2pd taking a V8SF operand 1 and using elements 0..3 of it to
;; produce the V4DF operand 0.  The template prints the source as %x1.
2747 (define_insn "*avx_cvtps2pd256_2"
2748 [(set (match_operand:V4DF 0 "register_operand" "=x")
2751 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2752 (parallel [(const_int 0) (const_int 1)
2753 (const_int 2) (const_int 3)]))))]
2755 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2756 [(set_attr "type" "ssecvt")
2757 (set_attr "prefix" "vex")
2758 (set_attr "mode" "V4DF")])
;; cvtps2pd (128-bit): uses elements 0 and 1 of the V4SF operand 1 to
;; produce the V2DF operand 0.  The Intel-syntax half of the template
;; prints the source with the %q modifier.
2760 (define_insn "sse2_cvtps2pd"
2761 [(set (match_operand:V2DF 0 "register_operand" "=x")
2764 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2765 (parallel [(const_int 0) (const_int 1)]))))]
2767 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2768 [(set_attr "type" "ssecvt")
2769 (set_attr "amdfam10_decode" "direct")
2770 (set_attr "athlon_decode" "double")
2771 (set_attr "bdver1_decode" "double")
2772 (set_attr "prefix_data16" "0")
2773 (set_attr "prefix" "maybe_vex")
2774 (set_attr "mode" "V2DF")])
;; Expander producing a V2DF operand 0 from the V4SF operand 1 in two steps:
;; a selection with indices 6, 7, 2, 3 (operand 1 is one of the inputs),
;; then a conversion using elements 0 and 1.  operands[2] is a fresh V4SF
;; pseudo allocated by the preparation statement.
2776 (define_expand "vec_unpacks_hi_v4sf"
2781 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2782 (parallel [(const_int 6) (const_int 7)
2783 (const_int 2) (const_int 3)])))
2784 (set (match_operand:V2DF 0 "register_operand" "")
2788 (parallel [(const_int 0) (const_int 1)]))))]
2790 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing a V4DF operand 0 from the V8SF operand 1: elements
;; 4..7 of operand 1 are selected first, then operand 0 is set.
;; operands[2] is a fresh V4SF pseudo allocated by the preparation
;; statement.
2792 (define_expand "vec_unpacks_hi_v8sf"
2795 (match_operand:V8SF 1 "nonimmediate_operand" "")
2796 (parallel [(const_int 4) (const_int 5)
2797 (const_int 6) (const_int 7)])))
2798 (set (match_operand:V4DF 0 "register_operand" "")
2802 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1.
2804 (define_expand "vec_unpacks_lo_v4sf"
2805 [(set (match_operand:V2DF 0 "register_operand" "")
2808 (match_operand:V4SF 1 "nonimmediate_operand" "")
2809 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF operand 0 from elements 0..3 of the V8SF
;; operand 1.
2812 (define_expand "vec_unpacks_lo_v8sf"
2813 [(set (match_operand:V4DF 0 "register_operand" "")
2816 (match_operand:V8SF 1 "nonimmediate_operand" "")
2817 (parallel [(const_int 0) (const_int 1)
2818 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode used as the result
;; of the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders:
;; V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
2821 (define_mode_attr sseunpackfltmode
2822 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacks_hi_<mode> into a fresh <sseunpackmode> pseudo, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that pseudo.
2824 (define_expand "vec_unpacks_float_hi_<mode>"
2825 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2826 (match_operand:VI2_AVX2 1 "register_operand" "")]
2829 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2831 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2832 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2833 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacks_lo_<mode> into a fresh <sseunpackmode> pseudo, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that pseudo.
2837 (define_expand "vec_unpacks_float_lo_<mode>"
2838 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2839 (match_operand:VI2_AVX2 1 "register_operand" "")]
2842 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2844 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2845 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2846 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacku_hi_<mode> into a fresh <sseunpackmode> pseudo, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that pseudo.
2850 (define_expand "vec_unpacku_float_hi_<mode>"
2851 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2852 (match_operand:VI2_AVX2 1 "register_operand" "")]
2855 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2857 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2858 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2859 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacku_lo_<mode> into a fresh <sseunpackmode> pseudo, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that pseudo.
2863 (define_expand "vec_unpacku_float_lo_<mode>"
2864 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2865 (match_operand:VI2_AVX2 1 "register_operand" "")]
2868 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2870 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2871 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2872 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander producing a V2DF operand 0 from the V4SI operand 1 in two steps:
;; a selection of elements 2, 3, 2, 3 of operand 1, then a conversion using
;; elements 0 and 1.  operands[2] is a fresh V4SI pseudo allocated by the
;; preparation statement.
2876 (define_expand "vec_unpacks_float_hi_v4si"
2879 (match_operand:V4SI 1 "nonimmediate_operand" "")
2880 (parallel [(const_int 2) (const_int 3)
2881 (const_int 2) (const_int 3)])))
2882 (set (match_operand:V2DF 0 "register_operand" "")
2886 (parallel [(const_int 0) (const_int 1)]))))]
2888 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1.
2890 (define_expand "vec_unpacks_float_lo_v4si"
2891 [(set (match_operand:V2DF 0 "register_operand" "")
2894 (match_operand:V4SI 1 "nonimmediate_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF operand 0 from the V8SI operand 1: elements
;; 4..7 of operand 1 are selected first, then operand 0 is set.
;; operands[2] is a fresh V4SI pseudo allocated by the preparation
;; statement.
2898 (define_expand "vec_unpacks_float_hi_v8si"
2901 (match_operand:V8SI 1 "nonimmediate_operand" "")
2902 (parallel [(const_int 4) (const_int 5)
2903 (const_int 6) (const_int 7)])))
2904 (set (match_operand:V4DF 0 "register_operand" "")
2908 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V4DF operand 0 from elements 0..3 of the V8SI
;; operand 1.
2910 (define_expand "vec_unpacks_float_lo_v8si"
2911 [(set (match_operand:V4DF 0 "register_operand" "")
2914 (match_operand:V8SI 1 "nonimmediate_operand" "")
2915 (parallel [(const_int 0) (const_int 1)
2916 (const_int 2) (const_int 3)]))))]
;; Expander producing a V2DF operand 0 from the V4SI operand 1.
;; Visible steps: select elements 2, 3, 2, 3 of operand 1; convert using
;; elements 0 and 1; compare operand 6 `lt' operand 3 (a V2DF zero vector);
;; `and' operand 7 with operand 4 (a V2DF vector built from the constant
;; 2**32); finally operand 0 = operand 6 + operand 8.
;; operands[5] is a fresh V4SI pseudo; operands 6..8 are fresh V2DF pseudos.
2919 (define_expand "vec_unpacku_float_hi_v4si"
2922 (match_operand:V4SI 1 "nonimmediate_operand" "")
2923 (parallel [(const_int 2) (const_int 3)
2924 (const_int 2) (const_int 3)])))
2929 (parallel [(const_int 0) (const_int 1)]))))
2931 (lt:V2DF (match_dup 6) (match_dup 3)))
2933 (and:V2DF (match_dup 7) (match_dup 4)))
2934 (set (match_operand:V2DF 0 "register_operand" "")
2935 (plus:V2DF (match_dup 6) (match_dup 8)))]
2938 REAL_VALUE_TYPE TWO32r;
2942 real_ldexp (&TWO32r, &dconst1, 32);
2943 x = const_double_from_real_value (TWO32r, DFmode);
2945 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2946 operands[4] = force_reg (V2DFmode,
2947 ix86_build_const_vector (V2DFmode, 1, x));
2949 operands[5] = gen_reg_rtx (V4SImode);
2951 for (i = 6; i < 9; i++)
2952 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V2DF operand 0 from the V4SI operand 1.
;; Visible steps: convert using elements 0 and 1 of operand 1; compare
;; operand 5 `lt' operand 3 (a V2DF zero vector); `and' operand 6 with
;; operand 4 (a V2DF vector built from the constant 2**32); finally
;; operand 0 = operand 5 + operand 7.  Operands 5..7 are fresh V2DF pseudos.
2955 (define_expand "vec_unpacku_float_lo_v4si"
2959 (match_operand:V4SI 1 "nonimmediate_operand" "")
2960 (parallel [(const_int 0) (const_int 1)]))))
2962 (lt:V2DF (match_dup 5) (match_dup 3)))
2964 (and:V2DF (match_dup 6) (match_dup 4)))
2965 (set (match_operand:V2DF 0 "register_operand" "")
2966 (plus:V2DF (match_dup 5) (match_dup 7)))]
2969 REAL_VALUE_TYPE TWO32r;
2973 real_ldexp (&TWO32r, &dconst1, 32);
2974 x = const_double_from_real_value (TWO32r, DFmode);
2976 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2977 operands[4] = force_reg (V2DFmode,
2978 ix86_build_const_vector (V2DFmode, 1, x));
2980 for (i = 5; i < 8; i++)
2981 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V4DF operand 0 from the V8SI register operand 1.
;; Emitted sequence: extract the high V4SI half (gen_vec_extract_hi_v8si),
;; convert it with gen_avx_cvtdq2pd256, compare the result `LT' a zero
;; vector (tmp[0]), AND the comparison with a vector built from the
;; constant 2**32 (tmp[1]), and add that to the converted value.
2984 (define_expand "vec_unpacku_float_hi_v8si"
2985 [(match_operand:V4DF 0 "register_operand" "")
2986 (match_operand:V8SI 1 "register_operand" "")]
2989 REAL_VALUE_TYPE TWO32r;
2993 real_ldexp (&TWO32r, &dconst1, 32);
2994 x = const_double_from_real_value (TWO32r, DFmode);
2996 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2997 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2998 tmp[5] = gen_reg_rtx (V4SImode);
3000 for (i = 2; i < 5; i++)
3001 tmp[i] = gen_reg_rtx (V4DFmode);
3002 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3003 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
3004 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3005 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3006 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3007 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander producing a V4DF operand 0 from the V8SI operand 1.
;; Emitted sequence: convert with gen_avx_cvtdq2pd256_2 directly from
;; operand 1, compare the result `LT' a zero vector (tmp[0]), AND the
;; comparison with a vector built from the constant 2**32 (tmp[1]), and add
;; that to the converted value.
3011 (define_expand "vec_unpacku_float_lo_v8si"
3012 [(match_operand:V4DF 0 "register_operand" "")
3013 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3016 REAL_VALUE_TYPE TWO32r;
3020 real_ldexp (&TWO32r, &dconst1, 32);
3021 x = const_double_from_real_value (TWO32r, DFmode);
3023 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3024 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3026 for (i = 2; i < 5; i++)
3027 tmp[i] = gen_reg_rtx (V4DFmode);
3028 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3029 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3030 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3031 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3032 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander producing a V8SF operand 0 from two V4DF inputs (operands 1 and
;; 2), each of which is first float_truncate'd to V4SF.  operands[3] and
;; operands[4] are fresh V4SF pseudos allocated in the preparation code.
3036 (define_expand "vec_pack_trunc_v4df"
3038 (float_truncate:V4SF
3039 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3041 (float_truncate:V4SF
3042 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3043 (set (match_operand:V8SF 0 "register_operand" "")
3049 operands[3] = gen_reg_rtx (V4SFmode);
3050 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander producing a V4SF operand 0 from two V2DF inputs: each input is
;; converted with gen_sse2_cvtpd2ps into its own V4SF pseudo, and the two
;; results are combined with gen_sse_movlhps.
3053 (define_expand "vec_pack_trunc_v2df"
3054 [(match_operand:V4SF 0 "register_operand" "")
3055 (match_operand:V2DF 1 "nonimmediate_operand" "")
3056 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3061 r1 = gen_reg_rtx (V4SFmode);
3062 r2 = gen_reg_rtx (V4SFmode);
3064 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3065 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3066 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander producing a V8SI operand 0 from two V4DF inputs: each input is
;; converted with gen_avx_cvttpd2dq256_2 into its own V8SI pseudo, and the
;; two results are combined with gen_avx_vperm2f128v8si3 using the
;; immediate 0x20.
3070 (define_expand "vec_pack_sfix_trunc_v4df"
3071 [(match_operand:V8SI 0 "register_operand" "")
3072 (match_operand:V4DF 1 "nonimmediate_operand" "")
3073 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3078 r1 = gen_reg_rtx (V8SImode);
3079 r2 = gen_reg_rtx (V8SImode);
3081 emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
3082 emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
3083 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Expander producing a V4SI operand 0 from two V2DF inputs: each input is
;; converted with gen_sse2_cvttpd2dq into its own V4SI pseudo, and the two
;; results are combined with gen_vec_interleave_lowv2di applied to the
;; V2DImode lowparts of the destination and both temporaries.
3087 (define_expand "vec_pack_sfix_trunc_v2df"
3088 [(match_operand:V4SI 0 "register_operand" "")
3089 (match_operand:V2DF 1 "nonimmediate_operand" "")
3090 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3095 r1 = gen_reg_rtx (V4SImode);
3096 r2 = gen_reg_rtx (V4SImode);
3098 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3099 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3100 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3101 gen_lowpart (V2DImode, r1),
3102 gen_lowpart (V2DImode, r2)));
;; Expander producing a V8SI operand 0 from two V4DF inputs: each input is
;; converted with gen_avx_cvtpd2dq256_2 into its own V8SI pseudo, and the
;; two results are combined with gen_avx_vperm2f128v8si3 using the
;; immediate 0x20.
3106 (define_expand "vec_pack_sfix_v4df"
3107 [(match_operand:V8SI 0 "register_operand" "")
3108 (match_operand:V4DF 1 "nonimmediate_operand" "")
3109 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3114 r1 = gen_reg_rtx (V8SImode);
3115 r2 = gen_reg_rtx (V8SImode);
3117 emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
3118 emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
3119 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Expander producing a V4SI operand 0 from two V2DF inputs: each input is
;; converted with gen_sse2_cvtpd2dq into its own V4SI pseudo, and the two
;; results are combined with gen_vec_interleave_lowv2di applied to the
;; V2DImode lowparts of the destination and both temporaries.
3123 (define_expand "vec_pack_sfix_v2df"
3124 [(match_operand:V4SI 0 "register_operand" "")
3125 (match_operand:V2DF 1 "nonimmediate_operand" "")
3126 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3131 r1 = gen_reg_rtx (V4SImode);
3132 r2 = gen_reg_rtx (V4SImode);
3134 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3135 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3136 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3137 gen_lowpart (V2DImode, r1),
3138 gen_lowpart (V2DImode, r2)));
3142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3144 ;; Parallel single-precision floating point element swizzling
3146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands.
;; It passes the operands through ix86_fixup_binary_operands, emits
;; gen_sse_movhlps into the destination that call returns, and copies that
;; destination to operands[0] when the two differ.
3148 (define_expand "sse_movhlps_exp"
3149 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3152 (match_operand:V4SF 1 "nonimmediate_operand" "")
3153 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3154 (parallel [(const_int 6)
3160 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3162 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3164 /* Fix up the destination if needed. */
3165 if (dst != operands[0])
3166 emit_move_insn (operands[0], dst);
;; V4SF selection from operands 1 and 2 with five alternatives:
;;   0: movhlps   (non-AVX, register source, operand 1 tied to dest)
;;   1: vmovhlps  (AVX, register source)
;;   2: movlps    (non-AVX, memory source addressed as %H2)
;;   3: vmovlps   (AVX, memory source addressed as %H2)
;;   4: %vmovhps  (memory destination, register source)
;; The insn condition rejects the case where operands 1 and 2 are both
;; memory.
3171 (define_insn "sse_movhlps"
3172 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3175 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3176 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3177 (parallel [(const_int 6)
3181 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3183 movhlps\t{%2, %0|%0, %2}
3184 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3185 movlps\t{%H2, %0|%0, %H2}
3186 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3187 %vmovhps\t{%2, %0|%0, %2}"
3188 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3189 (set_attr "type" "ssemov")
3190 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3191 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands.
;; It passes the operands through ix86_fixup_binary_operands, emits
;; gen_sse_movlhps into the destination that call returns, and copies that
;; destination to operands[0] when the two differ.
3193 (define_expand "sse_movlhps_exp"
3194 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3197 (match_operand:V4SF 1 "nonimmediate_operand" "")
3198 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3199 (parallel [(const_int 0)
3205 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3207 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3209 /* Fix up the destination if needed. */
3210 if (dst != operands[0])
3211 emit_move_insn (operands[0], dst);
;; V4SF selection from operands 1 and 2 with five alternatives:
;;   0: movlhps   (non-AVX, register source, operand 1 tied to dest)
;;   1: vmovlhps  (AVX, register source)
;;   2: movhps    (non-AVX, memory source)
;;   3: vmovhps   (AVX, memory source)
;;   4: %vmovlps  (memory destination addressed as %H0, register source)
;; Alternative 3 must take operand 2 from memory: the three-operand vmovhps
;; load form has a memory source, and the register-source case is already
;; handled by alternative 1 (vmovlhps).  This mirrors the non-AVX movhps
;; alternative 2 and the "o,o" pairing used by sse_movhlps.
3216 (define_insn "sse_movlhps"
3217 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3220 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3221 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3222 (parallel [(const_int 0)
3226 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3228 movlhps\t{%2, %0|%0, %2}
3229 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3230 movhps\t{%2, %0|%0, %2}
3231 vmovhps\t{%2, %1, %0|%0, %1, %2}
3232 %vmovlps\t{%2, %H0|%H0, %2}"
3233 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3234 (set_attr "type" "ssemov")
3235 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3236 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3238 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps (256-bit): V8SF result selecting indices 2, 10, 3, 11, 6, 14,
;; 7, 15 from operands 1 (register) and 2 (register or memory).
3239 (define_insn "avx_unpckhps256"
3240 [(set (match_operand:V8SF 0 "register_operand" "=x")
3243 (match_operand:V8SF 1 "register_operand" "x")
3244 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3245 (parallel [(const_int 2) (const_int 10)
3246 (const_int 3) (const_int 11)
3247 (const_int 6) (const_int 14)
3248 (const_int 7) (const_int 15)])))]
3250 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3251 [(set_attr "type" "sselog")
3252 (set_attr "prefix" "vex")
3253 (set_attr "mode" "V8SF")])
;; Expander producing a V8SF operand 0 from V8SF operands 1 and 2 in three
;; selections: one with the index list used by avx_unpcklps256
;; (0,8,1,9,4,12,5,13), one with the index list used by avx_unpckhps256
;; (2,10,3,11,6,14,7,15), and a final one taking indices 4..7 and 12..15.
;; operands[3] and operands[4] are fresh V8SF pseudos for the intermediate
;; results.
3255 (define_expand "vec_interleave_highv8sf"
3259 (match_operand:V8SF 1 "register_operand" "x")
3260 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3261 (parallel [(const_int 0) (const_int 8)
3262 (const_int 1) (const_int 9)
3263 (const_int 4) (const_int 12)
3264 (const_int 5) (const_int 13)])))
3270 (parallel [(const_int 2) (const_int 10)
3271 (const_int 3) (const_int 11)
3272 (const_int 6) (const_int 14)
3273 (const_int 7) (const_int 15)])))
3274 (set (match_operand:V8SF 0 "register_operand" "")
3279 (parallel [(const_int 4) (const_int 5)
3280 (const_int 6) (const_int 7)
3281 (const_int 12) (const_int 13)
3282 (const_int 14) (const_int 15)])))]
3285 operands[3] = gen_reg_rtx (V8SFmode);
3286 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF high interleave, selection vector {2,6,3,7}.
;; Alternative 0 is the two-operand unpckhps (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
3289 (define_insn "vec_interleave_highv4sf"
3290 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3293 (match_operand:V4SF 1 "register_operand" "0,x")
3294 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3295 (parallel [(const_int 2) (const_int 6)
3296 (const_int 3) (const_int 7)])))]
3299 unpckhps\t{%2, %0|%0, %2}
3300 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3301 [(set_attr "isa" "noavx,avx")
3302 (set_attr "type" "sselog")
3303 (set_attr "prefix" "orig,vex")
3304 (set_attr "mode" "V4SF")])
3306 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selection vector is {0,8,1,9, 4,12,5,13}; printed as vunpcklps
;; with a register first source and a register-or-memory second source.
3307 (define_insn "avx_unpcklps256"
3308 [(set (match_operand:V8SF 0 "register_operand" "=x")
3311 (match_operand:V8SF 1 "register_operand" "x")
3312 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3313 (parallel [(const_int 0) (const_int 8)
3314 (const_int 1) (const_int 9)
3315 (const_int 4) (const_int 12)
3316 (const_int 5) (const_int 13)])))]
3318 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3319 [(set_attr "type" "sselog")
3320 (set_attr "prefix" "vex")
3321 (set_attr "mode" "V8SF")])
;; Full-width low interleave for V8SF, expanded as three steps with
;; three selection vectors: {0,8,1,9,4,12,5,13}, {2,10,3,11,6,14,7,15}
;; and finally {0..3, 8..11}.  The preparation code allocates two
;; fresh V8SF pseudos as operands 3 and 4 (presumably the destinations
;; of the first two steps -- those lines are not visible here).
3323 (define_expand "vec_interleave_lowv8sf"
3327 (match_operand:V8SF 1 "register_operand" "x")
3328 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3329 (parallel [(const_int 0) (const_int 8)
3330 (const_int 1) (const_int 9)
3331 (const_int 4) (const_int 12)
3332 (const_int 5) (const_int 13)])))
3338 (parallel [(const_int 2) (const_int 10)
3339 (const_int 3) (const_int 11)
3340 (const_int 6) (const_int 14)
3341 (const_int 7) (const_int 15)])))
3342 (set (match_operand:V8SF 0 "register_operand" "")
3347 (parallel [(const_int 0) (const_int 1)
3348 (const_int 2) (const_int 3)
3349 (const_int 8) (const_int 9)
3350 (const_int 10) (const_int 11)])))]
3353 operands[3] = gen_reg_rtx (V8SFmode);
3354 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave, selection vector {0,4,1,5}.
;; Alternative 0 is the two-operand unpcklps (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
3357 (define_insn "vec_interleave_lowv4sf"
3358 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3361 (match_operand:V4SF 1 "register_operand" "0,x")
3362 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3363 (parallel [(const_int 0) (const_int 4)
3364 (const_int 1) (const_int 5)])))]
3367 unpcklps\t{%2, %0|%0, %2}
3368 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3369 [(set_attr "isa" "noavx,avx")
3370 (set_attr "type" "sselog")
3371 (set_attr "prefix" "orig,vex")
3372 (set_attr "mode" "V4SF")])
3374 ;; These are modeled with the same vec_concat as the others so that we
3375 ;; capture users of shufps that can use the new instructions
;; 256-bit form: selection vector {1,1,3,3,5,5,7,7} on a single
;; register-or-memory source, printed as vmovshdup.
3376 (define_insn "avx_movshdup256"
3377 [(set (match_operand:V8SF 0 "register_operand" "=x")
3380 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3382 (parallel [(const_int 1) (const_int 1)
3383 (const_int 3) (const_int 3)
3384 (const_int 5) (const_int 5)
3385 (const_int 7) (const_int 7)])))]
3387 "vmovshdup\t{%1, %0|%0, %1}"
3388 [(set_attr "type" "sse")
3389 (set_attr "prefix" "vex")
3390 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a single register-or-memory source.  The "%v"
;; in the template prints the VEX mnemonic when AVX encoding is used
;; (prefix attribute is maybe_vex).
3392 (define_insn "sse3_movshdup"
3393 [(set (match_operand:V4SF 0 "register_operand" "=x")
3396 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3398 (parallel [(const_int 1)
3403 "%vmovshdup\t{%1, %0|%0, %1}"
3404 [(set_attr "type" "sse")
3405 (set_attr "prefix_rep" "1")
3406 (set_attr "prefix" "maybe_vex")
3407 (set_attr "mode" "V4SF")])
;; 256-bit movsldup: selection vector {0,0,2,2,4,4,6,6} on a single
;; register-or-memory source, printed as vmovsldup.
3409 (define_insn "avx_movsldup256"
3410 [(set (match_operand:V8SF 0 "register_operand" "=x")
3413 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3415 (parallel [(const_int 0) (const_int 0)
3416 (const_int 2) (const_int 2)
3417 (const_int 4) (const_int 4)
3418 (const_int 6) (const_int 6)])))]
3420 "vmovsldup\t{%1, %0|%0, %1}"
3421 [(set_attr "type" "sse")
3422 (set_attr "prefix" "vex")
3423 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a single register-or-memory source.  The "%v"
;; in the template prints the VEX mnemonic when AVX encoding is used
;; (prefix attribute is maybe_vex).
3425 (define_insn "sse3_movsldup"
3426 [(set (match_operand:V4SF 0 "register_operand" "=x")
3429 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3431 (parallel [(const_int 0)
3436 "%vmovsldup\t{%1, %0|%0, %1}"
3437 [(set_attr "type" "sse")
3438 (set_attr "prefix_rep" "1")
3439 (set_attr "prefix" "maybe_vex")
3440 (set_attr "mode" "V4SF")])
;; Expander taking the raw 8-bit shuffle immediate (operand 3) and
;; turning it into the eight explicit element selectors expected by
;; avx_shufps256_1.  Each 2-bit field of the mask is used twice: once
;; as-is (or +8) for the first four selectors, and once more with an
;; extra +4 for the last four selectors.
3442 (define_expand "avx_shufps256"
3443 [(match_operand:V8SF 0 "register_operand" "")
3444 (match_operand:V8SF 1 "register_operand" "")
3445 (match_operand:V8SF 2 "nonimmediate_operand" "")
3446 (match_operand:SI 3 "const_int_operand" "")]
3449 int mask = INTVAL (operands[3]);
3450 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3451 GEN_INT ((mask >> 0) & 3),
3452 GEN_INT ((mask >> 2) & 3),
3453 GEN_INT (((mask >> 4) & 3) + 8),
3454 GEN_INT (((mask >> 6) & 3) + 8),
3455 GEN_INT (((mask >> 0) & 3) + 4),
3456 GEN_INT (((mask >> 2) & 3) + 4),
3457 GEN_INT (((mask >> 4) & 3) + 12),
3458 GEN_INT (((mask >> 6) & 3) + 12)));
3462 ;; Each 2-bit field in the mask selects 2 elements: selectors 7..10
;; must equal selectors 3..6 plus 4, which the insn condition checks.
;; The output code rebuilds the 8-bit immediate from selectors 3..6
;; (operands 5 and 6 are biased by 8) and stores it in operands[3]
;; before printing vshufps.
3463 (define_insn "avx_shufps256_1"
3464 [(set (match_operand:V8SF 0 "register_operand" "=x")
3467 (match_operand:V8SF 1 "register_operand" "x")
3468 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3469 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3470 (match_operand 4 "const_0_to_3_operand" "")
3471 (match_operand 5 "const_8_to_11_operand" "")
3472 (match_operand 6 "const_8_to_11_operand" "")
3473 (match_operand 7 "const_4_to_7_operand" "")
3474 (match_operand 8 "const_4_to_7_operand" "")
3475 (match_operand 9 "const_12_to_15_operand" "")
3476 (match_operand 10 "const_12_to_15_operand" "")])))]
3478 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3479 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3480 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3481 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3484 mask = INTVAL (operands[3]);
3485 mask |= INTVAL (operands[4]) << 2;
3486 mask |= (INTVAL (operands[5]) - 8) << 4;
3487 mask |= (INTVAL (operands[6]) - 8) << 6;
3488 operands[3] = GEN_INT (mask);
3490 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3492 [(set_attr "type" "sselog")
3493 (set_attr "length_immediate" "1")
3494 (set_attr "prefix" "vex")
3495 (set_attr "mode" "V8SF")])
;; Expander taking the raw 8-bit shuffle immediate (operand 3) and
;; splitting it into four 2-bit element selectors for sse_shufps_v4sf.
;; The upper two selectors are biased by 4.
3497 (define_expand "sse_shufps"
3498 [(match_operand:V4SF 0 "register_operand" "")
3499 (match_operand:V4SF 1 "register_operand" "")
3500 (match_operand:V4SF 2 "nonimmediate_operand" "")
3501 (match_operand:SI 3 "const_int_operand" "")]
3504 int mask = INTVAL (operands[3]);
3505 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3506 GEN_INT ((mask >> 0) & 3),
3507 GEN_INT ((mask >> 2) & 3),
3508 GEN_INT (((mask >> 4) & 3) + 4),
3509 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps on the VI4F_128 modes, with the four element selectors as
;; separate operands (3,4 in 0..3 and 5,6 in 4..7).  The output code
;; folds them back into the 8-bit immediate (removing the bias of 4 on
;; operands 5 and 6), stores it in operands[3], and prints shufps or
;; vshufps depending on the alternative.
3513 (define_insn "sse_shufps_<mode>"
3514 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3515 (vec_select:VI4F_128
3516 (vec_concat:<ssedoublevecmode>
3517 (match_operand:VI4F_128 1 "register_operand" "0,x")
3518 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3519 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3520 (match_operand 4 "const_0_to_3_operand" "")
3521 (match_operand 5 "const_4_to_7_operand" "")
3522 (match_operand 6 "const_4_to_7_operand" "")])))]
3526 mask |= INTVAL (operands[3]) << 0;
3527 mask |= INTVAL (operands[4]) << 2;
3528 mask |= (INTVAL (operands[5]) - 4) << 4;
3529 mask |= (INTVAL (operands[6]) - 4) << 6;
3530 operands[3] = GEN_INT (mask);
3532 switch (which_alternative)
3535 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3537 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3542 [(set_attr "isa" "noavx,avx")
3543 (set_attr "type" "sselog")
3544 (set_attr "length_immediate" "1")
3545 (set_attr "prefix" "orig,vex")
3546 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of a V4SF as a V2SF.  Alternatives:
;;   0  store to memory with (v)movhps
;;   1  register to register with (v)movhlps
;;   2  load from the high half (%H1) of an offsettable memory source
;;      with (v)movlps.
3548 (define_insn "sse_storehps"
3549 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3551 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3552 (parallel [(const_int 2) (const_int 3)])))]
3555 %vmovhps\t{%1, %0|%0, %1}
3556 %vmovhlps\t{%1, %d0|%d0, %1}
3557 %vmovlps\t{%H1, %d0|%d0, %H1}"
3558 [(set_attr "type" "ssemov")
3559 (set_attr "prefix" "maybe_vex")
3560 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps.  The preparation code asks
;; ix86_fixup_binary_operands for a usable destination, emits
;; sse_loadhps into it, and copies the result back to operand 0 when a
;; different destination was chosen.
3562 (define_expand "sse_loadhps_exp"
3563 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3566 (match_operand:V4SF 1 "nonimmediate_operand" "")
3567 (parallel [(const_int 0) (const_int 1)]))
3568 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3571 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3573 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3575 /* Fix up the destination if needed. */
3576 if (dst != operands[0])
3577 emit_move_insn (operands[0], dst);
;; Keeps elements 0 and 1 of operand 1 and combines them with the V2SF
;; operand 2.  Alternatives:
;;   0/1  (v)movhps   with operand 2 in memory
;;   2/3  (v)movlhps  with operand 2 in a register
;;   4    (v)movlps   store of operand 2 into the high half (%H0) of a
;;        memory destination.
3582 (define_insn "sse_loadhps"
3583 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3586 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3587 (parallel [(const_int 0) (const_int 1)]))
3588 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3591 movhps\t{%2, %0|%0, %2}
3592 vmovhps\t{%2, %1, %0|%0, %1, %2}
3593 movlhps\t{%2, %0|%0, %2}
3594 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3595 %vmovlps\t{%2, %H0|%H0, %2}"
3596 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3597 (set_attr "type" "ssemov")
3598 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3599 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of a V4SF as a V2SF.  Alternatives:
;;   0  store to memory with (v)movlps
;;   1  register to register with (v)movaps
;;   2  load from memory with (v)movlps.
3601 (define_insn "sse_storelps"
3602 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3604 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3605 (parallel [(const_int 0) (const_int 1)])))]
3608 %vmovlps\t{%1, %0|%0, %1}
3609 %vmovaps\t{%1, %0|%0, %1}
3610 %vmovlps\t{%1, %d0|%d0, %1}"
3611 [(set_attr "type" "ssemov")
3612 (set_attr "prefix" "maybe_vex")
3613 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps.  The preparation code asks
;; ix86_fixup_binary_operands for a usable destination, emits
;; sse_loadlps into it, and copies the result back to operand 0 when a
;; different destination was chosen.
3615 (define_expand "sse_loadlps_exp"
3616 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3618 (match_operand:V2SF 2 "nonimmediate_operand" "")
3620 (match_operand:V4SF 1 "nonimmediate_operand" "")
3621 (parallel [(const_int 2) (const_int 3)]))))]
3624 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3626 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3628 /* Fix up the destination if needed. */
3629 if (dst != operands[0])
3630 emit_move_insn (operands[0], dst);
;; Combines the V2SF operand 2 with elements 2 and 3 of operand 1.
;; Alternatives:
;;   0/1  (v)shufps $0xe4  with operand 2 in a register
;;   2/3  (v)movlps        with operand 2 in memory
;;   4    (v)movlps        store of operand 2 to a memory destination.
;; Alternatives 2 and 3 print the load form of (v)movlps, whose source
;; operand is memory, so operand 2 must be "m" in both.  With "x" in
;; alternative 3 it would merely duplicate alternative 1 and never be
;; selected, leaving the AVX memory form unreachable.
3635 (define_insn "sse_loadlps"
3636 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3638 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3640 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3641 (parallel [(const_int 2) (const_int 3)]))))]
3644 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3645 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3646 movlps\t{%2, %0|%0, %2}
3647 vmovlps\t{%2, %1, %0|%0, %1, %2}
3648 %vmovlps\t{%2, %0|%0, %2}"
3649 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3650 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3651 (set_attr "length_immediate" "1,1,*,*,*")
3652 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3653 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss: alternative 0 is the two-operand form with
;; operand 1 tied to the destination, alternative 1 the three-operand
;; VEX form.
3655 (define_insn "sse_movss"
3656 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3658 (match_operand:V4SF 2 "register_operand" " x,x")
3659 (match_operand:V4SF 1 "register_operand" " 0,x")
3663 movss\t{%2, %0|%0, %2}
3664 vmovss\t{%2, %1, %0|%0, %1, %2}"
3665 [(set_attr "isa" "noavx,avx")
3666 (set_attr "type" "ssemov")
3667 (set_attr "prefix" "orig,vex")
3668 (set_attr "mode" "SF")])
;; Broadcast expander for V4SF from an SF register-or-memory operand.
;; The preparation code can force operand 1 into an SF register; the
;; condition guarding that statement is not visible here.
3670 (define_expand "vec_dupv4sf"
3671 [(set (match_operand:V4SF 0 "register_operand" "")
3673 (match_operand:SF 1 "nonimmediate_operand" "")))]
3677 operands[1] = force_reg (SFmode, operands[1]);
;; Broadcast of element 0 of a V4SF register to all of a V4SF
;; register, printed as register-source vbroadcastss.
3680 (define_insn "avx2_vec_dupv4sf"
3681 [(set (match_operand:V4SF 0 "register_operand" "=x")
3684 (match_operand:V4SF 1 "register_operand" "x")
3685 (parallel [(const_int 0)]))))]
3687 "vbroadcastss\t{%1, %0|%0, %1}"
3688 [(set_attr "type" "sselog1")
3689 (set_attr "prefix" "vex")
3690 (set_attr "mode" "V4SF")])
;; VEX broadcast of an SF value: a register source uses vshufps with
;; immediate 0 on itself, a memory source uses vbroadcastss.
3692 (define_insn "*vec_dupv4sf_avx"
3693 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3695 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3698 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3699 vbroadcastss\t{%1, %0|%0, %1}"
3700 [(set_attr "type" "sselog1,ssemov")
3701 (set_attr "length_immediate" "1,0")
3702 (set_attr "prefix_extra" "0,1")
3703 (set_attr "prefix" "vex")
3704 (set_attr "mode" "V4SF")])
;; Broadcast of element 0 of a V4SF register to all of a V8SF
;; register, printed as register-source vbroadcastss.
3706 (define_insn "avx2_vec_dupv8sf"
3707 [(set (match_operand:V8SF 0 "register_operand" "=x")
3710 (match_operand:V4SF 1 "register_operand" "x")
3711 (parallel [(const_int 0)]))))]
3713 "vbroadcastss\t{%1, %0|%0, %1}"
3714 [(set_attr "type" "sselog1")
3715 (set_attr "prefix" "vex")
3716 (set_attr "mode" "V8SF")])
;; Legacy-encoded broadcast: the SF source is tied to the destination
;; register ("0") and shufps with immediate 0 is applied to it in place.
3718 (define_insn "*vec_dupv4sf"
3719 [(set (match_operand:V4SF 0 "register_operand" "=x")
3721 (match_operand:SF 1 "register_operand" "0")))]
3723 "shufps\t{$0, %0, %0|%0, %0, 0}"
3724 [(set_attr "type" "sselog1")
3725 (set_attr "length_immediate" "1")
3726 (set_attr "mode" "V4SF")])
3728 ;; Although insertps takes register source, we prefer
3729 ;; unpcklps with register source since it is shorter.
;; Alternatives, in order: (v)unpcklps with a register operand 2,
;; (v)insertps $0x10 with a memory operand 2, (v)movss when operand 2
;; is the "C" constant, then the MMX-register forms punpckldq and movd.
3730 (define_insn "*vec_concatv2sf_sse4_1"
3731 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3733 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3734 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3737 unpcklps\t{%2, %0|%0, %2}
3738 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3739 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3740 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3741 %vmovss\t{%1, %0|%0, %1}
3742 punpckldq\t{%2, %0|%0, %2}
3743 movd\t{%1, %0|%0, %1}"
3744 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3745 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3746 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3747 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3748 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3749 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3750 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3752 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3753 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3754 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Alternatives, in order: unpcklps (register operand 2), movss
;; (operand 1 in memory, operand 2 the "C" constant), then the
;; MMX-register forms punpckldq and movd.
3755 (define_insn "*vec_concatv2sf_sse"
3756 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3758 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3759 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3762 unpcklps\t{%2, %0|%0, %2}
3763 movss\t{%1, %0|%0, %1}
3764 punpckldq\t{%2, %0|%0, %2}
3765 movd\t{%1, %0|%0, %1}"
3766 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3767 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF halves.  A register operand 2 uses
;; (v)movlhps; a memory operand 2 uses (v)movhps.
3769 (define_insn "*vec_concatv4sf"
3770 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3772 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3773 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3776 movlhps\t{%2, %0|%0, %2}
3777 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3778 movhps\t{%2, %0|%0, %2}
3779 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3780 [(set_attr "isa" "noavx,avx,noavx,avx")
3781 (set_attr "type" "ssemov")
3782 (set_attr "prefix" "orig,vex,orig,vex")
3783 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation expander for the V_128 modes; all the work is
;; delegated to ix86_expand_vector_init (first argument false).
3785 (define_expand "vec_init<mode>"
3786 [(match_operand:V_128 0 "register_operand" "")
3787 (match_operand 1 "" "")]
3790 ix86_expand_vector_init (false, operands[0], operands[1]);
3794 ;; Avoid combining registers from different units in a single alternative,
3795 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Sets element 0 of a VI4F_128 vector.  Eleven alternatives:
;;   0-3   operand 1 is the "C" constant: insertps $0xe, scalar move,
;;         movd, or movss
;;   4-7   operand 1 is a register: (v)movss or (v)pinsrd $0
;;   8-10  memory destination tied to operand 1; these have no visible
;;         template line here and get type ssemov / fmov / imov.
3796 (define_insn "vec_set<mode>_0"
3797 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3798 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3800 (vec_duplicate:VI4F_128
3801 (match_operand:<ssescalarmode> 2 "general_operand"
3802 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3803 (match_operand:VI4F_128 1 "vector_move_operand"
3804 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3808 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3809 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3810 %vmovd\t{%2, %0|%0, %2}
3811 movss\t{%2, %0|%0, %2}
3812 movss\t{%2, %0|%0, %2}
3813 vmovss\t{%2, %1, %0|%0, %1, %2}
3814 pinsrd\t{$0, %2, %0|%0, %2, 0}
3815 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3819 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3821 (cond [(eq_attr "alternative" "0,6,7")
3822 (const_string "sselog")
3823 (eq_attr "alternative" "9")
3824 (const_string "fmov")
3825 (eq_attr "alternative" "10")
3826 (const_string "imov")
3828 (const_string "ssemov")))
3829 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3830 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3831 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3832 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3834 ;; A subset is vec_setv4sf.
;; Operand 3 arrives as a one-hot element mask: the condition requires
;; exact_log2 of it to be a valid V4SF element index.  The output code
;; converts it to the insertps immediate (element index shifted left
;; by 4) before printing insertps or vinsertps.
3835 (define_insn "*vec_setv4sf_sse4_1"
3836 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3839 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3840 (match_operand:V4SF 1 "register_operand" "0,x")
3841 (match_operand:SI 3 "const_int_operand" "")))]
3843 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3844 < GET_MODE_NUNITS (V4SFmode))"
3846 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3847 switch (which_alternative)
3850 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3852 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3857 [(set_attr "isa" "noavx,avx")
3858 (set_attr "type" "sselog")
3859 (set_attr "prefix_data16" "1,*")
3860 (set_attr "prefix_extra" "1")
3861 (set_attr "length_immediate" "1")
3862 (set_attr "prefix" "orig,vex")
3863 (set_attr "mode" "V4SF")])
;; insertps with a raw 8-bit immediate (operand 3), modelled as an
;; unspec.  When operand 2 is in memory, the output code reads the top
;; two bits of the immediate (count_s), clears them from the immediate,
;; and re-addresses the memory operand as the SF at byte offset
;; count_s * 4.
3865 (define_insn "sse4_1_insertps"
3866 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3867 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3868 (match_operand:V4SF 1 "register_operand" "0,x")
3869 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3873 if (MEM_P (operands[2]))
3875 unsigned count_s = INTVAL (operands[3]) >> 6;
3877 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3878 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3880 switch (which_alternative)
3883 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3885 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3890 [(set_attr "isa" "noavx,avx")
3891 (set_attr "type" "sselog")
3892 (set_attr "prefix_data16" "1,*")
3893 (set_attr "prefix_extra" "1")
3894 (set_attr "length_immediate" "1")
3895 (set_attr "prefix" "orig,vex")
3896 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition: TARGET_SSE && reload_completed) of
;; an element-0 set whose destination is memory: a scalar move is
;; emitted to the <ssescalarmode> slot at offset 0 of the destination.
3899 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3901 (vec_duplicate:VI4F_128
3902 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3905 "TARGET_SSE && reload_completed"
3908 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for the V modes: operand 0 is the vector,
;; operand 1 the scalar, operand 2 the constant element index.  All
;; the work is delegated to ix86_expand_vector_set.
3913 (define_expand "vec_set<mode>"
3914 [(match_operand:V 0 "register_operand" "")
3915 (match_operand:<ssescalarmode> 1 "register_operand" "")
3916 (match_operand 2 "const_int_operand" "")]
3919 ix86_expand_vector_set (false, operands[0], operands[1],
3920 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF (memory-to-memory is excluded by
;; the condition).  After reload it is split into a plain SF move from
;; an SFmode view of operand 1: either the same hard register number
;; in SFmode or gen_lowpart of the operand.
3924 (define_insn_and_split "*vec_extractv4sf_0"
3925 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3927 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3928 (parallel [(const_int 0)])))]
3929 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3931 "&& reload_completed"
3934 rtx op1 = operands[1];
3936 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3938 op1 = gen_lowpart (SFmode, op1);
3939 emit_move_insn (operands[0], op1);
;; Extraction of element 0..3 of a V4SF register.  Alternative 0
;; (integer register or memory destination) prints (v)extractps.
;; When the destination is an SSE register the insn is split after
;; reload into sse_shufps_v4sf or vec_interleave_highv4sf, writing the
;; V4SF view of the destination register; which index maps to which
;; is decided by the switch on operand 2.
3943 (define_insn_and_split "*sse4_1_extractps"
3944 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3946 (match_operand:V4SF 1 "register_operand" "x,0,x")
3947 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3950 %vextractps\t{%2, %1, %0|%0, %1, %2}
3953 "&& reload_completed && SSE_REG_P (operands[0])"
3956 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3957 switch (INTVAL (operands[2]))
3961 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3962 operands[2], operands[2],
3963 GEN_INT (INTVAL (operands[2]) + 4),
3964 GEN_INT (INTVAL (operands[2]) + 4)));
3967 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3970 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
3975 [(set_attr "isa" "*,noavx,avx")
3976 (set_attr "type" "sselog,*,*")
3977 (set_attr "prefix_data16" "1,*,*")
3978 (set_attr "prefix_extra" "1,*,*")
3979 (set_attr "length_immediate" "1,*,*")
3980 (set_attr "prefix" "maybe_vex,*,*")
3981 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element i from a V4SF in offsettable memory.  After
;; reload it becomes a plain SF load from byte offset i*4.
3983 (define_insn_and_split "*vec_extract_v4sf_mem"
3984 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3986 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3987 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3990 "&& reload_completed"
3993 int i = INTVAL (operands[2]);
3995 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Half-vector extraction expander for the V_256 modes.  Operand 2
;; (0 or 1) picks between the vec_extract_lo_<mode> and
;; vec_extract_hi_<mode> generators, which is then invoked on
;; operands 0 and 1.
3999 (define_expand "avx_vextractf128<mode>"
4000 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4001 (match_operand:V_256 1 "register_operand" "")
4002 (match_operand:SI 2 "const_0_to_1_operand" "")]
4005 rtx (*insn)(rtx, rtx);
4007 switch (INTVAL (operands[2]))
4010 insn = gen_vec_extract_lo_<mode>;
4013 insn = gen_vec_extract_hi_<mode>;
4019 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0,1) of a four-element 256-bit vector.  After
;; reload this is split into a plain half-width move from a
;; <ssehalfvecmode> view of operand 1.
4023 (define_insn_and_split "vec_extract_lo_<mode>"
4024 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4025 (vec_select:<ssehalfvecmode>
4026 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4027 (parallel [(const_int 0) (const_int 1)])))]
4028 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4030 "&& reload_completed"
4033 rtx op1 = operands[1];
4035 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4037 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4038 emit_move_insn (operands[0], op1);
;; High half (elements 2,3) of a four-element 256-bit vector in a
;; register, written to a register or to memory with
;; vextract<i128> $1.
4042 (define_insn "vec_extract_hi_<mode>"
4043 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4044 (vec_select:<ssehalfvecmode>
4045 (match_operand:VI8F_256 1 "register_operand" "x,x")
4046 (parallel [(const_int 2) (const_int 3)])))]
4048 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4049 [(set_attr "type" "sselog")
4050 (set_attr "prefix_extra" "1")
4051 (set_attr "length_immediate" "1")
4052 (set_attr "memory" "none,store")
4053 (set_attr "prefix" "vex")
4054 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of an eight-element 256-bit vector.  After
;; reload this is split into a plain half-width move from a
;; <ssehalfvecmode> view of operand 1.
4056 (define_insn_and_split "vec_extract_lo_<mode>"
4057 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4058 (vec_select:<ssehalfvecmode>
4059 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4060 (parallel [(const_int 0) (const_int 1)
4061 (const_int 2) (const_int 3)])))]
4062 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4064 "&& reload_completed"
4067 rtx op1 = operands[1];
4069 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4071 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4072 emit_move_insn (operands[0], op1);
;; High half (elements 4..7) of an eight-element 256-bit vector in a
;; register, written to a register or to memory with
;; vextract<i128> $1.
4076 (define_insn "vec_extract_hi_<mode>"
4077 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4078 (vec_select:<ssehalfvecmode>
4079 (match_operand:VI4F_256 1 "register_operand" "x,x")
4080 (parallel [(const_int 4) (const_int 5)
4081 (const_int 6) (const_int 7)])))]
4083 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4084 [(set_attr "type" "sselog")
4085 (set_attr "prefix_extra" "1")
4086 (set_attr "length_immediate" "1")
4087 (set_attr "memory" "none,store")
4088 (set_attr "prefix" "vex")
4089 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..7) of a V16HI.  After reload this is split
;; into a plain V8HI move from a V8HImode view of operand 1.
4091 (define_insn_and_split "vec_extract_lo_v16hi"
4092 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4094 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4095 (parallel [(const_int 0) (const_int 1)
4096 (const_int 2) (const_int 3)
4097 (const_int 4) (const_int 5)
4098 (const_int 6) (const_int 7)])))]
4099 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4101 "&& reload_completed"
4104 rtx op1 = operands[1];
4106 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4108 op1 = gen_lowpart (V8HImode, op1);
4109 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16HI register, written to a
;; register or to memory with vextract%~128 $1.
4113 (define_insn "vec_extract_hi_v16hi"
4114 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4116 (match_operand:V16HI 1 "register_operand" "x,x")
4117 (parallel [(const_int 8) (const_int 9)
4118 (const_int 10) (const_int 11)
4119 (const_int 12) (const_int 13)
4120 (const_int 14) (const_int 15)])))]
4122 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4123 [(set_attr "type" "sselog")
4124 (set_attr "prefix_extra" "1")
4125 (set_attr "length_immediate" "1")
4126 (set_attr "memory" "none,store")
4127 (set_attr "prefix" "vex")
4128 (set_attr "mode" "OI")])
;; Low half (elements 0..15) of a V32QI.  After reload this is split
;; into a plain V16QI move from a V16QImode view of operand 1.
4130 (define_insn_and_split "vec_extract_lo_v32qi"
4131 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4133 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4134 (parallel [(const_int 0) (const_int 1)
4135 (const_int 2) (const_int 3)
4136 (const_int 4) (const_int 5)
4137 (const_int 6) (const_int 7)
4138 (const_int 8) (const_int 9)
4139 (const_int 10) (const_int 11)
4140 (const_int 12) (const_int 13)
4141 (const_int 14) (const_int 15)])))]
4142 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4144 "&& reload_completed"
4147 rtx op1 = operands[1];
4149 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4151 op1 = gen_lowpart (V16QImode, op1);
4152 emit_move_insn (operands[0], op1);
;; High half (elements 16..31) of a V32QI register, written to a
;; register or to memory with vextract%~128 $1.
4156 (define_insn "vec_extract_hi_v32qi"
4157 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4159 (match_operand:V32QI 1 "register_operand" "x,x")
4160 (parallel [(const_int 16) (const_int 17)
4161 (const_int 18) (const_int 19)
4162 (const_int 20) (const_int 21)
4163 (const_int 22) (const_int 23)
4164 (const_int 24) (const_int 25)
4165 (const_int 26) (const_int 27)
4166 (const_int 28) (const_int 29)
4167 (const_int 30) (const_int 31)])))]
4169 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_extra" "1")
4172 (set_attr "length_immediate" "1")
4173 (set_attr "memory" "none,store")
4174 (set_attr "prefix" "vex")
4175 (set_attr "mode" "OI")])
4177 ;; Modes handled by vec_extract patterns.
;; The 256-bit modes are enabled only under TARGET_AVX; the 128-bit
;; modes carry no extra condition.
4178 (define_mode_iterator VEC_EXTRACT_MODE
4179 [(V32QI "TARGET_AVX") V16QI
4180 (V16HI "TARGET_AVX") V8HI
4181 (V8SI "TARGET_AVX") V4SI
4182 (V4DI "TARGET_AVX") V2DI
4183 (V8SF "TARGET_AVX") V4SF
4184 (V4DF "TARGET_AVX") V2DF])
;; Element-extract expander for VEC_EXTRACT_MODE: operand 0 is the
;; scalar result, operand 1 the vector, operand 2 the constant element
;; index.  All the work is delegated to ix86_expand_vector_extract.
4186 (define_expand "vec_extract<mode>"
4187 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4188 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4189 (match_operand 2 "const_int_operand" "")]
4192 ix86_expand_vector_extract (false, operands[0], operands[1],
4193 INTVAL (operands[2]));
4197 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4199 ;; Parallel double-precision floating point element swizzling
4201 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4203 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selection vector is {1,5, 3,7}; printed as vunpckhpd with a
;; register first source and a register-or-memory second source.
4204 (define_insn "avx_unpckhpd256"
4205 [(set (match_operand:V4DF 0 "register_operand" "=x")
4208 (match_operand:V4DF 1 "register_operand" "x")
4209 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4210 (parallel [(const_int 1) (const_int 5)
4211 (const_int 3) (const_int 7)])))]
4213 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4214 [(set_attr "type" "sselog")
4215 (set_attr "prefix" "vex")
4216 (set_attr "mode" "V4DF")])
;; Full-width high interleave for V4DF, expanded as three steps with
;; three selection vectors: {0,4,2,6}, {1,5,3,7} and finally
;; {2,3,6,7}.  The preparation code allocates two fresh V4DF pseudos
;; as operands 3 and 4 (presumably the destinations of the first two
;; steps -- those lines are not visible here).
4218 (define_expand "vec_interleave_highv4df"
4222 (match_operand:V4DF 1 "register_operand" "x")
4223 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4224 (parallel [(const_int 0) (const_int 4)
4225 (const_int 2) (const_int 6)])))
4231 (parallel [(const_int 1) (const_int 5)
4232 (const_int 3) (const_int 7)])))
4233 (set (match_operand:V4DF 0 "register_operand" "")
4238 (parallel [(const_int 2) (const_int 3)
4239 (const_int 6) (const_int 7)])))]
4242 operands[3] = gen_reg_rtx (V4DFmode);
4243 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If the operand combination
;; is rejected by ix86_vec_interleave_v2df_operator_ok (high = 1),
;; operand 2 is forced into a register.
4247 (define_expand "vec_interleave_highv2df"
4248 [(set (match_operand:V2DF 0 "register_operand" "")
4251 (match_operand:V2DF 1 "nonimmediate_operand" "")
4252 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4253 (parallel [(const_int 1)
4257 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4258 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave.  Alternatives, in order:
;;   0/1  (v)unpckhpd    register sources
;;   2    (v)movddup     both sources are the same memory; loads %H1
;;   3/4  (v)movlpd      operand 1 in memory; loads its high half %H1
;;   5    (v)movhpd      store of operand 1 to a memory destination
;;                       that is tied to operand 2.
4261 (define_insn "*vec_interleave_highv2df"
4262 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4265 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4266 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4267 (parallel [(const_int 1)
4269 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4271 unpckhpd\t{%2, %0|%0, %2}
4272 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4273 %vmovddup\t{%H1, %0|%0, %H1}
4274 movlpd\t{%H1, %0|%0, %H1}
4275 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4276 %vmovhpd\t{%1, %0|%0, %1}"
4277 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4278 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4279 (set_attr "prefix_data16" "*,*,*,1,*,1")
4280 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4281 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4283 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander with a single V4DF source and selection vector
;; {0,4,2,6}, the same vector used by the unpcklpd256 patterns.
4284 (define_expand "avx_movddup256"
4285 [(set (match_operand:V4DF 0 "register_operand" "")
4288 (match_operand:V4DF 1 "nonimmediate_operand" "")
4290 (parallel [(const_int 0) (const_int 4)
4291 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpcklpd shape: two V4DF sources
;; and selection vector {0,4,2,6}.
4294 (define_expand "avx_unpcklpd256"
4295 [(set (match_operand:V4DF 0 "register_operand" "")
4298 (match_operand:V4DF 1 "register_operand" "")
4299 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4300 (parallel [(const_int 0) (const_int 4)
4301 (const_int 2) (const_int 6)])))]
;; Insn for selection vector {0,4,2,6} on V4DF.  Alternative 0 is the
;; general vunpcklpd; alternative 1 applies when operand 1 is in
;; memory and operand 2 is the same operand ("1"), and prints vmovddup.
4304 (define_insn "*avx_unpcklpd256"
4305 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4308 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4309 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4310 (parallel [(const_int 0) (const_int 4)
4311 (const_int 2) (const_int 6)])))]
4314 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4315 vmovddup\t{%1, %0|%0, %1}"
4316 [(set_attr "type" "sselog")
4317 (set_attr "prefix" "vex")
4318 (set_attr "mode" "V4DF")])
;; Full-width low interleave for V4DF, expanded as three steps with
;; three selection vectors: {0,4,2,6}, {1,5,3,7} and finally
;; {0,1,4,5}.  The preparation code allocates two fresh V4DF pseudos
;; as operands 3 and 4 (presumably the destinations of the first two
;; steps -- those lines are not visible here).
4320 (define_expand "vec_interleave_lowv4df"
4324 (match_operand:V4DF 1 "register_operand" "x")
4325 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4326 (parallel [(const_int 0) (const_int 4)
4327 (const_int 2) (const_int 6)])))
4333 (parallel [(const_int 1) (const_int 5)
4334 (const_int 3) (const_int 7)])))
4335 (set (match_operand:V4DF 0 "register_operand" "")
4340 (parallel [(const_int 0) (const_int 1)
4341 (const_int 4) (const_int 5)])))]
4344 operands[3] = gen_reg_rtx (V4DFmode);
4345 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If the operand combination
;; is rejected by ix86_vec_interleave_v2df_operator_ok (high = 0),
;; operand 1 is forced into a register.
4348 (define_expand "vec_interleave_lowv2df"
4349 [(set (match_operand:V2DF 0 "register_operand" "")
4352 (match_operand:V2DF 1 "nonimmediate_operand" "")
4353 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4354 (parallel [(const_int 0)
4358 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4359 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave.  Alternatives, in order:
;;   0/1  (v)unpcklpd    register sources
;;   2    (v)movddup     both sources are the same memory operand
;;   3/4  (v)movhpd      operand 2 in memory
;;   5    (v)movlpd      store of operand 2 into the high half (%H0) of
;;                       a memory destination that is tied to operand 1.
4362 (define_insn "*vec_interleave_lowv2df"
4363 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4366 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4367 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4368 (parallel [(const_int 0)
4370 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4372 unpcklpd\t{%2, %0|%0, %2}
4373 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4374 %vmovddup\t{%1, %0|%0, %1}
4375 movhpd\t{%2, %0|%0, %2}
4376 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4377 %vmovlpd\t{%2, %H0|%H0, %2}"
4378 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4379 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4380 (set_attr "prefix_data16" "*,*,*,1,*,1")
4381 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4382 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4385 [(set (match_operand:V2DF 0 "memory_operand" "")
4388 (match_operand:V2DF 1 "register_operand" "")
4390 (parallel [(const_int 0)
4392 "TARGET_SSE3 && reload_completed"
4395 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4396 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4397 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4402 [(set (match_operand:V2DF 0 "register_operand" "")
4405 (match_operand:V2DF 1 "memory_operand" "")
4407 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4408 (match_operand:SI 3 "const_int_operand" "")])))]
4409 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4410 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4412 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for 256-bit shufpd with an immediate mask in operand 3.  The
;; mask is decoded into explicit element selectors for avx_shufpd256_1;
;; each mask bit picks one of two elements (bit 1: 5 or 4, bit 2: 3 or 2,
;; bit 3: 7 or 6).
4415 (define_expand "avx_shufpd256"
4416 [(match_operand:V4DF 0 "register_operand" "")
4417 (match_operand:V4DF 1 "register_operand" "")
4418 (match_operand:V4DF 2 "nonimmediate_operand" "")
4419 (match_operand:SI 3 "const_int_operand" "")]
4422 int mask = INTVAL (operands[3]);
4423 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4425 GEN_INT (mask & 2 ? 5 : 4),
4426 GEN_INT (mask & 4 ? 3 : 2),
4427 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with explicit element selectors (operands 3..6).  The
;; output code folds the selectors back into the instruction immediate:
;; operand 3 gives bit 0, (operand 4 - 4) bit 1, (operand 5 - 2) bit 2 and
;; (operand 6 - 6) bit 3.  operands[3] is overwritten with that immediate
;; before the template is returned.
4431 (define_insn "avx_shufpd256_1"
4432 [(set (match_operand:V4DF 0 "register_operand" "=x")
4435 (match_operand:V4DF 1 "register_operand" "x")
4436 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4437 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4438 (match_operand 4 "const_4_to_5_operand" "")
4439 (match_operand 5 "const_2_to_3_operand" "")
4440 (match_operand 6 "const_6_to_7_operand" "")])))]
4444 mask = INTVAL (operands[3]);
4445 mask |= (INTVAL (operands[4]) - 4) << 1;
4446 mask |= (INTVAL (operands[5]) - 2) << 2;
4447 mask |= (INTVAL (operands[6]) - 6) << 3;
4448 operands[3] = GEN_INT (mask);
4450 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4452 [(set_attr "type" "sselog")
4453 (set_attr "length_immediate" "1")
4454 (set_attr "prefix" "vex")
4455 (set_attr "mode" "V4DF")])
;; Expander for 128-bit shufpd with an immediate mask in operand 3.  The
;; mask is turned into explicit selectors for sse2_shufpd_v2df; bit 1 of
;; the mask chooses selector 3 or 2.
4457 (define_expand "sse2_shufpd"
4458 [(match_operand:V2DF 0 "register_operand" "")
4459 (match_operand:V2DF 1 "register_operand" "")
4460 (match_operand:V2DF 2 "nonimmediate_operand" "")
4461 (match_operand:SI 3 "const_int_operand" "")]
4464 int mask = INTVAL (operands[3]);
4465 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4467 GEN_INT (mask & 2 ? 3 : 2)));
4471 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit high quadword interleave on V4DI, emitted as vpunpckhqdq.
;; Single VEX-only alternative: operand 1 in a register, operand 2 in a
;; register or memory.
4472 (define_insn "avx2_interleave_highv4di"
4473 [(set (match_operand:V4DI 0 "register_operand" "=x")
4476 (match_operand:V4DI 1 "register_operand" "x")
4477 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4478 (parallel [(const_int 1)
4483 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4484 [(set_attr "type" "sselog")
4485 (set_attr "prefix" "vex")
4486 (set_attr "mode" "OI")])
;; 128-bit high quadword interleave on V2DI.  Alternative 0 is the
;; two-operand punpckhqdq (operand 1 tied to the destination, noavx);
;; alternative 1 is the three-operand vpunpckhqdq (avx).
4488 (define_insn "vec_interleave_highv2di"
4489 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4492 (match_operand:V2DI 1 "register_operand" "0,x")
4493 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4494 (parallel [(const_int 1)
4498 punpckhqdq\t{%2, %0|%0, %2}
4499 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4500 [(set_attr "isa" "noavx,avx")
4501 (set_attr "type" "sselog")
4502 (set_attr "prefix_data16" "1,*")
4503 (set_attr "prefix" "orig,vex")
4504 (set_attr "mode" "TI")])
;; 256-bit low quadword interleave on V4DI, emitted as vpunpcklqdq.
;; Single VEX-only alternative: operand 1 in a register, operand 2 in a
;; register or memory.
4506 (define_insn "avx2_interleave_lowv4di"
4507 [(set (match_operand:V4DI 0 "register_operand" "=x")
4510 (match_operand:V4DI 1 "register_operand" "x")
4511 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4512 (parallel [(const_int 0)
4517 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4518 [(set_attr "type" "sselog")
4519 (set_attr "prefix" "vex")
4520 (set_attr "mode" "OI")])
;; 128-bit low quadword interleave on V2DI.  Alternative 0 is the
;; two-operand punpcklqdq (operand 1 tied to the destination, noavx);
;; alternative 1 is the three-operand vpunpcklqdq (avx).
4522 (define_insn "vec_interleave_lowv2di"
4523 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4526 (match_operand:V2DI 1 "register_operand" "0,x")
4527 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4528 (parallel [(const_int 0)
4532 punpcklqdq\t{%2, %0|%0, %2}
4533 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4534 [(set_attr "isa" "noavx,avx")
4535 (set_attr "type" "sselog")
4536 (set_attr "prefix_data16" "1,*")
4537 (set_attr "prefix" "orig,vex")
4538 (set_attr "mode" "TI")])
;; shufpd / vshufpd over the VI8F_128 modes.  Operand 3 (0..1) and
;; operand 4 (2..3) index the concatenation of operands 1 and 2.  The
;; output code packs them into the 2-bit immediate (operand 3 -> bit 0,
;; operand 4 - 2 -> bit 1), stores it in operands[3] and then picks the
;; SSE or VEX template according to which_alternative.
4540 (define_insn "sse2_shufpd_<mode>"
4541 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4542 (vec_select:VI8F_128
4543 (vec_concat:<ssedoublevecmode>
4544 (match_operand:VI8F_128 1 "register_operand" "0,x")
4545 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4546 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4547 (match_operand 4 "const_2_to_3_operand" "")])))]
4551 mask = INTVAL (operands[3]);
4552 mask |= (INTVAL (operands[4]) - 2) << 1;
4553 operands[3] = GEN_INT (mask);
4555 switch (which_alternative)
4558 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4560 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4565 [(set_attr "isa" "noavx,avx")
4566 (set_attr "type" "sselog")
4567 (set_attr "length_immediate" "1")
4568 (set_attr "prefix" "orig,vex")
4569 (set_attr "mode" "V2DF")])
4571 ;; Avoid combining registers from different units in a single alternative,
4572 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF into a DF destination.  The destination
;; may be memory, an SSE register, an x87 register or an integer register
;; (see the constraint string); the last three alternatives read from an
;; offsettable memory source.  Memory-to-memory is rejected by the insn
;; condition.  prefix_data16 is computed: it depends on alternative 0
;; being used without TARGET_AVX and is "*" otherwise.
4573 (define_insn "sse2_storehpd"
4574 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4576 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4577 (parallel [(const_int 1)])))]
4578 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4580 %vmovhpd\t{%1, %0|%0, %1}
4582 vunpckhpd\t{%d1, %0|%0, %d1}
4586 [(set_attr "isa" "*,noavx,avx,*,*,*")
4587 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4588 (set (attr "prefix_data16")
4590 (and (eq_attr "alternative" "0")
4591 (not (match_test "TARGET_AVX")))
4593 (const_string "*")))
4594 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4595 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4598 [(set (match_operand:DF 0 "register_operand" "")
4600 (match_operand:V2DF 1 "memory_operand" "")
4601 (parallel [(const_int 1)])))]
4602 "TARGET_SSE2 && reload_completed"
4603 [(set (match_dup 0) (match_dup 1))]
4604 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE1-only fallback (TARGET_SSE without TARGET_SSE2) for extracting
;; element 1 of a V2DF: movhps to memory, movhlps between registers, or
;; movlps from the high half (%H1) of an offsettable memory source.
4606 (define_insn "*vec_extractv2df_1_sse"
4607 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4609 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4610 (parallel [(const_int 1)])))]
4611 "!TARGET_SSE2 && TARGET_SSE
4612 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4614 movhps\t{%1, %0|%0, %1}
4615 movhlps\t{%1, %0|%0, %1}
4616 movlps\t{%H1, %0|%0, %H1}"
4617 [(set_attr "type" "ssemov")
4618 (set_attr "mode" "V2SF,V4SF,V2SF")])
4620 ;; Avoid combining registers from different units in a single alternative,
4621 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF into a DF destination (memory, SSE, x87 or
;; integer register per the constraints).  Alternative 0 stores an SSE
;; register to memory with movlpd; memory-to-memory is rejected by the
;; insn condition.
4622 (define_insn "sse2_storelpd"
4623 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4625 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4626 (parallel [(const_int 0)])))]
4627 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4629 %vmovlpd\t{%1, %0|%0, %1}
4634 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4635 (set_attr "prefix_data16" "1,*,*,*,*")
4636 (set_attr "prefix" "maybe_vex")
4637 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4640 [(set (match_operand:DF 0 "register_operand" "")
4642 (match_operand:V2DF 1 "nonimmediate_operand" "")
4643 (parallel [(const_int 0)])))]
4644 "TARGET_SSE2 && reload_completed"
4647 rtx op1 = operands[1];
4649 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4651 op1 = gen_lowpart (DFmode, op1);
4652 emit_move_insn (operands[0], op1);
;; SSE1-only fallback (TARGET_SSE without TARGET_SSE2) for extracting
;; element 0 of a V2DF: movlps to or from memory, movaps between
;; registers.
4656 (define_insn "*vec_extractv2df_0_sse"
4657 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4659 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4660 (parallel [(const_int 0)])))]
4661 "!TARGET_SSE2 && TARGET_SSE
4662 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4664 movlps\t{%1, %0|%0, %1}
4665 movaps\t{%1, %0|%0, %1}
4666 movlps\t{%1, %0|%0, %1}"
4667 [(set_attr "type" "ssemov")
4668 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse2_loadhpd.  ix86_fixup_binary_operands
;; returns the destination to use for the insn; when that differs from
;; operand 0, the result is copied into operand 0 afterwards.
4670 (define_expand "sse2_loadhpd_exp"
4671 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4674 (match_operand:V2DF 1 "nonimmediate_operand" "")
4675 (parallel [(const_int 0)]))
4676 (match_operand:DF 2 "nonimmediate_operand" "")))]
4679 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4681 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4683 /* Fix up the destination if needed. */
4684 if (dst != operands[0])
4685 emit_move_insn (operands[0], dst);
4690 ;; Avoid combining registers from different units in a single alternative,
4691 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine element 0 of V2DF operand 1 with DF operand 2.  Per the "isa"
;; and template lists, alternatives 0/1 are movhpd / vmovhpd (operand 2 in
;; memory) and alternatives 2/3 are unpcklpd / vunpcklpd (operand 2 in an
;; SSE register); the remaining alternatives take operand 2 from an SSE,
;; x87 or integer register.  Operands 1 and 2 may not both be memory.
4692 (define_insn "sse2_loadhpd"
4693 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4697 (match_operand:V2DF 1 "nonimmediate_operand"
4699 (parallel [(const_int 0)]))
4700 (match_operand:DF 2 "nonimmediate_operand"
4701 " m,m,x,x,x,*f,r")))]
4702 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4704 movhpd\t{%2, %0|%0, %2}
4705 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4706 unpcklpd\t{%2, %0|%0, %2}
4707 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4711 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4712 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4713 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4714 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4715 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4718 [(set (match_operand:V2DF 0 "memory_operand" "")
4720 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4721 (match_operand:DF 1 "register_operand" "")))]
4722 "TARGET_SSE2 && reload_completed"
4723 [(set (match_dup 0) (match_dup 1))]
4724 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander front end for sse2_loadlpd.  ix86_fixup_binary_operands
;; returns the destination to use for the insn; when that differs from
;; operand 0, the result is copied into operand 0 afterwards.
4726 (define_expand "sse2_loadlpd_exp"
4727 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4729 (match_operand:DF 2 "nonimmediate_operand" "")
4731 (match_operand:V2DF 1 "nonimmediate_operand" "")
4732 (parallel [(const_int 1)]))))]
4735 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4737 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4739 /* Fix up the destination if needed. */
4740 if (dst != operands[0])
4741 emit_move_insn (operands[0], dst);
4746 ;; Avoid combining registers from different units in a single alternative,
4747 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine DF operand 2 with element 1 of V2DF operand 1.  Eleven
;; alternatives: the first eight produce an SSE register (movsd, movlpd,
;; shufpd and movhpd forms, each with a VEX counterpart where the "isa"
;; list says avx), the last three have a memory destination that is also
;; operand 1, with operand 2 coming from an SSE, x87 or integer register.
;; Only alternative 5 (shufpd) carries an immediate byte.
4748 (define_insn "sse2_loadlpd"
4749 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4750 "=x,x,x,x,x,x,x,x,m,m ,m")
4752 (match_operand:DF 2 "nonimmediate_operand"
4753 " m,m,m,x,x,0,0,x,x,*f,r")
4755 (match_operand:V2DF 1 "vector_move_operand"
4756 " C,0,x,0,x,x,o,o,0,0 ,0")
4757 (parallel [(const_int 1)]))))]
4758 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4760 %vmovsd\t{%2, %0|%0, %2}
4761 movlpd\t{%2, %0|%0, %2}
4762 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4763 movsd\t{%2, %0|%0, %2}
4764 vmovsd\t{%2, %1, %0|%0, %1, %2}
4765 shufpd\t{$2, %1, %0|%0, %1, 2}
4766 movhpd\t{%H1, %0|%0, %H1}
4767 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4771 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4773 (cond [(eq_attr "alternative" "5")
4774 (const_string "sselog")
4775 (eq_attr "alternative" "9")
4776 (const_string "fmov")
4777 (eq_attr "alternative" "10")
4778 (const_string "imov")
4780 (const_string "ssemov")))
4781 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4782 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4783 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4784 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4787 [(set (match_operand:V2DF 0 "memory_operand" "")
4789 (match_operand:DF 1 "register_operand" "")
4790 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4791 "TARGET_SSE2 && reload_completed"
4792 [(set (match_dup 0) (match_dup 1))]
4793 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; V2DF merge of operands 2 and 1 with nine alternatives: movsd / vmovsd
;; (register operand 2), movlpd / vmovlpd (memory operand 2), a movlpd
;; store to memory, shufpd with immediate 2, movhps / vmovhps loading from
;; the high half (%H1) of an offsettable operand 1, and a movhps store
;; into the high half (%H0) of an offsettable destination.
;; "type" is sselog only for the shufpd alternative (5); prefix_data16 is
;; computed from alternatives 2 and 4 without TARGET_AVX.
4795 (define_insn "sse2_movsd"
4796 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4798 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4799 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4803 movsd\t{%2, %0|%0, %2}
4804 vmovsd\t{%2, %1, %0|%0, %1, %2}
4805 movlpd\t{%2, %0|%0, %2}
4806 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4807 %vmovlpd\t{%2, %0|%0, %2}
4808 shufpd\t{$2, %1, %0|%0, %1, 2}
4809 movhps\t{%H1, %0|%0, %H1}
4810 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4811 %vmovhps\t{%1, %H0|%H0, %1}"
4812 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4815 (eq_attr "alternative" "5")
4816 (const_string "sselog")
4817 (const_string "ssemov")))
4818 (set (attr "prefix_data16")
4820 (and (eq_attr "alternative" "2,4")
4821 (not (match_test "TARGET_AVX")))
4823 (const_string "*")))
4824 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4825 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4826 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander for a V2DF built from a single DF operand (operand 1 may be
;; in memory at expand time).  The preparation code can force operand 1
;; into a DFmode register before the pattern is emitted.
4828 (define_expand "vec_dupv2df"
4829 [(set (match_operand:V2DF 0 "register_operand" "")
4831 (match_operand:DF 1 "nonimmediate_operand" "")))]
4835 operands[1] = force_reg (DFmode, operands[1]);
;; movddup form: the DF source may be an SSE register or memory, and the
;; %v prefix selects the VEX encoding when available (prefix maybe_vex).
4838 (define_insn "*vec_dupv2df_sse3"
4839 [(set (match_operand:V2DF 0 "register_operand" "=x")
4841 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4843 "%vmovddup\t{%1, %0|%0, %1}"
4844 [(set_attr "type" "sselog1")
4845 (set_attr "prefix" "maybe_vex")
4846 (set_attr "mode" "DF")])
;; Register-only variant: operand 1 must already live in the destination
;; register (constraint "0").
4848 (define_insn "*vec_dupv2df"
4849 [(set (match_operand:V2DF 0 "register_operand" "=x")
4851 (match_operand:DF 1 "register_operand" "0")))]
4854 [(set_attr "type" "sselog1")
4855 (set_attr "mode" "V2DF")])
;; movddup form of a V2DF concatenation: only operand 1 (SSE register or
;; memory) appears in the output template.
4857 (define_insn "*vec_concatv2df_sse3"
4858 [(set (match_operand:V2DF 0 "register_operand" "=x")
4860 (match_operand:DF 1 "nonimmediate_operand" "xm")
4863 "%vmovddup\t{%1, %0|%0, %1}"
4864 [(set_attr "type" "sselog1")
4865 (set_attr "prefix" "maybe_vex")
4866 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Seven alternatives:
;;   0/1  unpcklpd / vunpcklpd, both halves in SSE registers,
;;   2/3  movhpd / vmovhpd, operand 2 in memory,
;;   4    movsd of operand 1 from memory when operand 2 matches "C",
;;   5/6  movlhps / movhps, enabled for noavx only (no SSE2 requirement
;;        in the "isa" list).
4868 (define_insn "*vec_concatv2df"
4869 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4871 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4872 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4875 unpcklpd\t{%2, %0|%0, %2}
4876 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4877 movhpd\t{%2, %0|%0, %2}
4878 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4879 %vmovsd\t{%1, %0|%0, %1}
4880 movlhps\t{%2, %0|%0, %2}
4881 movhps\t{%2, %0|%0, %2}"
4882 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4885 (eq_attr "alternative" "0,1")
4886 (const_string "sselog")
4887 (const_string "ssemov")))
4888 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4889 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4890 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4892 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4894 ;; Parallel integral arithmetic
4896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander over VI_AVX2.  The preparation code
;; sets operand 2 to an all-zero vector forced into a register
;; (presumably so the negation is emitted as zero minus operand 1).
4898 (define_expand "neg<mode>2"
4899 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4902 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4904 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over VI_AVX2.  Both inputs may be
;; memory at expand time; ix86_fixup_binary_operands_no_copy adjusts the
;; operands in place for the matching insn below.
4906 (define_expand "<plusminus_insn><mode>3"
4907 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4909 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4910 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4912 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract.  Alternative 0 is the two-operand SSE2
;; form with operand 1 tied to the destination (<comm> supplies the
;; commutative marker where the code allows it); alternative 1 is the
;; three-operand VEX form.  The "mode" attribute follows the vector width
;; via <sseinsnmode>.
4914 (define_insn "*<plusminus_insn><mode>3"
4915 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4917 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4918 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4919 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4921 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4922 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4923 [(set_attr "isa" "noavx,avx")
4924 (set_attr "type" "sseiadd")
4925 (set_attr "prefix_data16" "1,*")
4926 (set_attr "prefix" "orig,vex")
4927 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus) over VI12_AVX2.
;; ix86_fixup_binary_operands_no_copy adjusts the operands in place for
;; the matching insn below.
4929 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4930 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4931 (sat_plusminus:VI12_AVX2
4932 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4933 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4935 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract over VI12_AVX2.  Alternative 0 is the
;; two-operand SSE2 form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
;; The "mode" attribute uses <sseinsnmode> rather than a fixed "TI" so
;; that it follows the vector width, matching the non-saturating
;; add/subtract insn (VI12_AVX2 presumably includes 256-bit modes).
4937 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4938 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4939 (sat_plusminus:VI12_AVX2
4940 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4941 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4942 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4944 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4945 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4946 [(set_attr "isa" "noavx,avx")
4947 (set_attr "type" "sseiadd")
4948 (set_attr "prefix_data16" "1,*")
4949 (set_attr "prefix" "orig,vex")
4950 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element vector multiply (VI1_AVX2), split while new pseudos can
;; still be created.  Six temporaries are allocated; both inputs are
;; interleaved (high and low halves), the halves are multiplied in
;; <sseunpackmode>, and ix86_expand_vec_extract_even_odd merges the even
;; bytes of the two products into operand 0.  A REG_EQUAL note with the
;; plain MULT is attached to the last emitted insn.
4952 (define_insn_and_split "mul<mode>3"
4953 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4954 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4955 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4957 && can_create_pseudo_p ()"
4964 enum machine_mode mulmode = <sseunpackmode>mode;
4966 for (i = 0; i < 6; ++i)
4967 t[i] = gen_reg_rtx (<MODE>mode);
4969 /* Unpack data such that we've got a source byte in each low byte of
4970 each word. We don't care what goes into the high byte of each word.
4971 Rather than trying to get zero in there, most convenient is to let
4972 it be a copy of the low byte. */
4973 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4975 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4977 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4979 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4982 /* Multiply words. The end-of-line annotations here give a picture of what
4983 the output of that instruction looks like. Dot means don't care; the
4984 letters are the bytes of the result with A being the most significant. */
4985 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4986 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4987 gen_lowpart (mulmode, t[0]),
4988 gen_lowpart (mulmode, t[1]))));
4989 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4990 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4991 gen_lowpart (mulmode, t[2]),
4992 gen_lowpart (mulmode, t[3]))));
4994 /* Extract the even bytes and merge them back together. */
4995 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4997 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4998 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-element vector multiply expander (VI2_AVX2).  Operands are
;; adjusted in place by ix86_fixup_binary_operands_no_copy for the
;; matching insn below.
5002 (define_expand "mul<mode>3"
5003 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5004 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5005 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5007 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Word-element multiply: pmullw (operand 1 tied to the destination) or
;; the three-operand vpmullw.  The "%" on operand 1 marks operands 1 and
;; 2 as commutative.
5009 (define_insn "*mul<mode>3"
5010 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5011 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5012 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5013 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5015 pmullw\t{%2, %0|%0, %2}
5016 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5017 [(set_attr "isa" "noavx,avx")
5018 (set_attr "type" "sseimul")
5019 (set_attr "prefix_data16" "1,*")
5020 (set_attr "prefix" "orig,vex")
5021 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for word elements (VI2_AVX2), signed or
;; unsigned via any_extend.  The pattern extends both operands to
;; <ssedoublemode>, multiplies them and shifts the product right.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5023 (define_expand "<s>mul<mode>3_highpart"
5024 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5026 (lshiftrt:<ssedoublemode>
5027 (mult:<ssedoublemode>
5028 (any_extend:<ssedoublemode>
5029 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5030 (any_extend:<ssedoublemode>
5031 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5034 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part word multiply: pmulh<u>w (operand 1 tied to the destination)
;; or the three-operand vpmulh<u>w; <u> selects the unsigned mnemonic.
;; Operands 1 and 2 are commutative ("%").
5036 (define_insn "*<s>mul<mode>3_highpart"
5037 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5039 (lshiftrt:<ssedoublemode>
5040 (mult:<ssedoublemode>
5041 (any_extend:<ssedoublemode>
5042 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5043 (any_extend:<ssedoublemode>
5044 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5046 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5048 pmulh<u>w\t{%2, %0|%0, %2}
5049 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5050 [(set_attr "isa" "noavx,avx")
5051 (set_attr "type" "sseimul")
5052 (set_attr "prefix_data16" "1,*")
5053 (set_attr "prefix" "orig,vex")
5054 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 256-bit widening multiply of the even-numbered
;; (0, 2, 4, 6) V8SI elements of both inputs into a V4DI result.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5056 (define_expand "avx2_umulv4siv4di3"
5057 [(set (match_operand:V4DI 0 "register_operand" "")
5061 (match_operand:V8SI 1 "nonimmediate_operand" "")
5062 (parallel [(const_int 0) (const_int 2)
5063 (const_int 4) (const_int 6)])))
5066 (match_operand:V8SI 2 "nonimmediate_operand" "")
5067 (parallel [(const_int 0) (const_int 2)
5068 (const_int 4) (const_int 6)])))))]
5070 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on 256-bit vectors: multiplies elements 0, 2, 4 and 6 of the
;; two V8SI inputs into a V4DI result.  VEX-only; operands 1 and 2 are
;; commutative ("%").  Requires TARGET_AVX2 despite the "avx_" name.
5072 (define_insn "*avx_umulv4siv4di3"
5073 [(set (match_operand:V4DI 0 "register_operand" "=x")
5077 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5078 (parallel [(const_int 0) (const_int 2)
5079 (const_int 4) (const_int 6)])))
5082 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5083 (parallel [(const_int 0) (const_int 2)
5084 (const_int 4) (const_int 6)])))))]
5085 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5086 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5087 [(set_attr "type" "sseimul")
5088 (set_attr "prefix" "vex")
5089 (set_attr "mode" "OI")])
;; Expander for the 128-bit widening multiply of V4SI elements 0 and 2 of
;; both inputs into a V2DI result.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
5091 (define_expand "sse2_umulv2siv2di3"
5092 [(set (match_operand:V2DI 0 "register_operand" "")
5096 (match_operand:V4SI 1 "nonimmediate_operand" "")
5097 (parallel [(const_int 0) (const_int 2)])))
5100 (match_operand:V4SI 2 "nonimmediate_operand" "")
5101 (parallel [(const_int 0) (const_int 2)])))))]
5103 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq: multiplies V4SI elements 0 and 2 of the two inputs
;; into a V2DI result.  Alternative 0 ties operand 1 to the destination;
;; operands 1 and 2 are commutative ("%").
5105 (define_insn "*sse2_umulv2siv2di3"
5106 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5110 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5111 (parallel [(const_int 0) (const_int 2)])))
5114 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5115 (parallel [(const_int 0) (const_int 2)])))))]
5116 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5118 pmuludq\t{%2, %0|%0, %2}
5119 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5120 [(set_attr "isa" "noavx,avx")
5121 (set_attr "type" "sseimul")
5122 (set_attr "prefix_data16" "1,*")
5123 (set_attr "prefix" "orig,vex")
5124 (set_attr "mode" "TI")])
;; Expander for the 256-bit widening multiply (vpmuldq in the matching
;; insn) of V8SI elements 0, 2, 4 and 6 of both inputs into a V4DI
;; result.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
5126 (define_expand "avx2_mulv4siv4di3"
5127 [(set (match_operand:V4DI 0 "register_operand" "")
5131 (match_operand:V8SI 1 "nonimmediate_operand" "")
5132 (parallel [(const_int 0) (const_int 2)
5133 (const_int 4) (const_int 6)])))
5136 (match_operand:V8SI 2 "nonimmediate_operand" "")
5137 (parallel [(const_int 0) (const_int 2)
5138 (const_int 4) (const_int 6)])))))]
5140 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on 256-bit vectors: multiplies elements 0, 2, 4 and 6 of the
;; two V8SI inputs into a V4DI result.  VEX-only.
;; Operand 1 carries "%" so that operands 1 and 2 are treated as
;; commutative, like the other widening-multiply insns in this file.  The
;; insn condition goes through ix86_binary_operator_ok (MULT, ...);
;; without "%", a memory operand 1 would presumably have to be reloaded
;; into a register instead of being swapped with operand 2.
5142 (define_insn "*avx2_mulv4siv4di3"
5143 [(set (match_operand:V4DI 0 "register_operand" "=x")
5147 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5148 (parallel [(const_int 0) (const_int 2)
5149 (const_int 4) (const_int 6)])))
5152 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5153 (parallel [(const_int 0) (const_int 2)
5154 (const_int 4) (const_int 6)])))))]
5155 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5156 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5157 [(set_attr "isa" "avx")
5158 (set_attr "type" "sseimul")
5159 (set_attr "prefix_extra" "1")
5160 (set_attr "prefix" "vex")
5161 (set_attr "mode" "OI")])
;; Expander for the 128-bit widening multiply (pmuldq in the matching
;; insn) of V4SI elements 0 and 2 of both inputs into a V2DI result.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5163 (define_expand "sse4_1_mulv2siv2di3"
5164 [(set (match_operand:V2DI 0 "register_operand" "")
5168 (match_operand:V4SI 1 "nonimmediate_operand" "")
5169 (parallel [(const_int 0) (const_int 2)])))
5172 (match_operand:V4SI 2 "nonimmediate_operand" "")
5173 (parallel [(const_int 0) (const_int 2)])))))]
5175 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq (SSE4.1): multiplies V4SI elements 0 and 2 of the two
;; inputs into a V2DI result.  Alternative 0 ties operand 1 to the
;; destination; operands 1 and 2 are commutative ("%").
5177 (define_insn "*sse4_1_mulv2siv2di3"
5178 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5182 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5183 (parallel [(const_int 0) (const_int 2)])))
5186 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5187 (parallel [(const_int 0) (const_int 2)])))))]
5188 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5190 pmuldq\t{%2, %0|%0, %2}
5191 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5192 [(set_attr "isa" "noavx,avx")
5193 (set_attr "type" "sseimul")
5194 (set_attr "prefix_data16" "1,*")
5195 (set_attr "prefix_extra" "1")
5196 (set_attr "prefix" "orig,vex")
5197 (set_attr "mode" "TI")])
;; Expander for 256-bit pmaddwd (V16HI inputs, V8SI result).  The pattern
;; selects V8HI sub-vectors from both operands, one selection starting at
;; element 0 and one starting at element 1 and ending at element 15.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5199 (define_expand "avx2_pmaddwd"
5200 [(set (match_operand:V8SI 0 "register_operand" "")
5205 (match_operand:V16HI 1 "nonimmediate_operand" "")
5206 (parallel [(const_int 0)
5216 (match_operand:V16HI 2 "nonimmediate_operand" "")
5217 (parallel [(const_int 0)
5227 (vec_select:V8HI (match_dup 1)
5228 (parallel [(const_int 1)
5237 (vec_select:V8HI (match_dup 2)
5238 (parallel [(const_int 1)
5245 (const_int 15)]))))))]
5247 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for 128-bit pmaddwd (V8HI inputs, V4SI result).  The pattern
;; selects V4HI sub-vectors from both operands, one selection starting at
;; element 0 and one starting at element 1 and ending at element 7.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5249 (define_expand "sse2_pmaddwd"
5250 [(set (match_operand:V4SI 0 "register_operand" "")
5255 (match_operand:V8HI 1 "nonimmediate_operand" "")
5256 (parallel [(const_int 0)
5262 (match_operand:V8HI 2 "nonimmediate_operand" "")
5263 (parallel [(const_int 0)
5269 (vec_select:V4HI (match_dup 1)
5270 (parallel [(const_int 1)
5275 (vec_select:V4HI (match_dup 2)
5276 (parallel [(const_int 1)
5279 (const_int 7)]))))))]
5281 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; vpmaddwd on 256-bit vectors (V16HI inputs, V8SI result).  VEX-only,
;; single alternative; operands 1 and 2 are commutative ("%").
5283 (define_insn "*avx2_pmaddwd"
5284 [(set (match_operand:V8SI 0 "register_operand" "=x")
5289 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5290 (parallel [(const_int 0)
5300 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5301 (parallel [(const_int 0)
5311 (vec_select:V8HI (match_dup 1)
5312 (parallel [(const_int 1)
5321 (vec_select:V8HI (match_dup 2)
5322 (parallel [(const_int 1)
5329 (const_int 15)]))))))]
5330 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5331 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5332 [(set_attr "type" "sseiadd")
5333 (set_attr "prefix" "vex")
5334 (set_attr "mode" "OI")])
;; pmaddwd / vpmaddwd on 128-bit vectors (V8HI inputs, V4SI result).
;; Alternative 0 ties operand 1 to the destination; operands 1 and 2 are
;; commutative ("%").  The atom_unit attribute is "simul".
5336 (define_insn "*sse2_pmaddwd"
5337 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5342 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5343 (parallel [(const_int 0)
5349 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5350 (parallel [(const_int 0)
5356 (vec_select:V4HI (match_dup 1)
5357 (parallel [(const_int 1)
5362 (vec_select:V4HI (match_dup 2)
5363 (parallel [(const_int 1)
5366 (const_int 7)]))))))]
5367 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5369 pmaddwd\t{%2, %0|%0, %2}
5370 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5371 [(set_attr "isa" "noavx,avx")
5372 (set_attr "type" "sseiadd")
5373 (set_attr "atom_unit" "simul")
5374 (set_attr "prefix_data16" "1,*")
5375 (set_attr "prefix" "orig,vex")
5376 (set_attr "mode" "TI")])
;; 32-bit element vector multiply expander (VI4_AVX2); both inputs must
;; be registers.  With TARGET_SSE4_1 or TARGET_AVX the operands are passed
;; through ix86_fixup_binary_operands_no_copy.
5378 (define_expand "mul<mode>3"
5379 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5380 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5381 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5384 if (TARGET_SSE4_1 || TARGET_AVX)
5385 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; 32-bit element multiply: pmulld (operand 1 tied to the destination) or
;; the three-operand vpmulld.  Requires TARGET_SSE4_1; operands 1 and 2
;; are commutative ("%").
5388 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5389 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5390 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5391 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5392 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5394 pmulld\t{%2, %0|%0, %2}
5395 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5396 [(set_attr "isa" "noavx,avx")
5397 (set_attr "type" "sseimul")
5398 (set_attr "prefix_extra" "1")
5399 (set_attr "prefix" "orig,vex")
5400 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, split while new
;; pseudos can still be created.  The sequence uses two
;; sse2_umulv2siv2di3 multiplies (elements 2/0, then 3/1 after shifting
;; both inputs with sse2_lshrv1ti3), two pshufd shuffles to move the
;; wanted results down, and a final low interleave into the destination.
;; A REG_EQUAL note with the plain MULT is attached to the last insn.
5402 (define_insn_and_split "*sse2_mulv4si3"
5403 [(set (match_operand:V4SI 0 "register_operand" "")
5404 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5405 (match_operand:V4SI 2 "register_operand" "")))]
5406 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5407 && can_create_pseudo_p ()"
5412 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5418 t1 = gen_reg_rtx (V4SImode);
5419 t2 = gen_reg_rtx (V4SImode);
5420 t3 = gen_reg_rtx (V4SImode);
5421 t4 = gen_reg_rtx (V4SImode);
5422 t5 = gen_reg_rtx (V4SImode);
5423 t6 = gen_reg_rtx (V4SImode);
5424 thirtytwo = GEN_INT (32);
5426 /* Multiply elements 2 and 0. */
5427 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5430 /* Shift both input vectors down one element, so that elements 3
5431 and 1 are now in the slots for elements 2 and 0. For K8, at
5432 least, this is faster than using a shuffle. */
5433 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5434 gen_lowpart (V1TImode, op1),
5436 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5437 gen_lowpart (V1TImode, op2),
5439 /* Multiply elements 3 and 1. */
5440 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5443 /* Move the results in element 2 down to element 1; we don't care
5444 what goes in elements 2 and 3. */
5445 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5446 const0_rtx, const0_rtx));
5447 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5448 const0_rtx, const0_rtx));
5450 /* Merge the parts back together. */
5451 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5453 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5454 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; 64-bit element vector multiply (VI8_AVX2), split while new pseudos can
;; still be created.  Two strategies are visible:
;;   - TARGET_XOP with V2DImode: shuffle, V4SI multiply, horizontal add
;;     (xop_phadddq), shift left by 32 and a final multiply-accumulate
;;     (xop_pmacsdql);
;;   - otherwise: three widening 32x32 multiplies (low*low and the two
;;     cross products), the cross products shifted left by 32, and two
;;     vector adds.
;; A REG_EQUAL note with the plain MULT is attached to the last insn.
5458 (define_insn_and_split "mul<mode>3"
5459 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5460 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5461 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5463 && can_create_pseudo_p ()"
5468 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5475 if (TARGET_XOP && <MODE>mode == V2DImode)
5477 /* op1: A,B,C,D, op2: E,F,G,H */
5478 op1 = gen_lowpart (V4SImode, op1);
5479 op2 = gen_lowpart (V4SImode, op2);
5481 t1 = gen_reg_rtx (V4SImode);
5482 t2 = gen_reg_rtx (V4SImode);
5483 t3 = gen_reg_rtx (V2DImode);
5484 t4 = gen_reg_rtx (V2DImode);
5487 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5493 /* t2: (B*E),(A*F),(D*G),(C*H) */
5494 emit_insn (gen_mulv4si3 (t2, t1, op2));
5496 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5497 emit_insn (gen_xop_phadddq (t3, t2));
5499 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5500 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5502 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5503 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5507 t1 = gen_reg_rtx (<MODE>mode);
5508 t2 = gen_reg_rtx (<MODE>mode);
5509 t3 = gen_reg_rtx (<MODE>mode);
5510 t4 = gen_reg_rtx (<MODE>mode);
5511 t5 = gen_reg_rtx (<MODE>mode);
5512 t6 = gen_reg_rtx (<MODE>mode);
5513 thirtytwo = GEN_INT (32);
5515 /* Multiply low parts. */
5516 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5517 (t1, gen_lowpart (<ssepackmode>mode, op1),
5518 gen_lowpart (<ssepackmode>mode, op2)));
5520 /* Shift input vectors right 32 bits so we can multiply high parts. */
5521 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5522 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5524 /* Multiply high parts by low parts. */
5525 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5526 (t4, gen_lowpart (<ssepackmode>mode, op1),
5527 gen_lowpart (<ssepackmode>mode, t3)));
5528 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5529 (t5, gen_lowpart (<ssepackmode>mode, op2),
5530 gen_lowpart (<ssepackmode>mode, t2)));
5532 /* Shift them back. */
5533 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5534 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5536 /* Add the three parts together. */
5537 emit_insn (gen_add<mode>3 (t6, t1, t4));
5538 emit_insn (gen_add<mode>3 (op0, t6, t5));
5541 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5542 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for the VI2_AVX2 modes.
;; Emits mul<mode>3 into t1 and <s>mul<mode>3_highpart into t2, then
;; interleaves the high elements of t1 and t2 into operand 0, which is
;; accessed through a <MODE>mode lowpart.
5546 (define_expand "vec_widen_<s>mult_hi_<mode>"
5547 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5548 (any_extend:<sseunpackmode>
5549 (match_operand:VI2_AVX2 1 "register_operand" ""))
5550 (match_operand:VI2_AVX2 2 "register_operand" "")]
5553 rtx op1, op2, t1, t2, dest;
5557 t1 = gen_reg_rtx (<MODE>mode);
5558 t2 = gen_reg_rtx (<MODE>mode);
5559 dest = gen_lowpart (<MODE>mode, operands[0]);
5561 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5562 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5563 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for the VI2_AVX2 modes.
;; Emits mul<mode>3 into t1 and <s>mul<mode>3_highpart into t2, then
;; interleaves the low elements of t1 and t2 into operand 0, which is
;; accessed through a <MODE>mode lowpart.
5567 (define_expand "vec_widen_<s>mult_lo_<mode>"
5568 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5569 (any_extend:<sseunpackmode>
5570 (match_operand:VI2_AVX2 1 "register_operand" ""))
5571 (match_operand:VI2_AVX2 2 "register_operand" "")]
5574 rtx op1, op2, t1, t2, dest;
5578 t1 = gen_reg_rtx (<MODE>mode);
5579 t2 = gen_reg_rtx (<MODE>mode);
5580 dest = gen_lowpart (<MODE>mode, operands[0]);
5582 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5583 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5584 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening V8SI -> V4DI multiply, high half.
;; Each input is first permuted as V4DI with selector (0, 2, 1, 3), then
;; its dwords are shuffled with pshufd selector (2, 2, 3, 3); the two
;; shuffled vectors are multiplied with avx2_<u>mulv4siv4di3 into operand 0.
5588 (define_expand "vec_widen_<s>mult_hi_v8si"
5589 [(match_operand:V4DI 0 "register_operand" "")
5590 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5591 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5596 t1 = gen_reg_rtx (V4DImode);
5597 t2 = gen_reg_rtx (V4DImode);
5598 t3 = gen_reg_rtx (V8SImode);
5599 t4 = gen_reg_rtx (V8SImode);
5600 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5601 const0_rtx, const2_rtx,
5602 const1_rtx, GEN_INT (3)));
5603 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5604 const0_rtx, const2_rtx,
5605 const1_rtx, GEN_INT (3)));
5606 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5607 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5608 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5609 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5610 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Widening V8SI -> V4DI multiply, low half.
;; Each input is first permuted as V4DI with selector (0, 2, 1, 3), then
;; its dwords are shuffled with pshufd selector (0, 0, 1, 1); the two
;; shuffled vectors are multiplied with avx2_<u>mulv4siv4di3 into operand 0.
5614 (define_expand "vec_widen_<s>mult_lo_v8si"
5615 [(match_operand:V4DI 0 "register_operand" "")
5616 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5617 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5622 t1 = gen_reg_rtx (V4DImode);
5623 t2 = gen_reg_rtx (V4DImode);
5624 t3 = gen_reg_rtx (V8SImode);
5625 t4 = gen_reg_rtx (V8SImode);
5626 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5627 const0_rtx, const2_rtx,
5628 const1_rtx, GEN_INT (3)));
5629 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5630 const0_rtx, const2_rtx,
5631 const1_rtx, GEN_INT (3)));
5632 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5633 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5634 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5635 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5636 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed widening V4SI -> V2DI multiply, high half.
;; Two alternative sequences are emitted:
;;  - pshufd with selector (0, 2, 1, 3) on both inputs followed by
;;    xop_mulv2div2di3_high (presumably the XOP path -- confirm against
;;    the guarding condition);
;;  - each input interleaved (high) with itself, followed by
;;    sse4_1_mulv2siv2di3.
5640 (define_expand "vec_widen_smult_hi_v4si"
5641 [(match_operand:V2DI 0 "register_operand" "")
5642 (match_operand:V4SI 1 "register_operand" "")
5643 (match_operand:V4SI 2 "register_operand" "")]
5646 rtx op1, op2, t1, t2;
5650 t1 = gen_reg_rtx (V4SImode);
5651 t2 = gen_reg_rtx (V4SImode);
5655 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5656 GEN_INT (1), GEN_INT (3)));
5657 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5658 GEN_INT (1), GEN_INT (3)));
5659 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5663 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5664 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5665 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed widening V4SI -> V2DI multiply, low half.
;; Two alternative sequences are emitted:
;;  - pshufd with selector (0, 2, 1, 3) on both inputs followed by
;;    xop_mulv2div2di3_low (presumably the XOP path -- confirm against
;;    the guarding condition);
;;  - each input interleaved (low) with itself, followed by
;;    sse4_1_mulv2siv2di3.
5669 (define_expand "vec_widen_smult_lo_v4si"
5670 [(match_operand:V2DI 0 "register_operand" "")
5671 (match_operand:V4SI 1 "register_operand" "")
5672 (match_operand:V4SI 2 "register_operand" "")]
5675 rtx op1, op2, t1, t2;
5679 t1 = gen_reg_rtx (V4SImode);
5680 t2 = gen_reg_rtx (V4SImode);
5684 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5685 GEN_INT (1), GEN_INT (3)));
5686 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5687 GEN_INT (1), GEN_INT (3)));
5688 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5692 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5693 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5694 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening V4SI -> V2DI multiply, high half.
;; Each input is interleaved (high) with itself, and the two results are
;; multiplied with sse2_umulv2siv2di3 into operand 0.
5698 (define_expand "vec_widen_umult_hi_v4si"
5699 [(match_operand:V2DI 0 "register_operand" "")
5700 (match_operand:V4SI 1 "register_operand" "")
5701 (match_operand:V4SI 2 "register_operand" "")]
5704 rtx op1, op2, t1, t2;
5708 t1 = gen_reg_rtx (V4SImode);
5709 t2 = gen_reg_rtx (V4SImode);
5711 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5712 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5713 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening V4SI -> V2DI multiply, low half.
;; Each input is interleaved (low) with itself, and the two results are
;; multiplied with sse2_umulv2siv2di3 into operand 0.
5717 (define_expand "vec_widen_umult_lo_v4si"
5718 [(match_operand:V2DI 0 "register_operand" "")
5719 (match_operand:V4SI 1 "register_operand" "")
5720 (match_operand:V4SI 2 "register_operand" "")]
5723 rtx op1, op2, t1, t2;
5727 t1 = gen_reg_rtx (V4SImode);
5728 t2 = gen_reg_rtx (V4SImode);
5730 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5731 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5732 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for the VI2_AVX2 modes.
;; Operands 1 and 2 are combined with <sse2_avx2>_pmaddwd into a fresh
;; <sseunpackmode> temporary, and operand 0 is then set to a PLUS in
;; <sseunpackmode> (presumably the temporary plus the accumulator,
;; operand 3 -- confirm).
5736 (define_expand "sdot_prod<mode>"
5737 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5738 (match_operand:VI2_AVX2 1 "register_operand" "")
5739 (match_operand:VI2_AVX2 2 "register_operand" "")
5740 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5743 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5744 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5745 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5746 gen_rtx_PLUS (<sseunpackmode>mode,
;; Maps an extension code to the ISA prefix used in generator names:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".
5751 (define_code_attr sse2_sse4_1
5752 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; V4SI -> V2DI dot product with accumulator (operand 3).
;; Enabled for TARGET_SSE2 when zero-extending, TARGET_SSE4_1 otherwise.
;; Steps visible below:
;;   t1 = mulv2siv2di3 (op1, op2);  t1 += op3
;;   t2, t3 = op1, op2 shifted right as V1TI (shift count not shown here)
;;   t4 = mulv2siv2di3 (t2, t3)
;;   op0 = t1 + t4
5754 (define_expand "<s>dot_prodv4si"
5755 [(match_operand:V2DI 0 "register_operand" "")
5756 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5757 (match_operand:V4SI 2 "register_operand" "")
5758 (match_operand:V2DI 3 "register_operand" "")]
5759 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5763 t1 = gen_reg_rtx (V2DImode);
5764 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5765 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5767 t2 = gen_reg_rtx (V4SImode);
5768 t3 = gen_reg_rtx (V4SImode);
5769 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5770 gen_lowpart (V1TImode, operands[1]),
5772 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5773 gen_lowpart (V1TImode, operands[2]),
5776 t4 = gen_reg_rtx (V2DImode);
5777 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5779 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; V8SI -> V4DI dot product with accumulator (operand 3).
;; Steps visible below:
;;   t1 = avx2_<u>mulv4siv4di3 (op1, op2);  t1 += op3
;;   t2, t3 = op1, op2 shifted right as V2TI (shift count not shown here)
;;   t4 = avx2_<u>mulv4siv4di3 (t2, t3)
;;   op0 = t1 + t4
5783 (define_expand "<s>dot_prodv8si"
5784 [(match_operand:V4DI 0 "register_operand" "")
5785 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5786 (match_operand:V8SI 2 "register_operand" "")
5787 (match_operand:V4DI 3 "register_operand" "")]
5792 t1 = gen_reg_rtx (V4DImode);
5793 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5794 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5796 t2 = gen_reg_rtx (V8SImode);
5797 t3 = gen_reg_rtx (V8SImode);
5798 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5799 gen_lowpart (V2TImode, operands[1]),
5801 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5802 gen_lowpart (V2TImode, operands[2]),
5805 t4 = gen_reg_rtx (V4DImode);
5806 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5808 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Per-element shift for the VI24_AVX2 modes, emitted as psra / vpsra.
;; Operand 2 is the SImode shift count; it may be a register or a
;; constant (constraint "xN"). Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand VEX form. The length_immediate
;; attribute depends on whether the count is a const_int.
5812 (define_insn "ashr<mode>3"
5813 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5815 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5816 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5819 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5820 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5821 [(set_attr "isa" "noavx,avx")
5822 (set_attr "type" "sseishft")
5823 (set (attr "length_immediate")
5824 (if_then_else (match_operand 2 "const_int_operand" "")
5826 (const_string "0")))
5827 (set_attr "prefix_data16" "1,*")
5828 (set_attr "prefix" "orig,vex")
5829 (set_attr "mode" "<sseinsnmode>")])
;; Per-element logical right shift for the VI248_AVX2 modes, emitted as
;; psrl / vpsrl. Operand 2 is the SImode shift count; it may be a
;; register or a constant (constraint "xN"). Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand VEX form.
;; The length_immediate attribute depends on whether the count is a
;; const_int.
5831 (define_insn "lshr<mode>3"
5832 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5833 (lshiftrt:VI248_AVX2
5834 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5835 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5838 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5839 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5840 [(set_attr "isa" "noavx,avx")
5841 (set_attr "type" "sseishft")
5842 (set (attr "length_immediate")
5843 (if_then_else (match_operand 2 "const_int_operand" "")
5845 (const_string "0")))
5846 (set_attr "prefix_data16" "1,*")
5847 (set_attr "prefix" "orig,vex")
5848 (set_attr "mode" "<sseinsnmode>")])
;; Per-element left shift for the VI248_AVX2 modes, emitted as
;; psll / vpsll. Operand 2 is the SImode shift count; it may be a
;; register or a constant (constraint "xN"). Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand VEX form.
;; The length_immediate attribute depends on whether the count is a
;; const_int.
5850 (define_insn "ashl<mode>3"
5851 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5853 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5854 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5857 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5858 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5859 [(set_attr "isa" "noavx,avx")
5860 (set_attr "type" "sseishft")
5861 (set (attr "length_immediate")
5862 (if_then_else (match_operand 2 "const_int_operand" "")
5864 (const_string "0")))
5865 (set_attr "prefix_data16" "1,*")
5866 (set_attr "prefix" "orig,vex")
5867 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.
;; The count (operand 2) must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as V1TImode so the
;; shift is performed on the full 128-bit value.
5869 (define_expand "vec_shl_<mode>"
5870 [(set (match_operand:VI_128 0 "register_operand" "")
5872 (match_operand:VI_128 1 "register_operand" "")
5873 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5876 operands[0] = gen_lowpart (V1TImode, operands[0]);
5877 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width left shift for the VIMAX_AVX2 modes, emitted as
;; pslldq / vpslldq. The count operand must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count to the byte count the
;; instruction takes -- confirm).
5880 (define_insn "<sse2_avx2>_ashl<mode>3"
5881 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5883 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5884 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5887 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5889 switch (which_alternative)
5892 return "pslldq\t{%2, %0|%0, %2}";
5894 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5899 [(set_attr "isa" "noavx,avx")
5900 (set_attr "type" "sseishft")
5901 (set_attr "length_immediate" "1")
5902 (set_attr "prefix_data16" "1,*")
5903 (set_attr "prefix" "orig,vex")
5904 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes (right-shift
;; counterpart of vec_shl_<mode>).
;; The count (operand 2) must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as V1TImode so the
;; shift is performed on the full 128-bit value.
5906 (define_expand "vec_shr_<mode>"
5907 [(set (match_operand:VI_128 0 "register_operand" "")
5909 (match_operand:VI_128 1 "register_operand" "")
5910 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5913 operands[0] = gen_lowpart (V1TImode, operands[0]);
5914 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width logical right shift for the VIMAX_AVX2 modes, emitted as
;; psrldq / vpsrldq. The count operand must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count to the byte count the
;; instruction takes -- confirm).
5917 (define_insn "<sse2_avx2>_lshr<mode>3"
5918 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5919 (lshiftrt:VIMAX_AVX2
5920 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5921 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5924 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5926 switch (which_alternative)
5929 return "psrldq\t{%2, %0|%0, %2}";
5931 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5936 [(set_attr "isa" "noavx,avx")
5937 (set_attr "type" "sseishft")
5938 (set_attr "length_immediate" "1")
5939 (set_attr "atom_unit" "sishuf")
5940 (set_attr "prefix_data16" "1,*")
5941 (set_attr "prefix" "orig,vex")
5942 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a binary <code> operation on the VI124_256 modes.
;; The only preparation is ix86_fixup_binary_operands_no_copy, which
;; adjusts the operands in place for the matching insn.
5945 (define_expand "<code><mode>3"
5946 [(set (match_operand:VI124_256 0 "register_operand" "")
5948 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5949 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5951 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min on the VI124_256 modes, emitted as
;; vp<maxmin_int><ssemodesuffix>. Requires TARGET_AVX2 and operands that
;; pass ix86_binary_operator_ok; operand 1 is marked commutative ("%").
5953 (define_insn "*avx2_<code><mode>3"
5954 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5956 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5957 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5958 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5959 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5960 [(set_attr "type" "sseiadd")
5961 (set_attr "prefix_extra" "1")
5962 (set_attr "prefix" "vex")
5963 (set_attr "mode" "OI")])
;; max/min for the VI8_AVX2 modes, expanded as a vector conditional move
;; via ix86_expand_int_vcond.
;; xops layout built below: [0] destination, [1]/[2] the two values to
;; choose between (op1, op2 for SMAX/UMAX; swapped otherwise), [3] the
;; comparison op1 > op2 (GTU for the unsigned codes, GT otherwise),
;; [4]/[5] the comparison operands.
5965 (define_expand "<code><mode>3"
5966 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5967 (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5968 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5975 xops[0] = operands[0];
5977 if (<CODE> == SMAX || <CODE> == UMAX)
5979 xops[1] = operands[1];
5980 xops[2] = operands[2];
5984 xops[1] = operands[2];
5985 xops[2] = operands[1];
5988 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5990 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5991 xops[4] = operands[1];
5992 xops[5] = operands[2];
5994 ok = ix86_expand_int_vcond (xops);
;; Signed max/min for the VI124_128 modes.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only fixed up
;; with ix86_fixup_binary_operands_no_copy. Otherwise both inputs are
;; forced into registers and the operation is expanded through
;; ix86_expand_int_vcond using a signed GT comparison of op1 and op2;
;; xops[1]/xops[2] hold the two inputs in one order or the other
;; (presumably depending on whether <CODE> is SMAX -- confirm).
5999 (define_expand "<code><mode>3"
6000 [(set (match_operand:VI124_128 0 "register_operand" "")
6001 (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6002 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6005 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6006 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6012 xops[0] = operands[0];
6013 operands[1] = force_reg (<MODE>mode, operands[1]);
6014 operands[2] = force_reg (<MODE>mode, operands[2]);
6018 xops[1] = operands[1];
6019 xops[2] = operands[2];
6023 xops[1] = operands[2];
6024 xops[2] = operands[1];
6027 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6028 xops[4] = operands[1];
6029 xops[5] = operands[2];
6031 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on the VI14_128 modes, emitted as
;; p<maxmin_int><ssemodesuffix> (non-AVX) or the VEX three-operand form.
;; Requires TARGET_SSE4_1 and operands that pass ix86_binary_operator_ok.
6037 (define_insn "*sse4_1_<code><mode>3"
6038 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6040 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6041 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6042 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6044 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6045 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6046 [(set_attr "isa" "noavx,avx")
6047 (set_attr "type" "sseiadd")
6048 (set_attr "prefix_extra" "1,*")
6049 (set_attr "prefix" "orig,vex")
6050 (set_attr "mode" "TI")])
;; Max/min on V8HI, emitted as p<maxmin_int>w (non-AVX) or
;; vp<maxmin_int>w (VEX). Requires only TARGET_SSE2 and operands that
;; pass ix86_binary_operator_ok.
6052 (define_insn "*<code>v8hi3"
6053 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6055 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6056 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6057 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6059 p<maxmin_int>w\t{%2, %0|%0, %2}
6060 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6061 [(set_attr "isa" "noavx,avx")
6062 (set_attr "type" "sseiadd")
6063 (set_attr "prefix_data16" "1,*")
6064 (set_attr "prefix_extra" "*,1")
6065 (set_attr "prefix" "orig,vex")
6066 (set_attr "mode" "TI")])
;; Unsigned max/min for the VI124_128 modes. Three strategies:
;;  - TARGET_SSE4_1, or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  - UMAX on V8HImode: unsigned-saturating subtract followed by an add,
;;    i.e. umax (a, b) = (a -us b) + b. A fresh temporary is used when
;;    the destination is the same rtx as operand 2, so operand 2 is not
;;    clobbered before the add reads it.
;;  - otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond with an unsigned GTU comparison; xops[1] and
;;    xops[2] hold the two inputs in one order or the other (presumably
;;    depending on whether <CODE> is UMAX -- confirm).
6068 (define_expand "<code><mode>3"
6069 [(set (match_operand:VI124_128 0 "register_operand" "")
6070 (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6071 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6074 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6075 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6076 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6078 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6079 operands[1] = force_reg (<MODE>mode, operands[1]);
6080 if (rtx_equal_p (op3, op2))
6081 op3 = gen_reg_rtx (V8HImode);
6082 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6083 emit_insn (gen_addv8hi3 (op0, op3, op2));
6091 operands[1] = force_reg (<MODE>mode, operands[1]);
6092 operands[2] = force_reg (<MODE>mode, operands[2]);
6094 xops[0] = operands[0];
6098 xops[1] = operands[1];
6099 xops[2] = operands[2];
6103 xops[1] = operands[2];
6104 xops[2] = operands[1];
6107 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6108 xops[4] = operands[1];
6109 xops[5] = operands[2];
6111 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on the VI24_128 modes, emitted as
;; p<maxmin_int><ssemodesuffix> (non-AVX) or the VEX three-operand form.
;; Requires TARGET_SSE4_1 and operands that pass ix86_binary_operator_ok.
6117 (define_insn "*sse4_1_<code><mode>3"
6118 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6120 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6121 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6122 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6124 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6125 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6126 [(set_attr "isa" "noavx,avx")
6127 (set_attr "type" "sseiadd")
6128 (set_attr "prefix_extra" "1,*")
6129 (set_attr "prefix" "orig,vex")
6130 (set_attr "mode" "TI")])
;; Max/min on V16QI, emitted as p<maxmin_int>b (non-AVX) or
;; vp<maxmin_int>b (VEX). Requires only TARGET_SSE2 and operands that
;; pass ix86_binary_operator_ok.
6132 (define_insn "*<code>v16qi3"
6133 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6135 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6136 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6137 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6139 p<maxmin_int>b\t{%2, %0|%0, %2}
6140 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6141 [(set_attr "isa" "noavx,avx")
6142 (set_attr "type" "sseiadd")
6143 (set_attr "prefix_data16" "1,*")
6144 (set_attr "prefix_extra" "*,1")
6145 (set_attr "prefix" "orig,vex")
6146 (set_attr "mode" "TI")])
6148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6150 ;; Parallel integral comparisons
6152 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the VI_256 modes; it only fixes
;; up the operands via ix86_fixup_binary_operands_no_copy (EQ).
6154 (define_expand "avx2_eq<mode>3"
6155 [(set (match_operand:VI_256 0 "register_operand" "")
6157 (match_operand:VI_256 1 "nonimmediate_operand" "")
6158 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6160 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 element-wise equality on the VI_256 modes, emitted as
;; vpcmpeq<ssemodesuffix>. Operand 1 is marked commutative ("%").
6162 (define_insn "*avx2_eq<mode>3"
6163 [(set (match_operand:VI_256 0 "register_operand" "=x")
6165 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6166 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6167 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6168 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6169 [(set_attr "type" "ssecmp")
6170 (set_attr "prefix_extra" "1")
6171 (set_attr "prefix" "vex")
6172 (set_attr "mode" "OI")])
;; SSE4.1 element-wise equality on V2DI, emitted as pcmpeqq / vpcmpeqq.
;; Operand 1 is marked commutative ("%").
6174 (define_insn "*sse4_1_eqv2di3"
6175 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6177 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6178 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6179 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6181 pcmpeqq\t{%2, %0|%0, %2}
6182 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6183 [(set_attr "isa" "noavx,avx")
6184 (set_attr "type" "ssecmp")
6185 (set_attr "prefix_extra" "1")
6186 (set_attr "prefix" "orig,vex")
6187 (set_attr "mode" "TI")])
;; SSE2 element-wise equality on the VI124_128 modes, emitted as
;; pcmpeq<ssemodesuffix> / vpcmpeq<ssemodesuffix>. Disabled when
;; TARGET_XOP is set. Operand 1 is marked commutative ("%").
6189 (define_insn "*sse2_eq<mode>3"
6190 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6192 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6193 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6194 "TARGET_SSE2 && !TARGET_XOP
6195 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6197 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6198 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6199 [(set_attr "isa" "noavx,avx")
6200 (set_attr "type" "ssecmp")
6201 (set_attr "prefix_data16" "1,*")
6202 (set_attr "prefix" "orig,vex")
6203 (set_attr "mode" "TI")])
;; Expander for element-wise equality on the VI124_128 modes, enabled
;; for TARGET_SSE2 without TARGET_XOP; it only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy (EQ).
6205 (define_expand "sse2_eq<mode>3"
6206 [(set (match_operand:VI124_128 0 "register_operand" "")
6208 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6209 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6210 "TARGET_SSE2 && !TARGET_XOP "
6211 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for element-wise equality on V2DI; it only fixes up the
;; operands via ix86_fixup_binary_operands_no_copy (EQ).
6213 (define_expand "sse4_1_eqv2di3"
6214 [(set (match_operand:V2DI 0 "register_operand" "")
6216 (match_operand:V2DI 1 "nonimmediate_operand" "")
6217 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6219 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Element-wise greater-than on V2DI, emitted as pcmpgtq / vpcmpgtq.
;; Unlike the equality patterns, operand 1 must be a register and
;; carries no "%" commutative marker.
6221 (define_insn "sse4_2_gtv2di3"
6222 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6224 (match_operand:V2DI 1 "register_operand" "0,x")
6225 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6228 pcmpgtq\t{%2, %0|%0, %2}
6229 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6230 [(set_attr "isa" "noavx,avx")
6231 (set_attr "type" "ssecmp")
6232 (set_attr "prefix_extra" "1")
6233 (set_attr "prefix" "orig,vex")
6234 (set_attr "mode" "TI")])
;; Element-wise greater-than on the VI_256 modes, emitted as
;; vpcmpgt<ssemodesuffix>. Operand 1 must be a register and carries no
;; "%" commutative marker.
6236 (define_insn "avx2_gt<mode>3"
6237 [(set (match_operand:VI_256 0 "register_operand" "=x")
6239 (match_operand:VI_256 1 "register_operand" "x")
6240 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6242 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6243 [(set_attr "type" "ssecmp")
6244 (set_attr "prefix_extra" "1")
6245 (set_attr "prefix" "vex")
6246 (set_attr "mode" "OI")])
;; SSE2 element-wise greater-than on the VI124_128 modes, emitted as
;; pcmpgt<ssemodesuffix> / vpcmpgt<ssemodesuffix>. Disabled when
;; TARGET_XOP is set. Operand 1 must be a register and carries no "%"
;; commutative marker.
6248 (define_insn "sse2_gt<mode>3"
6249 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6251 (match_operand:VI124_128 1 "register_operand" "0,x")
6252 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6253 "TARGET_SSE2 && !TARGET_XOP"
6255 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6256 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6257 [(set_attr "isa" "noavx,avx")
6258 (set_attr "type" "ssecmp")
6259 (set_attr "prefix_data16" "1,*")
6260 (set_attr "prefix" "orig,vex")
6261 (set_attr "mode" "TI")])
;; Vector conditional select for 256-bit data modes (V_256) driven by a
;; comparison of VI_256 integer vectors (operator 3 on operands 4 and 5).
;; Only enabled when the data mode and the comparison mode have the same
;; number of elements. Expansion is delegated to ix86_expand_int_vcond.
6263 (define_expand "vcond<V_256:mode><VI_256:mode>"
6264 [(set (match_operand:V_256 0 "register_operand" "")
6266 (match_operator 3 ""
6267 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6268 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6269 (match_operand:V_256 1 "general_operand" "")
6270 (match_operand:V_256 2 "general_operand" "")))]
6272 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6273 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6275 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit data modes (V_128) driven by a
;; comparison of VI124_128 integer vectors (operator 3 on operands 4
;; and 5). Only enabled when the data mode and the comparison mode have
;; the same number of elements. Expansion is delegated to
;; ix86_expand_int_vcond.
6280 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6281 [(set (match_operand:V_128 0 "register_operand" "")
6283 (match_operator 3 ""
6284 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6285 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6286 (match_operand:V_128 1 "general_operand" "")
6287 (match_operand:V_128 2 "general_operand" "")))]
6289 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6290 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6292 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI8F_128 data modes driven by a
;; comparison of V2DI vectors (operator 3 on operands 4 and 5).
;; Expansion is delegated to ix86_expand_int_vcond.
6297 (define_expand "vcond<VI8F_128:mode>v2di"
6298 [(set (match_operand:VI8F_128 0 "register_operand" "")
6299 (if_then_else:VI8F_128
6300 (match_operator 3 ""
6301 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6302 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6303 (match_operand:VI8F_128 1 "general_operand" "")
6304 (match_operand:VI8F_128 2 "general_operand" "")))]
6307 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant ("vcondu") of the 256-bit vector
;; conditional select: data in V_256 modes, comparison operands in
;; VI_256 modes. Only enabled when both modes have the same number of
;; elements. Expansion is delegated to ix86_expand_int_vcond.
6312 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6313 [(set (match_operand:V_256 0 "register_operand" "")
6315 (match_operator 3 ""
6316 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6317 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6318 (match_operand:V_256 1 "general_operand" "")
6319 (match_operand:V_256 2 "general_operand" "")))]
6321 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6322 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6324 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant ("vcondu") of the 128-bit vector
;; conditional select: data in V_128 modes, comparison operands in
;; VI124_128 modes. Only enabled when both modes have the same number of
;; elements. Expansion is delegated to ix86_expand_int_vcond.
6329 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6330 [(set (match_operand:V_128 0 "register_operand" "")
6332 (match_operator 3 ""
6333 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6334 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6335 (match_operand:V_128 1 "general_operand" "")
6336 (match_operand:V_128 2 "general_operand" "")))]
6338 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6339 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6341 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant ("vcondu") of the vector conditional
;; select for the VI8F_128 data modes with V2DI comparison operands.
;; Expansion is delegated to ix86_expand_int_vcond.
6346 (define_expand "vcondu<VI8F_128:mode>v2di"
6347 [(set (match_operand:VI8F_128 0 "register_operand" "")
6348 (if_then_else:VI8F_128
6349 (match_operator 3 ""
6350 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6351 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6352 (match_operand:VI8F_128 1 "general_operand" "")
6353 (match_operand:VI8F_128 2 "general_operand" "")))]
6356 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable vec_perm expander: the six 128-bit
;; vector modes unconditionally, and the six 256-bit vector modes only
;; with TARGET_AVX2.
6361 (define_mode_iterator VEC_PERM_AVX2
6362 [V16QI V8HI V4SI V2DI V4SF V2DF
6363 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6364 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6365 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation: operands 1 and 2 are the input vectors and
;; operand 3 is the selector, an integer vector in <sseintvecmode>.
;; Enabled with SSSE3, AVX or XOP; the work is done by
;; ix86_expand_vec_perm.
6367 (define_expand "vec_perm<mode>"
6368 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6369 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6370 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6371 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6372 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6374 ix86_expand_vec_perm (operands);
;; Modes accepted by the constant-selector permutation expander, each
;; gated on its own target flag: V4SF/V4SI/V2DF/V2DI need SSE,
;; V16QI/V8HI need SSE2, V8SF/V4DF/V8SI/V4DI need AVX, and
;; V32QI/V16HI need AVX2.
6378 (define_mode_iterator VEC_PERM_CONST
6379 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6380 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6381 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6382 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6383 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6384 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation with a selector (operand 3) that has no predicate
;; restriction. The expansion is attempted with
;; ix86_expand_vec_perm_const, whose boolean result is tested below.
6386 (define_expand "vec_perm_const<mode>"
6387 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6388 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6389 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6390 (match_operand:<sseintvecmode> 3 "" "")]
6393 if (ix86_expand_vec_perm_const (operands))
6399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6401 ;; Parallel bitwise logical operations
6403 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1
;; with an all-ones vector. The preparation code builds a CONST_VECTOR
;; whose every element is constm1_rtx and forces it into a register as
;; operand 2.
6405 (define_expand "one_cmpl<mode>2"
6406 [(set (match_operand:VI 0 "register_operand" "")
6407 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6411 int i, n = GET_MODE_NUNITS (<MODE>mode);
6412 rtvec v = rtvec_alloc (n);
6414 for (i = 0; i < n; ++i)
6415 RTVEC_ELT (v, i) = constm1_rtx;
6417 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on the VI_AVX2 modes:
;; operand 1 (a register) appears under NOT, operand 2 may be a
;; register or memory.
6420 (define_expand "<sse2_avx2>_andnot<mode>3"
6421 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6423 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6424 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for the VI modes. The output code picks a mnemonic from
;; the insn's "mode" attribute (asserting the matching target flag),
;; then formats it into a static buffer: alternative 0 uses the
;; two-operand form, alternative 1 the "v"-prefixed three-operand form.
;; The "mode" attribute falls back to V8SF when the vector is wider than
;; 16 bytes and AVX2 is unavailable, to V4SF when SSE2 is unavailable,
;; and is <sseinsnmode> otherwise.
6427 (define_insn "*andnot<mode>3"
6428 [(set (match_operand:VI 0 "register_operand" "=x,x")
6430 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6431 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6434 static char buf[32];
6438 switch (get_attr_mode (insn))
6441 gcc_assert (TARGET_AVX2);
6443 gcc_assert (TARGET_SSE2);
6449 gcc_assert (TARGET_AVX);
6451 gcc_assert (TARGET_SSE);
6460 switch (which_alternative)
6463 ops = "%s\t{%%2, %%0|%%0, %%2}";
6466 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6472 snprintf (buf, sizeof (buf), ops, tmp);
6475 [(set_attr "isa" "noavx,avx")
6476 (set_attr "type" "sselog")
6477 (set (attr "prefix_data16")
6479 (and (eq_attr "alternative" "0")
6480 (eq_attr "mode" "TI"))
6482 (const_string "*")))
6483 (set_attr "prefix" "orig,vex")
6485 (cond [(and (not (match_test "TARGET_AVX2"))
6486 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6487 (const_string "V8SF")
6488 (not (match_test "TARGET_SSE2"))
6489 (const_string "V4SF")
6491 (const_string "<sseinsnmode>")))])
;; Expander for a binary <code> operation on the VI modes; it only
;; fixes up the operands via ix86_fixup_binary_operands_no_copy.
6493 (define_expand "<code><mode>3"
6494 [(set (match_operand:VI 0 "register_operand" "")
6496 (match_operand:VI 1 "nonimmediate_operand" "")
6497 (match_operand:VI 2 "nonimmediate_operand" "")))]
6499 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary <code> insn for the VI modes; operand 1 is marked commutative
;; ("%"). The output code picks a mnemonic from the insn's "mode"
;; attribute (asserting the matching target flag), then formats it into
;; a static buffer: alternative 0 uses the two-operand form,
;; alternative 1 the "v"-prefixed three-operand form.
;; The "mode" attribute falls back to V8SF when the vector is wider than
;; 16 bytes and AVX2 is unavailable, to V4SF when SSE2 is unavailable,
;; and is <sseinsnmode> otherwise.
6501 (define_insn "*<code><mode>3"
6502 [(set (match_operand:VI 0 "register_operand" "=x,x")
6504 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6505 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6507 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6509 static char buf[32];
6513 switch (get_attr_mode (insn))
6516 gcc_assert (TARGET_AVX2);
6518 gcc_assert (TARGET_SSE2);
6524 gcc_assert (TARGET_AVX);
6526 gcc_assert (TARGET_SSE);
6535 switch (which_alternative)
6538 ops = "%s\t{%%2, %%0|%%0, %%2}";
6541 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6547 snprintf (buf, sizeof (buf), ops, tmp);
6550 [(set_attr "isa" "noavx,avx")
6551 (set_attr "type" "sselog")
6552 (set (attr "prefix_data16")
6554 (and (eq_attr "alternative" "0")
6555 (eq_attr "mode" "TI"))
6557 (const_string "*")))
6558 (set_attr "prefix" "orig,vex")
6560 (cond [(and (not (match_test "TARGET_AVX2"))
6561 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6562 (const_string "V8SF")
6563 (not (match_test "TARGET_SSE2"))
6564 (const_string "V4SF")
6566 (const_string "<sseinsnmode>")))])
;; And-not on TFmode values held in SSE registers, emitted as
;; pandn / vpandn. Operand 1 (a register) appears under NOT.
6568 (define_insn "*andnottf3"
6569 [(set (match_operand:TF 0 "register_operand" "=x,x")
6571 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6572 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6575 pandn\t{%2, %0|%0, %2}
6576 vpandn\t{%2, %1, %0|%0, %1, %2}"
6577 [(set_attr "isa" "noavx,avx")
6578 (set_attr "type" "sselog")
6579 (set_attr "prefix_data16" "1,*")
6580 (set_attr "prefix" "orig,vex")
6581 (set_attr "mode" "TI")])
;; Expander for a binary <code> operation on TFmode; it only fixes up
;; the operands via ix86_fixup_binary_operands_no_copy.
6583 (define_expand "<code>tf3"
6584 [(set (match_operand:TF 0 "register_operand" "")
6586 (match_operand:TF 1 "nonimmediate_operand" "")
6587 (match_operand:TF 2 "nonimmediate_operand" "")))]
6589 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Binary <code> insn on TFmode values held in SSE registers, emitted as
;; p<logic> / vp<logic>. Operand 1 is marked commutative ("%") and the
;; operands must pass ix86_binary_operator_ok.
6591 (define_insn "*<code>tf3"
6592 [(set (match_operand:TF 0 "register_operand" "=x,x")
6594 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6595 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6597 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6599 p<logic>\t{%2, %0|%0, %2}
6600 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6601 [(set_attr "isa" "noavx,avx")
6602 (set_attr "type" "sselog")
6603 (set_attr "prefix_data16" "1,*")
6604 (set_attr "prefix" "orig,vex")
6605 (set_attr "mode" "TI")])
6607 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6609 ;; Parallel integral element swizzling
6611 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2 vectors into one <ssepackmode>
;; vector. Both inputs are re-viewed in <ssepackmode> and the result is
;; produced by ix86_expand_vec_extract_even_odd with a final argument
;; of 0 (presumably selecting the even elements -- confirm).
6613 (define_expand "vec_pack_trunc_<mode>"
6614 [(match_operand:<ssepackmode> 0 "register_operand" "")
6615 (match_operand:VI248_AVX2 1 "register_operand" "")
6616 (match_operand:VI248_AVX2 2 "register_operand" "")]
6619 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6620 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6621 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation into the VI1_AVX2 modes, emitted as
;; packsswb / vpacksswb. The result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
6625 (define_insn "<sse2_avx2>_packsswb"
6626 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6627 (vec_concat:VI1_AVX2
6628 (ss_truncate:<ssehalfvecmode>
6629 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6630 (ss_truncate:<ssehalfvecmode>
6631 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6634 packsswb\t{%2, %0|%0, %2}
6635 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6636 [(set_attr "isa" "noavx,avx")
6637 (set_attr "type" "sselog")
6638 (set_attr "prefix_data16" "1,*")
6639 (set_attr "prefix" "orig,vex")
6640 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation into the VI2_AVX2 modes, emitted as
;; packssdw / vpackssdw. The result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
6642 (define_insn "<sse2_avx2>_packssdw"
6643 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6644 (vec_concat:VI2_AVX2
6645 (ss_truncate:<ssehalfvecmode>
6646 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6647 (ss_truncate:<ssehalfvecmode>
6648 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6651 packssdw\t{%2, %0|%0, %2}
6652 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6653 [(set_attr "isa" "noavx,avx")
6654 (set_attr "type" "sselog")
6655 (set_attr "prefix_data16" "1,*")
6656 (set_attr "prefix" "orig,vex")
6657 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation into a VI1_AVX2 result: the
;; destination is the vec_concat of us_truncate (operand 1) and
;; us_truncate (operand 2).  Alternative 0 is the tied two-operand
;; packuswb, alternative 1 the three-operand vpackuswb.
6659 (define_insn "<sse2_avx2>_packuswb"
6660 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6661 (vec_concat:VI1_AVX2
6662 (us_truncate:<ssehalfvecmode>
6663 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6664 (us_truncate:<ssehalfvecmode>
6665 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6668 packuswb\t{%2, %0|%0, %2}
6669 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6670 [(set_attr "isa" "noavx,avx")
6671 (set_attr "type" "sselog")
6672 (set_attr "prefix_data16" "1,*")
6673 (set_attr "prefix" "orig,vex")
6674 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit byte interleave-high, printed as vpunpckhbw.
;; The selector pairs index k with index k+32 for k = 8..15 and then
;; for k = 24..31.
;; NOTE(review): indices 32..63 presumably address operand 2 in the
;; combined source, which makes this the upper eight bytes of each
;; 128-bit lane interleaved -- confirm.
6676 (define_insn "avx2_interleave_highv32qi"
6677 [(set (match_operand:V32QI 0 "register_operand" "=x")
6680 (match_operand:V32QI 1 "register_operand" "x")
6681 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6682 (parallel [(const_int 8) (const_int 40)
6683 (const_int 9) (const_int 41)
6684 (const_int 10) (const_int 42)
6685 (const_int 11) (const_int 43)
6686 (const_int 12) (const_int 44)
6687 (const_int 13) (const_int 45)
6688 (const_int 14) (const_int 46)
6689 (const_int 15) (const_int 47)
6690 (const_int 24) (const_int 56)
6691 (const_int 25) (const_int 57)
6692 (const_int 26) (const_int 58)
6693 (const_int 27) (const_int 59)
6694 (const_int 28) (const_int 60)
6695 (const_int 29) (const_int 61)
6696 (const_int 30) (const_int 62)
6697 (const_int 31) (const_int 63)])))]
6699 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6700 [(set_attr "type" "sselog")
6701 (set_attr "prefix" "vex")
6702 (set_attr "mode" "OI")])
;; 128-bit byte interleave-high.  The selector pairs index k with
;; index k+16 for k = 8..15.  Alternative 0 ties operand 1 to the
;; destination and prints punpckhbw; alternative 1 prints the
;; three-operand vpunpckhbw.
;; NOTE(review): indices 16..31 presumably address operand 2 -- confirm.
6704 (define_insn "vec_interleave_highv16qi"
6705 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6708 (match_operand:V16QI 1 "register_operand" "0,x")
6709 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6710 (parallel [(const_int 8) (const_int 24)
6711 (const_int 9) (const_int 25)
6712 (const_int 10) (const_int 26)
6713 (const_int 11) (const_int 27)
6714 (const_int 12) (const_int 28)
6715 (const_int 13) (const_int 29)
6716 (const_int 14) (const_int 30)
6717 (const_int 15) (const_int 31)])))]
6720 punpckhbw\t{%2, %0|%0, %2}
6721 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6722 [(set_attr "isa" "noavx,avx")
6723 (set_attr "type" "sselog")
6724 (set_attr "prefix_data16" "1,*")
6725 (set_attr "prefix" "orig,vex")
6726 (set_attr "mode" "TI")])
;; 256-bit byte interleave-low, printed as vpunpcklbw.
;; The selector pairs index k with index k+32 for k = 0..7 and then
;; for k = 16..23.
;; NOTE(review): indices 32..63 presumably address operand 2 in the
;; combined source, which makes this the lower eight bytes of each
;; 128-bit lane interleaved -- confirm.
6728 (define_insn "avx2_interleave_lowv32qi"
6729 [(set (match_operand:V32QI 0 "register_operand" "=x")
6732 (match_operand:V32QI 1 "register_operand" "x")
6733 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6734 (parallel [(const_int 0) (const_int 32)
6735 (const_int 1) (const_int 33)
6736 (const_int 2) (const_int 34)
6737 (const_int 3) (const_int 35)
6738 (const_int 4) (const_int 36)
6739 (const_int 5) (const_int 37)
6740 (const_int 6) (const_int 38)
6741 (const_int 7) (const_int 39)
6742 (const_int 16) (const_int 48)
6743 (const_int 17) (const_int 49)
6744 (const_int 18) (const_int 50)
6745 (const_int 19) (const_int 51)
6746 (const_int 20) (const_int 52)
6747 (const_int 21) (const_int 53)
6748 (const_int 22) (const_int 54)
6749 (const_int 23) (const_int 55)])))]
6751 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6752 [(set_attr "type" "sselog")
6753 (set_attr "prefix" "vex")
6754 (set_attr "mode" "OI")])
;; 128-bit byte interleave-low.  The selector pairs index k with
;; index k+16 for k = 0..7.  Alternative 0 ties operand 1 to the
;; destination and prints punpcklbw; alternative 1 prints the
;; three-operand vpunpcklbw.
;; NOTE(review): indices 16..31 presumably address operand 2 -- confirm.
6756 (define_insn "vec_interleave_lowv16qi"
6757 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6760 (match_operand:V16QI 1 "register_operand" "0,x")
6761 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6762 (parallel [(const_int 0) (const_int 16)
6763 (const_int 1) (const_int 17)
6764 (const_int 2) (const_int 18)
6765 (const_int 3) (const_int 19)
6766 (const_int 4) (const_int 20)
6767 (const_int 5) (const_int 21)
6768 (const_int 6) (const_int 22)
6769 (const_int 7) (const_int 23)])))]
6772 punpcklbw\t{%2, %0|%0, %2}
6773 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6774 [(set_attr "isa" "noavx,avx")
6775 (set_attr "type" "sselog")
6776 (set_attr "prefix_data16" "1,*")
6777 (set_attr "prefix" "orig,vex")
6778 (set_attr "mode" "TI")])
;; 256-bit word interleave-high, printed as vpunpckhwd.
;; The selector pairs index k with index k+16 for k = 4..7 and then
;; for k = 12..15.
;; NOTE(review): indices 16..31 presumably address operand 2, giving
;; the upper four words of each 128-bit lane -- confirm.
6780 (define_insn "avx2_interleave_highv16hi"
6781 [(set (match_operand:V16HI 0 "register_operand" "=x")
6784 (match_operand:V16HI 1 "register_operand" "x")
6785 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6786 (parallel [(const_int 4) (const_int 20)
6787 (const_int 5) (const_int 21)
6788 (const_int 6) (const_int 22)
6789 (const_int 7) (const_int 23)
6790 (const_int 12) (const_int 28)
6791 (const_int 13) (const_int 29)
6792 (const_int 14) (const_int 30)
6793 (const_int 15) (const_int 31)])))]
6795 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6796 [(set_attr "type" "sselog")
6797 (set_attr "prefix" "vex")
6798 (set_attr "mode" "OI")])
;; 128-bit word interleave-high.  The selector pairs index k with
;; index k+8 for k = 4..7.  Alternative 0 ties operand 1 to the
;; destination and prints punpckhwd; alternative 1 prints the
;; three-operand vpunpckhwd.
;; NOTE(review): indices 8..15 presumably address operand 2 -- confirm.
6800 (define_insn "vec_interleave_highv8hi"
6801 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6804 (match_operand:V8HI 1 "register_operand" "0,x")
6805 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6806 (parallel [(const_int 4) (const_int 12)
6807 (const_int 5) (const_int 13)
6808 (const_int 6) (const_int 14)
6809 (const_int 7) (const_int 15)])))]
6812 punpckhwd\t{%2, %0|%0, %2}
6813 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6814 [(set_attr "isa" "noavx,avx")
6815 (set_attr "type" "sselog")
6816 (set_attr "prefix_data16" "1,*")
6817 (set_attr "prefix" "orig,vex")
6818 (set_attr "mode" "TI")])
;; 256-bit word interleave-low, printed as vpunpcklwd.
;; The selector pairs index k with index k+16 for k = 0..3 and then
;; for k = 8..11.
;; NOTE(review): indices 16..31 presumably address operand 2, giving
;; the lower four words of each 128-bit lane -- confirm.
6820 (define_insn "avx2_interleave_lowv16hi"
6821 [(set (match_operand:V16HI 0 "register_operand" "=x")
6824 (match_operand:V16HI 1 "register_operand" "x")
6825 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6826 (parallel [(const_int 0) (const_int 16)
6827 (const_int 1) (const_int 17)
6828 (const_int 2) (const_int 18)
6829 (const_int 3) (const_int 19)
6830 (const_int 8) (const_int 24)
6831 (const_int 9) (const_int 25)
6832 (const_int 10) (const_int 26)
6833 (const_int 11) (const_int 27)])))]
6835 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6836 [(set_attr "type" "sselog")
6837 (set_attr "prefix" "vex")
6838 (set_attr "mode" "OI")])
;; 128-bit word interleave-low.  The selector pairs index k with
;; index k+8 for k = 0..3.  Alternative 0 ties operand 1 to the
;; destination and prints punpcklwd; alternative 1 prints the
;; three-operand vpunpcklwd.
;; NOTE(review): indices 8..15 presumably address operand 2 -- confirm.
6840 (define_insn "vec_interleave_lowv8hi"
6841 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6844 (match_operand:V8HI 1 "register_operand" "0,x")
6845 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6846 (parallel [(const_int 0) (const_int 8)
6847 (const_int 1) (const_int 9)
6848 (const_int 2) (const_int 10)
6849 (const_int 3) (const_int 11)])))]
6852 punpcklwd\t{%2, %0|%0, %2}
6853 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6854 [(set_attr "isa" "noavx,avx")
6855 (set_attr "type" "sselog")
6856 (set_attr "prefix_data16" "1,*")
6857 (set_attr "prefix" "orig,vex")
6858 (set_attr "mode" "TI")])
;; 256-bit dword interleave-high, printed as vpunpckhdq.
;; The selector pairs index k with index k+8 for k = 2, 3, 6, 7.
;; NOTE(review): indices 8..15 presumably address operand 2, giving
;; the upper two dwords of each 128-bit lane -- confirm.
6860 (define_insn "avx2_interleave_highv8si"
6861 [(set (match_operand:V8SI 0 "register_operand" "=x")
6864 (match_operand:V8SI 1 "register_operand" "x")
6865 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6866 (parallel [(const_int 2) (const_int 10)
6867 (const_int 3) (const_int 11)
6868 (const_int 6) (const_int 14)
6869 (const_int 7) (const_int 15)])))]
6871 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6872 [(set_attr "type" "sselog")
6873 (set_attr "prefix" "vex")
6874 (set_attr "mode" "OI")])
;; 128-bit dword interleave-high.  The selector pairs index k with
;; index k+4 for k = 2, 3.  Alternative 0 ties operand 1 to the
;; destination and prints punpckhdq; alternative 1 prints the
;; three-operand vpunpckhdq.
;; NOTE(review): indices 4..7 presumably address operand 2 -- confirm.
6876 (define_insn "vec_interleave_highv4si"
6877 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6880 (match_operand:V4SI 1 "register_operand" "0,x")
6881 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6882 (parallel [(const_int 2) (const_int 6)
6883 (const_int 3) (const_int 7)])))]
6886 punpckhdq\t{%2, %0|%0, %2}
6887 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6888 [(set_attr "isa" "noavx,avx")
6889 (set_attr "type" "sselog")
6890 (set_attr "prefix_data16" "1,*")
6891 (set_attr "prefix" "orig,vex")
6892 (set_attr "mode" "TI")])
;; 256-bit dword interleave-low, printed as vpunpckldq.
;; The selector pairs index k with index k+8 for k = 0, 1, 4, 5.
;; NOTE(review): indices 8..15 presumably address operand 2, giving
;; the lower two dwords of each 128-bit lane -- confirm.
6894 (define_insn "avx2_interleave_lowv8si"
6895 [(set (match_operand:V8SI 0 "register_operand" "=x")
6898 (match_operand:V8SI 1 "register_operand" "x")
6899 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6900 (parallel [(const_int 0) (const_int 8)
6901 (const_int 1) (const_int 9)
6902 (const_int 4) (const_int 12)
6903 (const_int 5) (const_int 13)])))]
6905 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6906 [(set_attr "type" "sselog")
6907 (set_attr "prefix" "vex")
6908 (set_attr "mode" "OI")])
;; 128-bit dword interleave-low.  The selector pairs index k with
;; index k+4 for k = 0, 1.  Alternative 0 ties operand 1 to the
;; destination and prints punpckldq; alternative 1 prints the
;; three-operand vpunpckldq.
;; NOTE(review): indices 4..7 presumably address operand 2 -- confirm.
6910 (define_insn "vec_interleave_lowv4si"
6911 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6914 (match_operand:V4SI 1 "register_operand" "0,x")
6915 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6916 (parallel [(const_int 0) (const_int 4)
6917 (const_int 1) (const_int 5)])))]
6920 punpckldq\t{%2, %0|%0, %2}
6921 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6922 [(set_attr "isa" "noavx,avx")
6923 (set_attr "type" "sselog")
6924 (set_attr "prefix_data16" "1,*")
6925 (set_attr "prefix" "orig,vex")
6926 (set_attr "mode" "TI")])
;; Expander for vec_interleave_high<mode> on the VI_256 modes.
;; It computes both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2) of operands 1 and 2, then
;; merges the two temporaries with avx2_permv2ti on their V4DImode
;; lowparts using the immediate 1 + (3 << 4).
;; NOTE(review): that immediate presumably picks the upper 128-bit
;; half of t1 followed by the upper half of t2 -- confirm against the
;; avx2_permv2ti / VPERM2I128 immediate encoding.
6928 (define_expand "vec_interleave_high<mode>"
6929 [(match_operand:VI_256 0 "register_operand" "=x")
6930 (match_operand:VI_256 1 "register_operand" "x")
6931 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6934 rtx t1 = gen_reg_rtx (<MODE>mode);
6935 rtx t2 = gen_reg_rtx (<MODE>mode);
6936 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6937 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6938 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6939 gen_lowpart (V4DImode, t1),
6940 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for vec_interleave_low<mode> on the VI_256 modes.
;; It computes both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2) of operands 1 and 2, then
;; merges the two temporaries with avx2_permv2ti on their V4DImode
;; lowparts using the immediate 0 + (2 << 4).
;; NOTE(review): that immediate presumably picks the lower 128-bit
;; half of t1 followed by the lower half of t2 -- confirm against the
;; avx2_permv2ti / VPERM2I128 immediate encoding.
6944 (define_expand "vec_interleave_low<mode>"
6945 [(match_operand:VI_256 0 "register_operand" "=x")
6946 (match_operand:VI_256 1 "register_operand" "x")
6947 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6950 rtx t1 = gen_reg_rtx (<MODE>mode);
6951 rtx t2 = gen_reg_rtx (<MODE>mode);
6952 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6953 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6954 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6955 gen_lowpart (V4DImode, t1),
6956 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6960 ;; Modes handled by pinsr patterns.
;; V8HI carries no per-mode condition; V16QI and V4SI are gated on
;; TARGET_SSE4_1, and V2DI on TARGET_SSE4_1 && TARGET_64BIT.
6961 (define_mode_iterator PINSR_MODE
6962 [(V16QI "TARGET_SSE4_1") V8HI
6963 (V4SI "TARGET_SSE4_1")
6964 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for each PINSR_MODE mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.  Used to form the name of the
;; <sse2p4_1>_pinsr<ssemodesuffix> pattern.
6966 (define_mode_attr sse2p4_1
6967 [(V16QI "sse4_1") (V8HI "sse2")
6968 (V4SI "sse4_1") (V2DI "sse4_1")])
6970 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1,
;; expressed as a vec_merge of (vec_duplicate operand 2) with
;; operand 1 under the constant mask operand 3.
;;
;; The condition requires exact_log2 (operand 3) to be a valid element
;; index for <MODE>mode (the unsigned cast also rejects a negative
;; exact_log2 result).  The output code then replaces operands[3] with
;; that log2 value so it is printed as the instruction immediate.
;;
;; Alternatives, per the constraints and the "isa" attribute:
;;   0: non-AVX, operand 2 in a general register
;;   1: non-AVX, operand 2 in memory
;;   2: AVX,     operand 2 in a general register
;;   3: AVX,     operand 2 in memory
;; When <ssescalarmode> is narrower than SImode, the visible templates
;; print the source through the %k2 modifier instead of %2.
;;
;; prefix_rex is "1" only for non-AVX V2DImode; prefix_data16 is "1"
;; only for non-AVX V8HImode; prefix_extra is "*" for non-AVX V8HImode
;; and "1" otherwise.
6971 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6972 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6973 (vec_merge:PINSR_MODE
6974 (vec_duplicate:PINSR_MODE
6975 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6976 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6977 (match_operand:SI 3 "const_int_operand" "")))]
6979 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6980 < GET_MODE_NUNITS (<MODE>mode))"
6982 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6984 switch (which_alternative)
6987 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6988 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6991 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6993 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6994 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6997 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7002 [(set_attr "isa" "noavx,noavx,avx,avx")
7003 (set_attr "type" "sselog")
7004 (set (attr "prefix_rex")
7006 (and (not (match_test "TARGET_AVX"))
7007 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7009 (const_string "*")))
7010 (set (attr "prefix_data16")
7012 (and (not (match_test "TARGET_AVX"))
7013 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7015 (const_string "*")))
7016 (set (attr "prefix_extra")
7018 (and (not (match_test "TARGET_AVX"))
7019 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7021 (const_string "1")))
7022 (set_attr "length_immediate" "1")
7023 (set_attr "prefix" "orig,orig,vex,vex")
7024 (set_attr "mode" "TI")])
;; Extract the byte of V16QI operand 1 selected by constant operand 2
;; (0..15) into a general register of an SWI48 mode.  The destination
;; is printed through %k0 in the pextrb template.
7026 (define_insn "*sse4_1_pextrb_<mode>"
7027 [(set (match_operand:SWI48 0 "register_operand" "=r")
7030 (match_operand:V16QI 1 "register_operand" "x")
7031 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7033 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7034 [(set_attr "type" "sselog")
7035 (set_attr "prefix_extra" "1")
7036 (set_attr "length_immediate" "1")
7037 (set_attr "prefix" "maybe_vex")
7038 (set_attr "mode" "TI")])
;; Store the byte of V16QI operand 1 selected by constant operand 2
;; (0..15) directly to a QImode memory destination with pextrb.
7040 (define_insn "*sse4_1_pextrb_memory"
7041 [(set (match_operand:QI 0 "memory_operand" "=m")
7043 (match_operand:V16QI 1 "register_operand" "x")
7044 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7046 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7047 [(set_attr "type" "sselog")
7048 (set_attr "prefix_extra" "1")
7049 (set_attr "length_immediate" "1")
7050 (set_attr "prefix" "maybe_vex")
7051 (set_attr "mode" "TI")])
;; Extract the word of V8HI operand 1 selected by constant operand 2
;; (0..7) into a general register of an SWI48 mode.  The destination
;; is printed through %k0.  Unlike the memory-destination variant,
;; this pattern sets prefix_data16 rather than prefix_extra.
7053 (define_insn "*sse2_pextrw_<mode>"
7054 [(set (match_operand:SWI48 0 "register_operand" "=r")
7057 (match_operand:V8HI 1 "register_operand" "x")
7058 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7060 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7061 [(set_attr "type" "sselog")
7062 (set_attr "prefix_data16" "1")
7063 (set_attr "length_immediate" "1")
7064 (set_attr "prefix" "maybe_vex")
7065 (set_attr "mode" "TI")])
;; Store the word of V8HI operand 1 selected by constant operand 2
;; (0..7) directly to an HImode memory destination with pextrw.
7067 (define_insn "*sse4_1_pextrw_memory"
7068 [(set (match_operand:HI 0 "memory_operand" "=m")
7070 (match_operand:V8HI 1 "register_operand" "x")
7071 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7073 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix_extra" "1")
7076 (set_attr "length_immediate" "1")
7077 (set_attr "prefix" "maybe_vex")
7078 (set_attr "mode" "TI")])
;; Extract the dword of V4SI operand 1 selected by constant operand 2
;; (0..3) into an SImode destination, which may be a general register
;; or memory ("=rm"), using pextrd.
7080 (define_insn "*sse4_1_pextrd"
7081 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7083 (match_operand:V4SI 1 "register_operand" "x")
7084 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7086 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7087 [(set_attr "type" "sselog")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "maybe_vex")
7091 (set_attr "mode" "TI")])
;; 64-bit-only variant of the pextrd pattern with a DImode register
;; destination; enabled under TARGET_64BIT && TARGET_SSE4_1.  The
;; destination is printed through %k0 in the pextrd template.
;; NOTE(review): the name suggests the extracted dword is
;; zero-extended to DImode -- confirm against the full pattern.
7093 (define_insn "*sse4_1_pextrd_zext"
7094 [(set (match_operand:DI 0 "register_operand" "=r")
7097 (match_operand:V4SI 1 "register_operand" "x")
7098 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7099 "TARGET_64BIT && TARGET_SSE4_1"
7100 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7101 [(set_attr "type" "sselog")
7102 (set_attr "prefix_extra" "1")
7103 (set_attr "length_immediate" "1")
7104 (set_attr "prefix" "maybe_vex")
7105 (set_attr "mode" "TI")])
7107 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract the qword of V2DI operand 1 selected by constant operand 2
;; (0..1) into a DImode general register or memory ("=rm") using
;; pextrq.  Enabled under TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is
;; set to "1".
7108 (define_insn "*sse4_1_pextrq"
7109 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7111 (match_operand:V2DI 1 "register_operand" "x")
7112 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7113 "TARGET_SSE4_1 && TARGET_64BIT"
7114 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7115 [(set_attr "type" "sselog")
7116 (set_attr "prefix_rex" "1")
7117 (set_attr "prefix_extra" "1")
7118 (set_attr "length_immediate" "1")
7119 (set_attr "prefix" "maybe_vex")
7120 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2, 0..255)
;; into explicit element selectors for avx2_pshufd_1.  The four 2-bit
;; fields of the mask are passed once as-is (values 0..3) and once with
;; 4 added (values 4..7), so the same shuffle is described for both
;; groups of four V8SI elements.
7122 (define_expand "avx2_pshufdv3"
7123 [(match_operand:V8SI 0 "register_operand" "")
7124 (match_operand:V8SI 1 "nonimmediate_operand" "")
7125 (match_operand:SI 2 "const_0_to_255_operand" "")]
7128 int mask = INTVAL (operands[2]);
7129 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7130 GEN_INT ((mask >> 0) & 3),
7131 GEN_INT ((mask >> 2) & 3),
7132 GEN_INT ((mask >> 4) & 3),
7133 GEN_INT ((mask >> 6) & 3),
7134 GEN_INT (((mask >> 0) & 3) + 4),
7135 GEN_INT (((mask >> 2) & 3) + 4),
7136 GEN_INT (((mask >> 4) & 3) + 4),
7137 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd with the shuffle spelled out as eight selectors.
;; Operands 2-5 are constants in 0..3 and operands 6-9 constants in
;; 4..7; the condition requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 4, i.e. both groups of four
;; elements use the same permutation.
;; The output code packs operands 2-5 into an immediate, two bits per
;; selector starting at bit 0, and overwrites operands[2] with it
;; before printing the template.
7141 (define_insn "avx2_pshufd_1"
7142 [(set (match_operand:V8SI 0 "register_operand" "=x")
7144 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7145 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7146 (match_operand 3 "const_0_to_3_operand" "")
7147 (match_operand 4 "const_0_to_3_operand" "")
7148 (match_operand 5 "const_0_to_3_operand" "")
7149 (match_operand 6 "const_4_to_7_operand" "")
7150 (match_operand 7 "const_4_to_7_operand" "")
7151 (match_operand 8 "const_4_to_7_operand" "")
7152 (match_operand 9 "const_4_to_7_operand" "")])))]
7154 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7155 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7156 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7157 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7160 mask |= INTVAL (operands[2]) << 0;
7161 mask |= INTVAL (operands[3]) << 2;
7162 mask |= INTVAL (operands[4]) << 4;
7163 mask |= INTVAL (operands[5]) << 6;
7164 operands[2] = GEN_INT (mask);
7166 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7168 [(set_attr "type" "sselog1")
7169 (set_attr "prefix" "vex")
7170 (set_attr "length_immediate" "1")
7171 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into its
;; four 2-bit fields and passes them, as selectors in 0..3, to
;; sse2_pshufd_1 together with the V4SI destination and source.
7173 (define_expand "sse2_pshufd"
7174 [(match_operand:V4SI 0 "register_operand" "")
7175 (match_operand:V4SI 1 "nonimmediate_operand" "")
7176 (match_operand:SI 2 "const_int_operand" "")]
7179 int mask = INTVAL (operands[2]);
7180 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7181 GEN_INT ((mask >> 0) & 3),
7182 GEN_INT ((mask >> 2) & 3),
7183 GEN_INT ((mask >> 4) & 3),
7184 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd with the shuffle spelled out as four selectors
;; (operands 2-5, each a constant in 0..3).  The output code packs them
;; into an immediate, two bits per selector starting at bit 0, and
;; overwrites operands[2] with it before printing the template.
7188 (define_insn "sse2_pshufd_1"
7189 [(set (match_operand:V4SI 0 "register_operand" "=x")
7191 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7192 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7193 (match_operand 3 "const_0_to_3_operand" "")
7194 (match_operand 4 "const_0_to_3_operand" "")
7195 (match_operand 5 "const_0_to_3_operand" "")])))]
7199 mask |= INTVAL (operands[2]) << 0;
7200 mask |= INTVAL (operands[3]) << 2;
7201 mask |= INTVAL (operands[4]) << 4;
7202 mask |= INTVAL (operands[5]) << 6;
7203 operands[2] = GEN_INT (mask);
7205 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7207 [(set_attr "type" "sselog1")
7208 (set_attr "prefix_data16" "1")
7209 (set_attr "prefix" "maybe_vex")
7210 (set_attr "length_immediate" "1")
7211 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2, 0..255)
;; into explicit element selectors for avx2_pshuflw_1.  The four 2-bit
;; fields are passed once as-is (values 0..3) and once with 8 added
;; (values 8..11).
7213 (define_expand "avx2_pshuflwv3"
7214 [(match_operand:V16HI 0 "register_operand" "")
7215 (match_operand:V16HI 1 "nonimmediate_operand" "")
7216 (match_operand:SI 2 "const_0_to_255_operand" "")]
7219 int mask = INTVAL (operands[2]);
7220 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7221 GEN_INT ((mask >> 0) & 3),
7222 GEN_INT ((mask >> 2) & 3),
7223 GEN_INT ((mask >> 4) & 3),
7224 GEN_INT ((mask >> 6) & 3),
7225 GEN_INT (((mask >> 0) & 3) + 8),
7226 GEN_INT (((mask >> 2) & 3) + 8),
7227 GEN_INT (((mask >> 4) & 3) + 8),
7228 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw with the variable selectors spelled out.
;; Operands 2-5 are constants in 0..3 and operands 6-9 constants in
;; 8..11; the condition requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 8.
;; The output code packs operands 2-5 into an immediate, two bits per
;; selector starting at bit 0, and overwrites operands[2] with it
;; before printing the template.
7232 (define_insn "avx2_pshuflw_1"
7233 [(set (match_operand:V16HI 0 "register_operand" "=x")
7235 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7236 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7237 (match_operand 3 "const_0_to_3_operand" "")
7238 (match_operand 4 "const_0_to_3_operand" "")
7239 (match_operand 5 "const_0_to_3_operand" "")
7244 (match_operand 6 "const_8_to_11_operand" "")
7245 (match_operand 7 "const_8_to_11_operand" "")
7246 (match_operand 8 "const_8_to_11_operand" "")
7247 (match_operand 9 "const_8_to_11_operand" "")
7253 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7254 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7255 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7256 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7259 mask |= INTVAL (operands[2]) << 0;
7260 mask |= INTVAL (operands[3]) << 2;
7261 mask |= INTVAL (operands[4]) << 4;
7262 mask |= INTVAL (operands[5]) << 6;
7263 operands[2] = GEN_INT (mask);
7265 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7267 [(set_attr "type" "sselog")
7268 (set_attr "prefix" "vex")
7269 (set_attr "length_immediate" "1")
7270 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into its
;; four 2-bit fields and passes them, as selectors in 0..3, to
;; sse2_pshuflw_1 together with the V8HI destination and source.
7272 (define_expand "sse2_pshuflw"
7273 [(match_operand:V8HI 0 "register_operand" "")
7274 (match_operand:V8HI 1 "nonimmediate_operand" "")
7275 (match_operand:SI 2 "const_int_operand" "")]
7278 int mask = INTVAL (operands[2]);
7279 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7280 GEN_INT ((mask >> 0) & 3),
7281 GEN_INT ((mask >> 2) & 3),
7282 GEN_INT ((mask >> 4) & 3),
7283 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw with the variable selectors spelled out as operands
;; 2-5 (each a constant in 0..3).  The output code packs them into an
;; immediate, two bits per selector starting at bit 0, and overwrites
;; operands[2] with it before printing the template.
;; Encoding attributes: prefix_data16 is "0" and prefix_rep is "1".
7287 (define_insn "sse2_pshuflw_1"
7288 [(set (match_operand:V8HI 0 "register_operand" "=x")
7290 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7291 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7292 (match_operand 3 "const_0_to_3_operand" "")
7293 (match_operand 4 "const_0_to_3_operand" "")
7294 (match_operand 5 "const_0_to_3_operand" "")
7302 mask |= INTVAL (operands[2]) << 0;
7303 mask |= INTVAL (operands[3]) << 2;
7304 mask |= INTVAL (operands[4]) << 4;
7305 mask |= INTVAL (operands[5]) << 6;
7306 operands[2] = GEN_INT (mask);
7308 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7310 [(set_attr "type" "sselog")
7311 (set_attr "prefix_data16" "0")
7312 (set_attr "prefix_rep" "1")
7313 (set_attr "prefix" "maybe_vex")
7314 (set_attr "length_immediate" "1")
7315 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2, 0..255)
;; into explicit element selectors for avx2_pshufhw_1.  The four 2-bit
;; fields are passed once with 4 added (values 4..7) and once with 12
;; added (values 12..15).
7317 (define_expand "avx2_pshufhwv3"
7318 [(match_operand:V16HI 0 "register_operand" "")
7319 (match_operand:V16HI 1 "nonimmediate_operand" "")
7320 (match_operand:SI 2 "const_0_to_255_operand" "")]
7323 int mask = INTVAL (operands[2]);
7324 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7325 GEN_INT (((mask >> 0) & 3) + 4),
7326 GEN_INT (((mask >> 2) & 3) + 4),
7327 GEN_INT (((mask >> 4) & 3) + 4),
7328 GEN_INT (((mask >> 6) & 3) + 4),
7329 GEN_INT (((mask >> 0) & 3) + 12),
7330 GEN_INT (((mask >> 2) & 3) + 12),
7331 GEN_INT (((mask >> 4) & 3) + 12),
7332 GEN_INT (((mask >> 6) & 3) + 12)));
;; 256-bit vpshufhw with the variable selectors spelled out.
;; Operands 2-5 are constants in 4..7 and operands 6-9 constants in
;; 12..15; the condition requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 8.
;; The output code subtracts 4 from each of operands 2-5 to recover
;; the 2-bit fields, packs them into an immediate starting at bit 0,
;; and overwrites operands[2] with it before printing the template.
7336 (define_insn "avx2_pshufhw_1"
7337 [(set (match_operand:V16HI 0 "register_operand" "=x")
7339 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7340 (parallel [(const_int 0)
7344 (match_operand 2 "const_4_to_7_operand" "")
7345 (match_operand 3 "const_4_to_7_operand" "")
7346 (match_operand 4 "const_4_to_7_operand" "")
7347 (match_operand 5 "const_4_to_7_operand" "")
7352 (match_operand 6 "const_12_to_15_operand" "")
7353 (match_operand 7 "const_12_to_15_operand" "")
7354 (match_operand 8 "const_12_to_15_operand" "")
7355 (match_operand 9 "const_12_to_15_operand" "")])))]
7357 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7358 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7359 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7360 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7363 mask |= (INTVAL (operands[2]) - 4) << 0;
7364 mask |= (INTVAL (operands[3]) - 4) << 2;
7365 mask |= (INTVAL (operands[4]) - 4) << 4;
7366 mask |= (INTVAL (operands[5]) - 4) << 6;
7367 operands[2] = GEN_INT (mask);
7369 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7371 [(set_attr "type" "sselog")
7372 (set_attr "prefix" "vex")
7373 (set_attr "length_immediate" "1")
7374 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into its
;; four 2-bit fields, adds 4 to each (giving selectors in 4..7), and
;; passes them to sse2_pshufhw_1 together with the V8HI destination
;; and source.
7376 (define_expand "sse2_pshufhw"
7377 [(match_operand:V8HI 0 "register_operand" "")
7378 (match_operand:V8HI 1 "nonimmediate_operand" "")
7379 (match_operand:SI 2 "const_int_operand" "")]
7382 int mask = INTVAL (operands[2]);
7383 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7384 GEN_INT (((mask >> 0) & 3) + 4),
7385 GEN_INT (((mask >> 2) & 3) + 4),
7386 GEN_INT (((mask >> 4) & 3) + 4),
7387 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit pshufhw with the variable selectors spelled out as operands
;; 2-5 (each a constant in 4..7).  The output code subtracts 4 from
;; each to recover the 2-bit fields, packs them into an immediate
;; starting at bit 0, and overwrites operands[2] with it before
;; printing the template.
;; Encoding attributes: prefix_rep is "1" and prefix_data16 is "0".
7391 (define_insn "sse2_pshufhw_1"
7392 [(set (match_operand:V8HI 0 "register_operand" "=x")
7394 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7395 (parallel [(const_int 0)
7399 (match_operand 2 "const_4_to_7_operand" "")
7400 (match_operand 3 "const_4_to_7_operand" "")
7401 (match_operand 4 "const_4_to_7_operand" "")
7402 (match_operand 5 "const_4_to_7_operand" "")])))]
7406 mask |= (INTVAL (operands[2]) - 4) << 0;
7407 mask |= (INTVAL (operands[3]) - 4) << 2;
7408 mask |= (INTVAL (operands[4]) - 4) << 4;
7409 mask |= (INTVAL (operands[5]) - 4) << 6;
7410 operands[2] = GEN_INT (mask);
7412 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7414 [(set_attr "type" "sselog")
7415 (set_attr "prefix_rep" "1")
7416 (set_attr "prefix_data16" "0")
7417 (set_attr "prefix" "maybe_vex")
7418 (set_attr "length_immediate" "1")
7419 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from SImode operand 1.  The
;; preparation statement supplies operand 2 itself, setting it to the
;; V4SImode zero constant (CONST0_RTX).
;; NOTE(review): presumably operand 2 provides the remaining, zeroed
;; elements of the result -- confirm against the full pattern.
7421 (define_expand "sse2_loadd"
7422 [(set (match_operand:V4SI 0 "register_operand" "")
7425 (match_operand:SI 1 "nonimmediate_operand" ""))
7429 "operands[2] = CONST0_RTX (V4SImode);")
;; Place SImode operand 2 into a V4SI destination, with V4SI operand 1
;; (a register or zero, per reg_or_0_operand) as the other input.
;; Alternatives, read off the constraints and templates:
;;   0: memory source,  operand 1 under "C", movd  (isa sse2)
;;   1: general-register source into a "Yi" destination, operand 1
;;      under "C", movd
;;   2: memory source,  operand 1 under "C", movss (non-AVX)
;;   3: SSE-register source, operand 1 tied to the destination,
;;      movss (non-AVX)
;;   4: SSE-register source, three-operand vmovss (AVX)
;; NOTE(review): "C" is presumably the zero-constant constraint, which
;; would match the reg_or_0_operand predicate -- confirm.
7431 (define_insn "sse2_loadld"
7432 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7435 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7436 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7440 %vmovd\t{%2, %0|%0, %2}
7441 %vmovd\t{%2, %0|%0, %2}
7442 movss\t{%2, %0|%0, %2}
7443 movss\t{%2, %0|%0, %2}
7444 vmovss\t{%2, %1, %0|%0, %1, %2}"
7445 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7446 (set_attr "type" "ssemov")
7447 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7448 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI operand 1 into an SImode destination (an
;; SSE register or memory in alternative 0, a general register in
;; alternative 1).
;; The split runs only after reload and only if inter-unit moves are
;; enabled, the destination is memory, or the destination is not a
;; general register.  It rewrites the insn as a plain SImode set from
;; the same hard register: operands[1] is rebuilt as an SImode REG
;; with operand 1's REGNO.
7450 (define_insn_and_split "sse2_stored"
7451 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7453 (match_operand:V4SI 1 "register_operand" "x,Yi")
7454 (parallel [(const_int 0)])))]
7457 "&& reload_completed
7458 && (TARGET_INTER_UNIT_MOVES
7459 || MEM_P (operands [0])
7460 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7461 [(set (match_dup 0) (match_dup 1))]
7462 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (constant 0..3) from a V4SI value that
;; lives in offsettable memory ("o") into an SImode general register.
;; The split code turns this into an ordinary SImode move from the
;; source address adjusted by i*4 bytes, where i is the element index.
7464 (define_insn_and_split "*vec_ext_v4si_mem"
7465 [(set (match_operand:SI 0 "register_operand" "=r")
7467 (match_operand:V4SI 1 "memory_operand" "o")
7468 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7474 int i = INTVAL (operands[2]);
7476 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander for storing element 0 of V2DI register operand 1
;; into a DImode register-or-memory destination.
7480 (define_expand "sse_storeq"
7481 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7483 (match_operand:V2DI 1 "register_operand" "")
7484 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 DImode store from a V2DI source.
;; Enabled under TARGET_64BIT and rejects memory-to-memory operands.
;; Alternatives by constraint: an SSE-register source to an SSE
;; register or memory; a "Yi" source to a general register; and an
;; offsettable-memory source to a general register.  The "type" and
;; "mode" attributes are only given for the last one (imov / DI),
;; which is the plain mov{q} template shown.
7487 (define_insn "*sse2_storeq_rex64"
7488 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7490 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7491 (parallel [(const_int 0)])))]
7492 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7496 mov{q}\t{%1, %0|%0, %1}"
7497 [(set_attr "type" "*,*,imov")
7498 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode store from V2DI register operand 1 into an SSE
;; register or memory ("=xm").
;; NOTE(review): the second operand template below (empty constraints),
;; together with its condition and the (match_dup) replacement, reads
;; as a separate post-reload split rather than part of the insn --
;; confirm the definition boundary.  That part fires when inter-unit
;; moves are enabled, the destination is memory, or the destination is
;; not a general register, and rewrites the operation as a plain
;; DImode set from the same hard register (operands[1] rebuilt as a
;; DImode REG with operand 1's REGNO).
7500 (define_insn "*sse2_storeq"
7501 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7503 (match_operand:V2DI 1 "register_operand" "x")
7504 (parallel [(const_int 0)])))]
7509 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7511 (match_operand:V2DI 1 "register_operand" "")
7512 (parallel [(const_int 0)])))]
7515 && (TARGET_INTER_UNIT_MOVES
7516 || MEM_P (operands [0])
7517 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7518 [(set (match_dup 0) (match_dup 1))]
7519 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 of a V2DI value into DImode.
;; Enabled under TARGET_64BIT and rejects memory-to-memory operands.
;; Alternatives, read off the constraints and templates:
;;   0: register source to memory              -- movhps
;;   1: source tied to destination (non-AVX)   -- psrldq by 8
;;   2: register source (AVX)                  -- vpsrldq by 8
;;   3: offsettable memory to SSE register     -- movq of %H1
;;   4: offsettable memory to general register -- mov{q} of %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand -- confirm against the i386 operand modifiers.
7521 (define_insn "*vec_extractv2di_1_rex64"
7522 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7524 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7525 (parallel [(const_int 1)])))]
7526 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7528 %vmovhps\t{%1, %0|%0, %1}
7529 psrldq\t{$8, %0|%0, 8}
7530 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7531 %vmovq\t{%H1, %0|%0, %H1}
7532 mov{q}\t{%H1, %0|%0, %H1}"
7533 [(set_attr "isa" "*,noavx,avx,*,*")
7534 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7535 (set_attr "length_immediate" "*,1,1,*,*")
7536 (set_attr "memory" "*,none,none,*,*")
7537 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7538 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit-target extraction of element 1 of a V2DI value into DImode.
;; Enabled under !TARGET_64BIT && TARGET_SSE and rejects
;; memory-to-memory operands.  The destination is memory or an SSE
;; register only.
;; Alternatives, read off the constraints, templates and "isa":
;;   0: register source to memory                    -- movhps
;;   1: source tied to destination (sse2, non-AVX)   -- psrldq by 8
;;   2: register source (AVX)                        -- vpsrldq by 8
;;   3: offsettable memory source (sse2)             -- movq of %H1
;;   4: register source (non-AVX)                    -- movhlps
;;   5: offsettable memory source (non-AVX)          -- movlps of %H1
7540 (define_insn "*vec_extractv2di_1"
7541 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7543 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7544 (parallel [(const_int 1)])))]
7545 "!TARGET_64BIT && TARGET_SSE
7546 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7548 %vmovhps\t{%1, %0|%0, %1}
7549 psrldq\t{$8, %0|%0, 8}
7550 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7551 %vmovq\t{%H1, %0|%0, %H1}
7552 movhlps\t{%1, %0|%0, %1}
7553 movlps\t{%H1, %0|%0, %H1}"
7554 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7555 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7556 (set_attr "length_immediate" "*,1,1,*,*,*")
7557 (set_attr "memory" "*,none,none,*,*,*")
7558 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7559 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; AVX form producing a V4SI register from SImode operand 1.
;; Alternative 0 takes the scalar in an SSE register and prints
;; vpshufd with immediate 0; alternative 1 takes it from memory and
;; prints vbroadcastss.
;; NOTE(review): going by the pattern name this is the broadcast of
;; operand 1 to all four elements -- the wrapping rtx is not spelled
;; on these lines.
7561 (define_insn "*vec_dupv4si_avx"
7562 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7564 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7567 vpshufd\t{$0, %1, %0|%0, %1, 0}
7568 vbroadcastss\t{%1, %0|%0, %1}"
7569 [(set_attr "type" "sselog1,ssemov")
7570 (set_attr "length_immediate" "1,0")
7571 (set_attr "prefix_extra" "0,1")
7572 (set_attr "prefix" "vex")
7573 (set_attr "mode" "TI,V4SF")])
;; Form producing a V4SI register from SImode register operand 1.
;; Alternative 0 (isa sse2) prints pshufd with immediate 0;
;; alternative 1 ties operand 1 to the destination and prints shufps
;; of the destination with itself, immediate 0.
;; NOTE(review): going by the pattern name this is the broadcast of
;; operand 1 to all four elements -- the wrapping rtx is not spelled
;; on these lines.
7575 (define_insn "*vec_dupv4si"
7576 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7578 (match_operand:SI 1 "register_operand" " x,0")))]
7581 pshufd\t{$0, %1, %0|%0, %1, 0}
7582 shufps\t{$0, %0, %0|%0, %0, 0}"
7583 [(set_attr "isa" "sse2,*")
7584 (set_attr "type" "sselog1")
7585 (set_attr "length_immediate" "1")
7586 (set_attr "mode" "TI,V4SF")])
;; Form producing a V2DI register from DImode operand 1, with three
;; alternatives: operand 1 tied to the destination (non-AVX), operand
;; 1 in an SSE register (AVX), and operand 1 in memory.
;; The visible templates are vpunpcklqdq, which prints operand 1
;; through the %d1 modifier, and movddup.
;; NOTE(review): %d1 presumably prints the register twice to fill both
;; source slots -- confirm against the i386 operand modifiers.
7588 (define_insn "*vec_dupv2di_sse3"
7589 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7591 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7595 vpunpcklqdq\t{%d1, %0|%0, %d1}
7596 %vmovddup\t{%1, %0|%0, %1}"
7597 [(set_attr "isa" "noavx,avx,*")
7598 (set_attr "type" "sselog1")
7599 (set_attr "prefix" "orig,vex,maybe_vex")
7600 (set_attr "mode" "TI,TI,DF")])
;; Form producing a V2DI register from DImode register operand 1,
;; which is tied to the destination in both alternatives.  The first
;; alternative is tagged isa sse2 / type sselog1 / mode TI, the second
;; type ssemov / mode V4SF.
7602 (define_insn "*vec_dupv2di"
7603 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7605 (match_operand:DI 1 "register_operand" " 0,0")))]
7610 [(set_attr "isa" "sse2,*")
7611 (set_attr "type" "sselog1,ssemov")
7612 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI register from SImode operands 1 and 2.
;; Alternatives, read off the constraints and templates:
;;   0: op1 tied, op2 reg/mem (non-AVX)   -- pinsrd $1
;;   1: op1 SSE reg, op2 reg/mem (AVX)    -- vpinsrd $1
;;   2: op1 tied, op2 SSE reg (non-AVX)   -- punpckldq
;;   3: both SSE regs (AVX)               -- vpunpckldq
;;   4: op1 reg/mem, op2 under "C"        -- movd
;;   5: MMX: op1 tied, op2 MMX reg/mem    -- punpckldq
;;   6: MMX: op1 reg/mem, op2 under "C"   -- movd
;; NOTE(review): "C" is presumably the zero-constant constraint, so
;; alternatives 4 and 6 only need to load operand 1 -- confirm.
7614 (define_insn "*vec_concatv2si_sse4_1"
7615 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7617 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7618 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7621 pinsrd\t{$1, %2, %0|%0, %2, 1}
7622 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7623 punpckldq\t{%2, %0|%0, %2}
7624 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7625 %vmovd\t{%1, %0|%0, %1}
7626 punpckldq\t{%2, %0|%0, %2}
7627 movd\t{%1, %0|%0, %1}"
7628 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7629 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7630 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7631 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7632 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7633 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7635 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7636 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7637 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI register from SImode operands 1 and 2 (operand 2 is a
;; register or zero, per reg_or_0_operand).
;; Alternatives, read off the constraints and templates:
;;   0: op1 tied, op2 SSE reg           -- punpckldq
;;   1: op1 reg/mem, op2 under "C"      -- movd
;;   2: MMX: op1 tied, op2 MMX reg      -- punpckldq
;;   3: MMX: op1 reg/mem, op2 under "C" -- movd
7638 (define_insn "*vec_concatv2si_sse2"
7639 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7641 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7642 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7645 punpckldq\t{%2, %0|%0, %2}
7646 movd\t{%1, %0|%0, %1}
7647 punpckldq\t{%2, %0|%0, %2}
7648 movd\t{%1, %0|%0, %1}"
7649 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7650 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI register from SImode operands 1 and 2 (operand 2 is a
;; register or zero, per reg_or_0_operand) using single-precision
;; move/unpack instructions for the SSE-register alternatives.
;; Alternatives, read off the constraints and templates:
;;   0: op1 tied, op2 SSE reg           -- unpcklps
;;   1: op1 memory, op2 under "C"       -- movss
;;   2: MMX: op1 tied, op2 MMX reg      -- punpckldq
;;   3: MMX: op1 reg/mem, op2 under "C" -- movd
7652 (define_insn "*vec_concatv2si_sse"
7653 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7655 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7656 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7659 unpcklps\t{%2, %0|%0, %2}
7660 movss\t{%1, %0|%0, %1}
7661 punpckldq\t{%2, %0|%0, %2}
7662 movd\t{%1, %0|%0, %1}"
7663 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7664 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI register from two V2SI halves (operands 1 and 2).
;; Alternatives, read off the constraints, templates and "isa":
;;   0: op1 tied, op2 register (sse2, non-AVX) -- punpcklqdq
;;   1: both registers (AVX)                   -- vpunpcklqdq
;;   2: op1 tied, op2 register (non-AVX)       -- movlhps
;;   3: op1 tied, op2 memory (non-AVX)         -- movhps
;;   4: op1 register, op2 memory (AVX)         -- vmovhps
7666 (define_insn "*vec_concatv4si"
7667 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7669 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7670 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7673 punpcklqdq\t{%2, %0|%0, %2}
7674 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7675 movlhps\t{%2, %0|%0, %2}
7676 movhps\t{%2, %0|%0, %2}
7677 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7678 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7679 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7680 (set_attr "prefix" "orig,vex,orig,orig,vex")
7681 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V2DI destination built from DImode operands 1 and 2 (a vec_concat, per the
;; pattern name).  Nine alternatives:
;;   0-1  pinsrq / vpinsrq          operand 2 from a general register or memory
;;   2-4  movq / movd / movq2dq     operand 2 under the C constraint
;;   5-6  punpcklqdq / vpunpcklqdq  operand 2 in an x register
;;   7-8  movhps / vmovhps          operand 2 in memory
;; The type attribute is sselog for alternatives 0, 1, 5 and 6 and ssemov for
;; the rest.  prefix_rex is computed specially for alternatives 0 and 3 when
;; TARGET_AVX is off.
7683 ;; movd instead of movq is required to handle broken assemblers.
7684 (define_insn "*vec_concatv2di_rex64"
7685 [(set (match_operand:V2DI 0 "register_operand"
7686 "=x,x ,x ,Yi,!x,x,x,x,x")
7688 (match_operand:DI 1 "nonimmediate_operand"
7689 " 0,x ,xm,r ,*y,0,x,0,x")
7690 (match_operand:DI 2 "vector_move_operand"
7691 "rm,rm,C ,C ,C ,x,x,m,m")))]
7694 pinsrq\t{$1, %2, %0|%0, %2, 1}
7695 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7696 %vmovq\t{%1, %0|%0, %1}
7697 %vmovd\t{%1, %0|%0, %1}
7698 movq2dq\t{%1, %0|%0, %1}
7699 punpcklqdq\t{%2, %0|%0, %2}
7700 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7701 movhps\t{%2, %0|%0, %2}
7702 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7703 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7706 (eq_attr "alternative" "0,1,5,6")
7707 (const_string "sselog")
7708 (const_string "ssemov")))
7709 (set (attr "prefix_rex")
7711 (and (eq_attr "alternative" "0,3")
7712 (not (match_test "TARGET_AVX")))
7714 (const_string "*")))
7715 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7716 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7717 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7718 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; V2DI destination built from DImode operands 1 and 2, enabled only when
;; !TARGET_64BIT && TARGET_SSE.  Seven alternatives:
;;   0  movq         operand 2 under the C constraint, operand 1 in xm
;;   1  movq2dq      operand 2 under the C constraint, operand 1 in a y register
;;   2  punpcklqdq   isa sse2_noavx
;;   3  vpunpcklqdq  isa avx
;;   4  movlhps      isa noavx, operand 2 in a register
;;   5  movhps       isa noavx, operand 2 in memory
;;   6  vmovhps      isa avx, operand 2 in memory
7720 (define_insn "vec_concatv2di"
7721 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7723 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7724 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7725 "!TARGET_64BIT && TARGET_SSE"
7727 %vmovq\t{%1, %0|%0, %1}
7728 movq2dq\t{%1, %0|%0, %1}
7729 punpcklqdq\t{%2, %0|%0, %2}
7730 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7731 movlhps\t{%2, %0|%0, %2}
7732 movhps\t{%2, %0|%0, %2}
7733 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7734 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7735 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7736 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7737 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander over the VI124_AVX2 modes: operand 1 is the source vector and
;; operand 0 the result in the corresponding <sseunpackmode>.  All work is
;; delegated to ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): presumably the first flag selects unsigned extension and the
;; second selects the high half - confirm against ix86_expand_sse_unpack.
7739 (define_expand "vec_unpacks_lo_<mode>"
7740 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7741 (match_operand:VI124_AVX2 1 "register_operand" "")]
7743 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander over the VI124_AVX2 modes: operand 1 is the source vector and
;; operand 0 the result in the corresponding <sseunpackmode>.  All work is
;; delegated to ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): presumably the first flag selects unsigned extension and the
;; second selects the high half - confirm against ix86_expand_sse_unpack.
7745 (define_expand "vec_unpacks_hi_<mode>"
7746 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7747 (match_operand:VI124_AVX2 1 "register_operand" "")]
7749 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander over the VI124_AVX2 modes: operand 1 is the source vector and
;; operand 0 the result in the corresponding <sseunpackmode>.  All work is
;; delegated to ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): presumably the first flag selects unsigned extension and the
;; second selects the high half - confirm against ix86_expand_sse_unpack.
7751 (define_expand "vec_unpacku_lo_<mode>"
7752 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7753 (match_operand:VI124_AVX2 1 "register_operand" "")]
7755 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander over the VI124_AVX2 modes: operand 1 is the source vector and
;; operand 0 the result in the corresponding <sseunpackmode>.  All work is
;; delegated to ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): presumably the first flag selects unsigned extension and the
;; second selects the high half - confirm against ix86_expand_sse_unpack.
7757 (define_expand "vec_unpacku_hi_<mode>"
7758 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7759 (match_operand:VI124_AVX2 1 "register_operand" "")]
7761 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI unsigned-average pattern.  Operands 1 and 2 may be
;; registers or memory; the pattern also contains a constant vector of
;; thirty-two 1s.  The preparation statement only canonicalises the operands
;; via ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands).
;; NOTE(review): the constant vector is presumably the rounding term of
;; (a + b + 1) >> 1 - confirm against the full pattern.
7769 (define_expand "avx2_uavgv32qi3"
7770 [(set (match_operand:V32QI 0 "register_operand" "")
7776 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7778 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7779 (const_vector:V32QI [(const_int 1) (const_int 1)
7780 (const_int 1) (const_int 1)
7781 (const_int 1) (const_int 1)
7782 (const_int 1) (const_int 1)
7783 (const_int 1) (const_int 1)
7784 (const_int 1) (const_int 1)
7785 (const_int 1) (const_int 1)
7786 (const_int 1) (const_int 1)
7787 (const_int 1) (const_int 1)
7788 (const_int 1) (const_int 1)
7789 (const_int 1) (const_int 1)
7790 (const_int 1) (const_int 1)
7791 (const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)]))
7797 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI unsigned-average pattern.  Operands 1 and 2 may be
;; registers or memory; the pattern also contains a constant vector of
;; sixteen 1s.  The preparation statement only canonicalises the operands
;; via ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
;; NOTE(review): the constant vector is presumably the rounding term of
;; (a + b + 1) >> 1 - confirm against the full pattern.
7799 (define_expand "sse2_uavgv16qi3"
7800 [(set (match_operand:V16QI 0 "register_operand" "")
7806 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7808 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7809 (const_vector:V16QI [(const_int 1) (const_int 1)
7810 (const_int 1) (const_int 1)
7811 (const_int 1) (const_int 1)
7812 (const_int 1) (const_int 1)
7813 (const_int 1) (const_int 1)
7814 (const_int 1) (const_int 1)
7815 (const_int 1) (const_int 1)
7816 (const_int 1) (const_int 1)]))
7819 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI unsigned average, emitted as vpavgb.
;; Enabled when TARGET_AVX2 holds and ix86_binary_operator_ok accepts the
;; operands for PLUS.  Operand 1 carries the % constraint modifier
;; (commutative with operand 2); operand 2 may be in memory.  The constant
;; vector must be all 1s (thirty-two of them) for the pattern to match.
7821 (define_insn "*avx2_uavgv32qi3"
7822 [(set (match_operand:V32QI 0 "register_operand" "=x")
7828 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7830 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7831 (const_vector:V32QI [(const_int 1) (const_int 1)
7832 (const_int 1) (const_int 1)
7833 (const_int 1) (const_int 1)
7834 (const_int 1) (const_int 1)
7835 (const_int 1) (const_int 1)
7836 (const_int 1) (const_int 1)
7837 (const_int 1) (const_int 1)
7838 (const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)]))
7848 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7849 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7850 [(set_attr "type" "sseiadd")
7851 (set_attr "prefix" "vex")
7852 (set_attr "mode" "OI")])
;; Matching insn for the V16QI unsigned average.  Enabled when TARGET_SSE2
;; holds and ix86_binary_operator_ok accepts the operands for PLUS.
;; Two alternatives chosen by the isa attribute: noavx emits two-operand
;; pavgb with operand 1 tied to the destination, avx emits three-operand
;; vpavgb.  Only the noavx alternative sets prefix_data16.
7854 (define_insn "*sse2_uavgv16qi3"
7855 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7861 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7863 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7864 (const_vector:V16QI [(const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)
7870 (const_int 1) (const_int 1)
7871 (const_int 1) (const_int 1)]))
7873 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7875 pavgb\t{%2, %0|%0, %2}
7876 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7877 [(set_attr "isa" "noavx,avx")
7878 (set_attr "type" "sseiadd")
7879 (set_attr "prefix_data16" "1,*")
7880 (set_attr "prefix" "orig,vex")
7881 (set_attr "mode" "TI")])
;; Expander for the V16HI unsigned-average pattern.  Operands 1 and 2 may be
;; registers or memory; the pattern also contains a constant vector of
;; sixteen 1s.  The preparation statement only canonicalises the operands
;; via ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands).
;; NOTE(review): the constant vector is presumably the rounding term of
;; (a + b + 1) >> 1 - confirm against the full pattern.
7883 (define_expand "avx2_uavgv16hi3"
7884 [(set (match_operand:V16HI 0 "register_operand" "")
7890 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7892 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7893 (const_vector:V16HI [(const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)
7898 (const_int 1) (const_int 1)
7899 (const_int 1) (const_int 1)
7900 (const_int 1) (const_int 1)]))
7903 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI unsigned-average pattern.  Operands 1 and 2 may be
;; registers or memory; the pattern also contains a constant vector of
;; eight 1s.  The preparation statement only canonicalises the operands
;; via ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
;; NOTE(review): the constant vector is presumably the rounding term of
;; (a + b + 1) >> 1 - confirm against the full pattern.
7905 (define_expand "sse2_uavgv8hi3"
7906 [(set (match_operand:V8HI 0 "register_operand" "")
7912 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7914 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7915 (const_vector:V8HI [(const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)
7917 (const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)]))
7921 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI unsigned average, emitted as vpavgw.
;; Enabled when TARGET_AVX2 holds and ix86_binary_operator_ok accepts the
;; operands for PLUS.  Operand 1 carries the % constraint modifier
;; (commutative with operand 2); operand 2 may be in memory.  The constant
;; vector must be all 1s (sixteen of them) for the pattern to match.
7923 (define_insn "*avx2_uavgv16hi3"
7924 [(set (match_operand:V16HI 0 "register_operand" "=x")
7930 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7932 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7933 (const_vector:V16HI [(const_int 1) (const_int 1)
7934 (const_int 1) (const_int 1)
7935 (const_int 1) (const_int 1)
7936 (const_int 1) (const_int 1)
7937 (const_int 1) (const_int 1)
7938 (const_int 1) (const_int 1)
7939 (const_int 1) (const_int 1)
7940 (const_int 1) (const_int 1)]))
7942 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7943 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7944 [(set_attr "type" "sseiadd")
7945 (set_attr "prefix" "vex")
7946 (set_attr "mode" "OI")])
;; Matching insn for the V8HI unsigned average.  Enabled when TARGET_SSE2
;; holds and ix86_binary_operator_ok accepts the operands for PLUS.
;; Two alternatives chosen by the isa attribute: noavx emits two-operand
;; pavgw with operand 1 tied to the destination, avx emits three-operand
;; vpavgw.  Only the noavx alternative sets prefix_data16.
7948 (define_insn "*sse2_uavgv8hi3"
7949 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7955 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7957 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7958 (const_vector:V8HI [(const_int 1) (const_int 1)
7959 (const_int 1) (const_int 1)
7960 (const_int 1) (const_int 1)
7961 (const_int 1) (const_int 1)]))
7963 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7965 pavgw\t{%2, %0|%0, %2}
7966 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7967 [(set_attr "isa" "noavx,avx")
7968 (set_attr "type" "sseiadd")
7969 (set_attr "prefix_data16" "1,*")
7970 (set_attr "prefix" "orig,vex")
7971 (set_attr "mode" "TI")])
;; psadbw / vpsadbw, modelled as an unspec rather than as explicit RTL
;; arithmetic.  The result iterates over the VI8_AVX2 modes while both
;; inputs are in the matching <ssebytemode>.  The noavx alternative ties
;; operand 1 to the destination; the avx alternative is three-operand.
7973 ;; The correct representation for this is absolutely enormous, and
7974 ;; surely not generally useful.
7975 (define_insn "<sse2_avx2>_psadbw"
7976 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7977 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7978 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7982 psadbw\t{%2, %0|%0, %2}
7983 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7984 [(set_attr "isa" "noavx,avx")
7985 (set_attr "type" "sseiadd")
7986 (set_attr "atom_unit" "simul")
7987 (set_attr "prefix_data16" "1,*")
7988 (set_attr "prefix" "orig,vex")
7989 (set_attr "mode" "<sseinsnmode>")])
;; movmsk family over every VF (vector float) mode: the source is a vector
;; register and the SImode result goes to a general register.  The mnemonic
;; is movmsk plus the mode suffix, with the %v prefix so the VEX form can be
;; used (prefix attribute maybe_vex).
7991 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7992 [(set (match_operand:SI 0 "register_operand" "=r")
7994 [(match_operand:VF 1 "register_operand" "x")]
7997 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7998 [(set_attr "type" "ssemov")
7999 (set_attr "prefix" "maybe_vex")
8000 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI source register, producing an SImode result in a
;; general register.  Always VEX-encoded (prefix attribute vex).
;; NOTE(review): the mode attribute is DI here while sse2_pmovmskb uses SI -
;; confirm this difference is intended.
8002 (define_insn "avx2_pmovmskb"
8003 [(set (match_operand:SI 0 "register_operand" "=r")
8004 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
8007 "vpmovmskb\t{%1, %0|%0, %1}"
8008 [(set_attr "type" "ssemov")
8009 (set_attr "prefix" "vex")
8010 (set_attr "mode" "DI")])
;; pmovmskb on a V16QI source register, producing an SImode result in a
;; general register.  The %v template prefix and the maybe_vex prefix
;; attribute allow either the legacy or the VEX encoding; prefix_data16 is
;; set to 1.
8012 (define_insn "sse2_pmovmskb"
8013 [(set (match_operand:SI 0 "register_operand" "=r")
8014 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8017 "%vpmovmskb\t{%1, %0|%0, %1}"
8018 [(set_attr "type" "ssemov")
8019 (set_attr "prefix_data16" "1")
8020 (set_attr "prefix" "maybe_vex")
8021 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: a V16QI memory destination (operand 0) set
;; from an unspec whose inputs include V16QI register operands 1 and 2.
8023 (define_expand "sse2_maskmovdqu"
8024 [(set (match_operand:V16QI 0 "memory_operand" "")
8025 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8026 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The destination is V16QI memory addressed
;; by operand 0, a pointer-mode (P) register under the D constraint.  The
;; unspec reads V16QI registers 1 and 2 and also the previous contents of the
;; destination (match_dup 0).  Operand 0 does not appear in the assembler
;; template, so length_vex is computed by hand as 3 plus one when operand 2
;; is a REX SSE register.
8031 (define_insn "*sse2_maskmovdqu"
8032 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8033 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8034 (match_operand:V16QI 2 "register_operand" "x")
8035 (mem:V16QI (match_dup 0))]
8038 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8039 [(set_attr "type" "ssemov")
8040 (set_attr "prefix_data16" "1")
8041 ;; The implicit %rdi operand confuses default length_vex computation.
8042 (set (attr "length_vex")
8043 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8044 (set_attr "prefix" "maybe_vex")
8045 (set_attr "mode" "TI")])
;; Load of the MXCSR register (per the pattern name) from an SImode memory
;; operand, modelled as an unspec_volatile.  The memory attribute is load
;; and atom_sse_attr is mxcsr.
8047 (define_insn "sse_ldmxcsr"
8048 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8052 [(set_attr "type" "sse")
8053 (set_attr "atom_sse_attr" "mxcsr")
8054 (set_attr "prefix" "maybe_vex")
8055 (set_attr "memory" "load")])
;; Store of UNSPECV_STMXCSR (an unspec_volatile with a dummy const_int 0
;; input) into an SImode memory operand.  The memory attribute is store and
;; atom_sse_attr is mxcsr.
8057 (define_insn "sse_stmxcsr"
8058 [(set (match_operand:SI 0 "memory_operand" "=m")
8059 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8062 [(set_attr "type" "sse")
8063 (set_attr "atom_sse_attr" "mxcsr")
8064 (set_attr "prefix" "maybe_vex")
8065 (set_attr "memory" "store")])
;; Expander for the store fence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  The fence is expressed as UNSPEC_SFENCE applied to a
;; BLKmode memory reference; the preparation code creates that reference
;; over a scratch address and marks it volatile.
8067 (define_expand "sse_sfence"
8069 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8070 "TARGET_SSE || TARGET_3DNOW_A"
8072 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8073 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the store fence: a BLKmode operand set to
;; UNSPEC_SFENCE of itself, under TARGET_SSE or TARGET_3DNOW_A.  The operand
;; has no predicate or constraint.  length_address is forced to 0 and the
;; memory attribute is unknown.
8076 (define_insn "*sse_sfence"
8077 [(set (match_operand:BLK 0 "" "")
8078 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8079 "TARGET_SSE || TARGET_3DNOW_A"
8081 [(set_attr "type" "sse")
8082 (set_attr "length_address" "0")
8083 (set_attr "atom_sse_attr" "fence")
8084 (set_attr "memory" "unknown")])
;; Cache-line flush (per the pattern name), modelled as an unspec_volatile
;; whose only operand is an address (address_operand, p constraint).
;; atom_sse_attr is fence and the memory attribute is unknown.
8086 (define_insn "sse2_clflush"
8087 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8091 [(set_attr "type" "sse")
8092 (set_attr "atom_sse_attr" "fence")
8093 (set_attr "memory" "unknown")])
;; Expander for the full memory fence.  The fence is expressed as
;; UNSPEC_MFENCE applied to a BLKmode memory reference; the preparation code
;; creates that reference over a scratch address and marks it volatile.
8095 (define_expand "sse2_mfence"
8097 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8100 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8101 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the full memory fence: a BLKmode operand set to
;; UNSPEC_MFENCE of itself, enabled for TARGET_64BIT or TARGET_SSE2.
;; length_address is forced to 0 and the memory attribute is unknown.
8104 (define_insn "*sse2_mfence"
8105 [(set (match_operand:BLK 0 "" "")
8106 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8107 "TARGET_64BIT || TARGET_SSE2"
8109 [(set_attr "type" "sse")
8110 (set_attr "length_address" "0")
8111 (set_attr "atom_sse_attr" "fence")
8112 (set_attr "memory" "unknown")])
;; Expander for the load fence.  The fence is expressed as UNSPEC_LFENCE
;; applied to a BLKmode memory reference; the preparation code creates that
;; reference over a scratch address and marks it volatile.
8114 (define_expand "sse2_lfence"
8116 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8119 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8120 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the load fence: a BLKmode operand set to UNSPEC_LFENCE
;; of itself.  length_address is forced to 0, the memory attribute is
;; unknown, and atom_sse_attr is lfence (the sfence and mfence insns use
;; fence instead).
8123 (define_insn "*sse2_lfence"
8124 [(set (match_operand:BLK 0 "" "")
8125 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8128 [(set_attr "type" "sse")
8129 (set_attr "length_address" "0")
8130 (set_attr "atom_sse_attr" "lfence")
8131 (set_attr "memory" "unknown")])
;; mwait, modelled as an unspec_volatile on two SImode register operands
;; pinned by the a and c constraints.  The same SImode pattern serves 64-bit
;; code, as the comment below explains.  The length attribute is fixed at 3.
8133 (define_insn "sse3_mwait"
8134 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8135 (match_operand:SI 1 "register_operand" "c")]
8138 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8139 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8140 ;; we only need to set up 32bit registers.
8142 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT), modelled as an
;; unspec_volatile on three SImode register operands pinned by the a, c and
;; d constraints.  All three operands are printed in the assembler template.
;; The length attribute is fixed at 3.
8144 (define_insn "sse3_monitor"
8145 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8146 (match_operand:SI 1 "register_operand" "c")
8147 (match_operand:SI 2 "register_operand" "d")]
8149 "TARGET_SSE3 && !TARGET_64BIT"
8150 "monitor\t%0, %1, %2"
8151 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is DImode under
;; the a constraint; operands 1 and 2 stay SImode under the c and d
;; constraints, for the reason given in the comment below.  The length
;; attribute is fixed at 3.
8153 (define_insn "sse3_monitor64"
8154 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8155 (match_operand:SI 1 "register_operand" "c")
8156 (match_operand:SI 2 "register_operand" "d")]
8158 "TARGET_SSE3 && TARGET_64BIT"
8159 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8160 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8161 ;; zero extended to 64bit, we only need to set up 32bit registers.
8163 [(set_attr "length" "3")])
8165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8167 ;; SSSE3 instructions
8169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vphaddw on V16HI operands.  The RTL selects adjacent HImode element pairs
;; (0,1), (2,3) ... (14,15) of operand 1, followed by the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the Intel SDM describes 256-bit VPHADDW as operating on each
;; 128-bit lane separately, whereas the pairs here are listed as all of
;; operand 1 and then all of operand 2 - confirm the element order matches
;; the instruction.
8171 (define_insn "avx2_phaddwv16hi3"
8172 [(set (match_operand:V16HI 0 "register_operand" "=x")
8179 (match_operand:V16HI 1 "register_operand" "x")
8180 (parallel [(const_int 0)]))
8181 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8183 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8187 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8190 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8199 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8206 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8212 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8213 (parallel [(const_int 0)]))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8216 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8220 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8224 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8228 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8231 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8232 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8235 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8238 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8239 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8241 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8242 [(set_attr "type" "sseiadd")
8243 (set_attr "prefix_extra" "1")
8244 (set_attr "prefix" "vex")
8245 (set_attr "mode" "OI")])
;; phaddw / vphaddw on V8HI operands.  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7) of operand 1, followed by the
;; same pairs of operand 2.  The noavx alternative ties operand 1 to the
;; destination; the avx alternative is three-operand.  Operand 2 may be
;; memory.
8247 (define_insn "ssse3_phaddwv8hi3"
8248 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8254 (match_operand:V8HI 1 "register_operand" "0,x")
8255 (parallel [(const_int 0)]))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8258 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8259 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8262 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8265 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8266 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8271 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8272 (parallel [(const_int 0)]))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8275 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8276 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8283 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8286 phaddw\t{%2, %0|%0, %2}
8287 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8288 [(set_attr "isa" "noavx,avx")
8289 (set_attr "type" "sseiadd")
8290 (set_attr "atom_unit" "complex")
8291 (set_attr "prefix_data16" "1,*")
8292 (set_attr "prefix_extra" "1")
8293 (set_attr "prefix" "orig,vex")
8294 (set_attr "mode" "TI")])
;; phaddw on V4HI operands held in y registers.  The RTL selects adjacent
;; HImode element pairs (0,1) and (2,3) of operand 1, followed by the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8296 (define_insn "ssse3_phaddwv4hi3"
8297 [(set (match_operand:V4HI 0 "register_operand" "=y")
8302 (match_operand:V4HI 1 "register_operand" "0")
8303 (parallel [(const_int 0)]))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8311 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8312 (parallel [(const_int 0)]))
8313 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8315 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8318 "phaddw\t{%2, %0|%0, %2}"
8319 [(set_attr "type" "sseiadd")
8320 (set_attr "atom_unit" "complex")
8321 (set_attr "prefix_extra" "1")
8322 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8323 (set_attr "mode" "DI")])
;; vphaddd on V8SI operands.  The RTL selects adjacent SImode element pairs
;; (0,1), (2,3), (4,5), (6,7) of operand 1, followed by the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the Intel SDM describes 256-bit VPHADDD as operating on each
;; 128-bit lane separately, whereas the pairs here are listed as all of
;; operand 1 and then all of operand 2 - confirm the element order matches
;; the instruction.
8325 (define_insn "avx2_phadddv8si3"
8326 [(set (match_operand:V8SI 0 "register_operand" "=x")
8332 (match_operand:V8SI 1 "register_operand" "x")
8333 (parallel [(const_int 0)]))
8334 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8336 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8337 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8340 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8341 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8343 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8344 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8349 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8350 (parallel [(const_int 0)]))
8351 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8353 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8354 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8357 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8358 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8360 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8361 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8363 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8364 [(set_attr "type" "sseiadd")
8365 (set_attr "prefix_extra" "1")
8366 (set_attr "prefix" "vex")
8367 (set_attr "mode" "OI")])
;; phaddd / vphaddd on V4SI operands.  The RTL selects adjacent SImode
;; element pairs (0,1) and (2,3) of operand 1, followed by the same pairs of
;; operand 2.  The noavx alternative ties operand 1 to the destination; the
;; avx alternative is three-operand.  Operand 2 may be memory.
8369 (define_insn "ssse3_phadddv4si3"
8370 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8375 (match_operand:V4SI 1 "register_operand" "0,x")
8376 (parallel [(const_int 0)]))
8377 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8379 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8380 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8384 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8385 (parallel [(const_int 0)]))
8386 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8388 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8389 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8392 phaddd\t{%2, %0|%0, %2}
8393 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8394 [(set_attr "isa" "noavx,avx")
8395 (set_attr "type" "sseiadd")
8396 (set_attr "atom_unit" "complex")
8397 (set_attr "prefix_data16" "1,*")
8398 (set_attr "prefix_extra" "1")
8399 (set_attr "prefix" "orig,vex")
8400 (set_attr "mode" "TI")])
;; phaddd on V2SI operands held in y registers.  The RTL selects the SImode
;; element pair (0,1) of operand 1, followed by the same pair of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8402 (define_insn "ssse3_phadddv2si3"
8403 [(set (match_operand:V2SI 0 "register_operand" "=y")
8407 (match_operand:V2SI 1 "register_operand" "0")
8408 (parallel [(const_int 0)]))
8409 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8412 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8413 (parallel [(const_int 0)]))
8414 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8416 "phaddd\t{%2, %0|%0, %2}"
8417 [(set_attr "type" "sseiadd")
8418 (set_attr "atom_unit" "complex")
8419 (set_attr "prefix_extra" "1")
8420 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8421 (set_attr "mode" "DI")])
;; vphaddsw on V16HI operands.  The RTL selects adjacent HImode element
;; pairs (0,1), (2,3) ... (14,15) of operand 1, followed by the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the Intel SDM describes 256-bit VPHADDSW as operating on
;; each 128-bit lane separately, whereas the pairs here are listed as all of
;; operand 1 and then all of operand 2 - confirm the element order matches
;; the instruction.
8423 (define_insn "avx2_phaddswv16hi3"
8424 [(set (match_operand:V16HI 0 "register_operand" "=x")
8431 (match_operand:V16HI 1 "register_operand" "x")
8432 (parallel [(const_int 0)]))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8454 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8455 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8458 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8464 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8465 (parallel [(const_int 0)]))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8468 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8475 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8493 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8494 [(set_attr "type" "sseiadd")
8495 (set_attr "prefix_extra" "1")
8496 (set_attr "prefix" "vex")
8497 (set_attr "mode" "OI")])
;; phaddsw / vphaddsw on V8HI operands.  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7) of operand 1, followed by the
;; same pairs of operand 2.  The noavx alternative ties operand 1 to the
;; destination; the avx alternative is three-operand.  Operand 2 may be
;; memory.
8499 (define_insn "ssse3_phaddswv8hi3"
8500 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8506 (match_operand:V8HI 1 "register_operand" "0,x")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8523 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8524 (parallel [(const_int 0)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8534 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8535 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8538 phaddsw\t{%2, %0|%0, %2}
8539 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8540 [(set_attr "isa" "noavx,avx")
8541 (set_attr "type" "sseiadd")
8542 (set_attr "atom_unit" "complex")
8543 (set_attr "prefix_data16" "1,*")
8544 (set_attr "prefix_extra" "1")
8545 (set_attr "prefix" "orig,vex")
8546 (set_attr "mode" "TI")])
;; phaddsw on V4HI operands held in y registers.  The RTL selects adjacent
;; HImode element pairs (0,1) and (2,3) of operand 1, followed by the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8548 (define_insn "ssse3_phaddswv4hi3"
8549 [(set (match_operand:V4HI 0 "register_operand" "=y")
8554 (match_operand:V4HI 1 "register_operand" "0")
8555 (parallel [(const_int 0)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8563 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8564 (parallel [(const_int 0)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8570 "phaddsw\t{%2, %0|%0, %2}"
8571 [(set_attr "type" "sseiadd")
8572 (set_attr "atom_unit" "complex")
8573 (set_attr "prefix_extra" "1")
8574 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8575 (set_attr "mode" "DI")])
;; vphsubw on V16HI operands.  The RTL selects adjacent HImode element pairs
;; (0,1), (2,3) ... (14,15) of operand 1, followed by the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the Intel SDM describes 256-bit VPHSUBW as operating on each
;; 128-bit lane separately, whereas the pairs here are listed as all of
;; operand 1 and then all of operand 2 - confirm the element order matches
;; the instruction.
8577 (define_insn "avx2_phsubwv16hi3"
8578 [(set (match_operand:V16HI 0 "register_operand" "=x")
8585 (match_operand:V16HI 1 "register_operand" "x")
8586 (parallel [(const_int 0)]))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8589 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8593 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8594 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8596 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8604 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8605 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8608 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8609 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8611 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8618 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8619 (parallel [(const_int 0)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8622 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8627 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8629 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8630 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8634 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8637 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8641 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8642 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8644 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8645 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8647 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8648 [(set_attr "type" "sseiadd")
8649 (set_attr "prefix_extra" "1")
8650 (set_attr "prefix" "vex")
8651 (set_attr "mode" "OI")])
;; phsubw / vphsubw on V8HI operands.  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7) of operand 1, followed by the
;; same pairs of operand 2.  The noavx alternative ties operand 1 to the
;; destination; the avx alternative is three-operand.  Operand 2 may be
;; memory.
8653 (define_insn "ssse3_phsubwv8hi3"
8654 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8660 (match_operand:V8HI 1 "register_operand" "0,x")
8661 (parallel [(const_int 0)]))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8669 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8671 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8672 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8677 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8678 (parallel [(const_int 0)]))
8679 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8682 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8686 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8689 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8692 phsubw\t{%2, %0|%0, %2}
8693 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8694 [(set_attr "isa" "noavx,avx")
8695 (set_attr "type" "sseiadd")
8696 (set_attr "atom_unit" "complex")
8697 (set_attr "prefix_data16" "1,*")
8698 (set_attr "prefix_extra" "1")
8699 (set_attr "prefix" "orig,vex")
8700 (set_attr "mode" "TI")])
;; phsubw on V4HI operands held in y registers.  The RTL selects adjacent
;; HImode element pairs (0,1) and (2,3) of operand 1, followed by the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8702 (define_insn "ssse3_phsubwv4hi3"
8703 [(set (match_operand:V4HI 0 "register_operand" "=y")
8708 (match_operand:V4HI 1 "register_operand" "0")
8709 (parallel [(const_int 0)]))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8712 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8713 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8717 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8718 (parallel [(const_int 0)]))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8724 "phsubw\t{%2, %0|%0, %2}"
8725 [(set_attr "type" "sseiadd")
8726 (set_attr "atom_unit" "complex")
8727 (set_attr "prefix_extra" "1")
8728 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8729 (set_attr "mode" "DI")])
;; Pattern for the vphsubd instruction on V8SI operands.  The RTL selects
;; SImode elements 0-7 of operand 1, then of operand 2, in adjacent pairs.
;; Single alternative, three-operand form: operand 1 must be an "x"
;; register, operand 2 may be an "x" register or memory.
8731 (define_insn "avx2_phsubdv8si3"
8732 [(set (match_operand:V8SI 0 "register_operand" "=x")
8738 (match_operand:V8SI 1 "register_operand" "x")
8739 (parallel [(const_int 0)]))
8740 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8742 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8743 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8746 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8747 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8749 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8750 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8755 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8756 (parallel [(const_int 0)]))
8757 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8759 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8760 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8763 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8764 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8766 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8767 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8769 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8770 [(set_attr "type" "sseiadd")
8771 (set_attr "prefix_extra" "1")
8772 (set_attr "prefix" "vex")
8773 (set_attr "mode" "OI")])
;; Pattern for phsubd / vphsubd on V4SI operands.  The RTL selects SImode
;; elements 0-3 of operand 1, then of operand 2, in adjacent pairs.
;; Alternative 0 ties operand 1 to the destination and prints the
;; two-operand phsubd form; alternative 1 prints the three-operand vphsubd
;; form.  The "isa" attribute tags the alternatives "noavx" and "avx".
8775 (define_insn "ssse3_phsubdv4si3"
8776 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8781 (match_operand:V4SI 1 "register_operand" "0,x")
8782 (parallel [(const_int 0)]))
8783 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8785 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8786 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8790 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8791 (parallel [(const_int 0)]))
8792 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8794 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8795 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8798 phsubd\t{%2, %0|%0, %2}
8799 vphsubd\t{%2, %1, %0|%0, %1, %2}"
;; Comma-separated attribute values below are per alternative, in the same
;; order as the constraint alternatives above.
8801 [(set_attr "isa" "noavx,avx")
8802 (set_attr "type" "sseiadd")
8803 (set_attr "atom_unit" "complex")
8804 (set_attr "prefix_data16" "1,*")
8805 (set_attr "prefix_extra" "1")
8806 (set_attr "prefix" "orig,vex")
8807 (set_attr "mode" "TI")])
;; Pattern for the phsubd instruction on V2SI operands held in "y"-class
;; registers.  The RTL selects SImode elements 0 and 1 of operand 1, then of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a "y"
;; register or memory.
8809 (define_insn "ssse3_phsubdv2si3"
8810 [(set (match_operand:V2SI 0 "register_operand" "=y")
8814 (match_operand:V2SI 1 "register_operand" "0")
8815 (parallel [(const_int 0)]))
8816 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8819 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8820 (parallel [(const_int 0)]))
8821 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8823 "phsubd\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8824 [(set_attr "type" "sseiadd")
8825 (set_attr "atom_unit" "complex")
8826 (set_attr "prefix_extra" "1")
8827 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8828 (set_attr "mode" "DI")])
;; Pattern for the vphsubsw instruction on V16HI operands.  The RTL selects
;; HImode elements 0-15 of operand 1, then of operand 2, in adjacent pairs.
;; Single alternative, three-operand form: operand 1 must be an "x"
;; register, operand 2 may be an "x" register or memory.
8830 (define_insn "avx2_phsubswv16hi3"
8831 [(set (match_operand:V16HI 0 "register_operand" "=x")
8838 (match_operand:V16HI 1 "register_operand" "x")
8839 (parallel [(const_int 0)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8842 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8849 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8850 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8854 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8857 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8858 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8861 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8862 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8864 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8865 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8871 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8872 (parallel [(const_int 0)]))
8873 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8875 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8876 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8879 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8880 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8882 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8883 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8887 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8888 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8890 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8891 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8894 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8895 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8897 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8898 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8900 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8901 [(set_attr "type" "sseiadd")
8902 (set_attr "prefix_extra" "1")
8903 (set_attr "prefix" "vex")
8904 (set_attr "mode" "OI")])
;; Pattern for phsubsw / vphsubsw on V8HI operands.  The RTL selects HImode
;; elements 0-7 of operand 1, then of operand 2, in adjacent pairs.
;; Alternative 0 ties operand 1 to the destination and prints the
;; two-operand phsubsw form; alternative 1 prints the three-operand
;; vphsubsw form.
8906 (define_insn "ssse3_phsubswv8hi3"
8907 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8913 (match_operand:V8HI 1 "register_operand" "0,x")
8914 (parallel [(const_int 0)]))
8915 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8917 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8918 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8921 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8922 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8924 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8925 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8930 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8931 (parallel [(const_int 0)]))
8932 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8934 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8935 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8938 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8939 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8941 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8942 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8945 phsubsw\t{%2, %0|%0, %2}
8946 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
;; Comma-separated attribute values below are per alternative.
8947 [(set_attr "isa" "noavx,avx")
8948 (set_attr "type" "sseiadd")
8949 (set_attr "atom_unit" "complex")
8950 (set_attr "prefix_data16" "1,*")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "prefix" "orig,vex")
8953 (set_attr "mode" "TI")])
;; Pattern for the phsubsw instruction on V4HI operands held in "y"-class
;; registers.  The RTL selects HImode elements 0-3 of operand 1, then of
;; operand 2, in adjacent pairs.  Operand 1 is tied to the destination;
;; operand 2 may be a "y" register or memory.
8955 (define_insn "ssse3_phsubswv4hi3"
8956 [(set (match_operand:V4HI 0 "register_operand" "=y")
8961 (match_operand:V4HI 1 "register_operand" "0")
8962 (parallel [(const_int 0)]))
8963 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8965 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8966 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8970 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8971 (parallel [(const_int 0)]))
8972 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8974 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8975 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8977 "phsubsw\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8978 [(set_attr "type" "sseiadd")
8979 (set_attr "atom_unit" "complex")
8980 (set_attr "prefix_extra" "1")
8981 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8982 (set_attr "mode" "DI")])
;; Pattern for the vpmaddubsw instruction: V32QI inputs, V16HI result.
;; Each operand is sub-selected twice: once with an index list starting at
;; element 0 and once (as a V16QI vec_select of the same operand) with an
;; index list starting at element 1 and ending at 31.
;; Single alternative, three-operand form; operand 2 may be memory.
8984 (define_insn "avx2_pmaddubsw256"
8985 [(set (match_operand:V16HI 0 "register_operand" "=x")
8990 (match_operand:V32QI 1 "register_operand" "x")
8991 (parallel [(const_int 0)
9009 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
9010 (parallel [(const_int 0)
9028 (vec_select:V16QI (match_dup 1)
9029 (parallel [(const_int 1)
9046 (vec_select:V16QI (match_dup 2)
9047 (parallel [(const_int 1)
9062 (const_int 31)]))))))]
9064 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9065 [(set_attr "type" "sseiadd")
9066 (set_attr "prefix_extra" "1")
9067 (set_attr "prefix" "vex")
9068 (set_attr "mode" "OI")])
;; Pattern for pmaddubsw / vpmaddubsw: V16QI inputs, V8HI result.
;; Each operand is sub-selected twice: once with an index list starting at
;; element 0 and once (as a V8QI vec_select) with an index list starting at
;; element 1 and ending at 15.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand v-prefixed form.
9070 (define_insn "ssse3_pmaddubsw128"
9071 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9076 (match_operand:V16QI 1 "register_operand" "0,x")
9077 (parallel [(const_int 0)
9087 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9088 (parallel [(const_int 0)
9098 (vec_select:V8QI (match_dup 1)
9099 (parallel [(const_int 1)
9108 (vec_select:V8QI (match_dup 2)
9109 (parallel [(const_int 1)
9116 (const_int 15)]))))))]
9119 pmaddubsw\t{%2, %0|%0, %2}
9120 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
;; Comma-separated attribute values below are per alternative.
9121 [(set_attr "isa" "noavx,avx")
9122 (set_attr "type" "sseiadd")
9123 (set_attr "atom_unit" "simul")
9124 (set_attr "prefix_data16" "1,*")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "prefix" "orig,vex")
9127 (set_attr "mode" "TI")])
;; Pattern for the pmaddubsw instruction on "y"-class registers: V8QI
;; inputs, V4HI result.  Each operand is sub-selected twice: once with an
;; index list starting at element 0 and once (as a V4QI vec_select) with an
;; index list starting at element 1 and ending at 7.
;; Operand 1 is tied to the destination; operand 2 may be memory.
9129 (define_insn "ssse3_pmaddubsw"
9130 [(set (match_operand:V4HI 0 "register_operand" "=y")
9135 (match_operand:V8QI 1 "register_operand" "0")
9136 (parallel [(const_int 0)
9142 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9143 (parallel [(const_int 0)
9149 (vec_select:V4QI (match_dup 1)
9150 (parallel [(const_int 1)
9155 (vec_select:V4QI (match_dup 2)
9156 (parallel [(const_int 1)
9159 (const_int 7)]))))))]
9161 "pmaddubsw\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9162 [(set_attr "type" "sseiadd")
9163 (set_attr "atom_unit" "simul")
9164 (set_attr "prefix_extra" "1")
9165 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9166 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply pattern matched by *avx2_umulhrswv16hi3.
;; Both inputs are accepted as nonimmediate operands; the RTL includes a
;; V16HI const_vector whose sixteen elements are all 1.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, V16HImode,
;; operands) before the insn is generated.
;; NOTE(review): this expander is named "umulhrsw" while the V8HI and V4HI
;; expanders in this file are named "pmulhrsw" -- confirm the name is
;; intentional before relying on it.
9168 (define_expand "avx2_umulhrswv16hi3"
9169 [(set (match_operand:V16HI 0 "register_operand" "")
9176 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9178 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9180 (const_vector:V16HI [(const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)
9185 (const_int 1) (const_int 1)
9186 (const_int 1) (const_int 1)
9187 (const_int 1) (const_int 1)]))
9190 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Insn for the vpmulhrsw instruction on V16HI operands.  Operand 1 is
;; marked commutative ("%") with operand 2; the RTL includes a V16HI
;; const_vector of all ones.  The insn condition requires TARGET_AVX2 and
;; that ix86_binary_operator_ok accepts the operands for MULT.
9192 (define_insn "*avx2_umulhrswv16hi3"
9193 [(set (match_operand:V16HI 0 "register_operand" "=x")
9200 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9202 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9204 (const_vector:V16HI [(const_int 1) (const_int 1)
9205 (const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)
9209 (const_int 1) (const_int 1)
9210 (const_int 1) (const_int 1)
9211 (const_int 1) (const_int 1)]))
9213 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9214 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9215 [(set_attr "type" "sseimul")
9216 (set_attr "prefix_extra" "1")
9217 (set_attr "prefix" "vex")
9218 (set_attr "mode" "OI")])
;; Expander for the V8HI multiply pattern matched by *ssse3_pmulhrswv8hi3.
;; Both inputs are accepted as nonimmediate operands; the RTL includes a
;; V8HI const_vector of all ones.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
9220 (define_expand "ssse3_pmulhrswv8hi3"
9221 [(set (match_operand:V8HI 0 "register_operand" "")
9228 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9230 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9232 (const_vector:V8HI [(const_int 1) (const_int 1)
9233 (const_int 1) (const_int 1)
9234 (const_int 1) (const_int 1)
9235 (const_int 1) (const_int 1)]))
9238 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for pmulhrsw / vpmulhrsw on V8HI operands.  Operand 1 is marked
;; commutative ("%") with operand 2.  Alternative 0 ties operand 1 to the
;; destination (two-operand form); alternative 1 is the three-operand
;; v-prefixed form.  The insn condition requires TARGET_SSSE3 and that
;; ix86_binary_operator_ok accepts the operands for MULT.
9240 (define_insn "*ssse3_pmulhrswv8hi3"
9241 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9248 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9250 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9252 (const_vector:V8HI [(const_int 1) (const_int 1)
9253 (const_int 1) (const_int 1)
9254 (const_int 1) (const_int 1)
9255 (const_int 1) (const_int 1)]))
9257 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9259 pmulhrsw\t{%2, %0|%0, %2}
9260 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
;; Comma-separated attribute values below are per alternative.
9261 [(set_attr "isa" "noavx,avx")
9262 (set_attr "type" "sseimul")
9263 (set_attr "prefix_data16" "1,*")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "prefix" "orig,vex")
9266 (set_attr "mode" "TI")])
;; Expander for the V4HI multiply pattern matched by *ssse3_pmulhrswv4hi3.
;; Both inputs are accepted as nonimmediate operands; the RTL includes a
;; V4HI const_vector of all ones.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands).
9268 (define_expand "ssse3_pmulhrswv4hi3"
9269 [(set (match_operand:V4HI 0 "register_operand" "")
9276 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9278 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9280 (const_vector:V4HI [(const_int 1) (const_int 1)
9281 (const_int 1) (const_int 1)]))
9284 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Insn for the pmulhrsw instruction on V4HI operands in "y"-class
;; registers.  Operand 1 is commutative ("%") and tied to the destination;
;; operand 2 may be a "y" register or memory.  The insn condition requires
;; TARGET_SSSE3 and that ix86_binary_operator_ok accepts the operands.
9286 (define_insn "*ssse3_pmulhrswv4hi3"
9287 [(set (match_operand:V4HI 0 "register_operand" "=y")
9294 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9296 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9298 (const_vector:V4HI [(const_int 1) (const_int 1)
9299 (const_int 1) (const_int 1)]))
9301 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9302 "pmulhrsw\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9303 [(set_attr "type" "sseimul")
9304 (set_attr "prefix_extra" "1")
9305 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9306 (set_attr "mode" "DI")])
;; Pattern for pshufb / vpshufb, instantiated over the VI1_AVX2 mode
;; iterator.  The operation is expressed as an unspec of operands 1 and 2.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand v-prefixed form.  The insn "mode"
;; attribute follows the iterator through <sseinsnmode>.
9308 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9309 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9310 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9311 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9315 pshufb\t{%2, %0|%0, %2}
9316 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9317 [(set_attr "isa" "noavx,avx")
9318 (set_attr "type" "sselog1")
9319 (set_attr "prefix_data16" "1,*")
9320 (set_attr "prefix_extra" "1")
9321 (set_attr "prefix" "orig,vex")
9322 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for the pshufb instruction on V8QI operands in "y"-class
;; registers, expressed as an unspec of operands 1 and 2.  Operand 1 is tied
;; to the destination; operand 2 may be a "y" register or memory.
9324 (define_insn "ssse3_pshufbv8qi3"
9325 [(set (match_operand:V8QI 0 "register_operand" "=y")
9326 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9327 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9330 "pshufb\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9331 [(set_attr "type" "sselog1")
9332 (set_attr "prefix_extra" "1")
9333 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9334 (set_attr "mode" "DI")])
;; Pattern for psign<suffix> / vpsign<suffix>, instantiated over the
;; VI124_AVX2 mode iterator; the mnemonic suffix comes from
;; <ssemodesuffix>.  Alternative 0 ties operand 1 to the destination
;; (two-operand form); alternative 1 is the three-operand v-prefixed form.
9336 (define_insn "<ssse3_avx2>_psign<mode>3"
9337 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9339 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9340 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9344 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9345 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9346 [(set_attr "isa" "noavx,avx")
9347 (set_attr "type" "sselog1")
9348 (set_attr "prefix_data16" "1,*")
9349 (set_attr "prefix_extra" "1")
9350 (set_attr "prefix" "orig,vex")
9351 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for the psign<size> instruction on "y"-class registers,
;; instantiated over the MMXMODEI mode iterator; the mnemonic suffix comes
;; from <mmxvecsize>.  Operand 1 is tied to the destination; operand 2 may
;; be a "y" register or memory.
9353 (define_insn "ssse3_psign<mode>3"
9354 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9356 [(match_operand:MMXMODEI 1 "register_operand" "0")
9357 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9360 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9361 [(set_attr "type" "sselog1")
9362 (set_attr "prefix_extra" "1")
9363 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9364 (set_attr "mode" "DI")])
;; Pattern for palignr / vpalignr, instantiated over the SSESCALARMODE
;; iterator and expressed as an unspec of operands 1-3.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, then returns the two-operand palignr template or the
;; three-operand vpalignr template depending on which_alternative.
9366 (define_insn "<ssse3_avx2>_palignr<mode>"
9367 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9368 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9369 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9370 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9374 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9376 switch (which_alternative)
9379 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9381 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9386 [(set_attr "isa" "noavx,avx")
9387 (set_attr "type" "sseishft")
9388 (set_attr "atom_unit" "sishuf")
9389 (set_attr "prefix_data16" "1,*")
9390 (set_attr "prefix_extra" "1")
9391 (set_attr "length_immediate" "1")
9392 (set_attr "prefix" "orig,vex")
9393 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for the palignr instruction on DImode operands in "y"-class
;; registers, expressed as an unspec of operands 1-3.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before returning the template.  Operand 1 is tied to the destination.
9395 (define_insn "ssse3_palignrdi"
9396 [(set (match_operand:DI 0 "register_operand" "=y")
9397 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9398 (match_operand:DI 2 "nonimmediate_operand" "ym")
9399 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9403 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9404 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9406 [(set_attr "type" "sseishft")
9407 (set_attr "atom_unit" "sishuf")
9408 (set_attr "prefix_extra" "1")
9409 (set_attr "length_immediate" "1")
9410 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9411 (set_attr "mode" "DI")])
;; Pattern named abs<mode>2 for the VI124_AVX2 mode iterator.  It prints
;; pabs<suffix> with the %v template prefix (the "prefix" attribute is
;; "maybe_vex"); the suffix comes from <ssemodesuffix>.  The source operand
;; may be an "x" register or memory.
9413 (define_insn "abs<mode>2"
9414 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9416 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9418 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9419 [(set_attr "type" "sselog1")
9420 (set_attr "prefix_data16" "1")
9421 (set_attr "prefix_extra" "1")
9422 (set_attr "prefix" "maybe_vex")
9423 (set_attr "mode" "<sseinsnmode>")])
;; Pattern named abs<mode>2 for the MMXMODEI mode iterator ("y"-class
;; registers).  It prints pabs<size>, with the suffix taken from
;; <mmxvecsize>.  The source operand may be a "y" register or memory.
9425 (define_insn "abs<mode>2"
9426 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9428 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9430 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
;; prefix_rep is pinned to 0; prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9431 [(set_attr "type" "sselog1")
9432 (set_attr "prefix_rep" "0")
9433 (set_attr "prefix_extra" "1")
9434 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9435 (set_attr "mode" "DI")])
9437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9439 ;; AMD SSE4A instructions
9441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pattern for movnt<suffix>, instantiated over the MODEF mode iterator:
;; stores an "x" register (operand 1) to a memory destination (operand 0).
;; The insn "mode" attribute is the iterator mode itself.
9443 (define_insn "sse4a_movnt<mode>"
9444 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9446 [(match_operand:MODEF 1 "register_operand" "x")]
9449 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9450 [(set_attr "type" "ssemov")
9451 (set_attr "mode" "<MODE>")])
;; Vector-source variant of movnt<suffix> for the VF_128 modes: the value
;; stored is element 0 of vector operand 1 (a vec_select with index 0), so
;; the memory destination has the scalar mode <ssescalarmode>.
9453 (define_insn "sse4a_vmmovnt<mode>"
9454 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9455 (unspec:<ssescalarmode>
9456 [(vec_select:<ssescalarmode>
9457 (match_operand:VF_128 1 "register_operand" "x")
9458 (parallel [(const_int 0)]))]
9461 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9462 [(set_attr "type" "ssemov")
9463 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI: operand 1 is tied to the destination,
;; and operands 2 and 3 are constants accepted by const_0_to_255_operand.
;; length_immediate is 2, one byte for each of the two constants printed.
9465 (define_insn "sse4a_extrqi"
9466 [(set (match_operand:V2DI 0 "register_operand" "=x")
9467 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9468 (match_operand 2 "const_0_to_255_operand" "")
9469 (match_operand 3 "const_0_to_255_operand" "")]
9472 "extrq\t{%3, %2, %0|%0, %2, %3}"
9473 [(set_attr "type" "sse")
9474 (set_attr "prefix_data16" "1")
9475 (set_attr "length_immediate" "2")
9476 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI: operand 1 is tied to the destination and
;; operand 2 is a V16QI value in an "x" register.
9478 (define_insn "sse4a_extrq"
9479 [(set (match_operand:V2DI 0 "register_operand" "=x")
9480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9481 (match_operand:V16QI 2 "register_operand" "x")]
9484 "extrq\t{%2, %0|%0, %2}"
9485 [(set_attr "type" "sse")
9486 (set_attr "prefix_data16" "1")
9487 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI: operand 1 is tied to the destination,
;; operand 2 is an "x" register, and operands 3 and 4 are constants accepted
;; by const_0_to_255_operand.  length_immediate is 2, one byte for each of
;; the two constants printed.
9489 (define_insn "sse4a_insertqi"
9490 [(set (match_operand:V2DI 0 "register_operand" "=x")
9491 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9492 (match_operand:V2DI 2 "register_operand" "x")
9493 (match_operand 3 "const_0_to_255_operand" "")
9494 (match_operand 4 "const_0_to_255_operand" "")]
9497 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9498 [(set_attr "type" "sseins")
9499 (set_attr "prefix_data16" "0")
9500 (set_attr "prefix_rep" "1")
9501 (set_attr "length_immediate" "2")
9502 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI: operand 1 is tied to the destination
;; and operand 2 is an "x" register.
9504 (define_insn "sse4a_insertq"
9505 [(set (match_operand:V2DI 0 "register_operand" "=x")
9506 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9507 (match_operand:V2DI 2 "register_operand" "x")]
9510 "insertq\t{%2, %0|%0, %2}"
9511 [(set_attr "type" "sseins")
9512 (set_attr "prefix_data16" "0")
9513 (set_attr "prefix_rep" "1")
9514 (set_attr "mode" "TI")])
9516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9518 ;; Intel SSE4.1 instructions
9520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pattern for blend<suffix> / vblend<suffix> over the VF mode iterator.
;; In the RTL, operand 2 is listed before operand 1; operand 3 is an
;; immediate whose predicate is const_0_to_<blendbits>_operand.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand v-prefixed form.
9522 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9523 [(set (match_operand:VF 0 "register_operand" "=x,x")
9525 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9526 (match_operand:VF 1 "register_operand" "0,x")
9527 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9530 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9531 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9532 [(set_attr "isa" "noavx,avx")
9533 (set_attr "type" "ssemov")
9534 (set_attr "length_immediate" "1")
9535 (set_attr "prefix_data16" "1,*")
9536 (set_attr "prefix_extra" "1")
9537 (set_attr "prefix" "orig,vex")
9538 (set_attr "mode" "<MODE>")])
;; Pattern for blendv<suffix> / vblendv<suffix> over the VF mode iterator.
;; Operand 3 is a register: alternative 0 constrains it to "Yz" and ties
;; operand 1 to the destination; alternative 1 accepts any "x" register and
;; uses the four-operand v-prefixed form.  Operands 0-2 use the
;; *_not_xmm0_operand_maybe_avx predicates.
;; NOTE(review): length_immediate is "1" for both alternatives here, while
;; <sse4_1_avx2>_pblendvb in this file uses "*,1" -- confirm which is
;; intended for the non-AVX alternative.
9540 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9541 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9543 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9544 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9545 (match_operand:VF 3 "register_operand" "Yz,x")]
9549 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9550 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9551 [(set_attr "isa" "noavx,avx")
9552 (set_attr "type" "ssemov")
9553 (set_attr "length_immediate" "1")
9554 (set_attr "prefix_data16" "1,*")
9555 (set_attr "prefix_extra" "1")
9556 (set_attr "prefix" "orig,vex")
9557 (set_attr "mode" "<MODE>")])
;; Pattern for dp<suffix> / vdp<suffix> over the VF mode iterator.
;; Operand 1 is marked commutative ("%") with operand 2; operand 3 is an
;; immediate accepted by const_0_to_255_operand.  Alternative 0 ties
;; operand 1 to the destination; alternative 1 is the v-prefixed form.
9559 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9560 [(set (match_operand:VF 0 "register_operand" "=x,x")
9562 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9563 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9564 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9568 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9569 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9570 [(set_attr "isa" "noavx,avx")
9571 (set_attr "type" "ssemul")
9572 (set_attr "length_immediate" "1")
9573 (set_attr "prefix_data16" "1,*")
9574 (set_attr "prefix_extra" "1")
9575 (set_attr "prefix" "orig,vex")
9576 (set_attr "mode" "<MODE>")])
;; Pattern for movntdqa over the VI8_AVX2 mode iterator: loads memory
;; operand 1 into "x" register operand 0.  The template uses the %v prefix
;; and the "prefix" attribute is "maybe_vex".
9578 (define_insn "<sse4_1_avx2>_movntdqa"
9579 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9580 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9583 "%vmovntdqa\t{%1, %0|%0, %1}"
9584 [(set_attr "type" "ssemov")
9585 (set_attr "prefix_extra" "1")
9586 (set_attr "prefix" "maybe_vex")
9587 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for mpsadbw / vmpsadbw over the VI1_AVX2 mode iterator,
;; expressed as an unspec of operands 1-3.  Operand 3 is an immediate
;; accepted by const_0_to_255_operand.  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the v-prefixed form.
9589 (define_insn "<sse4_1_avx2>_mpsadbw"
9590 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9591 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9592 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9593 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9597 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9598 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9599 [(set_attr "isa" "noavx,avx")
9600 (set_attr "type" "sselog1")
9601 (set_attr "length_immediate" "1")
9602 (set_attr "prefix_extra" "1")
9603 (set_attr "prefix" "orig,vex")
9604 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for the vpackusdw instruction: two V8SI inputs, V16HI result.
;; Single alternative, three-operand form; operand 2 may be memory.
9606 (define_insn "avx2_packusdw"
9607 [(set (match_operand:V16HI 0 "register_operand" "=x")
9610 (match_operand:V8SI 1 "register_operand" "x"))
9612 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9614 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9615 [(set_attr "type" "sselog")
9616 (set_attr "prefix_extra" "1")
9617 (set_attr "prefix" "vex")
9618 (set_attr "mode" "OI")])
;; Pattern for packusdw / vpackusdw: two V4SI inputs, V8HI result.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand v-prefixed form.
9620 (define_insn "sse4_1_packusdw"
9621 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9624 (match_operand:V4SI 1 "register_operand" "0,x"))
9626 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9629 packusdw\t{%2, %0|%0, %2}
9630 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9631 [(set_attr "isa" "noavx,avx")
9632 (set_attr "type" "sselog")
9633 (set_attr "prefix_extra" "1")
9634 (set_attr "prefix" "orig,vex")
9635 (set_attr "mode" "TI")])
;; Pattern for pblendvb / vpblendvb over the VI1_AVX2 mode iterator.
;; Operand 3 is a register: alternative 0 constrains it to "Yz" and ties
;; operand 1 to the destination; alternative 1 accepts any "x" register and
;; uses the four-operand v-prefixed form.  length_immediate is set only for
;; alternative 1 ("*,1").
;; NOTE(review): operand 0 uses reg_not_xmm0_operand, whereas operand 1
;; here and operand 0 of the <sse4_1>_blendv pattern use
;; reg_not_xmm0_operand_maybe_avx -- confirm the difference is intended.
9637 (define_insn "<sse4_1_avx2>_pblendvb"
9638 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
9640 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9641 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9642 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9646 pblendvb\t{%3, %2, %0|%0, %2, %3}
9647 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9648 [(set_attr "isa" "noavx,avx")
9649 (set_attr "type" "ssemov")
9650 (set_attr "prefix_extra" "1")
9651 (set_attr "length_immediate" "*,1")
9652 (set_attr "prefix" "orig,vex")
9653 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for pblendw / vpblendw on V8HI operands.  In the RTL, operand 2
;; is listed before operand 1; operand 3 is an immediate accepted by
;; const_0_to_255_operand.  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the v-prefixed form.
9655 (define_insn "sse4_1_pblendw"
9656 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9658 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9659 (match_operand:V8HI 1 "register_operand" "0,x")
9660 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9663 pblendw\t{%3, %2, %0|%0, %2, %3}
9664 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9665 [(set_attr "isa" "noavx,avx")
9666 (set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "length_immediate" "1")
9669 (set_attr "prefix" "orig,vex")
9670 (set_attr "mode" "TI")])
9672 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code masks operand 3 to its low 8 bits and replaces it
;; with (val << 8 | val), i.e. the same byte repeated in bits 0-7 and 8-15,
;; before the V16HI pattern is generated.
9673 (define_expand "avx2_pblendw"
9674 [(set (match_operand:V16HI 0 "register_operand" "")
9676 (match_operand:V16HI 2 "nonimmediate_operand" "")
9677 (match_operand:V16HI 1 "register_operand" "")
9678 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9681 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9682 operands[3] = GEN_INT (val << 8 | val);
;; Insn for vpblendw on V16HI operands.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back to its low 8 bits
;; before printing, so the emitted immediate is a single byte.
;; In the RTL, operand 2 is listed before operand 1.
9685 (define_insn "*avx2_pblendw"
9686 [(set (match_operand:V16HI 0 "register_operand" "=x")
9688 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9689 (match_operand:V16HI 1 "register_operand" "x")
9690 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9693 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9694 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9696 [(set_attr "type" "ssemov")
9697 (set_attr "prefix_extra" "1")
9698 (set_attr "length_immediate" "1")
9699 (set_attr "prefix" "vex")
9700 (set_attr "mode" "OI")])
;; Pattern for vpblendd over the VI4_AVX2 mode iterator.  In the RTL,
;; operand 2 is listed before operand 1; operand 3 is an immediate accepted
;; by const_0_to_255_operand.  Single alternative, v-prefixed form only.
9702 (define_insn "avx2_pblendd<mode>"
9703 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9705 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9706 (match_operand:VI4_AVX2 1 "register_operand" "x")
9707 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9709 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9710 [(set_attr "type" "ssemov")
9711 (set_attr "prefix_extra" "1")
9712 (set_attr "length_immediate" "1")
9713 (set_attr "prefix" "vex")
9714 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of
;; operand 1 (register or memory).  The template uses the %v prefix and the
;; "prefix" attribute is "maybe_vex".
9716 (define_insn "sse4_1_phminposuw"
9717 [(set (match_operand:V8HI 0 "register_operand" "=x")
9718 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9719 UNSPEC_PHMINPOSUW))]
9721 "%vphminposuw\t{%1, %0|%0, %1}"
9722 [(set_attr "type" "sselog1")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "maybe_vex")
9725 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>bw: whole V16QI source (register or memory)
;; to V16HI destination.  <code> and <extsuffix> are filled in by a code
;; iterator that is not visible in this part of the file.
9727 (define_insn "avx2_<code>v16qiv16hi2"
9728 [(set (match_operand:V16HI 0 "register_operand" "=x")
9730 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9732 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9733 [(set_attr "type" "ssemov")
9734 (set_attr "prefix_extra" "1")
9735 (set_attr "prefix" "vex")
9736 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>bw (with %v prefix): a sub-selection of the
;; V16QI source, whose index list starts at element 0, to a V8HI
;; destination.  The second dialect alternative of the template prints
;; operand 1 with the %q modifier.
9738 (define_insn "sse4_1_<code>v8qiv8hi2"
9739 [(set (match_operand:V8HI 0 "register_operand" "=x")
9742 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9743 (parallel [(const_int 0)
9752 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9753 [(set_attr "type" "ssemov")
9754 (set_attr "prefix_extra" "1")
9755 (set_attr "prefix" "maybe_vex")
9756 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>bd: a sub-selection of the V16QI source,
;; whose index list starts at element 0, to a V8SI destination.  The second
;; dialect alternative of the template prints operand 1 with %q.
9758 (define_insn "avx2_<code>v8qiv8si2"
9759 [(set (match_operand:V8SI 0 "register_operand" "=x")
9762 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9763 (parallel [(const_int 0)
9772 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9773 [(set_attr "type" "ssemov")
9774 (set_attr "prefix_extra" "1")
9775 (set_attr "prefix" "vex")
9776 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>bd (with %v prefix): a sub-selection of the
;; V16QI source, whose index list starts at element 0, to a V4SI
;; destination.  The second dialect alternative prints operand 1 with %k.
9778 (define_insn "sse4_1_<code>v4qiv4si2"
9779 [(set (match_operand:V4SI 0 "register_operand" "=x")
9782 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9783 (parallel [(const_int 0)
9788 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix_extra" "1")
9791 (set_attr "prefix" "maybe_vex")
9792 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>wd: whole V8HI source (register or memory)
;; to V8SI destination.
9794 (define_insn "avx2_<code>v8hiv8si2"
9795 [(set (match_operand:V8SI 0 "register_operand" "=x")
9797 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9799 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9800 [(set_attr "type" "ssemov")
9801 (set_attr "prefix_extra" "1")
9802 (set_attr "prefix" "vex")
9803 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>wd (with %v prefix): a sub-selection of the
;; V8HI source, whose index list starts at element 0, to a V4SI
;; destination.  The second dialect alternative prints operand 1 with %q.
9805 (define_insn "sse4_1_<code>v4hiv4si2"
9806 [(set (match_operand:V4SI 0 "register_operand" "=x")
9809 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9810 (parallel [(const_int 0)
9815 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9816 [(set_attr "type" "ssemov")
9817 (set_attr "prefix_extra" "1")
9818 (set_attr "prefix" "maybe_vex")
9819 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>bq: a sub-selection of the V16QI source,
;; whose index list starts at element 0, to a V4DI destination.  The second
;; dialect alternative prints operand 1 with %k.
9821 (define_insn "avx2_<code>v4qiv4di2"
9822 [(set (match_operand:V4DI 0 "register_operand" "=x")
9825 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9826 (parallel [(const_int 0)
9831 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9832 [(set_attr "type" "ssemov")
9833 (set_attr "prefix_extra" "1")
9834 (set_attr "prefix" "vex")
9835 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>bq (with %v prefix): a sub-selection of the
;; V16QI source, whose index list starts at element 0, to a V2DI
;; destination.  The second dialect alternative prints operand 1 with %w.
9837 (define_insn "sse4_1_<code>v2qiv2di2"
9838 [(set (match_operand:V2DI 0 "register_operand" "=x")
9841 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9842 (parallel [(const_int 0)
9845 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9846 [(set_attr "type" "ssemov")
9847 (set_attr "prefix_extra" "1")
9848 (set_attr "prefix" "maybe_vex")
9849 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>wq: a sub-selection of the V8HI source,
;; whose index list starts at element 0, to a V4DI destination.  The second
;; dialect alternative prints operand 1 with %q.
9851 (define_insn "avx2_<code>v4hiv4di2"
9852 [(set (match_operand:V4DI 0 "register_operand" "=x")
9855 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9856 (parallel [(const_int 0)
9861 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9862 [(set_attr "type" "ssemov")
9863 (set_attr "prefix_extra" "1")
9864 (set_attr "prefix" "vex")
9865 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>wq (with %v prefix): a sub-selection of the
;; V8HI source, whose index list starts at element 0, to a V2DI
;; destination.  The second dialect alternative prints operand 1 with %k.
9867 (define_insn "sse4_1_<code>v2hiv2di2"
9868 [(set (match_operand:V2DI 0 "register_operand" "=x")
9871 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9872 (parallel [(const_int 0)
9875 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9876 [(set_attr "type" "ssemov")
9877 (set_attr "prefix_extra" "1")
9878 (set_attr "prefix" "maybe_vex")
9879 (set_attr "mode" "TI")])
;; Pattern for vpmov<extsuffix>dq: whole V4SI source (register or memory)
;; to V4DI destination.  The template is v-prefixed only, so the "prefix"
;; attribute is stated explicitly as "vex", matching the other avx2_<code>
;; extension patterns in this file.
9881 (define_insn "avx2_<code>v4siv4di2"
9882 [(set (match_operand:V4DI 0 "register_operand" "=x")
9884 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9886 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9887 [(set_attr "type" "ssemov")
9888 (set_attr "prefix_extra" "1")
 (set_attr "prefix" "vex")
9889 (set_attr "mode" "OI")])
;; Pattern for pmov<extsuffix>dq (with %v prefix): a sub-selection of the
;; V4SI source, whose index list starts at element 0, to a V2DI
;; destination.  The second dialect alternative prints operand 1 with %q.
9891 (define_insn "sse4_1_<code>v2siv2di2"
9892 [(set (match_operand:V2DI 0 "register_operand" "=x")
9895 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9896 (parallel [(const_int 0)
9899 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9900 [(set_attr "type" "ssemov")
9901 (set_attr "prefix_extra" "1")
9902 (set_attr "prefix" "maybe_vex")
9903 (set_attr "mode" "TI")])
9905 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9906 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets the CC-mode FLAGS_REG from an unspec of two VF-mode operands and
;; prints vtest<suffix>.  Operand 0 must be an "x" register; operand 1 may
;; be an "x" register or memory.  There is no register destination.
9907 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9908 [(set (reg:CC FLAGS_REG)
9909 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9910 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9913 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9914 [(set_attr "type" "ssecomi")
9915 (set_attr "prefix_extra" "1")
9916 (set_attr "prefix" "vex")
9917 (set_attr "mode" "<MODE>")])
9919 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9920 ;; But it is not really a compare instruction.
;; Sets the CC-mode FLAGS_REG from an unspec of two V4DI operands and prints
;; vptest.  Operand 0 must be an "x" register; operand 1 may be an "x"
;; register or memory.
9921 (define_insn "avx_ptest256"
9922 [(set (reg:CC FLAGS_REG)
9923 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9924 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9927 "vptest\t{%1, %0|%0, %1}"
9928 [(set_attr "type" "ssecomi")
9929 (set_attr "prefix_extra" "1")
9930 (set_attr "prefix" "vex")
9931 (set_attr "mode" "OI")])
;; V2DI form of ptest: sets FLAGS_REG (CC mode) from an unspec over register
;; operand 0 and register-or-memory operand 1.  The template is %vptest and
;; the prefix attribute is "maybe_vex".
9933 (define_insn "sse4_1_ptest"
9934 [(set (reg:CC FLAGS_REG)
9935 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9936 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9939 "%vptest\t{%1, %0|%0, %1}"
9940 [(set_attr "type" "ssecomi")
9941 (set_attr "prefix_extra" "1")
9942 (set_attr "prefix" "maybe_vex")
9943 (set_attr "mode" "TI")])
;; Packed round: VF register operand 0 is set from VF operand 1 (register or
;; memory) and the immediate operand 2, which must satisfy
;; const_0_to_15_operand.  Emits [v]round<ssemodesuffix>.
;; The prefix_data16 attribute is computed from a TARGET_AVX test rather
;; than being a fixed string.
9945 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9946 [(set (match_operand:VF 0 "register_operand" "=x")
9948 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9949 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9952 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9953 [(set_attr "type" "ssecvt")
9954 (set (attr "prefix_data16")
9956 (match_test "TARGET_AVX")
9958 (const_string "1")))
9959 (set_attr "prefix_extra" "1")
9960 (set_attr "length_immediate" "1")
9961 (set_attr "prefix" "maybe_vex")
9962 (set_attr "mode" "<MODE>")])
;; Scalar round on VF_128 vectors, with two alternatives selected by the
;; "isa" attribute:
;;   noavx - operand 1 is tied to the destination ("0"); emits the
;;           two-source round<ssescalarmodesuffix> form.
;;   avx   - operand 1 is an independent register; emits the three-source
;;           vround<ssescalarmodesuffix> form.
;; Operand 2 is the vector passed to the rounding unspec and operand 3 is
;; the immediate (const_0_to_15_operand).
9964 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9965 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9968 [(match_operand:VF_128 2 "register_operand" "x,x")
9969 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9971 (match_operand:VF_128 1 "register_operand" "0,x")
9975 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9976 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9977 [(set_attr "isa" "noavx,avx")
9978 (set_attr "type" "ssecvt")
9979 (set_attr "length_immediate" "1")
9980 (set_attr "prefix_data16" "1,*")
9981 (set_attr "prefix_extra" "1")
9982 (set_attr "prefix" "orig,vex")
9983 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 on VF modes; only enabled when
;; TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds a vector whose every element is
;; 0.5 - 2**(-p - 1) (p = fmt->p of the element format), gives it the sign
;; of operand 1 via copysign<mode>3 (operand 3), allocates a fresh register
;; as operand 4, and sets operand 5 to ROUND_TRUNC.  The final set computes
;; operand 0 from an unspec over operands 4 and 5.
9985 (define_expand "round<mode>2"
9988 (match_operand:VF 1 "nonimmediate_operand" "")
9990 (set (match_operand:VF 0 "register_operand" "")
9992 [(match_dup 4) (match_dup 5)]
9994 "TARGET_ROUND && !flag_trapping_math"
9996 enum machine_mode scalar_mode;
9997 const struct real_format *fmt;
9998 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
10001 scalar_mode = GET_MODE_INNER (<MODE>mode);
10003 /* load nextafter (0.5, 0.0) */
10004 fmt = REAL_MODE_FORMAT (scalar_mode);
10005 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
10006 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
10007 half = const_double_from_real_value (pred_half, scalar_mode);
/* Broadcast the scalar constant to every element and force it into a
   register.  */
10009 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
10010 vec_half = force_reg (<MODE>mode, vec_half);
/* operands[3] = vec_half carrying the sign of the input.  */
10012 operands[3] = gen_reg_rtx (<MODE>mode);
10013 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
10015 operands[4] = gen_reg_rtx (<MODE>mode);
10016 operands[5] = GEN_INT (ROUND_TRUNC);
10019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10021 ;; Intel SSE4.2 string/text processing instructions
10023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: SI operand 0 (constraint
;; "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.  The inputs are
;; V16QI operands 2 and 4 (operand 4 may be memory in the second
;; alternative), SI operands 3 ("a") and 5 ("d"), and an immediate operand 6
;; in the range 0..255.
;; The split requires can_create_pseudo_p ().  It uses the REG_UNUSED notes
;; on curr_insn to decide which results are live, and emits
;; sse4_2_pcmpestri, sse4_2_pcmpestrm and/or sse4_2_pcmpestr_cconly
;; accordingly.  If nothing is live it emits a NOTE_INSN_DELETED.
10025 (define_insn_and_split "sse4_2_pcmpestr"
10026 [(set (match_operand:SI 0 "register_operand" "=c,c")
10028 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10029 (match_operand:SI 3 "register_operand" "a,a")
10030 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10031 (match_operand:SI 5 "register_operand" "d,d")
10032 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10034 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10042 (set (reg:CC FLAGS_REG)
10051 && can_create_pseudo_p ()"
10056 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10057 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10058 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10061 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10062 operands[3], operands[4],
10063 operands[5], operands[6]));
10065 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10066 operands[3], operands[4],
10067 operands[5], operands[6]));
10068 if (flags && !(ecx || xmm0))
10069 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10070 operands[2], operands[3],
10071 operands[4], operands[5],
10073 if (!(flags || ecx || xmm0))
10074 emit_note (NOTE_INSN_DELETED);
10078 [(set_attr "type" "sselog")
10079 (set_attr "prefix_data16" "1")
10080 (set_attr "prefix_extra" "1")
10081 (set_attr "length_immediate" "1")
10082 (set_attr "memory" "none,load")
10083 (set_attr "mode" "TI")])
;; pcmpestri: produces SI operand 0 (constraint "c") and sets FLAGS_REG.
;; Inputs are V16QI operand 1 (register), V16QI operand 3 (register or
;; memory), SI operands 2 ("a") and 4 ("d"), and an immediate operand 5 in
;; the range 0..255.  Only operands 1, 3 and 5 appear in the assembler
;; template.
10085 (define_insn "sse4_2_pcmpestri"
10086 [(set (match_operand:SI 0 "register_operand" "=c,c")
10088 [(match_operand:V16QI 1 "register_operand" "x,x")
10089 (match_operand:SI 2 "register_operand" "a,a")
10090 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10091 (match_operand:SI 4 "register_operand" "d,d")
10092 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10094 (set (reg:CC FLAGS_REG)
10103 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10104 [(set_attr "type" "sselog")
10105 (set_attr "prefix_data16" "1")
10106 (set_attr "prefix_extra" "1")
10107 (set_attr "prefix" "maybe_vex")
10108 (set_attr "length_immediate" "1")
10109 (set_attr "memory" "none,load")
10110 (set_attr "mode" "TI")])
;; pcmpestrm: produces V16QI operand 0 (constraint "Yz") and sets FLAGS_REG.
;; Inputs are V16QI operand 1 (register), V16QI operand 3 (register or
;; memory), SI operands 2 ("a") and 4 ("d"), and an immediate operand 5 in
;; the range 0..255.  Only operands 1, 3 and 5 appear in the assembler
;; template.
10112 (define_insn "sse4_2_pcmpestrm"
10113 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10115 [(match_operand:V16QI 1 "register_operand" "x,x")
10116 (match_operand:SI 2 "register_operand" "a,a")
10117 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10118 (match_operand:SI 4 "register_operand" "d,d")
10119 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10121 (set (reg:CC FLAGS_REG)
10130 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10131 [(set_attr "type" "sselog")
10132 (set_attr "prefix_data16" "1")
10133 (set_attr "prefix_extra" "1")
10134 (set_attr "length_immediate" "1")
10135 (set_attr "prefix" "maybe_vex")
10136 (set_attr "memory" "none,load")
10137 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only result is FLAGS_REG.  Operands 0 (V16QI)
;; and 1 (SI) are clobbered scratches.  There are four alternatives:
;;   0,1 - scratch 0 uses "Yz", scratch 1 is unconstrained ("X");
;;         emits [v]pcmpestrm.
;;   2,3 - scratch 0 is unconstrained, scratch 1 uses "c";
;;         emits [v]pcmpestri.
;; Within each pair, operand 4 is a register first and memory second; the
;; "memory" attribute mirrors this.
10139 (define_insn "sse4_2_pcmpestr_cconly"
10140 [(set (reg:CC FLAGS_REG)
10142 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10143 (match_operand:SI 3 "register_operand" "a,a,a,a")
10144 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10145 (match_operand:SI 5 "register_operand" "d,d,d,d")
10146 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10148 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10149 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10152 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10153 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10154 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10155 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10156 [(set_attr "type" "sselog")
10157 (set_attr "prefix_data16" "1")
10158 (set_attr "prefix_extra" "1")
10159 (set_attr "length_immediate" "1")
10160 (set_attr "memory" "none,load,none,load")
10161 (set_attr "prefix" "maybe_vex")
10162 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: SI operand 0 (constraint
;; "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.  The inputs are
;; V16QI operands 2 and 3 (operand 3 may be memory in the second
;; alternative) and an immediate operand 4 in the range 0..255.
;; The split requires can_create_pseudo_p ().  It uses the REG_UNUSED notes
;; on curr_insn to decide which results are live, and emits
;; sse4_2_pcmpistri, sse4_2_pcmpistrm and/or sse4_2_pcmpistr_cconly
;; accordingly.  If nothing is live it emits a NOTE_INSN_DELETED.
10164 (define_insn_and_split "sse4_2_pcmpistr"
10165 [(set (match_operand:SI 0 "register_operand" "=c,c")
10167 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10168 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10169 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10171 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10177 (set (reg:CC FLAGS_REG)
10184 && can_create_pseudo_p ()"
10189 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10190 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10191 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10194 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10195 operands[3], operands[4]));
10197 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10198 operands[3], operands[4]));
10199 if (flags && !(ecx || xmm0))
10200 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10201 operands[2], operands[3],
10203 if (!(flags || ecx || xmm0))
10204 emit_note (NOTE_INSN_DELETED);
10208 [(set_attr "type" "sselog")
10209 (set_attr "prefix_data16" "1")
10210 (set_attr "prefix_extra" "1")
10211 (set_attr "length_immediate" "1")
10212 (set_attr "memory" "none,load")
10213 (set_attr "mode" "TI")])
;; pcmpistri: produces SI operand 0 (constraint "c") and sets FLAGS_REG.
;; Inputs are V16QI operand 1 (register), V16QI operand 2 (register or
;; memory) and an immediate operand 3 in the range 0..255.
10215 (define_insn "sse4_2_pcmpistri"
10216 [(set (match_operand:SI 0 "register_operand" "=c,c")
10218 [(match_operand:V16QI 1 "register_operand" "x,x")
10219 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10220 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10222 (set (reg:CC FLAGS_REG)
10229 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10230 [(set_attr "type" "sselog")
10231 (set_attr "prefix_data16" "1")
10232 (set_attr "prefix_extra" "1")
10233 (set_attr "length_immediate" "1")
10234 (set_attr "prefix" "maybe_vex")
10235 (set_attr "memory" "none,load")
10236 (set_attr "mode" "TI")])
;; pcmpistrm: produces V16QI operand 0 (constraint "Yz") and sets FLAGS_REG.
;; Inputs are V16QI operand 1 (register), V16QI operand 2 (register or
;; memory) and an immediate operand 3 in the range 0..255.
10238 (define_insn "sse4_2_pcmpistrm"
10239 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10241 [(match_operand:V16QI 1 "register_operand" "x,x")
10242 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10243 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10245 (set (reg:CC FLAGS_REG)
10252 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10253 [(set_attr "type" "sselog")
10254 (set_attr "prefix_data16" "1")
10255 (set_attr "prefix_extra" "1")
10256 (set_attr "length_immediate" "1")
10257 (set_attr "prefix" "maybe_vex")
10258 (set_attr "memory" "none,load")
10259 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only result is FLAGS_REG.  Operands 0 (V16QI)
;; and 1 (SI) are clobbered scratches.  There are four alternatives:
;;   0,1 - scratch 0 uses "Yz", scratch 1 is unconstrained ("X");
;;         emits [v]pcmpistrm.
;;   2,3 - scratch 0 is unconstrained, scratch 1 uses "c";
;;         emits [v]pcmpistri.
;; Within each pair, operand 3 is a register first and memory second; the
;; "memory" attribute mirrors this.
10261 (define_insn "sse4_2_pcmpistr_cconly"
10262 [(set (reg:CC FLAGS_REG)
10264 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10265 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10266 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10268 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10269 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10272 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10273 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10274 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10275 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10276 [(set_attr "type" "sselog")
10277 (set_attr "prefix_data16" "1")
10278 (set_attr "prefix_extra" "1")
10279 (set_attr "length_immediate" "1")
10280 (set_attr "memory" "none,load,none,load")
10281 (set_attr "prefix" "maybe_vex")
10282 (set_attr "mode" "TI")])
10284 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10286 ;; XOP instructions
10288 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10290 ;; XOP parallel integer multiply/add instructions.
10291 ;; Note the XOP multiply/add instructions
10292 ;; a[i] = b[i] * c[i] + d[i];
10293 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI.  Operands 1 and 2 are marked commutative ("%") and only
;; operand 2 may be memory.  Operand 3 has a nonimmediate predicate, but its
;; "x" constraint restricts it to a register.
10294 (define_insn "xop_pmacsww"
10295 [(set (match_operand:V8HI 0 "register_operand" "=x")
10298 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10299 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10300 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10302 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10303 [(set_attr "type" "ssemuladd")
10304 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult:V8HI of operands 1 and 2 combined with
;; operand 3.  Operands 1 and 2 are commutative ("%") and only operand 2 may
;; be memory; operand 3 is restricted to a register by its "x" constraint.
10306 (define_insn "xop_pmacssww"
10307 [(set (match_operand:V8HI 0 "register_operand" "=x")
10309 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10310 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10311 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10313 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10314 [(set_attr "type" "ssemuladd")
10315 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 are marked commutative ("%") and only
;; operand 2 may be memory.  Operand 3 is restricted to a register by its
;; "x" constraint.
10317 (define_insn "xop_pmacsdd"
10318 [(set (match_operand:V4SI 0 "register_operand" "=x")
10321 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10322 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10323 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10325 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10326 [(set_attr "type" "ssemuladd")
10327 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult:V4SI of operands 1 and 2 combined with
;; operand 3.  Operands 1 and 2 are commutative ("%") and only operand 2 may
;; be memory; operand 3 is restricted to a register by its "x" constraint.
10329 (define_insn "xop_pmacssdd"
10330 [(set (match_operand:V4SI 0 "register_operand" "=x")
10332 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10333 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10334 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10336 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10337 [(set_attr "type" "ssemuladd")
10338 (set_attr "mode" "TI")])
;; vpmacssdql: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3.
;; The element selections on both V4SI inputs begin at index 1.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10340 (define_insn "xop_pmacssdql"
10341 [(set (match_operand:V2DI 0 "register_operand" "=x")
10346 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10347 (parallel [(const_int 1)
10350 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10351 (parallel [(const_int 1)
10353 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10355 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10356 [(set_attr "type" "ssemuladd")
10357 (set_attr "mode" "TI")])
;; vpmacssdqh: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3.
;; The element selections on both V4SI inputs begin at index 0.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10359 (define_insn "xop_pmacssdqh"
10360 [(set (match_operand:V2DI 0 "register_operand" "=x")
10365 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10366 (parallel [(const_int 0)
10370 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10371 (parallel [(const_int 0)
10373 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10375 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10376 [(set_attr "type" "ssemuladd")
10377 (set_attr "mode" "TI")])
;; vpmacsdql: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3.
;; The element selections on both V4SI inputs begin at index 1.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10379 (define_insn "xop_pmacsdql"
10380 [(set (match_operand:V2DI 0 "register_operand" "=x")
10385 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10386 (parallel [(const_int 1)
10390 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10391 (parallel [(const_int 1)
10393 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10395 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10396 [(set_attr "type" "ssemuladd")
10397 (set_attr "mode" "TI")])
10399 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10400 ;; fake it with a multiply/add. In general, we expect the define_split to
10401 ;; occur before register allocation, so we have to handle the corner case where
10402 ;; the target is the same as operands 1/2
;; V2DI product of elements 1 and 3 of V4SI operands 1 and 2.  The
;; destination is early-clobbered ("=&x"), so it cannot share a register
;; with an input.  The split condition adds "&& reload_completed", and the
;; preparation statement sets operands[3] to the V2DI zero constant for use
;; by the replacement pattern.
10403 (define_insn_and_split "xop_mulv2div2di3_low"
10404 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10408 (match_operand:V4SI 1 "register_operand" "%x")
10409 (parallel [(const_int 1)
10413 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10414 (parallel [(const_int 1)
10415 (const_int 3)])))))]
10418 "&& reload_completed"
10419 [(set (match_dup 0)
10427 (parallel [(const_int 1)
10432 (parallel [(const_int 1)
10436 operands[3] = CONST0_RTX (V2DImode);
10438 [(set_attr "type" "ssemul")
10439 (set_attr "mode" "TI")])
;; vpmacsdqh: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3.
;; The element selections on both V4SI inputs begin at index 0.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10441 (define_insn "xop_pmacsdqh"
10442 [(set (match_operand:V2DI 0 "register_operand" "=x")
10447 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10448 (parallel [(const_int 0)
10452 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10453 (parallel [(const_int 0)
10455 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10457 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10458 [(set_attr "type" "ssemuladd")
10459 (set_attr "mode" "TI")])
10461 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10462 ;; fake it with a multiply/add. In general, we expect the define_split to
10463 ;; occur before register allocation, so we have to handle the corner case where
10464 ;; the target is the same as either operands[1] or operands[2]
;; V2DI product of elements 0 and 2 of V4SI operands 1 and 2.  The
;; destination is early-clobbered ("=&x"), so it cannot share a register
;; with an input.  The split condition adds "&& reload_completed", and the
;; preparation statement sets operands[3] to the V2DI zero constant for use
;; by the replacement pattern.
10465 (define_insn_and_split "xop_mulv2div2di3_high"
10466 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10470 (match_operand:V4SI 1 "register_operand" "%x")
10471 (parallel [(const_int 0)
10475 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10476 (parallel [(const_int 0)
10477 (const_int 2)])))))]
10480 "&& reload_completed"
10481 [(set (match_dup 0)
10489 (parallel [(const_int 0)
10494 (parallel [(const_int 0)
10498 operands[3] = CONST0_RTX (V2DImode);
10500 [(set_attr "type" "ssemul")
10501 (set_attr "mode" "TI")])
10503 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3.
;; The element selections on both V8HI inputs begin at index 1.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10504 (define_insn "xop_pmacsswd"
10505 [(set (match_operand:V4SI 0 "register_operand" "=x")
10510 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10511 (parallel [(const_int 1)
10517 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 1)
10522 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10524 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10525 [(set_attr "type" "ssemuladd")
10526 (set_attr "mode" "TI")])
;; vpmacswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3.
;; The element selections on both V8HI inputs begin at index 1.  Only
;; operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10528 (define_insn "xop_pmacswd"
10529 [(set (match_operand:V4SI 0 "register_operand" "=x")
10534 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10535 (parallel [(const_int 1)
10541 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10542 (parallel [(const_int 1)
10546 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10548 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10549 [(set_attr "type" "ssemuladd")
10550 (set_attr "mode" "TI")])
;; vpmadcsswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3.
;; The inputs are selected twice: once with selections beginning at index 0
;; and once with selections beginning at index 1 (the latter ending at 7).
;; Only operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10552 (define_insn "xop_pmadcsswd"
10553 [(set (match_operand:V4SI 0 "register_operand" "=x")
10559 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10560 (parallel [(const_int 0)
10566 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10567 (parallel [(const_int 0)
10575 (parallel [(const_int 1)
10582 (parallel [(const_int 1)
10585 (const_int 7)])))))
10586 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10588 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10589 [(set_attr "type" "ssemuladd")
10590 (set_attr "mode" "TI")])
;; vpmadcswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3.
;; The inputs are selected twice: once with selections beginning at index 0
;; and once with selections beginning at index 1 (the latter ending at 7).
;; Only operand 2 may be memory; operand 3 is register-only via its "x"
;; constraint.
10592 (define_insn "xop_pmadcswd"
10593 [(set (match_operand:V4SI 0 "register_operand" "=x")
10599 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10600 (parallel [(const_int 0)
10606 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10607 (parallel [(const_int 0)
10615 (parallel [(const_int 1)
10622 (parallel [(const_int 1)
10625 (const_int 7)])))))
10626 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10628 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10629 [(set_attr "type" "ssemuladd")
10630 (set_attr "mode" "TI")])
10632 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator.  Operand 3 is listed first in
;; the RTL, followed by operands 1 and 2.  The two alternatives allow a
;; memory operand in either operand 3 or operand 2; operand 1 is always a
;; register.
10633 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10634 [(set (match_operand:V 0 "register_operand" "=x,x")
10636 (match_operand:V 3 "nonimmediate_operand" "x,m")
10637 (match_operand:V 1 "register_operand" "x,x")
10638 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10640 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10641 [(set_attr "type" "sse4arg")])
10643 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V8HI register result from a single V16QI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 15.
10644 (define_insn "xop_phaddbw"
10645 [(set (match_operand:V8HI 0 "register_operand" "=x")
10649 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10650 (parallel [(const_int 0)
10661 (parallel [(const_int 1)
10668 (const_int 15)])))))]
10670 "vphaddbw\t{%1, %0|%0, %1}"
10671 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V4SI register result from a single V16QI operand (register or
;; memory).  Four element selections are applied to operand 1, beginning at
;; indices 0, 1, 2 and 3; the last one ends at index 15.
10673 (define_insn "xop_phaddbd"
10674 [(set (match_operand:V4SI 0 "register_operand" "=x")
10679 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10680 (parallel [(const_int 0)
10687 (parallel [(const_int 1)
10690 (const_int 13)]))))
10695 (parallel [(const_int 2)
10702 (parallel [(const_int 3)
10705 (const_int 15)]))))))]
10707 "vphaddbd\t{%1, %0|%0, %1}"
10708 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V2DI register result from a single V16QI operand (register or
;; memory).  Eight element selections are applied to operand 1, beginning
;; at indices 0, 1, 2, 3, 8, 9, 10 and 11.
10710 (define_insn "xop_phaddbq"
10711 [(set (match_operand:V2DI 0 "register_operand" "=x")
10717 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10718 (parallel [(const_int 0)
10723 (parallel [(const_int 1)
10729 (parallel [(const_int 2)
10734 (parallel [(const_int 3)
10735 (const_int 7)])))))
10741 (parallel [(const_int 8)
10746 (parallel [(const_int 9)
10747 (const_int 13)]))))
10752 (parallel [(const_int 10)
10757 (parallel [(const_int 11)
10758 (const_int 15)])))))))]
10760 "vphaddbq\t{%1, %0|%0, %1}"
10761 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V4SI register result from a single V8HI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 7.
10763 (define_insn "xop_phaddwd"
10764 [(set (match_operand:V4SI 0 "register_operand" "=x")
10768 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10769 (parallel [(const_int 0)
10776 (parallel [(const_int 1)
10779 (const_int 7)])))))]
10781 "vphaddwd\t{%1, %0|%0, %1}"
10782 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V2DI register result from a single V8HI operand (register or
;; memory).  Four element selections are applied to operand 1, beginning at
;; indices 0, 1, 2 and 3; the last one ends at index 7.
10784 (define_insn "xop_phaddwq"
10785 [(set (match_operand:V2DI 0 "register_operand" "=x")
10790 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10791 (parallel [(const_int 0)
10796 (parallel [(const_int 1)
10802 (parallel [(const_int 2)
10807 (parallel [(const_int 3)
10808 (const_int 7)]))))))]
10810 "vphaddwq\t{%1, %0|%0, %1}"
10811 [(set_attr "type" "sseiadd1")])
;; vphadddq: V2DI register result from a single V4SI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and the other being elements 1 and 3.
10813 (define_insn "xop_phadddq"
10814 [(set (match_operand:V2DI 0 "register_operand" "=x")
10818 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10819 (parallel [(const_int 0)
10824 (parallel [(const_int 1)
10825 (const_int 3)])))))]
10827 "vphadddq\t{%1, %0|%0, %1}"
10828 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V8HI register result from a single V16QI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 15.
10830 (define_insn "xop_phaddubw"
10831 [(set (match_operand:V8HI 0 "register_operand" "=x")
10835 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10836 (parallel [(const_int 0)
10847 (parallel [(const_int 1)
10854 (const_int 15)])))))]
10856 "vphaddubw\t{%1, %0|%0, %1}"
10857 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V4SI register result from a single V16QI operand (register or
;; memory).  Four element selections are applied to operand 1, beginning at
;; indices 0, 1, 2 and 3; the last one ends at index 15.
10859 (define_insn "xop_phaddubd"
10860 [(set (match_operand:V4SI 0 "register_operand" "=x")
10865 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10866 (parallel [(const_int 0)
10873 (parallel [(const_int 1)
10876 (const_int 13)]))))
10881 (parallel [(const_int 2)
10888 (parallel [(const_int 3)
10891 (const_int 15)]))))))]
10893 "vphaddubd\t{%1, %0|%0, %1}"
10894 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V2DI register result from a single V16QI operand (register or
;; memory).  Eight element selections are applied to operand 1, beginning
;; at indices 0, 1, 2, 3, 8, 9, 10 and 11.
10896 (define_insn "xop_phaddubq"
10897 [(set (match_operand:V2DI 0 "register_operand" "=x")
10903 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10904 (parallel [(const_int 0)
10909 (parallel [(const_int 1)
10915 (parallel [(const_int 2)
10920 (parallel [(const_int 3)
10921 (const_int 7)])))))
10927 (parallel [(const_int 8)
10932 (parallel [(const_int 9)
10933 (const_int 13)]))))
10938 (parallel [(const_int 10)
10943 (parallel [(const_int 11)
10944 (const_int 15)])))))))]
10946 "vphaddubq\t{%1, %0|%0, %1}"
10947 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V4SI register result from a single V8HI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 7.
10949 (define_insn "xop_phadduwd"
10950 [(set (match_operand:V4SI 0 "register_operand" "=x")
10954 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10955 (parallel [(const_int 0)
10962 (parallel [(const_int 1)
10965 (const_int 7)])))))]
10967 "vphadduwd\t{%1, %0|%0, %1}"
10968 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V2DI register result from a single V8HI operand (register or
;; memory).  Four element selections are applied to operand 1, beginning at
;; indices 0, 1, 2 and 3; the last one ends at index 7.
10970 (define_insn "xop_phadduwq"
10971 [(set (match_operand:V2DI 0 "register_operand" "=x")
10976 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10977 (parallel [(const_int 0)
10982 (parallel [(const_int 1)
10988 (parallel [(const_int 2)
10993 (parallel [(const_int 3)
10994 (const_int 7)]))))))]
10996 "vphadduwq\t{%1, %0|%0, %1}"
10997 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V2DI register result from a single V4SI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and the other being elements 1 and 3.
10999 (define_insn "xop_phaddudq"
11000 [(set (match_operand:V2DI 0 "register_operand" "=x")
11004 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11005 (parallel [(const_int 0)
11010 (parallel [(const_int 1)
11011 (const_int 3)])))))]
11013 "vphaddudq\t{%1, %0|%0, %1}"
11014 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V8HI register result from a single V16QI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 15.
11016 (define_insn "xop_phsubbw"
11017 [(set (match_operand:V8HI 0 "register_operand" "=x")
11021 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11022 (parallel [(const_int 0)
11033 (parallel [(const_int 1)
11040 (const_int 15)])))))]
11042 "vphsubbw\t{%1, %0|%0, %1}"
11043 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V4SI register result from a single V8HI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and one beginning at index 1 and ending at index 7.
11045 (define_insn "xop_phsubwd"
11046 [(set (match_operand:V4SI 0 "register_operand" "=x")
11050 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11051 (parallel [(const_int 0)
11058 (parallel [(const_int 1)
11061 (const_int 7)])))))]
11063 "vphsubwd\t{%1, %0|%0, %1}"
11064 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V2DI register result from a single V4SI operand (register or
;; memory).  Two element selections are applied to operand 1, one beginning
;; at index 0 and the other being elements 1 and 3.
11066 (define_insn "xop_phsubdq"
11067 [(set (match_operand:V2DI 0 "register_operand" "=x")
11071 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11072 (parallel [(const_int 0)
11077 (parallel [(const_int 1)
11078 (const_int 3)])))))]
11080 "vphsubdq\t{%1, %0|%0, %1}"
11081 [(set_attr "type" "sseiadd1")])
11083 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE over three V16QI inputs.  Operand 1 is
;; always a register.  The two alternatives allow memory in either operand 2
;; or operand 3, and the insn condition rejects the case where both are
;; memory.
11084 (define_insn "xop_pperm"
11085 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11087 [(match_operand:V16QI 1 "register_operand" "x,x")
11088 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11089 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11090 UNSPEC_XOP_PERMUTE))]
11091 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11092 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11093 [(set_attr "type" "sse4arg")
11094 (set_attr "mode" "TI")])
11096 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into a V4SI result using vpperm.
;; V16QI operand 3 appears only in a (use ...) and is passed to vpperm as
;; its selector argument.  Operands 2 and 3 may not both be memory.
11097 (define_insn "xop_pperm_pack_v2di_v4si"
11098 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11101 (match_operand:V2DI 1 "register_operand" "x,x"))
11103 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11104 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11105 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11106 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11107 [(set_attr "type" "sse4arg")
11108 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into a V8HI result using vpperm.
;; V16QI operand 3 appears only in a (use ...) and is passed to vpperm as
;; its selector argument.  Operands 2 and 3 may not both be memory.
11110 (define_insn "xop_pperm_pack_v4si_v8hi"
11111 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11114 (match_operand:V4SI 1 "register_operand" "x,x"))
11116 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11117 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11118 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11119 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11120 [(set_attr "type" "sse4arg")
11121 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into a V16QI result using
;; vpperm.  V16QI operand 3 appears only in a (use ...) and is passed to
;; vpperm as its selector argument.  Operands 2 and 3 may not both be
;; memory.
11123 (define_insn "xop_pperm_pack_v8hi_v16qi"
11124 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11127 (match_operand:V8HI 1 "register_operand" "x,x"))
11129 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11130 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11131 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11132 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11133 [(set_attr "type" "sse4arg")
11134 (set_attr "mode" "TI")])
11136 ;; XOP packed rotate instructions
;; Expander for rotl<mode>3 on VI_128 modes with an SImode count.
;; When operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if necessary, replicated into all
;; <ssescalarnum> elements of a vector with vec_init<mode>, and the
;; variable-count insn xop_vrotl<mode>3 is emitted.
11137 (define_expand "rotl<mode>3"
11138 [(set (match_operand:VI_128 0 "register_operand" "")
11140 (match_operand:VI_128 1 "nonimmediate_operand" "")
11141 (match_operand:SI 2 "general_operand")))]
11144 /* If we were given a scalar, convert it to parallel */
11145 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11147 rtvec vs = rtvec_alloc (<ssescalarnum>);
11148 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11149 rtx reg = gen_reg_rtx (<MODE>mode);
11150 rtx op2 = operands[2];
11153 if (GET_MODE (op2) != <ssescalarmode>mode)
11155 op2 = gen_reg_rtx (<ssescalarmode>mode);
11156 convert_move (op2, operands[2], false);
/* Every vector element receives the same count.  */
11159 for (i = 0; i < <ssescalarnum>; i++)
11160 RTVEC_ELT (vs, i) = op2;
11162 emit_insn (gen_vec_init<mode> (reg, par));
11163 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotr<mode>3 on VI_128 modes with an SImode count.
;; When operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if necessary and replicated into
;; all <ssescalarnum> elements of a vector.  That vector is negated with
;; neg<mode>2 and passed to xop_vrotl<mode>3, so the right rotate is
;; expressed through the left-rotate insn with a negated count.
11168 (define_expand "rotr<mode>3"
11169 [(set (match_operand:VI_128 0 "register_operand" "")
11171 (match_operand:VI_128 1 "nonimmediate_operand" "")
11172 (match_operand:SI 2 "general_operand")))]
11175 /* If we were given a scalar, convert it to parallel */
11176 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11178 rtvec vs = rtvec_alloc (<ssescalarnum>);
11179 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11180 rtx neg = gen_reg_rtx (<MODE>mode);
11181 rtx reg = gen_reg_rtx (<MODE>mode);
11182 rtx op2 = operands[2];
11185 if (GET_MODE (op2) != <ssescalarmode>mode)
11187 op2 = gen_reg_rtx (<ssescalarmode>mode);
11188 convert_move (op2, operands[2], false);
/* Every vector element receives the same count.  */
11191 for (i = 0; i < <ssescalarnum>; i++)
11192 RTVEC_ELT (vs, i) = op2;
11194 emit_insn (gen_vec_init<mode> (reg, par));
11195 emit_insn (gen_neg<mode>2 (neg, reg));
11196 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count rotate for VI_128 modes.  Operand 1 may be a register or
;; memory; operand 2 is an immediate accepted by
;; const_0_to_<sserotatemax>_operand and is passed to vprot<ssemodesuffix>
;; unchanged.
11201 (define_insn "xop_rotl<mode>3"
11202 [(set (match_operand:VI_128 0 "register_operand" "=x")
11204 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11205 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11207 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11208 [(set_attr "type" "sseishft")
11209 (set_attr "length_immediate" "1")
11210 (set_attr "mode" "TI")])
;; Immediate-count rotate for VI_128 modes, emitted through
;; vprot<ssemodesuffix> with a rewritten count:
;;   operands[3] = (element width in bits) - operands[2].
;; The element width must come from the scalar mode.  <ssescalarnum> is the
;; number of elements (it sizes the rtvec and bounds the fill loop in
;; rotl<mode>3), so <ssescalarnum> * 8 is not the element width in general.
11212 (define_insn "xop_rotr<mode>3"
11213 [(set (match_operand:VI_128 0 "register_operand" "=x")
11215 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11216 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11219 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11220 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11222 [(set_attr "type" "sseishft")
11223 (set_attr "length_immediate" "1")
11224 (set_attr "mode" "TI")])
;; Expander for a rotate whose count is a VI_128 vector register: operand 2
;; is negated into a fresh register with neg<mode>2 and the result is fed to
;; xop_vrotl<mode>3.
11226 (define_expand "vrotr<mode>3"
11227 [(match_operand:VI_128 0 "register_operand" "")
11228 (match_operand:VI_128 1 "register_operand" "")
11229 (match_operand:VI_128 2 "register_operand" "")]
11232 rtx reg = gen_reg_rtx (<MODE>mode);
11233 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11234 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate whose count is a VI_128 vector register: forwards
;; all three operands unchanged to xop_vrotl<mode>3.
11238 (define_expand "vrotl<mode>3"
11239 [(match_operand:VI_128 0 "register_operand" "")
11240 (match_operand:VI_128 1 "register_operand" "")
11241 (match_operand:VI_128 2 "register_operand" "")]
11244 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Vector-count vprot<ssemodesuffix>.  The RTL is an if_then_else keyed on
;; operand 2, and one arm uses (neg (match_dup 2)).  The two alternatives
;; allow memory in either operand 2 or operand 1, and the insn condition
;; rejects the case where both are memory.
11248 (define_insn "xop_vrotl<mode>3"
11249 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11250 (if_then_else:VI_128
11252 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11255 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11259 (neg:VI_128 (match_dup 2)))))]
11260 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11261 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11262 [(set_attr "type" "sseishft")
11263 (set_attr "prefix_data16" "0")
11264 (set_attr "prefix_extra" "2")
11265 (set_attr "mode" "TI")])
11267 ;; XOP packed shift instructions.
11268 ;; FIXME: add V2DI back in
;; Expander on VI124_128 modes with a vector count: operand 2 is negated
;; into a fresh register with neg<mode>2 and the result is fed to
;; xop_lshl<mode>3.
11269 (define_expand "vlshr<mode>3"
11270 [(match_operand:VI124_128 0 "register_operand" "")
11271 (match_operand:VI124_128 1 "register_operand" "")
11272 (match_operand:VI124_128 2 "register_operand" "")]
11275 rtx neg = gen_reg_rtx (<MODE>mode);
11276 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11277 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander on VI124_128 modes with a vector count: operand 2 is negated
;; into a fresh register with neg<mode>2 and the result is fed to
;; xop_ashl<mode>3.
11281 (define_expand "vashr<mode>3"
11282 [(match_operand:VI124_128 0 "register_operand" "")
11283 (match_operand:VI124_128 1 "register_operand" "")
11284 (match_operand:VI124_128 2 "register_operand" "")]
11287 rtx neg = gen_reg_rtx (<MODE>mode);
11288 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11289 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander on VI124_128 modes with a vector count: forwards all three
;; operands unchanged to xop_ashl<mode>3.
11293 (define_expand "vashl<mode>3"
11294 [(match_operand:VI124_128 0 "register_operand" "")
11295 (match_operand:VI124_128 1 "register_operand" "")
11296 (match_operand:VI124_128 2 "register_operand" "")]
11299 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Vector-count vpsha<ssemodesuffix>.  The RTL is an if_then_else keyed on
;; operand 2, and one arm uses (neg (match_dup 2)).  The two alternatives
;; allow memory in either operand 2 or operand 1, and the insn condition
;; rejects the case where both are memory.
11303 (define_insn "xop_ashl<mode>3"
11304 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11305 (if_then_else:VI_128
11307 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11310 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11314 (neg:VI_128 (match_dup 2)))))]
11315 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11316 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11317 [(set_attr "type" "sseishft")
11318 (set_attr "prefix_data16" "0")
11319 (set_attr "prefix_extra" "2")
11320 (set_attr "mode" "TI")])
;; Vector-count vpshl<ssemodesuffix>.  The RTL is an if_then_else keyed on
;; operand 2, and one arm uses (neg (match_dup 2)).  The two alternatives
;; allow memory in either operand 2 or operand 1, and the insn condition
;; rejects the case where both are memory.
11322 (define_insn "xop_lshl<mode>3"
11323 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11324 (if_then_else:VI_128
11326 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11329 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11333 (neg:VI_128 (match_dup 2)))))]
11334 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11335 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11336 [(set_attr "type" "sseishft")
11337 (set_attr "prefix_data16" "0")
11338 (set_attr "prefix_extra" "2")
11339 (set_attr "mode" "TI")])
11341 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for V16QI with a scalar SImode count (register or constant).
;; The count is replicated into all 16 elements of a V16QI vector with
;; vec_initv16qi, and that vector is passed to xop_ashlv16qi3.
11342 (define_expand "ashlv16qi3"
11343 [(match_operand:V16QI 0 "register_operand" "")
11344 (match_operand:V16QI 1 "register_operand" "")
11345 (match_operand:SI 2 "nonmemory_operand" "")]
11348 rtvec vs = rtvec_alloc (16);
11349 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11350 rtx reg = gen_reg_rtx (V16QImode);
11352 for (i = 0; i < 16; i++)
11353 RTVEC_ELT (vs, i) = operands[2];
11355 emit_insn (gen_vec_initv16qi (reg, par));
11356 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for V16QI with a scalar SImode count (register or constant).
;; The count is replicated into all 16 elements of a V16QI vector with
;; vec_initv16qi, and that vector is passed to xop_lshlv16qi3.
11360 (define_expand "lshlv16qi3"
11361 [(match_operand:V16QI 0 "register_operand" "")
11362 (match_operand:V16QI 1 "register_operand" "")
11363 (match_operand:SI 2 "nonmemory_operand" "")]
11366 rtvec vs = rtvec_alloc (16);
11367 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11368 rtx reg = gen_reg_rtx (V16QImode);
11370 for (i = 0; i < 16; i++)
11371 RTVEC_ELT (vs, i) = operands[2];
11373 emit_insn (gen_vec_initv16qi (reg, par));
11374 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar SImode count,
;; implemented with xop_ashlv16qi3 and a negated count.  A CONST_INT count
;; is negated at expand time before being broadcast; for a non-constant
;; count the broadcast vector is negated with negv16qi2 and the negated
;; vector is passed to xop_ashlv16qi3.
11378 (define_expand "ashrv16qi3"
11379 [(match_operand:V16QI 0 "register_operand" "")
11380 (match_operand:V16QI 1 "register_operand" "")
11381 (match_operand:SI 2 "nonmemory_operand" "")]
11384 rtvec vs = rtvec_alloc (16);
11385 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11386 rtx reg = gen_reg_rtx (V16QImode);
11388 rtx ele = ((CONST_INT_P (operands[2]))
11389 ? GEN_INT (- INTVAL (operands[2]))
11392 for (i = 0; i < 16; i++)
11393 RTVEC_ELT (vs, i) = ele;
11395 emit_insn (gen_vec_initv16qi (reg, par));
11397 if (!CONST_INT_P (operands[2]))
11399 rtx neg = gen_reg_rtx (V16QImode);
11400 emit_insn (gen_negv16qi2 (neg, reg));
11401 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11404 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a scalar count, implemented
;; with xop_ashlv2di3 and a negated count.  A CONST_INT count is negated at
;; expand time.  A count whose mode is not DImode is first converted into a
;; DImode register with convert_move and then negated with negdi2; a DImode
;; count is negated with negdi2 directly.  The negated count fills both
;; elements of the PARALLEL given to vec_initv2di.
11409 (define_expand "ashrv2di3"
11410 [(match_operand:V2DI 0 "register_operand" "")
11411 (match_operand:V2DI 1 "register_operand" "")
11412 (match_operand:DI 2 "nonmemory_operand" "")]
11415 rtvec vs = rtvec_alloc (2);
11416 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11417 rtx reg = gen_reg_rtx (V2DImode);
11420 if (CONST_INT_P (operands[2]))
11421 ele = GEN_INT (- INTVAL (operands[2]));
11422 else if (GET_MODE (operands[2]) != DImode)
11424 rtx move = gen_reg_rtx (DImode);
11425 ele = gen_reg_rtx (DImode);
11426 convert_move (move, operands[2], false);
11427 emit_insn (gen_negdi2 (ele, move));
11431 ele = gen_reg_rtx (DImode);
11432 emit_insn (gen_negdi2 (ele, operands[2]));
11435 RTVEC_ELT (vs, 0) = ele;
11436 RTVEC_ELT (vs, 1) = ele;
11437 emit_insn (gen_vec_initv2di (reg, par));
11438 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11442 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for every mode of the FMAMODE iterator; the
;; source (operand 1) may be a register or memory, the result is a register.
11443 (define_insn "xop_frcz<mode>2"
11444 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11446 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11449 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11450 [(set_attr "type" "ssecvt1")
11451 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 "vm" form of frcz.  Callers supply operands 0
;; and 1; the preparation code sets operands[3] to the all-zero constant of
;; the vector mode (CONST0_RTX) for use by the expansion template.
11454 (define_expand "xop_vmfrcz<mode>2"
11455 [(set (match_operand:VF_128 0 "register_operand")
11458 [(match_operand:VF_128 1 "nonimmediate_operand")]
11464 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the VF_128 "vm" frcz form: emits the scalar-suffixed
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must satisfy const0_operand.
11467 (define_insn "*xop_vmfrcz_<mode>"
11468 [(set (match_operand:VF_128 0 "register_operand" "=x")
11471 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11473 (match_operand:VF_128 2 "const0_operand")
11476 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11477 [(set_attr "type" "ssecvt1")
11478 (set_attr "mode" "<MODE>")])
;; XOP integer vector compare.  Operator 1 must satisfy
;; ix86_comparison_int_operator and is applied to operands 2 (register) and
;; 3 (register or memory).  The mnemonic is vpcom followed by the %Y1
;; expansion of operator 1 and the element-size suffix.
11480 (define_insn "xop_maskcmp<mode>3"
11481 [(set (match_operand:VI_128 0 "register_operand" "=x")
11482 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11483 [(match_operand:VI_128 2 "register_operand" "x")
11484 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11486 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11487 [(set_attr "type" "sse4arg")
11488 (set_attr "prefix_data16" "0")
11489 (set_attr "prefix_rep" "0")
11490 (set_attr "prefix_extra" "2")
11491 (set_attr "length_immediate" "1")
11492 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3.  Operator 1 must satisfy
;; ix86_comparison_uns_operator, and the mnemonic carries a "u" between the
;; %Y1 expansion and the element-size suffix.
11494 (define_insn "xop_maskcmp_uns<mode>3"
11495 [(set (match_operand:VI_128 0 "register_operand" "=x")
11496 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11497 [(match_operand:VI_128 2 "register_operand" "x")
11498 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11500 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11501 [(set_attr "type" "ssecmp")
11502 (set_attr "prefix_data16" "0")
11503 (set_attr "prefix_rep" "0")
11504 (set_attr "prefix_extra" "2")
11505 (set_attr "length_immediate" "1")
11506 (set_attr "mode" "TI")])
11508 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11509 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11510 ;; the exact instruction generated for the intrinsic.
;; The comparison operator is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the output
;; template is the same as in xop_maskcmp_uns<mode>3.
11511 (define_insn "xop_maskcmp_uns2<mode>3"
11512 [(set (match_operand:VI_128 0 "register_operand" "=x")
11514 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11515 [(match_operand:VI_128 2 "register_operand" "x")
11516 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11517 UNSPEC_XOP_UNSIGNED_CMP))]
11519 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11520 [(set_attr "type" "ssecmp")
11521 (set_attr "prefix_data16" "0")
11522 (set_attr "prefix_extra" "2")
11523 (set_attr "length_immediate" "1")
11524 (set_attr "mode" "TI")])
11526 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11527 ;; being added here to be complete.
;; Operand 3 is a constant integer selector: a nonzero value emits
;; vpcomtrue<ssemodesuffix>, zero emits vpcomfalse<ssemodesuffix>.
11528 (define_insn "xop_pcom_tf<mode>3"
11529 [(set (match_operand:VI_128 0 "register_operand" "=x")
11531 [(match_operand:VI_128 1 "register_operand" "x")
11532 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11533 (match_operand:SI 3 "const_int_operand" "n")]
11534 UNSPEC_XOP_TRUEFALSE))]
11537 return ((INTVAL (operands[3]) != 0)
11538 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11539 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11541 [(set_attr "type" "ssecmp")
11542 (set_attr "prefix_data16" "0")
11543 (set_attr "prefix_extra" "2")
11544 (set_attr "length_immediate" "1")
11545 (set_attr "mode" "TI")])
;; XOP vpermil2<ssemodesuffix>: two VF sources (operands 1 and 2), an
;; integer vector of the matching <sseintvecmode> (operand 3, register or
;; memory) and an immediate restricted by const_0_to_3_operand (operand 4).
11547 (define_insn "xop_vpermil2<mode>3"
11548 [(set (match_operand:VF 0 "register_operand" "=x")
11550 [(match_operand:VF 1 "register_operand" "x")
11551 (match_operand:VF 2 "nonimmediate_operand" "%x")
11552 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11553 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11556 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11557 [(set_attr "type" "sse4arg")
11558 (set_attr "length_immediate" "1")
11559 (set_attr "mode" "<MODE>")])
11561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Alternative 0 (isa noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (isa avx) is
;; the three-operand VEX form.  Operand 2 may be a register or memory.
11563 (define_insn "aesenc"
11564 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11565 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11566 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11570 aesenc\t{%2, %0|%0, %2}
11571 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11572 [(set_attr "isa" "noavx,avx")
11573 (set_attr "type" "sselog1")
11574 (set_attr "prefix_extra" "1")
11575 (set_attr "prefix" "orig,vex")
11576 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI, represented as UNSPEC_AESENCLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) is the three-operand VEX form.
11578 (define_insn "aesenclast"
11579 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11580 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11581 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11582 UNSPEC_AESENCLAST))]
11585 aesenclast\t{%2, %0|%0, %2}
11586 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11587 [(set_attr "isa" "noavx,avx")
11588 (set_attr "type" "sselog1")
11589 (set_attr "prefix_extra" "1")
11590 (set_attr "prefix" "orig,vex")
11591 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternative 0 (isa noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (isa avx) is
;; the three-operand VEX form.  Operand 2 may be a register or memory.
11593 (define_insn "aesdec"
11594 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11595 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11596 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11600 aesdec\t{%2, %0|%0, %2}
11601 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11602 [(set_attr "isa" "noavx,avx")
11603 (set_attr "type" "sselog1")
11604 (set_attr "prefix_extra" "1")
11605 (set_attr "prefix" "orig,vex")
11606 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI, represented as UNSPEC_AESDECLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) is the three-operand VEX form.
11608 (define_insn "aesdeclast"
11609 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11610 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11611 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11612 UNSPEC_AESDECLAST))]
11615 aesdeclast\t{%2, %0|%0, %2}
11616 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11617 [(set_attr "isa" "noavx,avx")
11618 (set_attr "type" "sselog1")
11619 (set_attr "prefix_extra" "1")
11620 (set_attr "prefix" "orig,vex")
11621 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  The template
;; uses the %v prefix and the prefix attribute is maybe_vex, so a single
;; alternative serves both encodings.
11623 (define_insn "aesimc"
11624 [(set (match_operand:V2DI 0 "register_operand" "=x")
11625 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11628 "%vaesimc\t{%1, %0|%0, %1}"
11629 [(set_attr "type" "sselog1")
11630 (set_attr "prefix_extra" "1")
11631 (set_attr "prefix" "maybe_vex")
11632 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, represented as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is a register or memory source and operand 2 an immediate restricted by
;; const_0_to_255_operand.  Uses the %v prefix / maybe_vex like aesimc.
11634 (define_insn "aeskeygenassist"
11635 [(set (match_operand:V2DI 0 "register_operand" "=x")
11636 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11637 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11638 UNSPEC_AESKEYGENASSIST))]
11640 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11641 [(set_attr "type" "sselog1")
11642 (set_attr "prefix_extra" "1")
11643 (set_attr "length_immediate" "1")
11644 (set_attr "prefix" "maybe_vex")
11645 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI with an immediate (operand 3) restricted
;; by const_0_to_255_operand.  Alternative 0 (isa noavx) ties operand 1 to
;; the destination; alternative 1 (isa avx) is the three-operand VEX form.
11647 (define_insn "pclmulqdq"
11648 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11649 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11650 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11651 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11655 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11656 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11657 [(set_attr "isa" "noavx,avx")
11658 (set_attr "type" "sselog1")
11659 (set_attr "prefix_extra" "1")
11660 (set_attr "length_immediate" "1")
11661 (set_attr "prefix" "orig,vex")
11662 (set_attr "mode" "TI")])
;; Expander that builds the vzeroall PARALLEL by hand.  Element 0 is an
;; UNSPEC_VOLATILE; each of the following nregs elements is a SET of one SSE
;; register, viewed in V8SImode, to the zero vector.  nregs is 16 when
;; TARGET_64BIT and 8 otherwise.
11664 (define_expand "avx_vzeroall"
11665 [(match_par_dup 0 [(const_int 0)])]
11668 int nregs = TARGET_64BIT ? 16 : 8;
11671 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11673 XVECEXP (operands[0], 0, 0)
11674 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11677 for (regno = 0; regno < nregs; regno++)
11678 XVECEXP (operands[0], 0, regno + 1)
11679 = gen_rtx_SET (VOIDmode,
11680 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11681 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; mark the insn as having no modrm byte and no memory access.
11684 (define_insn "*avx_vzeroall"
11685 [(match_parallel 0 "vzeroall_operation"
11686 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11689 [(set_attr "type" "sse")
11690 (set_attr "modrm" "0")
11691 (set_attr "memory" "none")
11692 (set_attr "prefix" "vex")
11693 (set_attr "mode" "OI")])
11695 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11696 ;; if the upper 128bits are unused.
;; The insn is an unspec_volatile (UNSPECV_VZEROUPPER) that carries a
;; const_int as operand 0; it has no modrm byte and no memory access.
11697 (define_insn "avx_vzeroupper"
11698 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11699 UNSPECV_VZEROUPPER)]
11702 [(set_attr "type" "sse")
11703 (set_attr "modrm" "0")
11704 (set_attr "memory" "none")
11705 (set_attr "prefix" "vex")
11706 (set_attr "mode" "OI")])
;; Maps an integer vector mode to the 128-bit integer vector mode with the
;; same element type; the 128-bit modes map to themselves.
11708 (define_mode_attr AVXTOSSEMODE
11709 [(V4DI "V2DI") (V2DI "V2DI")
11710 (V8SI "V4SI") (V4SI "V4SI")
11711 (V16HI "V8HI") (V8HI "V8HI")
11712 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast<ssemodesuffix>: the source (operand 1, register or memory)
;; has the <AVXTOSSEMODE> mode corresponding to the destination mode, and
;; the RTL selects its element 0 as a <ssescalarmode> scalar.
11714 (define_insn "avx2_pbroadcast<mode>"
11715 [(set (match_operand:VI 0 "register_operand" "=x")
11717 (vec_select:<ssescalarmode>
11718 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11719 (parallel [(const_int 0)]))))]
11721 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11722 [(set_attr "type" "ssemov")
11723 (set_attr "prefix_extra" "1")
11724 (set_attr "prefix" "vex")
11725 (set_attr "mode" "<sseinsnmode>")])
;; vpermd on V8SI: operand 1 must be a register, operand 2 may be a register
;; or memory.
11727 (define_insn "avx2_permvarv8si"
11728 [(set (match_operand:V8SI 0 "register_operand" "=x")
11730 [(match_operand:V8SI 1 "register_operand" "x")
11731 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11734 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11735 [(set_attr "type" "sselog")
11736 (set_attr "prefix" "vex")
11737 (set_attr "mode" "OI")])
;; vpermpd on V4DF with an 8-bit immediate selector (operand 2).  The source
;; (operand 1) may be a register or memory, so its predicate is
;; nonimmediate_operand to agree with the "xm" constraint, as in
;; avx2_permv4di_1.  With register_operand the "m" alternative could only
;; be reached by a reload spill, never by a directly matched memory operand.
11739 (define_insn "avx2_permv4df"
11740 [(set (match_operand:V4DF 0 "register_operand" "=x")
11742 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11743 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11746 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11747 [(set_attr "type" "sselog")
11748 (set_attr "prefix_extra" "1")
11749 (set_attr "prefix" "vex")
11750 (set_attr "mode" "OI")])
;; vpermps on V8SF: operand 1 must be a register, operand 2 may be a
;; register or memory.
11752 (define_insn "avx2_permvarv8sf"
11753 [(set (match_operand:V8SF 0 "register_operand" "=x")
11755 [(match_operand:V8SF 1 "register_operand" "x")
11756 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11759 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11760 [(set_attr "type" "sselog")
11761 (set_attr "prefix" "vex")
11762 (set_attr "mode" "OI")])
;; Expander taking a single 8-bit immediate (operand 2).  It splits the
;; immediate into four 2-bit fields (bits 1:0, 3:2, 5:4 and 7:6) and passes
;; them as separate operands to avx2_permv4di_1.
11764 (define_expand "avx2_permv4di"
11765 [(match_operand:V4DI 0 "register_operand" "")
11766 (match_operand:V4DI 1 "nonimmediate_operand" "")
11767 (match_operand:SI 2 "const_0_to_255_operand" "")]
11770 int mask = INTVAL (operands[2]);
11771 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11772 GEN_INT ((mask >> 0) & 3),
11773 GEN_INT ((mask >> 2) & 3),
11774 GEN_INT ((mask >> 4) & 3),
11775 GEN_INT ((mask >> 6) & 3)));
;; vpermq on V4DI with the selection written as four separate 0..3 element
;; indices (operands 2-5).  The output code packs them back into a single
;; immediate, two bits per index starting at bit 0, stores it in operands[2]
;; and emits vpermq.
11779 (define_insn "avx2_permv4di_1"
11780 [(set (match_operand:V4DI 0 "register_operand" "=x")
11782 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11783 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11784 (match_operand 3 "const_0_to_3_operand" "")
11785 (match_operand 4 "const_0_to_3_operand" "")
11786 (match_operand 5 "const_0_to_3_operand" "")])))]
11790 mask |= INTVAL (operands[2]) << 0;
11791 mask |= INTVAL (operands[3]) << 2;
11792 mask |= INTVAL (operands[4]) << 4;
11793 mask |= INTVAL (operands[5]) << 6;
11794 operands[2] = GEN_INT (mask);
11795 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11797 [(set_attr "type" "sselog")
11798 (set_attr "prefix" "vex")
11799 (set_attr "mode" "OI")])
;; vperm2i128 on V4DI: register operand 1, register-or-memory operand 2 and
;; an immediate (operand 3) restricted by const_0_to_255_operand.
11801 (define_insn "avx2_permv2ti"
11802 [(set (match_operand:V4DI 0 "register_operand" "=x")
11804 [(match_operand:V4DI 1 "register_operand" "x")
11805 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11806 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11809 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11810 [(set_attr "type" "sselog")
11811 (set_attr "prefix" "vex")
11812 (set_attr "mode" "OI")])
;; vbroadcastsd from a V2DF register: the RTL is a vec_duplicate into V4DF
;; of element 0 of operand 1.
11814 (define_insn "avx2_vec_dupv4df"
11815 [(set (match_operand:V4DF 0 "register_operand" "=x")
11816 (vec_duplicate:V4DF
11818 (match_operand:V2DF 1 "register_operand" "x")
11819 (parallel [(const_int 0)]))))]
11821 "vbroadcastsd\t{%1, %0|%0, %1}"
11822 [(set_attr "type" "sselog1")
11823 (set_attr "prefix" "vex")
11824 (set_attr "mode" "V4DF")])
11826 ;; Modes handled by AVX vec_dup patterns.
;; These are the four-element and eight-element 256-bit modes with 32-bit
;; or 64-bit elements; the iterator is used by vec_dup<mode>.
11827 (define_mode_iterator AVX_VEC_DUP_MODE
11828 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into an AVX_VEC_DUP_MODE register.  Alternative
;; 0 takes the scalar from memory; alternative 1 takes it from a register
;; and is marked with "?" to discourage it.  The visible template line
;; emits vbroadcast<ssescalarmodesuffix>.
11830 (define_insn "vec_dup<mode>"
11831 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11832 (vec_duplicate:AVX_VEC_DUP_MODE
11833 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11836 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11838 [(set_attr "type" "ssemov")
11839 (set_attr "prefix_extra" "1")
11840 (set_attr "prefix" "vex")
11841 (set_attr "mode" "V8SF")])
;; vbroadcasti128: loads a half-width vector (<ssehalfvecmode>) from memory
;; into a VI_256 register.  Only a memory source is accepted.
11843 (define_insn "avx2_vbroadcasti128_<mode>"
11844 [(set (match_operand:VI_256 0 "register_operand" "=x")
11846 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11849 "vbroadcasti128\t{%1, %0|%0, %1}"
11850 [(set_attr "type" "ssemov")
11851 (set_attr "prefix_extra" "1")
11852 (set_attr "prefix" "vex")
11853 (set_attr "mode" "OI")])
;; Post-reload rewrite (TARGET_AVX && reload_completed) of a vec_duplicate
;; from a register scalar: first duplicate the scalar into operand 2, then
;; vec_concat operand 2 with itself.  operands[2] is the destination hard
;; register (REGNO of operand 0) viewed in <ssehalfvecmode>.
11856 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11857 (vec_duplicate:AVX_VEC_DUP_MODE
11858 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11859 "TARGET_AVX && reload_completed"
11860 [(set (match_dup 2)
11861 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11863 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11864 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector (operand 1) into a V_256 register, with
;; three alternatives chosen by where the source lives:
;;   m   - memory source: vbroadcast<i128>
;;   0   - source already in the destination register: vinsert<i128> with
;;         immediate 1
;;   ?x  - another register (discouraged): vperm2<i128> with immediate 0,
;;         printing operand 1 with the %t modifier
11866 (define_insn "avx_vbroadcastf128_<mode>"
11867 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11869 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11873 vbroadcast<i128>\t{%1, %0|%0, %1}
11874 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11875 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11876 [(set_attr "type" "ssemov,sselog1,sselog1")
11877 (set_attr "prefix_extra" "1")
11878 (set_attr "length_immediate" "0,1,1")
11879 (set_attr "prefix" "vex")
11880 (set_attr "mode" "<sseinsnmode>")])
11882 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11883 ;; If it so happens that the input is in memory, use vbroadcast.
11884 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; In this V4SF pattern, operand 3 is the broadcast element index (elt).
;; The vbroadcastss path re-addresses operand 1 to the SFmode element at
;; byte offset elt * 4; the vpermilps path uses elt * 0x55 as its
;; immediate, i.e. the 2-bit index repeated in all four selector fields.
11885 (define_insn "*avx_vperm_broadcast_v4sf"
11886 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11888 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11889 (match_parallel 2 "avx_vbroadcast_operand"
11890 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11893 int elt = INTVAL (operands[3]);
11894 switch (which_alternative)
11898 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11899 return "vbroadcastss\t{%1, %0|%0, %1}";
11901 operands[2] = GEN_INT (elt * 0x55);
11902 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11904 gcc_unreachable ();
11907 [(set_attr "type" "ssemov,ssemov,sselog1")
11908 (set_attr "prefix_extra" "1")
11909 (set_attr "length_immediate" "0,0,1")
11910 (set_attr "prefix" "vex")
11911 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 broadcast-by-vec_select, split after reload.  Operand 3 is the
;; element index (elt).  The split code contains two pieces:
;;  - a shuffle sequence: avx_vpermil<mode> replicates the element within
;;    its 128-bit lane (mask 0 or 15 for V4DF, (elt & 3) * 0x55 otherwise),
;;    then avx_vperm2f128<mode>3 on the destination copies the lane that
;;    holds it into both lanes;
;;  - a re-addressing of operand 1 to the selected <ssescalarmode> element,
;;    so that the vec_duplicate replacement pattern applies.
11913 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11914 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11916 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11917 (match_parallel 2 "avx_vbroadcast_operand"
11918 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11921 "&& reload_completed"
11922 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11924 rtx op0 = operands[0], op1 = operands[1];
11925 int elt = INTVAL (operands[3]);
11931 /* Shuffle element we care about into all elements of the 128-bit lane.
11932 The other lane gets shuffled too, but we don't care. */
11933 if (<MODE>mode == V4DFmode)
11934 mask = (elt & 1 ? 15 : 0);
11936 mask = (elt & 3) * 0x55;
11937 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11939 /* Shuffle the lane we care about into both lanes of the dest. */
11940 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11941 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11945 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11946 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate-form vpermil expander for the DFmode vector modes (VF2).  The
;; immediate (operand 2) is decoded one bit per element into explicit
;; element indices: bits 0 and 1 give indices 0..1 for elements 0 and 1,
;; and for V4DF bits 2 and 3 give indices 2..3 for elements 2 and 3.  The
;; indices are collected into a PARALLEL of <ssescalarnum> entries.
11949 (define_expand "avx_vpermil<mode>"
11950 [(set (match_operand:VF2 0 "register_operand" "")
11952 (match_operand:VF2 1 "nonimmediate_operand" "")
11953 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11956 int mask = INTVAL (operands[2]);
11957 rtx perm[<ssescalarnum>];
11959 perm[0] = GEN_INT (mask & 1);
11960 perm[1] = GEN_INT ((mask >> 1) & 1);
11961 if (<MODE>mode == V4DFmode)
11963 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11964 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11968 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermil expander for the SFmode vector modes (VF1).  The
;; immediate (operand 2) is decoded two bits per element into indices 0..3
;; for elements 0-3.  For V8SF, elements 4-7 reuse the same four 2-bit
;; fields with 4 added to each index.  The indices are collected into a
;; PARALLEL of <ssescalarnum> entries.
11971 (define_expand "avx_vpermil<mode>"
11972 [(set (match_operand:VF1 0 "register_operand" "")
11974 (match_operand:VF1 1 "nonimmediate_operand" "")
11975 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11978 int mask = INTVAL (operands[2]);
11979 rtx perm[<ssescalarnum>];
11981 perm[0] = GEN_INT (mask & 3);
11982 perm[1] = GEN_INT ((mask >> 2) & 3);
11983 perm[2] = GEN_INT ((mask >> 4) & 3);
11984 perm[3] = GEN_INT ((mask >> 6) & 3);
11985 if (<MODE>mode == V8SFmode)
11987 perm[4] = GEN_INT ((mask & 3) + 4);
11988 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11989 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11990 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11994 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn for the immediate vpermil form.  The PARALLEL of constant indices
;; (operand 2) must be accepted by avx_vpermilp_parallel, which is part of
;; the insn condition.  The output code calls that function again and uses
;; its return value minus 1 as the immediate for vpermil<ssemodesuffix>.
11997 (define_insn "*avx_vpermilp<mode>"
11998 [(set (match_operand:VF 0 "register_operand" "=x")
12000 (match_operand:VF 1 "nonimmediate_operand" "xm")
12001 (match_parallel 2 ""
12002 [(match_operand 3 "const_int_operand" "")])))]
12004 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12006 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12007 operands[2] = GEN_INT (mask);
12008 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12010 [(set_attr "type" "sselog")
12011 (set_attr "prefix_extra" "1")
12012 (set_attr "length_immediate" "1")
12013 (set_attr "prefix" "vex")
12014 (set_attr "mode" "<MODE>")])
;; Variable-control vpermil<ssemodesuffix>: operand 1 is the VF source
;; register and operand 2 an integer vector of the matching
;; <sseintvecmode>, in a register or memory.
12016 (define_insn "avx_vpermilvar<mode>3"
12017 [(set (match_operand:VF 0 "register_operand" "=x")
12019 [(match_operand:VF 1 "register_operand" "x")
12020 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12023 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12024 [(set_attr "type" "sselog")
12025 (set_attr "prefix_extra" "1")
12026 (set_attr "prefix" "vex")
12027 (set_attr "mode" "<MODE>")])
;; Expander for the two-source 128-bit-lane permute on AVX256MODE2P modes.
;; The template is an UNSPEC_VPERMIL2F128.  When neither bit 3 nor bit 7 of
;; the immediate is set ((mask & 0x88) == 0), the code instead builds a
;; vec_select over the vec_concat of operands 1 and 2: bits 1:0 of the
;; immediate pick the half-vector that supplies the low half of the result
;; and bits 5:4 the half-vector for the high half.  The resulting SET of
;; operand 0 is constructed in t2.
12029 (define_expand "avx_vperm2f128<mode>3"
12030 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12031 (unspec:AVX256MODE2P
12032 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12033 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12034 (match_operand:SI 3 "const_0_to_255_operand" "")]
12035 UNSPEC_VPERMIL2F128))]
12038 int mask = INTVAL (operands[3]);
12039 if ((mask & 0x88) == 0)
12041 rtx perm[<ssescalarnum>], t1, t2;
12042 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12044 base = (mask & 3) * nelt2;
12045 for (i = 0; i < nelt2; ++i)
12046 perm[i] = GEN_INT (base + i);
12048 base = ((mask >> 4) & 3) * nelt2;
12049 for (i = 0; i < nelt2; ++i)
12050 perm[i + nelt2] = GEN_INT (base + i);
12052 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12053 operands[1], operands[2]);
12054 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12055 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12056 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12062 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12063 ;; means that in order to represent this properly in rtl we'd have to
12064 ;; nest *another* vec_concat with a zero operand and do the select from
12065 ;; a 4x wide vector. That doesn't seem very nice.
;; This pattern therefore keeps the operation opaque as UNSPEC_VPERMIL2F128
;; and accepts any immediate allowed by const_0_to_255_operand.
12066 (define_insn "*avx_vperm2f128<mode>_full"
12067 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12068 (unspec:AVX256MODE2P
12069 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12070 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12071 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12072 UNSPEC_VPERMIL2F128))]
12074 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12075 [(set_attr "type" "sselog")
12076 (set_attr "prefix_extra" "1")
12077 (set_attr "length_immediate" "1")
12078 (set_attr "prefix" "vex")
12079 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of the two-source lane permute: the selection is a
;; PARALLEL (operand 3) over the vec_concat of operands 1 and 2.  The
;; PARALLEL must be accepted by avx_vperm2f128_parallel (part of the insn
;; condition); the output code uses that function's return value minus 1
;; as the immediate for vperm2<i128>.
12081 (define_insn "*avx_vperm2f128<mode>_nozero"
12082 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12083 (vec_select:AVX256MODE2P
12084 (vec_concat:<ssedoublevecmode>
12085 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12086 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12087 (match_parallel 3 ""
12088 [(match_operand 4 "const_int_operand" "")])))]
12090 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12092 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12093 operands[3] = GEN_INT (mask);
12094 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12096 [(set_attr "type" "sselog")
12097 (set_attr "prefix_extra" "1")
12098 (set_attr "length_immediate" "1")
12099 (set_attr "prefix" "vex")
12100 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a half-width vector (operand 2) into a V_256
;; vector (operand 1).  It switches on the 0/1 immediate (operand 3) to
;; select gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, then emits the
;; chosen insn with operands 0-2.
12102 (define_expand "avx_vinsertf128<mode>"
12103 [(match_operand:V_256 0 "register_operand" "")
12104 (match_operand:V_256 1 "register_operand" "")
12105 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12106 (match_operand:SI 3 "const_0_to_1_operand" "")]
12109 rtx (*insn)(rtx, rtx, rtx);
12111 switch (INTVAL (operands[3]))
12114 insn = gen_vec_set_lo_<mode>;
12117 insn = gen_vec_set_hi_<mode>;
12120 gcc_unreachable ();
12123 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0 on V4DI: V2DI operand 2 (register or memory)
;; is combined with elements 2 and 3 selected from operand 1.
12127 (define_insn "avx2_vec_set_lo_v4di"
12128 [(set (match_operand:V4DI 0 "register_operand" "=x")
12130 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12132 (match_operand:V4DI 1 "register_operand" "x")
12133 (parallel [(const_int 2) (const_int 3)]))))]
12135 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12136 [(set_attr "type" "sselog")
12137 (set_attr "prefix_extra" "1")
12138 (set_attr "length_immediate" "1")
12139 (set_attr "prefix" "vex")
12140 (set_attr "mode" "OI")])
;; vinserti128 with immediate 1 on V4DI: elements 0 and 1 selected from
;; operand 1 are combined with V2DI operand 2 (register or memory).
12142 (define_insn "avx2_vec_set_hi_v4di"
12143 [(set (match_operand:V4DI 0 "register_operand" "=x")
12146 (match_operand:V4DI 1 "register_operand" "x")
12147 (parallel [(const_int 0) (const_int 1)]))
12148 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12150 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12151 [(set_attr "type" "sselog")
12152 (set_attr "prefix_extra" "1")
12153 (set_attr "length_immediate" "1")
12154 (set_attr "prefix" "vex")
12155 (set_attr "mode" "OI")])
;; Replace the low half of a four-element 256-bit vector (VI8F_256).  The
;; result is the vec_concat of operand 2 (new low half, register or memory)
;; with elements 2 and 3 of operand 1; emitted as vinsert<i128> with
;; immediate 0.
12157 (define_insn "vec_set_lo_<mode>"
12158 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12159 (vec_concat:VI8F_256
12160 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12161 (vec_select:<ssehalfvecmode>
12162 (match_operand:VI8F_256 1 "register_operand" "x")
12163 (parallel [(const_int 2) (const_int 3)]))))]
12165 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12166 [(set_attr "type" "sselog")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "prefix" "vex")
12170 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a four-element 256-bit vector (VI8F_256).  The
;; result is the vec_concat of elements 0 and 1 of operand 1 with operand 2
;; (new high half, register or memory); emitted as vinsert<i128> with
;; immediate 1.
12172 (define_insn "vec_set_hi_<mode>"
12173 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12174 (vec_concat:VI8F_256
12175 (vec_select:<ssehalfvecmode>
12176 (match_operand:VI8F_256 1 "register_operand" "x")
12177 (parallel [(const_int 0) (const_int 1)]))
12178 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12180 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12181 [(set_attr "type" "sselog")
12182 (set_attr "prefix_extra" "1")
12183 (set_attr "length_immediate" "1")
12184 (set_attr "prefix" "vex")
12185 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an eight-element 256-bit vector (VI4F_256).  The
;; result is the vec_concat of operand 2 (new low half, register or memory)
;; with elements 4-7 of operand 1; emitted as vinsert<i128> with
;; immediate 0.
12187 (define_insn "vec_set_lo_<mode>"
12188 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12189 (vec_concat:VI4F_256
12190 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12191 (vec_select:<ssehalfvecmode>
12192 (match_operand:VI4F_256 1 "register_operand" "x")
12193 (parallel [(const_int 4) (const_int 5)
12194 (const_int 6) (const_int 7)]))))]
12196 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12197 [(set_attr "type" "sselog")
12198 (set_attr "prefix_extra" "1")
12199 (set_attr "length_immediate" "1")
12200 (set_attr "prefix" "vex")
12201 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an eight-element 256-bit vector (VI4F_256).  The
;; result is the vec_concat of elements 0-3 of operand 1 with operand 2
;; (new high half, register or memory); emitted as vinsert<i128> with
;; immediate 1.
12203 (define_insn "vec_set_hi_<mode>"
12204 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12205 (vec_concat:VI4F_256
12206 (vec_select:<ssehalfvecmode>
12207 (match_operand:VI4F_256 1 "register_operand" "x")
12208 (parallel [(const_int 0) (const_int 1)
12209 (const_int 2) (const_int 3)]))
12210 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12212 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12213 [(set_attr "type" "sselog")
12214 (set_attr "prefix_extra" "1")
12215 (set_attr "length_immediate" "1")
12216 (set_attr "prefix" "vex")
12217 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half insert: V8HI operand 2 (register or memory) is combined
;; with elements 8-15 selected from operand 1.  Emitted as vinsert%~128
;; with immediate 0; the %~ part of the mnemonic is expanded at output time.
12219 (define_insn "vec_set_lo_v16hi"
12220 [(set (match_operand:V16HI 0 "register_operand" "=x")
12222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12224 (match_operand:V16HI 1 "register_operand" "x")
12225 (parallel [(const_int 8) (const_int 9)
12226 (const_int 10) (const_int 11)
12227 (const_int 12) (const_int 13)
12228 (const_int 14) (const_int 15)]))))]
12230 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12231 [(set_attr "type" "sselog")
12232 (set_attr "prefix_extra" "1")
12233 (set_attr "length_immediate" "1")
12234 (set_attr "prefix" "vex")
12235 (set_attr "mode" "OI")])
;; V16HI high-half insert: elements 0-7 selected from operand 1 are combined
;; with V8HI operand 2 (register or memory).  Emitted as vinsert%~128 with
;; immediate 1; the %~ part of the mnemonic is expanded at output time.
12237 (define_insn "vec_set_hi_v16hi"
12238 [(set (match_operand:V16HI 0 "register_operand" "=x")
12241 (match_operand:V16HI 1 "register_operand" "x")
12242 (parallel [(const_int 0) (const_int 1)
12243 (const_int 2) (const_int 3)
12244 (const_int 4) (const_int 5)
12245 (const_int 6) (const_int 7)]))
12246 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12248 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12249 [(set_attr "type" "sselog")
12250 (set_attr "prefix_extra" "1")
12251 (set_attr "length_immediate" "1")
12252 (set_attr "prefix" "vex")
12253 (set_attr "mode" "OI")])
;; V32QI low-half insert: V16QI operand 2 (register or memory) is combined
;; with elements 16-31 selected from operand 1.  Emitted as vinsert%~128
;; with immediate 0; the %~ part of the mnemonic is expanded at output time.
12255 (define_insn "vec_set_lo_v32qi"
12256 [(set (match_operand:V32QI 0 "register_operand" "=x")
12258 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12260 (match_operand:V32QI 1 "register_operand" "x")
12261 (parallel [(const_int 16) (const_int 17)
12262 (const_int 18) (const_int 19)
12263 (const_int 20) (const_int 21)
12264 (const_int 22) (const_int 23)
12265 (const_int 24) (const_int 25)
12266 (const_int 26) (const_int 27)
12267 (const_int 28) (const_int 29)
12268 (const_int 30) (const_int 31)]))))]
12270 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12271 [(set_attr "type" "sselog")
12272 (set_attr "prefix_extra" "1")
12273 (set_attr "length_immediate" "1")
12274 (set_attr "prefix" "vex")
12275 (set_attr "mode" "OI")])
;; V32QI high-half insert: elements 0-15 selected from operand 1 are
;; combined with V16QI operand 2 (register or memory).  Emitted as
;; vinsert%~128 with immediate 1; the %~ part of the mnemonic is expanded at
;; output time.
12277 (define_insn "vec_set_hi_v32qi"
12278 [(set (match_operand:V32QI 0 "register_operand" "=x")
12281 (match_operand:V32QI 1 "register_operand" "x")
12282 (parallel [(const_int 0) (const_int 1)
12283 (const_int 2) (const_int 3)
12284 (const_int 4) (const_int 5)
12285 (const_int 6) (const_int 7)
12286 (const_int 8) (const_int 9)
12287 (const_int 10) (const_int 11)
12288 (const_int 12) (const_int 13)
12289 (const_int 14) (const_int 15)]))
12290 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12292 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12293 [(set_attr "type" "sselog")
12294 (set_attr "prefix_extra" "1")
12295 (set_attr "length_immediate" "1")
12296 (set_attr "prefix" "vex")
12297 (set_attr "mode" "OI")])
;; Load form of v<sseintprefix>maskmov<ssemodesuffix>: operand 1 is the
;; memory source, operand 2 an integer-vector register of the matching
;; <sseintvecmode>, and operand 0 the destination register.
12299 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12300 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12302 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12303 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12306 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12307 [(set_attr "type" "sselog1")
12308 (set_attr "prefix_extra" "1")
12309 (set_attr "prefix" "vex")
12310 (set_attr "mode" "<sseinsnmode>")])
;; Store form of v<sseintprefix>maskmov<ssemodesuffix>: operand 0 is the
;; memory destination, operand 1 an integer-vector register of the matching
;; <sseintvecmode>, and operand 2 the data register.
12312 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12313 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12315 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12316 (match_operand:V48_AVX2 2 "register_operand" "x")
12320 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12321 [(set_attr "type" "sselog1")
12322 (set_attr "prefix_extra" "1")
12323 (set_attr "prefix" "vex")
12324 (set_attr "mode" "<sseinsnmode>")])
;; Cast from the half-width vector mode to the full AVX256MODE2P mode,
;; kept as an unspec until reload has completed and then split into a
;; plain move.  The split code re-views a register so both sides of the
;; move share one mode: op0 in <ssehalfvecmode>, or op1 in the full mode.
12326 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12327 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12328 (unspec:AVX256MODE2P
12329 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12333 "&& reload_completed"
12336 rtx op0 = operands[0];
12337 rtx op1 = operands[1];
12339 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12341 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12342 emit_move_insn (op0, op1);
;; vec_init for the 256-bit modes: delegates to ix86_expand_vector_init,
;; passing false as its first argument, the destination (operand 0) and the
;; element list (operand 1).
12346 (define_expand "vec_init<mode>"
12347 [(match_operand:V_256 0 "register_operand" "")
12348 (match_operand 1 "" "")]
12351 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that extracts a V2DI half from a V4DI register.  It switches on
;; the 0/1 immediate (operand 2) to select gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di, then emits the chosen insn.
12355 (define_expand "avx2_extracti128"
12356 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12357 (match_operand:V4DI 1 "register_operand" "")
12358 (match_operand:SI 2 "const_0_to_1_operand" "")]
12361 rtx (*insn)(rtx, rtx);
12363 switch (INTVAL (operands[2]))
12366 insn = gen_vec_extract_lo_v4di;
12369 insn = gen_vec_extract_hi_v4di;
12372 gcc_unreachable ();
12375 emit_insn (insn (operands[0], operands[1]));
;; Expander that inserts a V2DI value (operand 2) into a V4DI vector
;; (operand 1).  It switches on the 0/1 immediate (operand 3) to select
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, then emits the
;; chosen insn.
12379 (define_expand "avx2_inserti128"
12380 [(match_operand:V4DI 0 "register_operand" "")
12381 (match_operand:V4DI 1 "register_operand" "")
12382 (match_operand:V2DI 2 "nonimmediate_operand" "")
12383 (match_operand:SI 3 "const_0_to_1_operand" "")]
12386 rtx (*insn)(rtx, rtx, rtx);
12388 switch (INTVAL (operands[3]))
12391 insn = gen_avx2_vec_set_lo_v4di;
12394 insn = gen_avx2_vec_set_hi_v4di;
12397 gcc_unreachable ();
12400 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable arithmetic right shift on eight SImode elements,
;; emitted as vpsravd.  Operand 0 is the V8SI destination register,
;; operand 1 the V8SI register whose elements are shifted, and operand 2
;; (register or memory) supplies the per-element shift counts.  The RTL
;; spells the operation out element by element: result element i is
;; element i of operand 1 shifted by element i of operand 2.
;; The selectors for the second group of four elements are 4..7; they
;; must not repeat 0..3, otherwise the pattern would describe the upper
;; lane as a copy of the lower-lane results and RTL simplification could
;; act on that false equivalence.
12404 (define_insn "avx2_ashrvv8si"
12405 [(set (match_operand:V8SI 0 "register_operand" "=x")
12411 (match_operand:V8SI 1 "register_operand" "x")
12412 (parallel [(const_int 0)]))
12414 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12415 (parallel [(const_int 0)])))
12419 (parallel [(const_int 1)]))
12422 (parallel [(const_int 1)]))))
12427 (parallel [(const_int 2)]))
12430 (parallel [(const_int 2)])))
12434 (parallel [(const_int 3)]))
12437 (parallel [(const_int 3)])))))
12443 (parallel [(const_int 4)]))
12446 (parallel [(const_int 4)])))
12450 (parallel [(const_int 5)]))
12453 (parallel [(const_int 5)]))))
12458 (parallel [(const_int 6)]))
12461 (parallel [(const_int 6)])))
12465 (parallel [(const_int 7)]))
12468 (parallel [(const_int 7)])))))))]
12470 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12471 [(set_attr "type" "sseishft")
12472 (set_attr "prefix" "vex")
12473 (set_attr "mode" "OI")])
;; Per-element variable arithmetic right shift on four SImode elements,
;; emitted as vpsravd.  Operand 0 is the V4SI destination register,
;; operand 1 the V4SI register being shifted, and operand 2 (register or
;; memory) the vector of per-element shift counts.  The RTL lists the
;; four element positions 0..3 individually, selecting the same index
;; from the value and from the count vector each time.
12475 (define_insn "avx2_ashrvv4si"
12476 [(set (match_operand:V4SI 0 "register_operand" "=x")
12481 (match_operand:V4SI 1 "register_operand" "x")
12482 (parallel [(const_int 0)]))
12484 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12485 (parallel [(const_int 0)])))
12489 (parallel [(const_int 1)]))
12492 (parallel [(const_int 1)]))))
12497 (parallel [(const_int 2)]))
12500 (parallel [(const_int 2)])))
12504 (parallel [(const_int 3)]))
12507 (parallel [(const_int 3)]))))))]
12509 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12510 [(set_attr "type" "sseishft")
12511 (set_attr "prefix" "vex")
12512 (set_attr "mode" "TI")])
;; Per-element variable logical shift on eight SImode elements; the
;; shift code comes from the <lshift> iterator and the mnemonic is
;; vp<lshift_insn>vd.  Operand 0 is the V8SI destination register,
;; operand 1 the V8SI register being shifted, and operand 2 (register or
;; memory) the vector of per-element shift counts.  Result element i is
;; element i of operand 1 shifted by element i of operand 2.
;; The selectors for the second group of four elements are 4..7; they
;; must not repeat 0..3, otherwise the pattern would describe the upper
;; lane as a copy of the lower-lane results and RTL simplification could
;; act on that false equivalence.
12514 (define_insn "avx2_<lshift>vv8si"
12515 [(set (match_operand:V8SI 0 "register_operand" "=x")
12521 (match_operand:V8SI 1 "register_operand" "x")
12522 (parallel [(const_int 0)]))
12524 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12525 (parallel [(const_int 0)])))
12529 (parallel [(const_int 1)]))
12532 (parallel [(const_int 1)]))))
12537 (parallel [(const_int 2)]))
12540 (parallel [(const_int 2)])))
12544 (parallel [(const_int 3)]))
12547 (parallel [(const_int 3)])))))
12553 (parallel [(const_int 4)]))
12556 (parallel [(const_int 4)])))
12560 (parallel [(const_int 5)]))
12563 (parallel [(const_int 5)]))))
12568 (parallel [(const_int 6)]))
12571 (parallel [(const_int 6)])))
12575 (parallel [(const_int 7)]))
12578 (parallel [(const_int 7)])))))))]
12580 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12581 [(set_attr "type" "sseishft")
12582 (set_attr "prefix" "vex")
12583 (set_attr "mode" "OI")])
;; Per-element variable logical shift for the four-element modes of the
;; VI4SD_AVX2 iterator; the shift code comes from the <lshift> iterator
;; and the mnemonic is vp<lshift_insn>v<ssemodesuffix>.
;; The result is assembled as a vec_concat of two <ssehalfvecmode>
;; halves, each itself a vec_concat of two scalar <ssescalarmode>
;; shifts.  Element 0 of operand 1 is shifted by element 0 of operand 2
;; (register or memory); positions 1, 2 and 3 repeat the same shape with
;; their own selector index.
12585 (define_insn "avx2_<lshift>v<mode>"
12586 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12587 (vec_concat:VI4SD_AVX2
12588 (vec_concat:<ssehalfvecmode>
12589 (lshift:<ssescalarmode>
12590 (vec_select:<ssescalarmode>
12591 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12592 (parallel [(const_int 0)]))
12593 (vec_select:<ssescalarmode>
12594 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12595 (parallel [(const_int 0)])))
12596 (lshift:<ssescalarmode>
12597 (vec_select:<ssescalarmode>
12599 (parallel [(const_int 1)]))
12600 (vec_select:<ssescalarmode>
12602 (parallel [(const_int 1)]))))
12603 (vec_concat:<ssehalfvecmode>
12604 (lshift:<ssescalarmode>
12605 (vec_select:<ssescalarmode>
12607 (parallel [(const_int 2)]))
12608 (vec_select:<ssescalarmode>
12610 (parallel [(const_int 2)])))
12611 (lshift:<ssescalarmode>
12612 (vec_select:<ssescalarmode>
12614 (parallel [(const_int 3)]))
12615 (vec_select:<ssescalarmode>
12617 (parallel [(const_int 3)]))))))]
12619 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12620 [(set_attr "type" "sseishft")
12621 (set_attr "prefix" "vex")
12622 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable logical shift on two DImode elements; the shift
;; code comes from the <lshift> iterator and the mnemonic is
;; vp<lshift_insn>vq.  Operand 0 is the V2DI destination register,
;; operand 1 the V2DI register being shifted, and operand 2 (register or
;; memory) the vector of per-element shift counts.  The RTL lists
;; element positions 0 and 1 individually.
12624 (define_insn "avx2_<lshift>vv2di"
12625 [(set (match_operand:V2DI 0 "register_operand" "=x")
12629 (match_operand:V2DI 1 "register_operand" "x")
12630 (parallel [(const_int 0)]))
12632 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12633 (parallel [(const_int 0)])))
12637 (parallel [(const_int 1)]))
12640 (parallel [(const_int 1)])))))]
12642 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12643 [(set_attr "type" "sseishft")
12644 (set_attr "prefix" "vex")
12645 (set_attr "mode" "TI")])
;; Build a 256-bit vector (operand 0, any V_256 mode) from two
;; half-width values: operand 1 (register) and operand 2.
;; Two alternatives, chosen by which_alternative:
;;   - operand 2 in a register or memory ("xm"): emit vinsert<i128> with
;;     immediate 0x1, using the %t1 form of operand 1 as the base;
;;   - operand 2 matching constraint "C" (presumably the all-zero
;;     constant -- confirm in constraints.md): emit a plain 128-bit move
;;     of operand 1 into the %x0 form of the destination, choosing
;;     vmovaps, vmovapd or vmovdqa from the insn's "mode" attribute.
;; An unexpected alternative or mode value reaches gcc_unreachable ().
;; The per-alternative attributes below follow the same order
;; (insert first, move second).
12647 (define_insn "avx_vec_concat<mode>"
12648 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12650 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12651 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12654 switch (which_alternative)
12657 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12659 switch (get_attr_mode (insn))
12662 return "vmovaps\t{%1, %x0|%x0, %1}";
12664 return "vmovapd\t{%1, %x0|%x0, %1}";
12666 return "vmovdqa\t{%1, %x0|%x0, %1}";
12669 gcc_unreachable ();
12672 [(set_attr "type" "sselog,ssemov")
12673 (set_attr "prefix_extra" "1,*")
12674 (set_attr "length_immediate" "1,*")
12675 (set_attr "prefix" "vex")
12676 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision to single-precision conversion, register source,
;; 128-bit result.  Operand 1 is a V8HI register; the conversion is
;; modelled as a V8SF unspec of that register, and operand 0 (V4SF)
;; receives four elements selected from it.
;; The selector picks elements 0, 1, 2 and 3, i.e. the four lowest
;; converted values in order.  A selector that repeats an index would
;; describe a result with two identical elements, which is not what a
;; four-wide conversion produces.
12678 (define_insn "vcvtph2ps"
12679 [(set (match_operand:V4SF 0 "register_operand" "=x")
12681 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12683 (parallel [(const_int 0) (const_int 1)
12684 (const_int 2) (const_int 3)])))]
12686 "vcvtph2ps\t{%1, %0|%0, %1}"
12687 [(set_attr "type" "ssecvt")
12688 (set_attr "prefix" "vex")
12689 (set_attr "mode" "V4SF")])
;; Half-precision to single-precision conversion with a memory source.
;; Operand 1 is a V4HI memory operand (four half-precision values);
;; operand 0 is the V4SF destination register.  The leading '*' keeps
;; this pattern from getting a gen_* function, so it is only reachable
;; through pattern matching.
;; NOTE(review): the "mode" attribute is V8SF although the result is
;; V4SF, unlike the register-source vcvtph2ps pattern -- confirm this is
;; intentional.
12691 (define_insn "*vcvtph2ps_load"
12692 [(set (match_operand:V4SF 0 "register_operand" "=x")
12693 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12694 UNSPEC_VCVTPH2PS))]
12696 "vcvtph2ps\t{%1, %0|%0, %1}"
12697 [(set_attr "type" "ssecvt")
12698 (set_attr "prefix" "vex")
12699 (set_attr "mode" "V8SF")])
;; 256-bit half-precision to single-precision conversion.  Operand 1 is
;; a V8HI register or memory operand (eight half-precision values);
;; operand 0 is the V8SF destination register.  Modelled as an
;; UNSPEC_VCVTPH2PS of operand 1 and emitted as vcvtph2ps.
12701 (define_insn "vcvtph2ps256"
12702 [(set (match_operand:V8SF 0 "register_operand" "=x")
12703 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12704 UNSPEC_VCVTPH2PS))]
12706 "vcvtph2ps\t{%1, %0|%0, %1}"
12707 [(set_attr "type" "ssecvt")
12708 (set_attr "prefix" "vex")
12709 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single-precision to half-precision
;; conversion.  Operand 0 is the V8HI destination register, operand 1
;; the V4SF source register and operand 2 an immediate in 0..255
;; (presumably the rounding-control byte -- confirm against the ISA
;; reference).  The conversion itself yields a V4HI unspec; the
;; preparation statement sets operands[3] to the V4HImode zero constant,
;; presumably to fill the other half of the V8HI result.
12711 (define_expand "vcvtps2ph"
12712 [(set (match_operand:V8HI 0 "register_operand" "")
12714 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12715 (match_operand:SI 2 "const_0_to_255_operand" "")]
12719 "operands[3] = CONST0_RTX (V4HImode);")
;; Matching insn for the 128-bit single-precision to half-precision
;; conversion with a register destination.  Operand 1 is the V4SF source
;; register, operand 2 an immediate in 0..255 (constraint "N"), and
;; operand 3 a V4HI value that must satisfy const0_operand, i.e. the
;; zero constant the vcvtps2ph expander supplies.  Operand 0 is the V8HI
;; destination register.  Only operands 0-2 appear in the assembler
;; template.
12721 (define_insn "*vcvtps2ph"
12722 [(set (match_operand:V8HI 0 "register_operand" "=x")
12724 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12725 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12727 (match_operand:V4HI 3 "const0_operand" "")))]
12729 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12730 [(set_attr "type" "ssecvt")
12731 (set_attr "prefix" "vex")
12732 (set_attr "mode" "V4SF")])
;; Store form of the 128-bit single-precision to half-precision
;; conversion: the V4HI result of UNSPEC_VCVTPS2PH is written straight
;; to memory (operand 0), so no zero-filled upper half is involved.
;; Operand 1 is the V4SF source register and operand 2 an immediate in
;; 0..255 (constraint "N").
12734 (define_insn "*vcvtps2ph_store"
12735 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12736 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12737 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12738 UNSPEC_VCVTPS2PH))]
12740 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12741 [(set_attr "type" "ssecvt")
12742 (set_attr "prefix" "vex")
12743 (set_attr "mode" "V4SF")])
;; 256-bit single-precision to half-precision conversion.  Operand 1 is
;; the V8SF source register and operand 2 an immediate in 0..255
;; (constraint "N").  Operand 0, the V8HI result, may be a register or a
;; memory location (single "=xm" alternative).
12745 (define_insn "vcvtps2ph256"
12746 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12747 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12748 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12749 UNSPEC_VCVTPS2PH))]
12751 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12752 [(set_attr "type" "ssecvt")
12753 (set_attr "prefix" "vex")
12754 (set_attr "mode" "V8SF")])
12756 ;; For gather* insn patterns
;; VEC_GATHER_MODE is used under one name for two things:
;;   - a mode iterator over the eight data modes the gather patterns are
;;     instantiated for (32- and 64-bit elements, integer and float,
;;     128- and 256-bit vectors);
;;   - a mode attribute, written <VEC_GATHER_MODE>, giving for each data
;;     mode the mode of the index-vector operand of the avx2_gathersi
;;     patterns: V8SI for the two eight-element modes, V4SI for
;;     everything else.
12757 (define_mode_iterator VEC_GATHER_MODE
12758 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
12759 (define_mode_attr VEC_GATHER_MODE
12760 [(V2DI "V4SI") (V2DF "V4SI")
12761 (V4DI "V4SI") (V4DF "V4SI")
12762 (V4SI "V4SI") (V4SF "V4SI")
12763 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for gathers indexed by a vector of SImode values.
;;   operand 0: destination register (any VEC_GATHER_MODE mode)
;;   operand 1: register of the same mode, fed into the unspec
;;              (presumably the merge source -- confirm)
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector register in mode <VEC_GATHER_MODE>
;;   operand 5: scale immediate, accepted by const1248_operand
;;   operand 4: register of the data mode (presumably the mask -- confirm)
;;   operand 6: scratch register of the data mode, clobbered
;; The (mem:BLK (scratch)) element is part of the unspec's inputs.
;; The preparation statement wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR rtx.
;; The scale predicate is spelled without trailing whitespace so that it
;; names the same predicate as the matching *avx2_gathersi<mode> insn.
12765 (define_expand "avx2_gathersi<mode>"
12766 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12767 (unspec:VEC_GATHER_MODE
12768 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12769 (mem:<ssescalarmode>
12771 [(match_operand 2 "vsib_address_operand" "")
12772 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12773 (match_operand:SI 5 "const1248_operand" "")]))
12774 (mem:BLK (scratch))
12775 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12777 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12781 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12782 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for gathers indexed by a vector of SImode values,
;; emitted as v<sseintprefix>gatherd<ssemodesuffix>.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 7: the memory reference, matched by vsib_mem_operator and
;;              built from operand 3 (base address), operand 4 (index
;;              vector register) and operand 6 (scale immediate)
;;   operand 5: input tied to operand 1 (constraint "1")
;;   operand 1: scratch register, clobbered and earlyclobber ("=&x")
;; The template prints only operands 1, 7 and 0; because operand 5 is
;; tied to operand 1, %1 names the register that held operand 5 on
;; entry and that the insn clobbers.
12785 (define_insn "*avx2_gathersi<mode>"
12786 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12787 (unspec:VEC_GATHER_MODE
12788 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12789 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12791 [(match_operand:P 3 "vsib_address_operand" "p")
12792 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12793 (match_operand:SI 6 "const1248_operand" "n")]
12795 (mem:BLK (scratch))
12796 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12798 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12800 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12801 [(set_attr "type" "ssemov")
12802 (set_attr "prefix" "vex")
12803 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers indexed by a vector of DImode values (the
;; matching insn emits the "gatherq" mnemonic form).
;;   operand 0: destination register (any VEC_GATHER_MODE mode)
;;   operand 1: register of the same mode, fed into the unspec
;;              (presumably the merge source -- confirm)
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector register in mode <AVXMODE48P_DI>
;;   operand 5: scale immediate, accepted by const1248_operand
;;   operand 4: register of the data mode (presumably the mask -- confirm)
;;   operand 6: scratch register of the data mode, clobbered
;; The preparation statement wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR rtx.
;; The scale predicate is spelled without trailing whitespace so that it
;; names the same predicate as the matching *avx2_gatherdi<mode> insn.
12805 (define_expand "avx2_gatherdi<mode>"
12806 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12807 (unspec:VEC_GATHER_MODE
12808 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12809 (mem:<ssescalarmode>
12811 [(match_operand 2 "vsib_address_operand" "")
12812 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12813 (match_operand:SI 5 "const1248_operand" "")]))
12814 (mem:BLK (scratch))
12815 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12817 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12821 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12822 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for gathers indexed by a vector of DImode values,
;; emitted as v<sseintprefix>gatherq<ssemodesuffix>.  Instantiated over
;; the AVXMODE48P_DI iterator; the same name is also used as a mode
;; attribute (<AVXMODE48P_DI>) for the index vector's mode.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 7: the memory reference, matched by vsib_mem_operator and
;;              built from operand 3 (base address), operand 4 (index
;;              vector register) and operand 6 (scale immediate)
;;   operand 5: input tied to operand 1 (constraint "1")
;;   operand 1: scratch register, clobbered and earlyclobber ("=&x")
;; The template prints only operands 1, 7 and 0.
;; NOTE(review): the expander of the same name iterates over
;; VEC_GATHER_MODE rather than AVXMODE48P_DI -- confirm both cover the
;; same set of modes.
12825 (define_insn "*avx2_gatherdi<mode>"
12826 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12827 (unspec:AVXMODE48P_DI
12828 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12829 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12831 [(match_operand:P 3 "vsib_address_operand" "p")
12832 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12833 (match_operand:SI 6 "const1248_operand" "n")]
12835 (mem:BLK (scratch))
12836 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
12838 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12840 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12841 [(set_attr "type" "ssemov")
12842 (set_attr "prefix" "vex")
12843 (set_attr "mode" "<sseinsnmode>")])
12845 ;; Special handling for VEX.256 with float arguments
12846 ;; since there're still xmms as operands
;; Expander for the gather form whose index vector is a 256-bit V4DI
;; register while the data operands stay 128 bits wide (VI4F_128).
;;   operand 0: destination register (VI4F_128)
;;   operand 1: register of the same mode, fed into the unspec
;;              (presumably the merge source -- confirm)
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: V4DI index vector register
;;   operand 5: scale immediate, accepted by const1248_operand
;;   operand 4: register of the data mode (presumably the mask -- confirm)
;;   operand 6: scratch register of the data mode, clobbered
;; The preparation statement wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR rtx.
;; The scale predicate is spelled without trailing whitespace so that it
;; names the same predicate as the matching *avx2_gatherdi<mode>256 insn.
12847 (define_expand "avx2_gatherdi<mode>256"
12848 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12850 [(match_operand:VI4F_128 1 "register_operand" "")
12851 (mem:<ssescalarmode>
12853 [(match_operand 2 "vsib_address_operand" "")
12854 (match_operand:V4DI 3 "register_operand" "")
12855 (match_operand:SI 5 "const1248_operand" "")]))
12856 (mem:BLK (scratch))
12857 (match_operand:VI4F_128 4 "register_operand" "")]
12859 (clobber (match_scratch:VI4F_128 6 ""))])]
12863 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12864 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the gather form with a 256-bit V4DI index vector
;; and 128-bit data operands (VI4F_128), emitted as
;; v<sseintprefix>gatherq<ssemodesuffix>.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 7: the memory reference, matched by vsib_mem_operator and
;;              built from operand 3 (base address), operand 4 (V4DI
;;              index register) and operand 6 (scale immediate)
;;   operand 5: input tied to operand 1 (constraint "1")
;;   operand 1: scratch register, clobbered and earlyclobber ("=&x")
;; Operand 0 carries the earlyclobber modifier, as in the
;; *avx2_gathersi<mode> and *avx2_gatherdi<mode> patterns, so the
;; register allocator cannot give the destination the same register as
;; the index operand 4.  The gather instructions fault (#UD) unless
;; destination, index and mask registers are pairwise distinct.
12867 (define_insn "*avx2_gatherdi<mode>256"
12868 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12870 [(match_operand:VI4F_128 2 "register_operand" "0")
12871 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12873 [(match_operand:P 3 "vsib_address_operand" "p")
12874 (match_operand:V4DI 4 "register_operand" "x")
12875 (match_operand:SI 6 "const1248_operand" "n")]
12877 (mem:BLK (scratch))
12878 (match_operand:VI4F_128 5 "register_operand" "1")]
12880 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12882 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12883 [(set_attr "type" "ssemov")
12884 (set_attr "prefix" "vex")
12885 (set_attr "mode" "<sseinsnmode>")])