1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; In each pair the wider mode is enabled only under TARGET_AVX; the
;; narrower mode is listed without a condition.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V?TI modes (compare V16).  The wider mode of
;; each pair requires TARGET_AVX, and V2DF requires TARGET_SSE2.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V2DF is included only when TARGET_SSE2 holds; the other five modes
;; carry no condition.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; Four integer and two float modes; no per-mode condition is attached,
;; so any target gating has to come from the patterns that use this.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF need TARGET_AVX, V2DF needs TARGET_SSE2, and V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF is gated on TARGET_AVX; V4SF is unconditional.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF is gated on TARGET_AVX.  V2DF carries no condition here, unlike
;; in VF and VF_128 where it is guarded by TARGET_SSE2.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): the mode list and closing parenthesis of this iterator
;; are not present in this listing (embedded numbering skips 67-68);
;; restore them from the upstream file before building.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; QI/HI/SI/DI element vectors; the wider mode of each pair is gated on
;; TARGET_AVX.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; Same mode pairs as VI, but the wider modes are gated on TARGET_AVX2
;; rather than TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI is gated on TARGET_AVX; V16QI is unconditional.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI is gated on TARGET_AVX; V2DI is unconditional.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size integer iterators (QI, HI, SI and DI elements in
;; turn).  In each one the wider mode is gated on TARGET_AVX2 and the
;; narrower mode is unconditional.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
;; TI-element vector modes: V2TI under TARGET_AVX2, V1TI unconditionally.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; V2TI under TARGET_AVX2 plus the scalar TI mode (note: TI, not V1TI as
;; in VIMAX_AVX2).
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; Combined-element-size integer iterators.  The digits in each name
;; match the element sizes in bytes that the list covers (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI); the wider mode of every pair is gated on TARGET_AVX2.
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
;; NOTE(review): the mode list and closing parenthesis of this iterator
;; are not present in this listing (embedded numbering skips 129-130);
;; restore them from the upstream file before building.
128 (define_mode_iterator VI4SD_AVX2
;; The visible entries are the SI/DI integer modes, all gated on
;; TARGET_AVX2 (including the 128-bit ones).
;; NOTE(review): the opening of the list is missing from this listing
;; (embedded numbering skips 132-133), so further entries may exist.
131 (define_mode_iterator V48_AVX2
134 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
135 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps each integer vector mode to an ISA-name string: "sse2" for the
;; narrower mode of each pair, "avx2" for the wider one.  Covers the
;; V1TI/V2TI pair as well.
137 (define_mode_attr sse2_avx2
138 [(V16QI "sse2") (V32QI "avx2")
139 (V8HI "sse2") (V16HI "avx2")
140 (V4SI "sse2") (V8SI "avx2")
141 (V2DI "sse2") (V4DI "avx2")
142 (V1TI "sse2") (V2TI "avx2")])
;; Maps each integer vector mode to "ssse3" (narrower mode) or "avx2"
;; (wider mode).  The last pair uses scalar TI, not V1TI, alongside V2TI;
;; that matches the modes listed in SSESCALARMODE.
144 (define_mode_attr ssse3_avx2
145 [(V16QI "ssse3") (V32QI "avx2")
146 (V8HI "ssse3") (V16HI "avx2")
147 (V4SI "ssse3") (V8SI "avx2")
148 (V2DI "ssse3") (V4DI "avx2")
149 (TI "ssse3") (V2TI "avx2")])
;; Maps each integer vector mode to "sse4_1" (narrower mode) or "avx2"
;; (wider mode).  No TI-based modes are covered.
151 (define_mode_attr sse4_1_avx2
152 [(V16QI "sse4_1") (V32QI "avx2")
153 (V8HI "sse4_1") (V16HI "avx2")
154 (V4SI "sse4_1") (V8SI "avx2")
155 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps the four float vector modes to "avx" and the SI/DI integer
;; vector modes (both widths) to "avx2".
157 (define_mode_attr avx_avx2
158 [(V4SF "avx") (V2DF "avx")
159 (V8SF "avx") (V4DF "avx")
160 (V4SI "avx2") (V2DI "avx2")
161 (V8SI "avx2") (V4DI "avx2")])
;; Maps each integer vector mode to "vec" (narrower mode) or "avx2"
;; (wider mode).
163 (define_mode_attr vec_avx2
164 [(V16QI "vec") (V32QI "avx2")
165 (V8HI "vec") (V16HI "avx2")
166 (V4SI "vec") (V8SI "avx2")
167 (V2DI "vec") (V4DI "avx2")])
169 ;; Mapping of logic-shift operators
;; Code iterator over the two shift codes lshiftrt and ashift, followed
;; by two string attributes keyed on those codes.  Note that the second
;; attribute reuses the iterator's own name, lshift.
170 (define_code_iterator lshift [lshiftrt ashift])
172 ;; Base name for define_insn
173 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
175 ;; Base name for insn mnemonic
176 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; Maps an HI-element vector mode to the SI-element vector mode with the
;; same number of elements (element width doubled).
178 (define_mode_attr ssedoublemode
179 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DI-element vector mode to the QI-element vector mode of the
;; same total size (V4DI -> V32QI, V2DI -> V16QI).
181 (define_mode_attr ssebytemode
182 [(V4DI "V32QI") (V2DI "V16QI")])
184 ;; All 128bit vector integer modes
;; QI, HI, SI and DI element vectors; no per-mode condition is attached.
185 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
187 ;; All 256bit vector integer modes
;; QI, HI, SI and DI element vectors; no per-mode condition is attached.
188 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
190 ;; Random 128bit vector integer mode combinations
;; The digits in each name match the element sizes in bytes that the
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
191 (define_mode_iterator VI12_128 [V16QI V8HI])
192 (define_mode_iterator VI14_128 [V16QI V4SI])
193 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
194 (define_mode_iterator VI24_128 [V8HI V4SI])
195 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
197 ;; Random 256bit vector integer mode combinations
;; QI, HI and SI element vectors (no V4DI).
198 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
200 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode
;; that has the same element size and element count.
201 (define_mode_iterator VI4F_128 [V4SI V4SF])
202 (define_mode_iterator VI8F_128 [V2DI V2DF])
203 (define_mode_iterator VI4F_256 [V8SI V8SF])
204 (define_mode_iterator VI8F_256 [V4DI V4DF])
206 ;; Mapping from float mode to required SSE level
;; SF and V4SF map to "sse", DF and V2DF to "sse2", and the two wider
;; float vector modes (V8SF, V4DF) to "avx".
207 (define_mode_attr sse
208 [(SF "sse") (DF "sse2")
209 (V4SF "sse") (V2DF "sse2")
210 (V8SF "avx") (V4DF "avx")])
;; "sse2" for V16QI and V2DI, "avx" for their wider counterparts V32QI
;; and V4DI.  Only these four modes are covered.
212 (define_mode_attr sse2
213 [(V16QI "sse2") (V32QI "avx")
214 (V2DI "sse2") (V4DI "avx")])
;; "sse3" for V16QI and "avx" for V32QI; no other mode is covered.
216 (define_mode_attr sse3
217 [(V16QI "sse3") (V32QI "avx")])
;; "sse4_1" for the float vector modes V4SF and V2DF, "avx" for V8SF and
;; V4DF.
219 (define_mode_attr sse4_1
220 [(V4SF "sse4_1") (V2DF "sse4_1")
221 (V8SF "avx") (V4DF "avx")])
;; Size suffix string: "256" for the wider integer and float vector
;; modes, the empty string for the narrower ones.
223 (define_mode_attr avxsizesuffix
224 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
225 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
226 (V8SF "256") (V4DF "256")
227 (V4SF "") (V2DF "")])
229 ;; SSE instruction mode
;; Integer vector modes map to a single wide integer mode ("OI" for the
;; wider modes, "TI" for the narrower ones); float vector modes map to
;; themselves.
;; NOTE(review): no closing bracket is visible for this list (embedded
;; numbering skips 235-236), so trailing entries may be missing.
230 (define_mode_attr sseinsnmode
231 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
232 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
233 (V8SF "V8SF") (V4DF "V4DF")
234 (V4SF "V4SF") (V2DF "V2DF")
237 ;; Mapping of vector float modes to an integer mode of the same size
;; Float vector modes map to the integer vector mode with the same
;; element size and count; integer vector modes map to themselves, so the
;; attribute can be used with iterators that mix both kinds.
;; Each mode is listed exactly once: an earlier revision repeated the
;; V4DF and V8SF entries with identical values, which added nothing.
238 (define_mode_attr sseintvecmode
239 [(V8SF "V8SI") (V4DF "V4DI")
240 (V4SF "V4SI") (V2DF "V2DI")
242 (V8SI "V8SI") (V4DI "V4DI")
243 (V4SI "V4SI") (V2DI "V2DI")
244 (V16HI "V16HI") (V8HI "V8HI")
245 (V32QI "V32QI") (V16QI "V16QI")])
247 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count (e.g. V16QI -> V32QI,
;; V8SF -> V16SF).
248 (define_mode_attr ssedoublevecmode
249 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
250 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
251 (V8SF "V16SF") (V4DF "V8DF")
252 (V4SF "V8SF") (V2DF "V4DF")])
254 ;; Mapping of vector modes to a vector mode of half size
;; Same element type, half the element count (e.g. V32QI -> V16QI).
;; NOTE(review): no closing bracket is visible for this list (embedded
;; numbering skips 259-260), so trailing entries may be missing.
255 (define_mode_attr ssehalfvecmode
256 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
257 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
258 (V8SF "V4SF") (V4DF "V2DF")
261 ;; Mapping of vector modes back to the scalar modes
;; Gives the element mode of each vector mode (QI, HI, SI, DI, SF or DF).
262 (define_mode_attr ssescalarmode
263 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
264 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
265 (V8SF "SF") (V4DF "DF")
266 (V4SF "SF") (V2DF "DF")])
268 ;; Number of scalar elements in each vector type
;; The element count of each vector mode, as a decimal string.
269 (define_mode_attr ssescalarnum
270 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
271 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
272 (V8SF "8") (V4DF "4")
273 (V4SF "4") (V2DF "2")])
275 ;; SSE prefix for integer vector modes
;; "p" for integer vector modes, the empty string for float vector modes.
;; NOTE(review): the embedded numbering jumps from 277 to 280, so entries
;; between V2DI/V2DF and V8SI/V8SF may be missing from this listing.
276 (define_mode_attr sseintprefix
277 [(V2DI "p") (V2DF "")
280 (V8SI "p") (V8SF "")])
282 ;; SSE scalar suffix for vector modes
;; In the visible entries, modes with 32-bit elements map to "ss" and
;; modes with 64-bit elements map to "sd"; this holds for the integer
;; modes V8SI and V4DI as well as the float modes.
;; NOTE(review): neither the opening nor the closing bracket of the list
;; is visible (embedded numbering skips 284 and 288-289), so entries are
;; missing on both ends.
283 (define_mode_attr ssescalarmodesuffix
285 (V8SF "ss") (V4DF "sd")
286 (V4SF "ss") (V2DF "sd")
287 (V8SI "ss") (V4DI "sd")
290 ;; Pack/unpack vector modes
;; Unpack target: the integer vector mode with elements twice as wide and
;; half as many of them (e.g. V16QI -> V8HI).
291 (define_mode_attr sseunpackmode
292 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
293 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Pack target: the integer vector mode with elements half as wide and
;; twice as many of them (e.g. V8HI -> V16QI); the inverse of
;; sseunpackmode.
295 (define_mode_attr ssepackmode
296 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
297 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
299 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1,
;; 64-1).  Only the 128-bit integer modes are covered.
300 (define_mode_attr sserotatemax
301 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
303 ;; Mapping of mode to cast intrinsic name
;; V8SI -> "si", V8SF -> "ps", V4DF -> "pd"; the same three modes that
;; AVX256MODE2P iterates over.
304 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
306 ;; Instruction suffix for sign and zero extensions.
;; sign_extend -> "sx", zero_extend -> "zx".
307 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
309 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes get the literal "f128".  Integer modes get "%~128" rather
;; than a literal "i128".
;; NOTE(review): presumably %~ is an output-template escape that is
;; resolved to "i" or "f" when the instruction is printed, depending on
;; TARGET_AVX2 -- confirm against the operand-printing code.
310 (define_mode_attr i128
311 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
312 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes: V8SI, V8SF and V4DF (the modes castmode maps).
315 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Iterator over SI/DI/SF/DF element vector modes, plus a same-named
;; attribute mapping each mode to the DI-element vector mode of the same
;; total width (V2DI for 128-bit modes, V4DI for 256-bit modes).
;; The attribute also has entries for V8SI and V8SF, which the iterator
;; itself does not list.
317 (define_mode_iterator AVXMODE48P_DI
318 [V2DI V2DF V4DI V4DF V4SF V4SI])
319 (define_mode_attr AVXMODE48P_DI
320 [(V2DI "V2DI") (V2DF "V2DI")
321 (V4DI "V4DI") (V4DF "V4DI")
322 (V4SI "V2DI") (V4SF "V2DI")
323 (V8SI "V4DI") (V8SF "V4DI")])
;; Scalar SF/DF plus the four SF/DF vector modes; no per-mode condition
;; is attached.
325 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
327 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 where N is the element count of the mode, i.e.
;; an all-ones mask with one bit per element (8, 4, 4 and 2 bits).
328 (define_mode_attr blendbits
329 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
331 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
333 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
339 ;; All of these patterns are enabled for SSE1 as well as SSE2.
340 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in V16.  Both operands accept any
;; non-immediate; the visible preparation code hands the operand array to
;; ix86_expand_vector_move.
342 (define_expand "mov<mode>"
343 [(set (match_operand:V16 0 "nonimmediate_operand" "")
344 (match_operand:V16 1 "nonimmediate_operand" ""))]
347 ix86_expand_vector_move (<MODE>mode, operands);
;; The move instruction proper, for every mode in V16.
;;
;; Alternatives:
;;   0: constant (constraint C) into an SSE register; the mnemonic comes
;;      from standard_sse_constant_opcode.
;;   1: register or memory into an SSE register.
;;   2: SSE register into memory.
;; The visible part of the insn condition requires at least one of the
;; two operands to be a register.
;;
;; For the move alternatives the mnemonic is selected from the insn's
;; "mode" attribute.  Each of the three visible groups returns an
;; unaligned form (vmovups / vmovupd / vmovdqu) when either operand is
;; misaligned and a further condition, not shown in this listing, holds.
;; Otherwise the aligned form is returned; the second and third groups
;; fall back to %vmovaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, and to %vmovapd / %vmovdqa respectively when it is not.
;;
;; The "mode" attribute is computed as:
;;   TARGET_AVX                                   -> <sseinsnmode>
;;   optimizing for size, or no SSE2, or
;;     (alternative 2 and TARGET_SSE_TYPELESS_STORES) -> V4SF
;;   <MODE>mode is V4SFmode                       -> V4SF
;;   <MODE>mode is V2DFmode                       -> V2DF
;;   anything else                                -> TI
351 (define_insn "*mov<mode>_internal"
352 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
353 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
355 && (register_operand (operands[0], <MODE>mode)
356 || register_operand (operands[1], <MODE>mode))"
358 switch (which_alternative)
361 return standard_sse_constant_opcode (insn, operands[1]);
364 switch (get_attr_mode (insn))
369 && (misaligned_operand (operands[0], <MODE>mode)
370 || misaligned_operand (operands[1], <MODE>mode)))
371 return "vmovups\t{%1, %0|%0, %1}";
373 return "%vmovaps\t{%1, %0|%0, %1}";
378 && (misaligned_operand (operands[0], <MODE>mode)
379 || misaligned_operand (operands[1], <MODE>mode)))
380 return "vmovupd\t{%1, %0|%0, %1}";
381 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
382 return "%vmovaps\t{%1, %0|%0, %1}";
384 return "%vmovapd\t{%1, %0|%0, %1}";
389 && (misaligned_operand (operands[0], <MODE>mode)
390 || misaligned_operand (operands[1], <MODE>mode)))
391 return "vmovdqu\t{%1, %0|%0, %1}";
392 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
393 return "%vmovaps\t{%1, %0|%0, %1}";
395 return "%vmovdqa\t{%1, %0|%0, %1}";
404 [(set_attr "type" "sselog1,ssemov,ssemov")
405 (set_attr "prefix" "maybe_vex")
407 (cond [(match_test "TARGET_AVX")
408 (const_string "<sseinsnmode>")
409 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
410 (not (match_test "TARGET_SSE2")))
411 (and (eq_attr "alternative" "2")
412 (match_test "TARGET_SSE_TYPELESS_STORES")))
413 (const_string "V4SF")
414 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
415 (const_string "V4SF")
416 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
417 (const_string "V2DF")
419 (const_string "TI")))])
;; Emits %vmovq from a V2DI register-or-memory source (constraint "xm")
;; into an SSE register.  The visible RTL selects element 0 of operand 1;
;; the rest of the RTL expression is not shown in this listing.
421 (define_insn "sse2_movq128"
422 [(set (match_operand:V2DI 0 "register_operand" "=x")
425 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
426 (parallel [(const_int 0)]))
429 "%vmovq\t{%1, %0|%0, %1}"
430 [(set_attr "type" "ssemov")
431 (set_attr "prefix" "maybe_vex")
432 (set_attr "mode" "TI")])
434 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
435 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
436 ;; from memory, we'd prefer to load the memory directly into the %xmm
437 ;; register. To facilitate this happy circumstance, this pattern won't
438 ;; split until after register allocation. If the 64-bit value didn't
439 ;; come from memory, this is the best we can do. This is much better
440 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
;; Enabled only when !TARGET_64BIT && TARGET_SSE2 &&
;; TARGET_INTER_UNIT_MOVES, and split only once reload has completed.
;;
;; Register source (alternative 0): each 32-bit half of operand 1 is
;; loaded with sse2_loadld -- the half at byte offset 0 into operand 0,
;; the half at byte offset 4 into the scratch operand 2 -- and the two
;; are then combined with vec_interleave_lowv4si.  The scratch is an
;; early-clobber SSE register ("=&x") in this alternative.
;;
;; Memory source (alternative 1): a single vec_concatv2di of operand 1
;; with const0_rtx, written to the V2DI low part of operand 0; no scratch
;; register is needed ("X").
443 (define_insn_and_split "movdi_to_sse"
445 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
446 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
447 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
448 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
450 "&& reload_completed"
453 if (register_operand (operands[1], DImode))
455 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
456 Assemble the 64-bit DImode value in an xmm register. */
457 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
458 gen_rtx_SUBREG (SImode, operands[1], 0)));
459 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
460 gen_rtx_SUBREG (SImode, operands[1], 4)));
461 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
464 else if (memory_operand (operands[1], DImode))
465 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
466 operands[1], const0_rtx));
472 [(set (match_operand:V4SF 0 "register_operand" "")
473 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
474 "TARGET_SSE && reload_completed"
477 (vec_duplicate:V4SF (match_dup 1))
481 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
482 operands[2] = CONST0_RTX (V4SFmode);
486 [(set (match_operand:V2DF 0 "register_operand" "")
487 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
488 "TARGET_SSE2 && reload_completed"
489 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
491 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
492 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in V16; the single register operand is
;; handed to ix86_expand_push.
495 (define_expand "push<mode>1"
496 [(match_operand:V16 0 "register_operand" "")]
499 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in V16; the operand array is
;; handed to ix86_expand_vector_move_misalign.
503 (define_expand "movmisalign<mode>"
504 [(set (match_operand:V16 0 "nonimmediate_operand" "")
505 (match_operand:V16 1 "nonimmediate_operand" ""))]
508 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float vector move.  If both operands are
;; memory references, operand 1 is first forced into a register, so the
;; matching insn never sees a memory-to-memory move.
512 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
513 [(set (match_operand:VF 0 "nonimmediate_operand" "")
515 [(match_operand:VF 1 "nonimmediate_operand" "")]
519 if (MEM_P (operands[0]) && MEM_P (operands[1]))
520 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float vector move.  Alternative 0 loads a register from a
;; register or memory; alternative 1 stores a register to memory.  The
;; condition rejects memory-to-memory.  Emits %vmovu<ssemodesuffix> and
;; sets the "movu" attribute to 1.
523 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
524 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
526 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
528 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
529 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
530 [(set_attr "type" "ssemov")
531 (set_attr "movu" "1")
532 (set_attr "prefix" "maybe_vex")
533 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned QI-element vector move.  If both operands
;; are memory references, operand 1 is first forced into a register.
535 (define_expand "<sse2>_movdqu<avxsizesuffix>"
536 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
537 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
541 if (MEM_P (operands[0]) && MEM_P (operands[1]))
542 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned QI-element vector move.  Alternative 0 loads a register from
;; a register or memory; alternative 1 stores a register to memory.  The
;; condition requires TARGET_SSE2 and rejects memory-to-memory.  Emits
;; %vmovdqu; the prefix_data16 attribute depends on TARGET_AVX (the
;; values chosen are not shown in this listing).
545 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
546 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
547 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
550 "%vmovdqu\t{%1, %0|%0, %1}"
551 [(set_attr "type" "ssemov")
552 (set_attr "movu" "1")
553 (set (attr "prefix_data16")
555 (match_test "TARGET_AVX")
558 (set_attr "prefix" "maybe_vex")
559 (set_attr "mode" "<sseinsnmode>")])
;; Load-only pattern: the source must be memory ("m") and the destination
;; an SSE register.  Emits %vlddqu.  Both prefix_data16 and prefix_rep
;; depend on TARGET_AVX (the values chosen are not shown in this
;; listing).
561 (define_insn "<sse3>_lddqu<avxsizesuffix>"
562 [(set (match_operand:VI1 0 "register_operand" "=x")
563 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
566 "%vlddqu\t{%1, %0|%0, %1}"
567 [(set_attr "type" "ssemov")
568 (set_attr "movu" "1")
569 (set (attr "prefix_data16")
571 (match_test "TARGET_AVX")
574 (set (attr "prefix_rep")
576 (match_test "TARGET_AVX")
579 (set_attr "prefix" "maybe_vex")
580 (set_attr "mode" "<sseinsnmode>")])
;; Store of an SImode general register ("r") to memory using movnti.
;; There is no VEX form: the mnemonic has no %v prefix and no "prefix"
;; attribute is set.
582 (define_insn "sse2_movntsi"
583 [(set (match_operand:SI 0 "memory_operand" "=m")
584 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
587 "movnti\t{%1, %0|%0, %1}"
588 [(set_attr "type" "ssemov")
589 (set_attr "prefix_data16" "0")
590 (set_attr "mode" "V2DF")])
;; Store of a float vector SSE register to memory using
;; %vmovnt<ssemodesuffix>, for every mode in VF.
592 (define_insn "<sse>_movnt<mode>"
593 [(set (match_operand:VF 0 "memory_operand" "=m")
594 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
597 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
598 [(set_attr "type" "ssemov")
599 (set_attr "prefix" "maybe_vex")
600 (set_attr "mode" "<MODE>")])
;; Store of a DI-element vector SSE register to memory using %vmovntdq,
;; for every mode in VI8.  Note the "type" is "ssecvt" here, whereas the
;; neighbouring movnt patterns use "ssemov".
602 (define_insn "<sse2>_movnt<mode>"
603 [(set (match_operand:VI8 0 "memory_operand" "=m")
604 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
607 "%vmovntdq\t{%1, %0|%0, %1}"
608 [(set_attr "type" "ssecvt")
609 (set (attr "prefix_data16")
611 (match_test "TARGET_AVX")
614 (set_attr "prefix" "maybe_vex")
615 (set_attr "mode" "<sseinsnmode>")])
617 ; Expand patterns for non-temporal stores. At the moment, only those
618 ; that directly map to insns are defined; it would be possible to
619 ; define patterns for other modes that would expand to several insns.
621 ;; Modes handled by storent patterns.
;; Scalar SI needs TARGET_SSE2; scalar SF and DF need TARGET_SSE4A.  Of
;; the float vector modes, V8SF/V4DF need TARGET_AVX, V2DF needs
;; TARGET_SSE2 and V4SF is unconditional.
;; NOTE(review): the embedded numbering jumps from 623 to 625, so one
;; line of entries may be missing from this listing.
622 (define_mode_iterator STORENT_MODE
623 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
625 (V8SF "TARGET_AVX") V4SF
626 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over STORENT_MODE: register source, memory destination.  No
;; preparation code is visible in this listing.
628 (define_expand "storent<mode>"
629 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
631 [(match_operand:STORENT_MODE 1 "register_operand" "")]
635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
637 ;; Parallel floating point arithmetic
639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary float operations named by <code>, over every
;; mode in VF.  All work is delegated to ix86_expand_fp_absneg_operator,
;; after which expansion is finished (DONE).
641 (define_expand "<code><mode>2"
642 [(set (match_operand:VF 0 "register_operand" "")
644 (match_operand:VF 1 "register_operand" "")))]
646 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg placeholder that is split after reload into one bitwise
;; operation between operand 1 and operand 2 (operand 2 is carried in a
;; `use').  The operation is XOR when operator 3 is NEG and AND
;; otherwise.  When operand 1 is in memory the two inputs are swapped, so
;; that the memory reference becomes the second input of the emitted
;; operation.  Alternatives 0-1 are non-AVX (one input tied to the
;; destination), alternatives 2-3 are AVX.
648 (define_insn_and_split "*absneg<mode>2"
649 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
650 (match_operator:VF 3 "absneg_operator"
651 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
652 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
655 "&& reload_completed"
658 enum rtx_code absneg_op;
664 if (MEM_P (operands[1]))
665 op1 = operands[2], op2 = operands[1];
667 op1 = operands[1], op2 = operands[2];
672 if (rtx_equal_p (operands[0], operands[1]))
678 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
679 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
680 t = gen_rtx_SET (VOIDmode, operands[0], t);
684 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for float vector add/subtract over every mode in VF.  The
;; operands are legitimized with ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
686 (define_expand "<plusminus_insn><mode>3"
687 [(set (match_operand:VF 0 "register_operand" "")
689 (match_operand:VF 1 "nonimmediate_operand" "")
690 (match_operand:VF 2 "nonimmediate_operand" "")))]
692 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Float vector add/subtract.  Alternative 0 (non-AVX) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (AVX) is
;; the three-operand v-prefixed form.  The <comm> attribute in operand
;; 1's constraint supplies the commutativity marker per operation.
694 (define_insn "*<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "=x,x")
697 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
698 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
699 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
701 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
702 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
703 [(set_attr "isa" "noavx,avx")
704 (set_attr "type" "sseadd")
705 (set_attr "prefix" "orig,vex")
706 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/subtract on 128-bit float vectors (VF_128).  The
;; mnemonic uses <ssescalarmodesuffix> and the "mode" attribute is the
;; element mode.  Operand 1 must be a register and is tied to the
;; destination in the non-AVX alternative.
708 (define_insn "<sse>_vm<plusminus_insn><mode>3"
709 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
712 (match_operand:VF_128 1 "register_operand" "0,x")
713 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
718 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
719 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
720 [(set_attr "isa" "noavx,avx")
721 (set_attr "type" "sseadd")
722 (set_attr "prefix" "orig,vex")
723 (set_attr "mode" "<ssescalarmode>")])
;; Expander for float vector multiply over every mode in VF.  The
;; operands are legitimized with ix86_fixup_binary_operands_no_copy
;; (code MULT) before the pattern is emitted.
725 (define_expand "mul<mode>3"
726 [(set (match_operand:VF 0 "register_operand" "")
728 (match_operand:VF 1 "nonimmediate_operand" "")
729 (match_operand:VF 2 "nonimmediate_operand" "")))]
731 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Float vector multiply.  Operand 1 is marked commutative ("%").
;; Alternative 0 (non-AVX) is the two-operand form with operand 1 tied to
;; the destination; alternative 1 (AVX) is the three-operand vmul form.
733 (define_insn "*mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "=x,x")
736 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
737 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
738 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
740 mul<ssemodesuffix>\t{%2, %0|%0, %2}
741 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "isa" "noavx,avx")
743 (set_attr "type" "ssemul")
744 (set_attr "prefix" "orig,vex")
745 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply on 128-bit float vectors (VF_128).  The
;; mnemonic uses <ssescalarmodesuffix> and the "mode" attribute is the
;; element mode.  Operand 1 must be a register and is tied to the
;; destination in the non-AVX alternative.
747 (define_insn "<sse>_vmmul<mode>3"
748 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
751 (match_operand:VF_128 1 "register_operand" "0,x")
752 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
757 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
758 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
759 [(set_attr "isa" "noavx,avx")
760 (set_attr "type" "ssemul")
761 (set_attr "prefix" "orig,vex")
762 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for DF-element vectors (VF2).  Operand 1 must be a
;; register; the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy (code DIV).
764 (define_expand "div<mode>3"
765 [(set (match_operand:VF2 0 "register_operand" "")
766 (div:VF2 (match_operand:VF2 1 "register_operand" "")
767 (match_operand:VF2 2 "nonimmediate_operand" "")))]
769 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for SF-element vectors (VF1).  After the usual
;; operand fixup, a software division sequence is emitted through
;; ix86_emit_swdivsf when all of the visible conditions hold:
;; TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; A leading condition on the same `if' is not shown in this listing.
771 (define_expand "div<mode>3"
772 [(set (match_operand:VF1 0 "register_operand" "")
773 (div:VF1 (match_operand:VF1 1 "register_operand" "")
774 (match_operand:VF1 2 "nonimmediate_operand" "")))]
777 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
780 && TARGET_RECIP_VEC_DIV
781 && !optimize_insn_for_size_p ()
782 && flag_finite_math_only && !flag_trapping_math
783 && flag_unsafe_math_optimizations)
785 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Float vector divide over every mode in VF.  Operand 1 must be a
;; register (division is not commutative, so there is no "%" marker).
;; Alternative 0 is the two-operand non-AVX form; alternative 1 is the
;; three-operand vdiv form.
790 (define_insn "<sse>_div<mode>3"
791 [(set (match_operand:VF 0 "register_operand" "=x,x")
793 (match_operand:VF 1 "register_operand" "0,x")
794 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
797 div<ssemodesuffix>\t{%2, %0|%0, %2}
798 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
799 [(set_attr "isa" "noavx,avx")
800 (set_attr "type" "ssediv")
801 (set_attr "prefix" "orig,vex")
802 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed divide on 128-bit float vectors (VF_128).  The
;; mnemonic uses <ssescalarmodesuffix> and the "mode" attribute is the
;; element mode.  Operand 1 is tied to the destination in the non-AVX
;; alternative.
804 (define_insn "<sse>_vmdiv<mode>3"
805 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
808 (match_operand:VF_128 1 "register_operand" "0,x")
809 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
814 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
815 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssediv")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision reciprocal (UNSPEC_RCP) over VF1.  A single
;; alternative emits %vrcpps from a register-or-memory source.
821 (define_insn "<sse>_rcp<mode>2"
822 [(set (match_operand:VF1 0 "register_operand" "=x")
824 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
826 "%vrcpps\t{%1, %0|%0, %1}"
827 [(set_attr "type" "sse")
828 (set_attr "atom_sse_attr" "rcp")
829 (set_attr "prefix" "maybe_vex")
830 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF.  Operand 1 is the value operated on;
;; operand 2 is a register that is tied to the destination in the non-AVX
;; alternative and passed as the extra source of vrcpss in the AVX
;; alternative.  The "mode" attribute is SF.
832 (define_insn "sse_vmrcpv4sf2"
833 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
835 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
837 (match_operand:V4SF 2 "register_operand" "0,x")
841 rcpss\t{%1, %0|%0, %1}
842 vrcpss\t{%1, %2, %0|%0, %2, %1}"
843 [(set_attr "isa" "noavx,avx")
844 (set_attr "type" "sse")
845 (set_attr "atom_sse_attr" "rcp")
846 (set_attr "prefix" "orig,vex")
847 (set_attr "mode" "SF")])
;; Square-root expander for DF-element vectors (VF2); no preparation code
;; is visible in this listing.
849 (define_expand "sqrt<mode>2"
850 [(set (match_operand:VF2 0 "register_operand" "")
851 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for SF-element vectors (VF1).  A software
;; sequence is emitted through ix86_emit_swsqrtsf (last argument false)
;; when all of the visible conditions hold: TARGET_RECIP_VEC_SQRT, not
;; optimizing the insn for size, -ffinite-math-only, no trapping math,
;; and unsafe math optimizations.  A leading condition on the same `if'
;; is not shown in this listing.
854 (define_expand "sqrt<mode>2"
855 [(set (match_operand:VF1 0 "register_operand" "")
856 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
860 && TARGET_RECIP_VEC_SQRT
861 && !optimize_insn_for_size_p ()
862 && flag_finite_math_only && !flag_trapping_math
863 && flag_unsafe_math_optimizations)
865 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Float vector square root over every mode in VF.  A single alternative
;; emits %vsqrt<ssemodesuffix> from a register-or-memory source.
870 (define_insn "<sse>_sqrt<mode>2"
871 [(set (match_operand:VF 0 "register_operand" "=x")
872 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
874 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "atom_sse_attr" "sqrt")
877 (set_attr "prefix" "maybe_vex")
878 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on 128-bit float vectors (VF_128).
;; Operand 1 is the value operated on; operand 2 is a register that is
;; tied to the destination in the non-AVX alternative and passed as the
;; extra source in the AVX alternative.
880 (define_insn "<sse>_vmsqrt<mode>2"
881 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
884 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
885 (match_operand:VF_128 2 "register_operand" "0,x")
889 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
890 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
891 [(set_attr "isa" "noavx,avx")
892 (set_attr "type" "sse")
893 (set_attr "atom_sse_attr" "sqrt")
894 (set_attr "prefix" "orig,vex")
895 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for SF-element vectors (VF1).  The
;; visible preparation code calls ix86_emit_swsqrtsf with its last
;; argument true (the sqrt<mode>2 expander passes false).
897 (define_expand "rsqrt<mode>2"
898 [(set (match_operand:VF1 0 "register_operand" "")
900 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
903 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root (UNSPEC_RSQRT) over
;; VF1.  A single alternative emits %vrsqrtps from a register-or-memory
;; source.
907 (define_insn "<sse>_rsqrt<mode>2"
908 [(set (match_operand:VF1 0 "register_operand" "=x")
910 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
912 "%vrsqrtps\t{%1, %0|%0, %1}"
913 [(set_attr "type" "sse")
914 (set_attr "prefix" "maybe_vex")
915 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF.  Operand 1 is the value
;; operated on; operand 2 is a register that is tied to the destination
;; in the non-AVX alternative and passed as the extra source of vrsqrtss
;; in the AVX alternative.  The "mode" attribute is SF.
917 (define_insn "sse_vmrsqrtv4sf2"
918 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
920 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
922 (match_operand:V4SF 2 "register_operand" "0,x")
926 rsqrtss\t{%1, %0|%0, %1}
927 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
928 [(set_attr "isa" "noavx,avx")
929 (set_attr "type" "sse")
930 (set_attr "prefix" "orig,vex")
931 (set_attr "mode" "SF")])
933 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
934 ;; isn't really correct, as those rtl operators aren't defined when
935 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander over every mode in VF.  Without -ffinite-math-only,
;; operand 1 is forced into a register first; this matches the
;; non-finite insn below, which only accepts a register there.  The
;; operands are then legitimized with ix86_fixup_binary_operands_no_copy.
937 (define_expand "<code><mode>3"
938 [(set (match_operand:VF 0 "register_operand" "")
940 (match_operand:VF 1 "nonimmediate_operand" "")
941 (match_operand:VF 2 "nonimmediate_operand" "")))]
944 if (!flag_finite_math_only)
945 operands[1] = force_reg (<MODE>mode, operands[1]);
946 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max under -ffinite-math-only.  Operand 1 is marked commutative
;; ("%") and may be memory.  Alternative 0 is the two-operand non-AVX
;; form; alternative 1 is the three-operand v-prefixed form.
949 (define_insn "*<code><mode>3_finite"
950 [(set (match_operand:VF 0 "register_operand" "=x,x")
952 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
953 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
954 "TARGET_SSE && flag_finite_math_only
955 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
957 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
958 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
959 [(set_attr "isa" "noavx,avx")
960 (set_attr "type" "sseadd")
961 (set_attr "prefix" "orig,vex")
962 (set_attr "mode" "<MODE>")])
;; Min/max when -ffinite-math-only is NOT in effect.  Unlike the _finite
;; variant, operand 1 must be a register and carries no commutativity
;; marker, so the operand order is preserved.  Same two output forms.
964 (define_insn "*<code><mode>3"
965 [(set (match_operand:VF 0 "register_operand" "=x,x")
967 (match_operand:VF 1 "register_operand" "0,x")
968 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
969 "TARGET_SSE && !flag_finite_math_only"
971 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
972 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
973 [(set_attr "isa" "noavx,avx")
974 (set_attr "type" "sseadd")
975 (set_attr "prefix" "orig,vex")
976 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on 128-bit float vectors (VF_128).  The
;; mnemonic uses <ssescalarmodesuffix> and the "mode" attribute is the
;; element mode.  Note the "type" is "sse" here, whereas the packed
;; min/max patterns use "sseadd".
978 (define_insn "<sse>_vm<code><mode>3"
979 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
982 (match_operand:VF_128 1 "register_operand" "0,x")
983 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
988 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
989 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
990 [(set_attr "isa" "noavx,avx")
991 (set_attr "type" "sse")
992 (set_attr "prefix" "orig,vex")
993 (set_attr "mode" "<ssescalarmode>")])
995 ;; These versions of the min/max patterns implement exactly the operations
996 ;; min = (op1 < op2 ? op1 : op2)
997 ;; max = (!(op1 < op2) ? op1 : op2)
998 ;; Their operands are not commutative, and thus they may be used in the
999 ;; presence of -0.0 and NaN.
;; Order-preserving minimum over every mode in VF.  Operand 1 must be a
;; register and has no commutativity marker.  Emits min<ssemodesuffix>
;; (non-AVX, two-operand) or vmin<ssemodesuffix> (AVX, three-operand).
1001 (define_insn "*ieee_smin<mode>3"
1002 [(set (match_operand:VF 0 "register_operand" "=x,x")
1004 [(match_operand:VF 1 "register_operand" "0,x")
1005 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1009 min<ssemodesuffix>\t{%2, %0|%0, %2}
1010 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1011 [(set_attr "isa" "noavx,avx")
1012 (set_attr "type" "sseadd")
1013 (set_attr "prefix" "orig,vex")
1014 (set_attr "mode" "<MODE>")])
;; Order-preserving maximum over every mode in VF.  Operand 1 must be a
;; register and has no commutativity marker.  Emits max<ssemodesuffix>
;; (non-AVX, two-operand) or vmax<ssemodesuffix> (AVX, three-operand).
1016 (define_insn "*ieee_smax<mode>3"
1017 [(set (match_operand:VF 0 "register_operand" "=x,x")
1019 [(match_operand:VF 1 "register_operand" "0,x")
1020 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1024 max<ssemodesuffix>\t{%2, %0|%0, %2}
1025 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1026 [(set_attr "isa" "noavx,avx")
1027 (set_attr "type" "sseadd")
1028 (set_attr "prefix" "orig,vex")
1029 (set_attr "mode" "V4DF" "<MODE>")])
;; AVX-only V4DF add/subtract pattern emitting the three-operand
;; vaddsubpd.  The visible RTL contains a minus of operands 1 and 2; the
;; remainder of the expression is not shown in this listing.
1031 (define_insn "avx_addsubv4df3"
1032 [(set (match_operand:V4DF 0 "register_operand" "=x")
1035 (match_operand:V4DF 1 "register_operand" "x")
1036 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1037 (minus:V4DF (match_dup 1) (match_dup 2))
1040 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "V4DF")])
;; V2DF add/subtract pattern.  Alternative 0 emits the two-operand
;; addsubpd with operand 1 tied to the destination; alternative 1 emits
;; the three-operand vaddsubpd.  The visible RTL contains a minus of
;; operands 1 and 2.
1045 (define_insn "sse3_addsubv2df3"
1046 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1049 (match_operand:V2DF 1 "register_operand" "0,x")
1050 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1051 (minus:V2DF (match_dup 1) (match_dup 2))
1055 addsubpd\t{%2, %0|%0, %2}
1056 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1057 [(set_attr "isa" "noavx,avx")
1058 (set_attr "type" "sseadd")
1059 (set_attr "atom_unit" "complex")
1060 (set_attr "prefix" "orig,vex")
1061 (set_attr "mode" "V2DF")])
;; AVX-only V8SF add/subtract pattern emitting the three-operand
;; vaddsubps.  The visible RTL contains a minus of operands 1 and 2; the
;; remainder of the expression is not shown in this listing.
1063 (define_insn "avx_addsubv8sf3"
1064 [(set (match_operand:V8SF 0 "register_operand" "=x")
1067 (match_operand:V8SF 1 "register_operand" "x")
1068 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1069 (minus:V8SF (match_dup 1) (match_dup 2))
1072 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1073 [(set_attr "type" "sseadd")
1074 (set_attr "prefix" "vex")
1075 (set_attr "mode" "V8SF")])
;; V4SF add/subtract pattern.  Alternative 0 emits the two-operand
;; addsubps with operand 1 tied to the destination; alternative 1 emits
;; the three-operand vaddsubps.  prefix_rep is 1 for the non-AVX
;; alternative and left at its default ("*") for the AVX one.
1077 (define_insn "sse3_addsubv4sf3"
1078 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1081 (match_operand:V4SF 1 "register_operand" "0,x")
1082 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1083 (minus:V4SF (match_dup 1) (match_dup 2))
1087 addsubps\t{%2, %0|%0, %2}
1088 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1089 [(set_attr "isa" "noavx,avx")
1090 (set_attr "type" "sseadd")
1091 (set_attr "prefix" "orig,vex")
1092 (set_attr "prefix_rep" "1,*")
1093 (set_attr "mode" "V4SF")])
;; AVX-only V4DF horizontal add/subtract emitting vh<plusminus_mnemonic>pd.
;; The visible RTL selects the adjacent element pairs (0,1) and (2,3) of
;; operand 1 and the same pairs of operand 2; how the four results are
;; arranged in the destination is not shown in this listing.
1095 (define_insn "avx_h<plusminus_insn>v4df3"
1096 [(set (match_operand:V4DF 0 "register_operand" "=x")
1101 (match_operand:V4DF 1 "register_operand" "x")
1102 (parallel [(const_int 0)]))
1103 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1105 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1106 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1111 (parallel [(const_int 0)]))
1112 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1114 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1115 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1117 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1118 [(set_attr "type" "sseadd")
1119 (set_attr "prefix" "vex")
1120 (set_attr "mode" "V4DF")])
;; Horizontal add/subtract on V2DF.  The visible RTL selects DF elements 0
;; and 1 of operand 1 and of operand 2.  Alternative 0 is the legacy
;; two-operand form (destination tied to operand 1), alternative 1 the
;; three-operand VEX form; "isa" pairs them with noavx/avx.
;; NOTE(review): the combining operators and the insn condition are on lines
;; not visible in this excerpt.
1122 (define_insn "sse3_h<plusminus_insn>v2df3"
1123 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1127 (match_operand:V2DF 1 "register_operand" "0,x")
1128 (parallel [(const_int 0)]))
1129 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1132 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1133 (parallel [(const_int 0)]))
1134 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1137 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1138 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "isa" "noavx,avx")
1140 (set_attr "type" "sseadd")
1141 (set_attr "prefix" "orig,vex")
1142 (set_attr "mode" "V2DF")])
;; Horizontal add/subtract on V8SF.  The visible RTL selects SF elements
;; 0..3 of operand 1, then 0..3 of operand 2, then 4..7 of operand 1, then
;; 4..7 of operand 2, in that textual order.  Always emitted in VEX form.
;; NOTE(review): the combining operators and the insn condition are on lines
;; not visible in this excerpt.
1144 (define_insn "avx_h<plusminus_insn>v8sf3"
1145 [(set (match_operand:V8SF 0 "register_operand" "=x")
1151 (match_operand:V8SF 1 "register_operand" "x")
1152 (parallel [(const_int 0)]))
1153 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1155 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1156 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1160 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1161 (parallel [(const_int 0)]))
1162 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1164 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1169 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1170 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1172 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1173 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1176 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1177 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1179 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1180 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1182 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1183 [(set_attr "type" "sseadd")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "V8SF")])
;; Horizontal add/subtract on V4SF.  The visible RTL selects SF elements
;; 0..3 of operand 1 followed by 0..3 of operand 2.  Alternative 0 is the
;; legacy two-operand form (destination tied to operand 1, "prefix_rep" 1),
;; alternative 1 the three-operand VEX form.
;; NOTE(review): the combining operators and the insn condition are on lines
;; not visible in this excerpt.
1187 (define_insn "sse3_h<plusminus_insn>v4sf3"
1188 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1193 (match_operand:V4SF 1 "register_operand" "0,x")
1194 (parallel [(const_int 0)]))
1195 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1197 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1198 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1202 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1203 (parallel [(const_int 0)]))
1204 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1206 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1207 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1210 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1211 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "isa" "noavx,avx")
1213 (set_attr "type" "sseadd")
1214 (set_attr "atom_unit" "complex")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "prefix_rep" "1,*")
1217 (set_attr "mode" "V4SF")])
;; Expander for the V4DF sum reduction.  Emits three insns using two fresh
;; pseudos: tmp = hadd (op1, op1); tmp2 = vperm2f128 (tmp, tmp, imm 1);
;; op0 = tmp + tmp2.
;; NOTE(review): the enabling condition is not visible in this excerpt.
1219 (define_expand "reduc_splus_v4df"
1220 [(match_operand:V4DF 0 "register_operand" "")
1221 (match_operand:V4DF 1 "register_operand" "")]
1224 rtx tmp = gen_reg_rtx (V4DFmode);
1225 rtx tmp2 = gen_reg_rtx (V4DFmode);
1226 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1227 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1228 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for the V2DF sum reduction: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
1232 (define_expand "reduc_splus_v2df"
1233 [(match_operand:V2DF 0 "register_operand" "")
1234 (match_operand:V2DF 1 "register_operand" "")]
1237 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V8SF sum reduction.  Emits four insns:
;; tmp = hadd (op1, op1); tmp2 = hadd (tmp, tmp);
;; tmp = vperm2f128 (tmp2, tmp2, imm 1) (tmp is reused); op0 = tmp + tmp2.
;; NOTE(review): the enabling condition is not visible in this excerpt.
1241 (define_expand "reduc_splus_v8sf"
1242 [(match_operand:V8SF 0 "register_operand" "")
1243 (match_operand:V8SF 1 "register_operand" "")]
1246 rtx tmp = gen_reg_rtx (V8SFmode);
1247 rtx tmp2 = gen_reg_rtx (V8SFmode);
1248 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1249 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1250 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1251 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for the V4SF sum reduction.  Two code paths are visible: a pair
;; of chained horizontal adds (op1 -> tmp -> op0), and a call to
;; ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the test that selects between the two paths, and the
;; enabling condition, are on lines not visible in this excerpt.
1255 (define_expand "reduc_splus_v4sf"
1256 [(match_operand:V4SF 0 "register_operand" "")
1257 (match_operand:V4SF 1 "register_operand" "")]
1262 rtx tmp = gen_reg_rtx (V4SFmode);
1263 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1264 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1267 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1271 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Every entry carries its own enabling condition string: the 256-bit
;; integer modes use TARGET_AVX2, the 256-bit float modes use TARGET_AVX,
;; and V4SF uses TARGET_SSE.
1272 (define_mode_iterator REDUC_SMINMAX_MODE
1273 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1274 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1275 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1276 (V4SF "TARGET_SSE")])
;; Reduction expander instantiated for every code of the smaxmin iterator
;; and every mode of REDUC_SMINMAX_MODE.  All the work is delegated to
;; ix86_expand_reduc, which receives the generator of the matching
;; <code><mode>3 pattern, the destination and the source.
;; NOTE(review): the enabling condition is not visible in this excerpt.
1278 (define_expand "reduc_<code>_<mode>"
1279 [(smaxmin:REDUC_SMINMAX_MODE
1280 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1281 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1284 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Second reduc_<code>_<mode> expander, this one over the VI_256 mode
;; iterator.  Delegates to ix86_expand_reduc in the same way as the
;; expander above.
;; NOTE(review): the line naming the code iterator and the enabling
;; condition are not visible in this excerpt.
1288 (define_expand "reduc_<code>_<mode>"
1290 (match_operand:VI_256 0 "register_operand" "")
1291 (match_operand:VI_256 1 "register_operand" ""))]
1294 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; V8HI reduction expander; delegates to ix86_expand_reduc with
;; gen_uminv8hi3 as the combining operation.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this excerpt.
1298 (define_expand "reduc_umin_v8hi"
1300 (match_operand:V8HI 0 "register_operand" "")
1301 (match_operand:V8HI 1 "register_operand" ""))]
1304 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1310 ;; Parallel floating point comparisons
1312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare over the VF modes.  Operand 3 is an immediate accepted
;; by const_0_to_31_operand and is printed as the first AT&T operand;
;; "length_immediate" accounts for its one byte.
;; NOTE(review): the wrapping RTL operator and the insn condition are on
;; lines not visible in this excerpt.
1314 (define_insn "avx_cmp<mode>3"
1315 [(set (match_operand:VF 0 "register_operand" "=x")
1317 [(match_operand:VF 1 "register_operand" "x")
1318 (match_operand:VF 2 "nonimmediate_operand" "xm")
1319 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1322 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1323 [(set_attr "type" "ssecmp")
1324 (set_attr "length_immediate" "1")
1325 (set_attr "prefix" "vex")
1326 (set_attr "mode" "<MODE>")])
;; Scalar counterpart of avx_cmp<mode>3 for the 128-bit float modes: the
;; mnemonic uses <ssescalarmodesuffix> and the "mode" attribute is the
;; scalar mode.  Operand 3 is the same 0..31 immediate.
;; NOTE(review): the wrapping RTL operators and the insn condition are on
;; lines not visible in this excerpt.
1328 (define_insn "avx_vmcmp<mode>3"
1329 [(set (match_operand:VF_128 0 "register_operand" "=x")
1332 [(match_operand:VF_128 1 "register_operand" "x")
1333 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1334 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1339 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1340 [(set_attr "type" "ssecmp")
1341 (set_attr "length_immediate" "1")
1342 (set_attr "prefix" "vex")
1343 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare, commutative variant.  The insn condition
;; requires the rtx class of operator 3 to be RTX_COMM_COMPARE, and operand 1
;; carries the '%' constraint modifier so it may be swapped with operand 2.
;; The comparison is printed through %D3.  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX form.
;; NOTE(review): the first part of the insn condition is not visible in this
;; excerpt.
1345 (define_insn "*<sse>_maskcmp<mode>3_comm"
1346 [(set (match_operand:VF 0 "register_operand" "=x,x")
1347 (match_operator:VF 3 "sse_comparison_operator"
1348 [(match_operand:VF 1 "register_operand" "%0,x")
1349 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1351 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1353 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1354 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1355 [(set_attr "isa" "noavx,avx")
1356 (set_attr "type" "ssecmp")
1357 (set_attr "length_immediate" "1")
1358 (set_attr "prefix" "orig,vex")
1359 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any operator accepted by
;; sse_comparison_operator.  Unlike the _comm variant, operand 1 has no '%'
;; modifier, so operand order is fixed.  The comparison is printed through
;; %D3.  Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; form.
;; NOTE(review): the insn condition is not visible in this excerpt.
1361 (define_insn "<sse>_maskcmp<mode>3"
1362 [(set (match_operand:VF 0 "register_operand" "=x,x")
1363 (match_operator:VF 3 "sse_comparison_operator"
1364 [(match_operand:VF 1 "register_operand" "0,x")
1365 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1368 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1369 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1370 [(set_attr "isa" "noavx,avx")
1371 (set_attr "type" "ssecmp")
1372 (set_attr "length_immediate" "1")
1373 (set_attr "prefix" "orig,vex")
1374 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare for the 128-bit float modes; the mnemonic
;; uses <ssescalarmodesuffix> and the "mode" attribute is the scalar mode.
;; "length_immediate" is given explicitly only for the legacy alternative
;; ("1,*").
;; NOTE(review): the wrapping RTL operator and the insn condition are on
;; lines not visible in this excerpt.
1376 (define_insn "<sse>_vmmaskcmp<mode>3"
1377 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1379 (match_operator:VF_128 3 "sse_comparison_operator"
1380 [(match_operand:VF_128 1 "register_operand" "0,x")
1381 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1386 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1387 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1388 [(set_attr "isa" "noavx,avx")
1389 (set_attr "type" "ssecmp")
1390 (set_attr "length_immediate" "1,*")
1391 (set_attr "prefix" "orig,vex")
1392 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of operand
;; 0 and element 0 of operand 1.  Enabled when SSE_FLOAT_MODE_P holds for the
;; scalar mode.  The template uses the %v prefix together with the
;; "maybe_vex" prefix attribute; "prefix_data16" is computed from whether the
;; mode is DF.
;; NOTE(review): the compare operator lines and the DF arm of the
;; if_then_else are not visible in this excerpt.
1394 (define_insn "<sse>_comi"
1395 [(set (reg:CCFP FLAGS_REG)
1398 (match_operand:<ssevecmode> 0 "register_operand" "x")
1399 (parallel [(const_int 0)]))
1401 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1402 (parallel [(const_int 0)]))))]
1403 "SSE_FLOAT_MODE_P (<MODE>mode)"
1404 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1405 [(set_attr "type" "ssecomi")
1406 (set_attr "prefix" "maybe_vex")
1407 (set_attr "prefix_rep" "0")
1408 (set (attr "prefix_data16")
1409 (if_then_else (eq_attr "mode" "DF")
1411 (const_string "0")))
1412 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but sets FLAGS_REG in CCFPU mode and emits
;; ucomi.  It compares element 0 of operand 0 with element 0 of operand 1 and
;; is enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
;; NOTE(review): the compare operator lines and the DF arm of the
;; if_then_else are not visible in this excerpt.
1414 (define_insn "<sse>_ucomi"
1415 [(set (reg:CCFPU FLAGS_REG)
1418 (match_operand:<ssevecmode> 0 "register_operand" "x")
1419 (parallel [(const_int 0)]))
1421 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1422 (parallel [(const_int 0)]))))]
1423 "SSE_FLOAT_MODE_P (<MODE>mode)"
1424 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1425 [(set_attr "type" "ssecomi")
1426 (set_attr "prefix" "maybe_vex")
1427 (set_attr "prefix_rep" "0")
1428 (set (attr "prefix_data16")
1429 (if_then_else (eq_attr "mode" "DF")
1431 (const_string "0")))
1432 (set_attr "mode" "<MODE>")])
;; Vector conditional expander for 256-bit modes.  It is instantiated over
;; two iterators: V_256 for the data operands (0, 1, 2) and VF_256 for the
;; compared operands (4, 5).  The condition keeps only the pairs with the
;; same number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
;; NOTE(review): the first part of the condition and the handling of "ok"
;; are on lines not visible in this excerpt.
1434 (define_expand "vcond<V_256:mode><VF_256:mode>"
1435 [(set (match_operand:V_256 0 "register_operand" "")
1437 (match_operator 3 ""
1438 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1439 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1440 (match_operand:V_256 1 "general_operand" "")
1441 (match_operand:V_256 2 "general_operand" "")))]
1443 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1444 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1446 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander for 128-bit modes.  It is instantiated over
;; two iterators: V_128 for the data operands (0, 1, 2) and VF_128 for the
;; compared operands (4, 5).  The condition keeps only the pairs with the
;; same number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
;; NOTE(review): the first part of the condition and the handling of "ok"
;; are on lines not visible in this excerpt.
1451 (define_expand "vcond<V_128:mode><VF_128:mode>"
1452 [(set (match_operand:V_128 0 "register_operand" "")
1454 (match_operator 3 ""
1455 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1456 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1457 (match_operand:V_128 1 "general_operand" "")
1458 (match_operand:V_128 2 "general_operand" "")))]
1460 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1461 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1463 bool ok = ix86_expand_fp_vcond (operands);
1468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1470 ;; Parallel floating point logical operations
1472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output is produced by C code
;; rather than a fixed template: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode's own suffix
;; otherwise, the legacy or VEX format string is chosen by which_alternative,
;; and the two are combined with snprintf into a static 32-byte buffer.
;; NOTE(review): the RTL operator lines, the insn condition and the
;; switch/return scaffolding are on lines not visible in this excerpt.
1474 (define_insn "<sse>_andnot<mode>3"
1475 [(set (match_operand:VF 0 "register_operand" "=x,x")
1478 (match_operand:VF 1 "register_operand" "0,x"))
1479 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1482 static char buf[32];
1485 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1487 switch (which_alternative)
1490 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1493 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1499 snprintf (buf, sizeof (buf), insn, suffix);
1502 [(set_attr "isa" "noavx,avx")
1503 (set_attr "type" "sselog")
1504 (set_attr "prefix" "orig,vex")
1505 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations over the VF modes.  Both
;; inputs may be register or memory here; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy to adjust the operands before the insn
;; pattern is matched.
;; NOTE(review): the code-iterator line and the enabling condition are not
;; visible in this excerpt.
1507 (define_expand "<code><mode>3"
1508 [(set (match_operand:VF 0 "register_operand" "")
1510 (match_operand:VF 1 "nonimmediate_operand" "")
1511 (match_operand:VF 2 "nonimmediate_operand" "")))]
1513 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed logical operations over the VF modes.  It matches
;; only when TARGET_SSE holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 carries the '%' commutative modifier.  The output
;; uses the same scheme as the and-not pattern: suffix "ps" or the mode
;; suffix, legacy or VEX format chosen by which_alternative, combined with
;; snprintf into a static buffer.
;; NOTE(review): the code-iterator line and the switch/return scaffolding
;; are on lines not visible in this excerpt.
1515 (define_insn "*<code><mode>3"
1516 [(set (match_operand:VF 0 "register_operand" "=x,x")
1518 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1519 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1520 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1522 static char buf[32];
1525 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1527 switch (which_alternative)
1530 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1533 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1539 snprintf (buf, sizeof (buf), insn, suffix);
1542 [(set_attr "isa" "noavx,avx")
1543 (set_attr "type" "sselog")
1544 (set_attr "prefix" "orig,vex")
1545 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  operands[3] is a mask built by
;; ix86_build_signbit_mask; operands[4] and operands[5] are fresh pseudos.
;; The visible RTL combines (not mask) with operand 1, combines mask with
;; operand 2 using AND, and stores the IOR of the two temporaries in
;; operand 0.
;; NOTE(review): the set/operator lines for the first temporary and the
;; enabling condition are on lines not visible in this excerpt.
1547 (define_expand "copysign<mode>3"
1550 (not:VF (match_dup 3))
1551 (match_operand:VF 1 "nonimmediate_operand" "")))
1553 (and:VF (match_dup 3)
1554 (match_operand:VF 2 "nonimmediate_operand" "")))
1555 (set (match_operand:VF 0 "register_operand" "")
1556 (ior:VF (match_dup 4) (match_dup 5)))]
1559 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1561 operands[4] = gen_reg_rtx (<MODE>mode);
1562 operands[5] = gen_reg_rtx (<MODE>mode);
1565 ;; Also define scalar versions. These are used for abs, neg, and
1566 ;; conditional move. Using subregs into vector modes causes register
1567 ;; allocation lossage. These patterns do not allow memory operands
1568 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All operands are registers only.  The mnemonic
;; suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the
;; vector-mode suffix otherwise, and the "mode" attribute is the
;; corresponding vector mode.  The output string is assembled with snprintf
;; into a static buffer.
;; NOTE(review): the RTL operator lines and the switch/return scaffolding
;; are on lines not visible in this excerpt.
1570 (define_insn "*andnot<mode>3"
1571 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1574 (match_operand:MODEF 1 "register_operand" "0,x"))
1575 (match_operand:MODEF 2 "register_operand" "x,x")))]
1576 "SSE_FLOAT_MODE_P (<MODE>mode)"
1578 static char buf[32];
1581 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1583 switch (which_alternative)
1586 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1589 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1595 snprintf (buf, sizeof (buf), insn, suffix);
1598 [(set_attr "isa" "noavx,avx")
1599 (set_attr "type" "sselog")
1600 (set_attr "prefix" "orig,vex")
1601 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations.  All operands are registers only, and
;; operand 1 carries the '%' commutative modifier.  The output uses the same
;; scheme as the scalar and-not pattern, with the <logic> mnemonic and the
;; vector-mode suffix.
;; NOTE(review): the code-iterator line and the switch/return scaffolding
;; are on lines not visible in this excerpt.
1603 (define_insn "*<code><mode>3"
1604 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1606 (match_operand:MODEF 1 "register_operand" "%0,x")
1607 (match_operand:MODEF 2 "register_operand" "x,x")))]
1608 "SSE_FLOAT_MODE_P (<MODE>mode)"
1610 static char buf[32];
1613 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1615 switch (which_alternative)
1618 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1621 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1627 snprintf (buf, sizeof (buf), insn, suffix);
1630 [(set_attr "isa" "noavx,avx")
1631 (set_attr "type" "sselog")
1632 (set_attr "prefix" "orig,vex")
1633 (set_attr "mode" "<ssevecmode>")])
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; FMA4 floating point multiply/accumulate instructions. This
1638 ;; includes the scalar version of the instructions as well as the vector version.
1641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1643 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1644 ;; combine to generate a multiply/add with two memory references. We then
1645 ;; split this insn, into loading up the destination register with one of the
1646 ;; memory operations. If we don't manage to split the insn, reload will
1647 ;; generate the appropriate moves. The reason this is needed is that combine
1648 ;; has already folded one of the memory references into both the multiply and
1649 ;; add insns, and it can't generate a new pseudo. I.e.:
1650 ;; (set (reg1) (mem (addr1)))
1651 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1652 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1654 ;; ??? This is historic, pre-dating the gimple fma transformation.
1655 ;; We could now properly represent that only one memory operand is
1656 ;; allowed and not be penalized during optimization.
1658 ;; Intrinsic FMA operations.
1660 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander for fused multiply-add over FMAMODE: no operand is
;; negated.  Requires TARGET_FMA or TARGET_FMA4, and TARGET_SSE_MATH.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
1661 (define_expand "fma<mode>4"
1662 [(set (match_operand:FMAMODE 0 "register_operand")
1664 (match_operand:FMAMODE 1 "nonimmediate_operand")
1665 (match_operand:FMAMODE 2 "nonimmediate_operand")
1666 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1667 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander for fused multiply-subtract over FMAMODE: operand
;; 3 is wrapped in neg.  Same enabling condition as fma<mode>4.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
1669 (define_expand "fms<mode>4"
1670 [(set (match_operand:FMAMODE 0 "register_operand")
1672 (match_operand:FMAMODE 1 "nonimmediate_operand")
1673 (match_operand:FMAMODE 2 "nonimmediate_operand")
1674 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1675 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander for negated fused multiply-add over FMAMODE:
;; operand 1 is wrapped in neg.  Same enabling condition as fma<mode>4.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
1677 (define_expand "fnma<mode>4"
1678 [(set (match_operand:FMAMODE 0 "register_operand")
1680 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1681 (match_operand:FMAMODE 2 "nonimmediate_operand")
1682 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1683 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander for negated fused multiply-subtract over FMAMODE:
;; both operand 1 and operand 3 are wrapped in neg.  Same enabling condition
;; as fma<mode>4.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
1685 (define_expand "fnms<mode>4"
1686 [(set (match_operand:FMAMODE 0 "register_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1689 (match_operand:FMAMODE 2 "nonimmediate_operand")
1690 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1691 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1693 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing multiply-add expander over FMAMODE.  Its condition is
;; TARGET_FMA || TARGET_FMA4 only; unlike fma<mode>4 it does not test
;; TARGET_SSE_MATH.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
1694 (define_expand "fma4i_fmadd_<mode>"
1695 [(set (match_operand:FMAMODE 0 "register_operand")
1697 (match_operand:FMAMODE 1 "nonimmediate_operand")
1698 (match_operand:FMAMODE 2 "nonimmediate_operand")
1699 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1700 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd with a destination separate from all three inputs.
;; The two alternatives place the optional memory operand differently:
;; alternative 0 allows it in operand 3, alternative 1 requires operand 2 to
;; be memory and operand 3 a register.  Operand 1 is always a register and
;; carries the '%' commutative modifier.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
1702 (define_insn "*fma4i_fmadd_<mode>"
1703 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1705 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1706 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1707 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1709 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1710 [(set_attr "type" "ssemuladd")
1711 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub; same operand and constraint layout as
;; *fma4i_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 3) and the insn condition are not visible in this excerpt.
1713 (define_insn "*fma4i_fmsub_<mode>"
1714 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1716 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1717 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1719 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1721 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1722 [(set_attr "type" "ssemuladd")
1723 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd; same operand and constraint layout as
;; *fma4i_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 1) and the insn condition are not visible in this excerpt.
1725 (define_insn "*fma4i_fnmadd_<mode>"
1726 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1729 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1730 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1731 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1733 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub; same operand and constraint layout as
;; *fma4i_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrappers around
;; operands 1 and 3) and the insn condition are not visible in this excerpt.
1737 (define_insn "*fma4i_fnmsub_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1741 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1742 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1744 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1746 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1751 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 multiply-add over the 128-bit float modes.
;; The preparation code sets operands[4] to the all-zero constant of the
;; vector mode.
;; NOTE(review): the RTL operator lines, the use of operand 4 and the
;; enabling condition are on lines not visible in this excerpt.
1753 (define_expand "fma4i_vmfmadd_<mode>"
1754 [(set (match_operand:VF_128 0 "register_operand")
1757 (match_operand:VF_128 1 "nonimmediate_operand")
1758 (match_operand:VF_128 2 "nonimmediate_operand")
1759 (match_operand:VF_128 3 "nonimmediate_operand"))
1764 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander named fmai_vmfmadd_<mode> over the 128-bit float modes, taking
;; three nonimmediate inputs.
;; NOTE(review): the RTL operator lines and the enabling condition are not
;; visible in this excerpt.
1767 (define_expand "fmai_vmfmadd_<mode>"
1768 [(set (match_operand:VF_128 0 "register_operand")
1771 (match_operand:VF_128 1 "nonimmediate_operand")
1772 (match_operand:VF_128 2 "nonimmediate_operand")
1773 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand vfmadd with one alternative per encoding:
;;   alt 0 (132): dest tied to operand 1, memory allowed in operand 2;
;;   alt 1 (213): dest tied to operand 1, memory allowed in operand 3;
;;   alt 2 (231): dest tied to operand 3, memory allowed in operand 2.
;; NOTE(review): the RTL operator lines and the insn condition are not
;; visible in this excerpt.
1778 (define_insn "*fmai_fmadd_<mode>"
1779 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1782 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1783 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1784 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1789 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1790 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1791 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfmsub; the three alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fmai_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 3) and the insn condition are not visible in this excerpt.
1795 (define_insn "*fmai_fmsub_<mode>"
1796 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1799 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1800 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1802 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1807 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1808 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1809 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1810 [(set_attr "type" "ssemuladd")
1811 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmadd; the three alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fmai_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 1) and the insn condition are not visible in this excerpt.
1813 (define_insn "*fmai_fnmadd_<mode>"
1814 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1818 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1819 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1820 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1825 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1826 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1827 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1828 [(set_attr "type" "ssemuladd")
1829 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmsub; the three alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fmai_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrappers around
;; operands 1 and 3) and the insn condition are not visible in this excerpt.
1831 (define_insn "*fmai_fnmsub_<mode>"
1832 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1836 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1837 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1839 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1844 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1845 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1846 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd.  Operand 4 must satisfy const0_operand and
;; does not appear in the output template.  The memory operand may be
;; operand 3 (alternative 0) or operand 2 (alternative 1).
;; NOTE(review): the RTL operator lines and the insn condition are not
;; visible in this excerpt.
1850 (define_insn "*fma4i_vmfmadd_<mode>"
1851 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1854 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1855 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1856 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1857 (match_operand:VF_128 4 "const0_operand" "")
1860 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1861 [(set_attr "type" "ssemuladd")
1862 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub; same layout as *fma4i_vmfmadd_<mode>,
;; including the const0_operand operand 4 that is not printed.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 3) and the insn condition are not visible in this excerpt.
1864 (define_insn "*fma4i_vmfmsub_<mode>"
1865 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1868 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1869 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1871 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1872 (match_operand:VF_128 4 "const0_operand" "")
1875 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876 [(set_attr "type" "ssemuladd")
1877 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd; same layout as *fma4i_vmfmadd_<mode>,
;; including the const0_operand operand 4 that is not printed.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 1) and the insn condition are not visible in this excerpt.
1879 (define_insn "*fma4i_vmfnmadd_<mode>"
1880 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1884 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1885 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1886 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1887 (match_operand:VF_128 4 "const0_operand" "")
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub; same layout as *fma4i_vmfmadd_<mode>,
;; including the const0_operand operand 4 that is not printed.
;; NOTE(review): the RTL operator lines (including the wrappers around
;; operands 1 and 3) and the insn condition are not visible in this excerpt.
1894 (define_insn "*fma4i_vmfnmsub_<mode>"
1895 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1899 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1900 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1902 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1903 (match_operand:VF_128 4 "const0_operand" "")
1906 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1907 [(set_attr "type" "ssemuladd")
1908 (set_attr "mode" "<MODE>")])
1910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1912 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1916 ;; It would be possible to represent these without the UNSPEC as
1919 ;; (fma op1 op2 op3)
1920 ;; (fma op1 op2 (neg op3))
1923 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply add/sub operation over the VF modes.
;; The three inputs are grouped in an operand vector (the bracketed list).
;; Enabled when TARGET_FMA or TARGET_FMA4 holds.
;; NOTE(review): the wrapping operator and its identifier are on lines not
;; visible in this excerpt.
1925 (define_expand "fmaddsub_<mode>"
1926 [(set (match_operand:VF 0 "register_operand")
1928 [(match_operand:VF 1 "nonimmediate_operand")
1929 (match_operand:VF 2 "nonimmediate_operand")
1930 (match_operand:VF 3 "nonimmediate_operand")]
1932 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub over the VF modes.  The memory operand may be
;; operand 3 (alternative 0) or operand 2 (alternative 1); operand 1 is
;; always a register.
;; NOTE(review): the wrapping operator and the insn condition are on lines
;; not visible in this excerpt.
1934 (define_insn "*fma4_fmaddsub_<mode>"
1935 [(set (match_operand:VF 0 "register_operand" "=x,x")
1937 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1938 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1939 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1942 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1943 [(set_attr "type" "ssemuladd")
1944 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd over the VF modes; same constraint layout as
;; *fma4_fmaddsub_<mode>.
;; NOTE(review): the wrapping operator, the wrapper around operand 3 and the
;; insn condition are on lines not visible in this excerpt.
1946 (define_insn "*fma4_fmsubadd_<mode>"
1947 [(set (match_operand:VF 0 "register_operand" "=x,x")
1949 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1950 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1952 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1955 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1956 [(set_attr "type" "ssemuladd")
1957 (set_attr "mode" "<MODE>")])
1959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1961 ;; FMA3 floating point multiply/accumulate instructions.
1963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE, with one alternative per encoding:
;;   alt 0 (132): dest tied to operand 1, memory allowed in operand 2;
;;   alt 1 (213): dest tied to operand 1, memory allowed in operand 3;
;;   alt 2 (231): dest tied to operand 3, memory allowed in operand 2.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
1965 (define_insn "*fma_fmadd_<mode>"
1966 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1968 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1969 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1970 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1973 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1974 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1975 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1976 [(set_attr "type" "ssemuladd")
1977 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE; the alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fma_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 3) and the insn condition are not visible in this excerpt.
1979 (define_insn "*fma_fmsub_<mode>"
1980 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1982 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1983 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1985 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1988 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1989 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1990 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1991 [(set_attr "type" "ssemuladd")
1992 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE; the alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fma_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrapper around
;; operand 1) and the insn condition are not visible in this excerpt.
1994 (define_insn "*fma_fnmadd_<mode>"
1995 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1998 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1999 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2000 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2003 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2004 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2005 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2006 [(set_attr "type" "ssemuladd")
2007 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE; the alternatives are the 132, 213 and
;; 231 encodings with the same constraint layout as *fma_fmadd_<mode>.
;; NOTE(review): the RTL operator lines (including the wrappers around
;; operands 1 and 3) and the insn condition are not visible in this excerpt.
2009 (define_insn "*fma_fnmsub_<mode>"
2010 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2013 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2014 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2016 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2019 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2020 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2021 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2022 [(set_attr "type" "ssemuladd")
2023 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over the VF modes; the alternatives are the 132,
;; 213 and 231 encodings (dest tied to operand 1, operand 1, operand 3
;; respectively).
;; NOTE(review): the wrapping operator and the insn condition are on lines
;; not visible in this excerpt.
2025 (define_insn "*fma_fmaddsub_<mode>"
2026 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2028 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2029 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2030 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2034 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2035 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2036 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2037 [(set_attr "type" "ssemuladd")
2038 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over the VF modes; same alternative layout as
;; *fma_fmaddsub_<mode>.
;; NOTE(review): the wrapping operator, the wrapper around operand 3 and the
;; insn condition are on lines not visible in this excerpt.
2040 (define_insn "*fma_fmsubadd_<mode>"
2041 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2043 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2044 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2046 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2050 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2051 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2052 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<MODE>")])
2056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2058 ;; Parallel single-precision floating point conversion operations
2060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; float.  Operand 1 is a V4SF register tied to the destination.
;; NOTE(review): the operator combining the converted value with operand 1,
;; and the insn condition, are on lines not visible in this excerpt.
2062 (define_insn "sse_cvtpi2ps"
2063 [(set (match_operand:V4SF 0 "register_operand" "=x")
2066 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2067 (match_operand:V4SF 1 "register_operand" "0")
2070 "cvtpi2ps\t{%2, %0|%0, %2}"
2071 [(set_attr "type" "ssecvt")
2072 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts a V4SF source through a V4SI unspec and keeps elements
;; 0 and 1 as the V2SI result in a "y"-class register.  The "unit" attribute
;; is mmx.
;; NOTE(review): the unspec identifier and the insn condition are on lines
;; not visible in this excerpt.
2074 (define_insn "sse_cvtps2pi"
2075 [(set (match_operand:V2SI 0 "register_operand" "=y")
2077 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2079 (parallel [(const_int 0) (const_int 1)])))]
2081 "cvtps2pi\t{%1, %0|%0, %1}"
2082 [(set_attr "type" "ssecvt")
2083 (set_attr "unit" "mmx")
2084 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but expressed with the fix rtx code on the
;; V4SF source instead of an unspec; elements 0 and 1 of the V4SI result are
;; kept.
;; NOTE(review): the "mode" attribute here is SF while sse_cvtps2pi uses DI;
;; confirm whether the difference is intended.  The insn condition is not
;; visible in this excerpt.
2086 (define_insn "sse_cvttps2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2089 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2090 (parallel [(const_int 0) (const_int 1)])))]
2092 "cvttps2pi\t{%1, %0|%0, %1}"
2093 [(set_attr "type" "ssecvt")
2094 (set_attr "unit" "mmx")
2095 (set_attr "prefix_rep" "0")
2096 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  There are three
;; alternatives: legacy with a register source, legacy with a memory source
;; (both with the destination tied to operand 1), and a VEX three-operand
;; form accepting either.  The two legacy alternatives differ only in their
;; per-CPU decode attributes.
;; NOTE(review): the operator combining the result with operand 1, and the
;; insn condition, are on lines not visible in this excerpt.
2098 (define_insn "sse_cvtsi2ss"
2099 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2102 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2103 (match_operand:V4SF 1 "register_operand" "0,0,x")
2107 cvtsi2ss\t{%2, %0|%0, %2}
2108 cvtsi2ss\t{%2, %0|%0, %2}
2109 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2110 [(set_attr "isa" "noavx,noavx,avx")
2111 (set_attr "type" "sseicvt")
2112 (set_attr "athlon_decode" "vector,double,*")
2113 (set_attr "amdfam10_decode" "vector,double,*")
2114 (set_attr "bdver1_decode" "double,direct,*")
2115 (set_attr "prefix" "orig,orig,vex")
2116 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: operand 2 is DImode and the pattern is
;; enabled only when both TARGET_SSE and TARGET_64BIT hold.  The legacy
;; alternatives set "prefix_rex" to 1 and the VEX alternative sets
;; "length_vex" to 4.
;; NOTE(review): the operator combining the result with operand 1 is on a
;; line not visible in this excerpt.
2118 (define_insn "sse_cvtsi2ssq"
2119 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2122 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2123 (match_operand:V4SF 1 "register_operand" "0,0,x")
2125 "TARGET_SSE && TARGET_64BIT"
2127 cvtsi2ssq\t{%2, %0|%0, %2}
2128 cvtsi2ssq\t{%2, %0|%0, %2}
2129 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2130 [(set_attr "isa" "noavx,noavx,avx")
2131 (set_attr "type" "sseicvt")
2132 (set_attr "athlon_decode" "vector,double,*")
2133 (set_attr "amdfam10_decode" "vector,double,*")
2134 (set_attr "bdver1_decode" "double,direct,*")
2135 (set_attr "length_vex" "*,*,4")
2136 (set_attr "prefix_rex" "1,1,*")
2137 (set_attr "prefix" "orig,orig,vex")
2138 (set_attr "mode" "SF")])
;; cvtss2si to an SImode general register, modelled as UNSPEC_FIX_NOTRUNC of
;; element 0 of the V4SF operand 1.  The two alternatives (register source,
;; memory source) share one template and differ in the decode attributes.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is set
;; here; confirm whether that is intended.  The insn condition is not
;; visible in this excerpt.
2140 (define_insn "sse_cvtss2si"
2141 [(set (match_operand:SI 0 "register_operand" "=r,r")
2144 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2145 (parallel [(const_int 0)]))]
2146 UNSPEC_FIX_NOTRUNC))]
2148 "%vcvtss2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "athlon_decode" "double,vector")
2151 (set_attr "bdver1_decode" "double,double")
2152 (set_attr "prefix_rep" "1")
2153 (set_attr "prefix" "maybe_vex")
2154 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: same cvtss2si instruction as sse_cvtss2si, but the
;; source is a scalar SFmode operand (SSE register or memory) instead of
;; an element of a V4SF vector.  The SImode result is the
;; UNSPEC_FIX_NOTRUNC of operand 1.
2156 (define_insn "sse_cvtss2si_2"
2157 [(set (match_operand:SI 0 "register_operand" "=r,r")
2158 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2159 UNSPEC_FIX_NOTRUNC))]
2161 "%vcvtss2si\t{%1, %0|%0, %1}"
2162 [(set_attr "type" "sseicvt")
2163 (set_attr "athlon_decode" "double,vector")
2164 (set_attr "amdfam10_decode" "double,double")
2165 (set_attr "bdver1_decode" "double,double")
2166 (set_attr "prefix_rep" "1")
2167 (set_attr "prefix" "maybe_vex")
2168 (set_attr "mode" "SI")])
;; sse_cvtss2siq: 64-bit form of sse_cvtss2si.  Produces a DImode
;; general-register result from element 0 of V4SF operand 1 through
;; UNSPEC_FIX_NOTRUNC; only available with TARGET_SSE && TARGET_64BIT.
;; The {q} in the template is an assembler-dialect group that supplies the
;; q suffix in one dialect only.
2170 (define_insn "sse_cvtss2siq"
2171 [(set (match_operand:DI 0 "register_operand" "=r,r")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2175 (parallel [(const_int 0)]))]
2176 UNSPEC_FIX_NOTRUNC))]
2177 "TARGET_SSE && TARGET_64BIT"
2178 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "sseicvt")
2180 (set_attr "athlon_decode" "double,vector")
2181 (set_attr "bdver1_decode" "double,double")
2182 (set_attr "prefix_rep" "1")
2183 (set_attr "prefix" "maybe_vex")
2184 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: 64-bit cvtss2si taking a scalar SFmode source
;; (SSE register or memory) and producing a DImode general-register
;; result as UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
2186 (define_insn "sse_cvtss2siq_2"
2187 [(set (match_operand:DI 0 "register_operand" "=r,r")
2188 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2189 UNSPEC_FIX_NOTRUNC))]
2190 "TARGET_SSE && TARGET_64BIT"
2191 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2192 [(set_attr "type" "sseicvt")
2193 (set_attr "athlon_decode" "double,vector")
2194 (set_attr "amdfam10_decode" "double,double")
2195 (set_attr "bdver1_decode" "double,double")
2196 (set_attr "prefix_rep" "1")
2197 (set_attr "prefix" "maybe_vex")
2198 (set_attr "mode" "DI")])
;; sse_cvttss2si: cvttss2si, giving an SImode general-register result
;; from element 0 (parallel [(const_int 0)]) of V4SF operand 1, which may
;; be an SSE register or memory.
;; NOTE(review): presumably the truncating counterpart of sse_cvtss2si
;; (no UNSPEC_FIX_NOTRUNC appears here) - confirm against the full RTL.
2200 (define_insn "sse_cvttss2si"
2201 [(set (match_operand:SI 0 "register_operand" "=r,r")
2204 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2205 (parallel [(const_int 0)]))))]
2207 "%vcvttss2si\t{%1, %0|%0, %1}"
2208 [(set_attr "type" "sseicvt")
2209 (set_attr "athlon_decode" "double,vector")
2210 (set_attr "amdfam10_decode" "double,double")
2211 (set_attr "bdver1_decode" "double,double")
2212 (set_attr "prefix_rep" "1")
2213 (set_attr "prefix" "maybe_vex")
2214 (set_attr "mode" "SI")])
;; sse_cvttss2siq: 64-bit form of sse_cvttss2si.  DImode general-register
;; result from element 0 of V4SF operand 1 (SSE register or memory),
;; emitted as cvttss2si with an optional q suffix.
;; Requires TARGET_SSE && TARGET_64BIT.
2216 (define_insn "sse_cvttss2siq"
2217 [(set (match_operand:DI 0 "register_operand" "=r,r")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2221 (parallel [(const_int 0)]))))]
2222 "TARGET_SSE && TARGET_64BIT"
2223 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2224 [(set_attr "type" "sseicvt")
2225 (set_attr "athlon_decode" "double,vector")
2226 (set_attr "amdfam10_decode" "double,double")
2227 (set_attr "bdver1_decode" "double,double")
2228 (set_attr "prefix_rep" "1")
2229 (set_attr "prefix" "maybe_vex")
2230 (set_attr "mode" "DI")])
;; avx_cvtdq2ps256: 256-bit signed integer to float conversion.
;; V8SF operand 0 = float of V8SI operand 1 (register or memory),
;; emitted as vcvtdq2ps with a VEX prefix.
2232 (define_insn "avx_cvtdq2ps256"
2233 [(set (match_operand:V8SF 0 "register_operand" "=x")
2234 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2236 "vcvtdq2ps\t{%1, %0|%0, %1}"
2237 [(set_attr "type" "ssecvt")
2238 (set_attr "prefix" "vex")
2239 (set_attr "mode" "V8SF")])
;; sse2_cvtdq2ps: 128-bit signed integer to float conversion.
;; V4SF operand 0 = float of V4SI operand 1 (register or memory),
;; emitted as cvtdq2ps; the prefix attribute is maybe_vex.
2241 (define_insn "sse2_cvtdq2ps"
2242 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2245 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2246 [(set_attr "type" "ssecvt")
2247 (set_attr "prefix" "maybe_vex")
2248 (set_attr "mode" "V4SF")])
;; sse2_cvtudq2ps: expander for converting V4SI operand 1 to V4SF
;; operand 0 as a multi-step sequence:
;;   - float: conversion of operand 1 (a signed conversion),
;;   - lt: comparison of operand 5 against operand 3,
;;   - and: of operand 6 with operand 4,
;;   - plus: of operand 5 and operand 7, stored in operand 0.
;; The preparation code sets operand 3 to a zero V4SF vector, operand 4
;; to a V4SF constant vector built from TWO32r, and operands 5..7 to
;; fresh V4SF pseudo registers.
;; NOTE(review): TWO32r comes from real_ldexp (dconst1, 32), presumably
;; 2**32, so the sequence appears to add 2**32 to lanes whose signed
;; conversion came out negative - confirm.
2250 (define_expand "sse2_cvtudq2ps"
2252 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2254 (lt:V4SF (match_dup 5) (match_dup 3)))
2256 (and:V4SF (match_dup 6) (match_dup 4)))
2257 (set (match_operand:V4SF 0 "register_operand" "")
2258 (plus:V4SF (match_dup 5) (match_dup 7)))]
2261 REAL_VALUE_TYPE TWO32r;
2265 real_ldexp (&TWO32r, &dconst1, 32);
2266 x = const_double_from_real_value (TWO32r, SFmode);
2268 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2269 operands[4] = force_reg (V4SFmode,
2270 ix86_build_const_vector (V4SFmode, 1, x));
2272 for (i = 5; i < 8; i++)
2273 operands[i] = gen_reg_rtx (V4SFmode);
;; avx_cvtps2dq256: 256-bit float to integer conversion expressed as
;; UNSPEC_FIX_NOTRUNC.  V8SI operand 0 from V8SF operand 1 (register or
;; memory), emitted as vcvtps2dq.  The insn mode attribute is OI.
2276 (define_insn "avx_cvtps2dq256"
2277 [(set (match_operand:V8SI 0 "register_operand" "=x")
2278 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2279 UNSPEC_FIX_NOTRUNC))]
2281 "vcvtps2dq\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix" "vex")
2284 (set_attr "mode" "OI")])
;; sse2_cvtps2dq: 128-bit float to integer conversion expressed as
;; UNSPEC_FIX_NOTRUNC.  V4SI operand 0 from V4SF operand 1 (register or
;; memory), emitted as cvtps2dq.
;; prefix_data16 is not a constant here: it is computed from a
;; TARGET_AVX test, with 1 as one of the two outcomes.
2286 (define_insn "sse2_cvtps2dq"
2287 [(set (match_operand:V4SI 0 "register_operand" "=x")
2288 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2289 UNSPEC_FIX_NOTRUNC))]
2291 "%vcvtps2dq\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "ssecvt")
2293 (set (attr "prefix_data16")
2295 (match_test "TARGET_AVX")
2297 (const_string "1")))
2298 (set_attr "prefix" "maybe_vex")
2299 (set_attr "mode" "TI")])
;; avx_cvttps2dq256: 256-bit truncating float to integer conversion.
;; V8SI operand 0 = fix of V8SF operand 1 (register or memory), emitted
;; as vcvttps2dq.  The insn mode attribute is OI.
2301 (define_insn "avx_cvttps2dq256"
2302 [(set (match_operand:V8SI 0 "register_operand" "=x")
2303 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2305 "vcvttps2dq\t{%1, %0|%0, %1}"
2306 [(set_attr "type" "ssecvt")
2307 (set_attr "prefix" "vex")
2308 (set_attr "mode" "OI")])
;; sse2_cvttps2dq: 128-bit truncating float to integer conversion.
;; V4SI operand 0 = fix of V4SF operand 1 (register or memory), emitted
;; as cvttps2dq.
;; prefix_rep and prefix_data16 are both computed from a TARGET_AVX test
;; (outcomes 1 and 0 respectively are visible).
;; NOTE(review): prefix_data16 is specified twice - once through the
;; TARGET_AVX test and once unconditionally as 0 just below it.  Confirm
;; which setting is intended and whether the duplicate should go.
2310 (define_insn "sse2_cvttps2dq"
2311 [(set (match_operand:V4SI 0 "register_operand" "=x")
2312 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2314 "%vcvttps2dq\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "ssecvt")
2316 (set (attr "prefix_rep")
2318 (match_test "TARGET_AVX")
2320 (const_string "1")))
2321 (set (attr "prefix_data16")
2323 (match_test "TARGET_AVX")
2325 (const_string "0")))
2326 (set_attr "prefix_data16" "0")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "TI")])
2330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2332 ;; Parallel double-precision floating point conversion operations
2334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: V2DF operand 0 = float of V2SI operand 1, emitted as
;; cvtpi2pd.  Operand 1 is either an MMX register (constraint y) or
;; memory; only the MMX-register alternative is tagged with unit mmx and
;; prefix_data16 1.
2336 (define_insn "sse2_cvtpi2pd"
2337 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2338 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2340 "cvtpi2pd\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set_attr "unit" "mmx,*")
2343 (set_attr "prefix_data16" "1,*")
2344 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2SI result in an MMX register (constraint y) from V2DF
;; operand 1 (SSE register or memory), expressed as UNSPEC_FIX_NOTRUNC
;; and emitted as cvtpd2pi.  The insn mode attribute is DI.
2346 (define_insn "sse2_cvtpd2pi"
2347 [(set (match_operand:V2SI 0 "register_operand" "=y")
2348 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2349 UNSPEC_FIX_NOTRUNC))]
2351 "cvtpd2pi\t{%1, %0|%0, %1}"
2352 [(set_attr "type" "ssecvt")
2353 (set_attr "unit" "mmx")
2354 (set_attr "bdver1_decode" "double")
2355 (set_attr "prefix_data16" "1")
2356 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: truncating counterpart of sse2_cvtpd2pi.  V2SI result
;; in an MMX register = fix of V2DF operand 1 (SSE register or memory),
;; emitted as cvttpd2pi.
;; NOTE(review): the mode attribute is TI here but DI on sse2_cvtpd2pi -
;; confirm the difference is intended.
2358 (define_insn "sse2_cvttpd2pi"
2359 [(set (match_operand:V2SI 0 "register_operand" "=y")
2360 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2362 "cvttpd2pi\t{%1, %0|%0, %1}"
2363 [(set_attr "type" "ssecvt")
2364 (set_attr "unit" "mmx")
2365 (set_attr "bdver1_decode" "double")
2366 (set_attr "prefix_data16" "1")
2367 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: convert the SImode integer in operand 2 (general
;; register or memory) to DFmode and deliver it in V2DF operand 0 via
;; cvtsi2sd.  Operand 1 is the V2DF register input, tied to the
;; destination in the two non-AVX alternatives and free in the
;; three-operand vcvtsi2sd alternative.
2369 (define_insn "sse2_cvtsi2sd"
2370 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2373 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2374 (match_operand:V2DF 1 "register_operand" "0,0,x")
2378 cvtsi2sd\t{%2, %0|%0, %2}
2379 cvtsi2sd\t{%2, %0|%0, %2}
2380 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2381 [(set_attr "isa" "noavx,noavx,avx")
2382 (set_attr "type" "sseicvt")
2383 (set_attr "athlon_decode" "double,direct,*")
2384 (set_attr "amdfam10_decode" "vector,double,*")
2385 (set_attr "bdver1_decode" "double,direct,*")
2386 (set_attr "prefix" "orig,orig,vex")
2387 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd.  Operand 2 is
;; a DImode register or memory value converted to DFmode; operand 1 is
;; the V2DF register input (tied to operand 0 without AVX).
;; Requires TARGET_SSE2 && TARGET_64BIT.  Legacy alternatives are marked
;; prefix_rex 1; the AVX alternative fixes length_vex at 4.
2389 (define_insn "sse2_cvtsi2sdq"
2390 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2393 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2394 (match_operand:V2DF 1 "register_operand" "0,0,x")
2396 "TARGET_SSE2 && TARGET_64BIT"
2398 cvtsi2sdq\t{%2, %0|%0, %2}
2399 cvtsi2sdq\t{%2, %0|%0, %2}
2400 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2401 [(set_attr "isa" "noavx,noavx,avx")
2402 (set_attr "type" "sseicvt")
2403 (set_attr "athlon_decode" "double,direct,*")
2404 (set_attr "amdfam10_decode" "vector,double,*")
2405 (set_attr "bdver1_decode" "double,direct,*")
2406 (set_attr "length_vex" "*,*,4")
2407 (set_attr "prefix_rex" "1,1,*")
2408 (set_attr "prefix" "orig,orig,vex")
2409 (set_attr "mode" "DF")])
;; sse2_cvtsd2si: SImode general-register result from element 0
;; (parallel [(const_int 0)]) of V2DF operand 1 (SSE register or memory),
;; expressed as UNSPEC_FIX_NOTRUNC and emitted as cvtsd2si.
;; NOTE(review): no amdfam10_decode attribute is set here although
;; sse2_cvtsd2si_2 sets one - confirm that is intentional.
2411 (define_insn "sse2_cvtsd2si"
2412 [(set (match_operand:SI 0 "register_operand" "=r,r")
2415 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2416 (parallel [(const_int 0)]))]
2417 UNSPEC_FIX_NOTRUNC))]
2419 "%vcvtsd2si\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "sseicvt")
2421 (set_attr "athlon_decode" "double,vector")
2422 (set_attr "bdver1_decode" "double,double")
2423 (set_attr "prefix_rep" "1")
2424 (set_attr "prefix" "maybe_vex")
2425 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: cvtsd2si with a scalar DFmode source (SSE register
;; or memory).  SImode general-register result = UNSPEC_FIX_NOTRUNC of
;; operand 1.
2427 (define_insn "sse2_cvtsd2si_2"
2428 [(set (match_operand:SI 0 "register_operand" "=r,r")
2429 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2430 UNSPEC_FIX_NOTRUNC))]
2432 "%vcvtsd2si\t{%1, %0|%0, %1}"
2433 [(set_attr "type" "sseicvt")
2434 (set_attr "athlon_decode" "double,vector")
2435 (set_attr "amdfam10_decode" "double,double")
2436 (set_attr "bdver1_decode" "double,double")
2437 (set_attr "prefix_rep" "1")
2438 (set_attr "prefix" "maybe_vex")
2439 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: 64-bit form of sse2_cvtsd2si.  DImode general-register
;; result from element 0 of V2DF operand 1 through UNSPEC_FIX_NOTRUNC,
;; emitted as cvtsd2si with an optional q suffix.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2441 (define_insn "sse2_cvtsd2siq"
2442 [(set (match_operand:DI 0 "register_operand" "=r,r")
2445 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2446 (parallel [(const_int 0)]))]
2447 UNSPEC_FIX_NOTRUNC))]
2448 "TARGET_SSE2 && TARGET_64BIT"
2449 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2450 [(set_attr "type" "sseicvt")
2451 (set_attr "athlon_decode" "double,vector")
2452 (set_attr "bdver1_decode" "double,double")
2453 (set_attr "prefix_rep" "1")
2454 (set_attr "prefix" "maybe_vex")
2455 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: 64-bit cvtsd2si with a scalar DFmode source (SSE
;; register or memory).  DImode general-register result =
;; UNSPEC_FIX_NOTRUNC of operand 1.  Requires TARGET_SSE2 && TARGET_64BIT.
2457 (define_insn "sse2_cvtsd2siq_2"
2458 [(set (match_operand:DI 0 "register_operand" "=r,r")
2459 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2460 UNSPEC_FIX_NOTRUNC))]
2461 "TARGET_SSE2 && TARGET_64BIT"
2462 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "athlon_decode" "double,vector")
2465 (set_attr "amdfam10_decode" "double,double")
2466 (set_attr "bdver1_decode" "double,double")
2467 (set_attr "prefix_rep" "1")
2468 (set_attr "prefix" "maybe_vex")
2469 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: cvttsd2si, giving an SImode general-register result
;; from element 0 (parallel [(const_int 0)]) of V2DF operand 1, which may
;; be an SSE register or memory.
;; NOTE(review): presumably the truncating counterpart of sse2_cvtsd2si
;; (no UNSPEC_FIX_NOTRUNC appears here) - confirm against the full RTL.
2471 (define_insn "sse2_cvttsd2si"
2472 [(set (match_operand:SI 0 "register_operand" "=r,r")
2475 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2476 (parallel [(const_int 0)]))))]
2478 "%vcvttsd2si\t{%1, %0|%0, %1}"
2479 [(set_attr "type" "sseicvt")
2480 (set_attr "athlon_decode" "double,vector")
2481 (set_attr "amdfam10_decode" "double,double")
2482 (set_attr "bdver1_decode" "double,double")
2483 (set_attr "prefix_rep" "1")
2484 (set_attr "prefix" "maybe_vex")
2485 (set_attr "mode" "SI")])
;; sse2_cvttsd2siq: 64-bit form of sse2_cvttsd2si.  DImode
;; general-register result from element 0 of V2DF operand 1 (SSE register
;; or memory), emitted as cvttsd2si with an optional q suffix.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2487 (define_insn "sse2_cvttsd2siq"
2488 [(set (match_operand:DI 0 "register_operand" "=r,r")
2491 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2492 (parallel [(const_int 0)]))))]
2493 "TARGET_SSE2 && TARGET_64BIT"
2494 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "DI")])
;; avx_cvtdq2pd256: V4DF operand 0 = float of V4SI operand 1 (register
;; or memory), emitted as vcvtdq2pd.  All four source elements are
;; converted, so the 128-bit input widens to a 256-bit result.
2503 (define_insn "avx_cvtdq2pd256"
2504 [(set (match_operand:V4DF 0 "register_operand" "=x")
2505 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2507 "vcvtdq2pd\t{%1, %0|%0, %1}"
2508 [(set_attr "type" "ssecvt")
2509 (set_attr "prefix" "vex")
2510 (set_attr "mode" "V4DF")])
;; avx_cvtdq2pd256_2: variant of avx_cvtdq2pd256 whose source is a V8SI
;; operand; only elements 0..3 are named by the selector.  The template
;; prints operand 1 with the x modifier.
;; NOTE(review): %x1 presumably prints the 128-bit (xmm) name of the
;; register so that only the low half is read - confirm.
2512 (define_insn "avx_cvtdq2pd256_2"
2513 [(set (match_operand:V4DF 0 "register_operand" "=x")
2516 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2517 (parallel [(const_int 0) (const_int 1)
2518 (const_int 2) (const_int 3)]))))]
2520 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2521 [(set_attr "type" "ssecvt")
2522 (set_attr "prefix" "vex")
2523 (set_attr "mode" "V4DF")])
;; sse2_cvtdq2pd: V2DF operand 0 produced from elements 0 and 1 of V4SI
;; operand 1 (register or memory), emitted as cvtdq2pd.
2525 (define_insn "sse2_cvtdq2pd"
2526 [(set (match_operand:V2DF 0 "register_operand" "=x")
2529 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2530 (parallel [(const_int 0) (const_int 1)]))))]
2532 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2533 [(set_attr "type" "ssecvt")
2534 (set_attr "prefix" "maybe_vex")
2535 (set_attr "mode" "V2DF")])
;; avx_cvtpd2dq256: V4SI operand 0 from V4DF operand 1 (register or
;; memory), expressed as UNSPEC_FIX_NOTRUNC and emitted as vcvtpd2dq.
;; The {y} dialect group adds a y suffix in one assembler dialect.
2537 (define_insn "avx_cvtpd2dq256"
2538 [(set (match_operand:V4SI 0 "register_operand" "=x")
2539 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2540 UNSPEC_FIX_NOTRUNC))]
2542 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2543 [(set_attr "type" "ssecvt")
2544 (set_attr "prefix" "vex")
2545 (set_attr "mode" "OI")])
;; sse2_cvtpd2dq: expander producing V4SI operand 0 from a V2SI unspec of
;; V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SI zero constant, which the matching insn pattern *sse2_cvtpd2dq
;; requires as its const0_operand.
2547 (define_expand "sse2_cvtpd2dq"
2548 [(set (match_operand:V4SI 0 "register_operand" "")
2550 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2554 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: insn matched by the sse2_cvtpd2dq expander.  V4SI
;; operand 0 is built from a V2SI unspec of V2DF operand 1 (register or
;; memory) together with operand 2, which must be a zero constant
;; (const0_operand).
;; The output is chosen in C: either the vcvtpd2dq form with an {x}
;; dialect suffix or the plain cvtpd2dq form.
2556 (define_insn "*sse2_cvtpd2dq"
2557 [(set (match_operand:V4SI 0 "register_operand" "=x")
2559 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2561 (match_operand:V2SI 2 "const0_operand" "")))]
2565 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2567 return "cvtpd2dq\t{%1, %0|%0, %1}";
2569 [(set_attr "type" "ssecvt")
2570 (set_attr "prefix_rep" "1")
2571 (set_attr "prefix_data16" "0")
2572 (set_attr "prefix" "maybe_vex")
2573 (set_attr "mode" "TI")
2574 (set_attr "amdfam10_decode" "double")
2575 (set_attr "athlon_decode" "vector")
2576 (set_attr "bdver1_decode" "double")])
;; avx_cvttpd2dq256: truncating conversion, V4SI operand 0 = fix of V4DF
;; operand 1 (register or memory), emitted as vcvttpd2dq with a {y}
;; dialect suffix.
2578 (define_insn "avx_cvttpd2dq256"
2579 [(set (match_operand:V4SI 0 "register_operand" "=x")
2580 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2582 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2583 [(set_attr "type" "ssecvt")
2584 (set_attr "prefix" "vex")
2585 (set_attr "mode" "OI")])
;; sse2_cvttpd2dq: expander producing V4SI operand 0 from the V2SI fix of
;; V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SI zero constant expected by the *sse2_cvttpd2dq insn pattern.
2587 (define_expand "sse2_cvttpd2dq"
2588 [(set (match_operand:V4SI 0 "register_operand" "")
2590 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2593 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: insn matched by the sse2_cvttpd2dq expander.  V4SI
;; operand 0 is built from the V2SI fix of V2DF operand 1 (register or
;; memory) together with operand 2, which must be a zero constant.
;; The output is chosen in C: either vcvttpd2dq with an {x} dialect
;; suffix or plain cvttpd2dq.
2595 (define_insn "*sse2_cvttpd2dq"
2596 [(set (match_operand:V4SI 0 "register_operand" "=x")
2598 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2599 (match_operand:V2SI 2 "const0_operand" "")))]
2603 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2605 return "cvttpd2dq\t{%1, %0|%0, %1}";
2607 [(set_attr "type" "ssecvt")
2608 (set_attr "amdfam10_decode" "double")
2609 (set_attr "athlon_decode" "vector")
2610 (set_attr "bdver1_decode" "double")
2611 (set_attr "prefix" "maybe_vex")
2612 (set_attr "mode" "TI")])
;; sse2_cvtsd2ss: narrow V2DF operand 2 (register or memory) with
;; float_truncate to V2SF and deliver the result in V4SF operand 0 via
;; cvtsd2ss.  Operand 1 is the V4SF register input: tied to the
;; destination in the two non-AVX alternatives, free in the three-operand
;; vcvtsd2ss alternative.
2614 (define_insn "sse2_cvtsd2ss"
2615 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2618 (float_truncate:V2SF
2619 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2620 (match_operand:V4SF 1 "register_operand" "0,0,x")
2624 cvtsd2ss\t{%2, %0|%0, %2}
2625 cvtsd2ss\t{%2, %0|%0, %2}
2626 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2627 [(set_attr "isa" "noavx,noavx,avx")
2628 (set_attr "type" "ssecvt")
2629 (set_attr "athlon_decode" "vector,double,*")
2630 (set_attr "amdfam10_decode" "vector,double,*")
2631 (set_attr "bdver1_decode" "direct,direct,*")
2632 (set_attr "prefix" "orig,orig,vex")
2633 (set_attr "mode" "SF")])
;; sse2_cvtss2sd: widening counterpart of sse2_cvtsd2ss, emitted as
;; cvtss2sd.  The source is V4SF operand 2 (register or memory), of which
;; elements 0 and 1 are named by the selector; operand 1 is the V2DF
;; register input, tied to the destination in the non-AVX alternatives.
2635 (define_insn "sse2_cvtss2sd"
2636 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2640 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2641 (parallel [(const_int 0) (const_int 1)])))
2642 (match_operand:V2DF 1 "register_operand" "0,0,x")
2646 cvtss2sd\t{%2, %0|%0, %2}
2647 cvtss2sd\t{%2, %0|%0, %2}
2648 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,noavx,avx")
2650 (set_attr "type" "ssecvt")
2651 (set_attr "amdfam10_decode" "vector,double,*")
2652 (set_attr "athlon_decode" "direct,direct,*")
2653 (set_attr "bdver1_decode" "direct,direct,*")
2654 (set_attr "prefix" "orig,orig,vex")
2655 (set_attr "mode" "DF")])
;; avx_cvtpd2ps256: V4SF operand 0 = float_truncate of V4DF operand 1
;; (register or memory), emitted as vcvtpd2ps with a {y} dialect suffix.
2657 (define_insn "avx_cvtpd2ps256"
2658 [(set (match_operand:V4SF 0 "register_operand" "=x")
2659 (float_truncate:V4SF
2660 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2662 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2663 [(set_attr "type" "ssecvt")
2664 (set_attr "prefix" "vex")
2665 (set_attr "mode" "V4SF")])
;; sse2_cvtpd2ps: expander producing V4SF operand 0 from the V2SF
;; float_truncate of V2DF operand 1.  The preparation statement supplies
;; operand 2 as the V2SF zero constant expected by *sse2_cvtpd2ps.
2667 (define_expand "sse2_cvtpd2ps"
2668 [(set (match_operand:V4SF 0 "register_operand" "")
2670 (float_truncate:V2SF
2671 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2674 "operands[2] = CONST0_RTX (V2SFmode);")
;; *sse2_cvtpd2ps: insn matched by the sse2_cvtpd2ps expander.  V4SF
;; operand 0 is built from the V2SF float_truncate of V2DF operand 1
;; (register or memory) together with operand 2, which must be a zero
;; constant.  The output is chosen in C: either vcvtpd2ps with an {x}
;; dialect suffix or plain cvtpd2ps.
2676 (define_insn "*sse2_cvtpd2ps"
2677 [(set (match_operand:V4SF 0 "register_operand" "=x")
2679 (float_truncate:V2SF
2680 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2681 (match_operand:V2SF 2 "const0_operand" "")))]
2685 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2687 return "cvtpd2ps\t{%1, %0|%0, %1}";
2689 [(set_attr "type" "ssecvt")
2690 (set_attr "amdfam10_decode" "double")
2691 (set_attr "athlon_decode" "vector")
2692 (set_attr "bdver1_decode" "double")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "prefix" "maybe_vex")
2695 (set_attr "mode" "V4SF")])
;; avx_cvtps2pd256: V4DF operand 0 produced from V4SF operand 1
;; (register or memory), emitted as vcvtps2pd.
2697 (define_insn "avx_cvtps2pd256"
2698 [(set (match_operand:V4DF 0 "register_operand" "=x")
2700 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2702 "vcvtps2pd\t{%1, %0|%0, %1}"
2703 [(set_attr "type" "ssecvt")
2704 (set_attr "prefix" "vex")
2705 (set_attr "mode" "V4DF")])
;; *avx_cvtps2pd256_2: variant of avx_cvtps2pd256 whose source is a V8SF
;; operand; only elements 0..3 are named by the selector.  The template
;; prints operand 1 with the x modifier.
;; NOTE(review): %x1 presumably prints the 128-bit (xmm) register name so
;; only the low half is read - confirm.
2707 (define_insn "*avx_cvtps2pd256_2"
2708 [(set (match_operand:V4DF 0 "register_operand" "=x")
2711 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2712 (parallel [(const_int 0) (const_int 1)
2713 (const_int 2) (const_int 3)]))))]
2715 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2716 [(set_attr "type" "ssecvt")
2717 (set_attr "prefix" "vex")
2718 (set_attr "mode" "V4DF")])
;; sse2_cvtps2pd: V2DF operand 0 produced from elements 0 and 1 of V4SF
;; operand 1 (register or memory), emitted as cvtps2pd.
2720 (define_insn "sse2_cvtps2pd"
2721 [(set (match_operand:V2DF 0 "register_operand" "=x")
2724 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2725 (parallel [(const_int 0) (const_int 1)]))))]
2727 "%vcvtps2pd\t{%1, %0|%0, %1}"
2728 [(set_attr "type" "ssecvt")
2729 (set_attr "amdfam10_decode" "direct")
2730 (set_attr "athlon_decode" "double")
2731 (set_attr "bdver1_decode" "double")
2732 (set_attr "prefix_data16" "0")
2733 (set_attr "prefix" "maybe_vex")
2734 (set_attr "mode" "V2DF")])
;; vec_unpacks_hi_v4sf: two-step expander.  The first step selects
;; elements 6, 7, 2, 3 from a combination that includes V4SF operand 1;
;; the second produces V2DF operand 0 from elements 0 and 1.
;; Operand 2 is a fresh V4SF pseudo created by the preparation statement.
;; NOTE(review): indices 6 and 7 presumably address the upper two floats
;; of operand 1 so that they land in the low half before widening -
;; confirm against the full RTL.
2736 (define_expand "vec_unpacks_hi_v4sf"
2741 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2742 (parallel [(const_int 6) (const_int 7)
2743 (const_int 2) (const_int 3)])))
2744 (set (match_operand:V2DF 0 "register_operand" "")
2748 (parallel [(const_int 0) (const_int 1)]))))]
2750 "operands[2] = gen_reg_rtx (V4SFmode);")
;; vec_unpacks_hi_v8sf: two-step expander.  Elements 4..7 of V8SF operand
;; 1 are selected first, then V4DF operand 0 is set.  Operand 2 is a
;; fresh V4SF pseudo created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected upper half -
;; confirm against the full RTL.
2752 (define_expand "vec_unpacks_hi_v8sf"
2755 (match_operand:V8SF 1 "nonimmediate_operand" "")
2756 (parallel [(const_int 4) (const_int 5)
2757 (const_int 6) (const_int 7)])))
2758 (set (match_operand:V4DF 0 "register_operand" "")
2762 "operands[2] = gen_reg_rtx (V4SFmode);")
;; vec_unpacks_lo_v4sf: expander setting V2DF operand 0 from elements 0
;; and 1 of V4SF operand 1.  No preparation code is needed.
2764 (define_expand "vec_unpacks_lo_v4sf"
2765 [(set (match_operand:V2DF 0 "register_operand" "")
2768 (match_operand:V4SF 1 "nonimmediate_operand" "")
2769 (parallel [(const_int 0) (const_int 1)]))))]
;; vec_unpacks_lo_v8sf: expander setting V4DF operand 0 from elements
;; 0..3 of V8SF operand 1.  No preparation code is needed.
2772 (define_expand "vec_unpacks_lo_v8sf"
2773 [(set (match_operand:V4DF 0 "register_operand" "")
2776 (match_operand:V8SF 1 "nonimmediate_operand" "")
2777 (parallel [(const_int 0) (const_int 1)
2778 (const_int 2) (const_int 3)]))))]
;; sseunpackfltmode: for an integer vector mode, the float vector mode
;; with half as many elements, each twice as wide:
;; V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
;; Used as the result mode of the vec_unpack*_float_* expanders.
2781 (define_mode_attr sseunpackfltmode
2782 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; vec_unpacks_float_hi_<mode>: for each mode of the VI2_AVX2 iterator,
;; unpack operand 1 with gen_vec_unpacks_hi_<mode> into a temporary of
;; mode <sseunpackmode>, then set operand 0 (mode <sseunpackfltmode>) to
;; the FLOAT conversion of that temporary.
2784 (define_expand "vec_unpacks_float_hi_<mode>"
2785 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2786 (match_operand:VI2_AVX2 1 "register_operand" "")]
2789 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2791 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2792 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2793 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; vec_unpacks_float_lo_<mode>: as vec_unpacks_float_hi_<mode>, but the
;; temporary is filled by gen_vec_unpacks_lo_<mode> before the FLOAT
;; conversion into operand 0.
2797 (define_expand "vec_unpacks_float_lo_<mode>"
2798 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2799 (match_operand:VI2_AVX2 1 "register_operand" "")]
2802 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2804 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2805 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2806 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; vec_unpacku_float_hi_<mode>: as vec_unpacks_float_hi_<mode>, but the
;; temporary is filled by gen_vec_unpacku_hi_<mode>.  The widened
;; temporary is then converted with an ordinary FLOAT rtx.
2810 (define_expand "vec_unpacku_float_hi_<mode>"
2811 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2812 (match_operand:VI2_AVX2 1 "register_operand" "")]
2815 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2817 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2818 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2819 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; vec_unpacku_float_lo_<mode>: as vec_unpacku_float_hi_<mode>, but the
;; temporary is filled by gen_vec_unpacku_lo_<mode> before the FLOAT
;; conversion into operand 0.
2823 (define_expand "vec_unpacku_float_lo_<mode>"
2824 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2825 (match_operand:VI2_AVX2 1 "register_operand" "")]
2828 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2830 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2831 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2832 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; vec_unpacks_float_hi_v4si: two-step expander.  Elements 2, 3, 2, 3 of
;; V4SI operand 1 are selected first; V2DF operand 0 is then produced
;; from elements 0 and 1.  Operand 2 is a fresh V4SI pseudo created by
;; the preparation statement.
;; NOTE(review): presumably operand 2 receives the shuffled vector so the
;; upper two integers sit in the low half for conversion - confirm.
2836 (define_expand "vec_unpacks_float_hi_v4si"
2839 (match_operand:V4SI 1 "nonimmediate_operand" "")
2840 (parallel [(const_int 2) (const_int 3)
2841 (const_int 2) (const_int 3)])))
2842 (set (match_operand:V2DF 0 "register_operand" "")
2846 (parallel [(const_int 0) (const_int 1)]))))]
2848 "operands[2] = gen_reg_rtx (V4SImode);")
;; vec_unpacks_float_lo_v4si: expander setting V2DF operand 0 from
;; elements 0 and 1 of V4SI operand 1.  No preparation code is needed.
2850 (define_expand "vec_unpacks_float_lo_v4si"
2851 [(set (match_operand:V2DF 0 "register_operand" "")
2854 (match_operand:V4SI 1 "nonimmediate_operand" "")
2855 (parallel [(const_int 0) (const_int 1)]))))]
;; vec_unpacks_float_hi_v8si: two-step expander.  Elements 4..7 of V8SI
;; operand 1 are selected first, then V4DF operand 0 is set.  Operand 2
;; is a fresh V4SI pseudo created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected upper half -
;; confirm against the full RTL.
2858 (define_expand "vec_unpacks_float_hi_v8si"
2861 (match_operand:V8SI 1 "nonimmediate_operand" "")
2862 (parallel [(const_int 4) (const_int 5)
2863 (const_int 6) (const_int 7)])))
2864 (set (match_operand:V4DF 0 "register_operand" "")
2868 "operands[2] = gen_reg_rtx (V4SImode);")
;; vec_unpacks_float_lo_v8si: expander setting V4DF operand 0 from
;; elements 0..3 of V8SI operand 1.  No preparation code is needed.
2870 (define_expand "vec_unpacks_float_lo_v8si"
2871 [(set (match_operand:V4DF 0 "register_operand" "")
2874 (match_operand:V8SI 1 "nonimmediate_operand" "")
2875 (parallel [(const_int 0) (const_int 1)
2876 (const_int 2) (const_int 3)]))))]
;; vec_unpacku_float_hi_v4si: multi-step expander from V4SI operand 1 to
;; V2DF operand 0:
;;   - select elements 2, 3, 2, 3 of operand 1,
;;   - take elements 0 and 1 of the shuffled vector,
;;   - lt: comparison of operand 6 against operand 3,
;;   - and: of operand 7 with operand 4,
;;   - plus: of operand 6 and operand 8, stored in operand 0.
;; The preparation code sets operand 3 to a zero V2DF vector, operand 4
;; to a V2DF constant vector built from TWO32r, operand 5 to a fresh V4SI
;; pseudo and operands 6..8 to fresh V2DF pseudos.
;; NOTE(review): TWO32r comes from real_ldexp (dconst1, 32), presumably
;; 2**32, so the sequence appears to add 2**32 to lanes whose signed
;; conversion came out negative - confirm.
2879 (define_expand "vec_unpacku_float_hi_v4si"
2882 (match_operand:V4SI 1 "nonimmediate_operand" "")
2883 (parallel [(const_int 2) (const_int 3)
2884 (const_int 2) (const_int 3)])))
2889 (parallel [(const_int 0) (const_int 1)]))))
2891 (lt:V2DF (match_dup 6) (match_dup 3)))
2893 (and:V2DF (match_dup 7) (match_dup 4)))
2894 (set (match_operand:V2DF 0 "register_operand" "")
2895 (plus:V2DF (match_dup 6) (match_dup 8)))]
2898 REAL_VALUE_TYPE TWO32r;
2902 real_ldexp (&TWO32r, &dconst1, 32);
2903 x = const_double_from_real_value (TWO32r, DFmode);
2905 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2906 operands[4] = force_reg (V2DFmode,
2907 ix86_build_const_vector (V2DFmode, 1, x));
2909 operands[5] = gen_reg_rtx (V4SImode);
2911 for (i = 6; i < 9; i++)
2912 operands[i] = gen_reg_rtx (V2DFmode);
;; vec_unpacku_float_lo_v4si: multi-step expander from V4SI operand 1 to
;; V2DF operand 0:
;;   - take elements 0 and 1 of operand 1,
;;   - lt: comparison of operand 5 against operand 3,
;;   - and: of operand 6 with operand 4,
;;   - plus: of operand 5 and operand 7, stored in operand 0.
;; The preparation code sets operand 3 to a zero V2DF vector, operand 4
;; to a V2DF constant vector built from TWO32r, and operands 5..7 to
;; fresh V2DF pseudos.
;; NOTE(review): TWO32r is presumably 2**32 (real_ldexp of dconst1 by
;; 32), used to correct lanes that converted as negative - confirm.
2915 (define_expand "vec_unpacku_float_lo_v4si"
2919 (match_operand:V4SI 1 "nonimmediate_operand" "")
2920 (parallel [(const_int 0) (const_int 1)]))))
2922 (lt:V2DF (match_dup 5) (match_dup 3)))
2924 (and:V2DF (match_dup 6) (match_dup 4)))
2925 (set (match_operand:V2DF 0 "register_operand" "")
2926 (plus:V2DF (match_dup 5) (match_dup 7)))]
2929 REAL_VALUE_TYPE TWO32r;
2933 real_ldexp (&TWO32r, &dconst1, 32);
2934 x = const_double_from_real_value (TWO32r, DFmode);
2936 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2937 operands[4] = force_reg (V2DFmode,
2938 ix86_build_const_vector (V2DFmode, 1, x));
2940 for (i = 5; i < 8; i++)
2941 operands[i] = gen_reg_rtx (V2DFmode);
;; vec_unpacku_float_hi_v8si: expander written entirely in C, from V8SI
;; operand 1 (register only) to V4DF operand 0.  Emitted sequence:
;;   tmp[5] (V4SI) = high half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] (V4DF) = avx_cvtdq2pd256 of tmp[5]
;;   tmp[3]        = LT (tmp[2], tmp[0]), where tmp[0] is the zero vector
;;   tmp[4]        = tmp[3] AND tmp[1], where tmp[1] is built from TWO32r
;;   operand 0     = tmp[2] + tmp[4]
;; NOTE(review): TWO32r is presumably 2**32 (real_ldexp of dconst1 by
;; 32), so lanes that converted as negative get 2**32 added - confirm.
2944 (define_expand "vec_unpacku_float_hi_v8si"
2945 [(match_operand:V4DF 0 "register_operand" "")
2946 (match_operand:V8SI 1 "register_operand" "")]
2949 REAL_VALUE_TYPE TWO32r;
2953 real_ldexp (&TWO32r, &dconst1, 32);
2954 x = const_double_from_real_value (TWO32r, DFmode);
2956 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2957 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2958 tmp[5] = gen_reg_rtx (V4SImode);
2960 for (i = 2; i < 5; i++)
2961 tmp[i] = gen_reg_rtx (V4DFmode);
2962 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2963 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
2964 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2965 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2966 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2967 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; vec_unpacku_float_lo_v8si: expander written entirely in C, from V8SI
;; operand 1 (register or memory) to V4DF operand 0.  Emitted sequence:
;;   tmp[2] (V4DF) = avx_cvtdq2pd256_2 of operand 1 (its elements 0..3)
;;   tmp[3]        = LT (tmp[2], tmp[0]), where tmp[0] is the zero vector
;;   tmp[4]        = tmp[3] AND tmp[1], where tmp[1] is built from TWO32r
;;   operand 0     = tmp[2] + tmp[4]
;; No extraction step is needed because the _2 conversion pattern reads
;; the V8SI operand directly.
;; NOTE(review): TWO32r is presumably 2**32 (real_ldexp of dconst1 by
;; 32), so lanes that converted as negative get 2**32 added - confirm.
2971 (define_expand "vec_unpacku_float_lo_v8si"
2972 [(match_operand:V4DF 0 "register_operand" "")
2973 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2976 REAL_VALUE_TYPE TWO32r;
2980 real_ldexp (&TWO32r, &dconst1, 32);
2981 x = const_double_from_real_value (TWO32r, DFmode);
2983 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2984 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2986 for (i = 2; i < 5; i++)
2987 tmp[i] = gen_reg_rtx (V4DFmode);
2988 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
2989 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2990 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2991 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2992 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; vec_pack_trunc_v4df: expander narrowing two V4DF inputs (operands 1
;; and 2) with float_truncate to V4SF each and setting V8SF operand 0.
;; Operands 3 and 4 are fresh V4SF pseudos created by the preparation
;; code.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated
;; halves that are then combined into operand 0 - confirm.
2996 (define_expand "vec_pack_trunc_v4df"
2998 (float_truncate:V4SF
2999 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3001 (float_truncate:V4SF
3002 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3003 (set (match_operand:V8SF 0 "register_operand" "")
3009 operands[3] = gen_reg_rtx (V4SFmode);
3010 operands[4] = gen_reg_rtx (V4SFmode);
;; vec_pack_trunc_v2df: pack two V2DF inputs into one V4SF result.
;; Each input is converted with sse2_cvtpd2ps into its own V4SF
;; temporary (r1 from operand 1, r2 from operand 2); sse_movlhps then
;; combines r1 and r2 into operand 0.
3013 (define_expand "vec_pack_trunc_v2df"
3014 [(match_operand:V4SF 0 "register_operand" "")
3015 (match_operand:V2DF 1 "nonimmediate_operand" "")
3016 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3021 r1 = gen_reg_rtx (V4SFmode);
3022 r2 = gen_reg_rtx (V4SFmode);
3024 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3025 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3026 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; vec_pack_sfix_trunc_v2df: pack two V2DF inputs into one V4SI result
;; using the truncating conversion.  Each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the two are then
;; merged with vec_interleave_lowv2di, operating on V2DI lowpart views
;; of operand 0, r1 and r2.
3030 (define_expand "vec_pack_sfix_trunc_v2df"
3031 [(match_operand:V4SI 0 "register_operand" "")
3032 (match_operand:V2DF 1 "nonimmediate_operand" "")
3033 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3038 r1 = gen_reg_rtx (V4SImode);
3039 r2 = gen_reg_rtx (V4SImode);
3041 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3042 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3043 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3044 gen_lowpart (V2DImode, r1),
3045 gen_lowpart (V2DImode, r2)));
;; vec_pack_sfix_v2df: same shape as vec_pack_sfix_trunc_v2df, but each
;; V2DF input is converted with sse2_cvtpd2dq (the UNSPEC_FIX_NOTRUNC
;; form) instead of sse2_cvttpd2dq.  The two V4SI temporaries are merged
;; with vec_interleave_lowv2di on V2DI lowpart views.
3049 (define_expand "vec_pack_sfix_v2df"
3050 [(match_operand:V4SI 0 "register_operand" "")
3051 (match_operand:V2DF 1 "nonimmediate_operand" "")
3052 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3057 r1 = gen_reg_rtx (V4SImode);
3058 r2 = gen_reg_rtx (V4SImode);
3060 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3061 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3062 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3063 gen_lowpart (V2DImode, r1),
3064 gen_lowpart (V2DImode, r2)));
3068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3070 ;; Parallel single-precision floating point element swizzling
3072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_movhlps_exp: expander wrapper around the sse_movhlps insn that
;; accepts nonimmediate operands everywhere.  It first calls
;; ix86_fixup_binary_operands to obtain a usable destination, emits
;; sse_movhlps into that destination, and copies the result to operand 0
;; if the fixup substituted a different destination.
3074 (define_expand "sse_movhlps_exp"
3075 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3078 (match_operand:V4SF 1 "nonimmediate_operand" "")
3079 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3080 (parallel [(const_int 6)
3086 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3088 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3090 /* Fix up the destination if needed. */
3091 if (dst != operands[0])
3092 emit_move_insn (operands[0], dst);
;; sse_movhlps: V4SF shuffle of operands 1 and 2 whose selector begins
;; with index 6.  Valid when TARGET_SSE holds and operands 1 and 2 are
;; not both in memory.  Five alternatives, in order:
;;   0  movhlps   register source, destination tied to operand 1 (no AVX)
;;   1  vmovhlps  three-operand AVX form
;;   2  movlps    offsettable-memory source, read through %H2 (no AVX)
;;   3  vmovlps   AVX form of alternative 2
;;   4  movhps    memory destination tied to operand 1, register source
;; The mode attribute is V4SF for the register forms and V2SF for the
;; forms that touch memory.
3097 (define_insn "sse_movhlps"
3098 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3101 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3102 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3103 (parallel [(const_int 6)
3107 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3109 movhlps\t{%2, %0|%0, %2}
3110 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3111 movlps\t{%H2, %0|%0, %H2}
3112 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3113 %vmovhps\t{%2, %0|%0, %2}"
3114 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3115 (set_attr "type" "ssemov")
3116 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3117 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movlhps_exp: expander wrapper around the sse_movlhps insn that
;; accepts nonimmediate operands everywhere.  It first calls
;; ix86_fixup_binary_operands to obtain a usable destination, emits
;; sse_movlhps into that destination, and copies the result to operand 0
;; if the fixup substituted a different destination.
3119 (define_expand "sse_movlhps_exp"
3120 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3123 (match_operand:V4SF 1 "nonimmediate_operand" "")
3124 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3125 (parallel [(const_int 0)
3131 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3133 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3135 /* Fix up the destination if needed. */
3136 if (dst != operands[0])
3137 emit_move_insn (operands[0], dst);
;; sse_movlhps: V4SF shuffle of operands 1 and 2 whose selector begins
;; with index 0.  Valid when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  Five alternatives:
;;   0  movlhps   register source, destination tied to operand 1 (no AVX)
;;   1  vmovlhps  three-operand AVX form
;;   2  movhps    memory source (no AVX)
;;   3  vmovhps   AVX form of alternative 2
;;   4  movlps    offsettable-memory destination, written through %H0
;; NOTE(review): alternative 3 pairs the vmovhps template with an x
;; (register) constraint on operand 2, whereas the movhps alternative 2
;; uses m and both memory alternatives of sse_movhlps use o.  Confirm
;; whether operand 2 should be a memory constraint in alternative 3.
3142 (define_insn "sse_movlhps"
3143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3146 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3147 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3148 (parallel [(const_int 0)
3152 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3154 movlhps\t{%2, %0|%0, %2}
3155 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3156 movhps\t{%2, %0|%0, %2}
3157 vmovhps\t{%2, %1, %0|%0, %1, %2}
3158 %vmovlps\t{%2, %H0|%H0, %2}"
3159 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3160 (set_attr "type" "ssemov")
3161 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3162 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3164 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhps256: vunpckhps on V8SF operands.  The selector picks
;; indices 2, 10, 3, 11, 6, 14, 7, 15 from the pair (operand 1,
;; operand 2); operand 1 must be a register, operand 2 may be memory.
;; NOTE(review): indices 8..15 presumably address operand 2, giving an
;; interleave of the upper two elements of each 128-bit lane - confirm.
3165 (define_insn "avx_unpckhps256"
3166 [(set (match_operand:V8SF 0 "register_operand" "=x")
3169 (match_operand:V8SF 1 "register_operand" "x")
3170 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3171 (parallel [(const_int 2) (const_int 10)
3172 (const_int 3) (const_int 11)
3173 (const_int 6) (const_int 14)
3174 (const_int 7) (const_int 15)])))]
3176 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "prefix" "vex")
3179 (set_attr "mode" "V8SF")])
;; vec_interleave_highv8sf: three-step expander on V8SF operands.
;;   step 1: selection 0, 8, 1, 9, 4, 12, 5, 13 of operands 1 and 2
;;           (the avx_unpcklps256 selector),
;;   step 2: selection 2, 10, 3, 11, 6, 14, 7, 15
;;           (the avx_unpckhps256 selector),
;;   step 3: operand 0 = selection 4, 5, 6, 7, 12, 13, 14, 15.
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation
;; code.
;; NOTE(review): presumably operands 3 and 4 receive the two in-lane
;; unpack results and step 3 gathers their upper 128-bit lanes - confirm.
3181 (define_expand "vec_interleave_highv8sf"
3185 (match_operand:V8SF 1 "register_operand" "x")
3186 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3187 (parallel [(const_int 0) (const_int 8)
3188 (const_int 1) (const_int 9)
3189 (const_int 4) (const_int 12)
3190 (const_int 5) (const_int 13)])))
3196 (parallel [(const_int 2) (const_int 10)
3197 (const_int 3) (const_int 11)
3198 (const_int 6) (const_int 14)
3199 (const_int 7) (const_int 15)])))
3200 (set (match_operand:V8SF 0 "register_operand" "")
3205 (parallel [(const_int 4) (const_int 5)
3206 (const_int 6) (const_int 7)
3207 (const_int 12) (const_int 13)
3208 (const_int 14) (const_int 15)])))]
3211 operands[3] = gen_reg_rtx (V8SFmode);
3212 operands[4] = gen_reg_rtx (V8SFmode);
;; vec_interleave_highv4sf: unpckhps on V4SF operands.  The selector
;; picks indices 2, 6, 3, 7 from the pair (operand 1, operand 2).
;; Alternative 0 is the two-operand legacy form with operand 1 tied to
;; the destination; alternative 1 is the three-operand AVX form.
;; Operand 2 may be a register or memory in both.
3215 (define_insn "vec_interleave_highv4sf"
3216 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3219 (match_operand:V4SF 1 "register_operand" "0,x")
3220 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3221 (parallel [(const_int 2) (const_int 6)
3222 (const_int 3) (const_int 7)])))]
3225 unpckhps\t{%2, %0|%0, %2}
3226 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3227 [(set_attr "isa" "noavx,avx")
3228 (set_attr "type" "sselog")
3229 (set_attr "prefix" "orig,vex")
3230 (set_attr "mode" "V4SF")])
3232 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpcklps256: vunpcklps on V8SF operands.  The selector picks
;; indices 0, 8, 1, 9, 4, 12, 5, 13 from the pair (operand 1,
;; operand 2); operand 1 must be a register, operand 2 may be memory.
;; NOTE(review): indices 8..15 presumably address operand 2, giving an
;; interleave of the lower two elements of each 128-bit lane - confirm.
3233 (define_insn "avx_unpcklps256"
3234 [(set (match_operand:V8SF 0 "register_operand" "=x")
3237 (match_operand:V8SF 1 "register_operand" "x")
3238 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3239 (parallel [(const_int 0) (const_int 8)
3240 (const_int 1) (const_int 9)
3241 (const_int 4) (const_int 12)
3242 (const_int 5) (const_int 13)])))]
3244 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3245 [(set_attr "type" "sselog")
3246 (set_attr "prefix" "vex")
3247 (set_attr "mode" "V8SF")])
;; vec_interleave_lowv8sf: three-step expander on V8SF operands.
;;   step 1: selection 0, 8, 1, 9, 4, 12, 5, 13 of operands 1 and 2
;;           (the avx_unpcklps256 selector),
;;   step 2: selection 2, 10, 3, 11, 6, 14, 7, 15
;;           (the avx_unpckhps256 selector),
;;   step 3: operand 0 = selection 0, 1, 2, 3, 8, 9, 10, 11.
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation
;; code.
;; NOTE(review): presumably operands 3 and 4 receive the two in-lane
;; unpack results and step 3 gathers their lower 128-bit lanes - confirm.
3249 (define_expand "vec_interleave_lowv8sf"
3253 (match_operand:V8SF 1 "register_operand" "x")
3254 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3255 (parallel [(const_int 0) (const_int 8)
3256 (const_int 1) (const_int 9)
3257 (const_int 4) (const_int 12)
3258 (const_int 5) (const_int 13)])))
3264 (parallel [(const_int 2) (const_int 10)
3265 (const_int 3) (const_int 11)
3266 (const_int 6) (const_int 14)
3267 (const_int 7) (const_int 15)])))
3268 (set (match_operand:V8SF 0 "register_operand" "")
3273 (parallel [(const_int 0) (const_int 1)
3274 (const_int 2) (const_int 3)
3275 (const_int 8) (const_int 9)
3276 (const_int 10) (const_int 11)])))]
3279 operands[3] = gen_reg_rtx (V8SFmode);
3280 operands[4] = gen_reg_rtx (V8SFmode);
;; Interleave the low elements of two V4SF operands; the selection list is
;; 0, 4, 1, 5 (presumably indices into the operand 1 / operand 2
;; concatenation; the wrapper lines are not visible here -- confirm).
;; Alternative 0 is two-operand unpcklps with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
3283 (define_insn "vec_interleave_lowv4sf"
3284 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3287 (match_operand:V4SF 1 "register_operand" "0,x")
3288 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3289 (parallel [(const_int 0) (const_int 4)
3290 (const_int 1) (const_int 5)])))]
3293 unpcklps\t{%2, %0|%0, %2}
3294 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3295 [(set_attr "isa" "noavx,avx")
3296 (set_attr "type" "sselog")
3297 (set_attr "prefix" "orig,vex")
3298 (set_attr "mode" "V4SF")])
3300 ;; These are modeled with the same vec_concat as the others so that we
3301 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection list 1,1,3,3,5,5,7,7 places every
;; odd-indexed element of operand 1 in two adjacent result slots.
;; The source may be a register or memory.
3302 (define_insn "avx_movshdup256"
3303 [(set (match_operand:V8SF 0 "register_operand" "=x")
3306 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3308 (parallel [(const_int 1) (const_int 1)
3309 (const_int 3) (const_int 3)
3310 (const_int 5) (const_int 5)
3311 (const_int 7) (const_int 7)])))]
3313 "vmovshdup\t{%1, %0|%0, %1}"
3314 [(set_attr "type" "sse")
3315 (set_attr "prefix" "vex")
3316 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  Only the first selection index (1) is visible
;; in this excerpt; the remainder of the list and the insn condition are
;; elided.  The "%v" template prefix together with prefix "maybe_vex"
;; presumably yields the VEX mnemonic when AVX is enabled -- confirm.
3318 (define_insn "sse3_movshdup"
3319 [(set (match_operand:V4SF 0 "register_operand" "=x")
3322 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3324 (parallel [(const_int 1)
3329 "%vmovshdup\t{%1, %0|%0, %1}"
3330 [(set_attr "type" "sse")
3331 (set_attr "prefix_rep" "1")
3332 (set_attr "prefix" "maybe_vex")
3333 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selection list 0,0,2,2,4,4,6,6 places every
;; even-indexed element of operand 1 in two adjacent result slots.
;; The source may be a register or memory.
3335 (define_insn "avx_movsldup256"
3336 [(set (match_operand:V8SF 0 "register_operand" "=x")
3339 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3341 (parallel [(const_int 0) (const_int 0)
3342 (const_int 2) (const_int 2)
3343 (const_int 4) (const_int 4)
3344 (const_int 6) (const_int 6)])))]
3346 "vmovsldup\t{%1, %0|%0, %1}"
3347 [(set_attr "type" "sse")
3348 (set_attr "prefix" "vex")
3349 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  Only the first selection index (0) is visible
;; in this excerpt; the remainder of the list and the insn condition are
;; elided.  The "%v" template prefix together with prefix "maybe_vex"
;; presumably yields the VEX mnemonic when AVX is enabled -- confirm.
3351 (define_insn "sse3_movsldup"
3352 [(set (match_operand:V4SF 0 "register_operand" "=x")
3355 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3357 (parallel [(const_int 0)
3362 "%vmovsldup\t{%1, %0|%0, %1}"
3363 [(set_attr "type" "sse")
3364 (set_attr "prefix_rep" "1")
3365 (set_attr "prefix" "maybe_vex")
3366 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3 and decoding it into
;; the eight explicit element indices expected by avx_shufps256_1.
;; The immediate is read as four 2-bit fields.  Fields 0 and 1 are passed
;; unchanged (0..3), fields 2 and 3 are biased by 8 (8..11).  The same four
;; fields are then passed again biased by 4 and 12 respectively, so both
;; groups of four result elements use identical selectors.
3368 (define_expand "avx_shufps256"
3369 [(match_operand:V8SF 0 "register_operand" "")
3370 (match_operand:V8SF 1 "register_operand" "")
3371 (match_operand:V8SF 2 "nonimmediate_operand" "")
3372 (match_operand:SI 3 "const_int_operand" "")]
3375 int mask = INTVAL (operands[3]);
3376 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3377 GEN_INT ((mask >> 0) & 3),
3378 GEN_INT ((mask >> 2) & 3),
3379 GEN_INT (((mask >> 4) & 3) + 8),
3380 GEN_INT (((mask >> 6) & 3) + 8),
3381 GEN_INT (((mask >> 0) & 3) + 4),
3382 GEN_INT (((mask >> 2) & 3) + 4),
3383 GEN_INT (((mask >> 4) & 3) + 12),
3384 GEN_INT (((mask >> 6) & 3) + 12)));
3388 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the selection spelled out as eight constant index
;; operands (3..10), each restricted to a range of four by its predicate.
;; The visible part of the condition requires operands 7..10 to equal
;; operands 3..6 plus 4, i.e. both halves must use the same selectors.
;; The output code rebuilds the 8-bit immediate from operands 3..6
;; (two bits each, removing the +8 bias from operands 5 and 6) and
;; overwrites operands[3] with it before returning the template.
3389 (define_insn "avx_shufps256_1"
3390 [(set (match_operand:V8SF 0 "register_operand" "=x")
3393 (match_operand:V8SF 1 "register_operand" "x")
3394 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3395 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3396 (match_operand 4 "const_0_to_3_operand" "")
3397 (match_operand 5 "const_8_to_11_operand" "")
3398 (match_operand 6 "const_8_to_11_operand" "")
3399 (match_operand 7 "const_4_to_7_operand" "")
3400 (match_operand 8 "const_4_to_7_operand" "")
3401 (match_operand 9 "const_12_to_15_operand" "")
3402 (match_operand 10 "const_12_to_15_operand" "")])))]
3404 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3405 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3406 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3407 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3410 mask = INTVAL (operands[3]);
3411 mask |= INTVAL (operands[4]) << 2;
3412 mask |= (INTVAL (operands[5]) - 8) << 4;
3413 mask |= (INTVAL (operands[6]) - 8) << 6;
3414 operands[3] = GEN_INT (mask);
3416 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3418 [(set_attr "type" "sselog")
3419 (set_attr "length_immediate" "1")
3420 (set_attr "prefix" "vex")
3421 (set_attr "mode" "V8SF")])
;; Expander taking the shufps immediate as operand 3 and decoding it into
;; four explicit element indices for sse_shufps_v4sf.  The immediate is
;; read as four 2-bit fields: fields 0 and 1 are passed unchanged (0..3),
;; fields 2 and 3 are biased by 4 (4..7).
3423 (define_expand "sse_shufps"
3424 [(match_operand:V4SF 0 "register_operand" "")
3425 (match_operand:V4SF 1 "register_operand" "")
3426 (match_operand:V4SF 2 "nonimmediate_operand" "")
3427 (match_operand:SI 3 "const_int_operand" "")]
3430 int mask = INTVAL (operands[3]);
3431 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3432 GEN_INT ((mask >> 0) & 3),
3433 GEN_INT ((mask >> 2) & 3),
3434 GEN_INT (((mask >> 4) & 3) + 4),
3435 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps for each mode of the VI4F_128 iterator: selects four elements
;; from the concatenation of operands 1 and 2.  Operands 3 and 4 must be
;; in 0..3 (elements of operand 1) and operands 5 and 6 in 4..7 (elements
;; of operand 2), as enforced by their predicates.
;; The output code packs the four indices back into the 8-bit immediate
;; (removing the +4 bias from operands 5 and 6), stores it in operands[3],
;; and returns the SSE or VEX template depending on the alternative (the
;; case labels are not visible in this excerpt).
3439 (define_insn "sse_shufps_<mode>"
3440 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3441 (vec_select:VI4F_128
3442 (vec_concat:<ssedoublevecmode>
3443 (match_operand:VI4F_128 1 "register_operand" "0,x")
3444 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3445 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3446 (match_operand 4 "const_0_to_3_operand" "")
3447 (match_operand 5 "const_4_to_7_operand" "")
3448 (match_operand 6 "const_4_to_7_operand" "")])))]
3452 mask |= INTVAL (operands[3]) << 0;
3453 mask |= INTVAL (operands[4]) << 2;
3454 mask |= (INTVAL (operands[5]) - 4) << 4;
3455 mask |= (INTVAL (operands[6]) - 4) << 6;
3456 operands[3] = GEN_INT (mask);
3458 switch (which_alternative)
3461 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3463 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3468 [(set_attr "isa" "noavx,avx")
3469 (set_attr "type" "sselog")
3470 (set_attr "length_immediate" "1")
3471 (set_attr "prefix" "orig,vex")
3472 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternative 0 stores a register to memory with movhps; alternative 1 is
;; register to register via movhlps; alternative 2 reads an offsettable
;; memory operand ("o") with movlps using the %H1 operand form
;; (presumably the address of the upper half of operand 1 -- confirm).
3474 (define_insn "sse_storehps"
3475 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3477 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3478 (parallel [(const_int 2) (const_int 3)])))]
3481 %vmovhps\t{%1, %0|%0, %1}
3482 %vmovhlps\t{%1, %d0|%d0, %1}
3483 %vmovlps\t{%H1, %d0|%d0, %H1}"
3484 [(set_attr "type" "ssemov")
3485 (set_attr "prefix" "maybe_vex")
3486 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  It lets ix86_fixup_binary_operands
;; choose a destination (dst) for the given operands, emits sse_loadhps
;; into dst, and copies dst to operands[0] afterwards when the two differ.
3488 (define_expand "sse_loadhps_exp"
3489 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3492 (match_operand:V4SF 1 "nonimmediate_operand" "")
3493 (parallel [(const_int 0) (const_int 1)]))
3494 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3497 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3499 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3501 /* Fix up the destination if needed. */
3502 if (dst != operands[0])
3503 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2
;; (presumably operand 2 becomes the upper half; the vec_concat wrapper is
;; not visible in this excerpt -- confirm).  Alternatives:
;;   0/1  operand 2 in memory: movhps / vmovhps
;;   2/3  operand 2 in a register: movlhps / vmovlhps
;;   4    destination is offsettable memory tied to operand 1; operand 2 is
;;        stored through the %H0 operand form with movlps.
3508 (define_insn "sse_loadhps"
3509 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3512 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3513 (parallel [(const_int 0) (const_int 1)]))
3514 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3517 movhps\t{%2, %0|%0, %2}
3518 vmovhps\t{%2, %1, %0|%0, %1, %2}
3519 movlhps\t{%2, %0|%0, %2}
3520 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3521 %vmovlps\t{%2, %H0|%H0, %2}"
3522 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3523 (set_attr "type" "ssemov")
3524 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3525 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternative 0 stores a register to memory with movlps; alternative 1 is
;; a register-to-register movaps; alternative 2 loads from memory with
;; movlps into the %d0 form of the destination.
3527 (define_insn "sse_storelps"
3528 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3530 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3531 (parallel [(const_int 0) (const_int 1)])))]
3534 %vmovlps\t{%1, %0|%0, %1}
3535 %vmovaps\t{%1, %0|%0, %1}
3536 %vmovlps\t{%1, %d0|%d0, %1}"
3537 [(set_attr "type" "ssemov")
3538 (set_attr "prefix" "maybe_vex")
3539 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  It lets ix86_fixup_binary_operands
;; choose a destination (dst) for the given operands, emits sse_loadlps
;; into dst, and copies dst to operands[0] afterwards when the two differ.
3541 (define_expand "sse_loadlps_exp"
3542 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3544 (match_operand:V2SF 2 "nonimmediate_operand" "")
3546 (match_operand:V4SF 1 "nonimmediate_operand" "")
3547 (parallel [(const_int 2) (const_int 3)]))))]
3550 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3552 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3554 /* Fix up the destination if needed. */
3555 if (dst != operands[0])
3556 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1
;; (presumably operand 2 becomes the lower half; the vec_concat wrapper is
;; not visible in this excerpt -- confirm).  Alternatives:
;;   0/1  both inputs in registers: shufps / vshufps with immediate 0xe4
;;   2/3  operand 2 in memory: movlps / vmovlps
;;   4    destination is memory tied to operand 1; operand 2 is stored
;;        with movlps.
3561 (define_insn "sse_loadlps"
3562 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3564 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3566 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3567 (parallel [(const_int 2) (const_int 3)]))))]
3570 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3571 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3572 movlps\t{%2, %0|%0, %2}
3573 vmovlps\t{%2, %1, %0|%0, %1, %2}
3574 %vmovlps\t{%2, %0|%0, %2}"
3575 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3576 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3577 (set_attr "length_immediate" "1,1,*,*,*")
3578 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3579 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  The RTL operator that combines
;; operands 2 and 1, and the insn condition, are not visible in this
;; excerpt.  Alternative 0 ties operand 1 to the destination (two-operand
;; SSE form); alternative 1 is the three-operand VEX form.
3581 (define_insn "sse_movss"
3582 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3584 (match_operand:V4SF 2 "register_operand" " x,x")
3585 (match_operand:V4SF 1 "register_operand" " 0,x")
3589 movss\t{%2, %0|%0, %2}
3590 vmovss\t{%2, %1, %0|%0, %1, %2}"
3591 [(set_attr "isa" "noavx,avx")
3592 (set_attr "type" "ssemov")
3593 (set_attr "prefix" "orig,vex")
3594 (set_attr "mode" "SF")])
;; Expander producing a V4SF from an SF operand (the RTL operator is not
;; visible in this excerpt; presumably vec_duplicate -- confirm).
;; The preparation code forces operand 1 into an SFmode register; the
;; condition guarding that statement is elided from this view.
3596 (define_expand "vec_dupv4sf"
3597 [(set (match_operand:V4SF 0 "register_operand" "")
3599 (match_operand:SF 1 "nonimmediate_operand" "")))]
3603 operands[1] = force_reg (SFmode, operands[1]);
;; vbroadcastss with a register source: element 0 of V4SF operand 1 is the
;; value used to build the V4SF result (the enclosing operators and the
;; insn condition are not visible in this excerpt).
3606 (define_insn "avx2_vec_dupv4sf"
3607 [(set (match_operand:V4SF 0 "register_operand" "=x")
3610 (match_operand:V4SF 1 "register_operand" "x")
3611 (parallel [(const_int 0)]))))]
3613 "vbroadcastss\t{%1, %0|%0, %1}"
3614 [(set_attr "type" "sselog1")
3615 (set_attr "prefix" "vex")
3616 (set_attr "mode" "V4SF")])
;; VEX-encoded V4SF splat of an SF operand.  When the scalar is in a
;; register (alternative 0) it is splatted with vshufps $0 using operand 1
;; as both sources; when it is in memory (alternative 1) vbroadcastss is
;; used.  The per-alternative attributes record the immediate byte of the
;; first form and the extra prefix of the second.
3618 (define_insn "*vec_dupv4sf_avx"
3619 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3621 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3624 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3625 vbroadcastss\t{%1, %0|%0, %1}"
3626 [(set_attr "type" "sselog1,ssemov")
3627 (set_attr "length_immediate" "1,0")
3628 (set_attr "prefix_extra" "0,1")
3629 (set_attr "prefix" "vex")
3630 (set_attr "mode" "V4SF")])
;; vbroadcastss with a register source producing a V8SF result: element 0
;; of V4SF operand 1 is the value used (the enclosing operators and the
;; insn condition are not visible in this excerpt).
3632 (define_insn "avx2_vec_dupv8sf"
3633 [(set (match_operand:V8SF 0 "register_operand" "=x")
3636 (match_operand:V4SF 1 "register_operand" "x")
3637 (parallel [(const_int 0)]))))]
3639 "vbroadcastss\t{%1, %0|%0, %1}"
3640 [(set_attr "type" "sselog1")
3641 (set_attr "prefix" "vex")
3642 (set_attr "mode" "V8SF")])
;; Non-VEX V4SF splat of an SF register.  Operand 1 is tied to the
;; destination ("0"), so a single shufps with immediate 0 and %0 as both
;; sources copies element 0 into every position.
3644 (define_insn "*vec_dupv4sf"
3645 [(set (match_operand:V4SF 0 "register_operand" "=x")
3647 (match_operand:SF 1 "register_operand" "0")))]
3649 "shufps\t{$0, %0, %0|%0, %0, 0}"
3650 [(set_attr "type" "sselog1")
3651 (set_attr "length_immediate" "1")
3652 (set_attr "mode" "V4SF")])
3654 ;; Although insertps takes register source, we prefer
3655 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF operands (the combining operator is not
;; visible in this excerpt).  Alternatives, in order:
;;   0/1  both scalars in SSE registers: unpcklps / vunpcklps
;;   2/3  operand 2 in memory: insertps / vinsertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 matching "C": plain movss load
;;   5    MMX registers ("*y"): punpckldq
;;   6    MMX destination, operand 1 in memory, operand 2 "C": movd
;; "C" presumably denotes a zero constant -- confirm against the
;; constraint definitions.
3656 (define_insn "*vec_concatv2sf_sse4_1"
3657 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3659 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3660 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3663 unpcklps\t{%2, %0|%0, %2}
3664 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3665 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3666 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3667 %vmovss\t{%1, %0|%0, %1}
3668 punpckldq\t{%2, %0|%0, %2}
3669 movd\t{%1, %0|%0, %1}"
3670 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3671 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3672 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3673 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3674 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3675 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3676 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3678 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3679 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3680 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands using only legacy encodings (the
;; combining operator and the insn condition are not visible in this
;; excerpt).  Alternatives, in order:
;;   0  SSE registers, operand 1 tied to the destination: unpcklps
;;   1  operand 1 in memory, operand 2 matching "C": movss load
;;   2  MMX registers ("*y"): punpckldq
;;   3  MMX destination, operand 1 in memory, operand 2 "C": movd
3681 (define_insn "*vec_concatv2sf_sse"
3682 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3684 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3685 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3688 unpcklps\t{%2, %0|%0, %2}
3689 movss\t{%1, %0|%0, %1}
3690 punpckldq\t{%2, %0|%0, %2}
3691 movd\t{%1, %0|%0, %1}"
3692 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3693 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands (the combining operator is not
;; visible in this excerpt).  With operand 2 in a register the insn is
;; movlhps / vmovlhps; with operand 2 in memory it is movhps / vmovhps.
;; In the non-VEX alternatives operand 1 is tied to the destination.
3695 (define_insn "*vec_concatv4sf"
3696 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3698 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3699 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3702 movlhps\t{%2, %0|%0, %2}
3703 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3704 movhps\t{%2, %0|%0, %2}
3705 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3706 [(set_attr "isa" "noavx,avx,noavx,avx")
3707 (set_attr "type" "ssemov")
3708 (set_attr "prefix" "orig,vex,orig,vex")
3709 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for every mode of the V_128 iterator.  All work is
;; delegated to ix86_expand_vector_init, called with a false first
;; argument, the destination register, and the unconstrained operand 1.
3711 (define_expand "vec_init<mode>"
3712 [(match_operand:V_128 0 "register_operand" "")
3713 (match_operand 1 "" "")]
3716 ix86_expand_vector_init (false, operands[0], operands[1]);
3720 ;; Avoid combining registers from different units in a single alternative,
3721 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insert scalar operand 2 into a VI4F_128 vector whose other elements
;; come from operand 1 (the merging operator and its selector are not
;; visible in this excerpt; the pattern name suggests element 0 --
;; confirm).  Eleven alternatives:
;;   0-3   operand 1 matches "C"; the scalar comes from an SSE register,
;;         memory, or a general register
;;   4-7   operand 1 is a vector register (tied to the destination in the
;;         non-VEX forms): movss / vmovss / pinsrd / vpinsrd
;;   8-10  destination is memory tied to operand 1; the scalar comes from
;;         an SSE register, "fF", or a general register (their output
;;         templates are elided from this view)
;; The "type" attribute is sselog for alternatives 0, 6 and 7, fmov for 9,
;; imov for 10, and ssemov otherwise.
3722 (define_insn "vec_set<mode>_0"
3723 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3724 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3726 (vec_duplicate:VI4F_128
3727 (match_operand:<ssescalarmode> 2 "general_operand"
3728 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3729 (match_operand:VI4F_128 1 "vector_move_operand"
3730 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3734 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3735 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3736 %vmovd\t{%2, %0|%0, %2}
3737 movss\t{%2, %0|%0, %2}
3738 movss\t{%2, %0|%0, %2}
3739 vmovss\t{%2, %1, %0|%0, %1, %2}
3740 pinsrd\t{$0, %2, %0|%0, %2, 0}
3741 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3745 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3747 (cond [(eq_attr "alternative" "0,6,7")
3748 (const_string "sselog")
3749 (eq_attr "alternative" "9")
3750 (const_string "fmov")
3751 (eq_attr "alternative" "10")
3752 (const_string "imov")
3754 (const_string "ssemov")))
3755 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3756 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3757 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3758 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3760 ;; A subset is vec_setv4sf.
;; insertps / vinsertps inserting SF operand 2 into V4SF operand 1.
;; Operand 3 is a constant whose exact_log2 must be below the number of
;; V4SF elements, i.e. it must be a single set bit naming one element.
;; The output code converts it to the instruction immediate by shifting
;; the element number left by 4, then picks the SSE or VEX template by
;; alternative (case labels are not visible in this excerpt).
3761 (define_insn "*vec_setv4sf_sse4_1"
3762 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3765 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3766 (match_operand:V4SF 1 "register_operand" "0,x")
3767 (match_operand:SI 3 "const_int_operand" "")))]
3769 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3770 < GET_MODE_NUNITS (V4SFmode))"
3772 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3773 switch (which_alternative)
3776 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3778 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3783 [(set_attr "isa" "noavx,avx")
3784 (set_attr "type" "sselog")
3785 (set_attr "prefix_data16" "1,*")
3786 (set_attr "prefix_extra" "1")
3787 (set_attr "length_immediate" "1")
3788 (set_attr "prefix" "orig,vex")
3789 (set_attr "mode" "V4SF")])
;; insertps / vinsertps exposed as an unspec with the full 8-bit immediate
;; in operand 3.  When operand 2 is in memory the output code folds the top
;; two immediate bits (count_s) into the address: the operand is
;; re-addressed as an SFmode reference at byte offset count_s * 4 and the
;; immediate is masked to its low six bits.  The SSE or VEX template is
;; then chosen by alternative (case labels are not visible here).
3791 (define_insn "sse4_1_insertps"
3792 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3793 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3794 (match_operand:V4SF 1 "register_operand" "0,x")
3795 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3799 if (MEM_P (operands[2]))
3801 unsigned count_s = INTVAL (operands[3]) >> 6;
3803 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3804 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3806 switch (which_alternative)
3809 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3811 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3816 [(set_attr "isa" "noavx,avx")
3817 (set_attr "type" "sselog")
3818 (set_attr "prefix_data16" "1,*")
3819 (set_attr "prefix_extra" "1")
3820 (set_attr "length_immediate" "1")
3821 (set_attr "prefix" "orig,vex")
3822 (set_attr "mode" "V4SF")])
;; Post-reload rewrite for a VI4F_128 memory destination combined with a
;; duplicated non-memory scalar (operand 1).  The visible replacement code
;; emits a scalar-mode move to byte offset 0 of the memory operand.
;; NOTE(review): the construct header, the merge selector and the move
;; source are elided from this excerpt -- confirm against the full file.
3825 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3827 (vec_duplicate:VI4F_128
3828 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3831 "TARGET_SSE && reload_completed"
3834 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set expander for every mode of the V iterator.  All work is
;; delegated to ix86_expand_vector_set, called with a false first
;; argument, the vector register, the scalar register, and the constant
;; element index taken from operand 2.
3839 (define_expand "vec_set<mode>"
3840 [(match_operand:V 0 "register_operand" "")
3841 (match_operand:<ssescalarmode> 1 "register_operand" "")
3842 (match_operand 2 "const_int_operand" "")]
3845 ix86_expand_vector_set (false, operands[0], operands[1],
3846 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination (SSE register,
;; memory, x87 register or general register).  Matching requires
;; TARGET_SSE and that the two operands are not both memory.  After reload
;; the insn is split into an ordinary SFmode move: operand 1 is
;; re-expressed either as an SFmode REG with the same register number or
;; through gen_lowpart (the test choosing between the two is elided).
3850 (define_insn_and_split "*vec_extractv4sf_0"
3851 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3853 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3854 (parallel [(const_int 0)])))]
3855 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3857 "&& reload_completed"
3860 rtx op1 = operands[1];
3862 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3864 op1 = gen_lowpart (SFmode, op1);
3865 emit_move_insn (operands[0], op1);
;; Expander for every V_256 mode: operand 2 (0 or 1) selects which
;; generator is used, gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, and the chosen one is emitted with the
;; destination and source operands.  The case labels mapping 0/1 to
;; lo/hi are not visible in this excerpt.
3869 (define_expand "avx_vextractf128<mode>"
3870 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3871 (match_operand:V_256 1 "register_operand" "")
3872 (match_operand:SI 2 "const_0_to_1_operand" "")]
3875 rtx (*insn)(rtx, rtx);
3877 switch (INTVAL (operands[2]))
3880 insn = gen_vec_extract_lo_<mode>;
3883 insn = gen_vec_extract_hi_<mode>;
3889 emit_insn (insn (operands[0], operands[1]));
;; Extract elements 0 and 1 of a VI8F_256 vector into the half-width mode.
;; Matching requires TARGET_AVX and that the operands are not both memory.
;; After reload the insn is split into a plain move of the low part:
;; operand 1 is re-expressed either as a half-mode REG with the same
;; register number or through gen_lowpart (the test choosing between the
;; two is elided from this view).
3893 (define_insn_and_split "vec_extract_lo_<mode>"
3894 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3895 (vec_select:<ssehalfvecmode>
3896 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3897 (parallel [(const_int 0) (const_int 1)])))]
3898 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3900 "&& reload_completed"
3903 rtx op1 = operands[1];
3905 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3907 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3908 emit_move_insn (operands[0], op1);
;; Extract elements 2 and 3 of a VI8F_256 register into the half-width
;; mode using vextract<i128> with immediate 1.  The destination may be a
;; register or memory; the "memory" attribute marks the second alternative
;; as a store.  The insn condition is not visible in this excerpt.
3912 (define_insn "vec_extract_hi_<mode>"
3913 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3914 (vec_select:<ssehalfvecmode>
3915 (match_operand:VI8F_256 1 "register_operand" "x,x")
3916 (parallel [(const_int 2) (const_int 3)])))]
3918 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3919 [(set_attr "type" "sselog")
3920 (set_attr "prefix_extra" "1")
3921 (set_attr "length_immediate" "1")
3922 (set_attr "memory" "none,store")
3923 (set_attr "prefix" "vex")
3924 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0-3 of a VI4F_256 vector into the half-width mode.
;; Matching requires TARGET_AVX and that the operands are not both memory.
;; After reload the insn is split into a plain move of the low part:
;; operand 1 is re-expressed either as a half-mode REG with the same
;; register number or through gen_lowpart (the test choosing between the
;; two is elided from this view).
3926 (define_insn_and_split "vec_extract_lo_<mode>"
3927 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3928 (vec_select:<ssehalfvecmode>
3929 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3930 (parallel [(const_int 0) (const_int 1)
3931 (const_int 2) (const_int 3)])))]
3932 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3934 "&& reload_completed"
3937 rtx op1 = operands[1];
3939 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3941 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3942 emit_move_insn (operands[0], op1);
;; Extract elements 4-7 of a VI4F_256 register into the half-width mode
;; using vextract<i128> with immediate 1.  The destination may be a
;; register or memory; the "memory" attribute marks the second alternative
;; as a store.  The insn condition is not visible in this excerpt.
3946 (define_insn "vec_extract_hi_<mode>"
3947 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3948 (vec_select:<ssehalfvecmode>
3949 (match_operand:VI4F_256 1 "register_operand" "x,x")
3950 (parallel [(const_int 4) (const_int 5)
3951 (const_int 6) (const_int 7)])))]
3953 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3954 [(set_attr "type" "sselog")
3955 (set_attr "prefix_extra" "1")
3956 (set_attr "length_immediate" "1")
3957 (set_attr "memory" "none,store")
3958 (set_attr "prefix" "vex")
3959 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0-7 of a V16HI vector as a V8HI.  Matching requires
;; TARGET_AVX and that the operands are not both memory.  After reload the
;; insn is split into a plain V8HI move of the low part: operand 1 is
;; re-expressed either as a V8HImode REG with the same register number or
;; through gen_lowpart (the test choosing between the two is elided).
3961 (define_insn_and_split "vec_extract_lo_v16hi"
3962 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3964 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3965 (parallel [(const_int 0) (const_int 1)
3966 (const_int 2) (const_int 3)
3967 (const_int 4) (const_int 5)
3968 (const_int 6) (const_int 7)])))]
3969 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3971 "&& reload_completed"
3974 rtx op1 = operands[1];
3976 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3978 op1 = gen_lowpart (V8HImode, op1);
3979 emit_move_insn (operands[0], op1);
;; Extract elements 8-15 of a V16HI register as a V8HI using a 128-bit
;; vextract with immediate 1.  The destination may be a register or
;; memory; the "memory" attribute marks the second alternative as a store.
;; The insn condition is not visible in this excerpt.
3983 (define_insn "vec_extract_hi_v16hi"
3984 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3986 (match_operand:V16HI 1 "register_operand" "x,x")
3987 (parallel [(const_int 8) (const_int 9)
3988 (const_int 10) (const_int 11)
3989 (const_int 12) (const_int 13)
3990 (const_int 14) (const_int 15)])))]
3992 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
3993 [(set_attr "type" "sselog")
3994 (set_attr "prefix_extra" "1")
3995 (set_attr "length_immediate" "1")
3996 (set_attr "memory" "none,store")
3997 (set_attr "prefix" "vex")
3998 (set_attr "mode" "OI")])
;; Extract elements 0-15 of a V32QI vector as a V16QI.  Matching requires
;; TARGET_AVX and that the operands are not both memory.  After reload the
;; insn is split into a plain V16QI move of the low part: operand 1 is
;; re-expressed either as a V16QImode REG with the same register number or
;; through gen_lowpart (the test choosing between the two is elided).
4000 (define_insn_and_split "vec_extract_lo_v32qi"
4001 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4003 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4004 (parallel [(const_int 0) (const_int 1)
4005 (const_int 2) (const_int 3)
4006 (const_int 4) (const_int 5)
4007 (const_int 6) (const_int 7)
4008 (const_int 8) (const_int 9)
4009 (const_int 10) (const_int 11)
4010 (const_int 12) (const_int 13)
4011 (const_int 14) (const_int 15)])))]
4012 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4014 "&& reload_completed"
4017 rtx op1 = operands[1];
4019 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4021 op1 = gen_lowpart (V16QImode, op1);
4022 emit_move_insn (operands[0], op1);
;; Extract elements 16-31 of a V32QI register as a V16QI using a 128-bit
;; vextract with immediate 1.  The destination may be a register or
;; memory; the "memory" attribute marks the second alternative as a store.
;; The insn condition is not visible in this excerpt.
4026 (define_insn "vec_extract_hi_v32qi"
4027 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4029 (match_operand:V32QI 1 "register_operand" "x,x")
4030 (parallel [(const_int 16) (const_int 17)
4031 (const_int 18) (const_int 19)
4032 (const_int 20) (const_int 21)
4033 (const_int 22) (const_int 23)
4034 (const_int 24) (const_int 25)
4035 (const_int 26) (const_int 27)
4036 (const_int 28) (const_int 29)
4037 (const_int 30) (const_int 31)])))]
4039 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4040 [(set_attr "type" "sselog")
4041 (set_attr "prefix_extra" "1")
4042 (set_attr "length_immediate" "1")
4043 (set_attr "memory" "none,store")
4044 (set_attr "prefix" "vex")
4045 (set_attr "mode" "OI")])
;; Extract element operand 2 (0..3) of a V4SF register into an SF
;; destination.  Alternative 0 (general register or memory destination)
;; emits extractps directly.  When the destination is an SSE register the
;; insn is split after reload: the destination is viewed as a V4SF
;; register, and depending on the index either a shufps using operand 2
;; for all four selectors (the last two biased by 4) or a high interleave
;; of operand 1 with itself is emitted.  The case labels mapping index
;; values to these two choices are not visible in this excerpt.
4047 (define_insn_and_split "*sse4_1_extractps"
4048 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4050 (match_operand:V4SF 1 "register_operand" "x,0,x")
4051 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4054 %vextractps\t{%2, %1, %0|%0, %1, %2}
4057 "&& reload_completed && SSE_REG_P (operands[0])"
4060 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4061 switch (INTVAL (operands[2]))
4065 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4066 operands[2], operands[2],
4067 GEN_INT (INTVAL (operands[2]) + 4),
4068 GEN_INT (INTVAL (operands[2]) + 4)));
4071 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4074 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4079 [(set_attr "isa" "*,noavx,avx")
4080 (set_attr "type" "sselog,*,*")
4081 (set_attr "prefix_data16" "1,*,*")
4082 (set_attr "prefix_extra" "1,*,*")
4083 (set_attr "length_immediate" "1,*,*")
4084 (set_attr "prefix" "maybe_vex,*,*")
4085 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 (0..3) from a V4SF held in offsettable memory
;; into an SF register.  After reload the insn is split into an ordinary
;; SFmode move from the same address advanced by 4 bytes per element.
4087 (define_insn_and_split "*vec_extract_v4sf_mem"
4088 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4090 (match_operand:V4SF 1 "memory_operand" "o")
4091 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4094 "&& reload_completed"
4097 int i = INTVAL (operands[2]);
4099 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4103 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with the
;; 128-bit mode of the same element type, which is listed unconditionally.
4104 (define_mode_iterator VEC_EXTRACT_MODE
4105 [(V32QI "TARGET_AVX") V16QI
4106 (V16HI "TARGET_AVX") V8HI
4107 (V8SI "TARGET_AVX") V4SI
4108 (V4DI "TARGET_AVX") V2DI
4109 (V8SF "TARGET_AVX") V4SF
4110 (V4DF "TARGET_AVX") V2DF])
;; vec_extract expander for every mode of VEC_EXTRACT_MODE.  All work is
;; delegated to ix86_expand_vector_extract, called with a false first
;; argument, the scalar destination, the vector source, and the constant
;; element index taken from operand 2.
4112 (define_expand "vec_extract<mode>"
4113 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4114 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4115 (match_operand 2 "const_int_operand" "")]
4118 ix86_expand_vector_extract (false, operands[0], operands[1],
4119 INTVAL (operands[2]));
4123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4125 ;; Parallel double-precision floating point element swizzling
4127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4129 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd on V4DF.  The selection list 1,5 / 3,7 takes the
;; odd-indexed element of each pair from both operands, so the interleave
;; is performed separately for each 128-bit half.  Operand 2 may be a
;; register or memory.
4130 (define_insn "avx_unpckhpd256"
4131 [(set (match_operand:V4DF 0 "register_operand" "=x")
4134 (match_operand:V4DF 1 "register_operand" "x")
4135 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4136 (parallel [(const_int 1) (const_int 5)
4137 (const_int 3) (const_int 7)])))]
4139 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4140 [(set_attr "type" "sselog")
4141 (set_attr "prefix" "vex")
4142 (set_attr "mode" "V4DF")])
;; Expander for a high interleave of two V4DF vectors, built from three
;; selections.  The first two use the per-lane index lists 0,4,2,6 and
;; 1,5,3,7; the last, which sets operand 0, picks indices 2,3 and 6,7.
;; operands[3] and operands[4] are fresh V4DF pseudos, presumably the
;; destinations of the first two selections and the inputs of the last
;; (those lines are not visible in this excerpt -- confirm).
4144 (define_expand "vec_interleave_highv4df"
4148 (match_operand:V4DF 1 "register_operand" "x")
4149 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4150 (parallel [(const_int 0) (const_int 4)
4151 (const_int 2) (const_int 6)])))
4157 (parallel [(const_int 1) (const_int 5)
4158 (const_int 3) (const_int 7)])))
4159 (set (match_operand:V4DF 0 "register_operand" "")
4164 (parallel [(const_int 2) (const_int 3)
4165 (const_int 6) (const_int 7)])))]
4168 operands[3] = gen_reg_rtx (V4DFmode);
4169 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  Both inputs may initially be
;; memory; if ix86_vec_interleave_v2df_operator_ok rejects the operand
;; combination (called with second argument 1), operand 2 is forced into
;; a register.  Only the first selection index (1) is visible here.
4173 (define_expand "vec_interleave_highv2df"
4174 [(set (match_operand:V2DF 0 "register_operand" "")
4177 (match_operand:V2DF 1 "nonimmediate_operand" "")
4178 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4179 (parallel [(const_int 1)
4183 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4184 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave insn; valid only under TARGET_SSE2 and when
;; ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs: unpckhpd / vunpckhpd
;;   2    operand 1 in offsettable memory and operand 2 identical to it:
;;        movddup from the %H1 form of the address (SSE3)
;;   3/4  operand 1 in offsettable memory: movlpd / vmovlpd from %H1
;;   5    destination in memory tied to operand 2: movhpd store of
;;        operand 1
;; Only the first selection index (1) is visible in this excerpt.
4187 (define_insn "*vec_interleave_highv2df"
4188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4191 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4192 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4193 (parallel [(const_int 1)
4195 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4197 unpckhpd\t{%2, %0|%0, %2}
4198 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4199 %vmovddup\t{%H1, %0|%0, %H1}
4200 movlpd\t{%H1, %0|%0, %H1}
4201 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4202 %vmovhpd\t{%1, %0|%0, %1}"
4203 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4204 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4205 (set_attr "prefix_data16" "*,*,*,1,*,1")
4206 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4207 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4209 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander for the 256-bit movddup: a selection with index list
;; 0,4,2,6 applied to a single V4DF input (presumably concatenated with
;; itself; that line is not visible in this excerpt -- confirm).
4210 (define_expand "avx_movddup256"
4211 [(set (match_operand:V4DF 0 "register_operand" "")
4214 (match_operand:V4DF 1 "nonimmediate_operand" "")
4216 (parallel [(const_int 0) (const_int 4)
4217 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpcklpd: a selection with index list
;; 0,4,2,6 over V4DF operands 1 (register) and 2 (register or memory).
;; No preparation statements are visible in this excerpt.
4220 (define_expand "avx_unpcklpd256"
4221 [(set (match_operand:V4DF 0 "register_operand" "")
4224 (match_operand:V4DF 1 "register_operand" "")
4225 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4226 (parallel [(const_int 0) (const_int 4)
4227 (const_int 2) (const_int 6)])))]
;; Insn behind the V4DF selection with index list 0,4,2,6.  The visible
;; part of the condition allows operand 1 to be memory only when it is
;; identical to operand 2.  Alternative 0 (operand 2 constrained to match
;; operand 1) emits vmovddup from operand 1; alternative 1 emits
;; vunpcklpd with a register operand 1.
4230 (define_insn "*avx_unpcklpd256"
4231 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4234 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4235 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4236 (parallel [(const_int 0) (const_int 4)
4237 (const_int 2) (const_int 6)])))]
4239 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4241 vmovddup\t{%1, %0|%0, %1}
4242 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4243 [(set_attr "type" "sselog")
4244 (set_attr "prefix" "vex")
4245 (set_attr "mode" "V4DF")])
;; Expander for a low interleave of two V4DF vectors, built from three
;; selections.  The first two use the per-lane index lists 0,4,2,6 and
;; 1,5,3,7; the last, which sets operand 0, picks indices 0,1 and 4,5.
;; operands[3] and operands[4] are fresh V4DF pseudos, presumably the
;; destinations of the first two selections and the inputs of the last
;; (those lines are not visible in this excerpt -- confirm).
4247 (define_expand "vec_interleave_lowv4df"
4251 (match_operand:V4DF 1 "register_operand" "x")
4252 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4253 (parallel [(const_int 0) (const_int 4)
4254 (const_int 2) (const_int 6)])))
4260 (parallel [(const_int 1) (const_int 5)
4261 (const_int 3) (const_int 7)])))
4262 (set (match_operand:V4DF 0 "register_operand" "")
4267 (parallel [(const_int 0) (const_int 1)
4268 (const_int 4) (const_int 5)])))]
4271 operands[3] = gen_reg_rtx (V4DFmode);
4272 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  Both inputs may initially be
;; memory; if ix86_vec_interleave_v2df_operator_ok rejects the operand
;; combination (called with second argument 0), operand 1 is forced into
;; a register.  Only the first selection index (0) is visible here.
4275 (define_expand "vec_interleave_lowv2df"
4276 [(set (match_operand:V2DF 0 "register_operand" "")
4279 (match_operand:V2DF 1 "nonimmediate_operand" "")
4280 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4281 (parallel [(const_int 0)
4285 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4286 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave insn; valid only under TARGET_SSE2 and when
;; ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs: unpcklpd / vunpcklpd
;;   2    operand 1 in memory and operand 2 identical to it: movddup (SSE3)
;;   3/4  operand 2 in memory: movhpd / vmovhpd
;;   5    destination in offsettable memory tied to operand 1: operand 2
;;        is stored through the %H0 form of the address with movlpd
;; Only the first selection index (0) is visible in this excerpt.
4289 (define_insn "*vec_interleave_lowv2df"
4290 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4293 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4294 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4295 (parallel [(const_int 0)
4297 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4299 unpcklpd\t{%2, %0|%0, %2}
4300 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4301 %vmovddup\t{%1, %0|%0, %1}
4302 movhpd\t{%2, %0|%0, %2}
4303 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4304 %vmovlpd\t{%2, %H0|%H0, %2}"
4305 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4306 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4307 (set_attr "prefix_data16" "*,*,*,1,*,1")
4308 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4309 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3) for a V2DF memory destination fed
;; from a V2DF register: the DFmode view of the register is stored twice,
;; at byte offsets 0 and 8 of the destination, so both elements in memory
;; receive the register's low element.
;; NOTE(review): the construct header and part of the matched RTL are
;; elided from this excerpt.
4312 [(set (match_operand:V2DF 0 "memory_operand" "")
4315 (match_operand:V2DF 1 "register_operand" "")
4317 (parallel [(const_int 0)
4319 "TARGET_SSE3 && reload_completed"
4322 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4323 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4324 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (TARGET_SSE3) of a two-element selection from a V2DF memory
;; operand whose second index equals the first plus 2.  The replacement is
;; a vec_duplicate of a single DFmode load, with the address advanced by
;; 8 bytes times the first index (0 or 1).
;; NOTE(review): the construct header and part of the matched RTL are
;; elided from this excerpt.
4329 [(set (match_operand:V2DF 0 "register_operand" "")
4332 (match_operand:V2DF 1 "memory_operand" "")
4334 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4335 (match_operand:SI 3 "const_int_operand" "")])))]
4336 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4337 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4339 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle immediate as operand 3 and decoding it into
;; explicit element indices for avx_shufpd256_1.  In the visible
;; arguments, mask bit 1 chooses index 5 or 4, bit 2 chooses 3 or 2, and
;; bit 3 chooses 7 or 6.  The argument derived from bit 0 is not visible
;; in this excerpt.
4342 (define_expand "avx_shufpd256"
4343 [(match_operand:V4DF 0 "register_operand" "")
4344 (match_operand:V4DF 1 "register_operand" "")
4345 (match_operand:V4DF 2 "nonimmediate_operand" "")
4346 (match_operand:SI 3 "const_int_operand" "")]
4349 int mask = INTVAL (operands[3]);
4350 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4352 GEN_INT (mask & 2 ? 5 : 4),
4353 GEN_INT (mask & 4 ? 3 : 2),
4354 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as four constant index
;; operands, each restricted by its predicate to a two-value range
;; (0..1, 4..5, 2..3, 6..7).  The output code turns each index into one
;; immediate bit by subtracting the base of its range, assembles the four
;; bits in operand order, and overwrites operands[3] with the immediate
;; before returning the template.
4358 (define_insn "avx_shufpd256_1"
4359 [(set (match_operand:V4DF 0 "register_operand" "=x")
4362 (match_operand:V4DF 1 "register_operand" "x")
4363 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4364 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4365 (match_operand 4 "const_4_to_5_operand" "")
4366 (match_operand 5 "const_2_to_3_operand" "")
4367 (match_operand 6 "const_6_to_7_operand" "")])))]
4371 mask = INTVAL (operands[3]);
4372 mask |= (INTVAL (operands[4]) - 4) << 1;
4373 mask |= (INTVAL (operands[5]) - 2) << 2;
4374 mask |= (INTVAL (operands[6]) - 6) << 3;
4375 operands[3] = GEN_INT (mask);
4377 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4379 [(set_attr "type" "sselog")
4380 (set_attr "length_immediate" "1")
4381 (set_attr "prefix" "vex")
4382 (set_attr "mode" "V4DF")])
;; Expander: turn the shuffle immediate (operand 3) into element selectors for
;; sse2_shufpd_v2df; bit 1 of the mask chooses element 3 over element 2.
4384 (define_expand "sse2_shufpd"
4385 [(match_operand:V2DF 0 "register_operand" "")
4386 (match_operand:V2DF 1 "register_operand" "")
4387 (match_operand:V2DF 2 "nonimmediate_operand" "")
4388 (match_operand:SI 3 "const_int_operand" "")]
4391 int mask = INTVAL (operands[3]);
4392 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4394 GEN_INT (mask & 2 ? 3 : 2)));
4398 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vpunpckhqdq on V4DI: register operand 1 and register-or-memory operand 2,
;; VEX-encoded only, with insn mode OI.
4399 (define_insn "avx2_interleave_highv4di"
4400 [(set (match_operand:V4DI 0 "register_operand" "=x")
4403 (match_operand:V4DI 1 "register_operand" "x")
4404 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4405 (parallel [(const_int 1)
4410 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4411 [(set_attr "type" "sselog")
4412 (set_attr "prefix" "vex")
4413 (set_attr "mode" "OI")])
;; punpckhqdq on V2DI.  Alternative 0 is the two-operand legacy SSE form
;; (operand 1 tied to the destination); alternative 1 is the three-operand
;; VEX form.
4415 (define_insn "vec_interleave_highv2di"
4416 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4419 (match_operand:V2DI 1 "register_operand" "0,x")
4420 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4421 (parallel [(const_int 1)
4425 punpckhqdq\t{%2, %0|%0, %2}
4426 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4427 [(set_attr "isa" "noavx,avx")
4428 (set_attr "type" "sselog")
4429 (set_attr "prefix_data16" "1,*")
4430 (set_attr "prefix" "orig,vex")
4431 (set_attr "mode" "TI")])
;; vpunpcklqdq on V4DI: register operand 1 and register-or-memory operand 2,
;; VEX-encoded only, with insn mode OI.
4433 (define_insn "avx2_interleave_lowv4di"
4434 [(set (match_operand:V4DI 0 "register_operand" "=x")
4437 (match_operand:V4DI 1 "register_operand" "x")
4438 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4439 (parallel [(const_int 0)
4444 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4445 [(set_attr "type" "sselog")
4446 (set_attr "prefix" "vex")
4447 (set_attr "mode" "OI")])
;; punpcklqdq on V2DI.  Alternative 0 is the two-operand legacy SSE form
;; (operand 1 tied to the destination); alternative 1 is the three-operand
;; VEX form.
4449 (define_insn "vec_interleave_lowv2di"
4450 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4453 (match_operand:V2DI 1 "register_operand" "0,x")
4454 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4455 (parallel [(const_int 0)
4459 punpcklqdq\t{%2, %0|%0, %2}
4460 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4461 [(set_attr "isa" "noavx,avx")
4462 (set_attr "type" "sselog")
4463 (set_attr "prefix_data16" "1,*")
4464 (set_attr "prefix" "orig,vex")
4465 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes.  Operands 3 and 4 are element indices into
;; the concatenation of operands 1 and 2; the output code rebuilds the 2-bit
;; immediate as op3 | ((op4 - 2) << 1) and stores it back into operands[3].
;; The emitted mnemonic (shufpd vs. vshufpd) depends on which_alternative.
4467 (define_insn "sse2_shufpd_<mode>"
4468 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4469 (vec_select:VI8F_128
4470 (vec_concat:<ssedoublevecmode>
4471 (match_operand:VI8F_128 1 "register_operand" "0,x")
4472 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4473 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4474 (match_operand 4 "const_2_to_3_operand" "")])))]
4478 mask = INTVAL (operands[3]);
4479 mask |= (INTVAL (operands[4]) - 2) << 1;
4480 operands[3] = GEN_INT (mask);
4482 switch (which_alternative)
4485 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4487 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4492 [(set_attr "isa" "noavx,avx")
4493 (set_attr "type" "sselog")
4494 (set_attr "length_immediate" "1")
4495 (set_attr "prefix" "orig,vex")
4496 (set_attr "mode" "V2DF")])
4498 ;; Avoid combining registers from different units in a single alternative,
4499 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of V2DF operand 1 into DFmode operand 0.  The insn
;; condition rejects the case where both operands are in memory.  The
;; prefix_data16 attribute is computed from a test on alternative 0 combined
;; with !TARGET_AVX.
4500 (define_insn "sse2_storehpd"
4501 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4503 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4504 (parallel [(const_int 1)])))]
4505 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4507 %vmovhpd\t{%1, %0|%0, %1}
4509 vunpckhpd\t{%d1, %0|%0, %d1}
4513 [(set_attr "isa" "*,noavx,avx,*,*,*")
4514 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4515 (set (attr "prefix_data16")
4517 (and (eq_attr "alternative" "0")
4518 (not (match_test "TARGET_AVX")))
4520 (const_string "*")))
4521 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4522 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: extracting element 1 from a V2DF in memory into a
;; register becomes a plain DFmode load from byte offset 8 of that memory.
4525 [(set (match_operand:DF 0 "register_operand" "")
4527 (match_operand:V2DF 1 "memory_operand" "")
4528 (parallel [(const_int 1)])))]
4529 "TARGET_SSE2 && reload_completed"
4530 [(set (match_dup 0) (match_dup 1))]
4531 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extraction of element 1 of a V2DF when TARGET_SSE is available but
;; TARGET_SSE2 is not, implemented with single-precision move instructions
;; (movhps, movhlps, and movlps on the %H1 form of the memory operand).
4533 (define_insn "*vec_extractv2df_1_sse"
4534 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4536 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4537 (parallel [(const_int 1)])))]
4538 "!TARGET_SSE2 && TARGET_SSE
4539 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4541 movhps\t{%1, %0|%0, %1}
4542 movhlps\t{%1, %0|%0, %1}
4543 movlps\t{%H1, %0|%0, %H1}"
4544 [(set_attr "type" "ssemov")
4545 (set_attr "mode" "V2SF,V4SF,V2SF")])
4547 ;; Avoid combining registers from different units in a single alternative,
4548 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 into DFmode operand 0.  The insn
;; condition rejects the case where both operands are in memory.
4549 (define_insn "sse2_storelpd"
4550 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4552 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4553 (parallel [(const_int 0)])))]
4554 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4556 %vmovlpd\t{%1, %0|%0, %1}
4561 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4562 (set_attr "prefix_data16" "1,*,*,*,*")
4563 (set_attr "prefix" "maybe_vex")
4564 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split: extracting element 0 into a register becomes a DFmode
;; move from the low part of operand 1, obtained either by re-creating the
;; source register in DFmode or through gen_lowpart.
4567 [(set (match_operand:DF 0 "register_operand" "")
4569 (match_operand:V2DF 1 "nonimmediate_operand" "")
4570 (parallel [(const_int 0)])))]
4571 "TARGET_SSE2 && reload_completed"
4574 rtx op1 = operands[1];
4576 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4578 op1 = gen_lowpart (DFmode, op1);
4579 emit_move_insn (operands[0], op1);
;; Extraction of element 0 of a V2DF when TARGET_SSE is available but
;; TARGET_SSE2 is not, implemented with movlps (memory on either side) or
;; movaps (register to register).
4583 (define_insn "*vec_extractv2df_0_sse"
4584 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4586 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4587 (parallel [(const_int 0)])))]
4588 "!TARGET_SSE2 && TARGET_SSE
4589 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4591 movlps\t{%1, %0|%0, %1}
4592 movaps\t{%1, %0|%0, %1}
4593 movlps\t{%1, %0|%0, %1}"
4594 [(set_attr "type" "ssemov")
4595 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse2_loadhpd: passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadhpd into the destination it
;; returns, and copies that back to operand 0 when a different rtx was used.
4597 (define_expand "sse2_loadhpd_exp"
4598 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4601 (match_operand:V2DF 1 "nonimmediate_operand" "")
4602 (parallel [(const_int 0)]))
4603 (match_operand:DF 2 "nonimmediate_operand" "")))]
4606 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4608 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4610 /* Fix up the destination if needed. */
4611 if (dst != operands[0])
4612 emit_move_insn (operands[0], dst);
4617 ;; Avoid combining registers from different units in a single alternative,
4618 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Form V2DF operand 0 from element 0 of V2DF operand 1 combined with DFmode
;; operand 2.  The insn condition rejects the case where operands 1 and 2 are
;; both in memory.
4619 (define_insn "sse2_loadhpd"
4620 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4624 (match_operand:V2DF 1 "nonimmediate_operand"
4626 (parallel [(const_int 0)]))
4627 (match_operand:DF 2 "nonimmediate_operand"
4628 " m,m,x,x,x,*f,r")))]
4629 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4631 movhpd\t{%2, %0|%0, %2}
4632 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4633 unpcklpd\t{%2, %0|%0, %2}
4634 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4638 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4639 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4640 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4641 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4642 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split: the destination is memory and its own element 0 is kept
;; (match_dup 0), so only DFmode register operand 1 has to be stored, at byte
;; offset 8 of the destination.
4645 [(set (match_operand:V2DF 0 "memory_operand" "")
4647 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4648 (match_operand:DF 1 "register_operand" "")))]
4649 "TARGET_SSE2 && reload_completed"
4650 [(set (match_dup 0) (match_dup 1))]
4651 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper for sse2_loadlpd: passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadlpd into the destination it
;; returns, and copies that back to operand 0 when a different rtx was used.
4653 (define_expand "sse2_loadlpd_exp"
4654 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4656 (match_operand:DF 2 "nonimmediate_operand" "")
4658 (match_operand:V2DF 1 "nonimmediate_operand" "")
4659 (parallel [(const_int 1)]))))]
4662 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4664 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4666 /* Fix up the destination if needed. */
4667 if (dst != operands[0])
4668 emit_move_insn (operands[0], dst);
4673 ;; Avoid combining registers from different units in a single alternative,
4674 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Form V2DF operand 0 from DFmode operand 2 combined with element 1 of
;; operand 1 (a vector_move_operand, so constraint C is accepted in
;; alternative 0).  Eleven alternatives; the "type" attribute is sselog for
;; alternative 5, fmov for 9, imov for 10 and ssemov otherwise.
4675 (define_insn "sse2_loadlpd"
4676 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4677 "=x,x,x,x,x,x,x,x,m,m ,m")
4679 (match_operand:DF 2 "nonimmediate_operand"
4680 " m,m,m,x,x,0,0,x,x,*f,r")
4682 (match_operand:V2DF 1 "vector_move_operand"
4683 " C,0,x,0,x,x,o,o,0,0 ,0")
4684 (parallel [(const_int 1)]))))]
4685 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4687 %vmovsd\t{%2, %0|%0, %2}
4688 movlpd\t{%2, %0|%0, %2}
4689 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4690 movsd\t{%2, %0|%0, %2}
4691 vmovsd\t{%2, %1, %0|%0, %1, %2}
4692 shufpd\t{$2, %1, %0|%0, %1, 2}
4693 movhpd\t{%H1, %0|%0, %H1}
4694 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4698 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4700 (cond [(eq_attr "alternative" "5")
4701 (const_string "sselog")
4702 (eq_attr "alternative" "9")
4703 (const_string "fmov")
4704 (eq_attr "alternative" "10")
4705 (const_string "imov")
4707 (const_string "ssemov")))
4708 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4709 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4710 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4711 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split: the destination is memory and its own element 1 is kept
;; (match_dup 0), so only DFmode register operand 1 has to be stored.  Operand
;; 1 is the first (element 0) half of the concatenation, so the store goes to
;; byte offset 0 of the destination; offset 8 would overwrite the preserved
;; high element and leave the low element stale.
4714 [(set (match_operand:V2DF 0 "memory_operand" "")
4716 (match_operand:DF 1 "register_operand" "")
4717 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4718 "TARGET_SSE2 && reload_completed"
4719 [(set (match_dup 0) (match_dup 1))]
4720 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Merge of two V2DF operands with nine alternatives, emitted as movsd,
;; movlpd, shufpd $2 or movhps depending on where the operands live.  The
;; "type" attribute is sselog for alternative 5 (shufpd) and ssemov otherwise.
4722 (define_insn "sse2_movsd"
4723 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4725 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4726 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4730 movsd\t{%2, %0|%0, %2}
4731 vmovsd\t{%2, %1, %0|%0, %1, %2}
4732 movlpd\t{%2, %0|%0, %2}
4733 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4734 %vmovlpd\t{%2, %0|%0, %2}
4735 shufpd\t{$2, %1, %0|%0, %1, 2}
4736 movhps\t{%H1, %0|%0, %H1}
4737 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4738 %vmovhps\t{%1, %H0|%H0, %1}"
4739 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4742 (eq_attr "alternative" "5")
4743 (const_string "sselog")
4744 (const_string "ssemov")))
4745 (set (attr "prefix_data16")
4747 (and (eq_attr "alternative" "2,4")
4748 (not (match_test "TARGET_AVX")))
4750 (const_string "*")))
4751 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4752 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4753 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander for broadcasting DFmode operand 1 into V2DF operand 0.  The
;; preparation code can force operand 1 into a register (the guarding
;; condition, if any, is a separate statement).
4755 (define_expand "vec_dupv2df"
4756 [(set (match_operand:V2DF 0 "register_operand" "")
4758 (match_operand:DF 1 "nonimmediate_operand" "")))]
4762 operands[1] = force_reg (DFmode, operands[1]);
;; Broadcast of a DFmode register or memory operand into a V2DF register with
;; a single movddup (vmovddup when VEX encoding is in use).
4765 (define_insn "*vec_dupv2df_sse3"
4766 [(set (match_operand:V2DF 0 "register_operand" "=x")
4768 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4770 "%vmovddup\t{%1, %0|%0, %1}"
4771 [(set_attr "type" "sselog1")
4772 (set_attr "prefix" "maybe_vex")
4773 (set_attr "mode" "DF")])
;; Broadcast variant with register-only DFmode operand 1 tied to the
;; destination register (constraint "0").
4775 (define_insn "*vec_dupv2df"
4776 [(set (match_operand:V2DF 0 "register_operand" "=x")
4778 (match_operand:DF 1 "register_operand" "0")))]
4781 [(set_attr "type" "sselog1")
4782 (set_attr "mode" "V2DF")])
;; Variant emitted as a single movddup of operand 1 (vmovddup under VEX);
;; presumably the concatenation of operand 1 with itself -- confirm against
;; the full pattern.
4784 (define_insn "*vec_concatv2df_sse3"
4785 [(set (match_operand:V2DF 0 "register_operand" "=x")
4787 (match_operand:DF 1 "nonimmediate_operand" "xm")
4790 "%vmovddup\t{%1, %0|%0, %1}"
4791 [(set_attr "type" "sselog1")
4792 (set_attr "prefix" "maybe_vex")
4793 (set_attr "mode" "DF")])
;; Build a V2DF from DFmode operands 1 and 2.  Alternatives 0-4 need SSE2 or
;; AVX (unpcklpd, movhpd, movsd); alternatives 5 and 6 are the non-AVX
;; fallbacks using movlhps / movhps.  The "type" attribute is sselog for
;; alternatives 0 and 1 and ssemov otherwise.
4795 (define_insn "*vec_concatv2df"
4796 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4798 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4799 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4802 unpcklpd\t{%2, %0|%0, %2}
4803 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4804 movhpd\t{%2, %0|%0, %2}
4805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4806 %vmovsd\t{%1, %0|%0, %1}
4807 movlhps\t{%2, %0|%0, %2}
4808 movhps\t{%2, %0|%0, %2}"
4809 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4812 (eq_attr "alternative" "0,1")
4813 (const_string "sselog")
4814 (const_string "ssemov")))
4815 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4816 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4817 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4821 ;; Parallel integral arithmetic
4823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander.  The preparation statement creates
;; operand 2 as an all-zero vector of the same mode forced into a register;
;; presumably the pattern then subtracts operand 1 from it -- confirm.
4825 (define_expand "neg<mode>2"
4826 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4829 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4831 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander (one instance per plusminus code and
;; VI_AVX2 mode); operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
4833 (define_expand "<plusminus_insn><mode>3"
4834 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4836 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4837 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4839 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract insn.  Alternative 0 is the two-operand legacy
;; form (operand 1 tied to the destination, with the <comm> modifier);
;; alternative 1 is the three-operand VEX form.  The insn mode follows the
;; vector width through <sseinsnmode>.
4841 (define_insn "*<plusminus_insn><mode>3"
4842 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4844 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4845 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4846 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4848 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4849 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4850 [(set_attr "isa" "noavx,avx")
4851 (set_attr "type" "sseiadd")
4852 (set_attr "prefix_data16" "1,*")
4853 (set_attr "prefix" "orig,vex")
4854 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander over the VI12_AVX2 modes; operands are
;; legitimized in place with ix86_fixup_binary_operands_no_copy.
4856 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4857 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4858 (sat_plusminus:VI12_AVX2
4859 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4860 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4862 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract insn over the VI12_AVX2 modes.  Alternative 0 is
;; the two-operand legacy form, alternative 1 the three-operand VEX form.
;; The insn mode is derived from the vector mode via <sseinsnmode>, matching
;; *<plusminus_insn><mode>3, rather than being fixed at TI for every width.
4864 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4865 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4866 (sat_plusminus:VI12_AVX2
4867 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4868 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4869 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4871 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4872 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4873 [(set_attr "isa" "noavx,avx")
4874 (set_attr "type" "sseiadd")
4875 (set_attr "prefix_data16" "1,*")
4876 (set_attr "prefix" "orig,vex")
4877 (set_attr "mode" "<sseinsnmode>")])
;; Byte-vector multiply, split while pseudos can still be created.  Both
;; inputs are interleaved (high and low halves) into word lanes, the words
;; are multiplied in <sseunpackmode>, and the even bytes of the two products
;; are merged into operand 0.  A REG_EQUAL note records the original MULT on
;; the last emitted insn.
4879 (define_insn_and_split "mul<mode>3"
4880 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4881 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4882 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4884 && can_create_pseudo_p ()"
4891 enum machine_mode mulmode = <sseunpackmode>mode;
4893 for (i = 0; i < 6; ++i)
4894 t[i] = gen_reg_rtx (<MODE>mode);
4896 /* Unpack data such that we've got a source byte in each low byte of
4897 each word. We don't care what goes into the high byte of each word.
4898 Rather than trying to get zero in there, most convenient is to let
4899 it be a copy of the low byte. */
4900 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4902 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4904 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4906 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4909 /* Multiply words. The end-of-line annotations here give a picture of what
4910 the output of that instruction looks like. Dot means don't care; the
4911 letters are the bytes of the result with A being the most significant. */
4912 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4913 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4914 gen_lowpart (mulmode, t[0]),
4915 gen_lowpart (mulmode, t[1]))));
4916 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4917 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4918 gen_lowpart (mulmode, t[2]),
4919 gen_lowpart (mulmode, t[3]))));
4921 /* Extract the even bytes and merge them back together. */
4922 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4924 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4925 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-vector multiply expander over VI2_AVX2; operands are legitimized in
;; place with ix86_fixup_binary_operands_no_copy.
4929 (define_expand "mul<mode>3"
4930 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4931 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4932 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4934 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Word-vector multiply insn (pmullw / vpmullw).  Operand 1 carries the "%"
;; commutative marker; alternative 0 ties it to the destination.
4936 (define_insn "*mul<mode>3"
4937 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4938 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4939 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4940 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4942 pmullw\t{%2, %0|%0, %2}
4943 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4944 [(set_attr "isa" "noavx,avx")
4945 (set_attr "type" "sseimul")
4946 (set_attr "prefix_data16" "1,*")
4947 (set_attr "prefix" "orig,vex")
4948 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander over VI2_AVX2: both inputs are extended
;; (any_extend) to <ssedoublemode>, multiplied, and the product is shifted
;; right logically.  Operands are legitimized in place before matching.
4950 (define_expand "<s>mul<mode>3_highpart"
4951 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4953 (lshiftrt:<ssedoublemode>
4954 (mult:<ssedoublemode>
4955 (any_extend:<ssedoublemode>
4956 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4957 (any_extend:<ssedoublemode>
4958 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4961 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part word multiply insn, emitted as pmulh<u>w / vpmulh<u>w.  Operand 1
;; carries the "%" commutative marker.
4963 (define_insn "*<s>mul<mode>3_highpart"
4964 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4966 (lshiftrt:<ssedoublemode>
4967 (mult:<ssedoublemode>
4968 (any_extend:<ssedoublemode>
4969 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4970 (any_extend:<ssedoublemode>
4971 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4973 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4975 pmulh<u>w\t{%2, %0|%0, %2}
4976 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4977 [(set_attr "isa" "noavx,avx")
4978 (set_attr "type" "sseimul")
4979 (set_attr "prefix_data16" "1,*")
4980 (set_attr "prefix" "orig,vex")
4981 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V4DI result from elements 0, 2, 4 and 6 of each V8SI
;; input (matched by *avx_umulv4siv4di3, i.e. vpmuludq).  Operands are
;; legitimized in place with ix86_fixup_binary_operands_no_copy.
4983 (define_expand "avx2_umulv4siv4di3"
4984 [(set (match_operand:V4DI 0 "register_operand" "")
4988 (match_operand:V8SI 1 "nonimmediate_operand" "")
4989 (parallel [(const_int 0) (const_int 2)
4990 (const_int 4) (const_int 6)])))
4993 (match_operand:V8SI 2 "nonimmediate_operand" "")
4994 (parallel [(const_int 0) (const_int 2)
4995 (const_int 4) (const_int 6)])))))]
4997 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on 256-bit vectors: a V4DI result from elements 0, 2, 4 and 6 of
;; each V8SI input.  Requires TARGET_AVX2, so the pattern is named avx2_ to
;; agree with its expander (avx2_umulv4siv4di3) and with *avx2_mulv4siv4di3.
;; The leading "*" means no gen_ function is created, so the name is only
;; visible in dumps.
4999 (define_insn "*avx2_umulv4siv4di3"
5000 [(set (match_operand:V4DI 0 "register_operand" "=x")
5004 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5005 (parallel [(const_int 0) (const_int 2)
5006 (const_int 4) (const_int 6)])))
5009 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5010 (parallel [(const_int 0) (const_int 2)
5011 (const_int 4) (const_int 6)])))))]
5012 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5013 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5014 [(set_attr "type" "sseimul")
5015 (set_attr "prefix" "vex")
5016 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of each V4SI input
;; (matched by *sse2_umulv2siv2di3, i.e. pmuludq).  Operands are legitimized
;; in place with ix86_fixup_binary_operands_no_copy.
5018 (define_expand "sse2_umulv2siv2di3"
5019 [(set (match_operand:V2DI 0 "register_operand" "")
5023 (match_operand:V4SI 1 "nonimmediate_operand" "")
5024 (parallel [(const_int 0) (const_int 2)])))
5027 (match_operand:V4SI 2 "nonimmediate_operand" "")
5028 (parallel [(const_int 0) (const_int 2)])))))]
5030 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq: a V2DI result from elements 0 and 2 of each V4SI input.
;; Operand 1 carries the "%" commutative marker; alternative 0 ties it to the
;; destination.
5032 (define_insn "*sse2_umulv2siv2di3"
5033 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5037 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5038 (parallel [(const_int 0) (const_int 2)])))
5041 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5042 (parallel [(const_int 0) (const_int 2)])))))]
5043 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5045 pmuludq\t{%2, %0|%0, %2}
5046 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5047 [(set_attr "isa" "noavx,avx")
5048 (set_attr "type" "sseimul")
5049 (set_attr "prefix_data16" "1,*")
5050 (set_attr "prefix" "orig,vex")
5051 (set_attr "mode" "TI")])
;; Expander producing a V4DI result from elements 0, 2, 4 and 6 of each V8SI
;; input (matched by *avx2_mulv4siv4di3, i.e. vpmuldq).  Operands are
;; legitimized in place with ix86_fixup_binary_operands_no_copy.
5053 (define_expand "avx2_mulv4siv4di3"
5054 [(set (match_operand:V4DI 0 "register_operand" "")
5058 (match_operand:V8SI 1 "nonimmediate_operand" "")
5059 (parallel [(const_int 0) (const_int 2)
5060 (const_int 4) (const_int 6)])))
5063 (match_operand:V8SI 2 "nonimmediate_operand" "")
5064 (parallel [(const_int 0) (const_int 2)
5065 (const_int 4) (const_int 6)])))))]
5067 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on 256-bit vectors: a V4DI result from elements 0, 2, 4 and 6 of
;; each V8SI input.  Operand 1 carries the "%" commutative marker, like the
;; other widening multiplies in this file, so the register allocator may swap
;; the sources; the insn condition already accepts the commuted form via
;; ix86_binary_operator_ok (MULT, ...).
5069 (define_insn "*avx2_mulv4siv4di3"
5070 [(set (match_operand:V4DI 0 "register_operand" "=x")
5074 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5075 (parallel [(const_int 0) (const_int 2)
5076 (const_int 4) (const_int 6)])))
5079 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5080 (parallel [(const_int 0) (const_int 2)
5081 (const_int 4) (const_int 6)])))))]
5082 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5083 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5084 [(set_attr "isa" "avx")
5085 (set_attr "type" "sseimul")
5086 (set_attr "prefix_extra" "1")
5087 (set_attr "prefix" "vex")
5088 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of each V4SI input
;; (matched by *sse4_1_mulv2siv2di3, i.e. pmuldq).  Operands are legitimized
;; in place with ix86_fixup_binary_operands_no_copy.
5090 (define_expand "sse4_1_mulv2siv2di3"
5091 [(set (match_operand:V2DI 0 "register_operand" "")
5095 (match_operand:V4SI 1 "nonimmediate_operand" "")
5096 (parallel [(const_int 0) (const_int 2)])))
5099 (match_operand:V4SI 2 "nonimmediate_operand" "")
5100 (parallel [(const_int 0) (const_int 2)])))))]
5102 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq (TARGET_SSE4_1): a V2DI result from elements 0 and 2 of
;; each V4SI input.  Operand 1 carries the "%" commutative marker; alternative
;; 0 ties it to the destination.
5104 (define_insn "*sse4_1_mulv2siv2di3"
5105 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5109 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5110 (parallel [(const_int 0) (const_int 2)])))
5113 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5114 (parallel [(const_int 0) (const_int 2)])))))]
5115 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5117 pmuldq\t{%2, %0|%0, %2}
5118 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5119 [(set_attr "isa" "noavx,avx")
5120 (set_attr "type" "sseimul")
5121 (set_attr "prefix_data16" "1,*")
5122 (set_attr "prefix_extra" "1")
5123 (set_attr "prefix" "orig,vex")
5124 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd pattern: a V8SI result built from V8HI
;; selections of V16HI operands 1 and 2 (one selection list starting at
;; index 0, the other at index 1).  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
5126 (define_expand "avx2_pmaddwd"
5127 [(set (match_operand:V8SI 0 "register_operand" "")
5132 (match_operand:V16HI 1 "nonimmediate_operand" "")
5133 (parallel [(const_int 0)
5143 (match_operand:V16HI 2 "nonimmediate_operand" "")
5144 (parallel [(const_int 0)
5154 (vec_select:V8HI (match_dup 1)
5155 (parallel [(const_int 1)
5164 (vec_select:V8HI (match_dup 2)
5165 (parallel [(const_int 1)
5172 (const_int 15)]))))))]
5174 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the 128-bit pmaddwd pattern: a V4SI result built from V4HI
;; selections of V8HI operands 1 and 2 (one selection list starting at
;; index 0, the other at index 1).  Operands are legitimized in place with
;; ix86_fixup_binary_operands_no_copy.
5176 (define_expand "sse2_pmaddwd"
5177 [(set (match_operand:V4SI 0 "register_operand" "")
5182 (match_operand:V8HI 1 "nonimmediate_operand" "")
5183 (parallel [(const_int 0)
5189 (match_operand:V8HI 2 "nonimmediate_operand" "")
5190 (parallel [(const_int 0)
5196 (vec_select:V4HI (match_dup 1)
5197 (parallel [(const_int 1)
5202 (vec_select:V4HI (match_dup 2)
5203 (parallel [(const_int 1)
5206 (const_int 7)]))))))]
5208 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; vpmaddwd on 256-bit vectors (TARGET_AVX2), VEX-encoded only.  Operand 1
;; carries the "%" commutative marker.
5210 (define_insn "*avx2_pmaddwd"
5211 [(set (match_operand:V8SI 0 "register_operand" "=x")
5216 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5217 (parallel [(const_int 0)
5227 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5228 (parallel [(const_int 0)
5238 (vec_select:V8HI (match_dup 1)
5239 (parallel [(const_int 1)
5248 (vec_select:V8HI (match_dup 2)
5249 (parallel [(const_int 1)
5256 (const_int 15)]))))))]
5257 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5258 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5259 [(set_attr "type" "sseiadd")
5260 (set_attr "prefix" "vex")
5261 (set_attr "mode" "OI")])
;; pmaddwd / vpmaddwd on 128-bit vectors (TARGET_SSE2).  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand VEX form; operand
;; 1 carries the "%" commutative marker.
5263 (define_insn "*sse2_pmaddwd"
5264 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5269 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5270 (parallel [(const_int 0)
5276 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5277 (parallel [(const_int 0)
5283 (vec_select:V4HI (match_dup 1)
5284 (parallel [(const_int 1)
5289 (vec_select:V4HI (match_dup 2)
5290 (parallel [(const_int 1)
5293 (const_int 7)]))))))]
5294 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5296 pmaddwd\t{%2, %0|%0, %2}
5297 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5298 [(set_attr "isa" "noavx,avx")
5299 (set_attr "type" "sseiadd")
5300 (set_attr "atom_unit" "simul")
5301 (set_attr "prefix_data16" "1,*")
5302 (set_attr "prefix" "orig,vex")
5303 (set_attr "mode" "TI")])
;; Doubleword-vector multiply expander over VI4_AVX2.  Operand fixup is only
;; performed when SSE4.1 or AVX is available; without them the V4SI multiply
;; is left to *sse2_mulv4si3, whose condition is
;; TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX.
5305 (define_expand "mul<mode>3"
5306 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5307 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5308 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5311 if (TARGET_SSE4_1 || TARGET_AVX)
5312 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; pmulld / vpmulld (TARGET_SSE4_1) over VI4_AVX2.  Operand 1 carries the "%"
;; commutative marker; the insn mode follows the vector width through
;; <sseinsnmode>.
5315 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5316 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5317 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5318 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5319 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5321 pmulld\t{%2, %0|%0, %2}
5322 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5323 [(set_attr "isa" "noavx,avx")
5324 (set_attr "type" "sseimul")
5325 (set_attr "prefix_extra" "1")
5326 (set_attr "prefix" "orig,vex")
5327 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2-only targets (no SSE4.1, no AVX), split while
;; pseudos can still be created.  Elements 0/2 are multiplied with
;; sse2_umulv2siv2di3, both inputs are shifted down with sse2_lshrv1ti3 so
;; elements 1/3 can be multiplied the same way, and the low halves of the two
;; products are gathered with pshufd and interleaved into the result.  A
;; REG_EQUAL note records the original MULT on the last emitted insn.
5329 (define_insn_and_split "*sse2_mulv4si3"
5330 [(set (match_operand:V4SI 0 "register_operand" "")
5331 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5332 (match_operand:V4SI 2 "register_operand" "")))]
5333 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5334 && can_create_pseudo_p ()"
5339 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5345 t1 = gen_reg_rtx (V4SImode);
5346 t2 = gen_reg_rtx (V4SImode);
5347 t3 = gen_reg_rtx (V4SImode);
5348 t4 = gen_reg_rtx (V4SImode);
5349 t5 = gen_reg_rtx (V4SImode);
5350 t6 = gen_reg_rtx (V4SImode);
5351 thirtytwo = GEN_INT (32);
5353 /* Multiply elements 2 and 0. */
5354 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5357 /* Shift both input vectors down one element, so that elements 3
5358 and 1 are now in the slots for elements 2 and 0. For K8, at
5359 least, this is faster than using a shuffle. */
5360 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5361 gen_lowpart (V1TImode, op1),
5363 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5364 gen_lowpart (V1TImode, op2),
5366 /* Multiply elements 3 and 1. */
5367 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5370 /* Move the results in element 2 down to element 1; we don't care
5371 what goes in elements 2 and 3. */
5372 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5373 const0_rtx, const0_rtx));
5374 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5375 const0_rtx, const0_rtx));
5377 /* Merge the parts back together. */
5378 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5380 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5381 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; Quadword-vector multiply over VI8_AVX2, split while pseudos can still be
;; created.  With XOP and V2DImode the product is built from a V4SI multiply
;; of swapped halves, a horizontal add, a shift by 32 and a final
;; multiply-accumulate.  Otherwise it is composed from three 32x32->64
;; unsigned multiplies: low*low plus the two cross products shifted left by
;; 32.  A REG_EQUAL note records the original MULT on the last emitted insn.
5385 (define_insn_and_split "mul<mode>3"
5386 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5387 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5388 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5390 && can_create_pseudo_p ()"
5395 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5402 if (TARGET_XOP && <MODE>mode == V2DImode)
5404 /* op1: A,B,C,D, op2: E,F,G,H */
5405 op1 = gen_lowpart (V4SImode, op1);
5406 op2 = gen_lowpart (V4SImode, op2);
5408 t1 = gen_reg_rtx (V4SImode);
5409 t2 = gen_reg_rtx (V4SImode);
5410 t3 = gen_reg_rtx (V2DImode);
5411 t4 = gen_reg_rtx (V2DImode);
5414 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5420 /* t2: (B*E),(A*F),(D*G),(C*H) */
5421 emit_insn (gen_mulv4si3 (t2, t1, op2));
5423 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5424 emit_insn (gen_xop_phadddq (t3, t2));
5426 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5427 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5429 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5430 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5434 t1 = gen_reg_rtx (<MODE>mode);
5435 t2 = gen_reg_rtx (<MODE>mode);
5436 t3 = gen_reg_rtx (<MODE>mode);
5437 t4 = gen_reg_rtx (<MODE>mode);
5438 t5 = gen_reg_rtx (<MODE>mode);
5439 t6 = gen_reg_rtx (<MODE>mode);
5440 thirtytwo = GEN_INT (32);
5442 /* Multiply low parts. */
5443 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5444 (t1, gen_lowpart (<ssepackmode>mode, op1),
5445 gen_lowpart (<ssepackmode>mode, op2)));
5447 /* Shift input vectors right 32 bits so we can multiply high parts. */
5448 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5449 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5451 /* Multiply high parts by low parts. */
5452 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5453 (t4, gen_lowpart (<ssepackmode>mode, op1),
5454 gen_lowpart (<ssepackmode>mode, t3)));
5455 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5456 (t5, gen_lowpart (<ssepackmode>mode, op2),
5457 gen_lowpart (<ssepackmode>mode, t2)));
5459 /* Shift them back. */
5460 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5461 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5463 /* Add the three parts together. */
5464 emit_insn (gen_add<mode>3 (t6, t1, t4));
5465 emit_insn (gen_add<mode>3 (op0, t6, t5));
5468 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5469 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, over VI2_AVX2: computes the low product
;; words (mul<mode>3) and the high product words (<s>mul<mode>3_highpart)
;; into temporaries, then interleaves their high halves into operand 0
;; viewed in the narrow mode.
5473 (define_expand "vec_widen_<s>mult_hi_<mode>"
5474 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5475 (any_extend:<sseunpackmode>
5476 (match_operand:VI2_AVX2 1 "register_operand" ""))
5477 (match_operand:VI2_AVX2 2 "register_operand" "")]
5480 rtx op1, op2, t1, t2, dest;
5484 t1 = gen_reg_rtx (<MODE>mode);
5485 t2 = gen_reg_rtx (<MODE>mode);
5486 dest = gen_lowpart (<MODE>mode, operands[0]);
5488 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5489 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5490 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, over VI2_AVX2: computes the low product
;; words (mul<mode>3) and the high product words (<s>mul<mode>3_highpart)
;; into temporaries, then interleaves their low halves into operand 0
;; viewed in the narrow mode.
5494 (define_expand "vec_widen_<s>mult_lo_<mode>"
5495 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5496 (any_extend:<sseunpackmode>
5497 (match_operand:VI2_AVX2 1 "register_operand" ""))
5498 (match_operand:VI2_AVX2 2 "register_operand" "")]
5501 rtx op1, op2, t1, t2, dest;
5505 t1 = gen_reg_rtx (<MODE>mode);
5506 t2 = gen_reg_rtx (<MODE>mode);
5507 dest = gen_lowpart (<MODE>mode, operands[0]);
5509 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5510 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5511 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening V8SI -> V4DI multiply, high half.  Each input is first permuted
;; as V4DI with quadword order 0,2,1,3, then pshufd with selector 2,2,3,3
;; places the wanted source elements in the even slots, and the even-element
;; widening multiply avx2_<u>mulv4siv4di3 produces the result.
5515 (define_expand "vec_widen_<s>mult_hi_v8si"
5516 [(match_operand:V4DI 0 "register_operand" "")
5517 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5518 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5523 t1 = gen_reg_rtx (V4DImode);
5524 t2 = gen_reg_rtx (V4DImode);
5525 t3 = gen_reg_rtx (V8SImode);
5526 t4 = gen_reg_rtx (V8SImode);
5527 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5528 const0_rtx, const2_rtx,
5529 const1_rtx, GEN_INT (3)));
5530 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5531 const0_rtx, const2_rtx,
5532 const1_rtx, GEN_INT (3)));
5533 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5534 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5535 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5536 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5537 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander vec_widen_<s>mult_lo_v8si: V8SI x V8SI -> V4DI.
;; Same structure as the _hi_ expander: both inputs are permuted as V4DI
;; with selector (0, 2, 1, 3), but the following avx2_pshufdv3 uses the
;; immediate 0 + (0 << 2) + (1 << 4) + (1 << 6) before the shuffled
;; vectors are multiplied by avx2_<u>mulv4siv4di3 into operand 0.
5541 (define_expand "vec_widen_<s>mult_lo_v8si"
5542 [(match_operand:V4DI 0 "register_operand" "")
5543 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5544 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5549 t1 = gen_reg_rtx (V4DImode);
5550 t2 = gen_reg_rtx (V4DImode);
5551 t3 = gen_reg_rtx (V8SImode);
5552 t4 = gen_reg_rtx (V8SImode);
5553 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5554 const0_rtx, const2_rtx,
5555 const1_rtx, GEN_INT (3)));
5556 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5557 const0_rtx, const2_rtx,
5558 const1_rtx, GEN_INT (3)));
5559 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5560 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5561 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5562 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5563 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander vec_widen_smult_hi_v4si: V4SI x V4SI -> V2DI.
;; Two emission sequences are visible:
;;   1. sse2_pshufd_1 with selector (0, 2, 1, 3) on both inputs, followed
;;      by xop_mulv2div2di3_high;
;;   2. vec_interleave_highv4si of each input with itself, followed by
;;      sse4_1_mulv2siv2di3.
;; NOTE(review): the test that chooses between the two sequences and the
;; op1/op2 assignments are not visible in this listing; the xop_ prefix
;; suggests sequence 1 is the XOP path -- confirm.
5567 (define_expand "vec_widen_smult_hi_v4si"
5568 [(match_operand:V2DI 0 "register_operand" "")
5569 (match_operand:V4SI 1 "register_operand" "")
5570 (match_operand:V4SI 2 "register_operand" "")]
5573 rtx op1, op2, t1, t2;
5577 t1 = gen_reg_rtx (V4SImode);
5578 t2 = gen_reg_rtx (V4SImode);
5582 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5583 GEN_INT (1), GEN_INT (3)));
5584 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5585 GEN_INT (1), GEN_INT (3)));
5586 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5590 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5591 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5592 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_smult_lo_v4si: V4SI x V4SI -> V2DI.
;; Two emission sequences are visible:
;;   1. sse2_pshufd_1 with selector (0, 2, 1, 3) on both inputs, followed
;;      by xop_mulv2div2di3_low;
;;   2. vec_interleave_lowv4si of each input with itself, followed by
;;      sse4_1_mulv2siv2di3.
;; NOTE(review): the test that chooses between the two sequences and the
;; op1/op2 assignments are not visible in this listing; the xop_ prefix
;; suggests sequence 1 is the XOP path -- confirm.
5596 (define_expand "vec_widen_smult_lo_v4si"
5597 [(match_operand:V2DI 0 "register_operand" "")
5598 (match_operand:V4SI 1 "register_operand" "")
5599 (match_operand:V4SI 2 "register_operand" "")]
5602 rtx op1, op2, t1, t2;
5606 t1 = gen_reg_rtx (V4SImode);
5607 t2 = gen_reg_rtx (V4SImode);
5611 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5612 GEN_INT (1), GEN_INT (3)));
5613 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5614 GEN_INT (1), GEN_INT (3)));
5615 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5619 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5620 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5621 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_umult_hi_v4si: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself by vec_interleave_highv4si and
;; the two results are multiplied by sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this listing;
;; presumably they come from operands[1]/operands[2] -- confirm.
5625 (define_expand "vec_widen_umult_hi_v4si"
5626 [(match_operand:V2DI 0 "register_operand" "")
5627 (match_operand:V4SI 1 "register_operand" "")
5628 (match_operand:V4SI 2 "register_operand" "")]
5631 rtx op1, op2, t1, t2;
5635 t1 = gen_reg_rtx (V4SImode);
5636 t2 = gen_reg_rtx (V4SImode);
5638 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5639 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5640 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_umult_lo_v4si: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself by vec_interleave_lowv4si and
;; the two results are multiplied by sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this listing;
;; presumably they come from operands[1]/operands[2] -- confirm.
5644 (define_expand "vec_widen_umult_lo_v4si"
5645 [(match_operand:V2DI 0 "register_operand" "")
5646 (match_operand:V4SI 1 "register_operand" "")
5647 (match_operand:V4SI 2 "register_operand" "")]
5650 rtx op1, op2, t1, t2;
5654 t1 = gen_reg_rtx (V4SImode);
5655 t2 = gen_reg_rtx (V4SImode);
5657 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5658 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5659 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander sdot_prod<mode> for VI2_AVX2 inputs; operands 0 and 3 have
;; mode <sseunpackmode>.  The visible body emits <sse2_avx2>_pmaddwd of
;; operands 1 and 2 into a fresh temporary t and then sets operand 0 to a
;; PLUS in <sseunpackmode>mode.
;; NOTE(review): the addends of the PLUS are not visible in this listing;
;; presumably they are t and operands[3] -- confirm.
5663 (define_expand "sdot_prod<mode>"
5664 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5665 (match_operand:VI2_AVX2 1 "register_operand" "")
5666 (match_operand:VI2_AVX2 2 "register_operand" "")
5667 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5670 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5671 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5672 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5673 gen_rtx_PLUS (<sseunpackmode>mode,
;; Code attribute mapping the extension code to a pattern-name prefix:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".
5678 (define_code_attr sse2_sse4_1
5679 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Expander <s>dot_prodv4si: V4SI x V4SI accumulated into V2DI.
;; Enabled under TARGET_SSE2 for the ZERO_EXTEND variant and under
;; TARGET_SSE4_1 otherwise.  Visible steps:
;;   t1 = <u>mulv2siv2di3 (operands[1], operands[2]);  t1 += operands[3];
;;   t2, t3 = operands[1], operands[2] shifted right as V1TI values;
;;   t4 = <u>mulv2siv2di3 (t2, t3);  operands[0] = t1 + t4.
;; NOTE(review): the shift counts passed to sse2_lshrv1ti3 are not
;; visible in this listing.
5681 (define_expand "<s>dot_prodv4si"
5682 [(match_operand:V2DI 0 "register_operand" "")
5683 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5684 (match_operand:V4SI 2 "register_operand" "")
5685 (match_operand:V2DI 3 "register_operand" "")]
5686 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5690 t1 = gen_reg_rtx (V2DImode);
5691 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5692 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5694 t2 = gen_reg_rtx (V4SImode);
5695 t3 = gen_reg_rtx (V4SImode);
5696 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5697 gen_lowpart (V1TImode, operands[1]),
5699 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5700 gen_lowpart (V1TImode, operands[2]),
5703 t4 = gen_reg_rtx (V2DImode);
5704 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5706 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Expander <s>dot_prodv8si: V8SI x V8SI accumulated into V4DI.
;; Visible steps:
;;   t1 = avx2_<u>mulv4siv4di3 (operands[1], operands[2]);
;;   t1 += operands[3];
;;   t2, t3 = operands[1], operands[2] shifted right as V2TI values;
;;   t4 = avx2_<u>mulv4siv4di3 (t2, t3);  operands[0] = t1 + t4.
;; NOTE(review): the enabling condition and the shift counts passed to
;; avx2_lshrv2ti3 are not visible in this listing.
5710 (define_expand "<s>dot_prodv8si"
5711 [(match_operand:V4DI 0 "register_operand" "")
5712 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5713 (match_operand:V8SI 2 "register_operand" "")
5714 (match_operand:V4DI 3 "register_operand" "")]
5719 t1 = gen_reg_rtx (V4DImode);
5720 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5721 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5723 t2 = gen_reg_rtx (V8SImode);
5724 t3 = gen_reg_rtx (V8SImode);
5725 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5726 gen_lowpart (V2TImode, operands[1]),
5728 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5729 gen_lowpart (V2TImode, operands[2]),
5732 t4 = gen_reg_rtx (V4DImode);
5733 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5735 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Insn ashr<mode>3 for VI24_AVX2 modes; operand 2 is an SImode count
;; accepted as register or immediate (constraints "xN").
;; Alternative 0 is the two-operand form psra<ssemodesuffix> with the
;; destination tied to operand 1 ("0"); alternative 1 is the three-operand
;; vpsra<ssemodesuffix>.  length_immediate depends on whether operand 2
;; is a const_int and is "0" otherwise.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this listing.
5739 (define_insn "ashr<mode>3"
5740 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5742 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5743 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5746 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5747 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5748 [(set_attr "isa" "noavx,avx")
5749 (set_attr "type" "sseishft")
5750 (set (attr "length_immediate")
5751 (if_then_else (match_operand 2 "const_int_operand" "")
5753 (const_string "0")))
5754 (set_attr "prefix_data16" "1,*")
5755 (set_attr "prefix" "orig,vex")
5756 (set_attr "mode" "<sseinsnmode>")])
;; Insn lshr<mode>3: element-wise logical right shift (lshiftrt) for
;; VI248_AVX2 modes; operand 2 is an SImode count accepted as register
;; or immediate (constraints "xN").
;; Alternative 0 is the two-operand form psrl<ssemodesuffix> with the
;; destination tied to operand 1 ("0"); alternative 1 is the three-operand
;; vpsrl<ssemodesuffix>.  length_immediate depends on whether operand 2
;; is a const_int and is "0" otherwise.
;; NOTE(review): the insn condition is not visible in this listing.
5758 (define_insn "lshr<mode>3"
5759 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5760 (lshiftrt:VI248_AVX2
5761 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5762 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5765 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5766 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5767 [(set_attr "isa" "noavx,avx")
5768 (set_attr "type" "sseishft")
5769 (set (attr "length_immediate")
5770 (if_then_else (match_operand 2 "const_int_operand" "")
5772 (const_string "0")))
5773 (set_attr "prefix_data16" "1,*")
5774 (set_attr "prefix" "orig,vex")
5775 (set_attr "mode" "<sseinsnmode>")])
;; Insn ashl<mode>3 for VI248_AVX2 modes; operand 2 is an SImode count
;; accepted as register or immediate (constraints "xN").
;; Alternative 0 is the two-operand form psll<ssemodesuffix> with the
;; destination tied to operand 1 ("0"); alternative 1 is the three-operand
;; vpsll<ssemodesuffix>.  length_immediate depends on whether operand 2
;; is a const_int and is "0" otherwise.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this listing.
5777 (define_insn "ashl<mode>3"
5778 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5780 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5781 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5784 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5785 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5786 [(set_attr "isa" "noavx,avx")
5787 (set_attr "type" "sseishft")
5788 (set (attr "length_immediate")
5789 (if_then_else (match_operand 2 "const_int_operand" "")
5791 (const_string "0")))
5792 (set_attr "prefix_data16" "1,*")
5793 (set_attr "prefix" "orig,vex")
5794 (set_attr "mode" "<sseinsnmode>")])
;; Expander vec_shl_<mode> for VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands
;; 0 and 1 by their V1TImode views (gen_lowpart), so the shift is carried
;; out on the whole 128-bit value.
;; NOTE(review): the RTL operator line and the condition are not visible
;; in this listing.
5796 (define_expand "vec_shl_<mode>"
5797 [(set (match_operand:VI_128 0 "register_operand" "")
5799 (match_operand:VI_128 1 "register_operand" "")
5800 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5803 operands[0] = gen_lowpart (V1TImode, operands[0]);
5804 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn <sse2_avx2>_ashl<mode>3 for VIMAX_AVX2 modes (whole-register
;; left shift).  Operand 2 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing, because pslldq/vpslldq
;; take their immediate as a byte count.  Alternative 0 prints the
;; two-operand pslldq, alternative 1 the three-operand vpslldq.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this listing.
5807 (define_insn "<sse2_avx2>_ashl<mode>3"
5808 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5810 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5811 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5814 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5816 switch (which_alternative)
5819 return "pslldq\t{%2, %0|%0, %2}";
5821 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5826 [(set_attr "isa" "noavx,avx")
5827 (set_attr "type" "sseishft")
5828 (set_attr "length_immediate" "1")
5829 (set_attr "prefix_data16" "1,*")
5830 (set_attr "prefix" "orig,vex")
5831 (set_attr "mode" "<sseinsnmode>")])
;; Expander vec_shr_<mode> for VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands
;; 0 and 1 by their V1TImode views (gen_lowpart), so the shift is carried
;; out on the whole 128-bit value.
;; NOTE(review): the RTL operator line and the condition are not visible
;; in this listing.
5833 (define_expand "vec_shr_<mode>"
5834 [(set (match_operand:VI_128 0 "register_operand" "")
5836 (match_operand:VI_128 1 "register_operand" "")
5837 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5840 operands[0] = gen_lowpart (V1TImode, operands[0]);
5841 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn <sse2_avx2>_lshr<mode>3: logical right shift (lshiftrt) of a
;; VIMAX_AVX2 value.  Operand 2 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing, because
;; psrldq/vpsrldq take their immediate as a byte count.  Alternative 0
;; prints the two-operand psrldq, alternative 1 the three-operand vpsrldq.
;; NOTE(review): the insn condition is not visible in this listing.
5844 (define_insn "<sse2_avx2>_lshr<mode>3"
5845 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5846 (lshiftrt:VIMAX_AVX2
5847 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5848 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5851 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5853 switch (which_alternative)
5856 return "psrldq\t{%2, %0|%0, %2}";
5858 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5863 [(set_attr "isa" "noavx,avx")
5864 (set_attr "type" "sseishft")
5865 (set_attr "length_immediate" "1")
5866 (set_attr "atom_unit" "sishuf")
5867 (set_attr "prefix_data16" "1,*")
5868 (set_attr "prefix" "orig,vex")
5869 (set_attr "mode" "<sseinsnmode>")])
;; Expander <code><mode>3 for VI124_256 modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operands.
;; NOTE(review): the RTL operator line (code iterator) and the condition
;; are not visible in this listing.
5872 (define_expand "<code><mode>3"
5873 [(set (match_operand:VI124_256 0 "register_operand" "")
5875 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5876 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5878 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn *avx2_<code><mode>3 for VI124_256 modes, enabled under
;; TARGET_AVX2 when ix86_binary_operator_ok accepts the operands.
;; Prints vp<maxmin_int><ssemodesuffix> in three-operand form; operand 1
;; is marked commutative ("%x") and operand 2 may be memory ("xm").
;; NOTE(review): the RTL operator line is not visible in this listing.
5880 (define_insn "*avx2_<code><mode>3"
5881 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5883 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5884 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5885 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5886 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5887 [(set_attr "type" "sseiadd")
5888 (set_attr "prefix_extra" "1")
5889 (set_attr "prefix" "vex")
5890 (set_attr "mode" "OI")])
;; Expander <code><mode>3 (maxmin) for VI8_AVX2 modes, implemented as a
;; vector conditional through ix86_expand_int_vcond.
;; xops[0] is the destination.  xops[1]/xops[2] are the two values to
;; select between: operands[1]/operands[2] in the branch guarded by the
;; SMAX/UMAX test, and swapped in the other visible branch.  xops[3] is
;; the comparison operands[1] > operands[2], using GTU for UMAX/UMIN and
;; GT otherwise; xops[4]/xops[5] are the compared values.
;; NOTE(review): the condition and the handling of `ok` after the call
;; are not visible in this listing.
5892 (define_expand "<code><mode>3"
5893 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5894 (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5895 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5902 xops[0] = operands[0];
5904 if (<CODE> == SMAX || <CODE> == UMAX)
5906 xops[1] = operands[1];
5907 xops[2] = operands[2];
5911 xops[1] = operands[2];
5912 xops[2] = operands[1];
5915 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5917 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5918 xops[4] = operands[1];
5919 xops[5] = operands[2];
5921 ok = ix86_expand_int_vcond (xops);
;; Expander <code><mode>3 (smaxmin) for VI124_128 modes.
;; With TARGET_SSE4_1, or for V8HImode, only
;; ix86_fixup_binary_operands_no_copy is applied.  The remaining visible
;; code forces both inputs into registers and goes through
;; ix86_expand_int_vcond with the signed comparison
;; operands[1] > operands[2] (GT); xops[1]/xops[2] hold the two values to
;; select between, in either order depending on a branch.
;; NOTE(review): the condition, the test that picks the xops[1]/xops[2]
;; order and the handling of `ok` are not visible in this listing.
5926 (define_expand "<code><mode>3"
5927 [(set (match_operand:VI124_128 0 "register_operand" "")
5928 (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5929 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5932 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5933 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5939 xops[0] = operands[0];
5940 operands[1] = force_reg (<MODE>mode, operands[1]);
5941 operands[2] = force_reg (<MODE>mode, operands[2]);
5945 xops[1] = operands[1];
5946 xops[2] = operands[2];
5950 xops[1] = operands[2];
5951 xops[2] = operands[1];
5954 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5955 xops[4] = operands[1];
5956 xops[5] = operands[2];
5958 ok = ix86_expand_int_vcond (xops);
;; Insn *sse4_1_<code><mode>3 for VI14_128 modes, enabled under
;; TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 prints the two-operand p<maxmin_int><ssemodesuffix>
;; (destination tied to operand 1), alternative 1 the three-operand
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is commutative ("%").
;; NOTE(review): the RTL operator line is not visible in this listing.
5964 (define_insn "*sse4_1_<code><mode>3"
5965 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5967 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5968 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5969 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5971 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5972 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "isa" "noavx,avx")
5974 (set_attr "type" "sseiadd")
5975 (set_attr "prefix_extra" "1,*")
5976 (set_attr "prefix" "orig,vex")
5977 (set_attr "mode" "TI")])
;; Insn *<code>v8hi3 for V8HI, enabled under TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 prints
;; the two-operand p<maxmin_int>w (destination tied to operand 1),
;; alternative 1 the three-operand vp<maxmin_int>w.  Operand 1 is
;; commutative ("%").
;; NOTE(review): the RTL operator line is not visible in this listing.
5979 (define_insn "*<code>v8hi3"
5980 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5982 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5983 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5984 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5986 p<maxmin_int>w\t{%2, %0|%0, %2}
5987 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5988 [(set_attr "isa" "noavx,avx")
5989 (set_attr "type" "sseiadd")
5990 (set_attr "prefix_data16" "1,*")
5991 (set_attr "prefix_extra" "*,1")
5992 (set_attr "prefix" "orig,vex")
5993 (set_attr "mode" "TI")])
;; Expander <code><mode>3 (umaxmin) for VI124_128 modes.  Visible paths:
;;   * TARGET_SSE4_1 or V16QImode: only
;;     ix86_fixup_binary_operands_no_copy is applied.
;;   * UMAX on V8HImode: computed as (op1 -us op2) + op2 with
;;     sse2_ussubv8hi3 followed by addv8hi3.  The intermediate goes into
;;     operand 0 unless operand 0 is rtx_equal_p to operand 2, in which
;;     case a fresh register is used so that op2 is still intact for the
;;     addition.
;;   * Remaining code: both inputs are forced into registers and
;;     ix86_expand_int_vcond is called with the unsigned comparison
;;     operands[1] > operands[2] (GTU); xops[1]/xops[2] hold the two
;;     values to select between, in either order depending on a branch.
;; NOTE(review): the condition, the test that picks the xops[1]/xops[2]
;; order and the handling of `ok` are not visible in this listing.
5995 (define_expand "<code><mode>3"
5996 [(set (match_operand:VI124_128 0 "register_operand" "")
5997 (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5998 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6001 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6002 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6003 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6005 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6006 operands[1] = force_reg (<MODE>mode, operands[1]);
6007 if (rtx_equal_p (op3, op2))
6008 op3 = gen_reg_rtx (V8HImode);
6009 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6010 emit_insn (gen_addv8hi3 (op0, op3, op2));
6018 operands[1] = force_reg (<MODE>mode, operands[1]);
6019 operands[2] = force_reg (<MODE>mode, operands[2]);
6021 xops[0] = operands[0];
6025 xops[1] = operands[1];
6026 xops[2] = operands[2];
6030 xops[1] = operands[2];
6031 xops[2] = operands[1];
6034 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6035 xops[4] = operands[1];
6036 xops[5] = operands[2];
6038 ok = ix86_expand_int_vcond (xops);
;; Insn *sse4_1_<code><mode>3 for VI24_128 modes, enabled under
;; TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 prints the two-operand p<maxmin_int><ssemodesuffix>
;; (destination tied to operand 1), alternative 1 the three-operand
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is commutative ("%").
;; NOTE(review): the RTL operator line is not visible in this listing.
6044 (define_insn "*sse4_1_<code><mode>3"
6045 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6047 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6048 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6049 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6051 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6052 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6053 [(set_attr "isa" "noavx,avx")
6054 (set_attr "type" "sseiadd")
6055 (set_attr "prefix_extra" "1,*")
6056 (set_attr "prefix" "orig,vex")
6057 (set_attr "mode" "TI")])
;; Insn *<code>v16qi3 for V16QI, enabled under TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 prints
;; the two-operand p<maxmin_int>b (destination tied to operand 1),
;; alternative 1 the three-operand vp<maxmin_int>b.  Operand 1 is
;; commutative ("%").
;; NOTE(review): the RTL operator line is not visible in this listing.
6059 (define_insn "*<code>v16qi3"
6060 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6062 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6063 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6064 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6066 p<maxmin_int>b\t{%2, %0|%0, %2}
6067 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6068 [(set_attr "isa" "noavx,avx")
6069 (set_attr "type" "sseiadd")
6070 (set_attr "prefix_data16" "1,*")
6071 (set_attr "prefix_extra" "*,1")
6072 (set_attr "prefix" "orig,vex")
6073 (set_attr "mode" "TI")])
6075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6077 ;; Parallel integral comparisons
6079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander avx2_eq<mode>3 for VI_256 modes.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the RTL operator line and the condition are not visible
;; in this listing.
6081 (define_expand "avx2_eq<mode>3"
6082 [(set (match_operand:VI_256 0 "register_operand" "")
6084 (match_operand:VI_256 1 "nonimmediate_operand" "")
6085 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6087 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn *avx2_eq<mode>3 for VI_256 modes, enabled under TARGET_AVX2 when
;; ix86_binary_operator_ok (EQ, ...) accepts the operands.  Prints the
;; three-operand vpcmpeq<ssemodesuffix>; operand 1 is commutative ("%x")
;; and operand 2 may be memory ("xm").
;; NOTE(review): the RTL operator line is not visible in this listing.
6089 (define_insn "*avx2_eq<mode>3"
6090 [(set (match_operand:VI_256 0 "register_operand" "=x")
6092 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6093 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6094 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6095 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6096 [(set_attr "type" "ssecmp")
6097 (set_attr "prefix_extra" "1")
6098 (set_attr "prefix" "vex")
6099 (set_attr "mode" "OI")])
;; Insn *sse4_1_eqv2di3 for V2DI, enabled under TARGET_SSE4_1 when
;; ix86_binary_operator_ok (EQ, ...) accepts the operands.  Alternative 0
;; prints the two-operand pcmpeqq (destination tied to operand 1),
;; alternative 1 the three-operand vpcmpeqq.  Operand 1 is commutative.
;; NOTE(review): the RTL operator line is not visible in this listing.
6101 (define_insn "*sse4_1_eqv2di3"
6102 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6104 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6105 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6106 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6108 pcmpeqq\t{%2, %0|%0, %2}
6109 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6110 [(set_attr "isa" "noavx,avx")
6111 (set_attr "type" "ssecmp")
6112 (set_attr "prefix_extra" "1")
6113 (set_attr "prefix" "orig,vex")
6114 (set_attr "mode" "TI")])
;; Insn *sse2_eq<mode>3 for VI124_128 modes, enabled under TARGET_SSE2
;; only when TARGET_XOP is off and ix86_binary_operator_ok (EQ, ...)
;; accepts the operands.  Alternative 0 prints the two-operand
;; pcmpeq<ssemodesuffix> (destination tied to operand 1), alternative 1
;; the three-operand vpcmpeq<ssemodesuffix>.  Operand 1 is commutative.
;; NOTE(review): the RTL operator line is not visible in this listing.
6116 (define_insn "*sse2_eq<mode>3"
6117 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6119 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6120 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6121 "TARGET_SSE2 && !TARGET_XOP
6122 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6124 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6125 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6126 [(set_attr "isa" "noavx,avx")
6127 (set_attr "type" "ssecmp")
6128 (set_attr "prefix_data16" "1,*")
6129 (set_attr "prefix" "orig,vex")
6130 (set_attr "mode" "TI")])
;; Expander sse2_eq<mode>3 for VI124_128 modes, enabled under TARGET_SSE2
;; when TARGET_XOP is off.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the RTL operator line is not visible in this listing.
6132 (define_expand "sse2_eq<mode>3"
6133 [(set (match_operand:VI124_128 0 "register_operand" "")
6135 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6136 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6137 "TARGET_SSE2 && !TARGET_XOP "
6138 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander sse4_1_eqv2di3 for V2DI.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the RTL operator line and the condition are not visible
;; in this listing.
6140 (define_expand "sse4_1_eqv2di3"
6141 [(set (match_operand:V2DI 0 "register_operand" "")
6143 (match_operand:V2DI 1 "nonimmediate_operand" "")
6144 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6146 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn sse4_2_gtv2di3 for V2DI.  Alternative 0 prints the two-operand
;; pcmpgtq (destination tied to operand 1), alternative 1 the
;; three-operand vpcmpgtq.  Operand 1 must be a register and carries no
;; "%" commutativity marker; operand 2 may be memory.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this listing.
6148 (define_insn "sse4_2_gtv2di3"
6149 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6151 (match_operand:V2DI 1 "register_operand" "0,x")
6152 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6155 pcmpgtq\t{%2, %0|%0, %2}
6156 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6157 [(set_attr "isa" "noavx,avx")
6158 (set_attr "type" "ssecmp")
6159 (set_attr "prefix_extra" "1")
6160 (set_attr "prefix" "orig,vex")
6161 (set_attr "mode" "TI")])
;; Insn avx2_gt<mode>3 for VI_256 modes.  Prints the three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register and carries no
;; "%" commutativity marker; operand 2 may be memory.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this listing.
6163 (define_insn "avx2_gt<mode>3"
6164 [(set (match_operand:VI_256 0 "register_operand" "=x")
6166 (match_operand:VI_256 1 "register_operand" "x")
6167 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6169 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6170 [(set_attr "type" "ssecmp")
6171 (set_attr "prefix_extra" "1")
6172 (set_attr "prefix" "vex")
6173 (set_attr "mode" "OI")])
;; Insn sse2_gt<mode>3 for VI124_128 modes, enabled under TARGET_SSE2
;; when TARGET_XOP is off.  Alternative 0 prints the two-operand
;; pcmpgt<ssemodesuffix> (destination tied to operand 1), alternative 1
;; the three-operand vpcmpgt<ssemodesuffix>.  Operand 1 must be a
;; register and carries no "%" commutativity marker.
;; NOTE(review): the RTL operator line is not visible in this listing.
6175 (define_insn "sse2_gt<mode>3"
6176 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6178 (match_operand:VI124_128 1 "register_operand" "0,x")
6179 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6180 "TARGET_SSE2 && !TARGET_XOP"
6182 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6183 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6184 [(set_attr "isa" "noavx,avx")
6185 (set_attr "type" "ssecmp")
6186 (set_attr "prefix_data16" "1,*")
6187 (set_attr "prefix" "orig,vex")
6188 (set_attr "mode" "TI")])
;; Expander vcond<V_256:mode><VI_256:mode>: operand 3 is a comparison
;; operator over the VI_256 operands 4 and 5; operands 1 and 2 are the
;; V_256 values selected into operand 0.  The visible part of the
;; condition requires the data mode and the comparison mode to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the first conjunct of the condition, the RTL operator
;; line and the handling of `ok` are not visible in this listing.
6190 (define_expand "vcond<V_256:mode><VI_256:mode>"
6191 [(set (match_operand:V_256 0 "register_operand" "")
6193 (match_operator 3 ""
6194 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6195 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6196 (match_operand:V_256 1 "general_operand" "")
6197 (match_operand:V_256 2 "general_operand" "")))]
6199 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6200 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6202 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcond<V_128:mode><VI124_128:mode>: operand 3 is a comparison
;; operator over the VI124_128 operands 4 and 5; operands 1 and 2 are the
;; V_128 values selected into operand 0.  The visible part of the
;; condition requires the data mode and the comparison mode to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the first conjunct of the condition, the RTL operator
;; line and the handling of `ok` are not visible in this listing.
6207 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6208 [(set (match_operand:V_128 0 "register_operand" "")
6210 (match_operator 3 ""
6211 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6212 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6213 (match_operand:V_128 1 "general_operand" "")
6214 (match_operand:V_128 2 "general_operand" "")))]
6216 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6217 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6219 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcond<VI8F_128:mode>v2di: if_then_else on VI8F_128 data
;; (operands 1 and 2) controlled by comparison operator 3 over the V2DI
;; operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the condition and the handling of `ok` are not visible
;; in this listing.
6224 (define_expand "vcond<VI8F_128:mode>v2di"
6225 [(set (match_operand:VI8F_128 0 "register_operand" "")
6226 (if_then_else:VI8F_128
6227 (match_operator 3 ""
6228 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6229 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6230 (match_operand:VI8F_128 1 "general_operand" "")
6231 (match_operand:VI8F_128 2 "general_operand" "")))]
6234 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcondu<V_256:mode><VI_256:mode>: operand 3 is a comparison
;; operator over the VI_256 operands 4 and 5; operands 1 and 2 are the
;; V_256 values selected into operand 0.  The visible part of the
;; condition requires the data mode and the comparison mode to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the first conjunct of the condition, the RTL operator
;; line and the handling of `ok` are not visible in this listing.
6239 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6240 [(set (match_operand:V_256 0 "register_operand" "")
6242 (match_operator 3 ""
6243 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6244 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6245 (match_operand:V_256 1 "general_operand" "")
6246 (match_operand:V_256 2 "general_operand" "")))]
6248 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6249 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6251 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcondu<V_128:mode><VI124_128:mode>: operand 3 is a comparison
;; operator over the VI124_128 operands 4 and 5; operands 1 and 2 are the
;; V_128 values selected into operand 0.  The visible part of the
;; condition requires the data mode and the comparison mode to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the first conjunct of the condition, the RTL operator
;; line and the handling of `ok` are not visible in this listing.
6256 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6257 [(set (match_operand:V_128 0 "register_operand" "")
6259 (match_operator 3 ""
6260 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6261 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6262 (match_operand:V_128 1 "general_operand" "")
6263 (match_operand:V_128 2 "general_operand" "")))]
6265 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6266 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6268 bool ok = ix86_expand_int_vcond (operands);
;; Expander vcondu<VI8F_128:mode>v2di: if_then_else on VI8F_128 data
;; (operands 1 and 2) controlled by comparison operator 3 over the V2DI
;; operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond (operands).
;; NOTE(review): the condition and the handling of `ok` are not visible
;; in this listing.
6273 (define_expand "vcondu<VI8F_128:mode>v2di"
6274 [(set (match_operand:VI8F_128 0 "register_operand" "")
6275 (if_then_else:VI8F_128
6276 (match_operator 3 ""
6277 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6278 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6279 (match_operand:VI8F_128 1 "general_operand" "")
6280 (match_operand:VI8F_128 2 "general_operand" "")))]
6283 bool ok = ix86_expand_int_vcond (operands);
;; Mode iterator VEC_PERM_AVX2: the 128-bit modes V16QI, V8HI, V4SI,
;; V2DI, V4SF and V2DF unconditionally, plus the 256-bit modes V32QI,
;; V16HI, V8SI, V4DI, V8SF and V4DF when TARGET_AVX2 holds.
6288 (define_mode_iterator VEC_PERM_AVX2
6289 [V16QI V8HI V4SI V2DI V4SF V2DF
6290 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6291 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6292 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Expander vec_perm<mode> over VEC_PERM_AVX2 modes, enabled when any of
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP holds.  Operands 0-2 are
;; registers in the data mode; operand 3 is a register in the matching
;; <sseintvecmode>.  All work is delegated to
;; ix86_expand_vec_perm (operands).
6294 (define_expand "vec_perm<mode>"
6295 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6296 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6297 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6298 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6299 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6301 ix86_expand_vec_perm (operands);
;; Mode iterator VEC_PERM_CONST, each mode guarded by a target test:
;; V4SF, V4SI, V2DF, V2DI under TARGET_SSE; V16QI, V8HI under
;; TARGET_SSE2; V8SF, V4DF, V8SI, V4DI under TARGET_AVX; V32QI, V16HI
;; under TARGET_AVX2.
6305 (define_mode_iterator VEC_PERM_CONST
6306 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6307 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6308 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6309 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6310 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6311 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Expander vec_perm_const<mode> over VEC_PERM_CONST modes.  Operands
;; 0-2 are registers in the data mode; operand 3 is in <sseintvecmode>
;; and has an empty predicate.  The body branches on the result of
;; ix86_expand_vec_perm_const (operands).
;; NOTE(review): the condition and both outcomes of the `if` are not
;; visible in this listing.
6313 (define_expand "vec_perm_const<mode>"
6314 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6315 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6316 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6317 (match_operand:<sseintvecmode> 3 "" "")]
6320 if (ix86_expand_vec_perm_const (operands))
6326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6328 ;; Parallel bitwise logical operations
6330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander one_cmpl<mode>2 for VI modes, expressed as an XOR of operand
;; 1 with a second value.  The preparation code builds a CONST_VECTOR
;; with GET_MODE_NUNITS (<MODE>mode) elements, each constm1_rtx, forces
;; it into a register and stores it in operands[2].
;; NOTE(review): the second XOR operand in the template and the
;; condition are not visible in this listing; presumably the template
;; refers to operand 2 -- confirm.
6332 (define_expand "one_cmpl<mode>2"
6333 [(set (match_operand:VI 0 "register_operand" "")
6334 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6338 int i, n = GET_MODE_NUNITS (<MODE>mode);
6339 rtvec v = rtvec_alloc (n);
6341 for (i = 0; i < n; ++i)
6342 RTVEC_ELT (v, i) = constm1_rtx;
6344 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander <sse2_avx2>_andnot<mode>3 for VI_AVX2 modes: combines the
;; complement (not) of register operand 1 with operand 2, which may be
;; memory.
;; NOTE(review): the outer RTL operator line and the condition are not
;; visible in this listing; the name suggests AND -- confirm.
6347 (define_expand "<sse2_avx2>_andnot<mode>3"
6348 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6350 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6351 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn *andnot<mode>3 for VI modes: combines the complement (not) of
;; operand 1 with operand 2.
;; The output code switches on get_attr_mode (insn), asserting the target
;; feature each case needs (TARGET_AVX2, TARGET_SSE2, TARGET_AVX,
;; TARGET_SSE), then picks a format string per alternative: the
;; two-operand form for alternative 0 and the "v"-prefixed three-operand
;; form for alternative 1.  The final template is produced by snprintf
;; into a static 32-byte buffer, substituting `tmp` for %s.
;; The "mode" attribute is V8SF when AVX2 is unavailable and the mode is
;; wider than 16 bytes, V4SF when SSE2 is unavailable, and <sseinsnmode>
;; otherwise.
;; NOTE(review): the outer RTL operator line, the insn condition, the
;; case labels and the assignments to `tmp` are not visible in this
;; listing.
6354 (define_insn "*andnot<mode>3"
6355 [(set (match_operand:VI 0 "register_operand" "=x,x")
6357 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6358 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6361 static char buf[32];
6365 switch (get_attr_mode (insn))
6368 gcc_assert (TARGET_AVX2);
6370 gcc_assert (TARGET_SSE2);
6376 gcc_assert (TARGET_AVX);
6378 gcc_assert (TARGET_SSE);
6387 switch (which_alternative)
6390 ops = "%s\t{%%2, %%0|%%0, %%2}";
6393 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6399 snprintf (buf, sizeof (buf), ops, tmp);
6402 [(set_attr "isa" "noavx,avx")
6403 (set_attr "type" "sselog")
6404 (set (attr "prefix_data16")
6406 (and (eq_attr "alternative" "0")
6407 (eq_attr "mode" "TI"))
6409 (const_string "*")))
6410 (set_attr "prefix" "orig,vex")
6412 (cond [(and (not (match_test "TARGET_AVX2"))
6413 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6414 (const_string "V8SF")
6415 (not (match_test "TARGET_SSE2"))
6416 (const_string "V4SF")
6418 (const_string "<sseinsnmode>")))])
;; Expander <code><mode>3 for VI modes.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy on the operands.
;; NOTE(review): the RTL operator line (code iterator) and the condition
;; are not visible in this listing.
6420 (define_expand "<code><mode>3"
6421 [(set (match_operand:VI 0 "register_operand" "")
6423 (match_operand:VI 1 "nonimmediate_operand" "")
6424 (match_operand:VI 2 "nonimmediate_operand" "")))]
6426 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn *<code><mode>3 for VI modes with a commutative operand 1 ("%");
;; the visible part of the condition requires ix86_binary_operator_ok.
;; The output code switches on get_attr_mode (insn), asserting the target
;; feature each case needs (TARGET_AVX2, TARGET_SSE2, TARGET_AVX,
;; TARGET_SSE), then picks a format string per alternative: the
;; two-operand form for alternative 0 and the "v"-prefixed three-operand
;; form for alternative 1.  The final template is produced by snprintf
;; into a static 32-byte buffer, substituting `tmp` for %s.
;; The "mode" attribute is V8SF when AVX2 is unavailable and the mode is
;; wider than 16 bytes, V4SF when SSE2 is unavailable, and <sseinsnmode>
;; otherwise.
;; NOTE(review): the RTL operator line, the first conjunct of the
;; condition, the case labels and the assignments to `tmp` are not
;; visible in this listing.
6428 (define_insn "*<code><mode>3"
6429 [(set (match_operand:VI 0 "register_operand" "=x,x")
6431 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6432 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6434 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6436 static char buf[32];
6440 switch (get_attr_mode (insn))
6443 gcc_assert (TARGET_AVX2);
6445 gcc_assert (TARGET_SSE2);
6451 gcc_assert (TARGET_AVX);
6453 gcc_assert (TARGET_SSE);
6462 switch (which_alternative)
6465 ops = "%s\t{%%2, %%0|%%0, %%2}";
6468 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6474 snprintf (buf, sizeof (buf), ops, tmp);
6477 [(set_attr "isa" "noavx,avx")
6478 (set_attr "type" "sselog")
6479 (set (attr "prefix_data16")
6481 (and (eq_attr "alternative" "0")
6482 (eq_attr "mode" "TI"))
6484 (const_string "*")))
6485 (set_attr "prefix" "orig,vex")
6487 (cond [(and (not (match_test "TARGET_AVX2"))
6488 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6489 (const_string "V8SF")
6490 (not (match_test "TARGET_SSE2"))
6491 (const_string "V4SF")
6493 (const_string "<sseinsnmode>")))])
;; Insn *andnottf3 for TFmode: combines the complement (not) of register
;; operand 1 with operand 2.  Alternative 0 prints the two-operand pandn
;; (destination tied to operand 1), alternative 1 the three-operand
;; vpandn.
;; NOTE(review): the outer RTL operator line and the insn condition are
;; not visible in this listing.
6495 (define_insn "*andnottf3"
6496 [(set (match_operand:TF 0 "register_operand" "=x,x")
6498 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6499 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6502 pandn\t{%2, %0|%0, %2}
6503 vpandn\t{%2, %1, %0|%0, %1, %2}"
6504 [(set_attr "isa" "noavx,avx")
6505 (set_attr "type" "sselog")
6506 (set_attr "prefix_data16" "1,*")
6507 (set_attr "prefix" "orig,vex")
6508 (set_attr "mode" "TI")])
;; Expander <code>tf3 for TFmode.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy on the operands.
;; NOTE(review): the RTL operator line (code iterator) and the condition
;; are not visible in this listing.
6510 (define_expand "<code>tf3"
6511 [(set (match_operand:TF 0 "register_operand" "")
6513 (match_operand:TF 1 "nonimmediate_operand" "")
6514 (match_operand:TF 2 "nonimmediate_operand" "")))]
6516 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn *<code>tf3 for TFmode with a commutative operand 1 ("%"); the
;; visible part of the condition requires ix86_binary_operator_ok.
;; Alternative 0 prints the two-operand p<logic> (destination tied to
;; operand 1), alternative 1 the three-operand vp<logic>.
;; NOTE(review): the RTL operator line and the first conjunct of the
;; condition are not visible in this listing.
6518 (define_insn "*<code>tf3"
6519 [(set (match_operand:TF 0 "register_operand" "=x,x")
6521 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6522 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6524 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6526 p<logic>\t{%2, %0|%0, %2}
6527 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6528 [(set_attr "isa" "noavx,avx")
6529 (set_attr "type" "sselog")
6530 (set_attr "prefix_data16" "1,*")
6531 (set_attr "prefix" "orig,vex")
6532 (set_attr "mode" "TI")])
6534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6536 ;; Parallel integral element swizzling
6538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander vec_pack_trunc_<mode> for VI248_AVX2 inputs; the result has
;; mode <ssepackmode>.  Both inputs are reinterpreted in <ssepackmode>
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
;; NOTE(review): the final argument presumably selects the even-numbered
;; elements -- confirm against ix86_expand_vec_extract_even_odd.  The
;; condition is not visible in this listing.
6540 (define_expand "vec_pack_trunc_<mode>"
6541 [(match_operand:<ssepackmode> 0 "register_operand" "")
6542 (match_operand:VI248_AVX2 1 "register_operand" "")
6543 (match_operand:VI248_AVX2 2 "register_operand" "")]
6546 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6547 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6548 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn <sse2_avx2>_packsswb for VI1_AVX2 results: the concatenation of
;; the signed-saturating truncations (ss_truncate) of operands 1 and 2,
;; which are in <sseunpackmode>, each producing a <ssehalfvecmode> half.
;; Alternative 0 prints the two-operand packsswb (destination tied to
;; operand 1), alternative 1 the three-operand vpacksswb.
;; NOTE(review): the insn condition is not visible in this listing.
6552 (define_insn "<sse2_avx2>_packsswb"
6553 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6554 (vec_concat:VI1_AVX2
6555 (ss_truncate:<ssehalfvecmode>
6556 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6557 (ss_truncate:<ssehalfvecmode>
6558 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6561 packsswb\t{%2, %0|%0, %2}
6562 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6563 [(set_attr "isa" "noavx,avx")
6564 (set_attr "type" "sselog")
6565 (set_attr "prefix_data16" "1,*")
6566 (set_attr "prefix" "orig,vex")
6567 (set_attr "mode" "<sseinsnmode>")])
;; Insn <sse2_avx2>_packssdw for VI2_AVX2 results: the concatenation of
;; the signed-saturating truncations (ss_truncate) of operands 1 and 2,
;; which are in <sseunpackmode>, each producing a <ssehalfvecmode> half.
;; Alternative 0 prints the two-operand packssdw (destination tied to
;; operand 1), alternative 1 the three-operand vpackssdw.
;; NOTE(review): the insn condition is not visible in this listing.
6569 (define_insn "<sse2_avx2>_packssdw"
6570 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6571 (vec_concat:VI2_AVX2
6572 (ss_truncate:<ssehalfvecmode>
6573 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6574 (ss_truncate:<ssehalfvecmode>
6575 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6578 packssdw\t{%2, %0|%0, %2}
6579 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6580 [(set_attr "isa" "noavx,avx")
6581 (set_attr "type" "sselog")
6582 (set_attr "prefix_data16" "1,*")
6583 (set_attr "prefix" "orig,vex")
6584 (set_attr "mode" "<sseinsnmode>")])
;; Insn <sse2_avx2>_packuswb for VI1_AVX2 results: the concatenation of
;; the unsigned-saturating truncations (us_truncate) of operands 1 and 2,
;; which are in <sseunpackmode>, each producing a <ssehalfvecmode> half.
;; Alternative 0 prints the two-operand packuswb (destination tied to
;; operand 1), alternative 1 the three-operand vpackuswb.
;; NOTE(review): the insn condition is not visible in this listing.
6586 (define_insn "<sse2_avx2>_packuswb"
6587 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6588 (vec_concat:VI1_AVX2
6589 (us_truncate:<ssehalfvecmode>
6590 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6591 (us_truncate:<ssehalfvecmode>
6592 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6595 packuswb\t{%2, %0|%0, %2}
6596 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6597 [(set_attr "isa" "noavx,avx")
6598 (set_attr "type" "sselog")
6599 (set_attr "prefix_data16" "1,*")
6600 (set_attr "prefix" "orig,vex")
6601 (set_attr "mode" "<sseinsnmode>")])
;; Insn avx2_interleave_highv32qi (V32QI), printed as vpunpckhbw.
;; The selection list pairs index i with index i + 32 for i = 8..15 and
;; then for i = 24..31.
;; NOTE(review): the vec_select/vec_concat wrapper lines and the insn
;; condition are not visible in this listing; presumably indices below 32
;; address operand 1 and indices from 32 up address operand 2, which
;; would make this the upper eight bytes of each 128-bit lane -- confirm.
6603 (define_insn "avx2_interleave_highv32qi"
6604 [(set (match_operand:V32QI 0 "register_operand" "=x")
6607 (match_operand:V32QI 1 "register_operand" "x")
6608 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6609 (parallel [(const_int 8) (const_int 40)
6610 (const_int 9) (const_int 41)
6611 (const_int 10) (const_int 42)
6612 (const_int 11) (const_int 43)
6613 (const_int 12) (const_int 44)
6614 (const_int 13) (const_int 45)
6615 (const_int 14) (const_int 46)
6616 (const_int 15) (const_int 47)
6617 (const_int 24) (const_int 56)
6618 (const_int 25) (const_int 57)
6619 (const_int 26) (const_int 58)
6620 (const_int 27) (const_int 59)
6621 (const_int 28) (const_int 60)
6622 (const_int 29) (const_int 61)
6623 (const_int 30) (const_int 62)
6624 (const_int 31) (const_int 63)])))]
6626 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6627 [(set_attr "type" "sselog")
6628 (set_attr "prefix" "vex")
6629 (set_attr "mode" "OI")])
;; Insn vec_interleave_highv16qi (V16QI).  Alternative 0 prints the
;; two-operand punpckhbw (destination tied to operand 1), alternative 1
;; the three-operand vpunpckhbw.  The selection list pairs index i with
;; index i + 16 for i = 8..15.
;; NOTE(review): the vec_select/vec_concat wrapper lines and the insn
;; condition are not visible in this listing; presumably indices below 16
;; address operand 1 and indices from 16 up address operand 2 -- confirm.
6631 (define_insn "vec_interleave_highv16qi"
6632 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6635 (match_operand:V16QI 1 "register_operand" "0,x")
6636 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6637 (parallel [(const_int 8) (const_int 24)
6638 (const_int 9) (const_int 25)
6639 (const_int 10) (const_int 26)
6640 (const_int 11) (const_int 27)
6641 (const_int 12) (const_int 28)
6642 (const_int 13) (const_int 29)
6643 (const_int 14) (const_int 30)
6644 (const_int 15) (const_int 31)])))]
6647 punpckhbw\t{%2, %0|%0, %2}
6648 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6649 [(set_attr "isa" "noavx,avx")
6650 (set_attr "type" "sselog")
6651 (set_attr "prefix_data16" "1,*")
6652 (set_attr "prefix" "orig,vex")
6653 (set_attr "mode" "TI")])
;; 256-bit byte interleave-low, emitted as vpunpcklbw.
;; The selector pairs index i with index i+32 for i = 0..7 and i = 16..23,
;; i.e. the lower eight bytes of each 16-byte half of a 32-byte operand.
;; NOTE(review): source lines 6657-6658 and 6677 are not shown in this
;; listing; presumably the vec_select/vec_concat wrappers and the condition.
6655 (define_insn "avx2_interleave_lowv32qi"
6656 [(set (match_operand:V32QI 0 "register_operand" "=x")
6659 (match_operand:V32QI 1 "register_operand" "x")
6660 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6661 (parallel [(const_int 0) (const_int 32)
6662 (const_int 1) (const_int 33)
6663 (const_int 2) (const_int 34)
6664 (const_int 3) (const_int 35)
6665 (const_int 4) (const_int 36)
6666 (const_int 5) (const_int 37)
6667 (const_int 6) (const_int 38)
6668 (const_int 7) (const_int 39)
6669 (const_int 16) (const_int 48)
6670 (const_int 17) (const_int 49)
6671 (const_int 18) (const_int 50)
6672 (const_int 19) (const_int 51)
6673 (const_int 20) (const_int 52)
6674 (const_int 21) (const_int 53)
6675 (const_int 22) (const_int 54)
6676 (const_int 23) (const_int 55)])))]
6678 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6679 [(set_attr "type" "sselog")
6680 (set_attr "prefix" "vex")
6681 (set_attr "mode" "OI")])
;; 128-bit byte interleave-low.  The selector pairs index i with index
;; i+16 for i = 0..7.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and emits punpcklbw; alternative 1 (isa avx) emits
;; three-operand vpunpcklbw.  prefix_data16 is 1 only for alternative 0.
;; NOTE(review): source lines 6685-6686 and 6697-6698 are not shown here.
6683 (define_insn "vec_interleave_lowv16qi"
6684 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6687 (match_operand:V16QI 1 "register_operand" "0,x")
6688 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6689 (parallel [(const_int 0) (const_int 16)
6690 (const_int 1) (const_int 17)
6691 (const_int 2) (const_int 18)
6692 (const_int 3) (const_int 19)
6693 (const_int 4) (const_int 20)
6694 (const_int 5) (const_int 21)
6695 (const_int 6) (const_int 22)
6696 (const_int 7) (const_int 23)])))]
6699 punpcklbw\t{%2, %0|%0, %2}
6700 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6701 [(set_attr "isa" "noavx,avx")
6702 (set_attr "type" "sselog")
6703 (set_attr "prefix_data16" "1,*")
6704 (set_attr "prefix" "orig,vex")
6705 (set_attr "mode" "TI")])
;; 256-bit word interleave-high, emitted as vpunpckhwd.
;; The selector pairs index i with index i+16 for i = 4..7 and i = 12..15,
;; i.e. the upper four words of each 8-word half of a 16-word operand.
;; NOTE(review): source lines 6709-6710 and 6721 are not shown here;
;; presumably the vec_select/vec_concat wrappers and the insn condition.
6707 (define_insn "avx2_interleave_highv16hi"
6708 [(set (match_operand:V16HI 0 "register_operand" "=x")
6711 (match_operand:V16HI 1 "register_operand" "x")
6712 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6713 (parallel [(const_int 4) (const_int 20)
6714 (const_int 5) (const_int 21)
6715 (const_int 6) (const_int 22)
6716 (const_int 7) (const_int 23)
6717 (const_int 12) (const_int 28)
6718 (const_int 13) (const_int 29)
6719 (const_int 14) (const_int 30)
6720 (const_int 15) (const_int 31)])))]
6722 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6723 [(set_attr "type" "sselog")
6724 (set_attr "prefix" "vex")
6725 (set_attr "mode" "OI")])
;; 128-bit word interleave-high.  The selector pairs index i with index
;; i+8 for i = 4..7.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and emits punpckhwd; alternative 1 (isa avx) emits
;; three-operand vpunpckhwd.
;; NOTE(review): source lines 6729-6730 and 6737-6738 are not shown here.
6727 (define_insn "vec_interleave_highv8hi"
6728 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6731 (match_operand:V8HI 1 "register_operand" "0,x")
6732 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6733 (parallel [(const_int 4) (const_int 12)
6734 (const_int 5) (const_int 13)
6735 (const_int 6) (const_int 14)
6736 (const_int 7) (const_int 15)])))]
6739 punpckhwd\t{%2, %0|%0, %2}
6740 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6741 [(set_attr "isa" "noavx,avx")
6742 (set_attr "type" "sselog")
6743 (set_attr "prefix_data16" "1,*")
6744 (set_attr "prefix" "orig,vex")
6745 (set_attr "mode" "TI")])
;; 256-bit word interleave-low, emitted as vpunpcklwd.
;; The selector pairs index i with index i+16 for i = 0..3 and i = 8..11,
;; i.e. the lower four words of each 8-word half of a 16-word operand.
;; NOTE(review): source lines 6749-6750 and 6761 are not shown here;
;; presumably the vec_select/vec_concat wrappers and the insn condition.
6747 (define_insn "avx2_interleave_lowv16hi"
6748 [(set (match_operand:V16HI 0 "register_operand" "=x")
6751 (match_operand:V16HI 1 "register_operand" "x")
6752 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6753 (parallel [(const_int 0) (const_int 16)
6754 (const_int 1) (const_int 17)
6755 (const_int 2) (const_int 18)
6756 (const_int 3) (const_int 19)
6757 (const_int 8) (const_int 24)
6758 (const_int 9) (const_int 25)
6759 (const_int 10) (const_int 26)
6760 (const_int 11) (const_int 27)])))]
6762 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6763 [(set_attr "type" "sselog")
6764 (set_attr "prefix" "vex")
6765 (set_attr "mode" "OI")])
;; 128-bit word interleave-low.  The selector pairs index i with index
;; i+8 for i = 0..3.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and emits punpcklwd; alternative 1 (isa avx) emits
;; three-operand vpunpcklwd.
;; NOTE(review): source lines 6769-6770 and 6777-6778 are not shown here.
6767 (define_insn "vec_interleave_lowv8hi"
6768 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6771 (match_operand:V8HI 1 "register_operand" "0,x")
6772 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6773 (parallel [(const_int 0) (const_int 8)
6774 (const_int 1) (const_int 9)
6775 (const_int 2) (const_int 10)
6776 (const_int 3) (const_int 11)])))]
6779 punpcklwd\t{%2, %0|%0, %2}
6780 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6781 [(set_attr "isa" "noavx,avx")
6782 (set_attr "type" "sselog")
6783 (set_attr "prefix_data16" "1,*")
6784 (set_attr "prefix" "orig,vex")
6785 (set_attr "mode" "TI")])
;; 256-bit doubleword interleave-high, emitted as vpunpckhdq.
;; The selector pairs index i with index i+8 for i = 2,3 and i = 6,7,
;; i.e. the upper two elements of each 4-element half of an 8-element operand.
;; NOTE(review): source lines 6789-6790 and 6797 are not shown here;
;; presumably the vec_select/vec_concat wrappers and the insn condition.
6787 (define_insn "avx2_interleave_highv8si"
6788 [(set (match_operand:V8SI 0 "register_operand" "=x")
6791 (match_operand:V8SI 1 "register_operand" "x")
6792 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6793 (parallel [(const_int 2) (const_int 10)
6794 (const_int 3) (const_int 11)
6795 (const_int 6) (const_int 14)
6796 (const_int 7) (const_int 15)])))]
6798 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6799 [(set_attr "type" "sselog")
6800 (set_attr "prefix" "vex")
6801 (set_attr "mode" "OI")])
;; 128-bit doubleword interleave-high.  The selector pairs index i with
;; index i+4 for i = 2,3.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and emits punpckhdq; alternative 1 (isa avx) emits
;; three-operand vpunpckhdq.
;; NOTE(review): source lines 6805-6806 and 6811-6812 are not shown here.
6803 (define_insn "vec_interleave_highv4si"
6804 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6807 (match_operand:V4SI 1 "register_operand" "0,x")
6808 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6809 (parallel [(const_int 2) (const_int 6)
6810 (const_int 3) (const_int 7)])))]
6813 punpckhdq\t{%2, %0|%0, %2}
6814 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6815 [(set_attr "isa" "noavx,avx")
6816 (set_attr "type" "sselog")
6817 (set_attr "prefix_data16" "1,*")
6818 (set_attr "prefix" "orig,vex")
6819 (set_attr "mode" "TI")])
;; 256-bit doubleword interleave-low, emitted as vpunpckldq.
;; The selector pairs index i with index i+8 for i = 0,1 and i = 4,5,
;; i.e. the lower two elements of each 4-element half of an 8-element operand.
;; NOTE(review): source lines 6823-6824 and 6831 are not shown here;
;; presumably the vec_select/vec_concat wrappers and the insn condition.
6821 (define_insn "avx2_interleave_lowv8si"
6822 [(set (match_operand:V8SI 0 "register_operand" "=x")
6825 (match_operand:V8SI 1 "register_operand" "x")
6826 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6827 (parallel [(const_int 0) (const_int 8)
6828 (const_int 1) (const_int 9)
6829 (const_int 4) (const_int 12)
6830 (const_int 5) (const_int 13)])))]
6832 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6833 [(set_attr "type" "sselog")
6834 (set_attr "prefix" "vex")
6835 (set_attr "mode" "OI")])
;; 128-bit doubleword interleave-low.  The selector pairs index i with
;; index i+4 for i = 0,1.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and emits punpckldq; alternative 1 (isa avx) emits
;; three-operand vpunpckldq.
;; NOTE(review): source lines 6839-6840 and 6845-6846 are not shown here.
6837 (define_insn "vec_interleave_lowv4si"
6838 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6841 (match_operand:V4SI 1 "register_operand" "0,x")
6842 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6843 (parallel [(const_int 0) (const_int 4)
6844 (const_int 1) (const_int 5)])))]
6847 punpckldq\t{%2, %0|%0, %2}
6848 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6849 [(set_attr "isa" "noavx,avx")
6850 (set_attr "type" "sselog")
6851 (set_attr "prefix_data16" "1,*")
6852 (set_attr "prefix" "orig,vex")
6853 (set_attr "mode" "TI")])
;; Expander for the generic interleave-high name over the VI_256 modes.
;; It emits avx2_interleave_low into temporary t1 and avx2_interleave_high
;; into temporary t2, then combines them with avx2_permv2ti, viewing all
;; three registers as V4DImode, using the immediate 1 + (3 << 4) = 0x31.
;; NOTE(review): 0x31 presumably selects the upper 128-bit half of t1 and of
;; t2 -- confirm against the vperm2i128 immediate encoding.  Source lines
;; 6859-6860 and 6868-6870 (condition, braces, DONE) are not shown here.
6855 (define_expand "vec_interleave_high<mode>"
6856 [(match_operand:VI_256 0 "register_operand" "=x")
6857 (match_operand:VI_256 1 "register_operand" "x")
6858 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6861 rtx t1 = gen_reg_rtx (<MODE>mode);
6862 rtx t2 = gen_reg_rtx (<MODE>mode);
6863 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6864 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6865 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6866 gen_lowpart (V4DImode, t1),
6867 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the generic interleave-low name over the VI_256 modes.
;; It emits avx2_interleave_low into temporary t1 and avx2_interleave_high
;; into temporary t2, then combines them with avx2_permv2ti, viewing all
;; three registers as V4DImode, using the immediate 0 + (2 << 4) = 0x20.
;; NOTE(review): 0x20 presumably selects the lower 128-bit half of t1 and of
;; t2 -- confirm against the vperm2i128 immediate encoding.  Source lines
;; 6875-6876 and 6884-6886 (condition, braces, DONE) are not shown here.
6871 (define_expand "vec_interleave_low<mode>"
6872 [(match_operand:VI_256 0 "register_operand" "=x")
6873 (match_operand:VI_256 1 "register_operand" "x")
6874 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6877 rtx t1 = gen_reg_rtx (<MODE>mode);
6878 rtx t2 = gen_reg_rtx (<MODE>mode);
6879 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6880 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6881 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6882 gen_lowpart (V4DImode, t1),
6883 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6887 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI require TARGET_SSE4_1;
;; V2DI additionally requires TARGET_64BIT.
6888 (define_mode_iterator PINSR_MODE
6889 [(V16QI "TARGET_SSE4_1") V8HI
6890 (V4SI "TARGET_SSE4_1")
6891 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern per mode: sse2 for V8HI, sse4_1 for
;; V16QI, V4SI and V2DI.
6893 (define_mode_attr sse2p4_1
6894 [(V16QI "sse4_1") (V8HI "sse2")
6895 (V4SI "sse4_1") (V2DI "sse4_1")])
6897 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1, expressed
;; as a vec_merge of a vec_duplicate of the scalar with the vector.
;; Operand 3 is a const_int merge mask: the condition requires that
;; exact_log2 of it is a valid element index (below GET_MODE_NUNITS), and
;; the output code replaces operand 3 with that index before printing it
;; as the immediate.  When the scalar mode is narrower than SImode the
;; source operand is printed with the k modifier (%k2).
;; Alternatives 0-1 are isa noavx (pinsr, operand 1 tied to destination);
;; alternatives 2-3 are isa avx (vpinsr, three register operands).
;; NOTE(review): several source lines are not shown in this listing (the
;; first condition line 6905, the switch case labels, braces, and the
;; if_then_else heads/arms of the computed attributes) -- read the full
;; file before changing which alternative maps to which template.
6898 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6899 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6900 (vec_merge:PINSR_MODE
6901 (vec_duplicate:PINSR_MODE
6902 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6903 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6904 (match_operand:SI 3 "const_int_operand" "")))]
6906 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6907 < GET_MODE_NUNITS (<MODE>mode))"
6909 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6911 switch (which_alternative)
6914 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6915 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6918 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6920 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6921 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6924 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6929 [(set_attr "isa" "noavx,noavx,avx,avx")
6930 (set_attr "type" "sselog")
6931 (set (attr "prefix_rex")
6933 (and (not (match_test "TARGET_AVX"))
6934 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6936 (const_string "*")))
6937 (set (attr "prefix_data16")
6939 (and (not (match_test "TARGET_AVX"))
6940 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6942 (const_string "*")))
6943 (set (attr "prefix_extra")
6945 (and (not (match_test "TARGET_AVX"))
6946 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6948 (const_string "1")))
6949 (set_attr "length_immediate" "1")
6950 (set_attr "prefix" "orig,orig,vex,vex")
6951 (set_attr "mode" "TI")])
;; Extract the byte of V16QI operand 1 selected by operand 2 (0..15) into a
;; general register of an SWI48 mode.  Emits pextrb (v-prefixed under AVX via
;; %v) with the destination printed using the k modifier.
;; NOTE(review): source lines 6955-6956 and 6959 are not shown here;
;; presumably an extension wrapper, the vec_select head and the condition.
6953 (define_insn "*sse4_1_pextrb_<mode>"
6954 [(set (match_operand:SWI48 0 "register_operand" "=r")
6957 (match_operand:V16QI 1 "register_operand" "x")
6958 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6960 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6961 [(set_attr "type" "sselog")
6962 (set_attr "prefix_extra" "1")
6963 (set_attr "length_immediate" "1")
6964 (set_attr "prefix" "maybe_vex")
6965 (set_attr "mode" "TI")])
;; Store form of pextrb: the byte of V16QI operand 1 selected by operand 2
;; (0..15) is written directly to a QImode memory destination.
;; NOTE(review): source lines 6969 and 6972 are not shown here; presumably
;; the vec_select head and the insn condition.
6967 (define_insn "*sse4_1_pextrb_memory"
6968 [(set (match_operand:QI 0 "memory_operand" "=m")
6970 (match_operand:V16QI 1 "register_operand" "x")
6971 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6973 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6974 [(set_attr "type" "sselog")
6975 (set_attr "prefix_extra" "1")
6976 (set_attr "length_immediate" "1")
6977 (set_attr "prefix" "maybe_vex")
6978 (set_attr "mode" "TI")])
;; Extract the word of V8HI operand 1 selected by operand 2 (0..7) into a
;; general register of an SWI48 mode.  Emits pextrw (v-prefixed under AVX via
;; %v) with the destination printed using the k modifier.  Unlike the
;; sse4_1 extract patterns nearby, this one sets prefix_data16 rather than
;; prefix_extra.
;; NOTE(review): source lines 6982-6983 and 6986 are not shown here.
6980 (define_insn "*sse2_pextrw_<mode>"
6981 [(set (match_operand:SWI48 0 "register_operand" "=r")
6984 (match_operand:V8HI 1 "register_operand" "x")
6985 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6987 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6988 [(set_attr "type" "sselog")
6989 (set_attr "prefix_data16" "1")
6990 (set_attr "length_immediate" "1")
6991 (set_attr "prefix" "maybe_vex")
6992 (set_attr "mode" "TI")])
;; Store form of pextrw: the word of V8HI operand 1 selected by operand 2
;; (0..7) is written directly to an HImode memory destination.
;; NOTE(review): source lines 6996 and 6999 are not shown here; presumably
;; the vec_select head and the insn condition.
6994 (define_insn "*sse4_1_pextrw_memory"
6995 [(set (match_operand:HI 0 "memory_operand" "=m")
6997 (match_operand:V8HI 1 "register_operand" "x")
6998 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7000 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7001 [(set_attr "type" "sselog")
7002 (set_attr "prefix_extra" "1")
7003 (set_attr "length_immediate" "1")
7004 (set_attr "prefix" "maybe_vex")
7005 (set_attr "mode" "TI")])
;; Extract the SImode element of V4SI operand 1 selected by operand 2 (0..3)
;; into a general register or memory (constraint rm).  Emits pextrd.
;; NOTE(review): source lines 7009 and 7012 are not shown here; presumably
;; the vec_select head and the insn condition.
7007 (define_insn "*sse4_1_pextrd"
7008 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7010 (match_operand:V4SI 1 "register_operand" "x")
7011 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7013 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7014 [(set_attr "type" "sselog")
7015 (set_attr "prefix_extra" "1")
7016 (set_attr "length_immediate" "1")
7017 (set_attr "prefix" "maybe_vex")
7018 (set_attr "mode" "TI")])
;; DImode-destination variant of pextrd, enabled only when both
;; TARGET_64BIT and TARGET_SSE4_1 hold.  The destination register is printed
;; with the k modifier, so the emitted instruction is still pextrd.
;; NOTE(review): source lines 7022-7023 are not shown here; by the pattern
;; name they presumably wrap the extract in a zero_extend -- confirm.
7020 (define_insn "*sse4_1_pextrd_zext"
7021 [(set (match_operand:DI 0 "register_operand" "=r")
7024 (match_operand:V4SI 1 "register_operand" "x")
7025 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7026 "TARGET_64BIT && TARGET_SSE4_1"
7027 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7028 [(set_attr "type" "sselog")
7029 (set_attr "prefix_extra" "1")
7030 (set_attr "length_immediate" "1")
7031 (set_attr "prefix" "maybe_vex")
7032 (set_attr "mode" "TI")])
7034 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract the DImode element of V2DI operand 1 selected by operand 2 (0..1)
;; into a general register or memory.  Enabled only when TARGET_SSE4_1 and
;; TARGET_64BIT both hold; sets prefix_rex in addition to prefix_extra.
;; NOTE(review): source line 7037 (presumably the vec_select head) is not
;; shown in this listing.
7035 (define_insn "*sse4_1_pextrq"
7036 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7038 (match_operand:V2DI 1 "register_operand" "x")
7039 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7040 "TARGET_SSE4_1 && TARGET_64BIT"
7041 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7042 [(set_attr "type" "sselog")
7043 (set_attr "prefix_rex" "1")
7044 (set_attr "prefix_extra" "1")
7045 (set_attr "length_immediate" "1")
7046 (set_attr "prefix" "maybe_vex")
7047 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It
;; splits the immediate into four 2-bit selectors and calls
;; gen_avx2_pshufd_1 with those four values followed by the same four
;; values plus 4, i.e. the identical selection repeated for elements 4..7.
;; NOTE(review): source lines 7053-7054 and 7065-7067 (condition, braces,
;; DONE) are not shown in this listing.
7049 (define_expand "avx2_pshufdv3"
7050 [(match_operand:V8SI 0 "register_operand" "")
7051 (match_operand:V8SI 1 "nonimmediate_operand" "")
7052 (match_operand:SI 2 "const_0_to_255_operand" "")]
7055 int mask = INTVAL (operands[2]);
7056 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7057 GEN_INT ((mask >> 0) & 3),
7058 GEN_INT ((mask >> 2) & 3),
7059 GEN_INT ((mask >> 4) & 3),
7060 GEN_INT ((mask >> 6) & 3),
7061 GEN_INT (((mask >> 0) & 3) + 4),
7062 GEN_INT (((mask >> 2) & 3) + 4),
7063 GEN_INT (((mask >> 4) & 3) + 4),
7064 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn for the 256-bit vpshufd.  Operands 2..5 are selectors in
;; 0..3 and operands 6..9 are selectors in 4..7; the condition additionally
;; requires operand N+4 == operand N + 4 for N = 2..5, so both groups encode
;; the same shuffle.  The output code packs operands 2..5 into consecutive
;; 2-bit fields of mask, stores that in operands[2], and prints it as the
;; immediate of vpshufd.
;; NOTE(review): source lines 7070, 7080 and 7085-7086 are not shown here
;; (presumably the vec_select head, the first condition line and the
;; declaration of mask).
7068 (define_insn "avx2_pshufd_1"
7069 [(set (match_operand:V8SI 0 "register_operand" "=x")
7071 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7072 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7073 (match_operand 3 "const_0_to_3_operand" "")
7074 (match_operand 4 "const_0_to_3_operand" "")
7075 (match_operand 5 "const_0_to_3_operand" "")
7076 (match_operand 6 "const_4_to_7_operand" "")
7077 (match_operand 7 "const_4_to_7_operand" "")
7078 (match_operand 8 "const_4_to_7_operand" "")
7079 (match_operand 9 "const_4_to_7_operand" "")])))]
7081 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7082 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7083 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7084 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7087 mask |= INTVAL (operands[2]) << 0;
7088 mask |= INTVAL (operands[3]) << 2;
7089 mask |= INTVAL (operands[4]) << 4;
7090 mask |= INTVAL (operands[5]) << 6;
7091 operands[2] = GEN_INT (mask);
7093 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7095 [(set_attr "type" "sselog1")
7096 (set_attr "prefix" "vex")
7097 (set_attr "length_immediate" "1")
7098 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low 8 bits into four 2-bit selectors and passes them to
;; gen_sse2_pshufd_1; higher bits of the immediate are ignored.
;; NOTE(review): source lines 7104-7105 and 7112-7114 (condition, braces,
;; DONE) are not shown in this listing.
7100 (define_expand "sse2_pshufd"
7101 [(match_operand:V4SI 0 "register_operand" "")
7102 (match_operand:V4SI 1 "nonimmediate_operand" "")
7103 (match_operand:SI 2 "const_int_operand" "")]
7106 int mask = INTVAL (operands[2]);
7107 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7108 GEN_INT ((mask >> 0) & 3),
7109 GEN_INT ((mask >> 2) & 3),
7110 GEN_INT ((mask >> 4) & 3),
7111 GEN_INT ((mask >> 6) & 3)));
;; Matching insn for the 128-bit pshufd.  Operands 2..5 are per-element
;; selectors in 0..3; the output code packs them into consecutive 2-bit
;; fields of mask, stores that in operands[2], and prints it as the
;; immediate of pshufd (v-prefixed under AVX via %v).
;; NOTE(review): source lines 7117 and 7123-7125 are not shown here
;; (presumably the vec_select head, the condition and the declaration of
;; mask).
7115 (define_insn "sse2_pshufd_1"
7116 [(set (match_operand:V4SI 0 "register_operand" "=x")
7118 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7119 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7120 (match_operand 3 "const_0_to_3_operand" "")
7121 (match_operand 4 "const_0_to_3_operand" "")
7122 (match_operand 5 "const_0_to_3_operand" "")])))]
7126 mask |= INTVAL (operands[2]) << 0;
7127 mask |= INTVAL (operands[3]) << 2;
7128 mask |= INTVAL (operands[4]) << 4;
7129 mask |= INTVAL (operands[5]) << 6;
7130 operands[2] = GEN_INT (mask);
7132 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7134 [(set_attr "type" "sselog1")
7135 (set_attr "prefix_data16" "1")
7136 (set_attr "prefix" "maybe_vex")
7137 (set_attr "length_immediate" "1")
7138 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It
;; splits the immediate into four 2-bit selectors and calls
;; gen_avx2_pshuflw_1 with those four values followed by the same four
;; values plus 8, i.e. the identical selection repeated eight elements up.
;; NOTE(review): source lines 7144-7145 and 7156-7158 (condition, braces,
;; DONE) are not shown in this listing.
7140 (define_expand "avx2_pshuflwv3"
7141 [(match_operand:V16HI 0 "register_operand" "")
7142 (match_operand:V16HI 1 "nonimmediate_operand" "")
7143 (match_operand:SI 2 "const_0_to_255_operand" "")]
7146 int mask = INTVAL (operands[2]);
7147 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7148 GEN_INT ((mask >> 0) & 3),
7149 GEN_INT ((mask >> 2) & 3),
7150 GEN_INT ((mask >> 4) & 3),
7151 GEN_INT ((mask >> 6) & 3),
7152 GEN_INT (((mask >> 0) & 3) + 8),
7153 GEN_INT (((mask >> 2) & 3) + 8),
7154 GEN_INT (((mask >> 4) & 3) + 8),
7155 GEN_INT (((mask >> 6) & 3) + 8)));
;; Matching insn for the 256-bit vpshuflw.  Operands 2..5 are selectors in
;; 0..3 and operands 6..9 are selectors in 8..11; the condition additionally
;; requires operand N+4 == operand N + 8 for N = 2..5.  The output code
;; packs operands 2..5 into consecutive 2-bit fields of mask, stores that in
;; operands[2], and prints it as the immediate of vpshuflw.
;; NOTE(review): source lines 7161, 7167-7170, 7175-7179 and 7184-7185 are
;; not shown here; the gaps inside the selector list presumably hold fixed
;; const_int entries for the elements this instruction leaves in place.
7159 (define_insn "avx2_pshuflw_1"
7160 [(set (match_operand:V16HI 0 "register_operand" "=x")
7162 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7163 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7164 (match_operand 3 "const_0_to_3_operand" "")
7165 (match_operand 4 "const_0_to_3_operand" "")
7166 (match_operand 5 "const_0_to_3_operand" "")
7171 (match_operand 6 "const_8_to_11_operand" "")
7172 (match_operand 7 "const_8_to_11_operand" "")
7173 (match_operand 8 "const_8_to_11_operand" "")
7174 (match_operand 9 "const_8_to_11_operand" "")
7180 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7181 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7182 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7183 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7186 mask |= INTVAL (operands[2]) << 0;
7187 mask |= INTVAL (operands[3]) << 2;
7188 mask |= INTVAL (operands[4]) << 4;
7189 mask |= INTVAL (operands[5]) << 6;
7190 operands[2] = GEN_INT (mask);
7192 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7194 [(set_attr "type" "sselog")
7195 (set_attr "prefix" "vex")
7196 (set_attr "length_immediate" "1")
7197 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low 8 bits into four 2-bit selectors and passes them to
;; gen_sse2_pshuflw_1; higher bits of the immediate are ignored.
;; NOTE(review): source lines 7203-7204 and 7211-7213 (condition, braces,
;; DONE) are not shown in this listing.
7199 (define_expand "sse2_pshuflw"
7200 [(match_operand:V8HI 0 "register_operand" "")
7201 (match_operand:V8HI 1 "nonimmediate_operand" "")
7202 (match_operand:SI 2 "const_int_operand" "")]
7205 int mask = INTVAL (operands[2]);
7206 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7207 GEN_INT ((mask >> 0) & 3),
7208 GEN_INT ((mask >> 2) & 3),
7209 GEN_INT ((mask >> 4) & 3),
7210 GEN_INT ((mask >> 6) & 3)));
;; Matching insn for the 128-bit pshuflw.  Operands 2..5 are selectors in
;; 0..3; the output code packs them into consecutive 2-bit fields of mask,
;; stores that in operands[2], and prints it as the immediate of pshuflw
;; (v-prefixed under AVX via %v).  Attributes: prefix_data16 0, prefix_rep 1.
;; NOTE(review): source lines 7216 and 7222-7228 are not shown here; the
;; gap after operand 5 presumably holds fixed const_int entries for the
;; remaining elements, the closing of the pattern, the condition and the
;; declaration of mask.
7214 (define_insn "sse2_pshuflw_1"
7215 [(set (match_operand:V8HI 0 "register_operand" "=x")
7217 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7218 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7219 (match_operand 3 "const_0_to_3_operand" "")
7220 (match_operand 4 "const_0_to_3_operand" "")
7221 (match_operand 5 "const_0_to_3_operand" "")
7229 mask |= INTVAL (operands[2]) << 0;
7230 mask |= INTVAL (operands[3]) << 2;
7231 mask |= INTVAL (operands[4]) << 4;
7232 mask |= INTVAL (operands[5]) << 6;
7233 operands[2] = GEN_INT (mask);
7235 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7237 [(set_attr "type" "sselog")
7238 (set_attr "prefix_data16" "0")
7239 (set_attr "prefix_rep" "1")
7240 (set_attr "prefix" "maybe_vex")
7241 (set_attr "length_immediate" "1")
7242 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It
;; splits the immediate into four 2-bit selectors and calls
;; gen_avx2_pshufhw_1 with each selector plus 4 (range 4..7) followed by
;; each selector plus 12 (range 12..15).
;; NOTE(review): source lines 7248-7249 and 7260-7262 (condition, braces,
;; DONE) are not shown in this listing.
7244 (define_expand "avx2_pshufhwv3"
7245 [(match_operand:V16HI 0 "register_operand" "")
7246 (match_operand:V16HI 1 "nonimmediate_operand" "")
7247 (match_operand:SI 2 "const_0_to_255_operand" "")]
7250 int mask = INTVAL (operands[2]);
7251 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7252 GEN_INT (((mask >> 0) & 3) + 4),
7253 GEN_INT (((mask >> 2) & 3) + 4),
7254 GEN_INT (((mask >> 4) & 3) + 4),
7255 GEN_INT (((mask >> 6) & 3) + 4),
7256 GEN_INT (((mask >> 0) & 3) + 12),
7257 GEN_INT (((mask >> 2) & 3) + 12),
7258 GEN_INT (((mask >> 4) & 3) + 12),
7259 GEN_INT (((mask >> 6) & 3) + 12)));
;; Matching insn for the 256-bit vpshufhw.  The selector list starts with a
;; fixed (const_int 0); operands 2..5 are selectors in 4..7 and operands
;; 6..9 are selectors in 12..15, and the condition additionally requires
;; operand N+4 == operand N + 8 for N = 2..5.  The output code subtracts 4
;; from operands 2..5, packs the results into consecutive 2-bit fields of
;; mask, stores that in operands[2], and prints it as the vpshufhw immediate.
;; NOTE(review): source lines 7265, 7268-7270, 7275-7278, 7283 and
;; 7288-7289 are not shown here; the gaps inside the selector list
;; presumably hold further fixed const_int entries.
7263 (define_insn "avx2_pshufhw_1"
7264 [(set (match_operand:V16HI 0 "register_operand" "=x")
7266 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7267 (parallel [(const_int 0)
7271 (match_operand 2 "const_4_to_7_operand" "")
7272 (match_operand 3 "const_4_to_7_operand" "")
7273 (match_operand 4 "const_4_to_7_operand" "")
7274 (match_operand 5 "const_4_to_7_operand" "")
7279 (match_operand 6 "const_12_to_15_operand" "")
7280 (match_operand 7 "const_12_to_15_operand" "")
7281 (match_operand 8 "const_12_to_15_operand" "")
7282 (match_operand 9 "const_12_to_15_operand" "")])))]
7284 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7285 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7286 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7287 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7290 mask |= (INTVAL (operands[2]) - 4) << 0;
7291 mask |= (INTVAL (operands[3]) - 4) << 2;
7292 mask |= (INTVAL (operands[4]) - 4) << 4;
7293 mask |= (INTVAL (operands[5]) - 4) << 6;
7294 operands[2] = GEN_INT (mask);
7296 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7298 [(set_attr "type" "sselog")
7299 (set_attr "prefix" "vex")
7300 (set_attr "length_immediate" "1")
7301 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low 8 bits into four 2-bit selectors, adds 4 to each (range
;; 4..7) and passes them to gen_sse2_pshufhw_1.
;; NOTE(review): source lines 7307-7308 and 7315-7317 (condition, braces,
;; DONE) are not shown in this listing.
7303 (define_expand "sse2_pshufhw"
7304 [(match_operand:V8HI 0 "register_operand" "")
7305 (match_operand:V8HI 1 "nonimmediate_operand" "")
7306 (match_operand:SI 2 "const_int_operand" "")]
7309 int mask = INTVAL (operands[2]);
7310 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7311 GEN_INT (((mask >> 0) & 3) + 4),
7312 GEN_INT (((mask >> 2) & 3) + 4),
7313 GEN_INT (((mask >> 4) & 3) + 4),
7314 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn for the 128-bit pshufhw.  The selector list starts with a
;; fixed (const_int 0); operands 2..5 are selectors in 4..7.  The output
;; code subtracts 4 from each, packs the results into consecutive 2-bit
;; fields of mask, stores that in operands[2], and prints it as the
;; immediate of pshufhw (v-prefixed under AVX via %v).
;; Attributes: prefix_rep 1, prefix_data16 0.
;; NOTE(review): source lines 7320, 7323-7325 and 7330-7332 are not shown
;; here; the gap after (const_int 0) presumably holds further fixed entries.
7318 (define_insn "sse2_pshufhw_1"
7319 [(set (match_operand:V8HI 0 "register_operand" "=x")
7321 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7322 (parallel [(const_int 0)
7326 (match_operand 2 "const_4_to_7_operand" "")
7327 (match_operand 3 "const_4_to_7_operand" "")
7328 (match_operand 4 "const_4_to_7_operand" "")
7329 (match_operand 5 "const_4_to_7_operand" "")])))]
7333 mask |= (INTVAL (operands[2]) - 4) << 0;
7334 mask |= (INTVAL (operands[3]) - 4) << 2;
7335 mask |= (INTVAL (operands[4]) - 4) << 4;
7336 mask |= (INTVAL (operands[5]) - 4) << 6;
7337 operands[2] = GEN_INT (mask);
7339 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7341 [(set_attr "type" "sselog")
7342 (set_attr "prefix_rep" "1")
7343 (set_attr "prefix_data16" "0")
7344 (set_attr "prefix" "maybe_vex")
7345 (set_attr "length_immediate" "1")
7346 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from SImode operand 1.  Its
;; preparation statement sets operands[2] to the V4SImode zero constant.
;; NOTE(review): source lines 7350-7351 and 7353-7355 are not shown here,
;; so how operand 2 is used inside the pattern cannot be confirmed from
;; this listing.
7348 (define_expand "sse2_loadd"
7349 [(set (match_operand:V4SI 0 "register_operand" "")
7352 (match_operand:SI 1 "nonimmediate_operand" ""))
7356 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI register, with operand 1 supplying the
;; other elements (a register or zero, per reg_or_0_operand).  Alternatives:
;;   0: movd from memory, operand 1 constant (isa sse2)
;;   1: movd from a general register into a Yi-class destination
;;   2: movss from memory, operand 1 constant (isa noavx)
;;   3: movss from an SSE register, operand 1 tied to destination (noavx)
;;   4: vmovss with separate operand 1 (isa avx)
;; NOTE(review): source lines 7360-7361 and 7364-7366 are not shown here
;; (presumably the vec_merge/vec_duplicate heads, merge mask and condition).
7358 (define_insn "sse2_loadld"
7359 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7362 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7363 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7367 %vmovd\t{%2, %0|%0, %2}
7368 %vmovd\t{%2, %0|%0, %2}
7369 movss\t{%2, %0|%0, %2}
7370 movss\t{%2, %0|%0, %2}
7371 vmovss\t{%2, %1, %0|%0, %1, %2}"
7372 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7373 (set_attr "type" "ssemov")
7374 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7375 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element 0 of V4SI operand 1 into an SImode destination.
;; After reload it is split into a plain SImode move: the preparation
;; statement re-creates operand 1 as an SImode REG with the same register
;; number.  The split only fires when inter-unit moves are enabled, or the
;; destination is memory, or the destination is not a general register.
;; NOTE(review): source lines 7379 and 7382-7383 are not shown here
;; (presumably the vec_select head, the insn condition and the template).
7377 (define_insn_and_split "sse2_stored"
7378 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7380 (match_operand:V4SI 1 "register_operand" "x,Yi")
7381 (parallel [(const_int 0)])))]
7384 "&& reload_completed
7385 && (TARGET_INTER_UNIT_MOVES
7386 || MEM_P (operands [0])
7387 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7388 [(set (match_dup 0) (match_dup 1))]
7389 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI memory operand into a general
;; register.  The split code turns this into an ordinary SImode move from
;; the same address offset by 4 * index bytes (adjust_address).
;; NOTE(review): source lines 7393, 7396-7400, 7402 and 7404-7406 are not
;; shown here (conditions, template, braces, DONE).
7391 (define_insn_and_split "*vec_ext_v4si_mem"
7392 [(set (match_operand:SI 0 "register_operand" "=r")
7394 (match_operand:V4SI 1 "memory_operand" "o")
7395 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7401 int i = INTVAL (operands[2]);
7403 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander: store element 0 of V2DI operand 1 into DImode operand 0.
;; NOTE(review): source lines 7409 and 7412-7413 are not shown here
;; (presumably the vec_select head and the condition).
7407 (define_expand "sse_storeq"
7408 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7410 (match_operand:V2DI 1 "register_operand" "")
7411 (parallel [(const_int 0)])))]
;; 64-bit-target pattern extracting element 0 of V2DI operand 1 into DImode
;; operand 0; rejected when both operands are memory.  The third alternative
;; (general register from offsettable memory) is a plain integer mov of
;; type imov; the first two alternatives have type and mode left as *.
;; NOTE(review): source lines 7416 and 7420-7422 are not shown here, so the
;; templates of the first two alternatives cannot be read from this listing.
7414 (define_insn "*sse2_storeq_rex64"
7415 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7417 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7418 (parallel [(const_int 0)])))]
7419 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7423 mov{q}\t{%1, %0|%0, %1}"
7424 [(set_attr "type" "*,*,imov")
7425 (set_attr "mode" "*,*,DI")])
;; Extract element 0 of V2DI register operand 1 into an SSE register or
;; memory (constraint xm).
;; NOTE(review): source lines 7429 and 7432-7435 are not shown in this
;; listing, so the condition and output template of this insn are not
;; visible here.
7427 (define_insn "*sse2_storeq"
7428 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7430 (match_operand:V2DI 1 "register_operand" "x")
7431 (parallel [(const_int 0)])))]
;; NOTE(review): the head of this definition (source line 7435) is not
;; shown in this listing; from its shape -- a pattern, a condition and a
;; replacement pattern -- it looks like a separate split for the element-0
;; V2DI extract.  Confirm against the full file.
;; The replacement is a plain DImode move: the preparation statement
;; re-creates operand 1 as a DImode REG with the same register number.  The
;; visible condition lines require inter-unit moves to be enabled, or a
;; memory destination, or a destination that is not a general register.
7436 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7438 (match_operand:V2DI 1 "register_operand" "")
7439 (parallel [(const_int 0)])))]
7442 && (TARGET_INTER_UNIT_MOVES
7443 || MEM_P (operands [0])
7444 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7445 [(set (match_dup 0) (match_dup 1))]
7446 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit-target pattern extracting element 1 of V2DI operand 1; rejected
;; when both operands are memory.  Alternatives:
;;   0: movhps store of the register to memory
;;   1: psrldq by 8 in place (isa noavx, operand 1 tied to destination)
;;   2: vpsrldq by 8 (isa avx)
;;   3: movq from memory operand printed with the H modifier
;;   4: integer mov into a general register, same H-modified memory operand
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm in the operand-printing code.  Source lines 7450 and
;; 7454 are not shown in this listing.
7448 (define_insn "*vec_extractv2di_1_rex64"
7449 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7451 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7452 (parallel [(const_int 1)])))]
7453 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7455 %vmovhps\t{%1, %0|%0, %1}
7456 psrldq\t{$8, %0|%0, 8}
7457 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7458 %vmovq\t{%H1, %0|%0, %H1}
7459 mov{q}\t{%H1, %0|%0, %H1}"
7460 [(set_attr "isa" "*,noavx,avx,*,*")
7461 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7462 (set_attr "length_immediate" "*,1,1,*,*")
7463 (set_attr "memory" "*,none,none,*,*")
7464 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7465 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit-target (TARGET_SSE, not TARGET_64BIT) pattern extracting element 1
;; of V2DI operand 1; rejected when both operands are memory.  Alternatives:
;;   0: movhps store of the register to memory
;;   1: psrldq by 8 in place (isa sse2_noavx)
;;   2: vpsrldq by 8 (isa avx)
;;   3: movq from the H-modified memory operand (isa sse2)
;;   4: movhlps register to register (isa noavx)
;;   5: movlps from the H-modified memory operand (isa noavx)
;; NOTE(review): source lines 7469 and 7474 are not shown in this listing.
7467 (define_insn "*vec_extractv2di_1"
7468 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7470 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7471 (parallel [(const_int 1)])))]
7472 "!TARGET_64BIT && TARGET_SSE
7473 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7475 %vmovhps\t{%1, %0|%0, %1}
7476 psrldq\t{$8, %0|%0, 8}
7477 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7478 %vmovq\t{%H1, %0|%0, %H1}
7479 movhlps\t{%1, %0|%0, %1}
7480 movlps\t{%H1, %0|%0, %H1}"
7481 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7482 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7483 (set_attr "length_immediate" "*,1,1,*,*,*")
7484 (set_attr "memory" "*,none,none,*,*,*")
7485 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7486 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded broadcast of SImode operand 1 into a V4SI register:
;; vpshufd with immediate 0 when the source is an SSE register, vbroadcastss
;; when the source is memory.
;; NOTE(review): source lines 7490 and 7492-7493 are not shown here
;; (presumably the vec_duplicate head and the condition).
7488 (define_insn "*vec_dupv4si_avx"
7489 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7491 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7494 vpshufd\t{$0, %1, %0|%0, %1, 0}
7495 vbroadcastss\t{%1, %0|%0, %1}"
7496 [(set_attr "type" "sselog1,ssemov")
7497 (set_attr "length_immediate" "1,0")
7498 (set_attr "prefix_extra" "0,1")
7499 (set_attr "prefix" "vex")
7500 (set_attr "mode" "TI,V4SF")])
;; Broadcast of SImode register operand 1 into a V4SI register:
;; pshufd with immediate 0 (isa sse2), or shufps with immediate 0 applied to
;; the destination itself when operand 1 is tied to it.
;; NOTE(review): source lines 7504 and 7506-7507 are not shown here
;; (presumably the vec_duplicate head and the condition).
7502 (define_insn "*vec_dupv4si"
7503 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7505 (match_operand:SI 1 "register_operand" " x,0")))]
7508 pshufd\t{$0, %1, %0|%0, %1, 0}
7509 shufps\t{$0, %0, %0|%0, %0, 0}"
7510 [(set_attr "isa" "sse2,*")
7511 (set_attr "type" "sselog1")
7512 (set_attr "length_immediate" "1")
7513 (set_attr "mode" "TI,V4SF")])
;; Broadcast of DImode operand 1 into a V2DI register, three alternatives
;; (isa noavx, avx, any).  The visible templates are vpunpcklqdq with the
;; source printed twice via the d modifier, and movddup for a memory source.
;; NOTE(review): source lines 7517 and 7519-7521 are not shown here; the
;; template for the first (noavx) alternative and the condition are missing
;; from this listing.
7515 (define_insn "*vec_dupv2di_sse3"
7516 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7518 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7522 vpunpcklqdq\t{%d1, %0|%0, %d1}
7523 %vmovddup\t{%1, %0|%0, %1}"
7524 [(set_attr "isa" "noavx,avx,*")
7525 (set_attr "type" "sselog1")
7526 (set_attr "prefix" "orig,vex,maybe_vex")
7527 (set_attr "mode" "TI,TI,DF")])
;; Broadcast of DImode register operand 1 (tied to the destination in both
;; alternatives) into a V2DI register.  First alternative is isa sse2 with
;; type sselog1 and mode TI; the second has type ssemov and mode V4SF.
;; NOTE(review): source lines 7531 and 7533-7536 are not shown here, so the
;; condition and both output templates are missing from this listing.
7529 (define_insn "*vec_dupv2di"
7530 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7532 (match_operand:DI 1 "register_operand" " 0,0")))]
7537 [(set_attr "isa" "sse2,*")
7538 (set_attr "type" "sselog1,ssemov")
7539 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from two SImode operands.  Alternatives:
;;   0/1: pinsrd / vpinsrd inserting operand 2 at index 1
;;   2/3: punpckldq / vpunpckldq with operand 2 in an SSE register
;;   4:   movd of operand 1 when operand 2 is the constant (C)
;;   5:   punpckldq on MMX registers (*y)
;;   6:   movd into an MMX register when operand 2 is the constant (C)
;; NOTE(review): source lines 7543 and 7546-7547 are not shown here
;; (presumably the vec_concat head and the condition).
7541 (define_insn "*vec_concatv2si_sse4_1"
7542 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7544 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7545 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7548 pinsrd\t{$1, %2, %0|%0, %2, 1}
7549 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7550 punpckldq\t{%2, %0|%0, %2}
7551 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7552 %vmovd\t{%1, %0|%0, %1}
7553 punpckldq\t{%2, %0|%0, %2}
7554 movd\t{%1, %0|%0, %1}"
7555 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7556 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7557 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7558 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7559 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7560 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7562 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7563 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7564 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode operands; operand 2 is a register or zero.
;; Alternatives 0-1 target SSE registers (punpckldq, or movd when operand 2
;; is the constant); alternatives 2-3 are the same two forms on MMX
;; registers (*y).
;; NOTE(review): source lines 7567 and 7570-7571 are not shown here
;; (presumably the vec_concat head and the condition).
7565 (define_insn "*vec_concatv2si_sse2"
7566 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7568 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7569 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7572 punpckldq\t{%2, %0|%0, %2}
7573 movd\t{%1, %0|%0, %1}
7574 punpckldq\t{%2, %0|%0, %2}
7575 movd\t{%1, %0|%0, %1}"
7576 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7577 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from two SImode operands using single-precision-float
;; instructions on the SSE side: unpcklps for two registers, movss from
;; memory when operand 2 is the constant.  Alternatives 2-3 are the MMX
;; forms (punpckldq / movd on *y registers).
;; NOTE(review): source lines 7581 and 7584-7585 are not shown here
;; (presumably the vec_concat head and the condition).
7579 (define_insn "*vec_concatv2si_sse"
7580 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7582 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7583 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7586 unpcklps\t{%2, %0|%0, %2}
7587 movss\t{%1, %0|%0, %1}
7588 punpckldq\t{%2, %0|%0, %2}
7589 movd\t{%1, %0|%0, %1}"
7590 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7591 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI operands.  Alternatives:
;;   0: punpcklqdq (isa sse2_noavx)      1: vpunpcklqdq (isa avx)
;;   2: movlhps register form (noavx)    3: movhps from memory (noavx)
;;   4: vmovhps from memory (avx)
;; NOTE(review): source lines 7595 and 7598-7599 are not shown here
;; (presumably the vec_concat head and the condition).
7593 (define_insn "*vec_concatv4si"
7594 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7596 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7597 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7600 punpcklqdq\t{%2, %0|%0, %2}
7601 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7602 movlhps\t{%2, %0|%0, %2}
7603 movhps\t{%2, %0|%0, %2}
7604 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7605 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7606 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7607 (set_attr "prefix" "orig,vex,orig,orig,vex")
7608 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7610 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode operands, nine alternatives:
;;   0/1: pinsrq / vpinsrq inserting operand 2 at index 1
;;   2:   movq of operand 1 when operand 2 is the constant (C)
;;   3:   movd from a general register into a Yi-class destination
;;   4:   movq2dq from an MMX register (discouraged by the ! modifier)
;;   5/6: punpcklqdq / vpunpcklqdq with operand 2 in an SSE register
;;   7/8: movhps / vmovhps with operand 2 in memory
;; The visible attribute fragments select sselog for alternatives 0,1,5,6
;; and ssemov otherwise, and test alternatives 0 and 3 without TARGET_AVX
;; for prefix_rex.
;; NOTE(review): source lines 7614, 7619-7620, 7631-7632, 7637 and 7640 are
;; not shown here (vec_concat head, condition, and the heads/arms of the
;; computed attributes).
7611 (define_insn "*vec_concatv2di_rex64"
7612 [(set (match_operand:V2DI 0 "register_operand"
7613 "=x,x ,x ,Yi,!x,x,x,x,x")
7615 (match_operand:DI 1 "nonimmediate_operand"
7616 " 0,x ,xm,r ,*y,0,x,0,x")
7617 (match_operand:DI 2 "vector_move_operand"
7618 "rm,rm,C ,C ,C ,x,x,m,m")))]
7621 pinsrq\t{$1, %2, %0|%0, %2, 1}
7622 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7623 %vmovq\t{%1, %0|%0, %1}
7624 %vmovd\t{%1, %0|%0, %1}
7625 movq2dq\t{%1, %0|%0, %1}
7626 punpcklqdq\t{%2, %0|%0, %2}
7627 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7628 movhps\t{%2, %0|%0, %2}
7629 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7630 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7633 (eq_attr "alternative" "0,1,5,6")
7634 (const_string "sselog")
7635 (const_string "ssemov")))
7636 (set (attr "prefix_rex")
7638 (and (eq_attr "alternative" "0,3")
7639 (not (match_test "TARGET_AVX")))
7641 (const_string "*")))
7642 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7643 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7644 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7645 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit-target (TARGET_SSE, not TARGET_64BIT) pattern building a V2DI from
;; two DImode operands.  Alternatives:
;;   0: movq of operand 1 when operand 2 is the constant (isa sse2)
;;   1: movq2dq from an MMX register (isa sse2, slightly discouraged by ?)
;;   2/3: punpcklqdq (sse2_noavx) / vpunpcklqdq (avx)
;;   4: movlhps register form (noavx)
;;   5/6: movhps (noavx) / vmovhps (avx) with operand 2 in memory
;; NOTE(review): source lines 7649 and 7653 are not shown in this listing.
7647 (define_insn "vec_concatv2di"
7648 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7650 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7651 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7652 "!TARGET_64BIT && TARGET_SSE"
7654 %vmovq\t{%1, %0|%0, %1}
7655 movq2dq\t{%1, %0|%0, %1}
7656 punpcklqdq\t{%2, %0|%0, %2}
7657 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7658 movlhps\t{%2, %0|%0, %2}
7659 movhps\t{%2, %0|%0, %2}
7660 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7661 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7662 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7663 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7664 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo: operand 1 (VI124_AVX2 mode) is widened
;; into operand 0 (<sseunpackmode>).  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): judging by the pattern names the flags presumably mean
;; (unsigned, high half) -- confirm against ix86_expand_sse_unpack.
7666 (define_expand "vec_unpacks_lo_<mode>"
7667 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7668 (match_operand:VI124_AVX2 1 "register_operand" "")]
7670 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for vec_unpacks_hi: operand 1 (VI124_AVX2 mode) is widened
;; into operand 0 (<sseunpackmode>).  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): judging by the pattern names the flags presumably mean
;; (unsigned, high half) -- confirm against ix86_expand_sse_unpack.
7672 (define_expand "vec_unpacks_hi_<mode>"
7673 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7674 (match_operand:VI124_AVX2 1 "register_operand" "")]
7676 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for vec_unpacku_lo: operand 1 (VI124_AVX2 mode) is widened
;; into operand 0 (<sseunpackmode>).  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): judging by the pattern names the flags presumably mean
;; (unsigned, high half) -- confirm against ix86_expand_sse_unpack.
7678 (define_expand "vec_unpacku_lo_<mode>"
7679 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7680 (match_operand:VI124_AVX2 1 "register_operand" "")]
7682 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for vec_unpacku_hi: operand 1 (VI124_AVX2 mode) is widened
;; into operand 0 (<sseunpackmode>).  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): judging by the pattern names the flags presumably mean
;; (unsigned, high half) -- confirm against ix86_expand_sse_unpack.
7684 (define_expand "vec_unpacku_hi_<mode>"
7685 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7686 (match_operand:VI124_AVX2 1 "register_operand" "")]
7688 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI "uavg" pattern (the matching insn
;; *avx2_uavgv32qi3 emits vpavgb).  The pattern combines operands 1 and
;; 2 with a 32-element const_vector of ones; that vector is presumably
;; the "+1" rounding term of the average -- confirm against the PAVGB
;; definition.  Before the pattern is emitted the operands are passed
;; through ix86_fixup_binary_operands_no_copy for a PLUS in V32QImode.
7696 (define_expand "avx2_uavgv32qi3"
7697 [(set (match_operand:V32QI 0 "register_operand" "")
7703 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7705 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7706 (const_vector:V32QI [(const_int 1) (const_int 1)
7707 (const_int 1) (const_int 1)
7708 (const_int 1) (const_int 1)
7709 (const_int 1) (const_int 1)
7710 (const_int 1) (const_int 1)
7711 (const_int 1) (const_int 1)
7712 (const_int 1) (const_int 1)
7713 (const_int 1) (const_int 1)
7714 (const_int 1) (const_int 1)
7715 (const_int 1) (const_int 1)
7716 (const_int 1) (const_int 1)
7717 (const_int 1) (const_int 1)
7718 (const_int 1) (const_int 1)
7719 (const_int 1) (const_int 1)
7720 (const_int 1) (const_int 1)
7721 (const_int 1) (const_int 1)]))
7724 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI "uavg" pattern (the matching insn
;; *sse2_uavgv16qi3 emits pavgb/vpavgb).  The pattern combines operands
;; 1 and 2 with a 16-element const_vector of ones; that vector is
;; presumably the "+1" rounding term of the average -- confirm against
;; the PAVGB definition.  Before the pattern is emitted the operands are
;; passed through ix86_fixup_binary_operands_no_copy for a PLUS in
;; V16QImode.
7726 (define_expand "sse2_uavgv16qi3"
7727 [(set (match_operand:V16QI 0 "register_operand" "")
7733 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7735 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7736 (const_vector:V16QI [(const_int 1) (const_int 1)
7737 (const_int 1) (const_int 1)
7738 (const_int 1) (const_int 1)
7739 (const_int 1) (const_int 1)
7740 (const_int 1) (const_int 1)
7741 (const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)
7743 (const_int 1) (const_int 1)]))
7746 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Insn matching the V32QI "uavg" pattern; emitted as the three-operand
;; vpavgb.  Enabled under TARGET_AVX2 when ix86_binary_operator_ok
;; accepts the operands for a PLUS in V32QImode.  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.
7748 (define_insn "*avx2_uavgv32qi3"
7749 [(set (match_operand:V32QI 0 "register_operand" "=x")
7755 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7757 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7758 (const_vector:V32QI [(const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)
7760 (const_int 1) (const_int 1)
7761 (const_int 1) (const_int 1)
7762 (const_int 1) (const_int 1)
7763 (const_int 1) (const_int 1)
7764 (const_int 1) (const_int 1)
7765 (const_int 1) (const_int 1)
7766 (const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)
7768 (const_int 1) (const_int 1)
7769 (const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)]))
7775 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7776 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7777 [(set_attr "type" "sseiadd")
7778 (set_attr "prefix" "vex")
7779 (set_attr "mode" "OI")])
;; Insn matching the V16QI "uavg" pattern.  Alternative 0 is the
;; two-operand pavgb (operand 1 tied to the destination, isa "noavx");
;; alternative 1 is the three-operand vpavgb (isa "avx").  Enabled under
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands for a
;; PLUS in V16QImode.
7781 (define_insn "*sse2_uavgv16qi3"
7782 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7788 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7790 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7791 (const_vector:V16QI [(const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)]))
7800 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7802 pavgb\t{%2, %0|%0, %2}
7803 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7804 [(set_attr "isa" "noavx,avx")
7805 (set_attr "type" "sseiadd")
7806 (set_attr "prefix_data16" "1,*")
7807 (set_attr "prefix" "orig,vex")
7808 (set_attr "mode" "TI")])
;; Expander for the V16HI "uavg" pattern (the matching insn
;; *avx2_uavgv16hi3 emits vpavgw).  The pattern combines operands 1 and
;; 2 with a 16-element const_vector of ones; that vector is presumably
;; the "+1" rounding term of the average -- confirm against the PAVGW
;; definition.  Before the pattern is emitted the operands are passed
;; through ix86_fixup_binary_operands_no_copy for a PLUS in V16HImode.
7810 (define_expand "avx2_uavgv16hi3"
7811 [(set (match_operand:V16HI 0 "register_operand" "")
7817 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7819 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7820 (const_vector:V16HI [(const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)
7823 (const_int 1) (const_int 1)
7824 (const_int 1) (const_int 1)
7825 (const_int 1) (const_int 1)
7826 (const_int 1) (const_int 1)
7827 (const_int 1) (const_int 1)]))
7830 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI "uavg" pattern (the matching insn
;; *sse2_uavgv8hi3 emits pavgw/vpavgw).  The pattern combines operands 1
;; and 2 with an 8-element const_vector of ones; that vector is
;; presumably the "+1" rounding term of the average -- confirm against
;; the PAVGW definition.  Before the pattern is emitted the operands are
;; passed through ix86_fixup_binary_operands_no_copy for a PLUS in
;; V8HImode.
7832 (define_expand "sse2_uavgv8hi3"
7833 [(set (match_operand:V8HI 0 "register_operand" "")
7839 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7841 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7842 (const_vector:V8HI [(const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)]))
7848 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Insn matching the V16HI "uavg" pattern; emitted as the three-operand
;; vpavgw.  Enabled under TARGET_AVX2 when ix86_binary_operator_ok
;; accepts the operands for a PLUS in V16HImode.  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.
7850 (define_insn "*avx2_uavgv16hi3"
7851 [(set (match_operand:V16HI 0 "register_operand" "=x")
7857 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7859 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7860 (const_vector:V16HI [(const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)
7862 (const_int 1) (const_int 1)
7863 (const_int 1) (const_int 1)
7864 (const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)]))
7869 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7870 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7871 [(set_attr "type" "sseiadd")
7872 (set_attr "prefix" "vex")
7873 (set_attr "mode" "OI")])
;; Insn matching the V8HI "uavg" pattern.  Alternative 0 is the
;; two-operand pavgw (operand 1 tied to the destination, isa "noavx");
;; alternative 1 is the three-operand vpavgw (isa "avx").  Enabled under
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands for a
;; PLUS in V8HImode.
7875 (define_insn "*sse2_uavgv8hi3"
7876 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7882 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7884 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7885 (const_vector:V8HI [(const_int 1) (const_int 1)
7886 (const_int 1) (const_int 1)
7887 (const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)]))
7890 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7892 pavgw\t{%2, %0|%0, %2}
7893 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7894 [(set_attr "isa" "noavx,avx")
7895 (set_attr "type" "sseiadd")
7896 (set_attr "prefix_data16" "1,*")
7897 (set_attr "prefix" "orig,vex")
7898 (set_attr "mode" "TI")])
;; psadbw/vpsadbw insn, modelled as an unspec rather than explicit RTL.
;; The result has a VI8_AVX2 mode while both inputs use the matching
;; byte-vector mode <ssebytemode>.  Alternative 0 is the two-operand
;; legacy form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
7900 ;; The correct representation for this is absolutely enormous, and
7901 ;; surely not generally useful.
7902 (define_insn "<sse2_avx2>_psadbw"
7903 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7904 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7905 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7909 psadbw\t{%2, %0|%0, %2}
7910 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7911 [(set_attr "isa" "noavx,avx")
7912 (set_attr "type" "sseiadd")
7913 (set_attr "atom_unit" "simul")
7914 (set_attr "prefix_data16" "1,*")
7915 (set_attr "prefix" "orig,vex")
7916 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd family: reads a vector-float register (any mode of
;; the VF iterator) and writes an SImode general register.  The "%v"
;; template prefix together with prefix "maybe_vex" selects the VEX
;; spelling when AVX is in use.
7918 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7919 [(set (match_operand:SI 0 "register_operand" "=r")
7921 [(match_operand:VF 1 "register_operand" "x")]
7924 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7925 [(set_attr "type" "ssemov")
7926 (set_attr "prefix" "maybe_vex")
7927 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a 256-bit vector: reads V32QI register operand 1 and
;; writes SImode general register operand 0.  The "mode" attribute is
;; SI so that it agrees with the SImode destination and with the
;; sse2_pmovmskb pattern for the 128-bit form.
7929 (define_insn "avx2_pmovmskb"
7930 [(set (match_operand:SI 0 "register_operand" "=r")
7931 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7934 "vpmovmskb\t{%1, %0|%0, %1}"
7935 [(set_attr "type" "ssemov")
7936 (set_attr "prefix" "vex")
7937 (set_attr "mode" "SI")])
;; pmovmskb/vpmovmskb on a 128-bit vector: reads V16QI register operand
;; 1 and writes SImode general register operand 0.  The "%v" template
;; prefix together with prefix "maybe_vex" selects the VEX spelling when
;; AVX is in use.
7939 (define_insn "sse2_pmovmskb"
7940 [(set (match_operand:SI 0 "register_operand" "=r")
7941 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7944 "%vpmovmskb\t{%1, %0|%0, %1}"
7945 [(set_attr "type" "ssemov")
7946 (set_attr "prefix_data16" "1")
7947 (set_attr "prefix" "maybe_vex")
7948 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: operand 0 is the V16QI memory destination,
;; operands 1 and 2 are V16QI registers fed to the unspec.
7950 (define_expand "sse2_maskmovdqu"
7951 [(set (match_operand:V16QI 0 "memory_operand" "")
7952 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7953 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu/vmaskmovdqu insn.  The store address is operand 0, a
;; pointer-mode register restricted to constraint "D"; it does not
;; appear in the output template, which names only operands 1 and 2.
;; The old contents of the destination are also an input to the unspec
;; via (mem:V16QI (match_dup 0)).  length_vex is computed by hand as 3
;; plus one when operand 2 is an extended (REX) SSE register.
7958 (define_insn "*sse2_maskmovdqu"
7959 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7960 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7961 (match_operand:V16QI 2 "register_operand" "x")
7962 (mem:V16QI (match_dup 0))]
7965 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7966 [(set_attr "type" "ssemov")
7967 (set_attr "prefix_data16" "1")
7968 ;; The implicit %rdi operand confuses default length_vex computation.
7969 (set (attr "length_vex")
7970 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7971 (set_attr "prefix" "maybe_vex")
7972 (set_attr "mode" "TI")])
;; ldmxcsr insn: an unspec_volatile whose only operand is an SImode
;; memory location.  Attributes mark it as a memory load that touches
;; MXCSR for Atom scheduling.
7974 (define_insn "sse_ldmxcsr"
7975 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7979 [(set_attr "type" "sse")
7980 (set_attr "atom_sse_attr" "mxcsr")
7981 (set_attr "prefix" "maybe_vex")
7982 (set_attr "memory" "load")])
;; stmxcsr insn: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR).  Attributes mark it as a memory store that touches
;; MXCSR for Atom scheduling.
7984 (define_insn "sse_stmxcsr"
7985 [(set (match_operand:SI 0 "memory_operand" "=m")
7986 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7989 [(set_attr "type" "sse")
7990 (set_attr "atom_sse_attr" "mxcsr")
7991 (set_attr "prefix" "maybe_vex")
7992 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.
;; The fence is modelled as an UNSPEC_SFENCE on a BLKmode memory
;; reference; the preparation code creates that MEM over a scratch
;; address and marks it volatile.
7994 (define_expand "sse_sfence"
7996 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7997 "TARGET_SSE || TARGET_3DNOW_A"
7999 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8000 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_SFENCE set on a BLKmode operand, available
;; with TARGET_SSE or TARGET_3DNOW_A.  It is given address length 0 and
;; memory attribute "unknown".
8003 (define_insn "*sse_sfence"
8004 [(set (match_operand:BLK 0 "" "")
8005 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8006 "TARGET_SSE || TARGET_3DNOW_A"
8008 [(set_attr "type" "sse")
8009 (set_attr "length_address" "0")
8010 (set_attr "atom_sse_attr" "fence")
8011 (set_attr "memory" "unknown")])
;; clflush insn: an unspec_volatile whose operand is an address
;; (address_operand, constraint "p").  It is classed with the fences for
;; Atom scheduling and given memory attribute "unknown".
8013 (define_insn "sse2_clflush"
8014 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8018 [(set_attr "type" "sse")
8019 (set_attr "atom_sse_attr" "fence")
8020 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is modelled as an UNSPEC_MFENCE on a
;; BLKmode memory reference; the preparation code creates that MEM over
;; a scratch address and marks it volatile.
8022 (define_expand "sse2_mfence"
8024 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8027 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8028 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_MFENCE set on a BLKmode operand.  Enabled
;; when TARGET_64BIT or TARGET_SSE2 holds.  It is given address length 0
;; and memory attribute "unknown".
8031 (define_insn "*sse2_mfence"
8032 [(set (match_operand:BLK 0 "" "")
8033 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8034 "TARGET_64BIT || TARGET_SSE2"
8036 [(set_attr "type" "sse")
8037 (set_attr "length_address" "0")
8038 (set_attr "atom_sse_attr" "fence")
8039 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is modelled as an UNSPEC_LFENCE on a
;; BLKmode memory reference; the preparation code creates that MEM over
;; a scratch address and marks it volatile.
8041 (define_expand "sse2_lfence"
8043 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8046 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8047 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_LFENCE set on a BLKmode operand.  Unlike the
;; sfence and mfence insns it uses the dedicated Atom attribute value
;; "lfence" rather than "fence".
8050 (define_insn "*sse2_lfence"
8051 [(set (match_operand:BLK 0 "" "")
8052 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8055 [(set_attr "type" "sse")
8056 (set_attr "length_address" "0")
8057 (set_attr "atom_sse_attr" "lfence")
8058 (set_attr "memory" "unknown")])
;; mwait insn: an unspec_volatile taking two SImode register operands,
;; pinned by constraints "a" and "c".  The encoded length is fixed at 3
;; bytes.
8060 (define_insn "sse3_mwait"
8061 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8062 (match_operand:SI 1 "register_operand" "c")]
8065 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8066 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8067 ;; we only need to set up 32bit registers.
8069 [(set_attr "length" "3")])
;; 32-bit monitor insn (TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile taking three SImode register operands, pinned by
;; constraints "a", "c" and "d".  The encoded length is fixed at 3 bytes.
8071 (define_insn "sse3_monitor"
8072 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8073 (match_operand:SI 1 "register_operand" "c")
8074 (match_operand:SI 2 "register_operand" "d")]
8076 "TARGET_SSE3 && !TARGET_64BIT"
8077 "monitor\t%0, %1, %2"
8078 [(set_attr "length" "3")])
;; 64-bit monitor insn (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is a
;; DImode register in "a"; operands 1 and 2 stay SImode in "c" and "d".
;; The encoded length is fixed at 3 bytes.
8080 (define_insn "sse3_monitor64"
8081 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8082 (match_operand:SI 1 "register_operand" "c")
8083 (match_operand:SI 2 "register_operand" "d")]
8085 "TARGET_SSE3 && TARGET_64BIT"
8086 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8087 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8088 ;; zero extended to 64bit, we only need to set up 32bit registers.
8090 [(set_attr "length" "3")])
8092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8094 ;; SSSE3 instructions
8096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vphaddw.  The RTL spells the operation out element by
;; element: HI elements 0-15 of operand 1 are selected in adjacent pairs
;; (0,1), (2,3), ..., (14,15), followed by the same pairs of operand 2.
;; Operand 2 may be a register or memory; only the three-operand VEX
;; form is emitted.
8098 (define_insn "avx2_phaddwv16hi3"
8099 [(set (match_operand:V16HI 0 "register_operand" "=x")
8106 (match_operand:V16HI 1 "register_operand" "x")
8107 (parallel [(const_int 0)]))
8108 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8110 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8111 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8114 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8115 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8118 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8122 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8123 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8125 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8126 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8130 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8132 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8133 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8139 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8140 (parallel [(const_int 0)]))
8141 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8144 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8147 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8148 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8150 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8151 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8155 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8156 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8158 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8159 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8163 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8165 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8168 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8169 [(set_attr "type" "sseiadd")
8170 (set_attr "prefix_extra" "1")
8171 (set_attr "prefix" "vex")
8172 (set_attr "mode" "OI")])
;; 128-bit phaddw/vphaddw.  The RTL selects HI elements 0-7 of operand 1
;; in adjacent pairs, followed by the same pairs of operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the VEX form (isa "avx").
8174 (define_insn "ssse3_phaddwv8hi3"
8175 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8181 (match_operand:V8HI 1 "register_operand" "0,x")
8182 (parallel [(const_int 0)]))
8183 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8185 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8186 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8189 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8190 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8198 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8199 (parallel [(const_int 0)]))
8200 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8203 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8206 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8207 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8209 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8210 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8213 phaddw\t{%2, %0|%0, %2}
8214 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8215 [(set_attr "isa" "noavx,avx")
8216 (set_attr "type" "sseiadd")
8217 (set_attr "atom_unit" "complex")
8218 (set_attr "prefix_data16" "1,*")
8219 (set_attr "prefix_extra" "1")
8220 (set_attr "prefix" "orig,vex")
8221 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phaddw using "y"-constrained registers, with operand 1
;; tied to the destination.  The RTL selects HI elements 0-3 of operand
;; 1 in adjacent pairs, followed by the same pairs of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8223 (define_insn "ssse3_phaddwv4hi3"
8224 [(set (match_operand:V4HI 0 "register_operand" "=y")
8229 (match_operand:V4HI 1 "register_operand" "0")
8230 (parallel [(const_int 0)]))
8231 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8233 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8234 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8238 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8239 (parallel [(const_int 0)]))
8240 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8242 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8243 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8245 "phaddw\t{%2, %0|%0, %2}"
8246 [(set_attr "type" "sseiadd")
8247 (set_attr "atom_unit" "complex")
8248 (set_attr "prefix_extra" "1")
8249 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8250 (set_attr "mode" "DI")])
;; 256-bit vphaddd.  The RTL selects SI elements 0-7 of operand 1 in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), followed by the same pairs
;; of operand 2.  Operand 2 may be a register or memory; only the
;; three-operand VEX form is emitted.
8252 (define_insn "avx2_phadddv8si3"
8253 [(set (match_operand:V8SI 0 "register_operand" "=x")
8259 (match_operand:V8SI 1 "register_operand" "x")
8260 (parallel [(const_int 0)]))
8261 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8263 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8264 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8267 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8268 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8270 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8271 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8276 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8277 (parallel [(const_int 0)]))
8278 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8280 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8281 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8284 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8285 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8287 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8288 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8290 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8291 [(set_attr "type" "sseiadd")
8292 (set_attr "prefix_extra" "1")
8293 (set_attr "prefix" "vex")
8294 (set_attr "mode" "OI")])
;; 128-bit phaddd/vphaddd.  The RTL selects SI elements 0-3 of operand 1
;; in adjacent pairs, followed by the same pairs of operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the VEX form (isa "avx").
8296 (define_insn "ssse3_phadddv4si3"
8297 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8302 (match_operand:V4SI 1 "register_operand" "0,x")
8303 (parallel [(const_int 0)]))
8304 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8306 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8307 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8311 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8312 (parallel [(const_int 0)]))
8313 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8315 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8316 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8319 phaddd\t{%2, %0|%0, %2}
8320 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8321 [(set_attr "isa" "noavx,avx")
8322 (set_attr "type" "sseiadd")
8323 (set_attr "atom_unit" "complex")
8324 (set_attr "prefix_data16" "1,*")
8325 (set_attr "prefix_extra" "1")
8326 (set_attr "prefix" "orig,vex")
8327 (set_attr "mode" "TI")])
;; 64-bit (V2SI) phaddd using "y"-constrained registers, with operand 1
;; tied to the destination.  The RTL selects SI elements 0 and 1 of
;; operand 1, followed by elements 0 and 1 of operand 2.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
8329 (define_insn "ssse3_phadddv2si3"
8330 [(set (match_operand:V2SI 0 "register_operand" "=y")
8334 (match_operand:V2SI 1 "register_operand" "0")
8335 (parallel [(const_int 0)]))
8336 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8339 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8340 (parallel [(const_int 0)]))
8341 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8343 "phaddd\t{%2, %0|%0, %2}"
8344 [(set_attr "type" "sseiadd")
8345 (set_attr "atom_unit" "complex")
8346 (set_attr "prefix_extra" "1")
8347 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8348 (set_attr "mode" "DI")])
;; 256-bit vphaddsw.  The RTL selects HI elements 0-15 of operand 1 in
;; adjacent pairs (0,1), (2,3), ..., (14,15), followed by the same pairs
;; of operand 2.  Operand 2 may be a register or memory; only the
;; three-operand VEX form is emitted.
8350 (define_insn "avx2_phaddswv16hi3"
8351 [(set (match_operand:V16HI 0 "register_operand" "=x")
8358 (match_operand:V16HI 1 "register_operand" "x")
8359 (parallel [(const_int 0)]))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8367 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8369 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8370 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8374 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8375 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8377 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8378 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8382 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8385 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8391 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8392 (parallel [(const_int 0)]))
8393 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8395 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8396 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8400 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8403 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8407 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8410 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8415 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8418 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8420 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8421 [(set_attr "type" "sseiadd")
8422 (set_attr "prefix_extra" "1")
8423 (set_attr "prefix" "vex")
8424 (set_attr "mode" "OI")])
;; 128-bit phaddsw/vphaddsw.  The RTL selects HI elements 0-7 of operand
;; 1 in adjacent pairs, followed by the same pairs of operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the VEX form (isa "avx").
8426 (define_insn "ssse3_phaddswv8hi3"
8427 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8433 (match_operand:V8HI 1 "register_operand" "0,x")
8434 (parallel [(const_int 0)]))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8437 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8438 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8450 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8451 (parallel [(const_int 0)]))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8454 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8458 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8459 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8461 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8462 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8465 phaddsw\t{%2, %0|%0, %2}
8466 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8467 [(set_attr "isa" "noavx,avx")
8468 (set_attr "type" "sseiadd")
8469 (set_attr "atom_unit" "complex")
8470 (set_attr "prefix_data16" "1,*")
8471 (set_attr "prefix_extra" "1")
8472 (set_attr "prefix" "orig,vex")
8473 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phaddsw using "y"-constrained registers, with operand 1
;; tied to the destination.  The RTL selects HI elements 0-3 of operand
;; 1 in adjacent pairs, followed by the same pairs of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8475 (define_insn "ssse3_phaddswv4hi3"
8476 [(set (match_operand:V4HI 0 "register_operand" "=y")
8481 (match_operand:V4HI 1 "register_operand" "0")
8482 (parallel [(const_int 0)]))
8483 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8485 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8486 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8490 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8491 (parallel [(const_int 0)]))
8492 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8494 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8495 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8497 "phaddsw\t{%2, %0|%0, %2}"
8498 [(set_attr "type" "sseiadd")
8499 (set_attr "atom_unit" "complex")
8500 (set_attr "prefix_extra" "1")
8501 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8502 (set_attr "mode" "DI")])
;; 256-bit vphsubw.  The RTL selects HI elements 0-15 of operand 1 in
;; adjacent pairs (0,1), (2,3), ..., (14,15), followed by the same pairs
;; of operand 2.  Operand 2 may be a register or memory; only the
;; three-operand VEX form is emitted.
8504 (define_insn "avx2_phsubwv16hi3"
8505 [(set (match_operand:V16HI 0 "register_operand" "=x")
8512 (match_operand:V16HI 1 "register_operand" "x")
8513 (parallel [(const_int 0)]))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8521 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8524 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8528 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8531 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8532 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8535 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8536 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8539 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8545 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8546 (parallel [(const_int 0)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8553 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8554 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8556 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8557 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8561 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8562 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8564 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8569 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8571 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8572 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8574 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8575 [(set_attr "type" "sseiadd")
8576 (set_attr "prefix_extra" "1")
8577 (set_attr "prefix" "vex")
8578 (set_attr "mode" "OI")])
;; 128-bit phsubw/vphsubw.  The RTL selects HI elements 0-7 of operand 1
;; in adjacent pairs, followed by the same pairs of operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the VEX form (isa "avx").
8580 (define_insn "ssse3_phsubwv8hi3"
8581 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8587 (match_operand:V8HI 1 "register_operand" "0,x")
8588 (parallel [(const_int 0)]))
8589 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8591 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8592 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8595 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8596 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8598 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8599 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8604 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8605 (parallel [(const_int 0)]))
8606 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8608 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8612 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8613 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8615 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8616 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8619 phsubw\t{%2, %0|%0, %2}
8620 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8621 [(set_attr "isa" "noavx,avx")
8622 (set_attr "type" "sseiadd")
8623 (set_attr "atom_unit" "complex")
8624 (set_attr "prefix_data16" "1,*")
8625 (set_attr "prefix_extra" "1")
8626 (set_attr "prefix" "orig,vex")
8627 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phsubw using "y"-constrained registers, with operand 1
;; tied to the destination.  The RTL selects HI elements 0-3 of operand
;; 1 in adjacent pairs, followed by the same pairs of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8629 (define_insn "ssse3_phsubwv4hi3"
8630 [(set (match_operand:V4HI 0 "register_operand" "=y")
8635 (match_operand:V4HI 1 "register_operand" "0")
8636 (parallel [(const_int 0)]))
8637 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8639 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8640 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8644 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8645 (parallel [(const_int 0)]))
8646 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8648 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8649 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8651 "phsubw\t{%2, %0|%0, %2}"
8652 [(set_attr "type" "sseiadd")
8653 (set_attr "atom_unit" "complex")
8654 (set_attr "prefix_extra" "1")
8655 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8656 (set_attr "mode" "DI")])
;; 256-bit vphsubd.  The RTL selects SI elements 0-7 of operand 1 in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), followed by the same pairs
;; of operand 2.  Operand 2 may be a register or memory; only the
;; three-operand VEX form is emitted.
8658 (define_insn "avx2_phsubdv8si3"
8659 [(set (match_operand:V8SI 0 "register_operand" "=x")
8665 (match_operand:V8SI 1 "register_operand" "x")
8666 (parallel [(const_int 0)]))
8667 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8669 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8670 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8673 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8674 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8676 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8677 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8682 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8683 (parallel [(const_int 0)]))
8684 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8686 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8687 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8690 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8691 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8693 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8694 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8696 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8697 [(set_attr "type" "sseiadd")
8698 (set_attr "prefix_extra" "1")
8699 (set_attr "prefix" "vex")
8700 (set_attr "mode" "OI")])
;; 128-bit phsubd/vphsubd.  The RTL selects SI elements 0-3 of operand 1
;; in adjacent pairs, followed by the same pairs of operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the VEX form (isa "avx").
8702 (define_insn "ssse3_phsubdv4si3"
8703 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8708 (match_operand:V4SI 1 "register_operand" "0,x")
8709 (parallel [(const_int 0)]))
8710 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8712 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8713 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8717 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8718 (parallel [(const_int 0)]))
8719 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8721 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8722 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8725 phsubd\t{%2, %0|%0, %2}
8726 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8728 [(set_attr "isa" "noavx,avx")
8729 (set_attr "type" "sseiadd")
8730 (set_attr "atom_unit" "complex")
8731 (set_attr "prefix_data16" "1,*")
8732 (set_attr "prefix_extra" "1")
8733 (set_attr "prefix" "orig,vex")
8734 (set_attr "mode" "TI")])
;; phsubd on 64-bit V2SI vectors using the "y" register constraint.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  prefix_rex is computed via x86_extended_reg_mentioned_p.
8736 (define_insn "ssse3_phsubdv2si3"
8737 [(set (match_operand:V2SI 0 "register_operand" "=y")
8741 (match_operand:V2SI 1 "register_operand" "0")
8742 (parallel [(const_int 0)]))
8743 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8746 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8747 (parallel [(const_int 0)]))
8748 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8750 "phsubd\t{%2, %0|%0, %2}"
8751 [(set_attr "type" "sseiadd")
8752 (set_attr "atom_unit" "complex")
8753 (set_attr "prefix_extra" "1")
8754 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8755 (set_attr "mode" "DI")])
;; vphsubsw on 256-bit V16HI vectors.  Operand 1 must be a register; operand 2
;; may be a register or memory.  The RTL selects adjacent HI element pairs
;; (0/1 ... 14/15) from each source operand.  Always VEX-encoded, mode OI.
8757 (define_insn "avx2_phsubswv16hi3"
8758 [(set (match_operand:V16HI 0 "register_operand" "=x")
8765 (match_operand:V16HI 1 "register_operand" "x")
8766 (parallel [(const_int 0)]))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8769 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8774 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8776 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8777 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8781 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8782 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8784 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8785 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8788 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8789 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8791 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8792 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8798 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8799 (parallel [(const_int 0)]))
8800 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8803 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8806 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8807 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8809 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8810 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8814 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8815 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8817 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8818 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8821 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8822 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8824 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8825 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8827 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8828 [(set_attr "type" "sseiadd")
8829 (set_attr "prefix_extra" "1")
8830 (set_attr "prefix" "vex")
8831 (set_attr "mode" "OI")])
;; phsubsw / vphsubsw on 128-bit V8HI vectors.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the three-operand
;; VEX form.  The RTL selects adjacent HI element pairs (0/1 ... 6/7) from each
;; source operand.
8833 (define_insn "ssse3_phsubswv8hi3"
8834 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8840 (match_operand:V8HI 1 "register_operand" "0,x")
8841 (parallel [(const_int 0)]))
8842 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8845 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8848 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8849 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8851 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8852 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8857 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8858 (parallel [(const_int 0)]))
8859 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8862 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8865 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8866 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8868 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8869 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8872 phsubsw\t{%2, %0|%0, %2}
8873 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8874 [(set_attr "isa" "noavx,avx")
8875 (set_attr "type" "sseiadd")
8876 (set_attr "atom_unit" "complex")
8877 (set_attr "prefix_data16" "1,*")
8878 (set_attr "prefix_extra" "1")
8879 (set_attr "prefix" "orig,vex")
8880 (set_attr "mode" "TI")])
;; phsubsw on 64-bit V4HI vectors using the "y" register constraint.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects adjacent HI element pairs (0/1, 2/3) from each
;; source operand.
8882 (define_insn "ssse3_phsubswv4hi3"
8883 [(set (match_operand:V4HI 0 "register_operand" "=y")
8888 (match_operand:V4HI 1 "register_operand" "0")
8889 (parallel [(const_int 0)]))
8890 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8892 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8893 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8897 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8898 (parallel [(const_int 0)]))
8899 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8901 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8902 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8904 "phsubsw\t{%2, %0|%0, %2}"
8905 [(set_attr "type" "sseiadd")
8906 (set_attr "atom_unit" "complex")
8907 (set_attr "prefix_extra" "1")
8908 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8909 (set_attr "mode" "DI")])
;; vpmaddubsw: two V32QI sources, V16HI result.  Each source is read through
;; two V16QI vec_selects, one whose index list starts at 0 and one starting
;; at 1 and ending at 31 (presumably the even and odd bytes; the full index
;; lists are not visible in this listing).  Always VEX-encoded, mode OI.
8911 (define_insn "avx2_pmaddubsw256"
8912 [(set (match_operand:V16HI 0 "register_operand" "=x")
8917 (match_operand:V32QI 1 "register_operand" "x")
8918 (parallel [(const_int 0)
8936 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8937 (parallel [(const_int 0)
8955 (vec_select:V16QI (match_dup 1)
8956 (parallel [(const_int 1)
8973 (vec_select:V16QI (match_dup 2)
8974 (parallel [(const_int 1)
8989 (const_int 31)]))))))]
8991 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8992 [(set_attr "type" "sseiadd")
8993 (set_attr "prefix_extra" "1")
8994 (set_attr "prefix" "vex")
8995 (set_attr "mode" "OI")])
;; pmaddubsw / vpmaddubsw: two V16QI sources, V8HI result.  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx) is
;; the three-operand VEX form.  Each source is read through V8QI vec_selects
;; whose index lists start at 0 and at 1 (ending at 15).
8997 (define_insn "ssse3_pmaddubsw128"
8998 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9003 (match_operand:V16QI 1 "register_operand" "0,x")
9004 (parallel [(const_int 0)
9014 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9015 (parallel [(const_int 0)
9025 (vec_select:V8QI (match_dup 1)
9026 (parallel [(const_int 1)
9035 (vec_select:V8QI (match_dup 2)
9036 (parallel [(const_int 1)
9043 (const_int 15)]))))))]
9046 pmaddubsw\t{%2, %0|%0, %2}
9047 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9048 [(set_attr "isa" "noavx,avx")
9049 (set_attr "type" "sseiadd")
9050 (set_attr "atom_unit" "simul")
9051 (set_attr "prefix_data16" "1,*")
9052 (set_attr "prefix_extra" "1")
9053 (set_attr "prefix" "orig,vex")
9054 (set_attr "mode" "TI")])
;; pmaddubsw on 64-bit operands using the "y" register constraint: two V8QI
;; sources, V4HI result.  Operand 1 is tied to the destination.  Each source
;; is read through V4QI vec_selects whose index lists start at 0 and at 1
;; (ending at 7).
9056 (define_insn "ssse3_pmaddubsw"
9057 [(set (match_operand:V4HI 0 "register_operand" "=y")
9062 (match_operand:V8QI 1 "register_operand" "0")
9063 (parallel [(const_int 0)
9069 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9070 (parallel [(const_int 0)
9076 (vec_select:V4QI (match_dup 1)
9077 (parallel [(const_int 1)
9082 (vec_select:V4QI (match_dup 2)
9083 (parallel [(const_int 1)
9086 (const_int 7)]))))))]
9088 "pmaddubsw\t{%2, %0|%0, %2}"
9089 [(set_attr "type" "sseiadd")
9090 (set_attr "atom_unit" "simul")
9091 (set_attr "prefix_extra" "1")
9092 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9093 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply pattern matched by *avx2_umulhrswv16hi3.
;; Both sources may be in memory on entry; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on the operands.
;; The RTL includes a V16HI constant vector of all ones.
9095 (define_expand "avx2_umulhrswv16hi3"
9096 [(set (match_operand:V16HI 0 "register_operand" "")
9103 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9105 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9107 (const_vector:V16HI [(const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)
9109 (const_int 1) (const_int 1)
9110 (const_int 1) (const_int 1)
9111 (const_int 1) (const_int 1)
9112 (const_int 1) (const_int 1)
9113 (const_int 1) (const_int 1)
9114 (const_int 1) (const_int 1)]))
9117 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmulhrsw on 256-bit V16HI vectors.  Operands 1 and 2 are commutative
;; ("%"); the insn is valid only with TARGET_AVX2 and when
;; ix86_binary_operator_ok accepts the operands.  The RTL includes a V16HI
;; constant vector of all ones.  Always VEX-encoded, mode OI.
9119 (define_insn "*avx2_umulhrswv16hi3"
9120 [(set (match_operand:V16HI 0 "register_operand" "=x")
9127 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9129 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9131 (const_vector:V16HI [(const_int 1) (const_int 1)
9132 (const_int 1) (const_int 1)
9133 (const_int 1) (const_int 1)
9134 (const_int 1) (const_int 1)
9135 (const_int 1) (const_int 1)
9136 (const_int 1) (const_int 1)
9137 (const_int 1) (const_int 1)
9138 (const_int 1) (const_int 1)]))
9140 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9141 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9142 [(set_attr "type" "sseimul")
9143 (set_attr "prefix_extra" "1")
9144 (set_attr "prefix" "vex")
9145 (set_attr "mode" "OI")])
;; Expander for the V8HI multiply pattern matched by *ssse3_pmulhrswv8hi3.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, ...) on the operands.  The RTL includes a V8HI constant vector of
;; all ones.
9147 (define_expand "ssse3_pmulhrswv8hi3"
9148 [(set (match_operand:V8HI 0 "register_operand" "")
9155 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9157 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9159 (const_vector:V8HI [(const_int 1) (const_int 1)
9160 (const_int 1) (const_int 1)
9161 (const_int 1) (const_int 1)
9162 (const_int 1) (const_int 1)]))
9165 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhrsw / vpmulhrsw on 128-bit V8HI vectors.  Operands 1 and 2 are
;; commutative ("%").  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
;; Valid with TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands.
9167 (define_insn "*ssse3_pmulhrswv8hi3"
9168 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9175 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9177 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9179 (const_vector:V8HI [(const_int 1) (const_int 1)
9180 (const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)]))
9184 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9186 pmulhrsw\t{%2, %0|%0, %2}
9187 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9188 [(set_attr "isa" "noavx,avx")
9189 (set_attr "type" "sseimul")
9190 (set_attr "prefix_data16" "1,*")
9191 (set_attr "prefix_extra" "1")
9192 (set_attr "prefix" "orig,vex")
9193 (set_attr "mode" "TI")])
;; Expander for the V4HI multiply pattern matched by *ssse3_pmulhrswv4hi3.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, ...) on the operands.  The RTL includes a V4HI constant vector of
;; all ones.
9195 (define_expand "ssse3_pmulhrswv4hi3"
9196 [(set (match_operand:V4HI 0 "register_operand" "")
9203 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9205 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9207 (const_vector:V4HI [(const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)]))
9211 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on 64-bit V4HI vectors using the "y" register constraint.
;; Operands 1 and 2 are commutative ("%") and operand 1 is tied to the
;; destination.  Valid with TARGET_SSSE3 when ix86_binary_operator_ok accepts
;; the operands.
9213 (define_insn "*ssse3_pmulhrswv4hi3"
9214 [(set (match_operand:V4HI 0 "register_operand" "=y")
9221 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9223 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9225 (const_vector:V4HI [(const_int 1) (const_int 1)
9226 (const_int 1) (const_int 1)]))
9228 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9229 "pmulhrsw\t{%2, %0|%0, %2}"
9230 [(set_attr "type" "sseimul")
9231 (set_attr "prefix_extra" "1")
9232 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9233 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX2 mode iterator, modelled as an unspec of
;; operands 1 and 2.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
9235 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9236 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9237 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9238 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9242 pshufb\t{%2, %0|%0, %2}
9243 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9244 [(set_attr "isa" "noavx,avx")
9245 (set_attr "type" "sselog1")
9246 (set_attr "prefix_data16" "1,*")
9247 (set_attr "prefix_extra" "1")
9248 (set_attr "prefix" "orig,vex")
9249 (set_attr "mode" "<sseinsnmode>")])
;; pshufb on 64-bit V8QI vectors using the "y" register constraint, modelled
;; as an unspec of operands 1 and 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  prefix_rex is computed via
;; x86_extended_reg_mentioned_p.
;; The output template used to be followed by a stray C-style ';', which the
;; .md reader only tolerated as the start of an empty comment; it is removed.
9251 (define_insn "ssse3_pshufbv8qi3"
9252 [(set (match_operand:V8QI 0 "register_operand" "=y")
9253 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9254 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9257 "pshufb\t{%2, %0|%0, %2}"
9258 [(set_attr "type" "sselog1")
9259 (set_attr "prefix_extra" "1")
9260 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9261 (set_attr "mode" "DI")])
;; psign<suffix> / vpsign<suffix> over the VI124_AVX2 mode iterator.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative 1
;; (isa avx) is the three-operand VEX form.  The mnemonic suffix comes from
;; <ssemodesuffix>.
9263 (define_insn "<ssse3_avx2>_psign<mode>3"
9264 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9266 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9267 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9271 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9272 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9273 [(set_attr "isa" "noavx,avx")
9274 (set_attr "type" "sselog1")
9275 (set_attr "prefix_data16" "1,*")
9276 (set_attr "prefix_extra" "1")
9277 (set_attr "prefix" "orig,vex")
9278 (set_attr "mode" "<sseinsnmode>")])
;; psign<suffix> over the MMXMODEI mode iterator using the "y" register
;; constraint.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  The mnemonic suffix comes from <mmxvecsize>.
;; The output template used to be followed by a stray C-style ';', which the
;; .md reader only tolerated as the start of an empty comment; it is removed.
9280 (define_insn "ssse3_psign<mode>3"
9281 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9283 [(match_operand:MMXMODEI 1 "register_operand" "0")
9284 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9287 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9288 [(set_attr "type" "sselog1")
9289 (set_attr "prefix_extra" "1")
9290 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9291 (set_attr "mode" "DI")])
;; palignr / vpalignr over the SSESCALARMODE iterator, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing the immediate, then picks the legacy or
;; VEX template according to which_alternative.
9293 (define_insn "<ssse3_avx2>_palignr<mode>"
9294 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9295 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9296 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9297 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9301 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9303 switch (which_alternative)
9306 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9308 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9313 [(set_attr "isa" "noavx,avx")
9314 (set_attr "type" "sseishft")
9315 (set_attr "atom_unit" "sishuf")
9316 (set_attr "prefix_data16" "1,*")
9317 (set_attr "prefix_extra" "1")
9318 (set_attr "length_immediate" "1")
9319 (set_attr "prefix" "orig,vex")
9320 (set_attr "mode" "<sseinsnmode>")])
;; palignr on DImode operands using the "y" register constraint, modelled as
;; an unspec.  Operand 1 is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before the immediate is
;; printed.
9322 (define_insn "ssse3_palignrdi"
9323 [(set (match_operand:DI 0 "register_operand" "=y")
9324 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9325 (match_operand:DI 2 "nonimmediate_operand" "ym")
9326 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9330 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9331 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9333 [(set_attr "type" "sseishft")
9334 (set_attr "atom_unit" "sishuf")
9335 (set_attr "prefix_extra" "1")
9336 (set_attr "length_immediate" "1")
9337 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9338 (set_attr "mode" "DI")])
;; pabs<suffix> over the VI124_AVX2 mode iterator: one source (register or
;; memory), register destination.  The template uses the %v prefix and the
;; prefix attribute is maybe_vex.
9340 (define_insn "abs<mode>2"
9341 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9343 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9345 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9346 [(set_attr "type" "sselog1")
9347 (set_attr "prefix_data16" "1")
9348 (set_attr "prefix_extra" "1")
9349 (set_attr "prefix" "maybe_vex")
9350 (set_attr "mode" "<sseinsnmode>")])
;; pabs<suffix> over the MMXMODEI mode iterator using the "y" register
;; constraint: one source (register or memory), register destination.
;; prefix_rep is forced to 0 and prefix_rex is computed via
;; x86_extended_reg_mentioned_p.
;; The output template used to be followed by a stray C-style ';', which the
;; .md reader only tolerated as the start of an empty comment; it is removed.
9352 (define_insn "abs<mode>2"
9353 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9355 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9357 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9358 [(set_attr "type" "sselog1")
9359 (set_attr "prefix_rep" "0")
9360 (set_attr "prefix_extra" "1")
9361 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9362 (set_attr "mode" "DI")])
9364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9366 ;; AMD SSE4A instructions
9368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<suffix> over the MODEF mode iterator: stores register operand 1 to
;; memory operand 0, modelled as an unspec.
9370 (define_insn "sse4a_movnt<mode>"
9371 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9373 [(match_operand:MODEF 1 "register_operand" "x")]
9376 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9377 [(set_attr "type" "ssemov")
9378 (set_attr "mode" "<MODE>")])
;; movnt<suffix> taking a VF_128 vector register: element 0 of operand 1 is
;; selected and stored to the scalar-mode memory operand 0, modelled as an
;; unspec.
9380 (define_insn "sse4a_vmmovnt<mode>"
9381 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9382 (unspec:<ssescalarmode>
9383 [(vec_select:<ssescalarmode>
9384 (match_operand:VF_128 1 "register_operand" "x")
9385 (parallel [(const_int 0)]))]
9388 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9389 [(set_attr "type" "ssemov")
9390 (set_attr "mode" "<ssescalarmode>")])
;; extrq with two immediates: operand 1 (V2DI) is tied to the destination and
;; operands 2 and 3 must each satisfy const_0_to_255_operand.
;; length_immediate is 2 because two immediates are printed.
9392 (define_insn "sse4a_extrqi"
9393 [(set (match_operand:V2DI 0 "register_operand" "=x")
9394 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9395 (match_operand 2 "const_0_to_255_operand" "")
9396 (match_operand 3 "const_0_to_255_operand" "")]
9399 "extrq\t{%3, %2, %0|%0, %2, %3}"
9400 [(set_attr "type" "sse")
9401 (set_attr "prefix_data16" "1")
9402 (set_attr "length_immediate" "2")
9403 (set_attr "mode" "TI")])
;; extrq, register form: operand 1 (V2DI) is tied to the destination and
;; operand 2 is a V16QI register.
9405 (define_insn "sse4a_extrq"
9406 [(set (match_operand:V2DI 0 "register_operand" "=x")
9407 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9408 (match_operand:V16QI 2 "register_operand" "x")]
9411 "extrq\t{%2, %0|%0, %2}"
9412 [(set_attr "type" "sse")
9413 (set_attr "prefix_data16" "1")
9414 (set_attr "mode" "TI")])
;; insertq with two immediates: operand 1 (V2DI) is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 must each satisfy
;; const_0_to_255_operand.  length_immediate is 2 because two immediates are
;; printed.
9416 (define_insn "sse4a_insertqi"
9417 [(set (match_operand:V2DI 0 "register_operand" "=x")
9418 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9419 (match_operand:V2DI 2 "register_operand" "x")
9420 (match_operand 3 "const_0_to_255_operand" "")
9421 (match_operand 4 "const_0_to_255_operand" "")]
9424 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9425 [(set_attr "type" "sseins")
9426 (set_attr "prefix_data16" "0")
9427 (set_attr "prefix_rep" "1")
9428 (set_attr "length_immediate" "2")
9429 (set_attr "mode" "TI")])
;; insertq, register form: operand 1 (V2DI) is tied to the destination and
;; operand 2 is a V2DI register.
9431 (define_insn "sse4a_insertq"
9432 [(set (match_operand:V2DI 0 "register_operand" "=x")
9433 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9434 (match_operand:V2DI 2 "register_operand" "x")]
9437 "insertq\t{%2, %0|%0, %2}"
9438 [(set_attr "type" "sseins")
9439 (set_attr "prefix_data16" "0")
9440 (set_attr "prefix_rep" "1")
9441 (set_attr "mode" "TI")])
9443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9445 ;; Intel SSE4.1 instructions
9447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blend<suffix> / vblend<suffix> over the VF mode iterator with an immediate
;; selector (operand 3).  Note that operand 2 precedes operand 1 in the RTL.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative 1
;; (isa avx) is the VEX form that also prints operand 1.
9449 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9450 [(set (match_operand:VF 0 "register_operand" "=x,x")
9452 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9453 (match_operand:VF 1 "register_operand" "0,x")
9454 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9457 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9458 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9459 [(set_attr "isa" "noavx,avx")
9460 (set_attr "type" "ssemov")
9461 (set_attr "length_immediate" "1")
9462 (set_attr "prefix_data16" "1,*")
9463 (set_attr "prefix_extra" "1")
9464 (set_attr "prefix" "orig,vex")
9465 (set_attr "mode" "<MODE>")])
;; blendv<suffix> / vblendv<suffix> over the VF mode iterator with a register
;; selector (operand 3).  In alternative 0 (isa noavx) operand 3 uses the "Yz"
;; constraint and operand 1 is tied to the destination; in alternative 1
;; (isa avx) operand 3 is any "x" register.  Operands 0-2 use the
;; *_not_xmm0_operand_maybe_avx predicates.
9467 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9468 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9470 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9471 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9472 (match_operand:VF 3 "register_operand" "Yz,x")]
9476 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9477 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9478 [(set_attr "isa" "noavx,avx")
9479 (set_attr "type" "ssemov")
9480 (set_attr "length_immediate" "1")
9481 (set_attr "prefix_data16" "1,*")
9482 (set_attr "prefix_extra" "1")
9483 (set_attr "prefix" "orig,vex")
9484 (set_attr "mode" "<MODE>")])
;; dp<suffix> / vdp<suffix> over the VF mode iterator with an 8-bit immediate
;; (operand 3, const_0_to_255_operand).  Operands 1 and 2 are commutative
;; ("%").  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the VEX form.
9486 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9487 [(set (match_operand:VF 0 "register_operand" "=x,x")
9489 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9490 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9491 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9495 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9496 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9497 [(set_attr "isa" "noavx,avx")
9498 (set_attr "type" "ssemul")
9499 (set_attr "length_immediate" "1")
9500 (set_attr "prefix_data16" "1,*")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "prefix" "orig,vex")
9503 (set_attr "mode" "<MODE>")])
;; movntdqa over the VI8_AVX2 mode iterator: loads memory operand 1 into
;; register operand 0, modelled as an unspec.  The template uses the %v
;; prefix and the prefix attribute is maybe_vex.
9505 (define_insn "<sse4_1_avx2>_movntdqa"
9506 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9507 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9510 "%vmovntdqa\t{%1, %0|%0, %1}"
9511 [(set_attr "type" "ssemov")
9512 (set_attr "prefix_extra" "1")
9513 (set_attr "prefix" "maybe_vex")
9514 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 mode iterator with an 8-bit immediate
;; (operand 3), modelled as an unspec.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the VEX form.
9516 (define_insn "<sse4_1_avx2>_mpsadbw"
9517 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9518 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9519 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9520 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9524 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9525 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9526 [(set_attr "isa" "noavx,avx")
9527 (set_attr "type" "sselog1")
9528 (set_attr "length_immediate" "1")
9529 (set_attr "prefix_extra" "1")
9530 (set_attr "prefix" "orig,vex")
9531 (set_attr "mode" "<sseinsnmode>")])
;; vpackusdw: two V8SI sources (operand 2 may be memory), V16HI result.
;; Always VEX-encoded, mode OI.
9533 (define_insn "avx2_packusdw"
9534 [(set (match_operand:V16HI 0 "register_operand" "=x")
9537 (match_operand:V8SI 1 "register_operand" "x"))
9539 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9541 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9542 [(set_attr "type" "sselog")
9543 (set_attr "prefix_extra" "1")
9544 (set_attr "prefix" "vex")
9545 (set_attr "mode" "OI")])
;; packusdw / vpackusdw: two V4SI sources, V8HI result.  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx) is
;; the three-operand VEX form.
9547 (define_insn "sse4_1_packusdw"
9548 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9551 (match_operand:V4SI 1 "register_operand" "0,x"))
9553 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9556 packusdw\t{%2, %0|%0, %2}
9557 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9558 [(set_attr "isa" "noavx,avx")
9559 (set_attr "type" "sselog")
9560 (set_attr "prefix_extra" "1")
9561 (set_attr "prefix" "orig,vex")
9562 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 mode iterator with a register
;; selector (operand 3).  In alternative 0 (isa noavx) operand 3 uses the "Yz"
;; constraint and operand 1 is tied to the destination; in alternative 1
;; (isa avx) operand 3 is any "x" register and length_immediate is 1.
;; Operand 0 now uses reg_not_xmm0_operand_maybe_avx, matching operand 1 of
;; this pattern and operand 0 of the floating-point blendv pattern; it
;; previously used the plain reg_not_xmm0_operand predicate.
;; NOTE(review): assumes the *_maybe_avx predicate relaxes the xmm0
;; restriction only when AVX is enabled -- confirm in predicates.md.
9564 (define_insn "<sse4_1_avx2>_pblendvb"
9565 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9567 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9568 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9569 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9573 pblendvb\t{%3, %2, %0|%0, %2, %3}
9574 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9575 [(set_attr "isa" "noavx,avx")
9576 (set_attr "type" "ssemov")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "length_immediate" "*,1")
9579 (set_attr "prefix" "orig,vex")
9580 (set_attr "mode" "<sseinsnmode>")])
;; pblendw / vpblendw on 128-bit V8HI vectors with an 8-bit immediate selector
;; (operand 3).  Note that operand 2 precedes operand 1 in the RTL.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative 1
;; (isa avx) is the VEX form.
9582 (define_insn "sse4_1_pblendw"
9583 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9585 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9586 (match_operand:V8HI 1 "register_operand" "0,x")
9587 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9590 pblendw\t{%3, %2, %0|%0, %2, %3}
9591 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9592 [(set_attr "isa" "noavx,avx")
9593 (set_attr "type" "ssemov")
9594 (set_attr "prefix_extra" "1")
9595 (set_attr "length_immediate" "1")
9596 (set_attr "prefix" "orig,vex")
9597 (set_attr "mode" "TI")])
9599 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code masks operand 3 to its low 8 bits and replaces it
;; with (val << 8) | val, i.e. the same 8-bit mask repeated in bits 8-15, so
;; the selector covers all sixteen HI elements of the V16HI operands.
9600 (define_expand "avx2_pblendw"
9601 [(set (match_operand:V16HI 0 "register_operand" "")
9603 (match_operand:V16HI 2 "nonimmediate_operand" "")
9604 (match_operand:V16HI 1 "register_operand" "")
9605 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9608 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9609 operands[3] = GEN_INT (val << 8 | val);
;; vpblendw on 256-bit V16HI vectors.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back down to its low 8 bits
;; before printing the immediate.  Always VEX-encoded, mode OI.
9612 (define_insn "*avx2_pblendw"
9613 [(set (match_operand:V16HI 0 "register_operand" "=x")
9615 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9616 (match_operand:V16HI 1 "register_operand" "x")
9617 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9620 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9621 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9623 [(set_attr "type" "ssemov")
9624 (set_attr "prefix_extra" "1")
9625 (set_attr "length_immediate" "1")
9626 (set_attr "prefix" "vex")
9627 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 mode iterator with an 8-bit immediate selector
;; (operand 3).  Note that operand 2 precedes operand 1 in the RTL.
;; Always VEX-encoded.
9629 (define_insn "avx2_pblendd<mode>"
9630 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9632 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9633 (match_operand:VI4_AVX2 1 "register_operand" "x")
9634 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9636 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "length_immediate" "1")
9640 (set_attr "prefix" "vex")
9641 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; (register or memory).  The template uses the %v prefix and the prefix
;; attribute is maybe_vex.
9643 (define_insn "sse4_1_phminposuw"
9644 [(set (match_operand:V8HI 0 "register_operand" "=x")
9645 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9646 UNSPEC_PHMINPOSUW))]
9648 "%vphminposuw\t{%1, %0|%0, %1}"
9649 [(set_attr "type" "sselog1")
9650 (set_attr "prefix_extra" "1")
9651 (set_attr "prefix" "maybe_vex")
9652 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: V16QI source (register or memory) to V16HI result.
;; <code> and <extsuffix> come from a code iterator not visible in this
;; listing (presumably sign/zero extension -- confirm).  VEX-encoded, mode OI.
9654 (define_insn "avx2_<code>v16qiv16hi2"
9655 [(set (match_operand:V16HI 0 "register_operand" "=x")
9657 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9659 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9660 [(set_attr "type" "ssemov")
9661 (set_attr "prefix_extra" "1")
9662 (set_attr "prefix" "vex")
9663 (set_attr "mode" "OI")])
;; pmov<extsuffix>bw: V8HI result built from elements of a V16QI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9665 (define_insn "sse4_1_<code>v8qiv8hi2"
9666 [(set (match_operand:V8HI 0 "register_operand" "=x")
9669 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9670 (parallel [(const_int 0)
9679 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9680 [(set_attr "type" "ssemov")
9681 (set_attr "prefix_extra" "1")
9682 (set_attr "prefix" "maybe_vex")
9683 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: V8SI result built from elements of a V16QI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  VEX-encoded, mode OI.
9685 (define_insn "avx2_<code>v8qiv8si2"
9686 [(set (match_operand:V8SI 0 "register_operand" "=x")
9689 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9690 (parallel [(const_int 0)
9699 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9700 [(set_attr "type" "ssemov")
9701 (set_attr "prefix_extra" "1")
9702 (set_attr "prefix" "vex")
9703 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd: V4SI result built from elements of a V16QI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9705 (define_insn "sse4_1_<code>v4qiv4si2"
9706 [(set (match_operand:V4SI 0 "register_operand" "=x")
9709 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9710 (parallel [(const_int 0)
9715 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9716 [(set_attr "type" "ssemov")
9717 (set_attr "prefix_extra" "1")
9718 (set_attr "prefix" "maybe_vex")
9719 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd: V8HI source (register or memory) to V8SI result.
;; VEX-encoded, mode OI.
9721 (define_insn "avx2_<code>v8hiv8si2"
9722 [(set (match_operand:V8SI 0 "register_operand" "=x")
9724 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9726 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9727 [(set_attr "type" "ssemov")
9728 (set_attr "prefix_extra" "1")
9729 (set_attr "prefix" "vex")
9730 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd: V4SI result built from elements of a V8HI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9732 (define_insn "sse4_1_<code>v4hiv4si2"
9733 [(set (match_operand:V4SI 0 "register_operand" "=x")
9736 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9737 (parallel [(const_int 0)
9742 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9743 [(set_attr "type" "ssemov")
9744 (set_attr "prefix_extra" "1")
9745 (set_attr "prefix" "maybe_vex")
9746 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq: V4DI result built from elements of a V16QI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  VEX-encoded, mode OI.
9748 (define_insn "avx2_<code>v4qiv4di2"
9749 [(set (match_operand:V4DI 0 "register_operand" "=x")
9752 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9753 (parallel [(const_int 0)
9758 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9759 [(set_attr "type" "ssemov")
9760 (set_attr "prefix_extra" "1")
9761 (set_attr "prefix" "vex")
9762 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq: V2DI result built from elements of a V16QI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9764 (define_insn "sse4_1_<code>v2qiv2di2"
9765 [(set (match_operand:V2DI 0 "register_operand" "=x")
9768 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9769 (parallel [(const_int 0)
9772 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9773 [(set_attr "type" "ssemov")
9774 (set_attr "prefix_extra" "1")
9775 (set_attr "prefix" "maybe_vex")
9776 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq: V4DI result built from elements of a V8HI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  VEX-encoded, mode OI.
9778 (define_insn "avx2_<code>v4hiv4di2"
9779 [(set (match_operand:V4DI 0 "register_operand" "=x")
9782 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9783 (parallel [(const_int 0)
9788 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix_extra" "1")
9791 (set_attr "prefix" "vex")
9792 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq: V2DI result built from elements of a V8HI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9794 (define_insn "sse4_1_<code>v2hiv2di2"
9795 [(set (match_operand:V2DI 0 "register_operand" "=x")
9798 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9799 (parallel [(const_int 0)
9802 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9803 [(set_attr "type" "ssemov")
9804 (set_attr "prefix_extra" "1")
9805 (set_attr "prefix" "maybe_vex")
9806 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq: V4SI source (register or memory) to V4DI result.
;; The template is unconditionally the VEX "v" form, so the prefix attribute
;; is stated explicitly as "vex", as in every other avx2_<code>... extension
;; pattern in this file; it was previously left to the attribute default.
9808 (define_insn "avx2_<code>v4siv4di2"
9809 [(set (match_operand:V4DI 0 "register_operand" "=x")
9811 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9813 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9814 [(set_attr "type" "ssemov")
9815 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9816 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq: V2DI result built from elements of a V4SI source,
;; selected by an index list that starts at 0 (the rest of the list is not
;; visible in this listing).  Uses the %v template prefix / maybe_vex.
9818 (define_insn "sse4_1_<code>v2siv2di2"
9819 [(set (match_operand:V2DI 0 "register_operand" "=x")
9822 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9823 (parallel [(const_int 0)
9826 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9827 [(set_attr "type" "ssemov")
9828 (set_attr "prefix_extra" "1")
9829 (set_attr "prefix" "maybe_vex")
9830 (set_attr "mode" "TI")])
9832 ;; vtestps/vtestpd (vtest<ssemodesuffix>) are very similar to comiss and
9833 ;; ucomiss when setting FLAGS_REG.  But they are not really compare
;; instructions: the result is modelled as an unspec that sets the CC-mode
;; flags register from operand 0 (register) and operand 1 (register or
;; memory).
9834 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9835 [(set (reg:CC FLAGS_REG)
9836 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9837 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9840 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9841 [(set_attr "type" "ssecomi")
9842 (set_attr "prefix_extra" "1")
9843 (set_attr "prefix" "vex")
9844 (set_attr "mode" "<MODE>")])
9846 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9847 ;; But it is not really a compare instruction.
;; This is the 256-bit (V4DI) vptest form; the flags result is modelled as an
;; unspec of operand 0 (register) and operand 1 (register or memory).
9848 (define_insn "avx_ptest256"
9849 [(set (reg:CC FLAGS_REG)
9850 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9851 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9854 "vptest\t{%1, %0|%0, %1}"
9855 [(set_attr "type" "ssecomi")
9856 (set_attr "prefix_extra" "1")
9857 (set_attr "prefix" "vex")
9858 (set_attr "mode" "OI")])
;; ptest on 128-bit V2DI operands: sets the CC-mode flags register from an
;; unspec of operand 0 (register) and operand 1 (register or memory).
;; Uses the %v template prefix / maybe_vex.
9860 (define_insn "sse4_1_ptest"
9861 [(set (reg:CC FLAGS_REG)
9862 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9863 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9866 "%vptest\t{%1, %0|%0, %1}"
9867 [(set_attr "type" "ssecomi")
9868 (set_attr "prefix_extra" "1")
9869 (set_attr "prefix" "maybe_vex")
9870 (set_attr "mode" "TI")])
;; round<suffix> over the VF mode iterator: one source (register or memory)
;; plus an immediate in the range accepted by const_0_to_15_operand.
;; prefix_data16 is chosen by a TARGET_AVX test (the non-AVX arm yields "1").
;; Uses the %v template prefix / maybe_vex.
9872 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9873 [(set (match_operand:VF 0 "register_operand" "=x")
9875 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9876 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9879 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9880 [(set_attr "type" "ssecvt")
9881 (set (attr "prefix_data16")
9883 (match_test "TARGET_AVX")
9885 (const_string "1")))
9886 (set_attr "prefix_extra" "1")
9887 (set_attr "length_immediate" "1")
9888 (set_attr "prefix" "maybe_vex")
9889 (set_attr "mode" "<MODE>")])
;; Scalar round<suffix> / vround<suffix> on VF_128 registers.  Operand 2 is
;; the value being rounded with immediate operand 3 (const_0_to_15_operand);
;; operand 1 is a second vector register that is tied to the destination in
;; alternative 0 (isa noavx) and printed separately in alternative 1 (avx).
9891 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9892 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9895 [(match_operand:VF_128 2 "register_operand" "x,x")
9896 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9898 (match_operand:VF_128 1 "register_operand" "0,x")
9902 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9903 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9904 [(set_attr "isa" "noavx,avx")
9905 (set_attr "type" "ssecvt")
9906 (set_attr "length_immediate" "1")
9907 (set_attr "prefix_data16" "1,*")
9908 (set_attr "prefix_extra" "1")
9909 (set_attr "prefix" "orig,vex")
9910 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 over the VF mode iterator, enabled only when
;; TARGET_ROUND is set and flag_trapping_math is clear.
;; The preparation code:
;;   - computes pred_half = 0.5 - 2**(-p-1) for the element format (the
;;     value the original comment calls nextafter (0.5, 0.0)),
;;   - broadcasts it into a vector constant and forces that into a register,
;;   - sets operands[3] to a new register holding copysign (vec_half,
;;     operands[1]),
;;   - allocates operands[4] as a new register and sets operands[5] to
;;     ROUND_TRUNC for the final step, which uses operands 4 and 5.
9912 (define_expand "round<mode>2"
9915 (match_operand:VF 1 "nonimmediate_operand" "")
9917 (set (match_operand:VF 0 "register_operand" "")
9919 [(match_dup 4) (match_dup 5)]
9921 "TARGET_ROUND && !flag_trapping_math"
9923 enum machine_mode scalar_mode;
9924 const struct real_format *fmt;
9925 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9928 scalar_mode = GET_MODE_INNER (<MODE>mode);
9930 /* load nextafter (0.5, 0.0) */
9931 fmt = REAL_MODE_FORMAT (scalar_mode);
9932 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9933 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9934 half = const_double_from_real_value (pred_half, scalar_mode);
9936 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9937 vec_half = force_reg (<MODE>mode, vec_half);
9939 operands[3] = gen_reg_rtx (<MODE>mode);
9940 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9942 operands[4] = gen_reg_rtx (<MODE>mode);
9943 operands[5] = GEN_INT (ROUND_TRUNC);
9946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9948 ;; Intel SSE4.2 string/text processing instructions
9950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern with three results: operand 0 (SI, constraint
;; "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.  Inputs are the
;; two V16QI strings (operands 2 and 4), their SI length registers
;; (operands 3 and 5, constraints "a" and "d") and an 8-bit immediate
;; (operand 6).  The split requires can_create_pseudo_p ().
;;
;; The split code inspects REG_UNUSED notes on curr_insn to learn which of
;; the three results are live (ecx, xmm0, flags) and emits the
;; sse4_2_pcmpestri / sse4_2_pcmpestrm forms accordingly.  The _cconly form
;; is emitted only when the flags are live and neither register result is.
;; When nothing is live, a NOTE_INSN_DELETED note is emitted.
9952 (define_insn_and_split "sse4_2_pcmpestr"
9953 [(set (match_operand:SI 0 "register_operand" "=c,c")
9955 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9956 (match_operand:SI 3 "register_operand" "a,a")
9957 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9958 (match_operand:SI 5 "register_operand" "d,d")
9959 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9961 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9969 (set (reg:CC FLAGS_REG)
9978 && can_create_pseudo_p ()"
9983 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9984 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9985 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9988 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9989 operands[3], operands[4],
9990 operands[5], operands[6]));
9992 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9993 operands[3], operands[4],
9994 operands[5], operands[6]));
9995 if (flags && !(ecx || xmm0))
9996 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9997 operands[2], operands[3],
9998 operands[4], operands[5],
10000 if (!(flags || ecx || xmm0))
10001 emit_note (NOTE_INSN_DELETED);
10005 [(set_attr "type" "sselog")
10006 (set_attr "prefix_data16" "1")
10007 (set_attr "prefix_extra" "1")
10008 (set_attr "length_immediate" "1")
10009 (set_attr "memory" "none,load")
10010 (set_attr "mode" "TI")])
;; PCMPESTRI: writes an SI result to operand 0 (constraint "c") and also
;; sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (operand 3 may be
;; memory in the second alternative), operands 2 and 4 are SI registers
;; constrained to "a" and "d", and operand 5 is an 8-bit immediate.  Only
;; operands 1, 3 and 5 appear in the output template.
10012 (define_insn "sse4_2_pcmpestri"
10013 [(set (match_operand:SI 0 "register_operand" "=c,c")
10015 [(match_operand:V16QI 1 "register_operand" "x,x")
10016 (match_operand:SI 2 "register_operand" "a,a")
10017 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10018 (match_operand:SI 4 "register_operand" "d,d")
10019 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10021 (set (reg:CC FLAGS_REG)
10030 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10031 [(set_attr "type" "sselog")
10032 (set_attr "prefix_data16" "1")
10033 (set_attr "prefix_extra" "1")
10034 (set_attr "prefix" "maybe_vex")
10035 (set_attr "length_immediate" "1")
10036 (set_attr "memory" "none,load")
10037 (set_attr "mode" "TI")])
;; PCMPESTRM: writes a V16QI result to operand 0 (constraint "Yz") and also
;; sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (operand 3 may be
;; memory in the second alternative), operands 2 and 4 are SI registers
;; constrained to "a" and "d", and operand 5 is an 8-bit immediate.  Only
;; operands 1, 3 and 5 appear in the output template.
10039 (define_insn "sse4_2_pcmpestrm"
10040 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10042 [(match_operand:V16QI 1 "register_operand" "x,x")
10043 (match_operand:SI 2 "register_operand" "a,a")
10044 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10045 (match_operand:SI 4 "register_operand" "d,d")
10046 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10048 (set (reg:CC FLAGS_REG)
10057 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10058 [(set_attr "type" "sselog")
10059 (set_attr "prefix_data16" "1")
10060 (set_attr "prefix_extra" "1")
10061 (set_attr "length_immediate" "1")
10062 (set_attr "prefix" "maybe_vex")
10063 (set_attr "memory" "none,load")
10064 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: the only set is FLAGS_REG; operands 0 (V16QI) and 1
;; (SI) are scratch clobbers.  Four alternatives: the first two clobber the
;; "Yz" scratch and output vpcmpestrm, the last two clobber the "c" scratch
;; and output vpcmpestri.  Within each pair, operand 4 is a register in one
;; alternative and memory in the other (see the "memory" attribute).
10066 (define_insn "sse4_2_pcmpestr_cconly"
10067 [(set (reg:CC FLAGS_REG)
10069 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10070 (match_operand:SI 3 "register_operand" "a,a,a,a")
10071 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10072 (match_operand:SI 5 "register_operand" "d,d,d,d")
10073 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10075 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10076 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10079 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10080 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10081 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10082 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10083 [(set_attr "type" "sselog")
10084 (set_attr "prefix_data16" "1")
10085 (set_attr "prefix_extra" "1")
10086 (set_attr "length_immediate" "1")
10087 (set_attr "memory" "none,load,none,load")
10088 (set_attr "prefix" "maybe_vex")
10089 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern with three results: operand 0 (SI, constraint
;; "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.  Inputs are the
;; two V16QI strings (operands 2 and 3) and an 8-bit immediate (operand 4).
;; The split requires can_create_pseudo_p ().
;;
;; The split code inspects REG_UNUSED notes on curr_insn to learn which of
;; the three results are live (ecx, xmm0, flags) and emits the
;; sse4_2_pcmpistri / sse4_2_pcmpistrm forms accordingly.  The _cconly form
;; is emitted only when the flags are live and neither register result is.
;; When nothing is live, a NOTE_INSN_DELETED note is emitted.
10091 (define_insn_and_split "sse4_2_pcmpistr"
10092 [(set (match_operand:SI 0 "register_operand" "=c,c")
10094 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10095 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10096 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10098 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10104 (set (reg:CC FLAGS_REG)
10111 && can_create_pseudo_p ()"
10116 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10117 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10118 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10121 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10122 operands[3], operands[4]));
10124 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10125 operands[3], operands[4]));
10126 if (flags && !(ecx || xmm0))
10127 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10128 operands[2], operands[3],
10130 if (!(flags || ecx || xmm0))
10131 emit_note (NOTE_INSN_DELETED);
10135 [(set_attr "type" "sselog")
10136 (set_attr "prefix_data16" "1")
10137 (set_attr "prefix_extra" "1")
10138 (set_attr "length_immediate" "1")
10139 (set_attr "memory" "none,load")
10140 (set_attr "mode" "TI")])
;; PCMPISTRI: writes an SI result to operand 0 (constraint "c") and also
;; sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (operand 2 may be
;; memory in the second alternative); operand 3 is an 8-bit immediate.
10142 (define_insn "sse4_2_pcmpistri"
10143 [(set (match_operand:SI 0 "register_operand" "=c,c")
10145 [(match_operand:V16QI 1 "register_operand" "x,x")
10146 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10147 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10149 (set (reg:CC FLAGS_REG)
10156 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10157 [(set_attr "type" "sselog")
10158 (set_attr "prefix_data16" "1")
10159 (set_attr "prefix_extra" "1")
10160 (set_attr "length_immediate" "1")
10161 (set_attr "prefix" "maybe_vex")
10162 (set_attr "memory" "none,load")
10163 (set_attr "mode" "TI")])
;; PCMPISTRM: writes a V16QI result to operand 0 (constraint "Yz") and also
;; sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (operand 2 may be
;; memory in the second alternative); operand 3 is an 8-bit immediate.
10165 (define_insn "sse4_2_pcmpistrm"
10166 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10168 [(match_operand:V16QI 1 "register_operand" "x,x")
10169 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10170 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10172 (set (reg:CC FLAGS_REG)
10179 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10180 [(set_attr "type" "sselog")
10181 (set_attr "prefix_data16" "1")
10182 (set_attr "prefix_extra" "1")
10183 (set_attr "length_immediate" "1")
10184 (set_attr "prefix" "maybe_vex")
10185 (set_attr "memory" "none,load")
10186 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: the only set is FLAGS_REG; operands 0 (V16QI) and 1
;; (SI) are scratch clobbers.  Four alternatives: the first two clobber the
;; "Yz" scratch and output vpcmpistrm, the last two clobber the "c" scratch
;; and output vpcmpistri.  Within each pair, operand 3 is a register in one
;; alternative and memory in the other (see the "memory" attribute).
10188 (define_insn "sse4_2_pcmpistr_cconly"
10189 [(set (reg:CC FLAGS_REG)
10191 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10192 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10193 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10195 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10196 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10199 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10200 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10201 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10202 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10203 [(set_attr "type" "sselog")
10204 (set_attr "prefix_data16" "1")
10205 (set_attr "prefix_extra" "1")
10206 (set_attr "length_immediate" "1")
10207 (set_attr "memory" "none,load,none,load")
10208 (set_attr "prefix" "maybe_vex")
10209 (set_attr "mode" "TI")])
10211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10213 ;; XOP instructions
10215 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10217 ;; XOP parallel integer multiply/add instructions.
10218 ;; Note the XOP multiply/add instructions
10219 ;; a[i] = b[i] * c[i] + d[i];
10220 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI: operands 1 and 2 are the commutative ("%") multiply
;; inputs, of which only operand 2 may be memory; operand 3 is the addend
;; and its constraint ("x") restricts it to a register.
10221 (define_insn "xop_pmacsww"
10222 [(set (match_operand:V8HI 0 "register_operand" "=x")
10225 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10226 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10227 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10229 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10230 [(set_attr "type" "ssemuladd")
10231 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult:V8HI of operands 1 and 2 (commutative; only
;; operand 2 may be memory) combined with operand 3, which is constrained
;; to a register.
10233 (define_insn "xop_pmacssww"
10234 [(set (match_operand:V8HI 0 "register_operand" "=x")
10236 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10237 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10238 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10240 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10241 [(set_attr "type" "ssemuladd")
10242 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the commutative ("%") multiply
;; inputs, of which only operand 2 may be memory; operand 3 is the addend
;; and its constraint ("x") restricts it to a register.
10244 (define_insn "xop_pmacsdd"
10245 [(set (match_operand:V4SI 0 "register_operand" "=x")
10248 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10249 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10250 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10252 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10253 [(set_attr "type" "ssemuladd")
10254 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult:V4SI of operands 1 and 2 (commutative; only
;; operand 2 may be memory) combined with operand 3, which is constrained
;; to a register.
10256 (define_insn "xop_pmacssdd"
10257 [(set (match_operand:V4SI 0 "register_operand" "=x")
10259 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10260 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10261 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10263 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10264 [(set_attr "type" "ssemuladd")
10265 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 1; operand 3 is a V2DI value constrained to a register and the
;; destination is V2DI.
10267 (define_insn "xop_pmacssdql"
10268 [(set (match_operand:V2DI 0 "register_operand" "=x")
10273 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10274 (parallel [(const_int 1)
10277 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10278 (parallel [(const_int 1)
10280 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10282 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10283 [(set_attr "type" "ssemuladd")
10284 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 0; operand 3 is a V2DI value constrained to a register and the
;; destination is V2DI.
10286 (define_insn "xop_pmacssdqh"
10287 [(set (match_operand:V2DI 0 "register_operand" "=x")
10292 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10293 (parallel [(const_int 0)
10297 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10298 (parallel [(const_int 0)
10300 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10302 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10303 [(set_attr "type" "ssemuladd")
10304 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 1; operand 3 is a V2DI value constrained to a register and the
;; destination is V2DI.
10306 (define_insn "xop_pmacsdql"
10307 [(set (match_operand:V2DI 0 "register_operand" "=x")
10312 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10313 (parallel [(const_int 1)
10317 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10318 (parallel [(const_int 1)
10320 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10322 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10323 [(set_attr "type" "ssemuladd")
10324 (set_attr "mode" "TI")])
10326 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10327 ;; fake it with a multiply/add. In general, we expect the define_split to
10328 ;; occur before register allocation, so we have to handle the corner case where
10329 ;; the target is the same as operands 1/2
;; Multiply of elements 1 and 3 of two V4SI operands into a V2DI
;; destination.  The destination is early-clobber ("=&x"), so it cannot
;; share a register with either input.  The split condition adds
;; "&& reload_completed", and the preparation code supplies operands[3] as
;; a V2DI zero constant for the replacement pattern.
10330 (define_insn_and_split "xop_mulv2div2di3_low"
10331 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10335 (match_operand:V4SI 1 "register_operand" "%x")
10336 (parallel [(const_int 1)
10340 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10341 (parallel [(const_int 1)
10342 (const_int 3)])))))]
10345 "&& reload_completed"
10346 [(set (match_dup 0)
10354 (parallel [(const_int 1)
10359 (parallel [(const_int 1)
10363 operands[3] = CONST0_RTX (V2DImode);
10365 [(set_attr "type" "ssemul")
10366 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 0; operand 3 is a V2DI value constrained to a register and the
;; destination is V2DI.
10368 (define_insn "xop_pmacsdqh"
10369 [(set (match_operand:V2DI 0 "register_operand" "=x")
10374 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10375 (parallel [(const_int 0)
10379 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10380 (parallel [(const_int 0)
10382 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10384 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10385 [(set_attr "type" "ssemuladd")
10386 (set_attr "mode" "TI")])
10388 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10389 ;; fake it with a multiply/add. In general, we expect the define_split to
10390 ;; occur before register allocation, so we have to handle the corner case where
10391 ;; the target is the same as either operands[1] or operands[2]
;; Multiply of elements 0 and 2 of two V4SI operands into a V2DI
;; destination.  The destination is early-clobber ("=&x"), so it cannot
;; share a register with either input.  The split condition adds
;; "&& reload_completed", and the preparation code supplies operands[3] as
;; a V2DI zero constant for the replacement pattern.
10392 (define_insn_and_split "xop_mulv2div2di3_high"
10393 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10397 (match_operand:V4SI 1 "register_operand" "%x")
10398 (parallel [(const_int 0)
10402 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10403 (parallel [(const_int 0)
10404 (const_int 2)])))))]
10407 "&& reload_completed"
10408 [(set (match_dup 0)
10416 (parallel [(const_int 0)
10421 (parallel [(const_int 0)
10425 operands[3] = CONST0_RTX (V2DImode);
10427 [(set_attr "type" "ssemul")
10428 (set_attr "mode" "TI")])
10430 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 1; operand 3 is a V4SI value constrained to a register and the
;; destination is V4SI.
10431 (define_insn "xop_pmacsswd"
10432 [(set (match_operand:V4SI 0 "register_operand" "=x")
10437 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10438 (parallel [(const_int 1)
10444 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10445 (parallel [(const_int 1)
10449 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10451 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10452 [(set_attr "type" "ssemuladd")
10453 (set_attr "mode" "TI")])
;; vpmacswd: V8HI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are each accessed through a parallel index list that begins at
;; element 1; operand 3 is a V4SI value constrained to a register and the
;; destination is V4SI.
10455 (define_insn "xop_pmacswd"
10456 [(set (match_operand:V4SI 0 "register_operand" "=x")
10461 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10462 (parallel [(const_int 1)
10468 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10469 (parallel [(const_int 1)
10473 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10475 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10476 [(set_attr "type" "ssemuladd")
10477 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are accessed through two pairs of parallel index lists, one pair
;; beginning at element 0 and one beginning at element 1 (ending at 7).
;; Operand 3 is a V4SI value constrained to a register; the destination is
;; V4SI.
10479 (define_insn "xop_pmadcsswd"
10480 [(set (match_operand:V4SI 0 "register_operand" "=x")
10486 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10487 (parallel [(const_int 0)
10493 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10494 (parallel [(const_int 0)
10502 (parallel [(const_int 1)
10509 (parallel [(const_int 1)
10512 (const_int 7)])))))
10513 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10515 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10516 [(set_attr "type" "ssemuladd")
10517 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI operands 1 and 2 (commutative; only operand 2 may be
;; memory) are accessed through two pairs of parallel index lists, one pair
;; beginning at element 0 and one beginning at element 1 (ending at 7).
;; Operand 3 is a V4SI value constrained to a register; the destination is
;; V4SI.
10519 (define_insn "xop_pmadcswd"
10520 [(set (match_operand:V4SI 0 "register_operand" "=x")
10526 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10527 (parallel [(const_int 0)
10533 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10534 (parallel [(const_int 0)
10542 (parallel [(const_int 1)
10549 (parallel [(const_int 1)
10552 (const_int 7)])))))
10553 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10555 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10556 [(set_attr "type" "ssemuladd")
10557 (set_attr "mode" "TI")])
10559 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode of the V iterator.  Operand 1 is always a
;; register.  The two alternatives allow either operand 3 or operand 2 to be
;; memory, but never both.
10560 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10561 [(set (match_operand:V 0 "register_operand" "=x,x")
10563 (match_operand:V 3 "nonimmediate_operand" "x,m")
10564 (match_operand:V 1 "register_operand" "x,x")
10565 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10567 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10568 [(set_attr "type" "sse4arg")])
10570 ;; XOP horizontal add/subtract instructions
;; vphaddbw: one V16QI source (operand 1, register or memory) and a V8HI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 15).
10571 (define_insn "xop_phaddbw"
10572 [(set (match_operand:V8HI 0 "register_operand" "=x")
10576 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10577 (parallel [(const_int 0)
10588 (parallel [(const_int 1)
10595 (const_int 15)])))))]
10597 "vphaddbw\t{%1, %0|%0, %1}"
10598 [(set_attr "type" "sseiadd1")])
;; vphaddbd: one V16QI source (operand 1, register or memory) and a V4SI
;; register destination.  The pattern refers to operand 1 through four
;; parallel index lists, starting at elements 0, 1, 2 and 3 respectively.
10600 (define_insn "xop_phaddbd"
10601 [(set (match_operand:V4SI 0 "register_operand" "=x")
10606 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10607 (parallel [(const_int 0)
10614 (parallel [(const_int 1)
10617 (const_int 13)]))))
10622 (parallel [(const_int 2)
10629 (parallel [(const_int 3)
10632 (const_int 15)]))))))]
10634 "vphaddbd\t{%1, %0|%0, %1}"
10635 [(set_attr "type" "sseiadd1")])
;; vphaddbq: one V16QI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through eight
;; parallel index lists, starting at elements 0, 1, 2, 3, 8, 9, 10 and 11.
10637 (define_insn "xop_phaddbq"
10638 [(set (match_operand:V2DI 0 "register_operand" "=x")
10644 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10645 (parallel [(const_int 0)
10650 (parallel [(const_int 1)
10656 (parallel [(const_int 2)
10661 (parallel [(const_int 3)
10662 (const_int 7)])))))
10668 (parallel [(const_int 8)
10673 (parallel [(const_int 9)
10674 (const_int 13)]))))
10679 (parallel [(const_int 10)
10684 (parallel [(const_int 11)
10685 (const_int 15)])))))))]
10687 "vphaddbq\t{%1, %0|%0, %1}"
10688 [(set_attr "type" "sseiadd1")])
;; vphaddwd: one V8HI source (operand 1, register or memory) and a V4SI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 7).
10690 (define_insn "xop_phaddwd"
10691 [(set (match_operand:V4SI 0 "register_operand" "=x")
10695 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10696 (parallel [(const_int 0)
10703 (parallel [(const_int 1)
10706 (const_int 7)])))))]
10708 "vphaddwd\t{%1, %0|%0, %1}"
10709 [(set_attr "type" "sseiadd1")])
;; vphaddwq: one V8HI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through four
;; parallel index lists, starting at elements 0, 1, 2 and 3 respectively.
10711 (define_insn "xop_phaddwq"
10712 [(set (match_operand:V2DI 0 "register_operand" "=x")
10717 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10718 (parallel [(const_int 0)
10723 (parallel [(const_int 1)
10729 (parallel [(const_int 2)
10734 (parallel [(const_int 3)
10735 (const_int 7)]))))))]
10737 "vphaddwq\t{%1, %0|%0, %1}"
10738 [(set_attr "type" "sseiadd1")])
;; vphadddq: one V4SI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 3).
10740 (define_insn "xop_phadddq"
10741 [(set (match_operand:V2DI 0 "register_operand" "=x")
10745 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10746 (parallel [(const_int 0)
10751 (parallel [(const_int 1)
10752 (const_int 3)])))))]
10754 "vphadddq\t{%1, %0|%0, %1}"
10755 [(set_attr "type" "sseiadd1")])
;; vphaddubw: one V16QI source (operand 1, register or memory) and a V8HI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 15).
10757 (define_insn "xop_phaddubw"
10758 [(set (match_operand:V8HI 0 "register_operand" "=x")
10762 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10763 (parallel [(const_int 0)
10774 (parallel [(const_int 1)
10781 (const_int 15)])))))]
10783 "vphaddubw\t{%1, %0|%0, %1}"
10784 [(set_attr "type" "sseiadd1")])
;; vphaddubd: one V16QI source (operand 1, register or memory) and a V4SI
;; register destination.  The pattern refers to operand 1 through four
;; parallel index lists, starting at elements 0, 1, 2 and 3 respectively.
10786 (define_insn "xop_phaddubd"
10787 [(set (match_operand:V4SI 0 "register_operand" "=x")
10792 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10793 (parallel [(const_int 0)
10800 (parallel [(const_int 1)
10803 (const_int 13)]))))
10808 (parallel [(const_int 2)
10815 (parallel [(const_int 3)
10818 (const_int 15)]))))))]
10820 "vphaddubd\t{%1, %0|%0, %1}"
10821 [(set_attr "type" "sseiadd1")])
;; vphaddubq: one V16QI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through eight
;; parallel index lists, starting at elements 0, 1, 2, 3, 8, 9, 10 and 11.
10823 (define_insn "xop_phaddubq"
10824 [(set (match_operand:V2DI 0 "register_operand" "=x")
10830 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10831 (parallel [(const_int 0)
10836 (parallel [(const_int 1)
10842 (parallel [(const_int 2)
10847 (parallel [(const_int 3)
10848 (const_int 7)])))))
10854 (parallel [(const_int 8)
10859 (parallel [(const_int 9)
10860 (const_int 13)]))))
10865 (parallel [(const_int 10)
10870 (parallel [(const_int 11)
10871 (const_int 15)])))))))]
10873 "vphaddubq\t{%1, %0|%0, %1}"
10874 [(set_attr "type" "sseiadd1")])
;; vphadduwd: one V8HI source (operand 1, register or memory) and a V4SI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 7).
10876 (define_insn "xop_phadduwd"
10877 [(set (match_operand:V4SI 0 "register_operand" "=x")
10881 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10882 (parallel [(const_int 0)
10889 (parallel [(const_int 1)
10892 (const_int 7)])))))]
10894 "vphadduwd\t{%1, %0|%0, %1}"
10895 [(set_attr "type" "sseiadd1")])
;; vphadduwq: one V8HI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through four
;; parallel index lists, starting at elements 0, 1, 2 and 3 respectively.
10897 (define_insn "xop_phadduwq"
10898 [(set (match_operand:V2DI 0 "register_operand" "=x")
10903 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10904 (parallel [(const_int 0)
10909 (parallel [(const_int 1)
10915 (parallel [(const_int 2)
10920 (parallel [(const_int 3)
10921 (const_int 7)]))))))]
10923 "vphadduwq\t{%1, %0|%0, %1}"
10924 [(set_attr "type" "sseiadd1")])
;; vphaddudq: one V4SI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 3).
10926 (define_insn "xop_phaddudq"
10927 [(set (match_operand:V2DI 0 "register_operand" "=x")
10931 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10932 (parallel [(const_int 0)
10937 (parallel [(const_int 1)
10938 (const_int 3)])))))]
10940 "vphaddudq\t{%1, %0|%0, %1}"
10941 [(set_attr "type" "sseiadd1")])
;; vphsubbw: one V16QI source (operand 1, register or memory) and a V8HI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 15).
10943 (define_insn "xop_phsubbw"
10944 [(set (match_operand:V8HI 0 "register_operand" "=x")
10948 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10949 (parallel [(const_int 0)
10960 (parallel [(const_int 1)
10967 (const_int 15)])))))]
10969 "vphsubbw\t{%1, %0|%0, %1}"
10970 [(set_attr "type" "sseiadd1")])
;; vphsubwd: one V8HI source (operand 1, register or memory) and a V4SI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 7).
10972 (define_insn "xop_phsubwd"
10973 [(set (match_operand:V4SI 0 "register_operand" "=x")
10977 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10978 (parallel [(const_int 0)
10985 (parallel [(const_int 1)
10988 (const_int 7)])))))]
10990 "vphsubwd\t{%1, %0|%0, %1}"
10991 [(set_attr "type" "sseiadd1")])
;; vphsubdq: one V4SI source (operand 1, register or memory) and a V2DI
;; register destination.  The pattern refers to operand 1 through two
;; parallel index lists, one starting at element 0 and one at element 1
;; (ending at 3).
10993 (define_insn "xop_phsubdq"
10994 [(set (match_operand:V2DI 0 "register_operand" "=x")
10998 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10999 (parallel [(const_int 0)
11004 (parallel [(const_int 1)
11005 (const_int 3)])))))]
11007 "vphsubdq\t{%1, %0|%0, %1}"
11008 [(set_attr "type" "sseiadd1")])
11010 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE of operands 1, 2 and 3.
;; Operand 1 is always a register.  Either operand 2 or operand 3 may be
;; memory, but the insn condition rejects the case where both are.
11011 (define_insn "xop_pperm"
11012 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11014 [(match_operand:V16QI 1 "register_operand" "x,x")
11015 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11016 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11017 UNSPEC_XOP_PERMUTE))]
11018 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11019 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11020 [(set_attr "type" "sse4arg")
11021 (set_attr "mode" "TI")])
11023 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a pack: two V2DI inputs (operands 1 and 2) produce one
;; V4SI result.  Operand 3 is a V16QI value referenced by a (use ...) and
;; passed as the last vpperm source.  The insn condition rejects operands 2
;; and 3 both being memory.
11024 (define_insn "xop_pperm_pack_v2di_v4si"
11025 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11028 (match_operand:V2DI 1 "register_operand" "x,x"))
11030 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11031 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11032 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11033 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11034 [(set_attr "type" "sse4arg")
11035 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V4SI inputs (operands 1 and 2) produce one
;; V8HI result.  Operand 3 is a V16QI value referenced by a (use ...) and
;; passed as the last vpperm source.  The insn condition rejects operands 2
;; and 3 both being memory.
11037 (define_insn "xop_pperm_pack_v4si_v8hi"
11038 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11041 (match_operand:V4SI 1 "register_operand" "x,x"))
11043 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11044 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11045 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11046 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11047 [(set_attr "type" "sse4arg")
11048 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V8HI inputs (operands 1 and 2) produce one
;; V16QI result.  Operand 3 is a V16QI value referenced by a (use ...) and
;; passed as the last vpperm source.  The insn condition rejects operands 2
;; and 3 both being memory.
11050 (define_insn "xop_pperm_pack_v8hi_v16qi"
11051 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11054 (match_operand:V8HI 1 "register_operand" "x,x"))
11056 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11057 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11058 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11059 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11060 [(set_attr "type" "sse4arg")
11061 (set_attr "mode" "TI")])
11063 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.  When the
;; count is not an immediate accepted by const_0_to_<sserotatemax>_operand,
;; the preparation code:
;;   - converts the count to <ssescalarmode> if it has a different mode;
;;   - replicates it into all <ssescalarnum> slots of a PARALLEL and
;;     initialises a vector register from it with vec_init<mode>;
;;   - emits xop_vrotl<mode>3 with that vector as the per-element count.
11064 (define_expand "rotl<mode>3"
11065 [(set (match_operand:VI_128 0 "register_operand" "")
11067 (match_operand:VI_128 1 "nonimmediate_operand" "")
11068 (match_operand:SI 2 "general_operand")))]
11071 /* If we were given a scalar, convert it to parallel */
11072 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11074 rtvec vs = rtvec_alloc (<ssescalarnum>);
11075 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11076 rtx reg = gen_reg_rtx (<MODE>mode);
11077 rtx op2 = operands[2];
11080 if (GET_MODE (op2) != <ssescalarmode>mode)
11082 op2 = gen_reg_rtx (<ssescalarmode>mode);
11083 convert_move (op2, operands[2], false);
11086 for (i = 0; i < <ssescalarnum>; i++)
11087 RTVEC_ELT (vs, i) = op2;
11089 emit_insn (gen_vec_init<mode> (reg, par));
11090 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.  When
;; the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the preparation code:
;;   - converts the count to <ssescalarmode> if it has a different mode;
;;   - replicates it into all <ssescalarnum> slots of a PARALLEL and
;;     initialises a vector register from it with vec_init<mode>;
;;   - negates that vector with neg<mode>2;
;;   - emits xop_vrotl<mode>3 with the negated vector as the count.
11095 (define_expand "rotr<mode>3"
11096 [(set (match_operand:VI_128 0 "register_operand" "")
11098 (match_operand:VI_128 1 "nonimmediate_operand" "")
11099 (match_operand:SI 2 "general_operand")))]
11102 /* If we were given a scalar, convert it to parallel */
11103 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11105 rtvec vs = rtvec_alloc (<ssescalarnum>);
11106 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11107 rtx neg = gen_reg_rtx (<MODE>mode);
11108 rtx reg = gen_reg_rtx (<MODE>mode);
11109 rtx op2 = operands[2];
11112 if (GET_MODE (op2) != <ssescalarmode>mode)
11114 op2 = gen_reg_rtx (<ssescalarmode>mode);
11115 convert_move (op2, operands[2], false);
11118 for (i = 0; i < <ssescalarnum>; i++)
11119 RTVEC_ELT (vs, i) = op2;
11121 emit_insn (gen_vec_init<mode> (reg, par));
11122 emit_insn (gen_neg<mode>2 (neg, reg));
11123 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count for the VI_128 modes.  Operand 1 (register
;; or memory) is the source; operand 2 is an immediate accepted by
;; const_0_to_<sserotatemax>_operand and is printed directly as the vprot
;; count.
11128 (define_insn "xop_rotl<mode>3"
11129 [(set (match_operand:VI_128 0 "register_operand" "=x")
11131 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11132 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11134 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11135 [(set_attr "type" "sseishft")
11136 (set_attr "length_immediate" "1")
11137 (set_attr "mode" "TI")])
;; Rotate right by an immediate count for the VI_128 modes.  Operand 1
;; (register or memory) is the source and operand 2 the immediate count.
;; The output code prints vprot with a complementary count in operands[3],
;; computed as (element width in bits) - count.  The element width must
;; come from the scalar mode: <ssescalarnum> is the number of elements, so
;; <ssescalarnum> * 8 equals the element width only for V4SI.
11139 (define_insn "xop_rotr<mode>3"
11140 [(set (match_operand:VI_128 0 "register_operand" "=x")
11142 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11143 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11146 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11147 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11149 [(set_attr "type" "sseishft")
11150 (set_attr "length_immediate" "1")
11151 (set_attr "mode" "TI")])
;; Rotate-right expander with a per-element vector count (VI_128, all
;; operands in registers).  The count vector (operand 2) is negated with
;; neg<mode>2 into a new pseudo, which is then passed to xop_vrotl<mode>3.
11153 (define_expand "vrotr<mode>3"
11154 [(match_operand:VI_128 0 "register_operand" "")
11155 (match_operand:VI_128 1 "register_operand" "")
11156 (match_operand:VI_128 2 "register_operand" "")]
11159 rtx reg = gen_reg_rtx (<MODE>mode);
11160 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11161 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-left expander with a per-element vector count (VI_128, all
;; operands in registers); forwards its operands unchanged to
;; xop_vrotl<mode>3.
11165 (define_expand "vrotl<mode>3"
11166 [(match_operand:VI_128 0 "register_operand" "")
11167 (match_operand:VI_128 1 "register_operand" "")
11168 (match_operand:VI_128 2 "register_operand" "")]
11171 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a vector count for the VI_128 modes.  The RTL is an
;; if_then_else keyed on operand 2 (the count vector); one arm uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be memory, but
;; the insn condition rejects both being memory.
11175 (define_insn "xop_vrotl<mode>3"
11176 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11177 (if_then_else:VI_128
11179 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11182 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11186 (neg:VI_128 (match_dup 2)))))]
11187 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11188 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11189 [(set_attr "type" "sseishft")
11190 (set_attr "prefix_data16" "0")
11191 (set_attr "prefix_extra" "2")
11192 (set_attr "mode" "TI")])
11194 ;; XOP packed shift instructions.
11195 ;; FIXME: add V2DI back in
;; Logical shift-right expander with a per-element vector count
;; (VI124_128, all operands in registers).  The count vector is negated
;; with neg<mode>2 and the result is passed to xop_lshl<mode>3.
11196 (define_expand "vlshr<mode>3"
11197 [(match_operand:VI124_128 0 "register_operand" "")
11198 (match_operand:VI124_128 1 "register_operand" "")
11199 (match_operand:VI124_128 2 "register_operand" "")]
11202 rtx neg = gen_reg_rtx (<MODE>mode);
11203 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11204 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift-right expander with a per-element vector count
;; (VI124_128, all operands in registers).  The count vector is negated
;; with neg<mode>2 and the result is passed to xop_ashl<mode>3.
11208 (define_expand "vashr<mode>3"
11209 [(match_operand:VI124_128 0 "register_operand" "")
11210 (match_operand:VI124_128 1 "register_operand" "")
11211 (match_operand:VI124_128 2 "register_operand" "")]
11214 rtx neg = gen_reg_rtx (<MODE>mode);
11215 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11216 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift-left expander with a per-element vector count (VI124_128, all
;; operands in registers); forwards its operands unchanged to
;; xop_ashl<mode>3.
11220 (define_expand "vashl<mode>3"
11221 [(match_operand:VI124_128 0 "register_operand" "")
11222 (match_operand:VI124_128 1 "register_operand" "")
11223 (match_operand:VI124_128 2 "register_operand" "")]
11226 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha with a vector count for the VI_128 modes.  The RTL is an
;; if_then_else keyed on operand 2 (the count vector); one arm uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be memory, but
;; the insn condition rejects both being memory.
11230 (define_insn "xop_ashl<mode>3"
11231 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11232 (if_then_else:VI_128
11234 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11237 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11241 (neg:VI_128 (match_dup 2)))))]
11242 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11243 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11244 [(set_attr "type" "sseishft")
11245 (set_attr "prefix_data16" "0")
11246 (set_attr "prefix_extra" "2")
11247 (set_attr "mode" "TI")])
;; vpshl with a vector count for the VI_128 modes.  The RTL is an
;; if_then_else keyed on operand 2 (the count vector); one arm uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be memory, but
;; the insn condition rejects both being memory.
11249 (define_insn "xop_lshl<mode>3"
11250 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11251 (if_then_else:VI_128
11253 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11256 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11260 (neg:VI_128 (match_dup 2)))))]
11261 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11262 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11263 [(set_attr "type" "sseishft")
11264 (set_attr "prefix_data16" "0")
11265 (set_attr "prefix_extra" "2")
11266 (set_attr "mode" "TI")])
11268 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift-left by a scalar SImode count (register or constant).  The
;; count is replicated into all 16 slots of a PARALLEL, a V16QI register is
;; initialised from it with vec_initv16qi, and xop_ashlv16qi3 is emitted
;; with that vector as the per-element count.
11269 (define_expand "ashlv16qi3"
11270 [(match_operand:V16QI 0 "register_operand" "")
11271 (match_operand:V16QI 1 "register_operand" "")
11272 (match_operand:SI 2 "nonmemory_operand" "")]
11275 rtvec vs = rtvec_alloc (16);
11276 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11277 rtx reg = gen_reg_rtx (V16QImode);
11279 for (i = 0; i < 16; i++)
11280 RTVEC_ELT (vs, i) = operands[2];
11282 emit_insn (gen_vec_initv16qi (reg, par));
11283 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count (register or constant).
;; The count is replicated into all 16 slots of a PARALLEL, a V16QI register
;; is initialised from it with vec_initv16qi, and xop_lshlv16qi3 is emitted
;; with that vector as the per-element count.
11287 (define_expand "lshlv16qi3"
11288 [(match_operand:V16QI 0 "register_operand" "")
11289 (match_operand:V16QI 1 "register_operand" "")
11290 (match_operand:SI 2 "nonmemory_operand" "")]
11293 rtvec vs = rtvec_alloc (16);
11294 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11295 rtx reg = gen_reg_rtx (V16QImode);
11297 for (i = 0; i < 16; i++)
11298 RTVEC_ELT (vs, i) = operands[2];
11300 emit_insn (gen_vec_initv16qi (reg, par));
11301 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift-right by a scalar SImode count, implemented with
;; xop_ashlv16qi3 and a negated count.  A CONST_INT count is negated at
;; expand time before being replicated into the 16-element vector.  For a
;; non-constant count the initialised vector is negated with negv16qi2 and
;; the negated vector is used as the shift count.
11305 (define_expand "ashrv16qi3"
11306 [(match_operand:V16QI 0 "register_operand" "")
11307 (match_operand:V16QI 1 "register_operand" "")
11308 (match_operand:SI 2 "nonmemory_operand" "")]
11311 rtvec vs = rtvec_alloc (16);
11312 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11313 rtx reg = gen_reg_rtx (V16QImode);
11315 rtx ele = ((CONST_INT_P (operands[2]))
11316 ? GEN_INT (- INTVAL (operands[2]))
11319 for (i = 0; i < 16; i++)
11320 RTVEC_ELT (vs, i) = ele;
11322 emit_insn (gen_vec_initv16qi (reg, par));
11324 if (!CONST_INT_P (operands[2]))
11326 rtx neg = gen_reg_rtx (V16QImode);
11327 emit_insn (gen_negv16qi2 (neg, reg));
11328 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11331 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift-right by a scalar DImode count, implemented with
;; xop_ashlv2di3 and a negated count.  The negated count "ele" is built in
;; one of three ways:
;;   - CONST_INT count: negated at expand time;
;;   - count not in DImode: converted to DImode with convert_move, then
;;     negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; "ele" is placed in both vector slots, a V2DI register is initialised
;; from it with vec_initv2di, and that register is the shift count.
11336 (define_expand "ashrv2di3"
11337 [(match_operand:V2DI 0 "register_operand" "")
11338 (match_operand:V2DI 1 "register_operand" "")
11339 (match_operand:DI 2 "nonmemory_operand" "")]
11342 rtvec vs = rtvec_alloc (2);
11343 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11344 rtx reg = gen_reg_rtx (V2DImode);
11347 if (CONST_INT_P (operands[2]))
11348 ele = GEN_INT (- INTVAL (operands[2]));
11349 else if (GET_MODE (operands[2]) != DImode)
11351 rtx move = gen_reg_rtx (DImode);
11352 ele = gen_reg_rtx (DImode);
11353 convert_move (move, operands[2], false);
11354 emit_insn (gen_negdi2 (ele, move));
11358 ele = gen_reg_rtx (DImode);
11359 emit_insn (gen_negdi2 (ele, operands[2]));
11362 RTVEC_ELT (vs, 0) = ele;
11363 RTVEC_ELT (vs, 1) = ele;
11364 emit_insn (gen_vec_initv2di (reg, par));
11365 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11369 ;; XOP FRCZ support
;; FRCZ over every mode of the FMAMODE iterator: operand 1 may be a
;; register or memory ("xm"), the result goes to register operand 0.
;; Emits vfrcz<ssemodesuffix>.
11370 (define_insn "xop_frcz<mode>2"
11371 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11373 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11376 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11377 [(set_attr "type" "ssecvt1")
11378 (set_attr "mode" "<MODE>")])
;; Expander for the "vm" FRCZ form on the 128-bit float vector modes.
;; Its preparation code supplies operands[3] as the all-zero constant of
;; the vector mode (CONST0_RTX).
11381 (define_expand "xop_vmfrcz<mode>2"
11382 [(set (match_operand:VF_128 0 "register_operand")
11385 [(match_operand:VF_128 1 "nonimmediate_operand")]
11391 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the "vm" FRCZ form: operand 1 is the register-or-memory
;; source and operand 2 must be a constant-zero vector (const0_operand).
;; Emits vfrcz<ssescalarmodesuffix>, i.e. the scalar-suffixed mnemonic.
11394 (define_insn "*xop_vmfrcz_<mode>"
11395 [(set (match_operand:VF_128 0 "register_operand" "=x")
11398 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11400 (match_operand:VF_128 2 "const0_operand")
11403 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11404 [(set_attr "type" "ssecvt1")
11405 (set_attr "mode" "<MODE>")])
;; Integer vector compare on the VI_128 modes.  Operand 1 is the
;; comparison rtx itself, accepted by ix86_comparison_int_operator, with
;; operand 2 (register) and operand 3 (register or memory) as its inputs.
;; The mnemonic is vpcom<cond><ssemodesuffix>, where <cond> is printed by
;; the %Y1 operand modifier (presumably from operand 1's comparison
;; code -- confirm in the operand printer).
11407 (define_insn "xop_maskcmp<mode>3"
11408 [(set (match_operand:VI_128 0 "register_operand" "=x")
11409 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11410 [(match_operand:VI_128 2 "register_operand" "x")
11411 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11413 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11414 [(set_attr "type" "sse4arg")
11415 (set_attr "prefix_data16" "0")
11416 (set_attr "prefix_rep" "0")
11417 (set_attr "prefix_extra" "2")
11418 (set_attr "length_immediate" "1")
11419 (set_attr "mode" "TI")])
;; Counterpart of the integer compare for operators accepted by
;; ix86_comparison_uns_operator.  The template differs only by the "u"
;; inserted between the condition (%Y1) and the mode suffix.
11421 (define_insn "xop_maskcmp_uns<mode>3"
11422 [(set (match_operand:VI_128 0 "register_operand" "=x")
11423 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11424 [(match_operand:VI_128 2 "register_operand" "x")
11425 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11427 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11428 [(set_attr "type" "ssecmp")
11429 (set_attr "prefix_data16" "0")
11430 (set_attr "prefix_rep" "0")
11431 (set_attr "prefix_extra" "2")
11432 (set_attr "length_immediate" "1")
11433 (set_attr "mode" "TI")])
11435 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11436 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11437 ;; the exact instruction generated for the intrinsic.
;; The comparison (operand 1, ix86_comparison_uns_operator) is wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP rather than appearing as a bare comparison
;; rtx; the output template is the same "vpcom<cond>u" form.
11438 (define_insn "xop_maskcmp_uns2<mode>3"
11439 [(set (match_operand:VI_128 0 "register_operand" "=x")
11441 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11442 [(match_operand:VI_128 2 "register_operand" "x")
11443 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11444 UNSPEC_XOP_UNSIGNED_CMP))]
11446 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11447 [(set_attr "type" "ssecmp")
11448 (set_attr "prefix_data16" "0")
11449 (set_attr "prefix_extra" "2")
11450 (set_attr "length_immediate" "1")
11451 (set_attr "mode" "TI")])
11453 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11454 ;; being added here to be complete.
;; Operand 3 is a compile-time integer constant that only selects the
;; mnemonic: a nonzero value prints vpcomtrue<ssemodesuffix>, zero prints
;; vpcomfalse<ssemodesuffix>.  Operands 1 and 2 are the two vector inputs.
11455 (define_insn "xop_pcom_tf<mode>3"
11456 [(set (match_operand:VI_128 0 "register_operand" "=x")
11458 [(match_operand:VI_128 1 "register_operand" "x")
11459 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11460 (match_operand:SI 3 "const_int_operand" "n")]
11461 UNSPEC_XOP_TRUEFALSE))]
11464 return ((INTVAL (operands[3]) != 0)
11465 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11466 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11468 [(set_attr "type" "ssecmp")
11469 (set_attr "prefix_data16" "0")
11470 (set_attr "prefix_extra" "2")
11471 (set_attr "length_immediate" "1")
11472 (set_attr "mode" "TI")])
;; Two-source permute on the float vector modes.  Operands 1 and 2 are
;; the float vector inputs, operand 3 is a selector in the matching
;; integer vector mode (<sseintvecmode>), and operand 4 is an immediate
;; restricted to 0..3 (const_0_to_3_operand).
;; NOTE(review): operand 2 pairs a memory-accepting predicate
;; (nonimmediate_operand) with a register-only constraint and carries the
;; "%" commutativity marker even though operand 3 has a different mode --
;; confirm that this is intended.
11474 (define_insn "xop_vpermil2<mode>3"
11475 [(set (match_operand:VF 0 "register_operand" "=x")
11477 [(match_operand:VF 1 "register_operand" "x")
11478 (match_operand:VF 2 "nonimmediate_operand" "%x")
11479 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11480 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11483 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11484 [(set_attr "type" "sse4arg")
11485 (set_attr "length_immediate" "1")
11486 (set_attr "mode" "<MODE>")])
11488 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AESENC with two alternatives selected by the "isa" attribute:
;;   0 (noavx): two-operand form, operand 1 is tied to the destination
;;              ("0") and "aesenc" is printed;
;;   1 (avx):   three-operand VEX form "vaesenc".
;; Operand 2 may be a register or memory in both alternatives.
11490 (define_insn "aesenc"
11491 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11492 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11493 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11497 aesenc\t{%2, %0|%0, %2}
11498 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11499 [(set_attr "isa" "noavx,avx")
11500 (set_attr "type" "sselog1")
11501 (set_attr "prefix_extra" "1")
11502 (set_attr "prefix" "orig,vex")
11503 (set_attr "mode" "TI")])
;; AESENCLAST (UNSPEC_AESENCLAST) with two alternatives selected by the
;; "isa" attribute: the non-AVX two-operand form with operand 1 tied to
;; the destination, and the three-operand VEX form "vaesenclast".
11505 (define_insn "aesenclast"
11506 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11507 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11508 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11509 UNSPEC_AESENCLAST))]
11512 aesenclast\t{%2, %0|%0, %2}
11513 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11514 [(set_attr "isa" "noavx,avx")
11515 (set_attr "type" "sselog1")
11516 (set_attr "prefix_extra" "1")
11517 (set_attr "prefix" "orig,vex")
11518 (set_attr "mode" "TI")])
;; AESDEC with two alternatives selected by the "isa" attribute: the
;; non-AVX two-operand form with operand 1 tied to the destination ("0"),
;; and the three-operand VEX form "vaesdec".
11520 (define_insn "aesdec"
11521 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11522 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11523 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11527 aesdec\t{%2, %0|%0, %2}
11528 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11529 [(set_attr "isa" "noavx,avx")
11530 (set_attr "type" "sselog1")
11531 (set_attr "prefix_extra" "1")
11532 (set_attr "prefix" "orig,vex")
11533 (set_attr "mode" "TI")])
;; AESDECLAST (UNSPEC_AESDECLAST) with two alternatives selected by the
;; "isa" attribute: the non-AVX two-operand form with operand 1 tied to
;; the destination, and the three-operand VEX form "vaesdeclast".
11535 (define_insn "aesdeclast"
11536 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11537 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11538 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11539 UNSPEC_AESDECLAST))]
11542 aesdeclast\t{%2, %0|%0, %2}
11543 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11544 [(set_attr "isa" "noavx,avx")
11545 (set_attr "type" "sselog1")
11546 (set_attr "prefix_extra" "1")
11547 (set_attr "prefix" "orig,vex")
11548 (set_attr "mode" "TI")])
;; AESIMC: one register-or-memory source, one register destination.
;; A single template serves both encodings through the "%v" prefix and
;; the "maybe_vex" prefix attribute (presumably "%v" prints the "v" only
;; when the VEX form is used -- confirm in the operand printer).
11550 (define_insn "aesimc"
11551 [(set (match_operand:V2DI 0 "register_operand" "=x")
11552 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11555 "%vaesimc\t{%1, %0|%0, %1}"
11556 [(set_attr "type" "sselog1")
11557 (set_attr "prefix_extra" "1")
11558 (set_attr "prefix" "maybe_vex")
11559 (set_attr "mode" "TI")])
;; AESKEYGENASSIST (UNSPEC_AESKEYGENASSIST): operand 1 is the
;; register-or-memory source and operand 2 an immediate restricted to
;; 0..255.  Uses the same "%v" / "maybe_vex" single-template scheme for
;; the legacy and VEX encodings.
11561 (define_insn "aeskeygenassist"
11562 [(set (match_operand:V2DI 0 "register_operand" "=x")
11563 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11564 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11565 UNSPEC_AESKEYGENASSIST))]
11567 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11568 [(set_attr "type" "sselog1")
11569 (set_attr "prefix_extra" "1")
11570 (set_attr "length_immediate" "1")
11571 (set_attr "prefix" "maybe_vex")
11572 (set_attr "mode" "TI")])
;; PCLMULQDQ with an 8-bit immediate (operand 3, 0..255).  Two
;; alternatives selected by the "isa" attribute: the non-AVX form with
;; operand 1 tied to the destination ("0"), and the four-operand VEX form
;; "vpclmulqdq".
11574 (define_insn "pclmulqdq"
11575 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11576 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11577 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11578 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11582 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11583 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11584 [(set_attr "isa" "noavx,avx")
11585 (set_attr "type" "sselog1")
11586 (set_attr "prefix_extra" "1")
11587 (set_attr "length_immediate" "1")
11588 (set_attr "prefix" "orig,vex")
11589 (set_attr "mode" "TI")])
;; Expander that builds its PARALLEL by hand in the preparation code:
;; element 0 is an UNSPEC_VOLATILE wrapping const0_rtx, followed by one
;; SET per SSE register that stores the V8SImode zero constant into it.
;; The register count is 16 when TARGET_64BIT and 8 otherwise.
11591 (define_expand "avx_vzeroall"
11592 [(match_par_dup 0 [(const_int 0)])]
11595 int nregs = TARGET_64BIT ? 16 : 8;
11598 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11600 XVECEXP (operands[0], 0, 0)
11601 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11604 for (regno = 0; regno < nregs; regno++)
11605 XVECEXP (operands[0], 0, regno + 1)
11606 = gen_rtx_SET (VOIDmode,
11607 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11608 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; declare no ModRM byte and no memory access.
11611 (define_insn "*avx_vzeroall"
11612 [(match_parallel 0 "vzeroall_operation"
11613 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11616 [(set_attr "type" "sse")
11617 (set_attr "modrm" "0")
11618 (set_attr "memory" "none")
11619 (set_attr "prefix" "vex")
11620 (set_attr "mode" "OI")])
11622 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11623 ;; if the upper 128bits are unused.
;; Modelled as an unspec_volatile (UNSPECV_VZEROUPPER) carrying one
;; const_int operand.  NOTE(review): the meaning of operand 0 is not
;; visible in this pattern -- check the code that generates it.
11624 (define_insn "avx_vzeroupper"
11625 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11626 UNSPECV_VZEROUPPER)]
11629 [(set_attr "type" "sse")
11630 (set_attr "modrm" "0")
11631 (set_attr "memory" "none")
11632 (set_attr "prefix" "vex")
11633 (set_attr "mode" "OI")])
;; Maps an integer vector mode to the 128-bit vector mode with the same
;; element type: each 256-bit mode maps to its half-width counterpart and
;; each 128-bit mode maps to itself.
11635 (define_mode_attr AVXTOSSEMODE
11636 [(V4DI "V2DI") (V2DI "V2DI")
11637 (V8SI "V4SI") (V4SI "V4SI")
11638 (V16HI "V8HI") (V8HI "V8HI")
11639 (V32QI "V16QI") (V16QI "V16QI")])
;; Broadcast for the VI integer vector modes.  The source (operand 1) is
;; always in the 128-bit mode given by AVXTOSSEMODE, may be a register or
;; memory, and only its element 0 is selected.  Emits
;; vpbroadcast<ssemodesuffix>.
11641 (define_insn "avx2_pbroadcast<mode>"
11642 [(set (match_operand:VI 0 "register_operand" "=x")
11644 (vec_select:<ssescalarmode>
11645 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11646 (parallel [(const_int 0)]))))]
11648 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11649 [(set_attr "type" "ssemov")
11650 (set_attr "prefix_extra" "1")
11651 (set_attr "prefix" "vex")
11652 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpermd with a register operand 1 and a register-or-memory
;; operand 2, both V8SI.
;; NOTE(review): which of the two operands is the index vector is not
;; evident from the visible lines -- confirm against the instruction
;; reference and the builtin that expands to this pattern.
11654 (define_insn "avx2_permvarv8si"
11655 [(set (match_operand:V8SI 0 "register_operand" "=x")
11657 [(match_operand:V8SI 1 "register_operand" "x")
11658 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11661 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11662 [(set_attr "type" "sselog")
11663 (set_attr "prefix" "vex")
11664 (set_attr "mode" "OI")])
;; Emits vpermpd: operand 1 is the V4DF source and operand 2 the 8-bit
;; immediate (0..255).  The source constraint is "xm", so the predicate
;; is nonimmediate_operand to let a memory source be matched directly,
;; as avx2_permv4di_1 does; a register_operand predicate would reject
;; the memory form that the constraint advertises.
11666 (define_insn "avx2_permv4df"
11667 [(set (match_operand:V4DF 0 "register_operand" "=x")
11669 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11670 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11673 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11674 [(set_attr "type" "sselog")
11675 (set_attr "prefix_extra" "1")
11676 (set_attr "prefix" "vex")
11677 (set_attr "mode" "OI")])
;; Emits vpermps with a register operand 1 and a register-or-memory
;; operand 2.  Both operands are declared V8SF here.
;; NOTE(review): which operand is the index vector is not evident from
;; the visible lines -- confirm against the instruction reference.
11679 (define_insn "avx2_permvarv8sf"
11680 [(set (match_operand:V8SF 0 "register_operand" "=x")
11682 [(match_operand:V8SF 1 "register_operand" "x")
11683 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11686 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11687 [(set_attr "type" "sselog")
11688 (set_attr "prefix" "vex")
11689 (set_attr "mode" "OI")])
;; Expander taking the permutation as one 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit element selectors (bits 1:0,
;; 3:2, 5:4, 7:6), which are passed as separate operands to
;; gen_avx2_permv4di_1.
11691 (define_expand "avx2_permv4di"
11692 [(match_operand:V4DI 0 "register_operand" "")
11693 (match_operand:V4DI 1 "nonimmediate_operand" "")
11694 (match_operand:SI 2 "const_0_to_255_operand" "")]
11697 int mask = INTVAL (operands[2]);
11698 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11699 GEN_INT ((mask >> 0) & 3),
11700 GEN_INT ((mask >> 2) & 3),
11701 GEN_INT ((mask >> 4) & 3),
11702 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V4DI permute with the four element selectors as
;; separate operands 2..5, each restricted to 0..3.  The output code
;; packs them back into one immediate (2 bits per selector, operand 2 in
;; the low bits), overwrites operands[2] with it for printing, and emits
;; vpermq.
11706 (define_insn "avx2_permv4di_1"
11707 [(set (match_operand:V4DI 0 "register_operand" "=x")
11709 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11710 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11711 (match_operand 3 "const_0_to_3_operand" "")
11712 (match_operand 4 "const_0_to_3_operand" "")
11713 (match_operand 5 "const_0_to_3_operand" "")])))]
11717 mask |= INTVAL (operands[2]) << 0;
11718 mask |= INTVAL (operands[3]) << 2;
11719 mask |= INTVAL (operands[4]) << 4;
11720 mask |= INTVAL (operands[5]) << 6;
11721 operands[2] = GEN_INT (mask);
11722 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11724 [(set_attr "type" "sselog")
11725 (set_attr "prefix" "vex")
11726 (set_attr "mode" "OI")])
;; Emits vperm2i128 on two V4DI inputs (operand 1 register, operand 2
;; register or memory) with an 8-bit immediate as operand 3.
11728 (define_insn "avx2_permv2ti"
11729 [(set (match_operand:V4DI 0 "register_operand" "=x")
11731 [(match_operand:V4DI 1 "register_operand" "x")
11732 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11733 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11736 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11737 [(set_attr "type" "sselog")
11738 (set_attr "prefix" "vex")
11739 (set_attr "mode" "OI")])
;; Duplicates element 0 of a V2DF register (operand 1) into all elements
;; of a V4DF register.  Emits vbroadcastsd with a register source.
11741 (define_insn "avx2_vec_dupv4df"
11742 [(set (match_operand:V4DF 0 "register_operand" "=x")
11743 (vec_duplicate:V4DF
11745 (match_operand:V2DF 1 "register_operand" "x")
11746 (parallel [(const_int 0)]))))]
11748 "vbroadcastsd\t{%1, %0|%0, %1}"
11749 [(set_attr "type" "sselog1")
11750 (set_attr "prefix" "vex")
11751 (set_attr "mode" "V4DF")])
11753 ;; Modes handled by AVX vec_dup patterns.
;; The list holds the 256-bit modes with 32-bit and 64-bit elements, in
;; both their integer and floating-point variants.
11754 (define_mode_iterator AVX_VEC_DUP_MODE
11755 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into each AVX_VEC_DUP_MODE vector.  Two
;; alternatives: a memory source, printed as
;; vbroadcast<ssescalarmodesuffix>, and a register source that is
;; discouraged with "?".
;; NOTE(review): the output template line of the register alternative is
;; not visible in this excerpt.
11757 (define_insn "vec_dup<mode>"
11758 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11759 (vec_duplicate:AVX_VEC_DUP_MODE
11760 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11763 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11765 [(set_attr "type" "ssemov")
11766 (set_attr "prefix_extra" "1")
11767 (set_attr "prefix" "vex")
11768 (set_attr "mode" "V8SF")])
;; Emits vbroadcasti128 for the VI_256 modes.  The source (operand 1) is
;; in the half-width vector mode and must be in memory (memory_operand,
;; constraint "m").
11770 (define_insn "avx2_vbroadcasti128_<mode>"
11771 [(set (match_operand:VI_256 0 "register_operand" "=x")
11773 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11776 "vbroadcasti128\t{%1, %0|%0, %1}"
11777 [(set_attr "type" "ssemov")
11778 (set_attr "prefix_extra" "1")
11779 (set_attr "prefix" "vex")
11780 (set_attr "mode" "OI")])
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt.  Under "TARGET_AVX && reload_completed", a vec_duplicate
;; of a register scalar into an AVX_VEC_DUP_MODE register is replaced by
;; two sets: a half-width vec_duplicate into operand 2, then a vec_concat
;; of operand 2 with itself.  Operand 2 is created in the preparation
;; code as the half-width view of operand 0's own hard register.
11783 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11784 (vec_duplicate:AVX_VEC_DUP_MODE
11785 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11786 "TARGET_AVX && reload_completed"
11787 [(set (match_dup 2)
11788 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11790 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11791 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector (operand 1) into both halves of a
;; 256-bit register.  Three alternatives:
;;   0: memory source             -> vbroadcast<i128>
;;   1: source tied to operand 0  -> vinsert<i128> with immediate 1
;;   2: other register (discouraged with "?")
;;                                -> vperm2<i128> with immediate 0,
;;                                   printing operand 1 through %t1
;; Only the last two alternatives carry an immediate byte, as recorded in
;; length_immediate.
11793 (define_insn "avx_vbroadcastf128_<mode>"
11794 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11796 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11800 vbroadcast<i128>\t{%1, %0|%0, %1}
11801 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11802 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11803 [(set_attr "type" "ssemov,sselog1,sselog1")
11804 (set_attr "prefix_extra" "1")
11805 (set_attr "length_immediate" "0,1,1")
11806 (set_attr "prefix" "vex")
11807 (set_attr "mode" "<sseinsnmode>")])
11809 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11810 ;; If it so happens that the input is in memory, use vbroadcast.
11811 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the selected element index (elt) and
;; operand 2 the whole selection PARALLEL, which must satisfy
;; avx_vbroadcast_operand.  The output code switches on the alternative:
;;   - memory source: operand 1 is re-addressed to the SFmode element at
;;     byte offset elt * 4 and vbroadcastss is emitted;
;;   - register source: vpermilps is emitted with immediate elt * 0x55,
;;     i.e. the 2-bit index repeated in all four selector fields.
11812 (define_insn "*avx_vperm_broadcast_v4sf"
11813 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11815 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11816 (match_parallel 2 "avx_vbroadcast_operand"
11817 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11820 int elt = INTVAL (operands[3]);
11821 switch (which_alternative)
11825 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11826 return "vbroadcastss\t{%1, %0|%0, %1}";
11828 operands[2] = GEN_INT (elt * 0x55);
11829 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11831 gcc_unreachable ();
11834 [(set_attr "type" "ssemov,ssemov,sselog1")
11835 (set_attr "prefix_extra" "1")
11836 (set_attr "length_immediate" "0,0,1")
11837 (set_attr "prefix" "vex")
11838 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float case, split after reload ("&& reload_completed").
;; Two code paths are visible in the split preparation code:
;;   - an in-register sequence: gen_avx_vpermil<mode> replicates the
;;     wanted element within its 128-bit lane (V4DF mask 15 or 0 from the
;;     low bit of elt, otherwise (elt & 3) * 0x55), then
;;     gen_avx_vperm2f128<mode>3 copies the lane holding the element into
;;     both halves using mask (elt / (elements per lane)) * 0x11;
;;   - a re-addressing of operand 1 to the selected scalar element, which
;;     feeds the vec_duplicate replacement pattern.
;; NOTE(review): the conditions choosing between the two paths are not
;; visible in this excerpt.
11840 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11841 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11843 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11844 (match_parallel 2 "avx_vbroadcast_operand"
11845 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11848 "&& reload_completed"
11849 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11851 rtx op0 = operands[0], op1 = operands[1];
11852 int elt = INTVAL (operands[3]);
11858 /* Shuffle element we care about into all elements of the 128-bit lane.
11859 The other lane gets shuffled too, but we don't care. */
11860 if (<MODE>mode == V4DFmode)
11861 mask = (elt & 1 ? 15 : 0);
11863 mask = (elt & 3) * 0x55;
11864 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11866 /* Shuffle the lane we care about into both lanes of the dest. */
11867 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11868 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11872 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11873 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate permute on the DFmode vector modes (VF2).
;; The immediate (operand 2) is decoded one bit per element into explicit
;; element indices: bits 0 and 1 choose within elements 0..1 and, for
;; V4DF only, bits 2 and 3 choose within elements 2..3 (hence "+ 2").
;; The indices are then wrapped in a PARALLEL of <ssescalarnum> entries.
11876 (define_expand "avx_vpermil<mode>"
11877 [(set (match_operand:VF2 0 "register_operand" "")
11879 (match_operand:VF2 1 "nonimmediate_operand" "")
11880 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11883 int mask = INTVAL (operands[2]);
11884 rtx perm[<ssescalarnum>];
11886 perm[0] = GEN_INT (mask & 1);
11887 perm[1] = GEN_INT ((mask >> 1) & 1);
11888 if (<MODE>mode == V4DFmode)
11890 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11891 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11895 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate permute on the SFmode vector modes (VF1).
;; The immediate (operand 2) is decoded two bits per element into explicit
;; element indices 0..3; for V8SF the same four 2-bit fields are reused
;; for elements 4..7 with 4 added, so both 128-bit halves are permuted
;; alike.  The indices are then wrapped in a PARALLEL of <ssescalarnum>
;; entries.
11898 (define_expand "avx_vpermil<mode>"
11899 [(set (match_operand:VF1 0 "register_operand" "")
11901 (match_operand:VF1 1 "nonimmediate_operand" "")
11902 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11905 int mask = INTVAL (operands[2]);
11906 rtx perm[<ssescalarnum>];
11908 perm[0] = GEN_INT (mask & 3);
11909 perm[1] = GEN_INT ((mask >> 2) & 3);
11910 perm[2] = GEN_INT ((mask >> 4) & 3);
11911 perm[3] = GEN_INT ((mask >> 6) & 3);
11912 if (<MODE>mode == V8SFmode)
11914 perm[4] = GEN_INT ((mask & 3) + 4);
11915 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11916 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11917 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11921 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn matching a selection PARALLEL (operand 2) that
;; avx_vpermilp_parallel accepts for the mode.  That helper returns the
;; instruction immediate plus one (the output code subtracts 1), so a
;; zero return doubles as "no match" in the insn condition.  The output
;; code replaces operands[2] with the immediate before printing
;; vpermil<ssemodesuffix>.
11924 (define_insn "*avx_vpermilp<mode>"
11925 [(set (match_operand:VF 0 "register_operand" "=x")
11927 (match_operand:VF 1 "nonimmediate_operand" "xm")
11928 (match_parallel 2 ""
11929 [(match_operand 3 "const_int_operand" "")])))]
11931 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11933 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11934 operands[2] = GEN_INT (mask);
11935 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11937 [(set_attr "type" "sselog")
11938 (set_attr "prefix_extra" "1")
11939 (set_attr "length_immediate" "1")
11940 (set_attr "prefix" "vex")
11941 (set_attr "mode" "<MODE>")])
;; Variable form of vpermil: operand 1 is the float vector in a register
;; and operand 2 the selector in the matching integer vector mode
;; (<sseintvecmode>), register or memory.  No immediate byte is used.
11943 (define_insn "avx_vpermilvar<mode>3"
11944 [(set (match_operand:VF 0 "register_operand" "=x")
11946 [(match_operand:VF 1 "register_operand" "x")
11947 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11950 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11951 [(set_attr "type" "sselog")
11952 (set_attr "prefix_extra" "1")
11953 (set_attr "prefix" "vex")
11954 (set_attr "mode" "<MODE>")])
;; Expander whose default pattern is the UNSPEC_VPERMIL2F128 form.  When
;; neither bit of 0x88 is set in the immediate (operand 3), the
;; preparation code instead builds an explicit vec_select over the
;; vec_concat of operands 1 and 2: immediate bits 1:0 pick the half-vector
;; that fills the low half of the result and bits 5:4 the one that fills
;; the high half (each value is scaled by nelt / 2 to get the first
;; element index).
;; NOTE(review): the lines that emit the resulting SET are not visible in
;; this excerpt.
11956 (define_expand "avx_vperm2f128<mode>3"
11957 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11958 (unspec:AVX256MODE2P
11959 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11960 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11961 (match_operand:SI 3 "const_0_to_255_operand" "")]
11962 UNSPEC_VPERMIL2F128))]
11965 int mask = INTVAL (operands[3]);
11966 if ((mask & 0x88) == 0)
11968 rtx perm[<ssescalarnum>], t1, t2;
11969 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11971 base = (mask & 3) * nelt2;
11972 for (i = 0; i < nelt2; ++i)
11973 perm[i] = GEN_INT (base + i);
11975 base = ((mask >> 4) & 3) * nelt2;
11976 for (i = 0; i < nelt2; ++i)
11977 perm[i + nelt2] = GEN_INT (base + i);
11979 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11980 operands[1], operands[2]);
11981 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11982 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11983 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11989 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11990 ;; means that in order to represent this properly in rtl we'd have to
11991 ;; nest *another* vec_concat with a zero operand and do the select from
11992 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form (UNSPEC_VPERMIL2F128) accepting any 8-bit immediate as
;; operand 3 and printing it unchanged in vperm2<i128>.
11993 (define_insn "*avx_vperm2f128<mode>_full"
11994 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11995 (unspec:AVX256MODE2P
11996 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11997 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11998 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11999 UNSPEC_VPERMIL2F128))]
12001 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12002 [(set_attr "type" "sselog")
12003 (set_attr "prefix_extra" "1")
12004 (set_attr "length_immediate" "1")
12005 (set_attr "prefix" "vex")
12006 (set_attr "mode" "<sseinsnmode>")])
;; Explicit-RTL form: a vec_select from the vec_concat of operands 1 and
;; 2, with the selection PARALLEL as operand 3.  The insn matches only
;; when avx_vperm2f128_parallel accepts that PARALLEL; the helper returns
;; the immediate plus one, so the output code subtracts 1, stores the
;; result in operands[3] and prints vperm2<i128>.
12008 (define_insn "*avx_vperm2f128<mode>_nozero"
12009 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12010 (vec_select:AVX256MODE2P
12011 (vec_concat:<ssedoublevecmode>
12012 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12013 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12014 (match_parallel 3 ""
12015 [(match_operand 4 "const_int_operand" "")])))]
12017 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12019 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12020 operands[3] = GEN_INT (mask);
12021 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12023 [(set_attr "type" "sselog")
12024 (set_attr "prefix_extra" "1")
12025 (set_attr "length_immediate" "1")
12026 (set_attr "prefix" "vex")
12027 (set_attr "mode" "<sseinsnmode>")])
;; Expander that inserts a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3 is restricted to 0 or 1 and selects,
;; through a switch, between gen_vec_set_lo_<mode> and
;; gen_vec_set_hi_<mode>; the chosen generator is then called with
;; operands 0..2.
;; NOTE(review): the case labels are not visible in this excerpt;
;; presumably 0 selects the low half and 1 the high half.
12029 (define_expand "avx_vinsertf128<mode>"
12030 [(match_operand:V_256 0 "register_operand" "")
12031 (match_operand:V_256 1 "register_operand" "")
12032 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12033 (match_operand:SI 3 "const_0_to_1_operand" "")]
12036 rtx (*insn)(rtx, rtx, rtx);
12038 switch (INTVAL (operands[3]))
12041 insn = gen_vec_set_lo_<mode>;
12044 insn = gen_vec_set_hi_<mode>;
12047 gcc_unreachable ();
12050 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replaces the low half of a V4DI value: the result is operand 2 (V2DI,
;; register or memory) followed by elements 2..3 of operand 1.  Emits
;; vinserti128 with immediate 0.
12054 (define_insn "avx2_vec_set_lo_v4di"
12055 [(set (match_operand:V4DI 0 "register_operand" "=x")
12057 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12059 (match_operand:V4DI 1 "register_operand" "x")
12060 (parallel [(const_int 2) (const_int 3)]))))]
12062 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12063 [(set_attr "type" "sselog")
12064 (set_attr "prefix_extra" "1")
12065 (set_attr "length_immediate" "1")
12066 (set_attr "prefix" "vex")
12067 (set_attr "mode" "OI")])
;; Replaces the high half of a V4DI value: the result is elements 0..1 of
;; operand 1 followed by operand 2 (V2DI, register or memory).  Emits
;; vinserti128 with immediate 1.
12069 (define_insn "avx2_vec_set_hi_v4di"
12070 [(set (match_operand:V4DI 0 "register_operand" "=x")
12073 (match_operand:V4DI 1 "register_operand" "x")
12074 (parallel [(const_int 0) (const_int 1)]))
12075 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12077 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12078 [(set_attr "type" "sselog")
12079 (set_attr "prefix_extra" "1")
12080 (set_attr "length_immediate" "1")
12081 (set_attr "prefix" "vex")
12082 (set_attr "mode" "OI")])
;; Low-half insert for the VI8F_256 modes: the result is operand 2
;; (half-width, register or memory) concatenated with elements 2..3 of
;; operand 1.  Emits vinsert<i128> with immediate 0.
12084 (define_insn "vec_set_lo_<mode>"
12085 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12086 (vec_concat:VI8F_256
12087 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12088 (vec_select:<ssehalfvecmode>
12089 (match_operand:VI8F_256 1 "register_operand" "x")
12090 (parallel [(const_int 2) (const_int 3)]))))]
12092 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12093 [(set_attr "type" "sselog")
12094 (set_attr "prefix_extra" "1")
12095 (set_attr "length_immediate" "1")
12096 (set_attr "prefix" "vex")
12097 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the VI8F_256 modes: the result is elements 0..1
;; of operand 1 concatenated with operand 2 (half-width, register or
;; memory).  Emits vinsert<i128> with immediate 1.
12099 (define_insn "vec_set_hi_<mode>"
12100 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12101 (vec_concat:VI8F_256
12102 (vec_select:<ssehalfvecmode>
12103 (match_operand:VI8F_256 1 "register_operand" "x")
12104 (parallel [(const_int 0) (const_int 1)]))
12105 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12107 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12108 [(set_attr "type" "sselog")
12109 (set_attr "prefix_extra" "1")
12110 (set_attr "length_immediate" "1")
12111 (set_attr "prefix" "vex")
12112 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for the VI4F_256 modes: the result is operand 2
;; (half-width, register or memory) concatenated with elements 4..7 of
;; operand 1.  Emits vinsert<i128> with immediate 0.
12114 (define_insn "vec_set_lo_<mode>"
12115 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12116 (vec_concat:VI4F_256
12117 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12118 (vec_select:<ssehalfvecmode>
12119 (match_operand:VI4F_256 1 "register_operand" "x")
12120 (parallel [(const_int 4) (const_int 5)
12121 (const_int 6) (const_int 7)]))))]
12123 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12124 [(set_attr "type" "sselog")
12125 (set_attr "prefix_extra" "1")
12126 (set_attr "length_immediate" "1")
12127 (set_attr "prefix" "vex")
12128 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the VI4F_256 modes: the result is elements 0..3
;; of operand 1 concatenated with operand 2 (half-width, register or
;; memory).  Emits vinsert<i128> with immediate 1.
12130 (define_insn "vec_set_hi_<mode>"
12131 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12132 (vec_concat:VI4F_256
12133 (vec_select:<ssehalfvecmode>
12134 (match_operand:VI4F_256 1 "register_operand" "x")
12135 (parallel [(const_int 0) (const_int 1)
12136 (const_int 2) (const_int 3)]))
12137 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12139 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12140 [(set_attr "type" "sselog")
12141 (set_attr "prefix_extra" "1")
12142 (set_attr "length_immediate" "1")
12143 (set_attr "prefix" "vex")
12144 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: the result is operand 2 (V8HI, register or
;; memory) followed by elements 8..15 of operand 1.  The mnemonic is
;; printed as "vinsert%~128" with immediate 0.
;; NOTE(review): "%~" presumably expands to "i" or "f" depending on the
;; enabled ISA -- confirm in the operand printer.
12146 (define_insn "vec_set_lo_v16hi"
12147 [(set (match_operand:V16HI 0 "register_operand" "=x")
12149 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12151 (match_operand:V16HI 1 "register_operand" "x")
12152 (parallel [(const_int 8) (const_int 9)
12153 (const_int 10) (const_int 11)
12154 (const_int 12) (const_int 13)
12155 (const_int 14) (const_int 15)]))))]
12157 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12158 [(set_attr "type" "sselog")
12159 (set_attr "prefix_extra" "1")
12160 (set_attr "length_immediate" "1")
12161 (set_attr "prefix" "vex")
12162 (set_attr "mode" "OI")])
;; High-half insert for V16HI: the result is elements 0..7 of operand 1
;; followed by operand 2 (V8HI, register or memory).  The mnemonic is
;; printed as "vinsert%~128" with immediate 1.
12164 (define_insn "vec_set_hi_v16hi"
12165 [(set (match_operand:V16HI 0 "register_operand" "=x")
12168 (match_operand:V16HI 1 "register_operand" "x")
12169 (parallel [(const_int 0) (const_int 1)
12170 (const_int 2) (const_int 3)
12171 (const_int 4) (const_int 5)
12172 (const_int 6) (const_int 7)]))
12173 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12175 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12176 [(set_attr "type" "sselog")
12177 (set_attr "prefix_extra" "1")
12178 (set_attr "length_immediate" "1")
12179 (set_attr "prefix" "vex")
12180 (set_attr "mode" "OI")])
;; Low-half insert for V32QI: the result is operand 2 (V16QI, register or
;; memory) followed by elements 16..31 of operand 1.  The mnemonic is
;; printed as "vinsert%~128" with immediate 0.
12182 (define_insn "vec_set_lo_v32qi"
12183 [(set (match_operand:V32QI 0 "register_operand" "=x")
12185 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12187 (match_operand:V32QI 1 "register_operand" "x")
12188 (parallel [(const_int 16) (const_int 17)
12189 (const_int 18) (const_int 19)
12190 (const_int 20) (const_int 21)
12191 (const_int 22) (const_int 23)
12192 (const_int 24) (const_int 25)
12193 (const_int 26) (const_int 27)
12194 (const_int 28) (const_int 29)
12195 (const_int 30) (const_int 31)]))))]
12197 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12198 [(set_attr "type" "sselog")
12199 (set_attr "prefix_extra" "1")
12200 (set_attr "length_immediate" "1")
12201 (set_attr "prefix" "vex")
12202 (set_attr "mode" "OI")])
;; High-half insert for V32QI: the result is elements 0..15 of operand 1
;; followed by operand 2 (V16QI, register or memory).  The mnemonic is
;; printed as "vinsert%~128" with immediate 1.
12204 (define_insn "vec_set_hi_v32qi"
12205 [(set (match_operand:V32QI 0 "register_operand" "=x")
12208 (match_operand:V32QI 1 "register_operand" "x")
12209 (parallel [(const_int 0) (const_int 1)
12210 (const_int 2) (const_int 3)
12211 (const_int 4) (const_int 5)
12212 (const_int 6) (const_int 7)
12213 (const_int 8) (const_int 9)
12214 (const_int 10) (const_int 11)
12215 (const_int 12) (const_int 13)
12216 (const_int 14) (const_int 15)]))
12217 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12219 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12220 [(set_attr "type" "sselog")
12221 (set_attr "prefix_extra" "1")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "prefix" "vex")
12224 (set_attr "mode" "OI")])
;; Masked load: operand 1 is the memory source, operand 2 the mask in the
;; matching integer vector mode (register), operand 0 the destination
;; register.  Emits v<sseintprefix>maskmov<ssemodesuffix>.
12226 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12227 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12229 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12230 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12233 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12234 [(set_attr "type" "sselog1")
12235 (set_attr "prefix_extra" "1")
12236 (set_attr "prefix" "vex")
12237 (set_attr "mode" "<sseinsnmode>")])
;; Masked store: operand 0 is the memory destination, operand 1 the mask
;; in the matching integer vector mode (register) and operand 2 the
;; vector register being stored.  Emits
;; v<sseintprefix>maskmov<ssemodesuffix>.
12239 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12240 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12242 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12243 (match_operand:V48_AVX2 2 "register_operand" "x")
12247 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12248 [(set_attr "type" "sselog1")
12249 (set_attr "prefix_extra" "1")
12250 (set_attr "prefix" "vex")
12251 (set_attr "mode" "<sseinsnmode>")])
;; Cast of a half-width vector (operand 1) to a 256-bit mode, kept as an
;; unspec until reload and then split ("&& reload_completed") into an
;; ordinary move emitted with emit_move_insn.  The split code can re-type
;; either side by creating a new REG for the same hard register number:
;; operand 0 in the half-width mode, or operand 1 in the full mode.
;; NOTE(review): the conditions deciding which side is re-typed are not
;; visible in this excerpt.
12253 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12254 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12255 (unspec:AVX256MODE2P
12256 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12260 "&& reload_completed"
12263 rtx op0 = operands[0];
12264 rtx op1 = operands[1];
12266 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12268 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12269 emit_move_insn (op0, op1);
;; vec_init expander for the 256-bit vector modes.  All work is delegated
;; to ix86_expand_vector_init, called with false as its first argument,
;; operand 0 as the target and operand 1 as the initializer.
12273 (define_expand "vec_init<mode>"
12274 [(match_operand:V_256 0 "register_operand" "")
12275 (match_operand 1 "" "")]
12278 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander extracting a V2DI half from a V4DI register.  Operand 2 is
;; restricted to 0 or 1 and selects, through a switch, between
;; gen_vec_extract_lo_v4di and gen_vec_extract_hi_v4di.
;; NOTE(review): the case labels are not visible in this excerpt;
;; presumably 0 selects the low half and 1 the high half.
12282 (define_expand "avx2_extracti128"
12283 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12284 (match_operand:V4DI 1 "register_operand" "")
12285 (match_operand:SI 2 "const_0_to_1_operand" "")]
12288 rtx (*insn)(rtx, rtx);
12290 switch (INTVAL (operands[2]))
12293 insn = gen_vec_extract_lo_v4di;
12296 insn = gen_vec_extract_hi_v4di;
12299 gcc_unreachable ();
12302 emit_insn (insn (operands[0], operands[1]));
;; Expander inserting a V2DI value (operand 2) into a V4DI register
;; (operand 1).  Operand 3 is restricted to 0 or 1 and selects, through a
;; switch, between gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di.
;; NOTE(review): the case labels are not visible in this excerpt;
;; presumably 0 selects the low half and 1 the high half.
12306 (define_expand "avx2_inserti128"
12307 [(match_operand:V4DI 0 "register_operand" "")
12308 (match_operand:V4DI 1 "register_operand" "")
12309 (match_operand:V2DI 2 "nonimmediate_operand" "")
12310 (match_operand:SI 3 "const_0_to_1_operand" "")]
12313 rtx (*insn)(rtx, rtx, rtx);
12315 switch (INTVAL (operands[3]))
12318 insn = gen_avx2_vec_set_lo_v4di;
12321 insn = gen_avx2_vec_set_hi_v4di;
12324 gcc_unreachable ();
12327 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable shift on V8SI, printed as vpsravd.  The RTL
;; spells the operation out one lane at a time: each lane of the result
;; combines the same-numbered element of operand 1 (value) and operand 2
;; (count), and the eight lanes are assembled in two groups of four.
;; The second group must therefore select elements 4..7; selecting 0..3
;; again would describe the upper half of the result as a copy of the
;; lower-half computation, which is not what the instruction does.
12331 (define_insn "avx2_ashrvv8si"
12332 [(set (match_operand:V8SI 0 "register_operand" "=x")
12338 (match_operand:V8SI 1 "register_operand" "x")
12339 (parallel [(const_int 0)]))
12341 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12342 (parallel [(const_int 0)])))
12346 (parallel [(const_int 1)]))
12349 (parallel [(const_int 1)]))))
12354 (parallel [(const_int 2)]))
12357 (parallel [(const_int 2)])))
12361 (parallel [(const_int 3)]))
12364 (parallel [(const_int 3)])))))
12370 (parallel [(const_int 4)]))
12373 (parallel [(const_int 4)])))
12377 (parallel [(const_int 5)]))
12380 (parallel [(const_int 5)]))))
12385 (parallel [(const_int 6)]))
12388 (parallel [(const_int 6)])))
12392 (parallel [(const_int 7)]))
12395 (parallel [(const_int 7)])))))))]
12397 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12398 [(set_attr "type" "sseishft")
12399 (set_attr "prefix" "vex")
12400 (set_attr "mode" "OI")])
;; Per-element variable shift on V4SI, printed as vpsravd.  The RTL
;; selects elements 0..3 of operand 1 (register) and of operand 2
;; (register or memory) one lane at a time.
;; NOTE(review): the lines carrying the shift rtx code itself are not
;; visible in this excerpt.
12402 (define_insn "avx2_ashrvv4si"
12403 [(set (match_operand:V4SI 0 "register_operand" "=x")
12408 (match_operand:V4SI 1 "register_operand" "x")
12409 (parallel [(const_int 0)]))
12411 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12412 (parallel [(const_int 0)])))
12416 (parallel [(const_int 1)]))
12419 (parallel [(const_int 1)]))))
12424 (parallel [(const_int 2)]))
12427 (parallel [(const_int 2)])))
12431 (parallel [(const_int 3)]))
12434 (parallel [(const_int 3)]))))))]
12436 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12437 [(set_attr "type" "sseishft")
12438 (set_attr "prefix" "vex")
12439 (set_attr "mode" "TI")])
;; Per-element variable logical shift on V8SI, printed as
;; vp<lshift_insn>vd.  The RTL spells the operation out one lane at a
;; time: each lane of the result combines the same-numbered element of
;; operand 1 (value) and operand 2 (count), and the eight lanes are
;; assembled in two groups of four.  The second group must therefore
;; select elements 4..7; selecting 0..3 again would describe the upper
;; half of the result as a copy of the lower-half computation.
12441 (define_insn "avx2_<lshift>vv8si"
12442 [(set (match_operand:V8SI 0 "register_operand" "=x")
12448 (match_operand:V8SI 1 "register_operand" "x")
12449 (parallel [(const_int 0)]))
12451 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12452 (parallel [(const_int 0)])))
12456 (parallel [(const_int 1)]))
12459 (parallel [(const_int 1)]))))
12464 (parallel [(const_int 2)]))
12467 (parallel [(const_int 2)])))
12471 (parallel [(const_int 3)]))
12474 (parallel [(const_int 3)])))))
12480 (parallel [(const_int 4)]))
12483 (parallel [(const_int 4)])))
12487 (parallel [(const_int 5)]))
12490 (parallel [(const_int 5)]))))
12495 (parallel [(const_int 6)]))
12498 (parallel [(const_int 6)])))
12502 (parallel [(const_int 7)]))
12505 (parallel [(const_int 7)])))))))]
12507 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12508 [(set_attr "type" "sseishft")
12509 (set_attr "prefix" "vex")
12510 (set_attr "mode" "OI")])
;; Four-lane variable-count logical shift, iterated over the modes of
;; VI4SD_AVX2 and over the <lshift> codes.  The result is described as a
;; vec_concat of two <ssehalfvecmode> halves, each half being a
;; vec_concat of two scalar shifts.  Every scalar shift takes lane i of
;; operand 1 (values, register) shifted by lane i of operand 2 (counts,
;; register or memory), for i = 0..3.
;; The element-size suffix of the mnemonic comes from <ssemodesuffix> and
;; the insn mode attribute from <sseinsnmode>.
12512 (define_insn "avx2_<lshift>v<mode>"
12513 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12514 (vec_concat:VI4SD_AVX2
12515 (vec_concat:<ssehalfvecmode>
12516 (lshift:<ssescalarmode>
12517 (vec_select:<ssescalarmode>
12518 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12519 (parallel [(const_int 0)]))
12520 (vec_select:<ssescalarmode>
12521 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12522 (parallel [(const_int 0)])))
12523 (lshift:<ssescalarmode>
12524 (vec_select:<ssescalarmode>
12526 (parallel [(const_int 1)]))
12527 (vec_select:<ssescalarmode>
12529 (parallel [(const_int 1)]))))
12530 (vec_concat:<ssehalfvecmode>
12531 (lshift:<ssescalarmode>
12532 (vec_select:<ssescalarmode>
12534 (parallel [(const_int 2)]))
12535 (vec_select:<ssescalarmode>
12537 (parallel [(const_int 2)])))
12538 (lshift:<ssescalarmode>
12539 (vec_select:<ssescalarmode>
12541 (parallel [(const_int 3)]))
12542 (vec_select:<ssescalarmode>
12544 (parallel [(const_int 3)]))))))]
12546 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12547 [(set_attr "type" "sseishft")
12548 (set_attr "prefix" "vex")
12549 (set_attr "mode" "<sseinsnmode>")])
;; Two-lane (V2DI) variable-count logical shift, one insn per <lshift>
;; code.  Lane 0 and lane 1 of operand 1 (values, register) are paired
;; with the same lanes of operand 2 (counts, register or memory).
;; The mnemonic ends in "vq" (quadword lanes); insn mode attribute is TI.
12551 (define_insn "avx2_<lshift>vv2di"
12552 [(set (match_operand:V2DI 0 "register_operand" "=x")
12556 (match_operand:V2DI 1 "register_operand" "x")
12557 (parallel [(const_int 0)]))
12559 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12560 (parallel [(const_int 0)])))
12564 (parallel [(const_int 1)]))
12567 (parallel [(const_int 1)])))))]
12569 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12570 [(set_attr "type" "sseishft")
12571 (set_attr "prefix" "vex")
12572 (set_attr "mode" "TI")])
;; Build a 256-bit vector (any V_256 mode) from two half-width vectors.
;; Operand 1 is a half-width register; operand 2 is either a half-width
;; register/memory ("xm") or a constant matched by constraint "C".
;; Two alternatives, as reflected by the per-alternative attributes
;; ("sselog,ssemov", length_immediate "1,*"):
;;   - one emits vinsert<i128> with immediate 0x1, placing operand 2 in
;;     the upper half of operand 1 viewed at full width (%t1);
;;   - the other emits a plain 128-bit move of operand 1 into the low
;;     part of the destination (%x0), choosing vmovaps, vmovapd or
;;     vmovdqa according to get_attr_mode.
;; NOTE(review): presumably the move alternative relies on the VEX move
;; zeroing the upper half when operand 2 is the "C" constant -- confirm.
12574 (define_insn "avx_vec_concat<mode>"
12575 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12577 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12578 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12581 switch (which_alternative)
12584 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12586 switch (get_attr_mode (insn))
12589 return "vmovaps\t{%1, %x0|%x0, %1}";
12591 return "vmovapd\t{%1, %x0|%x0, %1}";
12593 return "vmovdqa\t{%1, %x0|%x0, %1}";
12596 gcc_unreachable ();
12599 [(set_attr "type" "sselog,ssemov")
12600 (set_attr "prefix_extra" "1,*")
12601 (set_attr "length_immediate" "1,*")
12602 (set_attr "prefix" "vex")
12603 (set_attr "mode" "<sseinsnmode>")])
;; Convert half-precision values held in a V8HI register (operand 1) to
;; single precision, producing a V4SF result in register operand 0.
;; The conversion itself is modeled as an unspec yielding V8SF, from which
;; four elements are selected.  The selector names lanes 0, 1, 2, 3 in
;; order so that the result is the four low converted values; a selector
;; that repeats a lane would describe a different result than the
;; instruction computes.
12605 (define_insn "vcvtph2ps"
12606 [(set (match_operand:V4SF 0 "register_operand" "=x")
12608 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12610 (parallel [(const_int 0) (const_int 1)
12611 (const_int 2) (const_int 3)])))]
12613 "vcvtph2ps\t{%1, %0|%0, %1}"
12614 [(set_attr "type" "ssecvt")
12615 (set_attr "prefix" "vex")
12616 (set_attr "mode" "V4SF")])
;; Memory-source form of the half-to-single conversion: operand 1 is a
;; V4HI memory operand, and the V4SF result in register operand 0 is
;; expressed directly as an UNSPEC_VCVTPH2PS unspec.  The leading '*'
;; makes this a matcher-only pattern (no gen_ function is created).
;; NOTE(review): the mode attribute here is V8SF although the result is
;; V4SF and the register form above uses V4SF -- confirm this is intended.
12618 (define_insn "*vcvtph2ps_load"
12619 [(set (match_operand:V4SF 0 "register_operand" "=x")
12620 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12621 UNSPEC_VCVTPH2PS))]
12623 "vcvtph2ps\t{%1, %0|%0, %1}"
12624 [(set_attr "type" "ssecvt")
12625 (set_attr "prefix" "vex")
12626 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: all eight HImode elements of
;; operand 1 (register or memory) are converted, giving a V8SF result in
;; register operand 0.  Modeled as an UNSPEC_VCVTPH2PS unspec.
12628 (define_insn "vcvtph2ps256"
12629 [(set (match_operand:V8SF 0 "register_operand" "=x")
12630 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12631 UNSPEC_VCVTPH2PS))]
12633 "vcvtph2ps\t{%1, %0|%0, %1}"
12634 [(set_attr "type" "ssecvt")
12635 (set_attr "prefix" "vex")
12636 (set_attr "mode" "V8SF")])
;; Expander for single-to-half conversion of a V4SF register (operand 1)
;; into a V8HI register (operand 0).  Operand 2 is an immediate in the
;; range 0..255 passed through to the instruction.
;; The converted data is a V4HI unspec; the preparation statement sets
;; operands[3] to the V4HImode zero constant.
;; NOTE(review): presumably operand 3 fills the other half of the V8HI
;; result, matching the const0_operand in "*vcvtps2ph" -- confirm.
12638 (define_expand "vcvtps2ph"
12639 [(set (match_operand:V8HI 0 "register_operand" "")
12641 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12642 (match_operand:SI 2 "const_0_to_255_operand" "")]
12646 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination single-to-half conversion.  Operand 1 is the V4SF
;; source register, operand 2 an 8-bit immediate (constraint "N"), and
;; operand 3 must be a V4HI zero constant (const0_operand).  The result
;; is written to V8HI register operand 0.
;; Emitted as "vcvtps2ph imm, src, dst" in AT&T operand order.
12648 (define_insn "*vcvtps2ph"
12649 [(set (match_operand:V8HI 0 "register_operand" "=x")
12651 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12652 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12654 (match_operand:V4HI 3 "const0_operand" "")))]
12656 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12657 [(set_attr "type" "ssecvt")
12658 (set_attr "prefix" "vex")
12659 (set_attr "mode" "V4SF")])
;; Memory-destination single-to-half conversion: the four converted
;; values (V4HI) are stored directly to memory operand 0.  Operand 1 is
;; the V4SF source register and operand 2 the 8-bit immediate.
;; No zero-filled half is involved, unlike the register-destination form.
12661 (define_insn "*vcvtps2ph_store"
12662 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12663 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12664 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12665 UNSPEC_VCVTPS2PH))]
12667 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12668 [(set_attr "type" "ssecvt")
12669 (set_attr "prefix" "vex")
12670 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: eight SFmode elements of register
;; operand 1 become a V8HI result, written to either a register or
;; memory (operand 0, constraint "=xm").  Operand 2 is the 8-bit
;; immediate forwarded to the instruction.
12672 (define_insn "vcvtps2ph256"
12673 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12674 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12675 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12676 UNSPEC_VCVTPS2PH))]
12678 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12679 [(set_attr "type" "ssecvt")
12680 (set_attr "prefix" "vex")
12681 (set_attr "mode" "V8SF")])
;; VEC_GATHER_MODE is used under one name both as a mode iterator and as
;; a mode attribute:
;;   - the iterator lists the eight data-vector modes that the gather
;;     patterns are instantiated for;
;;   - the attribute maps each data mode to the vector mode used for the
;;     SImode index operand (written <VEC_GATHER_MODE> in the patterns):
;;     V8SI for the two eight-element modes, V4SI for all others.
12683 ;; For gather* insn patterns
12684 (define_mode_iterator VEC_GATHER_MODE
12685 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
12686 (define_mode_attr VEC_GATHER_MODE
12687 [(V2DI "V4SI") (V2DF "V4SI")
12688 (V4DI "V4SI") (V4DF "V4SI")
12689 (V4SI "V4SI") (V4SF "V4SI")
12690 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for gathers that use an SImode index vector.
;; Operands:
;;   0  destination register (data vector mode)
;;   1  register of the same data vector mode
;;   2  base address (vsib_address_operand)
;;   3  index vector register, in mode <VEC_GATHER_MODE>
;;   4  register of the same data vector mode
;;      (presumably the mask -- confirm against the builtin expander)
;;   5  scale, restricted to 1, 2, 4 or 8 by const1248_operand
;;   6  scratch in the data vector mode, clobbered by the insn
;; The preparation code wraps base, index and scale in an
;; UNSPEC_VSIBADDR rtx of mode Pmode.
;; The predicate name for operand 5 is spelled without trailing blanks so
;; that it is the same string as in the matching "*avx2_gathersi<mode>".
12692 (define_expand "avx2_gathersi<mode>"
12693 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12694 (unspec:VEC_GATHER_MODE
12695 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12696 (mem:<ssescalarmode>
12698 [(match_operand 2 "vsib_address_operand" "")
12699 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12700 (match_operand:SI 5 "const1248_operand" "")]))
12701 (mem:BLK (scratch))
12702 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12704 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12708 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12709 operands[5]), UNSPEC_VSIBADDR);
;; Insn for gathers with an SImode index vector (mnemonic
;; v<sseintprefix>gatherd<ssemodesuffix>).
;; Register allocation constraints visible here:
;;   - operand 0 (destination) is earlyclobber ("=&x") and operand 2 is
;;     tied to it ("0");
;;   - operand 1 is an earlyclobber scratch ("=&x") and operand 5 is tied
;;     to it ("1"); it is printed as the first AT&T operand (%1);
;;   - operand 4 (index vector, mode <VEC_GATHER_MODE>) is a plain "x"
;;     register, so the earlyclobbers keep it distinct from 0 and 1.
;; Operand 7 is the memory reference (vsib_mem_operator) built around
;; base operand 3, index operand 4 and scale operand 6.
;; The (mem:BLK (scratch)) input stands for arbitrary memory being read.
12712 (define_insn "*avx2_gathersi<mode>"
12713 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12714 (unspec:VEC_GATHER_MODE
12715 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12716 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12718 [(match_operand:P 3 "vsib_address_operand" "p")
12719 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12720 (match_operand:SI 6 "const1248_operand" "n")]
12722 (mem:BLK (scratch))
12723 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12725 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12727 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12728 [(set_attr "type" "ssemov")
12729 (set_attr "prefix" "vex")
12730 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers that use a DImode index vector.
;; Operands:
;;   0  destination register (data vector mode)
;;   1  register of the same data vector mode
;;   2  base address (vsib_address_operand)
;;   3  index vector register, in mode <AVXMODE48P_DI>
;;   4  register of the same data vector mode
;;      (presumably the mask -- confirm against the builtin expander)
;;   5  scale, restricted to 1, 2, 4 or 8 by const1248_operand
;;   6  scratch in the data vector mode, clobbered by the insn
;; The preparation code wraps base, index and scale in an
;; UNSPEC_VSIBADDR rtx of mode Pmode.
;; The predicate name for operand 5 is spelled without trailing blanks so
;; that it is the same string as in the matching "*avx2_gatherdi<mode>".
;; NOTE(review): this expander iterates over VEC_GATHER_MODE while the
;; index mode comes from the AVXMODE48P_DI attribute, which is defined
;; elsewhere in the file -- confirm it covers every VEC_GATHER_MODE mode.
12732 (define_expand "avx2_gatherdi<mode>"
12733 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12734 (unspec:VEC_GATHER_MODE
12735 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12736 (mem:<ssescalarmode>
12738 [(match_operand 2 "vsib_address_operand" "")
12739 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12740 (match_operand:SI 5 "const1248_operand" "")]))
12741 (mem:BLK (scratch))
12742 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12744 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12748 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12749 operands[5]), UNSPEC_VSIBADDR);
;; Insn for gathers with a DImode index vector (mnemonic
;; v<sseintprefix>gatherq<ssemodesuffix>), iterated over AVXMODE48P_DI.
;; Register allocation constraints visible here:
;;   - operand 0 (destination) is earlyclobber ("=&x") and operand 2 is
;;     tied to it ("0");
;;   - operand 1 is an earlyclobber scratch ("=&x") and operand 5 is tied
;;     to it ("1"); it is printed as the first AT&T operand (%1);
;;   - operand 4 (index vector, mode <AVXMODE48P_DI>) is a plain "x"
;;     register, so the earlyclobbers keep it distinct from 0 and 1.
;; Operand 7 is the memory reference (vsib_mem_operator) built around
;; base operand 3, index operand 4 and scale operand 6.
12752 (define_insn "*avx2_gatherdi<mode>"
12753 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12754 (unspec:AVXMODE48P_DI
12755 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12756 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12758 [(match_operand:P 3 "vsib_address_operand" "p")
12759 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12760 (match_operand:SI 6 "const1248_operand" "n")]
12762 (mem:BLK (scratch))
12763 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
12765 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12767 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12768 [(set_attr "type" "ssemov")
12769 (set_attr "prefix" "vex")
12770 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the gather form that combines a 256-bit DImode index
;; vector (operand 3, V4DI) with 128-bit data operands (VI4F_128 modes).
;; Operands:
;;   0  destination register (128-bit data mode)
;;   1  register of the same data mode
;;   2  base address (vsib_address_operand)
;;   3  V4DI index vector register
;;   4  register of the same data mode
;;      (presumably the mask -- confirm against the builtin expander)
;;   5  scale, restricted to 1, 2, 4 or 8 by const1248_operand
;;   6  scratch in the data mode, clobbered by the insn
;; The preparation code wraps base, index and scale in an
;; UNSPEC_VSIBADDR rtx of mode Pmode.
;; The predicate name for operand 5 is spelled without trailing blanks so
;; that it is the same string as in "*avx2_gatherdi<mode>256".
12772 ;; Special handling for VEX.256 with float arguments
12773 ;; since there're still xmms as operands
12774 (define_expand "avx2_gatherdi<mode>256"
12775 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12777 [(match_operand:VI4F_128 1 "register_operand" "")
12778 (mem:<ssescalarmode>
12780 [(match_operand 2 "vsib_address_operand" "")
12781 (match_operand:V4DI 3 "register_operand" "")
12782 (match_operand:SI 5 "const1248_operand" "")]))
12783 (mem:BLK (scratch))
12784 (match_operand:VI4F_128 4 "register_operand" "")]
12786 (clobber (match_scratch:VI4F_128 6 ""))])]
12790 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12791 operands[5]), UNSPEC_VSIBADDR);
;; Insn for the gather form with a V4DI index vector and 128-bit data
;; operands (mnemonic v<sseintprefix>gatherq<ssemodesuffix>).
;; Register allocation constraints:
;;   - operand 0 (destination) is earlyclobber ("=&x"), like the other
;;     two gather insns, and operand 2 is tied to it ("0").  The
;;     earlyclobber stops the allocator from giving the destination the
;;     same register as index operand 4; gather instructions fault when
;;     destination, index and mask registers are not all distinct;
;;   - operand 1 is an earlyclobber scratch ("=&x") and operand 5 is tied
;;     to it ("1"); it is printed as the first AT&T operand (%1);
;;   - operand 4 (V4DI index vector) is a plain "x" register.
;; Operand 7 is the memory reference (vsib_mem_operator) built around
;; base operand 3, index operand 4 and scale operand 6.
12794 (define_insn "*avx2_gatherdi<mode>256"
12795 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12797 [(match_operand:VI4F_128 2 "register_operand" "0")
12798 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12800 [(match_operand:P 3 "vsib_address_operand" "p")
12801 (match_operand:V4DI 4 "register_operand" "x")
12802 (match_operand:SI 6 "const1248_operand" "n")]
12804 (mem:BLK (scratch))
12805 (match_operand:VI4F_128 5 "register_operand" "1")]
12807 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12809 "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12810 [(set_attr "type" "ssemov")
12811 (set_attr "prefix" "vex")
12812 (set_attr "mode" "<sseinsnmode>")])