1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; V16 -- all vector modes, V1TI/V2TI included; this is the iterator the
;; move patterns use.  In every pair the wider mode is enabled only under
;; TARGET_AVX, while the narrower one carries no condition.
(define_mode_iterator V16
  [(V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V8SI "TARGET_AVX")
   V4SI
   (V4DI "TARGET_AVX")
   V2DI
   (V2TI "TARGET_AVX")
   V1TI
   (V8SF "TARGET_AVX")
   V4SF
   (V4DF "TARGET_AVX")
   V2DF])
;; V -- the integer and float vector modes, without V1TI/V2TI.  The wider
;; mode of each pair is enabled only under TARGET_AVX; V2DF is enabled
;; only under TARGET_SSE2.
(define_mode_iterator V
  [(V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V8SI "TARGET_AVX")
   V4SI
   (V4DI "TARGET_AVX")
   V2DI
   (V8SF "TARGET_AVX")
   V4SF
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
;; V_128 -- the 128-bit vector modes.  Only V2DF carries a condition
;; (TARGET_SSE2).
(define_mode_iterator V_128
  [V16QI
   V8HI
   V4SI
   V2DI
   V4SF
   (V2DF "TARGET_SSE2")])
;; V_256 -- the 256-bit vector modes, listed without per-mode conditions.
(define_mode_iterator V_256
  [V32QI
   V16HI
   V8SI
   V4DI
   V8SF
   V4DF])
;; VF -- the floating-point vector modes.  V8SF and V4DF are enabled only
;; under TARGET_AVX, V2DF only under TARGET_SSE2; V4SF has no condition.
(define_mode_iterator VF
  [(V8SF "TARGET_AVX")
   V4SF
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
;; VF1 -- float vectors with SFmode elements: V4SF, plus V8SF under
;; TARGET_AVX.
(define_mode_iterator VF1
  [(V8SF "TARGET_AVX")
   V4SF])
;; VF2 -- float vectors with DFmode elements: V2DF, plus V4DF under
;; TARGET_AVX.  Unlike VF, no TARGET_SSE2 condition is attached to V2DF
;; here.
(define_mode_iterator VF2
  [(V4DF "TARGET_AVX")
   V2DF])
;; VF_128 -- the 128-bit float vector modes: V4SF, and V2DF under
;; TARGET_SSE2.
(define_mode_iterator VF_128
  [V4SF
   (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
;; VI -- the integer vector modes.  The wider mode of each pair is
;; enabled only under TARGET_AVX.
(define_mode_iterator VI
  [(V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V8SI "TARGET_AVX")
   V4SI
   (V4DI "TARGET_AVX")
   V2DI])
;; VI_AVX2 -- same mode list as VI, but the wider mode of each pair is
;; gated on TARGET_AVX2 instead of TARGET_AVX.
(define_mode_iterator VI_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI
   (V4DI "TARGET_AVX2")
   V2DI])
;; VI1 -- integer vectors with QImode elements: V16QI, plus V32QI under
;; TARGET_AVX.
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX")
   V16QI])
;; VI8 -- integer vectors with DImode elements: V2DI, plus V4DI under
;; TARGET_AVX.
(define_mode_iterator VI8
  [(V4DI "TARGET_AVX")
   V2DI])
;; Per-element-size integer iterators.  Each one pairs an unconditional
;; narrower mode with a wider mode that is enabled only under TARGET_AVX2.

;; QImode elements.
(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI])

;; HImode elements.
(define_mode_iterator VI2_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI])

;; SImode elements.
(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2")
   V4SI])

;; DImode elements.
(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2")
   V2DI])
;; VIMAX_AVX2 -- V1TI, plus V2TI under TARGET_AVX2.
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2")
   V1TI])
;; SSESCALARMODE -- TI, plus V2TI under TARGET_AVX2.
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V2TI "TARGET_AVX2")
   TI])
;; Integer iterators covering several element sizes at once.  In each of
;; them the wider mode of a pair is enabled only under TARGET_AVX2.

;; QImode and HImode elements.
(define_mode_iterator VI12_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI])

;; HImode and SImode elements.
(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI])

;; QImode, HImode and SImode elements.
(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI])

;; HImode, SImode and DImode elements.
(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI
   (V4DI "TARGET_AVX2")
   V2DI])
;; VI48_AVX2 -- integer vectors with SImode or DImode elements, both
;; widths.  Despite the name, no per-mode condition is attached here.
(define_mode_iterator VI48_AVX2
  [V8SI
   V4SI
   V4DI
   V2DI])
131 (define_mode_iterator VI4SD_AVX2
134 (define_mode_iterator V48_AVX2
137 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
138 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; sse2_avx2 -- ISA-name string per integer vector mode: "sse2" for the
;; narrower mode of each pair, "avx2" for the wider one.  V1TI/V2TI are
;; covered as well.
(define_mode_attr sse2_avx2
  [(V16QI "sse2")
   (V32QI "avx2")
   (V8HI "sse2")
   (V16HI "avx2")
   (V4SI "sse2")
   (V8SI "avx2")
   (V2DI "sse2")
   (V4DI "avx2")
   (V1TI "sse2")
   (V2TI "avx2")])
;; ssse3_avx2 -- ISA-name string per mode: "ssse3" for the narrower mode
;; of each pair, "avx2" for the wider one.  Note that the last pair is
;; keyed on TI (not V1TI) and V2TI.
(define_mode_attr ssse3_avx2
  [(V16QI "ssse3")
   (V32QI "avx2")
   (V8HI "ssse3")
   (V16HI "avx2")
   (V4SI "ssse3")
   (V8SI "avx2")
   (V2DI "ssse3")
   (V4DI "avx2")
   (TI "ssse3")
   (V2TI "avx2")])
;; sse4_1_avx2 -- ISA-name string per integer vector mode: "sse4_1" for
;; the narrower mode of each pair, "avx2" for the wider one.
(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1")
   (V32QI "avx2")
   (V8HI "sse4_1")
   (V16HI "avx2")
   (V4SI "sse4_1")
   (V8SI "avx2")
   (V2DI "sse4_1")
   (V4DI "avx2")])
;; avx_avx2 -- "avx" for the float vector modes, "avx2" for the integer
;; vector modes with SImode or DImode elements; the string does not
;; depend on the vector width.
(define_mode_attr avx_avx2
  [(V4SF "avx")
   (V2DF "avx")
   (V8SF "avx")
   (V4DF "avx")
   (V4SI "avx2")
   (V2DI "avx2")
   (V8SI "avx2")
   (V4DI "avx2")])
;; vec_avx2 -- "vec" for the narrower integer vector mode of each pair,
;; "avx2" for the wider one.
(define_mode_attr vec_avx2
  [(V16QI "vec")
   (V32QI "avx2")
   (V8HI "vec")
   (V16HI "avx2")
   (V4SI "vec")
   (V8SI "avx2")
   (V2DI "vec")
   (V4DI "avx2")])
;; lshift -- code iterator over the two logic-shift operators, lshiftrt
;; and ashift.
(define_code_iterator lshift
  [lshiftrt
   ashift])
;; lshift_insn -- string for each code of the lshift iterator:
;; "srl" for lshiftrt, "sll" for ashift.
;; Headed "Base name for define_insn" in the original.
;; NOTE(review): these strings look like the psrl/psll mnemonic stems
;; rather than pattern names; this heading may be swapped with the one on
;; the lshift code attribute -- confirm against the patterns that use
;; this attribute (they are not in this part of the file).
(define_code_attr lshift_insn
  [(lshiftrt "srl")
   (ashift "sll")])
;; lshift (code attribute) -- string for each code of the lshift
;; iterator: "lshr" for lshiftrt, "lshl" for ashift.
;; Headed "Base name for insn mnemonic" in the original.
;; NOTE(review): these strings look like pattern-name stems rather than
;; mnemonics; this heading may be swapped with the one on lshift_insn --
;; confirm against the patterns that use this attribute (they are not in
;; this part of the file).
(define_code_attr lshift
  [(lshiftrt "lshr")
   (ashift "lshl")])
;; ssedoublemode -- for a vector of HImode elements, the vector with the
;; same element count but SImode elements (V16HI -> V16SI, V8HI -> V8SI).
(define_mode_attr ssedoublemode
  [(V16HI "V16SI")
   (V8HI "V8SI")])
;; ssebytemode -- for a vector of DImode elements, the QImode-element
;; vector of the same width (V4DI -> V32QI, V2DI -> V16QI).
(define_mode_attr ssebytemode
  [(V4DI "V32QI")
   (V2DI "V16QI")])
;; VI_128 -- the 128-bit integer vector modes, without conditions.
(define_mode_iterator VI_128
  [V16QI
   V8HI
   V4SI
   V2DI])
;; VI_256 -- the 256-bit integer vector modes, without conditions.
(define_mode_iterator VI_256
  [V32QI
   V16HI
   V8SI
   V4DI])
;; Assorted subsets of the 128-bit integer vector modes.  None of the
;; entries carries a condition.

;; QImode and HImode elements.
(define_mode_iterator VI12_128
  [V16QI
   V8HI])

;; QImode and SImode elements.
(define_mode_iterator VI14_128
  [V16QI
   V4SI])

;; QImode, HImode and SImode elements.
(define_mode_iterator VI124_128
  [V16QI
   V8HI
   V4SI])

;; HImode and SImode elements.
(define_mode_iterator VI24_128
  [V8HI
   V4SI])

;; HImode, SImode and DImode elements.
(define_mode_iterator VI248_128
  [V8HI
   V4SI
   V2DI])
;; Assorted subsets of the 256-bit integer vector modes.  None of the
;; entries carries a condition.

;; QImode, HImode and SImode elements.
(define_mode_iterator VI124_256
  [V32QI
   V16HI
   V8SI])

;; HImode, SImode and DImode elements.
(define_mode_iterator VI248_256
  [V16HI
   V8SI
   V4DI])
;; Integer/float pairs whose element size and vector width match.

;; SImode and SFmode elements, narrower width.
(define_mode_iterator VI4F_128
  [V4SI
   V4SF])

;; DImode and DFmode elements, narrower width.
(define_mode_iterator VI8F_128
  [V2DI
   V2DF])

;; SImode and SFmode elements, wider width.
(define_mode_iterator VI4F_256
  [V8SI
   V8SF])

;; DImode and DFmode elements, wider width.
(define_mode_iterator VI8F_256
  [V4DI
   V4DF])
;; sse -- the SSE level each float mode requires, as a string: "sse" for
;; SF and V4SF, "sse2" for DF and V2DF, "avx" for V8SF and V4DF.  It is
;; spliced into pattern names elsewhere in this file.
(define_mode_attr sse
  [(SF "sse")
   (DF "sse2")
   (V4SF "sse")
   (V2DF "sse2")
   (V8SF "avx")
   (V4DF "avx")])
;; sse2 -- "sse2" for V16QI and V2DI, "avx" for their wider counterparts
;; V32QI and V4DI.  It is spliced into pattern names elsewhere in this
;; file.
(define_mode_attr sse2
  [(V16QI "sse2")
   (V32QI "avx")
   (V2DI "sse2")
   (V4DI "avx")])
;; sse3 -- "sse3" for V16QI, "avx" for V32QI.  It is spliced into a
;; pattern name elsewhere in this file.
(define_mode_attr sse3
  [(V16QI "sse3")
   (V32QI "avx")])
;; sse4_1 -- "sse4_1" for the narrower float vector modes (V4SF, V2DF),
;; "avx" for the wider ones (V8SF, V4DF).
(define_mode_attr sse4_1
  [(V4SF "sse4_1")
   (V2DF "sse4_1")
   (V8SF "avx")
   (V4DF "avx")])
;; avxsizesuffix -- "256" for the 256-bit vector modes and the empty
;; string for the 128-bit ones.  It is appended to pattern names
;; elsewhere in this file.
(define_mode_attr avxsizesuffix
  [(V32QI "256")
   (V16HI "256")
   (V8SI "256")
   (V4DI "256")
   (V16QI "")
   (V8HI "")
   (V4SI "")
   (V2DI "")
   (V8SF "256")
   (V4DF "256")
   (V4SF "")
   (V2DF "")])
233 ;; SSE instruction mode
234 (define_mode_attr sseinsnmode
235 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
236 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
237 (V8SF "V8SF") (V4DF "V4DF")
238 (V4SF "V4SF") (V2DF "V2DF")
241 ;; Mapping of vector float modes to an integer mode of the same size
242 (define_mode_attr sseintvecmode
243 [(V8SF "V8SI") (V4DF "V4DI")
244 (V4SF "V4SI") (V2DF "V2DI")
245 (V4DF "V4DI") (V8SF "V8SI")
246 (V8SI "V8SI") (V4DI "V4DI")
247 (V4SI "V4SI") (V2DI "V2DI")
248 (V16HI "V16HI") (V8HI "V8HI")
249 (V32QI "V32QI") (V16QI "V16QI")
;; ssedoublevecmode -- for each vector mode, the vector mode with the
;; same element type and twice as many elements.
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI")
   (V16HI "V32HI")
   (V8SI "V16SI")
   (V4DI "V8DI")
   (V16QI "V32QI")
   (V8HI "V16HI")
   (V4SI "V8SI")
   (V2DI "V4DI")
   (V8SF "V16SF")
   (V4DF "V8DF")
   (V4SF "V8SF")
   (V2DF "V4DF")])
259 ;; Mapping of vector modes to a vector mode of half size
260 (define_mode_attr ssehalfvecmode
261 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
262 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
263 (V8SF "V4SF") (V4DF "V2DF")
;; ssescalarmode -- for each vector mode, the scalar mode of one of its
;; elements.
(define_mode_attr ssescalarmode
  [(V32QI "QI")
   (V16HI "HI")
   (V8SI "SI")
   (V4DI "DI")
   (V16QI "QI")
   (V8HI "HI")
   (V4SI "SI")
   (V2DI "DI")
   (V8SF "SF")
   (V4DF "DF")
   (V4SF "SF")
   (V2DF "DF")])
;; ssescalarnum -- for each vector mode, how many scalar elements it
;; holds, as a decimal string.
(define_mode_attr ssescalarnum
  [(V32QI "32")
   (V16HI "16")
   (V8SI "8")
   (V4DI "4")
   (V16QI "16")
   (V8HI "8")
   (V4SI "4")
   (V2DI "2")
   (V8SF "8")
   (V4DF "4")
   (V4SF "4")
   (V2DF "2")])
280 ;; SSE scalar suffix for vector modes
281 (define_mode_attr ssescalarmodesuffix
283 (V8SF "ss") (V4DF "sd")
284 (V4SF "ss") (V2DF "sd")
285 (V8SI "ss") (V4DI "sd")
;; Pack/unpack vector modes.
;; sseunpackmode -- for an integer vector mode, the mode of the same
;; width with half as many elements of the next larger element mode
;; (QI -> HI -> SI -> DI).
(define_mode_attr sseunpackmode
  [(V16QI "V8HI")
   (V8HI "V4SI")
   (V4SI "V2DI")
   (V32QI "V16HI")
   (V16HI "V8SI")
   (V8SI "V4DI")])
;; ssepackmode -- the reverse direction of sseunpackmode: for an integer
;; vector mode, the mode of the same width with twice as many elements
;; of the next smaller element mode (DI -> SI -> HI -> QI).
(define_mode_attr ssepackmode
  [(V8HI "V16QI")
   (V4SI "V8HI")
   (V2DI "V4SI")
   (V16HI "V32QI")
   (V8SI "V16HI")
   (V4DI "V8SI")])
;; sserotatemax -- largest integer accepted by the XOP rotate immediate
;; constraint for each 128-bit integer vector mode: 7, 15, 31 and 63 for
;; QImode, HImode, SImode and DImode elements respectively.
(define_mode_attr sserotatemax
  [(V16QI "7")
   (V8HI "15")
   (V4SI "31")
   (V2DI "63")])
;; castmode -- the piece of the cast intrinsic name that corresponds to
;; each mode: "si" for V8SI, "ps" for V8SF, "pd" for V4DF.
(define_mode_attr castmode
  [(V8SI "si")
   (V8SF "ps")
   (V4DF "pd")])
;; extsuffix -- instruction suffix for an extension code: "sx" for
;; sign_extend, "zx" for zero_extend.
(define_code_attr extsuffix
  [(sign_extend "sx")
   (zero_extend "zx")])
;; i128 -- stands for i128 on integer vectors when TARGET_AVX2 holds and
;; for f128 otherwise.  The float modes map to the literal "f128"; the
;; integer modes map to "%~128", which leaves that choice to the "%~"
;; output escape.
(define_mode_attr i128
  [(V8SF "f128")
   (V4DF "f128")
   (V32QI "%~128")
   (V16HI "%~128")
   (V8SI "%~128")
   (V4DI "%~128")])
;; AVX256MODE2P -- V8SI, V8SF and V4DF, without conditions.  These are
;; the same three modes that castmode gives a name piece for.
(define_mode_iterator AVX256MODE2P
  [V8SI
   V8SF
   V4DF])
;; AVXMODE48P_DI (iterator) -- integer and float vectors with 4-byte or
;; 8-byte elements: both DImode/DFmode widths, plus V4SF and V4SI.  No
;; entry carries a condition.
(define_mode_iterator AVXMODE48P_DI
  [V2DI
   V2DF
   V4DI
   V4DF
   V4SF
   V4SI])
;; AVXMODE48P_DI (attribute) -- maps each mode to a vector of DImode
;; elements with the same element count for the 8-byte-element modes
;; (V2DI/V2DF -> V2DI, V4DI/V4DF -> V4DI) and half the element count for
;; the 4-byte-element modes (V4SI/V4SF -> V2DI, V8SI/V8SF -> V4DI).
;; V8SI and V8SF have entries here although the iterator of the same
;; name does not list them.
(define_mode_attr AVXMODE48P_DI
  [(V2DI "V2DI")
   (V2DF "V2DI")
   (V4DI "V4DI")
   (V4DF "V4DI")
   (V4SI "V2DI")
   (V4SF "V2DI")
   (V8SI "V4DI")
   (V8SF "V4DI")])
322 (define_mode_attr gthrfirstp
323 [(V2DI "p") (V2DF "")
326 (V8SI "p") (V8SF "")])
;; gthrlastp -- per-mode string: "q" for DImode-element vectors, "pd" for
;; DFmode, "d" for SImode, "ps" for SFmode; the vector width makes no
;; difference.
;; NOTE(review): presumably the trailing part of a gather mnemonic, with
;; gthrfirstp supplying the leading part -- confirm against the patterns
;; that use it (they are not in this part of the file).
(define_mode_attr gthrlastp
  [(V2DI "q")
   (V2DF "pd")
   (V4DI "q")
   (V4DF "pd")
   (V4SI "d")
   (V4SF "ps")
   (V8SI "d")
   (V8SF "ps")])
;; FMAMODE -- scalar SF and DF plus the four float vector modes, without
;; conditions.
(define_mode_iterator FMAMODE
  [SF
   DF
   V4SF
   V2DF
   V8SF
   V4DF])
;; blendbits -- immediate bits for blend instructions, per float vector
;; mode.  Each value is the all-ones mask with one bit per element:
;; 255 for 8 elements, 15 for 4, 3 for 2.
(define_mode_attr blendbits
  [(V8SF "255")
   (V4SF "15")
   (V4DF "15")
   (V2DF "3")])
339 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;; All of these patterns are enabled for SSE1 as well as SSE2.
348 ;; This is essential for maintaining stable calling conventions.
350 (define_expand "mov<mode>"
351 [(set (match_operand:V16 0 "nonimmediate_operand" "")
352 (match_operand:V16 1 "nonimmediate_operand" ""))]
355 ix86_expand_vector_move (<MODE>mode, operands);
359 (define_insn "*mov<mode>_internal"
360 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
361 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
363 && (register_operand (operands[0], <MODE>mode)
364 || register_operand (operands[1], <MODE>mode))"
366 switch (which_alternative)
369 return standard_sse_constant_opcode (insn, operands[1]);
372 switch (get_attr_mode (insn))
377 && (misaligned_operand (operands[0], <MODE>mode)
378 || misaligned_operand (operands[1], <MODE>mode)))
379 return "vmovups\t{%1, %0|%0, %1}";
381 return "%vmovaps\t{%1, %0|%0, %1}";
386 && (misaligned_operand (operands[0], <MODE>mode)
387 || misaligned_operand (operands[1], <MODE>mode)))
388 return "vmovupd\t{%1, %0|%0, %1}";
389 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
390 return "%vmovaps\t{%1, %0|%0, %1}";
392 return "%vmovapd\t{%1, %0|%0, %1}";
397 && (misaligned_operand (operands[0], <MODE>mode)
398 || misaligned_operand (operands[1], <MODE>mode)))
399 return "vmovdqu\t{%1, %0|%0, %1}";
400 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
401 return "%vmovaps\t{%1, %0|%0, %1}";
403 return "%vmovdqa\t{%1, %0|%0, %1}";
412 [(set_attr "type" "sselog1,ssemov,ssemov")
413 (set_attr "prefix" "maybe_vex")
415 (cond [(match_test "TARGET_AVX")
416 (const_string "<sseinsnmode>")
417 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
418 (not (match_test "TARGET_SSE2")))
419 (and (eq_attr "alternative" "2")
420 (match_test "TARGET_SSE_TYPELESS_STORES")))
421 (const_string "V4SF")
422 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
423 (const_string "V4SF")
424 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
425 (const_string "V2DF")
427 (const_string "TI")))])
429 (define_insn "sse2_movq128"
430 [(set (match_operand:V2DI 0 "register_operand" "=x")
433 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
434 (parallel [(const_int 0)]))
437 "%vmovq\t{%1, %0|%0, %1}"
438 [(set_attr "type" "ssemov")
439 (set_attr "prefix" "maybe_vex")
440 (set_attr "mode" "TI")])
442 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
443 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
444 ;; from memory, we'd prefer to load the memory directly into the %xmm
445 ;; register. To facilitate this happy circumstance, this pattern won't
446 ;; split until after register allocation. If the 64-bit value didn't
447 ;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register.
451 (define_insn_and_split "movdi_to_sse"
453 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
454 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
455 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
456 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
458 "&& reload_completed"
461 if (register_operand (operands[1], DImode))
463 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
464 Assemble the 64-bit DImode value in an xmm register. */
465 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
466 gen_rtx_SUBREG (SImode, operands[1], 0)));
467 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
468 gen_rtx_SUBREG (SImode, operands[1], 4)));
469 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
472 else if (memory_operand (operands[1], DImode))
473 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
474 operands[1], const0_rtx));
480 [(set (match_operand:V4SF 0 "register_operand" "")
481 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
482 "TARGET_SSE && reload_completed"
485 (vec_duplicate:V4SF (match_dup 1))
489 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
490 operands[2] = CONST0_RTX (V4SFmode);
494 [(set (match_operand:V2DF 0 "register_operand" "")
495 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
496 "TARGET_SSE2 && reload_completed"
497 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
499 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
500 operands[2] = CONST0_RTX (DFmode);
503 (define_expand "push<mode>1"
504 [(match_operand:V16 0 "register_operand" "")]
507 ix86_expand_push (<MODE>mode, operands[0]);
511 (define_expand "movmisalign<mode>"
512 [(set (match_operand:V16 0 "nonimmediate_operand" "")
513 (match_operand:V16 1 "nonimmediate_operand" ""))]
516 ix86_expand_vector_move_misalign (<MODE>mode, operands);
520 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
521 [(set (match_operand:VF 0 "nonimmediate_operand" "")
523 [(match_operand:VF 1 "nonimmediate_operand" "")]
527 if (MEM_P (operands[0]) && MEM_P (operands[1]))
528 operands[1] = force_reg (<MODE>mode, operands[1]);
531 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
532 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
534 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
536 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
537 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
538 [(set_attr "type" "ssemov")
539 (set_attr "movu" "1")
540 (set_attr "prefix" "maybe_vex")
541 (set_attr "mode" "<MODE>")])
543 (define_expand "<sse2>_movdqu<avxsizesuffix>"
544 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
545 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
549 if (MEM_P (operands[0]) && MEM_P (operands[1]))
550 operands[1] = force_reg (<MODE>mode, operands[1]);
553 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
554 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
555 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
557 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
558 "%vmovdqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set (attr "prefix_data16")
563 (match_test "TARGET_AVX")
566 (set_attr "prefix" "maybe_vex")
567 (set_attr "mode" "<sseinsnmode>")])
569 (define_insn "<sse3>_lddqu<avxsizesuffix>"
570 [(set (match_operand:VI1 0 "register_operand" "=x")
571 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
574 "%vlddqu\t{%1, %0|%0, %1}"
575 [(set_attr "type" "ssemov")
576 (set_attr "movu" "1")
577 (set (attr "prefix_data16")
579 (match_test "TARGET_AVX")
582 (set (attr "prefix_rep")
584 (match_test "TARGET_AVX")
587 (set_attr "prefix" "maybe_vex")
588 (set_attr "mode" "<sseinsnmode>")])
590 (define_insn "sse2_movntsi"
591 [(set (match_operand:SI 0 "memory_operand" "=m")
592 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
595 "movnti\t{%1, %0|%0, %1}"
596 [(set_attr "type" "ssemov")
597 (set_attr "prefix_data16" "0")
598 (set_attr "mode" "V2DF")])
600 (define_insn "<sse>_movnt<mode>"
601 [(set (match_operand:VF 0 "memory_operand" "=m")
602 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
605 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
606 [(set_attr "type" "ssemov")
607 (set_attr "prefix" "maybe_vex")
608 (set_attr "mode" "<MODE>")])
610 (define_insn "<sse2>_movnt<mode>"
611 [(set (match_operand:VI8 0 "memory_operand" "=m")
612 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
615 "%vmovntdq\t{%1, %0|%0, %1}"
616 [(set_attr "type" "ssecvt")
617 (set (attr "prefix_data16")
619 (match_test "TARGET_AVX")
622 (set_attr "prefix" "maybe_vex")
623 (set_attr "mode" "<sseinsnmode>")])
625 ; Expand patterns for non-temporal stores. At the moment, only those
626 ; that directly map to insns are defined; it would be possible to
627 ; define patterns for other modes that would expand to several insns.
629 ;; Modes handled by storent patterns.
630 (define_mode_iterator STORENT_MODE
631 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
633 (V8SF "TARGET_AVX") V4SF
634 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
636 (define_expand "storent<mode>"
637 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
639 [(match_operand:STORENT_MODE 1 "register_operand" "")]
643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
645 ;; Parallel floating point arithmetic
647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
649 (define_expand "<code><mode>2"
650 [(set (match_operand:VF 0 "register_operand" "")
652 (match_operand:VF 1 "register_operand" "")))]
654 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
656 (define_insn_and_split "*absneg<mode>2"
657 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
658 (match_operator:VF 3 "absneg_operator"
659 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
660 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
663 "&& reload_completed"
666 enum rtx_code absneg_op;
672 if (MEM_P (operands[1]))
673 op1 = operands[2], op2 = operands[1];
675 op1 = operands[1], op2 = operands[2];
680 if (rtx_equal_p (operands[0], operands[1]))
686 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
687 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
688 t = gen_rtx_SET (VOIDmode, operands[0], t);
692 [(set_attr "isa" "noavx,noavx,avx,avx")])
694 (define_expand "<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "")
697 (match_operand:VF 1 "nonimmediate_operand" "")
698 (match_operand:VF 2 "nonimmediate_operand" "")))]
700 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
702 (define_insn "*<plusminus_insn><mode>3"
703 [(set (match_operand:VF 0 "register_operand" "=x,x")
705 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
706 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
707 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
709 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
710 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
711 [(set_attr "isa" "noavx,avx")
712 (set_attr "type" "sseadd")
713 (set_attr "prefix" "orig,vex")
714 (set_attr "mode" "<MODE>")])
716 (define_insn "<sse>_vm<plusminus_insn><mode>3"
717 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
720 (match_operand:VF_128 1 "register_operand" "0,x")
721 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
726 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
727 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
728 [(set_attr "isa" "noavx,avx")
729 (set_attr "type" "sseadd")
730 (set_attr "prefix" "orig,vex")
731 (set_attr "mode" "<ssescalarmode>")])
733 (define_expand "mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "")
736 (match_operand:VF 1 "nonimmediate_operand" "")
737 (match_operand:VF 2 "nonimmediate_operand" "")))]
739 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
741 (define_insn "*mul<mode>3"
742 [(set (match_operand:VF 0 "register_operand" "=x,x")
744 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
745 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
746 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
748 mul<ssemodesuffix>\t{%2, %0|%0, %2}
749 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
750 [(set_attr "isa" "noavx,avx")
751 (set_attr "type" "ssemul")
752 (set_attr "prefix" "orig,vex")
753 (set_attr "mode" "<MODE>")])
755 (define_insn "<sse>_vmmul<mode>3"
756 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
759 (match_operand:VF_128 1 "register_operand" "0,x")
760 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
765 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
766 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
767 [(set_attr "isa" "noavx,avx")
768 (set_attr "type" "ssemul")
769 (set_attr "prefix" "orig,vex")
770 (set_attr "mode" "<ssescalarmode>")])
772 (define_expand "div<mode>3"
773 [(set (match_operand:VF2 0 "register_operand" "")
774 (div:VF2 (match_operand:VF2 1 "register_operand" "")
775 (match_operand:VF2 2 "nonimmediate_operand" "")))]
777 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
779 (define_expand "div<mode>3"
780 [(set (match_operand:VF1 0 "register_operand" "")
781 (div:VF1 (match_operand:VF1 1 "register_operand" "")
782 (match_operand:VF1 2 "nonimmediate_operand" "")))]
785 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
788 && TARGET_RECIP_VEC_DIV
789 && !optimize_insn_for_size_p ()
790 && flag_finite_math_only && !flag_trapping_math
791 && flag_unsafe_math_optimizations)
793 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
798 (define_insn "<sse>_div<mode>3"
799 [(set (match_operand:VF 0 "register_operand" "=x,x")
801 (match_operand:VF 1 "register_operand" "0,x")
802 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
805 div<ssemodesuffix>\t{%2, %0|%0, %2}
806 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
807 [(set_attr "isa" "noavx,avx")
808 (set_attr "type" "ssediv")
809 (set_attr "prefix" "orig,vex")
810 (set_attr "mode" "<MODE>")])
812 (define_insn "<sse>_vmdiv<mode>3"
813 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
816 (match_operand:VF_128 1 "register_operand" "0,x")
817 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
822 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
823 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
824 [(set_attr "isa" "noavx,avx")
825 (set_attr "type" "ssediv")
826 (set_attr "prefix" "orig,vex")
827 (set_attr "mode" "<ssescalarmode>")])
829 (define_insn "<sse>_rcp<mode>2"
830 [(set (match_operand:VF1 0 "register_operand" "=x")
832 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
834 "%vrcpps\t{%1, %0|%0, %1}"
835 [(set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "prefix" "maybe_vex")
838 (set_attr "mode" "<MODE>")])
840 (define_insn "sse_vmrcpv4sf2"
841 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
843 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
845 (match_operand:V4SF 2 "register_operand" "0,x")
849 rcpss\t{%1, %0|%0, %1}
850 vrcpss\t{%1, %2, %0|%0, %2, %1}"
851 [(set_attr "isa" "noavx,avx")
852 (set_attr "type" "sse")
853 (set_attr "atom_sse_attr" "rcp")
854 (set_attr "prefix" "orig,vex")
855 (set_attr "mode" "SF")])
857 (define_expand "sqrt<mode>2"
858 [(set (match_operand:VF2 0 "register_operand" "")
859 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
862 (define_expand "sqrt<mode>2"
863 [(set (match_operand:VF1 0 "register_operand" "")
864 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
868 && TARGET_RECIP_VEC_SQRT
869 && !optimize_insn_for_size_p ()
870 && flag_finite_math_only && !flag_trapping_math
871 && flag_unsafe_math_optimizations)
873 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
878 (define_insn "<sse>_sqrt<mode>2"
879 [(set (match_operand:VF 0 "register_operand" "=x")
880 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
882 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
883 [(set_attr "type" "sse")
884 (set_attr "atom_sse_attr" "sqrt")
885 (set_attr "prefix" "maybe_vex")
886 (set_attr "mode" "<MODE>")])
888 (define_insn "<sse>_vmsqrt<mode>2"
889 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
892 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
893 (match_operand:VF_128 2 "register_operand" "0,x")
897 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
898 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "isa" "noavx,avx")
900 (set_attr "type" "sse")
901 (set_attr "atom_sse_attr" "sqrt")
902 (set_attr "prefix" "orig,vex")
903 (set_attr "mode" "<ssescalarmode>")])
905 (define_expand "rsqrt<mode>2"
906 [(set (match_operand:VF1 0 "register_operand" "")
908 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
915 (define_insn "<sse>_rsqrt<mode>2"
916 [(set (match_operand:VF1 0 "register_operand" "=x")
918 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "<MODE>")])
925 (define_insn "sse_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
930 (match_operand:V4SF 2 "register_operand" "0,x")
934 rsqrtss\t{%1, %0|%0, %1}
935 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
936 [(set_attr "isa" "noavx,avx")
937 (set_attr "type" "sse")
938 (set_attr "prefix" "orig,vex")
939 (set_attr "mode" "SF")])
941 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
942 ;; isn't really correct, as those rtl operators aren't defined when
943 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
945 (define_expand "<code><mode>3"
946 [(set (match_operand:VF 0 "register_operand" "")
948 (match_operand:VF 1 "nonimmediate_operand" "")
949 (match_operand:VF 2 "nonimmediate_operand" "")))]
952 if (!flag_finite_math_only)
953 operands[1] = force_reg (<MODE>mode, operands[1]);
954 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
957 (define_insn "*<code><mode>3_finite"
958 [(set (match_operand:VF 0 "register_operand" "=x,x")
960 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
961 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
962 "TARGET_SSE && flag_finite_math_only
963 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
965 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
966 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
967 [(set_attr "isa" "noavx,avx")
968 (set_attr "type" "sseadd")
969 (set_attr "prefix" "orig,vex")
970 (set_attr "mode" "<MODE>")])
972 (define_insn "*<code><mode>3"
973 [(set (match_operand:VF 0 "register_operand" "=x,x")
975 (match_operand:VF 1 "register_operand" "0,x")
976 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
977 "TARGET_SSE && !flag_finite_math_only"
979 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
980 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
981 [(set_attr "isa" "noavx,avx")
982 (set_attr "type" "sseadd")
983 (set_attr "prefix" "orig,vex")
984 (set_attr "mode" "<MODE>")])
986 (define_insn "<sse>_vm<code><mode>3"
987 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
990 (match_operand:VF_128 1 "register_operand" "0,x")
991 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
996 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
997 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
998 [(set_attr "isa" "noavx,avx")
999 (set_attr "type" "sse")
1000 (set_attr "prefix" "orig,vex")
1001 (set_attr "mode" "<ssescalarmode>")])
1003 ;; These versions of the min/max patterns implement exactly the operations
1004 ;; min = (op1 < op2 ? op1 : op2)
1005 ;; max = (!(op1 < op2) ? op1 : op2)
1006 ;; Their operands are not commutative, and thus they may be used in the
1007 ;; presence of -0.0 and NaN.
1009 (define_insn "*ieee_smin<mode>3"
1010 [(set (match_operand:VF 0 "register_operand" "=x,x")
1012 [(match_operand:VF 1 "register_operand" "0,x")
1013 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1017 min<ssemodesuffix>\t{%2, %0|%0, %2}
1018 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "isa" "noavx,avx")
1020 (set_attr "type" "sseadd")
1021 (set_attr "prefix" "orig,vex")
1022 (set_attr "mode" "<MODE>")])
1024 (define_insn "*ieee_smax<mode>3"
1025 [(set (match_operand:VF 0 "register_operand" "=x,x")
1027 [(match_operand:VF 1 "register_operand" "0,x")
1028 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1032 max<ssemodesuffix>\t{%2, %0|%0, %2}
1033 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1034 [(set_attr "isa" "noavx,avx")
1035 (set_attr "type" "sseadd")
1036 (set_attr "prefix" "orig,vex")
1037 (set_attr "mode" "<MODE>")])
1039 (define_insn "avx_addsubv4df3"
1040 [(set (match_operand:V4DF 0 "register_operand" "=x")
1043 (match_operand:V4DF 1 "register_operand" "x")
1044 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1045 (minus:V4DF (match_dup 1) (match_dup 2))
1048 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1049 [(set_attr "type" "sseadd")
1050 (set_attr "prefix" "vex")
1051 (set_attr "mode" "V4DF")])
1053 (define_insn "sse3_addsubv2df3"
1054 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1057 (match_operand:V2DF 1 "register_operand" "0,x")
1058 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1059 (minus:V2DF (match_dup 1) (match_dup 2))
1063 addsubpd\t{%2, %0|%0, %2}
1064 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "isa" "noavx,avx")
1066 (set_attr "type" "sseadd")
1067 (set_attr "atom_unit" "complex")
1068 (set_attr "prefix" "orig,vex")
1069 (set_attr "mode" "V2DF")])
1071 (define_insn "avx_addsubv8sf3"
1072 [(set (match_operand:V8SF 0 "register_operand" "=x")
1075 (match_operand:V8SF 1 "register_operand" "x")
1076 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1077 (minus:V8SF (match_dup 1) (match_dup 2))
1080 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "prefix" "vex")
1083 (set_attr "mode" "V8SF")])
1085 (define_insn "sse3_addsubv4sf3"
1086 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1089 (match_operand:V4SF 1 "register_operand" "0,x")
1090 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1091 (minus:V4SF (match_dup 1) (match_dup 2))
1095 addsubps\t{%2, %0|%0, %2}
1096 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1097 [(set_attr "isa" "noavx,avx")
1098 (set_attr "type" "sseadd")
1099 (set_attr "prefix" "orig,vex")
1100 (set_attr "prefix_rep" "1,*")
1101 (set_attr "mode" "V4SF")])
;; vh<plusminus_mnemonic>pd on V4DF.  The pattern selects DF elements
;; 0, 1, 2 and 3 of operand 1 (a register) and the same elements of
;; operand 2 (register or memory) with vec_select.  Only the VEX form
;; is emitted.
1103 (define_insn "avx_h<plusminus_insn>v4df3"
1104 [(set (match_operand:V4DF 0 "register_operand" "=x")
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (parallel [(const_int 0)]))
1111 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1113 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1114 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1118 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1119 (parallel [(const_int 0)]))
1120 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1122 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1123 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1125 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1126 [(set_attr "type" "sseadd")
1127 (set_attr "prefix" "vex")
1128 (set_attr "mode" "V4DF")])
;; h<plusminus_mnemonic>pd on V2DF.  The pattern selects DF elements 0
;; and 1 of operand 1 and of operand 2.  Alternative 0 is the legacy
;; two-operand form (operand 1 tied to the destination); alternative 1
;; is the three-operand VEX form.
1130 (define_insn "sse3_h<plusminus_insn>v2df3"
1131 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1135 (match_operand:V2DF 1 "register_operand" "0,x")
1136 (parallel [(const_int 0)]))
1137 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1140 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1141 (parallel [(const_int 0)]))
1142 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1145 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1146 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "isa" "noavx,avx")
1148 (set_attr "type" "sseadd")
1149 (set_attr "prefix" "orig,vex")
1150 (set_attr "mode" "V2DF")])
;; vh<plusminus_mnemonic>ps on V8SF.  The pattern lists SF elements 0-3
;; of operand 1, then elements 0-3 of operand 2, then elements 4-7 of
;; operand 1, then elements 4-7 of operand 2.  Operand 1 must be a
;; register; operand 2 may be a register or memory.  Only the VEX form
;; is emitted.
1152 (define_insn "avx_h<plusminus_insn>v8sf3"
1153 [(set (match_operand:V8SF 0 "register_operand" "=x")
1159 (match_operand:V8SF 1 "register_operand" "x")
1160 (parallel [(const_int 0)]))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1163 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1168 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1169 (parallel [(const_int 0)]))
1170 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1173 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1177 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1178 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1180 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1181 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1184 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1185 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1187 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1188 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1190 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "prefix" "vex")
1193 (set_attr "mode" "V8SF")])
;; h<plusminus_mnemonic>ps on V4SF.  The pattern selects SF elements 0-3
;; of operand 1 and of operand 2.  Alternative 0 is the legacy
;; two-operand form (operand 1 tied to the destination, prefix_rep 1);
;; alternative 1 is the three-operand VEX form.
1195 (define_insn "sse3_h<plusminus_insn>v4sf3"
1196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1201 (match_operand:V4SF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1205 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1206 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1210 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1211 (parallel [(const_int 0)]))
1212 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1214 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1215 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1218 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1219 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1220 [(set_attr "isa" "noavx,avx")
1221 (set_attr "type" "sseadd")
1222 (set_attr "atom_unit" "complex")
1223 (set_attr "prefix" "orig,vex")
1224 (set_attr "prefix_rep" "1,*")
1225 (set_attr "mode" "V4SF")])
;; Sum reduction expander for V4DF (operand 1 -> operand 0), emitted as
;; three insns using two fresh V4DF pseudos:
;;   tmp  = avx_haddv4df3 (operand 1, operand 1)
;;   tmp2 = avx_vperm2f128v4df3 (tmp, tmp, 1)
;;   operand 0 = addv4df3 (tmp, tmp2)
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves of
;; tmp so the final add combines both halves -- confirm against the
;; vperm2f128 definition.
1227 (define_expand "reduc_splus_v4df"
1228 [(match_operand:V4DF 0 "register_operand" "")
1229 (match_operand:V4DF 1 "register_operand" "")]
1232 rtx tmp = gen_reg_rtx (V4DFmode);
1233 rtx tmp2 = gen_reg_rtx (V4DFmode);
1234 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1235 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1236 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V2DF: a single sse3_haddv2df3 of operand 1
;; with itself, written directly to operand 0.
1240 (define_expand "reduc_splus_v2df"
1241 [(match_operand:V2DF 0 "register_operand" "")
1242 (match_operand:V2DF 1 "register_operand" "")]
1245 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction expander for V8SF (operand 1 -> operand 0), emitted as
;; four insns using two V8SF pseudos:
;;   tmp  = avx_haddv8sf3 (operand 1, operand 1)
;;   tmp2 = avx_haddv8sf3 (tmp, tmp)
;;   tmp  = avx_vperm2f128v8sf3 (tmp2, tmp2, 1)   (tmp is reused here)
;;   operand 0 = addv8sf3 (tmp, tmp2)
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves --
;; confirm against the vperm2f128 definition.
1249 (define_expand "reduc_splus_v8sf"
1250 [(match_operand:V8SF 0 "register_operand" "")
1251 (match_operand:V8SF 1 "register_operand" "")]
1254 rtx tmp = gen_reg_rtx (V8SFmode);
1255 rtx tmp2 = gen_reg_rtx (V8SFmode);
1256 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1257 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1258 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1259 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V4SF.  Two expansions are present:
;;   - two sse3_haddv4sf3 insns chained through a fresh V4SF pseudo, the
;;     second writing operand 0;
;;   - a call to ix86_expand_reduc with gen_addv4sf3 as the combining
;;     generator.
;; NOTE(review): the hadd sequence is presumably chosen only when SSE3
;; is available, with ix86_expand_reduc as the fallback -- confirm.
1263 (define_expand "reduc_splus_v4sf"
1264 [(match_operand:V4SF 0 "register_operand" "")
1265 (match_operand:V4SF 1 "register_operand" "")]
1270 rtx tmp = gen_reg_rtx (V4SFmode);
1271 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1272 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1275 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1279 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The integer modes (V32QI, V16HI, V8SI, V4DI) are enabled only with
;; TARGET_AVX2, V8SF and V4DF with TARGET_AVX, and V4SF with TARGET_SSE.
1280 (define_mode_iterator REDUC_SMINMAX_MODE
1281 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1282 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1283 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1284 (V4SF "TARGET_SSE")])
;; Min/max reduction expander over REDUC_SMINMAX_MODE, iterated over the
;; smaxmin codes.  The work is delegated to ix86_expand_reduc, which is
;; handed the matching element-wise generator gen_<code><mode>3 together
;; with the destination (operand 0) and the source (operand 1).
1286 (define_expand "reduc_<code>_<mode>"
1287 [(smaxmin:REDUC_SMINMAX_MODE
1288 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1289 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1292 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes, expanded the same way: a
;; call to ix86_expand_reduc with gen_<code><mode>3 as the element-wise
;; generator.
;; NOTE(review): presumably the unsigned min/max counterpart of the
;; signed expander -- confirm which code iterator drives <code> here.
1296 (define_expand "reduc_<code>_<mode>"
1298 (match_operand:VI_256 0 "register_operand" "")
1299 (match_operand:VI_256 1 "register_operand" ""))]
1302 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1308 ;; Parallel floating point comparisons
1310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed vcmp<ssemodesuffix> with an immediate predicate, for all VF
;; modes.  Operand 1 is a register, operand 2 a register or memory, and
;; operand 3 an SImode constant accepted by const_0_to_31_operand.  The
;; immediate accounts for one byte of instruction length
;; (length_immediate 1).
1312 (define_insn "avx_cmp<mode>3"
1313 [(set (match_operand:VF 0 "register_operand" "=x")
1315 [(match_operand:VF 1 "register_operand" "x")
1316 (match_operand:VF 2 "nonimmediate_operand" "xm")
1317 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1320 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1321 [(set_attr "type" "ssecmp")
1322 (set_attr "length_immediate" "1")
1323 (set_attr "prefix" "vex")
1324 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vcmp<ssescalarmodesuffix> with an immediate predicate,
;; for the 128-bit float modes (VF_128).  Operands follow the same
;; layout as the packed immediate compare: register, register-or-memory,
;; and a constant accepted by const_0_to_31_operand.  The "mode"
;; attribute is the scalar mode <ssescalarmode>.
1326 (define_insn "avx_vmcmp<mode>3"
1327 [(set (match_operand:VF_128 0 "register_operand" "=x")
1330 [(match_operand:VF_128 1 "register_operand" "x")
1331 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1332 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1337 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1338 [(set_attr "type" "ssecmp")
1339 (set_attr "length_immediate" "1")
1340 (set_attr "prefix" "vex")
1341 (set_attr "mode" "<ssescalarmode>")])
;; Commutative form of the mask compare.  It matches only when the
;; comparison operator (operand 3) has RTX class RTX_COMM_COMPARE, which
;; is what allows operand 1 to carry the "%" commutativity marker.  The
;; comparison is spliced into the mnemonic through %D3.  Alternative 0
;; is the legacy two-operand form, alternative 1 the VEX form.
1343 (define_insn "*<sse>_maskcmp<mode>3_comm"
1344 [(set (match_operand:VF 0 "register_operand" "=x,x")
1345 (match_operator:VF 3 "sse_comparison_operator"
1346 [(match_operand:VF 1 "register_operand" "%0,x")
1347 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1349 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1351 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1352 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1353 [(set_attr "isa" "noavx,avx")
1354 (set_attr "type" "ssecmp")
1355 (set_attr "length_immediate" "1")
1356 (set_attr "prefix" "orig,vex")
1357 (set_attr "mode" "<MODE>")])
;; Packed mask compare for all VF modes, for any operator accepted by
;; sse_comparison_operator (operand 3).  Operand 1 has no "%" marker, so
;; the operands are not treated as interchangeable.  The comparison is
;; spliced into the mnemonic through %D3.  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX form.
1359 (define_insn "<sse>_maskcmp<mode>3"
1360 [(set (match_operand:VF 0 "register_operand" "=x,x")
1361 (match_operator:VF 3 "sse_comparison_operator"
1362 [(match_operand:VF 1 "register_operand" "0,x")
1363 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1366 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1367 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1368 [(set_attr "isa" "noavx,avx")
1369 (set_attr "type" "ssecmp")
1370 (set_attr "length_immediate" "1")
1371 (set_attr "prefix" "orig,vex")
1372 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed mask compare for the 128-bit float modes (VF_128),
;; using cmp%D3<ssescalarmodesuffix> / vcmp%D3<ssescalarmodesuffix>.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; form.  length_immediate is given explicitly (1) only for the legacy
;; alternative.
1374 (define_insn "<sse>_vmmaskcmp<mode>3"
1375 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1377 (match_operator:VF_128 3 "sse_comparison_operator"
1378 [(match_operand:VF_128 1 "register_operand" "0,x")
1379 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1384 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1385 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1386 [(set_attr "isa" "noavx,avx")
1387 (set_attr "type" "ssecmp")
1388 (set_attr "length_immediate" "1,*")
1389 (set_attr "prefix" "orig,vex")
1390 (set_attr "mode" "<ssescalarmode>")])
;; comi<ssemodesuffix>: sets FLAGS_REG in CCFP mode from element 0 of
;; operand 0 (register) and element 0 of operand 1 (register or memory).
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.  The "%v"
;; in the template together with prefix "maybe_vex" lets one pattern
;; cover both encodings.  prefix_data16 is selected by whether the
;; "mode" attribute is DF.
1392 (define_insn "<sse>_comi"
1393 [(set (reg:CCFP FLAGS_REG)
1396 (match_operand:<ssevecmode> 0 "register_operand" "x")
1397 (parallel [(const_int 0)]))
1399 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1400 (parallel [(const_int 0)]))))]
1401 "SSE_FLOAT_MODE_P (<MODE>mode)"
1402 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1403 [(set_attr "type" "ssecomi")
1404 (set_attr "prefix" "maybe_vex")
1405 (set_attr "prefix_rep" "0")
1406 (set (attr "prefix_data16")
1407 (if_then_else (eq_attr "mode" "DF")
1409 (const_string "0")))
1410 (set_attr "mode" "<MODE>")])
;; ucomi<ssemodesuffix>: same operand layout as the comi pattern
;; (element 0 of a register operand against element 0 of a
;; register-or-memory operand), but the flags are set in CCFPU mode.
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
;; prefix_data16 is selected by whether the "mode" attribute is DF.
1412 (define_insn "<sse>_ucomi"
1413 [(set (reg:CCFPU FLAGS_REG)
1416 (match_operand:<ssevecmode> 0 "register_operand" "x")
1417 (parallel [(const_int 0)]))
1419 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1420 (parallel [(const_int 0)]))))]
1421 "SSE_FLOAT_MODE_P (<MODE>mode)"
1422 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1423 [(set_attr "type" "ssecomi")
1424 (set_attr "prefix" "maybe_vex")
1425 (set_attr "prefix_rep" "0")
1426 (set (attr "prefix_data16")
1427 (if_then_else (eq_attr "mode" "DF")
1429 (const_string "0")))
1430 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit modes: the data operands (0, 1,
;; 2) are in a V_256 mode while the compared operands (4, 5) are in a
;; VF_256 mode.  The condition requires both modes to have the same
;; number of units.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose boolean result is captured in "ok".
1432 (define_expand "vcond<V_256:mode><VF_256:mode>"
1433 [(set (match_operand:V_256 0 "register_operand" "")
1435 (match_operator 3 ""
1436 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1437 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1438 (match_operand:V_256 1 "general_operand" "")
1439 (match_operand:V_256 2 "general_operand" "")))]
1441 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1442 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1444 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 128-bit modes: the data operands (0, 1,
;; 2) are in a V_128 mode while the compared operands (4, 5) are in a
;; VF_128 mode.  The condition requires both modes to have the same
;; number of units.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose boolean result is captured in "ok".
1449 (define_expand "vcond<V_128:mode><VF_128:mode>"
1450 [(set (match_operand:V_128 0 "register_operand" "")
1452 (match_operator 3 ""
1453 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1454 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1455 (match_operand:V_128 1 "general_operand" "")
1456 (match_operand:V_128 2 "general_operand" "")))]
1458 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1459 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1461 bool ok = ix86_expand_fp_vcond (operands);
1466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1468 ;; Parallel floating point logical operations
1470 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not for all VF modes.  The output code builds the
;; instruction text at run time in a static 32-byte buffer:
;;   - the suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;;     set, otherwise <ssemodesuffix>;
;;   - which_alternative picks the legacy two-operand "andn%s" template
;;     or the three-operand "vandn%s" template;
;;   - snprintf substitutes the suffix into the chosen template (hence
;;     the doubled "%%" before operand numbers).
1472 (define_insn "<sse>_andnot<mode>3"
1473 [(set (match_operand:VF 0 "register_operand" "=x,x")
1476 (match_operand:VF 1 "register_operand" "0,x"))
1477 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1480 static char buf[32];
1483 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1485 switch (which_alternative)
1488 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1491 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1497 snprintf (buf, sizeof (buf), insn, suffix);
1500 [(set_attr "isa" "noavx,avx")
1501 (set_attr "type" "sselog")
1502 (set_attr "prefix" "orig,vex")
1503 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations on VF modes.  Both inputs
;; may be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with the operation code, the mode
;; and the operand array.
1505 (define_expand "<code><mode>3"
1506 [(set (match_operand:VF 0 "register_operand" "")
1508 (match_operand:VF 1 "nonimmediate_operand" "")
1509 (match_operand:VF 2 "nonimmediate_operand" "")))]
1511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed logical insn for all VF modes, valid when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the
;; "%" commutativity marker.  The instruction text is built at run time
;; in a static 32-byte buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; <ssemodesuffix>, and which_alternative picks the legacy "<logic>%s"
;; or the VEX "v<logic>%s" template.
1513 (define_insn "*<code><mode>3"
1514 [(set (match_operand:VF 0 "register_operand" "=x,x")
1516 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1517 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1518 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1520 static char buf[32];
1523 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1525 switch (which_alternative)
1528 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1531 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1537 snprintf (buf, sizeof (buf), insn, suffix);
1540 [(set_attr "isa" "noavx,avx")
1541 (set_attr "type" "sselog")
1542 (set_attr "prefix" "orig,vex")
1543 (set_attr "mode" "<MODE>")])
;; copysign for all VF modes, expanded as three logical operations on a
;; mask held in operand 3:
;;   - (not mask) combined with operand 1,
;;   - mask AND operand 2,
;;   - operand 0 = operand 4 IOR operand 5.
;; The preparation code sets operand 3 to
;; ix86_build_signbit_mask (<MODE>mode, 1, 0) and allocates fresh
;; pseudos for operands 4 and 5, which hold the two intermediate
;; results.
1545 (define_expand "copysign<mode>3"
1548 (not:VF (match_dup 3))
1549 (match_operand:VF 1 "nonimmediate_operand" "")))
1551 (and:VF (match_dup 3)
1552 (match_operand:VF 2 "nonimmediate_operand" "")))
1553 (set (match_operand:VF 0 "register_operand" "")
1554 (ior:VF (match_dup 4) (match_dup 5)))]
1557 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1559 operands[4] = gen_reg_rtx (<MODE>mode);
1560 operands[5] = gen_reg_rtx (<MODE>mode);
1563 ;; Also define scalar versions. These are used for abs, neg, and
1564 ;; conditional move. Using subregs into vector modes causes register
1565 ;; allocation lossage. These patterns do not allow memory operands
1566 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All three operands must be registers.
;; Enabled when SSE_FLOAT_MODE_P holds for the mode.  The instruction
;; text is built at run time in a static 32-byte buffer: the suffix is
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; <ssevecmodesuffix>, and which_alternative picks the legacy "andn%s"
;; or the VEX "vandn%s" template.  The "mode" attribute is the vector
;; mode <ssevecmode>.
1568 (define_insn "*andnot<mode>3"
1569 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1572 (match_operand:MODEF 1 "register_operand" "0,x"))
1573 (match_operand:MODEF 2 "register_operand" "x,x")))]
1574 "SSE_FLOAT_MODE_P (<MODE>mode)"
1576 static char buf[32];
1579 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1581 switch (which_alternative)
1584 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1587 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1593 snprintf (buf, sizeof (buf), insn, suffix);
1596 [(set_attr "isa" "noavx,avx")
1597 (set_attr "type" "sselog")
1598 (set_attr "prefix" "orig,vex")
1599 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical insn.  All three operands must be registers,
;; and operand 1 carries the "%" commutativity marker.  Enabled when
;; SSE_FLOAT_MODE_P holds for the mode.  The instruction text is built
;; at run time in a static 32-byte buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; <ssevecmodesuffix>, and which_alternative picks the legacy
;; "<logic>%s" or the VEX "v<logic>%s" template.
1601 (define_insn "*<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1604 (match_operand:MODEF 1 "register_operand" "%0,x")
1605 (match_operand:MODEF 2 "register_operand" "x,x")))]
1606 "SSE_FLOAT_MODE_P (<MODE>mode)"
1608 static char buf[32];
1611 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1613 switch (which_alternative)
1616 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1619 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1625 snprintf (buf, sizeof (buf), insn, suffix);
1628 [(set_attr "isa" "noavx,avx")
1629 (set_attr "type" "sselog")
1630 (set_attr "prefix" "orig,vex")
1631 (set_attr "mode" "<ssevecmode>")])
1633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1635 ;; FMA4 floating point multiply/accumulate instructions. This
1636 ;; includes the scalar versions of the instructions as well as the
1637 ;; vector versions.
1639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1641 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1642 ;; combine to generate a multiply/add with two memory references. We then
1643 ;; split this insn so that the destination register is first loaded with
1644 ;; one of the memory operands. If we don't manage to split the insn, reload
1645 ;; will generate the appropriate moves. This is needed because combine
1646 ;; has already folded one of the memory references into both the multiply and
1647 ;; add insns, and it can't generate a new pseudo. I.e.:
1648 ;; (set (reg1) (mem (addr1)))
1649 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1650 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1652 ;; ??? This is historic, pre-dating the gimple fma transformation.
1653 ;; We could now properly represent that only one memory operand is
1654 ;; allowed and not be penalized during optimization.
1656 ;; Intrinsic FMA operations.
1658 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE.  All three
;; inputs may be registers or memory.  Available when either TARGET_FMA
;; or TARGET_FMA4 is set and TARGET_SSE_MATH is enabled.
1659 (define_expand "fma<mode>4"
1660 [(set (match_operand:FMAMODE 0 "register_operand")
1662 (match_operand:FMAMODE 1 "nonimmediate_operand")
1663 (match_operand:FMAMODE 2 "nonimmediate_operand")
1664 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1665 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander over FMAMODE: same as
;; the fma expander except that operand 3 is wrapped in neg.  Available
;; when either TARGET_FMA or TARGET_FMA4 is set and TARGET_SSE_MATH is
;; enabled.
1667 (define_expand "fms<mode>4"
1668 [(set (match_operand:FMAMODE 0 "register_operand")
1670 (match_operand:FMAMODE 1 "nonimmediate_operand")
1671 (match_operand:FMAMODE 2 "nonimmediate_operand")
1672 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1673 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander over FMAMODE:
;; operand 1 is wrapped in neg, operands 2 and 3 are used as given.
;; Available when either TARGET_FMA or TARGET_FMA4 is set and
;; TARGET_SSE_MATH is enabled.
1675 (define_expand "fnma<mode>4"
1676 [(set (match_operand:FMAMODE 0 "register_operand")
1678 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1679 (match_operand:FMAMODE 2 "nonimmediate_operand")
1680 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1681 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander over FMAMODE:
;; both operand 1 and operand 3 are wrapped in neg.  Available when
;; either TARGET_FMA or TARGET_FMA4 is set and TARGET_SSE_MATH is
;; enabled.
1683 (define_expand "fnms<mode>4"
1684 [(set (match_operand:FMAMODE 0 "register_operand")
1686 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1687 (match_operand:FMAMODE 2 "nonimmediate_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1689 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1691 ;; The builtins for fma4intrin.h do not depend on SSE math being enabled.
;; Intrinsic fused multiply-add expander over FMAMODE.  Unlike the
;; standard-name fma expander, its condition is only
;; TARGET_FMA || TARGET_FMA4, with no TARGET_SSE_MATH requirement.
1692 (define_expand "fma4i_fmadd_<mode>"
1693 [(set (match_operand:FMAMODE 0 "register_operand")
1695 (match_operand:FMAMODE 1 "nonimmediate_operand")
1696 (match_operand:FMAMODE 2 "nonimmediate_operand")
1697 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1698 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> over FMAMODE.  The destination is
;; a separate register (no input is tied to it).  The two alternatives
;; differ in where a memory operand may appear: operand 3 in
;; alternative 0 ("xm"), operand 2 in alternative 1 ("m").  Operand 1
;; carries the "%" commutativity marker.
1700 (define_insn "*fma4i_fmadd_<mode>"
1701 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1703 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1704 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1705 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1707 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1708 [(set_attr "type" "ssemuladd")
1709 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> over FMAMODE.  The destination is
;; a separate register.  A memory operand may appear as operand 3
;; (alternative 0) or operand 2 (alternative 1).  Operand 1 carries the
;; "%" commutativity marker.
1711 (define_insn "*fma4i_fmsub_<mode>"
1712 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1714 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1715 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1717 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1719 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1720 [(set_attr "type" "ssemuladd")
1721 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> over FMAMODE.  The destination is
;; a separate register.  A memory operand may appear as operand 3
;; (alternative 0) or operand 2 (alternative 1).  Operand 1 carries the
;; "%" commutativity marker.
1723 (define_insn "*fma4i_fnmadd_<mode>"
1724 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1727 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1728 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1729 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1731 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1732 [(set_attr "type" "ssemuladd")
1733 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> over FMAMODE.  The destination is
;; a separate register.  A memory operand may appear as operand 3
;; (alternative 0) or operand 2 (alternative 1).  Operand 1 carries the
;; "%" commutativity marker.
1735 (define_insn "*fma4i_fnmsub_<mode>"
1736 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1739 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1740 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1742 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1744 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
1748 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1749 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar multiply-add on the 128-bit float modes
;; (VF_128).  The preparation code supplies operand 4 as the all-zero
;; constant of the vector mode (CONST0_RTX).
1751 (define_expand "fma4i_vmfmadd_<mode>"
1752 [(set (match_operand:VF_128 0 "register_operand")
1755 (match_operand:VF_128 1 "nonimmediate_operand")
1756 (match_operand:VF_128 2 "nonimmediate_operand")
1757 (match_operand:VF_128 3 "nonimmediate_operand"))
1762 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar multiply-add on the 128-bit float modes
;; (VF_128), taking three register-or-memory inputs and a register
;; destination.
1765 (define_expand "fmai_vmfmadd_<mode>"
1766 [(set (match_operand:VF_128 0 "register_operand")
1769 (match_operand:VF_128 1 "nonimmediate_operand")
1770 (match_operand:VF_128 2 "nonimmediate_operand")
1771 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Three-operand scalar vfmadd on VF_128, one template per alternative:
;;   alt 0: vfmadd132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfmadd213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfmadd231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1776 (define_insn "*fmai_fmadd_<mode>"
1777 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1780 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1781 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1782 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1787 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1788 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1789 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1790 [(set_attr "type" "ssemuladd")
1791 (set_attr "mode" "<MODE>")])
;; Three-operand scalar vfmsub on VF_128, one template per alternative:
;;   alt 0: vfmsub132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfmsub213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfmsub231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1793 (define_insn "*fmai_fmsub_<mode>"
1794 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1797 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1798 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1800 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1805 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1806 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1807 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1808 [(set_attr "type" "ssemuladd")
1809 (set_attr "mode" "<MODE>")])
;; Three-operand scalar vfnmadd on VF_128, one template per alternative:
;;   alt 0: vfnmadd132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfnmadd213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfnmadd231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1811 (define_insn "*fmai_fnmadd_<mode>"
1812 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1816 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1817 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1818 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1823 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1824 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1825 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1826 [(set_attr "type" "ssemuladd")
1827 (set_attr "mode" "<MODE>")])
;; Three-operand scalar vfnmsub on VF_128, one template per alternative:
;;   alt 0: vfnmsub132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfnmsub213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfnmsub231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1829 (define_insn "*fmai_fnmsub_<mode>"
1830 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1834 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1835 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1837 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1842 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1843 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1844 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1845 [(set_attr "type" "ssemuladd")
1846 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfmadd<ssescalarmodesuffix> on VF_128.  Operand 4
;; must satisfy const0_operand.  A memory operand may appear as
;; operand 3 (alternative 0) or operand 2 (alternative 1); the
;; destination is a separate register.
1848 (define_insn "*fma4i_vmfmadd_<mode>"
1849 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1852 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1853 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1854 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1855 (match_operand:VF_128 4 "const0_operand" "")
1858 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfmsub<ssescalarmodesuffix> on VF_128.  Operand 4
;; must satisfy const0_operand.  A memory operand may appear as
;; operand 3 (alternative 0) or operand 2 (alternative 1); the
;; destination is a separate register.
1862 (define_insn "*fma4i_vmfmsub_<mode>"
1863 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1866 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1867 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1869 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1870 (match_operand:VF_128 4 "const0_operand" "")
1873 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1874 [(set_attr "type" "ssemuladd")
1875 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmadd<ssescalarmodesuffix> on VF_128.
;; Operand 4 must satisfy const0_operand.  A memory operand may appear
;; as operand 3 (alternative 0) or operand 2 (alternative 1); the
;; destination is a separate register.
1877 (define_insn "*fma4i_vmfnmadd_<mode>"
1878 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1882 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1883 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1884 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1885 (match_operand:VF_128 4 "const0_operand" "")
1888 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1889 [(set_attr "type" "ssemuladd")
1890 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmsub<ssescalarmodesuffix> on VF_128.
;; Operand 4 must satisfy const0_operand.  A memory operand may appear
;; as operand 3 (alternative 0) or operand 2 (alternative 1); the
;; destination is a separate register.
1892 (define_insn "*fma4i_vmfnmsub_<mode>"
1893 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1897 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1898 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1900 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1901 (match_operand:VF_128 4 "const0_operand" "")
1904 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1905 [(set_attr "type" "ssemuladd")
1906 (set_attr "mode" "<MODE>")])
1908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1910 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1912 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1914 ;; It would be possible to represent these without the UNSPEC as
1915 ;;
1916 ;; (vec_merge
1917 ;; (fma op1 op2 op3)
1918 ;; (fma op1 op2 (neg op3))
1919 ;; (merge-const))
1920 ;;
1921 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation over all VF modes, with
;; three register-or-memory inputs.  Available when either TARGET_FMA or
;; TARGET_FMA4 is set.
1923 (define_expand "fmaddsub_<mode>"
1924 [(set (match_operand:VF 0 "register_operand")
1926 [(match_operand:VF 1 "nonimmediate_operand")
1927 (match_operand:VF 2 "nonimmediate_operand")
1928 (match_operand:VF 3 "nonimmediate_operand")]
1930 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> over all VF modes.  The
;; destination is a separate register.  A memory operand may appear as
;; operand 3 (alternative 0) or operand 2 (alternative 1).
1932 (define_insn "*fma4_fmaddsub_<mode>"
1933 [(set (match_operand:VF 0 "register_operand" "=x,x")
1935 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1936 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1937 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1940 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1941 [(set_attr "type" "ssemuladd")
1942 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> over all VF modes.  The
;; destination is a separate register.  A memory operand may appear as
;; operand 3 (alternative 0) or operand 2 (alternative 1).
1944 (define_insn "*fma4_fmsubadd_<mode>"
1945 [(set (match_operand:VF 0 "register_operand" "=x,x")
1947 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1948 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1950 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1953 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1954 [(set_attr "type" "ssemuladd")
1955 (set_attr "mode" "<MODE>")])
1957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1959 ;; FMA3 floating point multiply/accumulate instructions.
1961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE, one template per alternative:
;;   alt 0: vfmadd132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfmadd213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfmadd231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1963 (define_insn "*fma_fmadd_<mode>"
1964 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1966 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1967 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1968 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1971 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1972 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1973 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1974 [(set_attr "type" "ssemuladd")
1975 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE, one template per alternative:
;;   alt 0: vfmsub132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfmsub213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfmsub231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1977 (define_insn "*fma_fmsub_<mode>"
1978 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1980 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1981 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1983 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1986 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1987 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1988 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1989 [(set_attr "type" "ssemuladd")
1990 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE, one template per alternative:
;;   alt 0: vfnmadd132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfnmadd213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfnmadd231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
1992 (define_insn "*fma_fnmadd_<mode>"
1993 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1996 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1997 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1998 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2001 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2002 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2003 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2004 [(set_attr "type" "ssemuladd")
2005 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE, one template per alternative:
;;   alt 0: vfnmsub132 -- operand 1 tied to the destination, operand 2
;;          may be memory;
;;   alt 1: vfnmsub213 -- operand 1 tied to the destination, operand 3
;;          may be memory;
;;   alt 2: vfnmsub231 -- operand 3 tied to the destination, operand 2
;;          may be memory.
2007 (define_insn "*fma_fnmsub_<mode>"
2008 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2011 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2012 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2014 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2017 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2018 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2019 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2020 [(set_attr "type" "ssemuladd")
2021 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over all VF modes, one template per
;; alternative:
;;   alt 0: vfmaddsub132 -- operand 1 tied to the destination,
;;          operand 2 may be memory;
;;   alt 1: vfmaddsub213 -- operand 1 tied to the destination,
;;          operand 3 may be memory;
;;   alt 2: vfmaddsub231 -- operand 3 tied to the destination,
;;          operand 2 may be memory.
2023 (define_insn "*fma_fmaddsub_<mode>"
2024 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2026 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2027 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2028 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2032 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2033 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2034 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over all VF modes, one template per
;; alternative:
;;   alt 0: vfmsubadd132 -- operand 1 tied to the destination,
;;          operand 2 may be memory;
;;   alt 1: vfmsubadd213 -- operand 1 tied to the destination,
;;          operand 3 may be memory;
;;   alt 2: vfmsubadd231 -- operand 3 tied to the destination,
;;          operand 2 may be memory.
2038 (define_insn "*fma_fmsubadd_<mode>"
2039 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2041 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2042 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2044 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2048 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2049 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2050 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2051 [(set_attr "type" "ssemuladd")
2052 (set_attr "mode" "<MODE>")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel single-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; float.  Operand 1 is a V4SF register tied to the destination
;; (constraint "0").  Only the legacy two-operand form is emitted.
2060 (define_insn "sse_cvtpi2ps"
2061 [(set (match_operand:V4SF 0 "register_operand" "=x")
2064 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2065 (match_operand:V4SF 1 "register_operand" "0")
2068 "cvtpi2ps\t{%2, %0|%0, %2}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 (register or memory) goes through an unspec
;; producing V4SI, of which elements 0 and 1 are selected into the V2SI
;; destination (constraint "=y").  The insn is tagged with unit "mmx".
2072 (define_insn "sse_cvtps2pi"
2073 [(set (match_operand:V2SI 0 "register_operand" "=y")
2075 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2077 (parallel [(const_int 0) (const_int 1)])))]
2079 "cvtps2pi\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx")
2082 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF operand 1 (register or memory) is converted with fix
;; to V4SI, of which elements 0 and 1 are selected into the V2SI
;; destination (constraint "=y").  The insn is tagged with unit "mmx"
;; and prefix_rep 0.
2084 (define_insn "sse_cvttps2pi"
2085 [(set (match_operand:V2SI 0 "register_operand" "=y")
2087 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2088 (parallel [(const_int 0) (const_int 1)])))]
2090 "cvttps2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "prefix_rep" "0")
2094 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SImode operand 2 to SF with float; operand 1 is a
;; V4SF register.  Three alternatives:
;;   alt 0: legacy form, operand 2 in a general register;
;;   alt 1: legacy form, operand 2 in memory;
;;   alt 2: VEX form, operand 2 in a register or memory, operand 1 not
;;          tied to the destination.
;; The two legacy alternatives emit the same text and are kept separate
;; so the per-CPU decode attributes can differ between the register and
;; memory source.
2096 (define_insn "sse_cvtsi2ss"
2097 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2100 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2101 (match_operand:V4SF 1 "register_operand" "0,0,x")
2105 cvtsi2ss\t{%2, %0|%0, %2}
2106 cvtsi2ss\t{%2, %0|%0, %2}
2107 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "isa" "noavx,noavx,avx")
2109 (set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "vector,double,*")
2111 (set_attr "amdfam10_decode" "vector,double,*")
2112 (set_attr "bdver1_decode" "double,direct,*")
2113 (set_attr "prefix" "orig,orig,vex")
2114 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-source variant of cvtsi2ss.  Converts DImode
;; operand 2 to SF with float; requires TARGET_SSE and TARGET_64BIT.
;; Three alternatives:
;;   alt 0: legacy form, operand 2 in a general register;
;;   alt 1: legacy form, operand 2 in memory;
;;   alt 2: VEX form, operand 2 in a register or memory.
;; prefix_rex is 1 for the legacy alternatives and length_vex is 4 for
;; the VEX alternative.
2116 (define_insn "sse_cvtsi2ssq"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2120 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2121 (match_operand:V4SF 1 "register_operand" "0,0,x")
2123 "TARGET_SSE && TARGET_64BIT"
2125 cvtsi2ssq\t{%2, %0|%0, %2}
2126 cvtsi2ssq\t{%2, %0|%0, %2}
2127 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2128 [(set_attr "isa" "noavx,noavx,avx")
2129 (set_attr "type" "sseicvt")
2130 (set_attr "athlon_decode" "vector,double,*")
2131 (set_attr "amdfam10_decode" "vector,double,*")
2132 (set_attr "bdver1_decode" "double,direct,*")
2133 (set_attr "length_vex" "*,*,4")
2134 (set_attr "prefix_rex" "1,1,*")
2135 (set_attr "prefix" "orig,orig,vex")
2136 (set_attr "mode" "SF")])
;; cvtss2si from a vector operand: element 0 of V4SF operand 1 is
;; converted to SImode through UNSPEC_FIX_NOTRUNC.  Alternative 0 takes
;; the source in an SSE register, alternative 1 in memory.  "%v" in the
;; template together with prefix "maybe_vex" covers both encodings.
2138 (define_insn "sse_cvtss2si"
2139 [(set (match_operand:SI 0 "register_operand" "=r,r")
2142 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2143 (parallel [(const_int 0)]))]
2144 UNSPEC_FIX_NOTRUNC))]
2146 "%vcvtss2si\t{%1, %0|%0, %1}"
2147 [(set_attr "type" "sseicvt")
2148 (set_attr "athlon_decode" "double,vector")
2149 (set_attr "bdver1_decode" "double,double")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "prefix" "maybe_vex")
2152 (set_attr "mode" "SI")])
;; cvtss2si on a scalar: same instruction as sse_cvtss2si, but operand 1
;; is a plain SFmode value rather than an element of a V4SF vector.
2154 (define_insn "sse_cvtss2si_2"
2155 [(set (match_operand:SI 0 "register_operand" "=r,r")
2156 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2157 UNSPEC_FIX_NOTRUNC))]
2159 "%vcvtss2si\t{%1, %0|%0, %1}"
2160 [(set_attr "type" "sseicvt")
2161 (set_attr "athlon_decode" "double,vector")
2162 (set_attr "amdfam10_decode" "double,double")
2163 (set_attr "bdver1_decode" "double,double")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "prefix" "maybe_vex")
2166 (set_attr "mode" "SI")])
;; cvtss2si{q}: DImode result computed from element 0 of V4SF operand 1,
;; represented with UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE &&
;; TARGET_64BIT.
2168 (define_insn "sse_cvtss2siq"
2169 [(set (match_operand:DI 0 "register_operand" "=r,r")
2172 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2173 (parallel [(const_int 0)]))]
2174 UNSPEC_FIX_NOTRUNC))]
2175 "TARGET_SSE && TARGET_64BIT"
2176 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "sseicvt")
2178 (set_attr "athlon_decode" "double,vector")
2179 (set_attr "bdver1_decode" "double,double")
2180 (set_attr "prefix_rep" "1")
2181 (set_attr "prefix" "maybe_vex")
2182 (set_attr "mode" "DI")])
;; cvtss2si{q} on a scalar: DImode result from SFmode operand 1 via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
2184 (define_insn "sse_cvtss2siq_2"
2185 [(set (match_operand:DI 0 "register_operand" "=r,r")
2186 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2187 UNSPEC_FIX_NOTRUNC))]
2188 "TARGET_SSE && TARGET_64BIT"
2189 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "sseicvt")
2191 (set_attr "athlon_decode" "double,vector")
2192 (set_attr "amdfam10_decode" "double,double")
2193 (set_attr "bdver1_decode" "double,double")
2194 (set_attr "prefix_rep" "1")
2195 (set_attr "prefix" "maybe_vex")
2196 (set_attr "mode" "DI")])
;; cvttss2si: SImode result computed from element 0 of V4SF operand 1
;; (register or memory).
2198 (define_insn "sse_cvttss2si"
2199 [(set (match_operand:SI 0 "register_operand" "=r,r")
2202 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2203 (parallel [(const_int 0)]))))]
2205 "%vcvttss2si\t{%1, %0|%0, %1}"
2206 [(set_attr "type" "sseicvt")
2207 (set_attr "athlon_decode" "double,vector")
2208 (set_attr "amdfam10_decode" "double,double")
2209 (set_attr "bdver1_decode" "double,double")
2210 (set_attr "prefix_rep" "1")
2211 (set_attr "prefix" "maybe_vex")
2212 (set_attr "mode" "SI")])
;; cvttss2si{q}: DImode result computed from element 0 of V4SF operand 1.
;; Requires TARGET_SSE && TARGET_64BIT.
2214 (define_insn "sse_cvttss2siq"
2215 [(set (match_operand:DI 0 "register_operand" "=r,r")
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))))]
2220 "TARGET_SSE && TARGET_64BIT"
2221 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2222 [(set_attr "type" "sseicvt")
2223 (set_attr "athlon_decode" "double,vector")
2224 (set_attr "amdfam10_decode" "double,double")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "DI")])
;; vcvtdq2ps (256-bit): signed integer to float conversion, V8SI operand 1
;; (register or memory) to V8SF operand 0.
2230 (define_insn "avx_cvtdq2ps256"
2231 [(set (match_operand:V8SF 0 "register_operand" "=x")
2232 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2234 "vcvtdq2ps\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "ssecvt")
2236 (set_attr "prefix" "vex")
2237 (set_attr "mode" "V8SF")])
;; cvtdq2ps (128-bit): signed integer to float conversion, V4SI operand 1
;; (register or memory) to V4SF operand 0.
2239 (define_insn "sse2_cvtdq2ps"
2240 [(set (match_operand:V4SF 0 "register_operand" "=x")
2241 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2243 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2244 [(set_attr "type" "ssecvt")
2245 (set_attr "prefix" "maybe_vex")
2246 (set_attr "mode" "V4SF")])
;; Expander producing V4SF operand 0 from V4SI operand 1 as a sequence of
;; a signed float conversion, an lt comparison against operand 3, an and
;; with operand 4, and a final plus.  The preparation code sets operand 3
;; to a V4SF zero vector, builds operand 4 from the constant 2**32
;; (real_ldexp of dconst1 by 32) via ix86_build_const_vector, and creates
;; fresh V4SF pseudos for operands 5-7.
;; NOTE(review): presumably this implements the unsigned conversion by
;; adding 2**32 to lanes whose signed result is negative -- confirm.
2248 (define_expand "sse2_cvtudq2ps"
2250 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2252 (lt:V4SF (match_dup 5) (match_dup 3)))
2254 (and:V4SF (match_dup 6) (match_dup 4)))
2255 (set (match_operand:V4SF 0 "register_operand" "")
2256 (plus:V4SF (match_dup 5) (match_dup 7)))]
2259 REAL_VALUE_TYPE TWO32r;
2263 real_ldexp (&TWO32r, &dconst1, 32);
2264 x = const_double_from_real_value (TWO32r, SFmode);
2266 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2267 operands[4] = force_reg (V4SFmode,
2268 ix86_build_const_vector (V4SFmode, 1, x));
2270 for (i = 5; i < 8; i++)
2271 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq (256-bit): V8SF operand 1 to V8SI operand 0, represented
;; with UNSPEC_FIX_NOTRUNC.
2274 (define_insn "avx_cvtps2dq256"
2275 [(set (match_operand:V8SI 0 "register_operand" "=x")
2276 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2277 UNSPEC_FIX_NOTRUNC))]
2279 "vcvtps2dq\t{%1, %0|%0, %1}"
2280 [(set_attr "type" "ssecvt")
2281 (set_attr "prefix" "vex")
2282 (set_attr "mode" "OI")])
;; cvtps2dq (128-bit): V4SF operand 1 to V4SI operand 0, represented with
;; UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute is selected by a
;; TARGET_AVX test.
2284 (define_insn "sse2_cvtps2dq"
2285 [(set (match_operand:V4SI 0 "register_operand" "=x")
2286 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2287 UNSPEC_FIX_NOTRUNC))]
2289 "%vcvtps2dq\t{%1, %0|%0, %1}"
2290 [(set_attr "type" "ssecvt")
2291 (set (attr "prefix_data16")
2293 (match_test "TARGET_AVX")
2295 (const_string "1")))
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "TI")])
;; vcvttps2dq (256-bit): fix:V8SI of V8SF operand 1 (register or memory)
;; into V8SI operand 0.
2299 (define_insn "avx_cvttps2dq256"
2300 [(set (match_operand:V8SI 0 "register_operand" "=x")
2301 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2303 "vcvttps2dq\t{%1, %0|%0, %1}"
2304 [(set_attr "type" "ssecvt")
2305 (set_attr "prefix" "vex")
2306 (set_attr "mode" "OI")])
;; cvttps2dq (128-bit): fix:V4SI of V4SF operand 1 (register or memory)
;; into V4SI operand 0.  prefix_rep and prefix_data16 are selected by
;; TARGET_AVX tests.
;; NOTE(review): prefix_data16 is set twice below -- once conditionally on
;; TARGET_AVX and once unconditionally to "0".  Confirm which setting is
;; intended and drop the other.
2308 (define_insn "sse2_cvttps2dq"
2309 [(set (match_operand:V4SI 0 "register_operand" "=x")
2310 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2312 "%vcvttps2dq\t{%1, %0|%0, %1}"
2313 [(set_attr "type" "ssecvt")
2314 (set (attr "prefix_rep")
2316 (match_test "TARGET_AVX")
2318 (const_string "1")))
2319 (set (attr "prefix_data16")
2321 (match_test "TARGET_AVX")
2323 (const_string "0")))
2324 (set_attr "prefix_data16" "0")
2325 (set_attr "prefix" "maybe_vex")
2326 (set_attr "mode" "TI")])
2328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2330 ;; Parallel double-precision floating point conversion operations
2332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of V2SI operand 1.  Alternative 0 takes the source
;; from a "y" register (unit "mmx", prefix_data16 1); alternative 1 takes
;; it from memory.
2334 (define_insn "sse2_cvtpi2pd"
2335 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2336 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2338 "cvtpi2pd\t{%1, %0|%0, %1}"
2339 [(set_attr "type" "ssecvt")
2340 (set_attr "unit" "mmx,*")
2341 (set_attr "prefix_data16" "1,*")
2342 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to V2SI operand 0 (a "y" register),
;; represented with UNSPEC_FIX_NOTRUNC.
2344 (define_insn "sse2_cvtpd2pi"
2345 [(set (match_operand:V2SI 0 "register_operand" "=y")
2346 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2347 UNSPEC_FIX_NOTRUNC))]
2349 "cvtpd2pi\t{%1, %0|%0, %1}"
2350 [(set_attr "type" "ssecvt")
2351 (set_attr "unit" "mmx")
2352 (set_attr "bdver1_decode" "double")
2353 (set_attr "prefix_data16" "1")
2354 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of V2DF operand 1 into V2SI operand 0 (a "y"
;; register).
2356 (define_insn "sse2_cvttpd2pi"
2357 [(set (match_operand:V2SI 0 "register_operand" "=y")
2358 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2360 "cvttpd2pi\t{%1, %0|%0, %1}"
2361 [(set_attr "type" "ssecvt")
2362 (set_attr "unit" "mmx")
2363 (set_attr "bdver1_decode" "double")
2364 (set_attr "prefix_data16" "1")
2365 (set_attr "mode" "TI")])
;; cvtsi2sd: convert SImode integer operand 2 to DFmode (float:DF); the
;; destination and operand 1 are both V2DF.  Alternatives 0-1 are the
;; non-AVX two-operand forms (operand 1 tied to operand 0); alternative 2
;; is the three-operand AVX form.
2367 (define_insn "sse2_cvtsi2sd"
2368 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2371 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2372 (match_operand:V2DF 1 "register_operand" "0,0,x")
2376 cvtsi2sd\t{%2, %0|%0, %2}
2377 cvtsi2sd\t{%2, %0|%0, %2}
2378 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2379 [(set_attr "isa" "noavx,noavx,avx")
2380 (set_attr "type" "sseicvt")
2381 (set_attr "athlon_decode" "double,direct,*")
2382 (set_attr "amdfam10_decode" "vector,double,*")
2383 (set_attr "bdver1_decode" "double,direct,*")
2384 (set_attr "prefix" "orig,orig,vex")
2385 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source counterpart of cvtsi2sd (float:DF of operand
;; 2), enabled only for TARGET_SSE2 && TARGET_64BIT.  The two non-AVX
;; alternatives set prefix_rex to 1; the AVX alternative sets length_vex
;; to 4.
2387 (define_insn "sse2_cvtsi2sdq"
2388 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2391 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2392 (match_operand:V2DF 1 "register_operand" "0,0,x")
2394 "TARGET_SSE2 && TARGET_64BIT"
2396 cvtsi2sdq\t{%2, %0|%0, %2}
2397 cvtsi2sdq\t{%2, %0|%0, %2}
2398 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2399 [(set_attr "isa" "noavx,noavx,avx")
2400 (set_attr "type" "sseicvt")
2401 (set_attr "athlon_decode" "double,direct,*")
2402 (set_attr "amdfam10_decode" "vector,double,*")
2403 (set_attr "bdver1_decode" "double,direct,*")
2404 (set_attr "length_vex" "*,*,4")
2405 (set_attr "prefix_rex" "1,1,*")
2406 (set_attr "prefix" "orig,orig,vex")
2407 (set_attr "mode" "DF")])
;; cvtsd2si: SImode result computed from element 0 of V2DF operand 1
;; (register or memory), represented with UNSPEC_FIX_NOTRUNC.
2409 (define_insn "sse2_cvtsd2si"
2410 [(set (match_operand:SI 0 "register_operand" "=r,r")
2413 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2414 (parallel [(const_int 0)]))]
2415 UNSPEC_FIX_NOTRUNC))]
2417 "%vcvtsd2si\t{%1, %0|%0, %1}"
2418 [(set_attr "type" "sseicvt")
2419 (set_attr "athlon_decode" "double,vector")
2420 (set_attr "bdver1_decode" "double,double")
2421 (set_attr "prefix_rep" "1")
2422 (set_attr "prefix" "maybe_vex")
2423 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar: same instruction as sse2_cvtsd2si, but operand 1
;; is a plain DFmode value rather than an element of a V2DF vector.
2425 (define_insn "sse2_cvtsd2si_2"
2426 [(set (match_operand:SI 0 "register_operand" "=r,r")
2427 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2428 UNSPEC_FIX_NOTRUNC))]
2430 "%vcvtsd2si\t{%1, %0|%0, %1}"
2431 [(set_attr "type" "sseicvt")
2432 (set_attr "athlon_decode" "double,vector")
2433 (set_attr "amdfam10_decode" "double,double")
2434 (set_attr "bdver1_decode" "double,double")
2435 (set_attr "prefix_rep" "1")
2436 (set_attr "prefix" "maybe_vex")
2437 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode result computed from element 0 of V2DF operand 1,
;; represented with UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 &&
;; TARGET_64BIT.
2439 (define_insn "sse2_cvtsd2siq"
2440 [(set (match_operand:DI 0 "register_operand" "=r,r")
2443 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2444 (parallel [(const_int 0)]))]
2445 UNSPEC_FIX_NOTRUNC))]
2446 "TARGET_SSE2 && TARGET_64BIT"
2447 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sseicvt")
2449 (set_attr "athlon_decode" "double,vector")
2450 (set_attr "bdver1_decode" "double,double")
2451 (set_attr "prefix_rep" "1")
2452 (set_attr "prefix" "maybe_vex")
2453 (set_attr "mode" "DI")])
;; cvtsd2si{q} on a scalar: DImode result from DFmode operand 1 via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2455 (define_insn "sse2_cvtsd2siq_2"
2456 [(set (match_operand:DI 0 "register_operand" "=r,r")
2457 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2458 UNSPEC_FIX_NOTRUNC))]
2459 "TARGET_SSE2 && TARGET_64BIT"
2460 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2461 [(set_attr "type" "sseicvt")
2462 (set_attr "athlon_decode" "double,vector")
2463 (set_attr "amdfam10_decode" "double,double")
2464 (set_attr "bdver1_decode" "double,double")
2465 (set_attr "prefix_rep" "1")
2466 (set_attr "prefix" "maybe_vex")
2467 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result computed from element 0 of V2DF operand 1
;; (register or memory).
2469 (define_insn "sse2_cvttsd2si"
2470 [(set (match_operand:SI 0 "register_operand" "=r,r")
2473 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2474 (parallel [(const_int 0)]))))]
2476 "%vcvttsd2si\t{%1, %0|%0, %1}"
2477 [(set_attr "type" "sseicvt")
2478 (set_attr "athlon_decode" "double,vector")
2479 (set_attr "amdfam10_decode" "double,double")
2480 (set_attr "bdver1_decode" "double,double")
2481 (set_attr "prefix_rep" "1")
2482 (set_attr "prefix" "maybe_vex")
2483 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode result computed from element 0 of V2DF operand 1.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2485 (define_insn "sse2_cvttsd2siq"
2486 [(set (match_operand:DI 0 "register_operand" "=r,r")
2489 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2490 (parallel [(const_int 0)]))))]
2491 "TARGET_SSE2 && TARGET_64BIT"
2492 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2493 [(set_attr "type" "sseicvt")
2494 (set_attr "athlon_decode" "double,vector")
2495 (set_attr "amdfam10_decode" "double,double")
2496 (set_attr "bdver1_decode" "double,double")
2497 (set_attr "prefix_rep" "1")
2498 (set_attr "prefix" "maybe_vex")
2499 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): float:V4DF of V4SI operand 1 (register or
;; memory) into V4DF operand 0.
2501 (define_insn "avx_cvtdq2pd256"
2502 [(set (match_operand:V4DF 0 "register_operand" "=x")
2503 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2505 "vcvtdq2pd\t{%1, %0|%0, %1}"
2506 [(set_attr "type" "ssecvt")
2507 (set_attr "prefix" "vex")
2508 (set_attr "mode" "V4DF")])
;; vcvtdq2pd matched on elements 0-3 of a V8SI operand 1, producing V4DF
;; operand 0.  The template prints operand 1 with the %x modifier.
2510 (define_insn "*avx_cvtdq2pd256_2"
2511 [(set (match_operand:V4DF 0 "register_operand" "=x")
2514 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2515 (parallel [(const_int 0) (const_int 1)
2516 (const_int 2) (const_int 3)]))))]
2518 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2519 [(set_attr "type" "ssecvt")
2520 (set_attr "prefix" "vex")
2521 (set_attr "mode" "V4DF")])
;; cvtdq2pd (128-bit): elements 0 and 1 of V4SI operand 1 produce V2DF
;; operand 0.
2523 (define_insn "sse2_cvtdq2pd"
2524 [(set (match_operand:V2DF 0 "register_operand" "=x")
2527 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2528 (parallel [(const_int 0) (const_int 1)]))))]
2530 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2531 [(set_attr "type" "ssecvt")
2532 (set_attr "prefix" "maybe_vex")
2533 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF operand 1 to V4SI operand 0, represented with
;; UNSPEC_FIX_NOTRUNC.
2535 (define_insn "avx_cvtpd2dq256"
2536 [(set (match_operand:V4SI 0 "register_operand" "=x")
2537 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2538 UNSPEC_FIX_NOTRUNC))]
2540 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "ssecvt")
2542 (set_attr "prefix" "vex")
2543 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V2DF operand 1 to a V4SI destination.  The
;; preparation statement sets operands[2] to a V2SImode zero vector.
2545 (define_expand "sse2_cvtpd2dq"
2546 [(set (match_operand:V4SI 0 "register_operand" "")
2548 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2552 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn: a V2SI unspec of V2DF operand 1 together with operand 2,
;; which must be a zero V2SI constant (const0_operand), forms V4SI operand
;; 0.  The output code returns either the vcvtpd2dq{x} or the cvtpd2dq
;; template.
2554 (define_insn "*sse2_cvtpd2dq"
2555 [(set (match_operand:V4SI 0 "register_operand" "=x")
2557 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2559 (match_operand:V2SI 2 "const0_operand" "")))]
2563 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2565 return "cvtpd2dq\t{%1, %0|%0, %1}";
2567 [(set_attr "type" "ssecvt")
2568 (set_attr "prefix_rep" "1")
2569 (set_attr "prefix_data16" "0")
2570 (set_attr "prefix" "maybe_vex")
2571 (set_attr "mode" "TI")
2572 (set_attr "amdfam10_decode" "double")
2573 (set_attr "athlon_decode" "vector")
2574 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: fix:V4SI of V4DF operand 1 (register or memory) into
;; V4SI operand 0.
2576 (define_insn "avx_cvttpd2dq256"
2577 [(set (match_operand:V4SI 0 "register_operand" "=x")
2578 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2580 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2581 [(set_attr "type" "ssecvt")
2582 (set_attr "prefix" "vex")
2583 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: fix:V2SI of V2DF operand 1 feeding a V4SI
;; destination.  The preparation statement sets operands[2] to a V2SImode
;; zero vector.
2585 (define_expand "sse2_cvttpd2dq"
2586 [(set (match_operand:V4SI 0 "register_operand" "")
2588 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2591 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq insn: fix:V2SI of V2DF operand 1 together with operand 2,
;; which must be a zero V2SI constant (const0_operand), forms V4SI operand
;; 0.  The output code returns either the vcvttpd2dq{x} or the cvttpd2dq
;; template.
2593 (define_insn "*sse2_cvttpd2dq"
2594 [(set (match_operand:V4SI 0 "register_operand" "=x")
2596 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2597 (match_operand:V2SI 2 "const0_operand" "")))]
2601 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2603 return "cvttpd2dq\t{%1, %0|%0, %1}";
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "amdfam10_decode" "double")
2607 (set_attr "athlon_decode" "vector")
2608 (set_attr "bdver1_decode" "double")
2609 (set_attr "prefix" "maybe_vex")
2610 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2, combined with V4SF
;; operand 1 into V4SF operand 0.  Alternatives 0-1 are the non-AVX
;; two-operand forms (operand 1 tied to operand 0); alternative 2 is the
;; three-operand AVX form.
2612 (define_insn "sse2_cvtsd2ss"
2613 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2616 (float_truncate:V2SF
2617 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2618 (match_operand:V4SF 1 "register_operand" "0,0,x")
2622 cvtsd2ss\t{%2, %0|%0, %2}
2623 cvtsd2ss\t{%2, %0|%0, %2}
2624 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2625 [(set_attr "isa" "noavx,noavx,avx")
2626 (set_attr "type" "ssecvt")
2627 (set_attr "athlon_decode" "vector,double,*")
2628 (set_attr "amdfam10_decode" "vector,double,*")
2629 (set_attr "bdver1_decode" "direct,direct,*")
2630 (set_attr "prefix" "orig,orig,vex")
2631 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 are the conversion source;
;; the result is combined with V2DF operand 1 into V2DF operand 0.
;; Alternatives 0-1 are the non-AVX two-operand forms (operand 1 tied to
;; operand 0); alternative 2 is the three-operand AVX form.
2633 (define_insn "sse2_cvtss2sd"
2634 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2638 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2639 (parallel [(const_int 0) (const_int 1)])))
2640 (match_operand:V2DF 1 "register_operand" "0,0,x")
2644 cvtss2sd\t{%2, %0|%0, %2}
2645 cvtss2sd\t{%2, %0|%0, %2}
2646 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2647 [(set_attr "isa" "noavx,noavx,avx")
2648 (set_attr "type" "ssecvt")
2649 (set_attr "amdfam10_decode" "vector,double,*")
2650 (set_attr "athlon_decode" "direct,direct,*")
2651 (set_attr "bdver1_decode" "direct,direct,*")
2652 (set_attr "prefix" "orig,orig,vex")
2653 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: float_truncate:V4SF of V4DF operand 1 (register or
;; memory) into V4SF operand 0.
2655 (define_insn "avx_cvtpd2ps256"
2656 [(set (match_operand:V4SF 0 "register_operand" "=x")
2657 (float_truncate:V4SF
2658 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2660 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "ssecvt")
2662 (set_attr "prefix" "vex")
2663 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 feeding a
;; V4SF destination.  The preparation statement sets operands[2] to a
;; V2SFmode zero vector.
2665 (define_expand "sse2_cvtpd2ps"
2666 [(set (match_operand:V4SF 0 "register_operand" "")
2668 (float_truncate:V2SF
2669 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2672 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn: float_truncate:V2SF of V2DF operand 1 together with
;; operand 2, which must be a zero V2SF constant (const0_operand), forms
;; V4SF operand 0.  The output code returns either the vcvtpd2ps{x} or the
;; cvtpd2ps template.
2674 (define_insn "*sse2_cvtpd2ps"
2675 [(set (match_operand:V4SF 0 "register_operand" "=x")
2677 (float_truncate:V2SF
2678 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2679 (match_operand:V2SF 2 "const0_operand" "")))]
2683 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2685 return "cvtpd2ps\t{%1, %0|%0, %1}";
2687 [(set_attr "type" "ssecvt")
2688 (set_attr "amdfam10_decode" "double")
2689 (set_attr "athlon_decode" "vector")
2690 (set_attr "bdver1_decode" "double")
2691 (set_attr "prefix_data16" "1")
2692 (set_attr "prefix" "maybe_vex")
2693 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4SF operand 1 (register or memory) to
;; V4DF operand 0.
2695 (define_insn "avx_cvtps2pd256"
2696 [(set (match_operand:V4DF 0 "register_operand" "=x")
2698 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2700 "vcvtps2pd\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssecvt")
2702 (set_attr "prefix" "vex")
2703 (set_attr "mode" "V4DF")])
;; vcvtps2pd matched on elements 0-3 of a V8SF operand 1, producing V4DF
;; operand 0.  The template prints operand 1 with the %x modifier.
2705 (define_insn "*avx_cvtps2pd256_2"
2706 [(set (match_operand:V4DF 0 "register_operand" "=x")
2709 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2710 (parallel [(const_int 0) (const_int 1)
2711 (const_int 2) (const_int 3)]))))]
2713 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2714 [(set_attr "type" "ssecvt")
2715 (set_attr "prefix" "vex")
2716 (set_attr "mode" "V4DF")])
;; cvtps2pd (128-bit): elements 0 and 1 of V4SF operand 1 produce V2DF
;; operand 0.
2718 (define_insn "sse2_cvtps2pd"
2719 [(set (match_operand:V2DF 0 "register_operand" "=x")
2722 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2723 (parallel [(const_int 0) (const_int 1)]))))]
2725 "%vcvtps2pd\t{%1, %0|%0, %1}"
2726 [(set_attr "type" "ssecvt")
2727 (set_attr "amdfam10_decode" "direct")
2728 (set_attr "athlon_decode" "double")
2729 (set_attr "bdver1_decode" "double")
2730 (set_attr "prefix_data16" "0")
2731 (set_attr "prefix" "maybe_vex")
2732 (set_attr "mode" "V2DF")])
;; Expander producing V2DF operand 0 from V4SF operand 1 in two steps: a
;; shuffle with selector 6,7,2,3 involving operand 1, then a step that
;; reads elements 0 and 1.  operands[2] is a fresh V4SF pseudo.
;; NOTE(review): presumably operands[2] holds the shuffled high half of
;; operand 1 between the two steps -- confirm against the full pattern.
2734 (define_expand "vec_unpacks_hi_v4sf"
2739 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2740 (parallel [(const_int 6) (const_int 7)
2741 (const_int 2) (const_int 3)])))
2742 (set (match_operand:V2DF 0 "register_operand" "")
2746 (parallel [(const_int 0) (const_int 1)]))))]
2748 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing V4DF operand 0 from V8SF operand 1; the first step
;; selects elements 4-7 of operand 1.  operands[2] is a fresh V4SF pseudo.
;; NOTE(review): presumably operands[2] receives the selected high half
;; before conversion -- confirm against the full pattern.
2750 (define_expand "vec_unpacks_hi_v8sf"
2753 (match_operand:V8SF 1 "nonimmediate_operand" "")
2754 (parallel [(const_int 4) (const_int 5)
2755 (const_int 6) (const_int 7)])))
2756 (set (match_operand:V4DF 0 "register_operand" "")
2760 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SF
;; operand 1.
2762 (define_expand "vec_unpacks_lo_v4sf"
2763 [(set (match_operand:V2DF 0 "register_operand" "")
2766 (match_operand:V4SF 1 "nonimmediate_operand" "")
2767 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF operand 0 from elements 0-3 of V8SF operand 1.
2770 (define_expand "vec_unpacks_lo_v8sf"
2771 [(set (match_operand:V4DF 0 "register_operand" "")
2774 (match_operand:V8SF 1 "nonimmediate_operand" "")
2775 (parallel [(const_int 0) (const_int 1)
2776 (const_int 2) (const_int 3)]))))]
;; V8HI operand 1 to V4SF operand 0: unpack with gen_vec_unpacks_hi_v8hi
;; into a V4SI temporary, then convert it with gen_sse2_cvtdq2ps.
2779 (define_expand "vec_unpacks_float_hi_v8hi"
2780 [(match_operand:V4SF 0 "register_operand" "")
2781 (match_operand:V8HI 1 "register_operand" "")]
2784 rtx tmp = gen_reg_rtx (V4SImode);
2786 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2787 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 to V4SF operand 0: unpack with gen_vec_unpacks_lo_v8hi
;; into a V4SI temporary, then convert it with gen_sse2_cvtdq2ps.
2791 (define_expand "vec_unpacks_float_lo_v8hi"
2792 [(match_operand:V4SF 0 "register_operand" "")
2793 (match_operand:V8HI 1 "register_operand" "")]
2796 rtx tmp = gen_reg_rtx (V4SImode);
2798 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2799 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 to V4SF operand 0: unpack with gen_vec_unpacku_hi_v8hi
;; into a V4SI temporary, then convert it with gen_sse2_cvtdq2ps.
2803 (define_expand "vec_unpacku_float_hi_v8hi"
2804 [(match_operand:V4SF 0 "register_operand" "")
2805 (match_operand:V8HI 1 "register_operand" "")]
2808 rtx tmp = gen_reg_rtx (V4SImode);
2810 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2811 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 to V4SF operand 0: unpack with gen_vec_unpacku_lo_v8hi
;; into a V4SI temporary, then convert it with gen_sse2_cvtdq2ps.
2815 (define_expand "vec_unpacku_float_lo_v8hi"
2816 [(match_operand:V4SF 0 "register_operand" "")
2817 (match_operand:V8HI 1 "register_operand" "")]
2820 rtx tmp = gen_reg_rtx (V4SImode);
2822 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2823 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing V2DF operand 0 from V4SI operand 1 in two steps: a
;; selection of elements 2,3,2,3 of operand 1, then a step that reads
;; elements 0 and 1.  operands[2] is a fresh V4SI pseudo.
;; NOTE(review): presumably operands[2] carries the shuffled vector
;; between the two steps -- confirm against the full pattern.
2827 (define_expand "vec_unpacks_float_hi_v4si"
2830 (match_operand:V4SI 1 "nonimmediate_operand" "")
2831 (parallel [(const_int 2) (const_int 3)
2832 (const_int 2) (const_int 3)])))
2833 (set (match_operand:V2DF 0 "register_operand" "")
2837 (parallel [(const_int 0) (const_int 1)]))))]
2839 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SI
;; operand 1.
2841 (define_expand "vec_unpacks_float_lo_v4si"
2842 [(set (match_operand:V2DF 0 "register_operand" "")
2845 (match_operand:V4SI 1 "nonimmediate_operand" "")
2846 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF operand 0 from V8SI operand 1; the first step
;; selects elements 4-7 of operand 1.  operands[2] is a fresh V4SI pseudo.
;; NOTE(review): presumably operands[2] receives the selected high half
;; before conversion -- confirm against the full pattern.
2849 (define_expand "vec_unpacks_float_hi_v8si"
2852 (match_operand:V8SI 1 "nonimmediate_operand" "")
2853 (parallel [(const_int 4) (const_int 5)
2854 (const_int 6) (const_int 7)])))
2855 (set (match_operand:V4DF 0 "register_operand" "")
2859 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V4DF operand 0 from elements 0-3 of V8SI operand 1.
2861 (define_expand "vec_unpacks_float_lo_v8si"
2862 [(set (match_operand:V4DF 0 "register_operand" "")
2865 (match_operand:V8SI 1 "nonimmediate_operand" "")
2866 (parallel [(const_int 0) (const_int 1)
2867 (const_int 2) (const_int 3)]))))]
;; Expander producing V2DF operand 0 from V4SI operand 1: a selection of
;; elements 2,3,2,3 of operand 1, a step reading elements 0 and 1, an lt
;; comparison of operand 6 against operand 3, an and with operand 4, and
;; a final plus of operands 6 and 8.  The preparation code sets operand 3
;; to a V2DF zero vector, builds operand 4 from the constant 2**32
;; (real_ldexp of dconst1 by 32) via ix86_build_const_vector, creates a
;; V4SI pseudo for operand 5 and V2DF pseudos for operands 6-8.
;; NOTE(review): presumably this corrects a signed conversion into an
;; unsigned one by adding 2**32 to negative lanes -- confirm.
2870 (define_expand "vec_unpacku_float_hi_v4si"
2873 (match_operand:V4SI 1 "nonimmediate_operand" "")
2874 (parallel [(const_int 2) (const_int 3)
2875 (const_int 2) (const_int 3)])))
2880 (parallel [(const_int 0) (const_int 1)]))))
2882 (lt:V2DF (match_dup 6) (match_dup 3)))
2884 (and:V2DF (match_dup 7) (match_dup 4)))
2885 (set (match_operand:V2DF 0 "register_operand" "")
2886 (plus:V2DF (match_dup 6) (match_dup 8)))]
2889 REAL_VALUE_TYPE TWO32r;
2893 real_ldexp (&TWO32r, &dconst1, 32);
2894 x = const_double_from_real_value (TWO32r, DFmode);
2896 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2897 operands[4] = force_reg (V2DFmode,
2898 ix86_build_const_vector (V2DFmode, 1, x));
2900 operands[5] = gen_reg_rtx (V4SImode);
2902 for (i = 6; i < 9; i++)
2903 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing V2DF operand 0 from V4SI operand 1: a step reading
;; elements 0 and 1 of operand 1, an lt comparison of operand 5 against
;; operand 3, an and with operand 4, and a final plus of operands 5 and 7.
;; The preparation code sets operand 3 to a V2DF zero vector, builds
;; operand 4 from the constant 2**32 (real_ldexp of dconst1 by 32) via
;; ix86_build_const_vector, and creates V2DF pseudos for operands 5-7.
;; NOTE(review): presumably this corrects a signed conversion into an
;; unsigned one by adding 2**32 to negative lanes -- confirm.
2906 (define_expand "vec_unpacku_float_lo_v4si"
2910 (match_operand:V4SI 1 "nonimmediate_operand" "")
2911 (parallel [(const_int 0) (const_int 1)]))))
2913 (lt:V2DF (match_dup 5) (match_dup 3)))
2915 (and:V2DF (match_dup 6) (match_dup 4)))
2916 (set (match_operand:V2DF 0 "register_operand" "")
2917 (plus:V2DF (match_dup 5) (match_dup 7)))]
2920 REAL_VALUE_TYPE TWO32r;
2924 real_ldexp (&TWO32r, &dconst1, 32);
2925 x = const_double_from_real_value (TWO32r, DFmode);
2927 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2928 operands[4] = force_reg (V2DFmode,
2929 ix86_build_const_vector (V2DFmode, 1, x));
2931 for (i = 5; i < 8; i++)
2932 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing V8SF operand 0 from two float_truncate:V4SF
;; conversions, one of V4DF operand 1 and one of V4DF operand 2.
;; operands[3] and operands[4] are fresh V4SF pseudos.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated halves
;; before they are combined -- confirm against the full pattern.
2935 (define_expand "vec_pack_trunc_v4df"
2937 (float_truncate:V4SF
2938 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2940 (float_truncate:V4SF
2941 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2942 (set (match_operand:V8SF 0 "register_operand" "")
2948 operands[3] = gen_reg_rtx (V4SFmode);
2949 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into V4SF operand 0: each source is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the
;; two temporaries are combined with gen_sse_movlhps.
2952 (define_expand "vec_pack_trunc_v2df"
2953 [(match_operand:V4SF 0 "register_operand" "")
2954 (match_operand:V2DF 1 "nonimmediate_operand" "")
2955 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2960 r1 = gen_reg_rtx (V4SFmode);
2961 r2 = gen_reg_rtx (V4SFmode);
2963 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2964 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2965 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into V4SI operand 0: each source is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary (r1, r2), and the
;; V2DI lowparts of the temporaries are combined into the V2DI lowpart of
;; operand 0 with gen_vec_interleave_lowv2di.
2969 (define_expand "vec_pack_sfix_trunc_v2df"
2970 [(match_operand:V4SI 0 "register_operand" "")
2971 (match_operand:V2DF 1 "nonimmediate_operand" "")
2972 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2977 r1 = gen_reg_rtx (V4SImode);
2978 r2 = gen_reg_rtx (V4SImode);
2980 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2981 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2982 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2983 gen_lowpart (V2DImode, r1),
2984 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF operands into V4SI operand 0: each source is converted
;; with gen_sse2_cvtpd2dq into its own V4SI temporary (r1, r2), and the
;; V2DI lowparts of the temporaries are combined into the V2DI lowpart of
;; operand 0 with gen_vec_interleave_lowv2di.
2988 (define_expand "vec_pack_sfix_v2df"
2989 [(match_operand:V4SI 0 "register_operand" "")
2990 (match_operand:V2DF 1 "nonimmediate_operand" "")
2991 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2996 r1 = gen_reg_rtx (V4SImode);
2997 r2 = gen_reg_rtx (V4SImode);
2999 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3000 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3001 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3002 gen_lowpart (V2DImode, r1),
3003 gen_lowpart (V2DImode, r2)));
3007 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3009 ;; Parallel single-precision floating point element swizzling
3011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands.
;; It obtains a destination from ix86_fixup_binary_operands, emits
;; gen_sse_movhlps into it, and copies the result to operands[0] when the
;; returned destination differs from operands[0].
3013 (define_expand "sse_movhlps_exp"
3014 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3017 (match_operand:V4SF 1 "nonimmediate_operand" "")
3018 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3019 (parallel [(const_int 6)
3025 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3027 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3029 /* Fix up the destination if needed. */
3030 if (dst != operands[0])
3031 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms, five alternatives:
;;   0/1  movhlps / vmovhlps with all operands in registers;
;;   2/3  movlps / vmovlps with operand 2 in offsettable memory ("o"),
;;        printed with the %H modifier;
;;   4    %vmovhps with a memory destination and operand 2 in a register.
;; Enabled for TARGET_SSE when operands 1 and 2 are not both in memory.
3036 (define_insn "sse_movhlps"
3037 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3040 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3041 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3042 (parallel [(const_int 6)
3046 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3048 movhlps\t{%2, %0|%0, %2}
3049 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3050 movlps\t{%H2, %0|%0, %H2}
3051 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3052 %vmovhps\t{%2, %0|%0, %2}"
3053 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3054 (set_attr "type" "ssemov")
3055 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3056 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands.
;; It obtains a destination from ix86_fixup_binary_operands, emits
;; gen_sse_movlhps into it, and copies the result to operands[0] when the
;; returned destination differs from operands[0].
3058 (define_expand "sse_movlhps_exp"
3059 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3062 (match_operand:V4SF 1 "nonimmediate_operand" "")
3063 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3064 (parallel [(const_int 0)
3070 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3072 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3074 /* Fix up the destination if needed. */
3075 if (dst != operands[0])
3076 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms, five alternatives:
;;   0/1  movlhps / vmovlhps with all operands in registers;
;;   2/3  movhps / vmovhps;
;;   4    %vmovlps storing operand 2 to an offsettable memory destination,
;;        printed with the %H modifier.
;; Enabled for TARGET_SSE when ix86_binary_operator_ok accepts the operands.
;; NOTE(review): alternative 3 (vmovhps) allows a register for operand 2
;; ("x"), whereas the matching non-AVX alternative 2 (movhps) requires
;; memory ("m").  Confirm whether alternative 3 was meant to be "m".
3081 (define_insn "sse_movlhps"
3082 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3085 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3086 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3087 (parallel [(const_int 0)
3091 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3093 movlhps\t{%2, %0|%0, %2}
3094 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3095 movhps\t{%2, %0|%0, %2}
3096 vmovhps\t{%2, %1, %0|%0, %1, %2}
3097 %vmovlps\t{%2, %H0|%H0, %2}"
3098 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3099 (set_attr "type" "ssemov")
3100 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3101 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3103 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps (256-bit): V8SF operands 1 and 2 combined with selector
;; 2,10,3,11,6,14,7,15 into V8SF operand 0.
3104 (define_insn "avx_unpckhps256"
3105 [(set (match_operand:V8SF 0 "register_operand" "=x")
3108 (match_operand:V8SF 1 "register_operand" "x")
3109 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3110 (parallel [(const_int 2) (const_int 10)
3111 (const_int 3) (const_int 11)
3112 (const_int 6) (const_int 14)
3113 (const_int 7) (const_int 15)])))]
3115 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3116 [(set_attr "type" "sselog")
3117 (set_attr "prefix" "vex")
3118 (set_attr "mode" "V8SF")])
;; Expander producing V8SF operand 0 from V8SF operands 1 and 2 in three
;; selections: one with selector 0,8,1,9,4,12,5,13, one with selector
;; 2,10,3,11,6,14,7,15, and a final one with selector 4-7,12-15.
;; operands[3] and operands[4] are fresh V8SF pseudos.
;; NOTE(review): presumably operands 3 and 4 hold the first two
;; selections and feed the third -- confirm against the full pattern.
3120 (define_expand "vec_interleave_highv8sf"
3124 (match_operand:V8SF 1 "register_operand" "x")
3125 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3126 (parallel [(const_int 0) (const_int 8)
3127 (const_int 1) (const_int 9)
3128 (const_int 4) (const_int 12)
3129 (const_int 5) (const_int 13)])))
3135 (parallel [(const_int 2) (const_int 10)
3136 (const_int 3) (const_int 11)
3137 (const_int 6) (const_int 14)
3138 (const_int 7) (const_int 15)])))
3139 (set (match_operand:V8SF 0 "register_operand" "")
3144 (parallel [(const_int 4) (const_int 5)
3145 (const_int 6) (const_int 7)
3146 (const_int 12) (const_int 13)
3147 (const_int 14) (const_int 15)])))]
3150 operands[3] = gen_reg_rtx (V8SFmode);
3151 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps (128-bit): V4SF operands 1 and 2 combined with selector
;; 2,6,3,7 into V4SF operand 0.  Alternative 0 is the non-AVX two-operand
;; form (operand 1 tied to operand 0); alternative 1 is the AVX
;; three-operand form.
3154 (define_insn "vec_interleave_highv4sf"
3155 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3158 (match_operand:V4SF 1 "register_operand" "0,x")
3159 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3160 (parallel [(const_int 2) (const_int 6)
3161 (const_int 3) (const_int 7)])))]
3164 unpckhps\t{%2, %0|%0, %2}
3165 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3166 [(set_attr "isa" "noavx,avx")
3167 (set_attr "type" "sselog")
3168 (set_attr "prefix" "orig,vex")
3169 (set_attr "mode" "V4SF")])
3171 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps (256-bit): V8SF operands 1 and 2 combined with selector
;; 0,8,1,9,4,12,5,13 into V8SF operand 0.
3172 (define_insn "avx_unpcklps256"
3173 [(set (match_operand:V8SF 0 "register_operand" "=x")
3176 (match_operand:V8SF 1 "register_operand" "x")
3177 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3178 (parallel [(const_int 0) (const_int 8)
3179 (const_int 1) (const_int 9)
3180 (const_int 4) (const_int 12)
3181 (const_int 5) (const_int 13)])))]
3183 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3184 [(set_attr "type" "sselog")
3185 (set_attr "prefix" "vex")
3186 (set_attr "mode" "V8SF")])
;; Expander producing V8SF operand 0 from V8SF operands 1 and 2 in three
;; selections: one with selector 0,8,1,9,4,12,5,13, one with selector
;; 2,10,3,11,6,14,7,15, and a final one with selector 0-3,8-11.
;; operands[3] and operands[4] are fresh V8SF pseudos.
;; NOTE(review): presumably operands 3 and 4 hold the first two
;; selections and feed the third -- confirm against the full pattern.
3188 (define_expand "vec_interleave_lowv8sf"
3192 (match_operand:V8SF 1 "register_operand" "x")
3193 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3194 (parallel [(const_int 0) (const_int 8)
3195 (const_int 1) (const_int 9)
3196 (const_int 4) (const_int 12)
3197 (const_int 5) (const_int 13)])))
3203 (parallel [(const_int 2) (const_int 10)
3204 (const_int 3) (const_int 11)
3205 (const_int 6) (const_int 14)
3206 (const_int 7) (const_int 15)])))
3207 (set (match_operand:V8SF 0 "register_operand" "")
3212 (parallel [(const_int 0) (const_int 1)
3213 (const_int 2) (const_int 3)
3214 (const_int 8) (const_int 9)
3215 (const_int 10) (const_int 11)])))]
3218 operands[3] = gen_reg_rtx (V8SFmode);
3219 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps (128-bit): V4SF operands 1 and 2 combined with selector
;; 0,4,1,5 into V4SF operand 0.  Alternative 0 is the non-AVX two-operand
;; form (operand 1 tied to operand 0); alternative 1 is the AVX
;; three-operand form.
3222 (define_insn "vec_interleave_lowv4sf"
3223 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3226 (match_operand:V4SF 1 "register_operand" "0,x")
3227 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3228 (parallel [(const_int 0) (const_int 4)
3229 (const_int 1) (const_int 5)])))]
3232 unpcklps\t{%2, %0|%0, %2}
3233 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3234 [(set_attr "isa" "noavx,avx")
3235 (set_attr "type" "sselog")
3236 (set_attr "prefix" "orig,vex")
3237 (set_attr "mode" "V4SF")])
3239 ;; These are modeled with the same vec_concat as the others so that we
3240 ;; capture users of shufps that can use the new instructions
;; vmovshdup (256-bit): V8SF operand 1 (register or memory) selected with
;; 1,1,3,3,5,5,7,7 into V8SF operand 0.
3241 (define_insn "avx_movshdup256"
3242 [(set (match_operand:V8SF 0 "register_operand" "=x")
3245 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3247 (parallel [(const_int 1) (const_int 1)
3248 (const_int 3) (const_int 3)
3249 (const_int 5) (const_int 5)
3250 (const_int 7) (const_int 7)])))]
3252 "vmovshdup\t{%1, %0|%0, %1}"
3253 [(set_attr "type" "sse")
3254 (set_attr "prefix" "vex")
3255 (set_attr "mode" "V8SF")])
;; movshdup (128-bit): V4SF operand 1 (register or memory) to V4SF
;; operand 0, via a selector whose first index is 1.
3257 (define_insn "sse3_movshdup"
3258 [(set (match_operand:V4SF 0 "register_operand" "=x")
3261 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3263 (parallel [(const_int 1)
3268 "%vmovshdup\t{%1, %0|%0, %1}"
3269 [(set_attr "type" "sse")
3270 (set_attr "prefix_rep" "1")
3271 (set_attr "prefix" "maybe_vex")
3272 (set_attr "mode" "V4SF")])
;; vmovsldup (256-bit): V8SF operand 1 (register or memory) selected with
;; 0,0,2,2,4,4,6,6 into V8SF operand 0.
3274 (define_insn "avx_movsldup256"
3275 [(set (match_operand:V8SF 0 "register_operand" "=x")
3278 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3280 (parallel [(const_int 0) (const_int 0)
3281 (const_int 2) (const_int 2)
3282 (const_int 4) (const_int 4)
3283 (const_int 6) (const_int 6)])))]
3285 "vmovsldup\t{%1, %0|%0, %1}"
3286 [(set_attr "type" "sse")
3287 (set_attr "prefix" "vex")
3288 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a single V4SF input (register or memory).  The
;; template uses the %v prefix together with prefix "maybe_vex", so one
;; alternative serves both the legacy and the VEX encoding.
;; NOTE(review): only the first selector index (0) is visible here; the
;; remaining indices are presumably the 128-bit analogue of
;; avx_movsldup256 -- confirm against the full pattern.
3290 (define_insn "sse3_movsldup"
3291 [(set (match_operand:V4SF 0 "register_operand" "=x")
3294 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3296 (parallel [(const_int 0)
3301 "%vmovsldup\t{%1, %0|%0, %1}"
3302 [(set_attr "type" "sse")
3303 (set_attr "prefix_rep" "1")
3304 (set_attr "prefix" "maybe_vex")
3305 (set_attr "mode" "V4SF")])
;; Expander for the immediate form of the 256-bit shufps.  Operand 3 is an
;; integer mask; each of its four 2-bit fields is turned into an explicit
;; element index and passed to avx_shufps256_1:
;;   fields 0 and 1 -> 0..3,    fields 2 and 3 -> 8..11,
;; and the same four fields are repeated with a further +4 offset
;; (4..7 and 12..15) for the second group of four indices.
3307 (define_expand "avx_shufps256"
3308 [(match_operand:V8SF 0 "register_operand" "")
3309 (match_operand:V8SF 1 "register_operand" "")
3310 (match_operand:V8SF 2 "nonimmediate_operand" "")
3311 (match_operand:SI 3 "const_int_operand" "")]
3314 int mask = INTVAL (operands[3]);
3315 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3316 GEN_INT ((mask >> 0) & 3),
3317 GEN_INT ((mask >> 2) & 3),
3318 GEN_INT (((mask >> 4) & 3) + 8),
3319 GEN_INT (((mask >> 6) & 3) + 8),
3320 GEN_INT (((mask >> 0) & 3) + 4),
3321 GEN_INT (((mask >> 2) & 3) + 4),
3322 GEN_INT (((mask >> 4) & 3) + 12),
3323 GEN_INT (((mask >> 6) & 3) + 12)));
3327 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the selection written as eight explicit indices
;; (operands 3..10).  The insn condition requires operands 7..10 to equal
;; operands 3..6 plus 4, i.e. both groups of four use the same selection.
;; The output code packs operands 3..6 back into the 8-bit immediate
;; (2 bits each, removing the +8 bias from operands 5 and 6) and stores it
;; in operands[3] for the template.
3328 (define_insn "avx_shufps256_1"
3329 [(set (match_operand:V8SF 0 "register_operand" "=x")
3332 (match_operand:V8SF 1 "register_operand" "x")
3333 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3334 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3335 (match_operand 4 "const_0_to_3_operand" "")
3336 (match_operand 5 "const_8_to_11_operand" "")
3337 (match_operand 6 "const_8_to_11_operand" "")
3338 (match_operand 7 "const_4_to_7_operand" "")
3339 (match_operand 8 "const_4_to_7_operand" "")
3340 (match_operand 9 "const_12_to_15_operand" "")
3341 (match_operand 10 "const_12_to_15_operand" "")])))]
3343 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3344 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3345 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3346 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3349 mask = INTVAL (operands[3]);
3350 mask |= INTVAL (operands[4]) << 2;
3351 mask |= (INTVAL (operands[5]) - 8) << 4;
3352 mask |= (INTVAL (operands[6]) - 8) << 6;
3353 operands[3] = GEN_INT (mask);
3355 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3357 [(set_attr "type" "sselog")
3358 (set_attr "length_immediate" "1")
3359 (set_attr "prefix" "vex")
3360 (set_attr "mode" "V8SF")])
;; Expander for the immediate form of the 128-bit shufps.  The four 2-bit
;; fields of mask operand 3 become explicit element indices for
;; sse_shufps_v4sf: fields 0 and 1 -> 0..3, fields 2 and 3 -> 4..7.
3362 (define_expand "sse_shufps"
3363 [(match_operand:V4SF 0 "register_operand" "")
3364 (match_operand:V4SF 1 "register_operand" "")
3365 (match_operand:V4SF 2 "nonimmediate_operand" "")
3366 (match_operand:SI 3 "const_int_operand" "")]
3369 int mask = INTVAL (operands[3]);
3370 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3371 GEN_INT ((mask >> 0) & 3),
3372 GEN_INT ((mask >> 2) & 3),
3373 GEN_INT (((mask >> 4) & 3) + 4),
3374 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps over the VI4F_128 mode iterator: selects four elements from the
;; concatenation of operands 1 and 2, the first two indices in 0..3 and the
;; last two in 4..7.  The output code re-encodes operands 3..6 as the
;; immediate (2 bits each, removing the +4 bias from operands 5 and 6) and
;; returns either the two-operand shufps template or the three-operand
;; vshufps template depending on which_alternative (isa noavx / avx).
3378 (define_insn "sse_shufps_<mode>"
3379 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3380 (vec_select:VI4F_128
3381 (vec_concat:<ssedoublevecmode>
3382 (match_operand:VI4F_128 1 "register_operand" "0,x")
3383 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3384 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3385 (match_operand 4 "const_0_to_3_operand" "")
3386 (match_operand 5 "const_4_to_7_operand" "")
3387 (match_operand 6 "const_4_to_7_operand" "")])))]
3391 mask |= INTVAL (operands[3]) << 0;
3392 mask |= INTVAL (operands[4]) << 2;
3393 mask |= (INTVAL (operands[5]) - 4) << 4;
3394 mask |= (INTVAL (operands[6]) - 4) << 6;
3395 operands[3] = GEN_INT (mask);
3397 switch (which_alternative)
3400 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3402 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3407 [(set_attr "isa" "noavx,avx")
3408 (set_attr "type" "sselog")
3409 (set_attr "length_immediate" "1")
3410 (set_attr "prefix" "orig,vex")
3411 (set_attr "mode" "V4SF")])
;; Produces a V2SF from elements 2 and 3 of V4SF operand 1.
;; Alternatives: 0 = store to memory (movhps), 1 = register to register
;; (movhlps), 2 = source in offsettable memory, loaded with movlps using
;; the %H1 operand modifier.
3413 (define_insn "sse_storehps"
3414 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3416 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3417 (parallel [(const_int 2) (const_int 3)])))]
3420 %vmovhps\t{%1, %0|%0, %1}
3421 %vmovhlps\t{%1, %d0|%d0, %1}
3422 %vmovlps\t{%H1, %d0|%d0, %H1}"
3423 [(set_attr "type" "ssemov")
3424 (set_attr "prefix" "maybe_vex")
3425 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  It lets ix86_fixup_binary_operands pick
;; the destination actually used (dst), emits sse_loadhps into it, and
;; copies dst into operand 0 when the two differ.
3427 (define_expand "sse_loadhps_exp"
3428 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3431 (match_operand:V4SF 1 "nonimmediate_operand" "")
3432 (parallel [(const_int 0) (const_int 1)]))
3433 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3436 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3438 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3440 /* Fix up the destination if needed. */
3441 if (dst != operands[0])
3442 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives: 0/1 = operand 2 in memory (movhps / vmovhps),
;; 2/3 = operand 2 in a register (movlhps / vmovlhps), 4 = destination in
;; offsettable memory and tied to operand 1, written with movlps via the
;; %H0 operand modifier.
3447 (define_insn "sse_loadhps"
3448 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3451 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3452 (parallel [(const_int 0) (const_int 1)]))
3453 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3456 movhps\t{%2, %0|%0, %2}
3457 vmovhps\t{%2, %1, %0|%0, %1, %2}
3458 movlhps\t{%2, %0|%0, %2}
3459 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3460 %vmovlps\t{%2, %H0|%H0, %2}"
3461 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3462 (set_attr "type" "ssemov")
3463 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3464 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Produces a V2SF from elements 0 and 1 of V4SF operand 1.
;; Alternatives: 0 = store to memory (movlps), 1 = register to register
;; (movaps), 2 = load from memory (movlps).
3466 (define_insn "sse_storelps"
3467 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3469 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3470 (parallel [(const_int 0) (const_int 1)])))]
3473 %vmovlps\t{%1, %0|%0, %1}
3474 %vmovaps\t{%1, %0|%0, %1}
3475 %vmovlps\t{%1, %d0|%d0, %1}"
3476 [(set_attr "type" "ssemov")
3477 (set_attr "prefix" "maybe_vex")
3478 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  It lets ix86_fixup_binary_operands pick
;; the destination actually used (dst), emits sse_loadlps into it, and
;; copies dst into operand 0 when the two differ.
3480 (define_expand "sse_loadlps_exp"
3481 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3483 (match_operand:V2SF 2 "nonimmediate_operand" "")
3485 (match_operand:V4SF 1 "nonimmediate_operand" "")
3486 (parallel [(const_int 2) (const_int 3)]))))]
3489 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3491 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3493 /* Fix up the destination if needed. */
3494 if (dst != operands[0])
3495 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives: 0/1 = both inputs in registers (shufps / vshufps with
;; immediate 0xe4), 2/3 = operand 2 in memory (movlps / vmovlps),
;; 4 = destination in memory and tied to operand 1 (movlps store).
;; Note that alternative 0 ties operand 2, not operand 1, to the
;; destination.
3500 (define_insn "sse_loadlps"
3501 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3503 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3505 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3506 (parallel [(const_int 2) (const_int 3)]))))]
3509 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3510 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3511 movlps\t{%2, %0|%0, %2}
3512 vmovlps\t{%2, %1, %0|%0, %1, %2}
3513 %vmovlps\t{%2, %0|%0, %2}"
3514 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3515 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3516 (set_attr "length_immediate" "1,1,*,*,*")
3517 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3518 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is three-operand.
;; NOTE(review): the RTL operator combining operands 2 and 1 is on lines
;; not visible in this extract -- confirm against the full pattern.
3520 (define_insn "sse_movss"
3521 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3523 (match_operand:V4SF 2 "register_operand" " x,x")
3524 (match_operand:V4SF 1 "register_operand" " 0,x")
3528 movss\t{%2, %0|%0, %2}
3529 vmovss\t{%2, %1, %0|%0, %1, %2}"
3530 [(set_attr "isa" "noavx,avx")
3531 (set_attr "type" "ssemov")
3532 (set_attr "prefix" "orig,vex")
3533 (set_attr "mode" "SF")])
;; Expander producing a V4SF from scalar SF operand 1.  The preparation
;; code forces operand 1 into an SF register.
;; NOTE(review): the condition guarding that force_reg is not visible in
;; this extract -- confirm when it applies.
3535 (define_expand "vec_dupv4sf"
3536 [(set (match_operand:V4SF 0 "register_operand" "")
3538 (match_operand:SF 1 "nonimmediate_operand" "")))]
3542 operands[1] = force_reg (SFmode, operands[1]);
;; Register-source vbroadcastss to a V4SF destination: the source is
;; element 0 of V4SF register operand 1.
3545 (define_insn "avx2_vec_dupv4sf"
3546 [(set (match_operand:V4SF 0 "register_operand" "=x")
3549 (match_operand:V4SF 1 "register_operand" "x")
3550 (parallel [(const_int 0)]))))]
3552 "vbroadcastss\t{%1, %0|%0, %1}"
3553 [(set_attr "type" "sselog1")
3554 (set_attr "prefix" "vex")
3555 (set_attr "mode" "V4SF")])
;; VEX-encoded V4SF duplicate of scalar SF operand 1.  Alternative 0
;; (register source) uses vshufps with immediate 0 and the source as both
;; inputs; alternative 1 (memory source) uses vbroadcastss.
3557 (define_insn "*vec_dupv4sf_avx"
3558 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3560 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3563 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3564 vbroadcastss\t{%1, %0|%0, %1}"
3565 [(set_attr "type" "sselog1,ssemov")
3566 (set_attr "length_immediate" "1,0")
3567 (set_attr "prefix_extra" "0,1")
3568 (set_attr "prefix" "vex")
3569 (set_attr "mode" "V4SF")])
;; Register-source vbroadcastss to a V8SF destination: the source is
;; element 0 of V4SF register operand 1.
3571 (define_insn "avx2_vec_dupv8sf"
3572 [(set (match_operand:V8SF 0 "register_operand" "=x")
3575 (match_operand:V4SF 1 "register_operand" "x")
3576 (parallel [(const_int 0)]))))]
3578 "vbroadcastss\t{%1, %0|%0, %1}"
3579 [(set_attr "type" "sselog1")
3580 (set_attr "prefix" "vex")
3581 (set_attr "mode" "V8SF")])
;; Legacy-encoded V4SF duplicate: scalar operand 1 is tied to the
;; destination register ("0") and shufps with immediate 0 is applied to
;; that register in place.
3583 (define_insn "*vec_dupv4sf"
3584 [(set (match_operand:V4SF 0 "register_operand" "=x")
3586 (match_operand:SF 1 "register_operand" "0")))]
3588 "shufps\t{$0, %0, %0|%0, %0, 0}"
3589 [(set_attr "type" "sselog1")
3590 (set_attr "length_immediate" "1")
3591 (set_attr "mode" "V4SF")])
3593 ;; Although insertps takes register source, we prefer
3594 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands.  Alternatives, in order:
;;   0/1  both in SSE registers        -> unpcklps / vunpcklps
;;   2/3  operand 2 in memory          -> insertps / vinsertps, imm 0x10
;;   4    operand 1 in memory, op 2 "C" -> movss load
;;   5    MMX registers                -> punpckldq
;;   6    MMX, op 1 in memory, op 2 "C" -> movd load
;; The MMX alternatives are marked with "*".
3595 (define_insn "*vec_concatv2sf_sse4_1"
3596 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3598 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3599 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3602 unpcklps\t{%2, %0|%0, %2}
3603 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3604 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3605 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3606 %vmovss\t{%1, %0|%0, %1}
3607 punpckldq\t{%2, %0|%0, %2}
3608 movd\t{%1, %0|%0, %1}"
3609 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3610 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3611 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3612 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3613 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3614 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3615 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3617 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3618 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3619 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands without insertps.  Alternatives:
;;   0  SSE registers                        -> unpcklps
;;   1  operand 1 in memory, operand 2 "C"   -> movss load
;;   2  MMX registers                        -> punpckldq
;;   3  MMX, operand 1 in memory, op 2 "C"   -> movd load
;; Operand 2 is restricted to reg_or_0_operand, so it never matches memory.
3620 (define_insn "*vec_concatv2sf_sse"
3621 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3623 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3624 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3627 unpcklps\t{%2, %0|%0, %2}
3628 movss\t{%1, %0|%0, %1}
3629 punpckldq\t{%2, %0|%0, %2}
3630 movd\t{%1, %0|%0, %1}"
3631 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3632 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  Alternatives 0/1 take operand 2
;; from a register (movlhps / vmovlhps); alternatives 2/3 take it from
;; memory (movhps / vmovhps).  The noavx forms tie operand 1 to the
;; destination.
3634 (define_insn "*vec_concatv4sf"
3635 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3637 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3638 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3641 movlhps\t{%2, %0|%0, %2}
3642 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3643 movhps\t{%2, %0|%0, %2}
3644 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3645 [(set_attr "isa" "noavx,avx,noavx,avx")
3646 (set_attr "type" "ssemov")
3647 (set_attr "prefix" "orig,vex,orig,vex")
3648 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation expander for every V_128 mode.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, operand 0 as the target and operand 1 as the initialiser.
3650 (define_expand "vec_init<mode>"
3651 [(match_operand:V_128 0 "register_operand" "")
3652 (match_operand 1 "" "")]
3655 ix86_expand_vector_init (false, operands[0], operands[1]);
3659 ;; Avoid combining registers from different units in a single alternative,
3660 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines a duplicated scalar (operand 2) with vector operand 1, for the
;; VI4F_128 modes.  Eleven alternatives, as given by the constraint strings:
;;   0-3   operand 1 is "C"; scalar from an SSE register, memory or a
;;         general register (insertps, mov<suffix>, movd, movss)
;;   4-5   scalar in an SSE register merged into an existing vector
;;         (movss / vmovss)
;;   6-7   scalar from a general register or memory (pinsrd / vpinsrd)
;;   8-10  destination in memory; these get type ssemov, fmov and imov
;;         respectively.
;; NOTE(review): the output templates for alternatives 8-10 are not visible
;; in this extract -- confirm how they are emitted or split.
3661 (define_insn "vec_set<mode>_0"
3662 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3663 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3665 (vec_duplicate:VI4F_128
3666 (match_operand:<ssescalarmode> 2 "general_operand"
3667 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3668 (match_operand:VI4F_128 1 "vector_move_operand"
3669 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3673 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3674 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3675 %vmovd\t{%2, %0|%0, %2}
3676 movss\t{%2, %0|%0, %2}
3677 movss\t{%2, %0|%0, %2}
3678 vmovss\t{%2, %1, %0|%0, %1, %2}
3679 pinsrd\t{$0, %2, %0|%0, %2, 0}
3680 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3684 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3686 (cond [(eq_attr "alternative" "0,6,7")
3687 (const_string "sselog")
3688 (eq_attr "alternative" "9")
3689 (const_string "fmov")
3690 (eq_attr "alternative" "10")
3691 (const_string "imov")
3693 (const_string "ssemov")))
3694 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3695 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3696 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3697 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3699 ;; A subset is vec_setv4sf.
;; insertps / vinsertps placing SF operand 2 into one element of V4SF
;; operand 1.  Operand 3 must be a power of two whose log2 is below the
;; number of V4SF elements (see the insn condition).  The output code
;; replaces operand 3 by that log2 shifted left by 4 and uses the result
;; as the instruction immediate.
3700 (define_insn "*vec_setv4sf_sse4_1"
3701 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3704 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3705 (match_operand:V4SF 1 "register_operand" "0,x")
3706 (match_operand:SI 3 "const_int_operand" "")))]
3708 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3709 < GET_MODE_NUNITS (V4SFmode))"
3711 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3712 switch (which_alternative)
3715 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3717 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3722 [(set_attr "isa" "noavx,avx")
3723 (set_attr "type" "sselog")
3724 (set_attr "prefix_data16" "1,*")
3725 (set_attr "prefix_extra" "1")
3726 (set_attr "length_immediate" "1")
3727 (set_attr "prefix" "orig,vex")
3728 (set_attr "mode" "V4SF")])
;; insertps / vinsertps with an explicit 8-bit immediate (operand 3),
;; modelled as an unspec.  When operand 2 is in memory the output code
;; takes the top two bits of the immediate (count_s), masks the immediate
;; to its low six bits, and re-addresses operand 2 as an SF at byte offset
;; count_s * 4, so the element selection is folded into the address.
;; NOTE(review): the line guarding the immediate masking is not visible in
;; this extract -- confirm whether it is conditional on count_s.
3730 (define_insn "sse4_1_insertps"
3731 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3732 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3733 (match_operand:V4SF 1 "register_operand" "0,x")
3734 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3738 if (MEM_P (operands[2]))
3740 unsigned count_s = INTVAL (operands[3]) >> 6;
3742 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3743 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3745 switch (which_alternative)
3748 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3750 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3755 [(set_attr "isa" "noavx,avx")
3756 (set_attr "type" "sselog")
3757 (set_attr "prefix_data16" "1,*")
3758 (set_attr "prefix_extra" "1")
3759 (set_attr "length_immediate" "1")
3760 (set_attr "prefix" "orig,vex")
3761 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition: TARGET_SSE && reload_completed) for a
;; memory destination with a non-memory scalar operand 1: it emits a move
;; to operand 0 re-addressed in the scalar mode at offset 0.
;; NOTE(review): the source argument of the move is on a line not visible
;; in this extract -- presumably operand 1; confirm.
3764 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3766 (vec_duplicate:VI4F_128
3767 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3770 "TARGET_SSE && reload_completed"
3773 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for every V_128 mode.  Operand 0 is the vector,
;; operand 1 the scalar value and operand 2 a constant integer passed on
;; as INTVAL; all work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
3778 (define_expand "vec_set<mode>"
3779 [(match_operand:V_128 0 "register_operand" "")
3780 (match_operand:<ssescalarmode> 1 "register_operand" "")
3781 (match_operand 2 "const_int_operand" "")]
3784 ix86_expand_vector_set (false, operands[0], operands[1],
3785 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF as an SF.  The pattern rejects the
;; memory-to-memory case and is split after reload into a plain SF move
;; from the low part of operand 1: either a hard register re-typed as
;; SFmode (gen_rtx_REG) or gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; in this extract -- presumably REG_P (op1); confirm.
3789 (define_insn_and_split "*vec_extractv4sf_0"
3790 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3792 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3793 (parallel [(const_int 0)])))]
3794 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3796 "&& reload_completed"
3799 rtx op1 = operands[1];
3801 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3803 op1 = gen_lowpart (SFmode, op1);
3804 emit_move_insn (operands[0], op1);
;; Expander for a 128-bit extraction from any V_256 mode.  Operand 2
;; (0 or 1) selects, through the switch, either vec_extract_lo_<mode> or
;; vec_extract_hi_<mode>; the chosen generator is then called with
;; operands 0 and 1.
3808 (define_expand "avx_vextractf128<mode>"
3809 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3810 (match_operand:V_256 1 "register_operand" "")
3811 (match_operand:SI 2 "const_0_to_1_operand" "")]
3814 rtx (*insn)(rtx, rtx);
3816 switch (INTVAL (operands[2]))
3819 insn = gen_vec_extract_lo_<mode>;
3822 insn = gen_vec_extract_hi_<mode>;
3828 emit_insn (insn (operands[0], operands[1]));
;; Low-half extraction (elements 0 and 1) for the VI8F_256 modes.  The
;; memory-to-memory case is rejected.  After reload the insn is split into
;; a plain half-mode move from the low part of operand 1: either a hard
;; register re-typed in the half mode (gen_rtx_REG) or gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; in this extract -- confirm.
3832 (define_insn_and_split "vec_extract_lo_<mode>"
3833 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3834 (vec_select:<ssehalfvecmode>
3835 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3836 (parallel [(const_int 0) (const_int 1)])))]
3837 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3839 "&& reload_completed"
3842 rtx op1 = operands[1];
3844 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3846 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3847 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 2 and 3) for the VI8F_256 modes, emitted
;; as vextract<i128> with immediate 1.  The source must be a register; the
;; destination may be a register or memory (attribute memory none,store).
3851 (define_insn "vec_extract_hi_<mode>"
3852 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3853 (vec_select:<ssehalfvecmode>
3854 (match_operand:VI8F_256 1 "register_operand" "x,x")
3855 (parallel [(const_int 2) (const_int 3)])))]
3857 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3858 [(set_attr "type" "sselog")
3859 (set_attr "prefix_extra" "1")
3860 (set_attr "length_immediate" "1")
3861 (set_attr "memory" "none,store")
3862 (set_attr "prefix" "vex")
3863 (set_attr "mode" "<sseinsnmode>")])
;; Low-half extraction (elements 0..3) for the VI4F_256 modes.  The
;; memory-to-memory case is rejected.  After reload the insn is split into
;; a plain half-mode move from the low part of operand 1: either a hard
;; register re-typed in the half mode (gen_rtx_REG) or gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; in this extract -- confirm.
3865 (define_insn_and_split "vec_extract_lo_<mode>"
3866 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3867 (vec_select:<ssehalfvecmode>
3868 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3869 (parallel [(const_int 0) (const_int 1)
3870 (const_int 2) (const_int 3)])))]
3871 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3873 "&& reload_completed"
3876 rtx op1 = operands[1];
3878 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3880 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3881 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 4..7) for the VI4F_256 modes, emitted as
;; vextract<i128> with immediate 1.  The source must be a register; the
;; destination may be a register or memory (attribute memory none,store).
3885 (define_insn "vec_extract_hi_<mode>"
3886 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3887 (vec_select:<ssehalfvecmode>
3888 (match_operand:VI4F_256 1 "register_operand" "x,x")
3889 (parallel [(const_int 4) (const_int 5)
3890 (const_int 6) (const_int 7)])))]
3892 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3893 [(set_attr "type" "sselog")
3894 (set_attr "prefix_extra" "1")
3895 (set_attr "length_immediate" "1")
3896 (set_attr "memory" "none,store")
3897 (set_attr "prefix" "vex")
3898 (set_attr "mode" "<sseinsnmode>")])
;; Low-half extraction (elements 0..7) of a V16HI into a V8HI.  The
;; memory-to-memory case is rejected.  After reload the insn is split into
;; a plain V8HI move from the low part of operand 1: either a hard
;; register re-typed as V8HImode (gen_rtx_REG) or gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; in this extract -- confirm.
3900 (define_insn_and_split "vec_extract_lo_v16hi"
3901 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3903 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3904 (parallel [(const_int 0) (const_int 1)
3905 (const_int 2) (const_int 3)
3906 (const_int 4) (const_int 5)
3907 (const_int 6) (const_int 7)])))]
3908 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3910 "&& reload_completed"
3913 rtx op1 = operands[1];
3915 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3917 op1 = gen_lowpart (V8HImode, op1);
3918 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 8..15) of a V16HI into a V8HI, emitted
;; as vextract%~128 with immediate 1.  The source must be a register; the
;; destination may be a register or memory (attribute memory none,store).
3922 (define_insn "vec_extract_hi_v16hi"
3923 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3925 (match_operand:V16HI 1 "register_operand" "x,x")
3926 (parallel [(const_int 8) (const_int 9)
3927 (const_int 10) (const_int 11)
3928 (const_int 12) (const_int 13)
3929 (const_int 14) (const_int 15)])))]
3931 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
3932 [(set_attr "type" "sselog")
3933 (set_attr "prefix_extra" "1")
3934 (set_attr "length_immediate" "1")
3935 (set_attr "memory" "none,store")
3936 (set_attr "prefix" "vex")
3937 (set_attr "mode" "OI")])
;; Low-half extraction (elements 0..15) of a V32QI into a V16QI.  The
;; memory-to-memory case is rejected.  After reload the insn is split into
;; a plain V16QI move from the low part of operand 1: either a hard
;; register re-typed as V16QImode (gen_rtx_REG) or gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; in this extract -- confirm.
3939 (define_insn_and_split "vec_extract_lo_v32qi"
3940 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3942 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3943 (parallel [(const_int 0) (const_int 1)
3944 (const_int 2) (const_int 3)
3945 (const_int 4) (const_int 5)
3946 (const_int 6) (const_int 7)
3947 (const_int 8) (const_int 9)
3948 (const_int 10) (const_int 11)
3949 (const_int 12) (const_int 13)
3950 (const_int 14) (const_int 15)])))]
3951 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3953 "&& reload_completed"
3956 rtx op1 = operands[1];
3958 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3960 op1 = gen_lowpart (V16QImode, op1);
3961 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 16..31) of a V32QI into a V16QI, emitted
;; as vextract%~128 with immediate 1.  The source must be a register; the
;; destination may be a register or memory (attribute memory none,store).
3965 (define_insn "vec_extract_hi_v32qi"
3966 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3968 (match_operand:V32QI 1 "register_operand" "x,x")
3969 (parallel [(const_int 16) (const_int 17)
3970 (const_int 18) (const_int 19)
3971 (const_int 20) (const_int 21)
3972 (const_int 22) (const_int 23)
3973 (const_int 24) (const_int 25)
3974 (const_int 26) (const_int 27)
3975 (const_int 28) (const_int 29)
3976 (const_int 30) (const_int 31)])))]
3978 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
3979 [(set_attr "type" "sselog")
3980 (set_attr "prefix_extra" "1")
3981 (set_attr "length_immediate" "1")
3982 (set_attr "memory" "none,store")
3983 (set_attr "prefix" "vex")
3984 (set_attr "mode" "OI")])
;; Extraction of element operand 2 (0..3) of a V4SF register as an SF.
;; Alternative 0 (general register or memory destination) emits extractps.
;; The other two alternatives have an SSE register destination and are
;; split after reload: the destination is re-typed as V4SF and filled
;; either by sse_shufps_v4sf with the index replicated into all four
;; selector slots, or by vec_interleave_highv4sf of operand 1 with itself.
;; NOTE(review): the case labels selecting between those two paths are not
;; visible in this extract -- confirm which indices take which path.
3986 (define_insn_and_split "*sse4_1_extractps"
3987 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3989 (match_operand:V4SF 1 "register_operand" "x,0,x")
3990 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3993 %vextractps\t{%2, %1, %0|%0, %1, %2}
3996 "&& reload_completed && SSE_REG_P (operands[0])"
3999 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4000 switch (INTVAL (operands[2]))
4004 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4005 operands[2], operands[2],
4006 GEN_INT (INTVAL (operands[2]) + 4),
4007 GEN_INT (INTVAL (operands[2]) + 4)));
4010 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4013 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4018 [(set_attr "isa" "*,noavx,avx")
4019 (set_attr "type" "sselog,*,*")
4020 (set_attr "prefix_data16" "1,*,*")
4021 (set_attr "prefix_extra" "1,*,*")
4022 (set_attr "length_immediate" "1,*,*")
4023 (set_attr "prefix" "maybe_vex,*,*")
4024 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element operand 2 (0..3) from a V4SF in offsettable
;; memory.  After reload it is split into a scalar SF move from operand 1
;; re-addressed at byte offset i*4, where i is the element index.
4026 (define_insn_and_split "*vec_extract_v4sf_mem"
4027 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4029 (match_operand:V4SF 1 "memory_operand" "o")
4030 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4033 "&& reload_completed"
4036 int i = INTVAL (operands[2]);
4038 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4042 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with the
;; unconditional 128-bit mode of the same element type.
4043 (define_mode_iterator VEC_EXTRACT_MODE
4044 [(V32QI "TARGET_AVX") V16QI
4045 (V16HI "TARGET_AVX") V8HI
4046 (V8SI "TARGET_AVX") V4SI
4047 (V4DI "TARGET_AVX") V2DI
4048 (V8SF "TARGET_AVX") V4SF
4049 (V4DF "TARGET_AVX") V2DF])
;; Element-extract expander for every VEC_EXTRACT_MODE.  Operand 0 is the
;; scalar destination, operand 1 the vector and operand 2 a constant
;; integer passed on as INTVAL; all work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
4051 (define_expand "vec_extract<mode>"
4052 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4053 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4054 (match_operand 2 "const_int_operand" "")]
4057 ix86_expand_vector_extract (false, operands[0], operands[1],
4058 INTVAL (operands[2]));
4062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4064 ;; Parallel double-precision floating point element swizzling
4066 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4068 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: selects indices 1, 5, 3, 7 from operands 1 and 2.
4069 (define_insn "avx_unpckhpd256"
4070 [(set (match_operand:V4DF 0 "register_operand" "=x")
4073 (match_operand:V4DF 1 "register_operand" "x")
4074 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4075 (parallel [(const_int 1) (const_int 5)
4076 (const_int 3) (const_int 7)])))]
4078 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4079 [(set_attr "type" "sselog")
4080 (set_attr "prefix" "vex")
4081 (set_attr "mode" "V4DF")])
;; Full-width high interleave for V4DF, built from three selections
;; because the 256-bit unpck insns only shuffle within their lanes.
;; Two fresh V4DF pseudos (operands 3 and 4) hold the intermediate
;; results: one with selector 0 4 2 6 and one with selector 1 5 3 7.  The
;; final set into operand 0 uses selector 2 3 6 7.
;; NOTE(review): the inputs of the second and third selections are on
;; lines not visible in this extract -- presumably operands 1/2 and 3/4
;; respectively; confirm.
4083 (define_expand "vec_interleave_highv4df"
4087 (match_operand:V4DF 1 "register_operand" "x")
4088 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4089 (parallel [(const_int 0) (const_int 4)
4090 (const_int 2) (const_int 6)])))
4096 (parallel [(const_int 1) (const_int 5)
4097 (const_int 3) (const_int 7)])))
4098 (set (match_operand:V4DF 0 "register_operand" "")
4103 (parallel [(const_int 2) (const_int 3)
4104 (const_int 6) (const_int 7)])))]
4107 operands[3] = gen_reg_rtx (V4DFmode);
4108 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  Both inputs may be memory; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
;; NOTE(review): only the first selector index (1) is visible in this
;; extract.
4112 (define_expand "vec_interleave_highv2df"
4113 [(set (match_operand:V2DF 0 "register_operand" "")
4116 (match_operand:V2DF 1 "nonimmediate_operand" "")
4117 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4118 (parallel [(const_int 1)
4122 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4123 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs                         -> unpckhpd / vunpckhpd
;;   2    operand 1 in memory, operand 2 == op 1  -> movddup of %H1 (sse3)
;;   3/4  operand 1 in memory                     -> movlpd / vmovlpd of %H1
;;   5    destination in memory, tied to op 2     -> movhpd store of op 1
4126 (define_insn "*vec_interleave_highv2df"
4127 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4130 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4131 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4132 (parallel [(const_int 1)
4134 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4136 unpckhpd\t{%2, %0|%0, %2}
4137 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4138 %vmovddup\t{%H1, %0|%0, %H1}
4139 movlpd\t{%H1, %0|%0, %H1}
4140 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4141 %vmovhpd\t{%1, %0|%0, %1}"
4142 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4143 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4144 (set_attr "prefix_data16" "*,*,*,1,*,1")
4145 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4146 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4148 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander with a single V4DF input (register or memory) and
;; selector 0 4 2 6 -- the same selector used by the unpcklpd256 patterns.
;; NOTE(review): the second input of the selection is on a line not
;; visible in this extract -- presumably a match_dup of operand 1; confirm.
4149 (define_expand "avx_movddup256"
4150 [(set (match_operand:V4DF 0 "register_operand" "")
4153 (match_operand:V4DF 1 "nonimmediate_operand" "")
4155 (parallel [(const_int 0) (const_int 4)
4156 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpcklpd: selects indices 0, 4, 2, 6
;; from register operand 1 and register-or-memory operand 2.
4159 (define_expand "avx_unpcklpd256"
4160 [(set (match_operand:V4DF 0 "register_operand" "")
4163 (match_operand:V4DF 1 "register_operand" "")
4164 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4165 (parallel [(const_int 0) (const_int 4)
4166 (const_int 2) (const_int 6)])))]
;; Matcher behind the two expanders with selector 0 4 2 6.  Alternative 0
;; requires operand 2 to match operand 1 (constraint "1") and emits
;; vmovddup, which also accepts a memory source; alternative 1 emits
;; vunpcklpd.  The insn condition allows operand 1 to be memory only when
;; it is equal to operand 2.
4169 (define_insn "*avx_unpcklpd256"
4170 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4173 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4174 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4175 (parallel [(const_int 0) (const_int 4)
4176 (const_int 2) (const_int 6)])))]
4178 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4180 vmovddup\t{%1, %0|%0, %1}
4181 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4182 [(set_attr "type" "sselog")
4183 (set_attr "prefix" "vex")
4184 (set_attr "mode" "V4DF")])
;; Full-width low interleave for V4DF, built from three selections
;; because the 256-bit unpck insns only shuffle within their lanes.
;; Two fresh V4DF pseudos (operands 3 and 4) hold the intermediate
;; results: one with selector 0 4 2 6 and one with selector 1 5 3 7.  The
;; final set into operand 0 uses selector 0 1 4 5.
;; NOTE(review): the inputs of the second and third selections are on
;; lines not visible in this extract -- presumably operands 1/2 and 3/4
;; respectively; confirm.
4186 (define_expand "vec_interleave_lowv4df"
4190 (match_operand:V4DF 1 "register_operand" "x")
4191 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4192 (parallel [(const_int 0) (const_int 4)
4193 (const_int 2) (const_int 6)])))
4199 (parallel [(const_int 1) (const_int 5)
4200 (const_int 3) (const_int 7)])))
4201 (set (match_operand:V4DF 0 "register_operand" "")
4206 (parallel [(const_int 0) (const_int 1)
4207 (const_int 4) (const_int 5)])))]
4210 operands[3] = gen_reg_rtx (V4DFmode);
4211 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  Both inputs may be memory; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
;; NOTE(review): only the first selector index (0) is visible in this
;; extract.
4214 (define_expand "vec_interleave_lowv2df"
4215 [(set (match_operand:V2DF 0 "register_operand" "")
4218 (match_operand:V2DF 1 "nonimmediate_operand" "")
4219 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4220 (parallel [(const_int 0)
4224 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4225 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs                         -> unpcklpd / vunpcklpd
;;   2    operand 1 in memory, operand 2 == op 1  -> movddup (sse3)
;;   3/4  operand 2 in memory                     -> movhpd / vmovhpd
;;   5    destination in memory, tied to op 1     -> movlpd store to %H0
4228 (define_insn "*vec_interleave_lowv2df"
4229 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4232 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4233 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4234 (parallel [(const_int 0)
4236 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4238 unpcklpd\t{%2, %0|%0, %2}
4239 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4240 %vmovddup\t{%1, %0|%0, %1}
4241 movhpd\t{%2, %0|%0, %2}
4242 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4243 %vmovlpd\t{%2, %H0|%H0, %2}"
4244 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4245 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4246 (set_attr "prefix_data16" "*,*,*,1,*,1")
4247 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4248 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (condition: TARGET_SSE3 && reload_completed) for a
;; V2DF memory destination and a register source: the register is re-typed
;; as DFmode and stored twice, at byte offsets 0 and 8 of the destination,
;; so both destination elements receive the low element of the source.
4251 [(set (match_operand:V2DF 0 "memory_operand" "")
4254 (match_operand:V2DF 1 "register_operand" "")
4256 (parallel [(const_int 0)
4258 "TARGET_SSE3 && reload_completed"
4261 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4262 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4263 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (condition: TARGET_SSE3) for a V2DF selection from a memory
;; source whose two selector indices satisfy operand 3 == operand 2 + 2.
;; It is replaced by a vec_duplicate of the single DF found at byte offset
;; operand 2 * 8 of the memory operand.
4268 [(set (match_operand:V2DF 0 "register_operand" "")
4271 (match_operand:V2DF 1 "memory_operand" "")
4273 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4274 (match_operand:SI 3 "const_int_operand" "")])))]
4275 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4276 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4278 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for the immediate form of the 256-bit shufpd.  Bits of mask
;; operand 3 are turned into explicit element indices for
;; avx_shufpd256_1: bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6.
;; NOTE(review): the argument for the first index is on a line not visible
;; in this extract -- presumably derived from bit 0; confirm.
4281 (define_expand "avx_shufpd256"
4282 [(match_operand:V4DF 0 "register_operand" "")
4283 (match_operand:V4DF 1 "register_operand" "")
4284 (match_operand:V4DF 2 "nonimmediate_operand" "")
4285 (match_operand:SI 3 "const_int_operand" "")]
4288 int mask = INTVAL (operands[3]);
4289 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4291 GEN_INT (mask & 2 ? 5 : 4),
4292 GEN_INT (mask & 4 ? 3 : 2),
4293 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection written as four explicit indices
;; (operands 3..6, ranges 0..1, 4..5, 2..3 and 6..7).  The output code
;; packs them into a 4-bit immediate, one bit per index after removing
;; each range's base, and stores it in operands[3] for the template.
4297 (define_insn "avx_shufpd256_1"
4298 [(set (match_operand:V4DF 0 "register_operand" "=x")
4301 (match_operand:V4DF 1 "register_operand" "x")
4302 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4303 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4304 (match_operand 4 "const_4_to_5_operand" "")
4305 (match_operand 5 "const_2_to_3_operand" "")
4306 (match_operand 6 "const_6_to_7_operand" "")])))]
4310 mask = INTVAL (operands[3]);
4311 mask |= (INTVAL (operands[4]) - 4) << 1;
4312 mask |= (INTVAL (operands[5]) - 2) << 2;
4313 mask |= (INTVAL (operands[6]) - 6) << 3;
4314 operands[3] = GEN_INT (mask);
4316 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4318 [(set_attr "type" "sselog")
4319 (set_attr "length_immediate" "1")
4320 (set_attr "prefix" "vex")
4321 (set_attr "mode" "V4DF")])
;; Expander for the immediate form of the 128-bit shufpd.  Bits of mask
;; operand 3 become explicit element indices for sse2_shufpd_v2df; bit 1
;; selects index 3 or 2.
;; NOTE(review): the argument for the first index is on a line not visible
;; in this extract -- presumably derived from bit 0; confirm.
4323 (define_expand "sse2_shufpd"
4324 [(match_operand:V2DF 0 "register_operand" "")
4325 (match_operand:V2DF 1 "register_operand" "")
4326 (match_operand:V2DF 2 "nonimmediate_operand" "")
4327 (match_operand:SI 3 "const_int_operand" "")]
4330 int mask = INTVAL (operands[3]);
4331 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4333 GEN_INT (mask & 2 ? 3 : 2)));
4337 ;; Modes handled by vec_extract_even/odd pattern.
;; Enabling conditions: 256-bit integer modes need TARGET_AVX2, 256-bit
;; float modes need TARGET_AVX, V4SF is unconditional, and every other
;; 128-bit mode needs TARGET_SSE2.
4338 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4339 [(V32QI "TARGET_AVX2") (V16QI "TARGET_SSE2")
4340 (V16HI "TARGET_AVX2") (V8HI "TARGET_SSE2")
4341 (V8SI "TARGET_AVX2") (V4SI "TARGET_SSE2")
4342 (V4DI "TARGET_AVX2") (V2DI "TARGET_SSE2")
4343 (V8SF "TARGET_AVX") V4SF
4344 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over VEC_EXTRACT_EVENODD_MODE with one destination and two
;; source vectors, all in registers.  It delegates to
;; ix86_expand_vec_extract_even_odd with 0 as the final argument.
4346 (define_expand "vec_extract_even<mode>"
4347 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4348 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4349 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4352 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander over VEC_EXTRACT_EVENODD_MODE with one destination and two
;; source vectors, all in registers.  It delegates to
;; ix86_expand_vec_extract_even_odd with 1 as the final argument.
4356 (define_expand "vec_extract_odd<mode>"
4357 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4358 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4359 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4362 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4366 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI operands 1 and 2.
;; NOTE(review): only the first selector index (1) is visible in this
;; extract; the remaining indices are not shown.
4367 (define_insn "avx2_interleave_highv4di"
4368 [(set (match_operand:V4DI 0 "register_operand" "=x")
4371 (match_operand:V4DI 1 "register_operand" "x")
4372 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4373 (parallel [(const_int 1)
4378 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4379 [(set_attr "type" "sselog")
4380 (set_attr "prefix" "vex")
4381 (set_attr "mode" "OI")])
;; 128-bit (V2DI) high interleave.  Two alternatives: the legacy two-operand
;; punpckhqdq, whose destination is tied to operand 1 (constraint "0"), and
;; the three-operand VEX form vpunpckhqdq.  The "isa" attribute restricts
;; them to non-AVX and AVX respectively.
4383 (define_insn "vec_interleave_highv2di"
4384 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4387 (match_operand:V2DI 1 "register_operand" "0,x")
4388 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4389 (parallel [(const_int 1)
4393 punpckhqdq\t{%2, %0|%0, %2}
4394 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4395 [(set_attr "isa" "noavx,avx")
4396 (set_attr "type" "sselog")
4397 (set_attr "prefix_data16" "1,*")
4398 (set_attr "prefix" "orig,vex")
4399 (set_attr "mode" "TI")])
;; 256-bit (V4DI) low interleave.  Single VEX-only alternative emitting
;; vpunpcklqdq; operand 1 must be a register, operand 2 may be a register
;; or memory.
4401 (define_insn "avx2_interleave_lowv4di"
4402 [(set (match_operand:V4DI 0 "register_operand" "=x")
4405 (match_operand:V4DI 1 "register_operand" "x")
4406 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4407 (parallel [(const_int 0)
4412 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4413 [(set_attr "type" "sselog")
4414 (set_attr "prefix" "vex")
4415 (set_attr "mode" "OI")])
;; 128-bit (V2DI) low interleave.  Two alternatives: the legacy two-operand
;; punpcklqdq, whose destination is tied to operand 1 (constraint "0"), and
;; the three-operand VEX form vpunpcklqdq.  The "isa" attribute restricts
;; them to non-AVX and AVX respectively.
4417 (define_insn "vec_interleave_lowv2di"
4418 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4421 (match_operand:V2DI 1 "register_operand" "0,x")
4422 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4423 (parallel [(const_int 0)
4427 punpcklqdq\t{%2, %0|%0, %2}
4428 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4429 [(set_attr "isa" "noavx,avx")
4430 (set_attr "type" "sselog")
4431 (set_attr "prefix_data16" "1,*")
4432 (set_attr "prefix" "orig,vex")
4433 (set_attr "mode" "TI")])
;; shufpd on the VI8F_128 modes.  The result takes one element from the
;; concatenation of operands 1 and 2: operand 3 (index 0..1) selects from
;; operand 1 and operand 4 (index 2..3) selects from operand 2.  The output
;; code folds both selectors into the single immediate of the instruction:
;; bit 0 = operand 3, bit 1 = operand 4 - 2.  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
4435 (define_insn "sse2_shufpd_<mode>"
4436 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4437 (vec_select:VI8F_128
4438 (vec_concat:<ssedoublevecmode>
4439 (match_operand:VI8F_128 1 "register_operand" "0,x")
4440 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4441 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4442 (match_operand 4 "const_2_to_3_operand" "")])))]
4446 mask = INTVAL (operands[3]);
4447 mask |= (INTVAL (operands[4]) - 2) << 1;
4448 operands[3] = GEN_INT (mask);
4450 switch (which_alternative)
4453 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4455 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4460 [(set_attr "isa" "noavx,avx")
4461 (set_attr "type" "sselog")
4462 (set_attr "length_immediate" "1")
4463 (set_attr "prefix" "orig,vex")
4464 (set_attr "mode" "V2DF")])
4466 ;; Avoid combining registers from different units in a single alternative,
4467 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of V2DF operand 1 into DF operand 0.  Requires SSE2
;; and rejects the case where both operands are memory.  Six alternatives
;; cover a memory destination, SSE register destinations (non-AVX and AVX),
;; and loads from an offsettable memory source into SSE, x87 (*f) or
;; general (r) registers, as listed in the "type" attribute.
4468 (define_insn "sse2_storehpd"
4469 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4471 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4472 (parallel [(const_int 1)])))]
4473 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4475 %vmovhpd\t{%1, %0|%0, %1}
4477 vunpckhpd\t{%d1, %0|%0, %d1}
4481 [(set_attr "isa" "*,noavx,avx,*,*,*")
4482 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4483 (set (attr "prefix_data16")
4485 (and (eq_attr "alternative" "0")
4486 (not (match_test "TARGET_AVX")))
4488 (const_string "*")))
4489 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4490 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF that lives in memory
;; into a DF register becomes a plain DF move from the same address plus
;; 8 bytes.
4493 [(set (match_operand:DF 0 "register_operand" "")
4495 (match_operand:V2DF 1 "memory_operand" "")
4496 (parallel [(const_int 1)])))]
4497 "TARGET_SSE2 && reload_completed"
4498 [(set (match_dup 0) (match_dup 1))]
4499 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available.
;; Uses movhps / movhlps / movlps; the three alternatives are a memory
;; destination, register to register, and an offsettable memory source.
;; Both operands may not be memory at once.
4501 (define_insn "*vec_extractv2df_1_sse"
4502 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4504 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4505 (parallel [(const_int 1)])))]
4506 "!TARGET_SSE2 && TARGET_SSE
4507 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4509 movhps\t{%1, %0|%0, %1}
4510 movhlps\t{%1, %0|%0, %1}
4511 movlps\t{%H1, %0|%0, %H1}"
4512 [(set_attr "type" "ssemov")
4513 (set_attr "mode" "V2SF,V4SF,V2SF")])
4515 ;; Avoid combining registers from different units in a single alternative,
4516 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 into DF operand 0.  Requires SSE2
;; and rejects the case where both operands are memory.  Alternative 0
;; stores to memory with (v)movlpd; the remaining alternatives target SSE,
;; x87 (*f) and general (r) registers, as listed in the "type" attribute.
4517 (define_insn "sse2_storelpd"
4518 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4520 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4521 (parallel [(const_int 0)])))]
4522 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4524 %vmovlpd\t{%1, %0|%0, %1}
4529 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4530 (set_attr "prefix_data16" "1,*,*,*,*")
4531 (set_attr "prefix" "maybe_vex")
4532 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF into a DF register is
;; rewritten as an ordinary DF move.  Operand 1 is re-expressed in DFmode,
;; either as the same hard register number or via gen_lowpart, and handed
;; to emit_move_insn.
4535 [(set (match_operand:DF 0 "register_operand" "")
4537 (match_operand:V2DF 1 "nonimmediate_operand" "")
4538 (parallel [(const_int 0)])))]
4539 "TARGET_SSE2 && reload_completed"
4542 rtx op1 = operands[1];
4544 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4546 op1 = gen_lowpart (DFmode, op1);
4547 emit_move_insn (operands[0], op1);
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available.
;; Uses movlps for the memory alternatives and movaps for the register to
;; register case.  Both operands may not be memory at once.
4551 (define_insn "*vec_extractv2df_0_sse"
4552 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4554 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4555 (parallel [(const_int 0)])))]
4556 "!TARGET_SSE2 && TARGET_SSE
4557 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4559 movlps\t{%1, %0|%0, %1}
4560 movaps\t{%1, %0|%0, %1}
4561 movlps\t{%1, %0|%0, %1}"
4562 [(set_attr "type" "ssemov")
4563 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for replacing the high element of a V2DF: the result keeps
;; element 0 of operand 1 and takes DF operand 2 as the other element.
;; ix86_fixup_binary_operands legitimises the operands and returns the
;; destination to use; when that differs from operand 0 the result is
;; copied back after emitting sse2_loadhpd.
4565 (define_expand "sse2_loadhpd_exp"
4566 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4569 (match_operand:V2DF 1 "nonimmediate_operand" "")
4570 (parallel [(const_int 0)]))
4571 (match_operand:DF 2 "nonimmediate_operand" "")))]
4574 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4576 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4578 /* Fix up the destination if needed. */
4579 if (dst != operands[0])
4580 emit_move_insn (operands[0], dst);
4585 ;; Avoid combining registers from different units in a single alternative,
4586 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from element 0 of operand 1 and DF operand 2.  Seven
;; alternatives: (v)movhpd when operand 2 is in memory, (v)unpcklpd when it
;; is an SSE register, and three further alternatives taking operand 2 from
;; an SSE, x87 (*f) or general (r) register, as listed in the "type"
;; attribute.  Operands 1 and 2 may not both be memory.
4587 (define_insn "sse2_loadhpd"
4588 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4592 (match_operand:V2DF 1 "nonimmediate_operand"
4594 (parallel [(const_int 0)]))
4595 (match_operand:DF 2 "nonimmediate_operand"
4596 " m,m,x,x,x,*f,r")))]
4597 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4599 movhpd\t{%2, %0|%0, %2}
4600 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4601 unpcklpd\t{%2, %0|%0, %2}
4602 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4606 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4607 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4608 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4609 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4610 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split: when a V2DF in memory keeps its own element 0 and
;; only receives a new second element from DF register operand 1, the
;; whole operation is a DF store to the same address plus 8 bytes.
4613 [(set (match_operand:V2DF 0 "memory_operand" "")
4615 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4616 (match_operand:DF 1 "register_operand" "")))]
4617 "TARGET_SSE2 && reload_completed"
4618 [(set (match_dup 0) (match_dup 1))]
4619 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander for replacing the low element of a V2DF: the result takes DF
;; operand 2 as its first element and keeps element 1 of operand 1.
;; ix86_fixup_binary_operands legitimises the operands and returns the
;; destination to use; when that differs from operand 0 the result is
;; copied back after emitting sse2_loadlpd.
4621 (define_expand "sse2_loadlpd_exp"
4622 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4624 (match_operand:DF 2 "nonimmediate_operand" "")
4626 (match_operand:V2DF 1 "nonimmediate_operand" "")
4627 (parallel [(const_int 1)]))))]
4630 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4632 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4634 /* Fix up the destination if needed. */
4635 if (dst != operands[0])
4636 emit_move_insn (operands[0], dst);
4641 ;; Avoid combining registers from different units in a single alternative,
4642 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF whose first element is DF operand 2 and whose second
;; element is element 1 of operand 1.  Eleven alternatives, in the order
;; of the constraint strings: alternative 0 pairs a memory operand 2 with
;; the "C" constraint on operand 1 and uses (v)movsd; the next seven use
;; movlpd, movsd, shufpd or movhpd in legacy and VEX forms; the last three
;; have a memory destination with operand 2 in an SSE, x87 (*f) or general
;; (r) register.  Operands 1 and 2 may not both be memory.
4643 (define_insn "sse2_loadlpd"
4644 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4645 "=x,x,x,x,x,x,x,x,m,m ,m")
4647 (match_operand:DF 2 "nonimmediate_operand"
4648 " m,m,m,x,x,0,0,x,x,*f,r")
4650 (match_operand:V2DF 1 "vector_move_operand"
4651 " C,0,x,0,x,x,o,o,0,0 ,0")
4652 (parallel [(const_int 1)]))))]
4653 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4655 %vmovsd\t{%2, %0|%0, %2}
4656 movlpd\t{%2, %0|%0, %2}
4657 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4658 movsd\t{%2, %0|%0, %2}
4659 vmovsd\t{%2, %1, %0|%0, %1, %2}
4660 shufpd\t{$2, %1, %0|%0, %1, 2}
4661 movhpd\t{%H1, %0|%0, %H1}
4662 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4666 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4668 (cond [(eq_attr "alternative" "5")
4669 (const_string "sselog")
4670 (eq_attr "alternative" "9")
4671 (const_string "fmov")
4672 (eq_attr "alternative" "10")
4673 (const_string "imov")
4675 (const_string "ssemov")))
4676 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4677 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4678 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4679 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split: when a V2DF in memory keeps its own element 1 and
;; only receives a new first element from DF register operand 1, the whole
;; operation is a DF store to the start of that memory.  Operand 1 is the
;; first member of the vec_concat, i.e. element 0, so the store must go to
;; byte offset 0; an offset of 8 would overwrite the element that the
;; pattern says is preserved.
4682 [(set (match_operand:V2DF 0 "memory_operand" "")
4684 (match_operand:DF 1 "register_operand" "")
4685 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4686 "TARGET_SSE2 && reload_completed"
4687 [(set (match_dup 0) (match_dup 1))]
4688 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style merge of two V2DF operands with nine alternatives, in the
;; order of the constraint strings: (v)movsd between registers, (v)movlpd
;; with operand 2 in memory, movlpd to a memory destination, shufpd, and
;; (v)movhps forms when operand 1 is offsettable memory or the destination
;; is offsettable memory.  Alternative 5 (shufpd) is the only one typed
;; "sselog" and the only one carrying an immediate.
4690 (define_insn "sse2_movsd"
4691 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4693 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4694 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4698 movsd\t{%2, %0|%0, %2}
4699 vmovsd\t{%2, %1, %0|%0, %1, %2}
4700 movlpd\t{%2, %0|%0, %2}
4701 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4702 %vmovlpd\t{%2, %0|%0, %2}
4703 shufpd\t{$2, %1, %0|%0, %1, 2}
4704 movhps\t{%H1, %0|%0, %H1}
4705 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4706 %vmovhps\t{%1, %H0|%H0, %1}"
4707 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4710 (eq_attr "alternative" "5")
4711 (const_string "sselog")
4712 (const_string "ssemov")))
4713 (set (attr "prefix_data16")
4715 (and (eq_attr "alternative" "2,4")
4716 (not (match_test "TARGET_AVX")))
4718 (const_string "*")))
4719 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4720 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4721 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander that builds a V2DF from DF operand 1.  The preparation code
;; forces operand 1 into a DF register; the guard around that statement is
;; not visible in this excerpt.
4723 (define_expand "vec_dupv2df"
4724 [(set (match_operand:V2DF 0 "register_operand" "")
4726 (match_operand:DF 1 "nonimmediate_operand" "")))]
4730 operands[1] = force_reg (DFmode, operands[1]);
;; V2DF duplicate implemented with (v)movddup; operand 1 may be an SSE
;; register or memory.
4733 (define_insn "*vec_dupv2df_sse3"
4734 [(set (match_operand:V2DF 0 "register_operand" "=x")
4736 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4738 "%vmovddup\t{%1, %0|%0, %1}"
4739 [(set_attr "type" "sselog1")
4740 (set_attr "prefix" "maybe_vex")
4741 (set_attr "mode" "DF")])
;; V2DF duplicate where the DF source must already be in the destination
;; register (constraint "0").  The condition and output template are not
;; visible in this excerpt.
4743 (define_insn "*vec_dupv2df"
4744 [(set (match_operand:V2DF 0 "register_operand" "=x")
4746 (match_operand:DF 1 "register_operand" "0")))]
4749 [(set_attr "type" "sselog1")
4750 (set_attr "mode" "V2DF")])
;; V2DF concatenation implemented with (v)movddup from DF operand 1, which
;; may be an SSE register or memory.  The second member of the
;; concatenation is not visible in this excerpt.
4752 (define_insn "*vec_concatv2df_sse3"
4753 [(set (match_operand:V2DF 0 "register_operand" "=x")
4755 (match_operand:DF 1 "nonimmediate_operand" "xm")
4758 "%vmovddup\t{%1, %0|%0, %1}"
4759 [(set_attr "type" "sselog1")
4760 (set_attr "prefix" "maybe_vex")
4761 (set_attr "mode" "DF")])
;; Concatenate DF operands 1 and 2 into a V2DF.  Seven alternatives, in the
;; order of the constraint strings: (v)unpcklpd for two registers,
;; (v)movhpd when operand 2 is in memory, (v)movsd when operand 1 is in
;; memory and operand 2 matches constraint "C", and movlhps / movhps as the
;; alternatives whose "isa" is plain "noavx" rather than an SSE2 variant.
4763 (define_insn "*vec_concatv2df"
4764 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4766 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4767 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4770 unpcklpd\t{%2, %0|%0, %2}
4771 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4772 movhpd\t{%2, %0|%0, %2}
4773 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4774 %vmovsd\t{%1, %0|%0, %1}
4775 movlhps\t{%2, %0|%0, %2}
4776 movhps\t{%2, %0|%0, %2}"
4777 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4780 (eq_attr "alternative" "0,1")
4781 (const_string "sselog")
4782 (const_string "ssemov")))
4783 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4784 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4785 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4789 ;; Parallel integral arithmetic
4791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for the VI_128 modes.  The preparation statement
;; loads an all-zero vector of the same mode into a fresh register and
;; stores it in operands[2] for use by the pattern.
4793 (define_expand "neg<mode>2"
4794 [(set (match_operand:VI_128 0 "register_operand" "")
4797 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4799 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector integer add/subtract expander over the VI_AVX2 modes.  The
;; operands are legitimised in place by ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
4801 (define_expand "<plusminus_insn><mode>3"
4802 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4804 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4805 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4807 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector integer add/subtract instruction for the VI_AVX2 modes.  Two
;; alternatives: the legacy two-operand form (destination tied to operand
;; 1) and the VEX three-operand form.  <comm> supplies the commutativity
;; marker for operand 1; the pattern is only valid when
;; ix86_binary_operator_ok accepts the operands.
4809 (define_insn "*<plusminus_insn><mode>3"
4810 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4812 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4813 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4814 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4816 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4817 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4818 [(set_attr "isa" "noavx,avx")
4819 (set_attr "type" "sseiadd")
4820 (set_attr "prefix_data16" "1,*")
4821 (set_attr "prefix" "orig,vex")
4822 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus) over the VI12_AVX2
;; modes.  The operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
4824 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4825 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4826 (sat_plusminus:VI12_AVX2
4827 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4828 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4830 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract instruction for the VI12_AVX2 modes.  Two
;; alternatives: the legacy two-operand form (destination tied to operand
;; 1) and the VEX three-operand form.  The "mode" attribute follows the
;; operand mode through <sseinsnmode>, as the other patterns over *_AVX2
;; iterators in this file do, instead of being fixed at TI for every
;; vector width.
4832 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4833 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4834 (sat_plusminus:VI12_AVX2
4835 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4836 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4837 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4839 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4840 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4841 [(set_attr "isa" "noavx,avx")
4842 (set_attr "type" "sseiadd")
4843 (set_attr "prefix_data16" "1,*")
4844 (set_attr "prefix" "orig,vex")
4845 (set_attr "mode" "<sseinsnmode>")])
;; Byte-vector multiply for the VI1_AVX2 modes, split while pseudos can
;; still be created.  Both inputs are widened by interleaving each operand
;; into the high and low halves, the halves are multiplied in the wider
;; <sseunpackmode>, and the even bytes of the two products are merged back
;; with ix86_expand_vec_extract_even_odd.  A REG_EQUAL note recording the
;; original multiplication is attached to the last emitted insn.
4847 (define_insn_and_split "mul<mode>3"
4848 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4849 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4850 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4852 && can_create_pseudo_p ()"
4859 enum machine_mode mulmode = <sseunpackmode>mode;
4861 for (i = 0; i < 6; ++i)
4862 t[i] = gen_reg_rtx (<MODE>mode);
4864 /* Unpack data such that we've got a source byte in each low byte of
4865 each word. We don't care what goes into the high byte of each word.
4866 Rather than trying to get zero in there, most convenient is to let
4867 it be a copy of the low byte. */
4868 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4870 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4872 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4874 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4877 /* Multiply words. The end-of-line annotations here give a picture of what
4878 the output of that instruction looks like. Dot means don't care; the
4879 letters are the bytes of the result with A being the most significant. */
4880 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4881 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4882 gen_lowpart (mulmode, t[0]),
4883 gen_lowpart (mulmode, t[1]))));
4884 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4885 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4886 gen_lowpart (mulmode, t[2]),
4887 gen_lowpart (mulmode, t[3]))));
4889 /* Extract the even bytes and merge them back together. */
4890 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4892 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4893 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-vector multiply expander over the VI2_AVX2 modes.  The operands are
;; legitimised in place by ix86_fixup_binary_operands_no_copy.
4897 (define_expand "mul<mode>3"
4898 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4899 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4900 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4902 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Word-vector multiply instruction (pmullw / vpmullw) for the VI2_AVX2
;; modes.  Operand 1 is marked commutative with "%"; alternative 0 is the
;; legacy two-operand form and alternative 1 the VEX three-operand form.
4904 (define_insn "*mul<mode>3"
4905 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4906 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4907 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4908 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4910 pmullw\t{%2, %0|%0, %2}
4911 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4912 [(set_attr "isa" "noavx,avx")
4913 (set_attr "type" "sseimul")
4914 (set_attr "prefix_data16" "1,*")
4915 (set_attr "prefix" "orig,vex")
4916 (set_attr "mode" "<sseinsnmode>")])
;; High-part word multiply expander over the VI2_AVX2 modes: both operands
;; are extended (any_extend) to <ssedoublemode>, multiplied, and the product
;; is shifted right logically.  The operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy.
4918 (define_expand "<s>mul<mode>3_highpart"
4919 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4921 (lshiftrt:<ssedoublemode>
4922 (mult:<ssedoublemode>
4923 (any_extend:<ssedoublemode>
4924 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4925 (any_extend:<ssedoublemode>
4926 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4929 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part word multiply instruction (pmulh<u>w / vpmulh<u>w) for the
;; VI2_AVX2 modes.  Operand 1 is marked commutative with "%"; alternative 0
;; is the legacy two-operand form and alternative 1 the VEX three-operand
;; form.
4931 (define_insn "*<s>mul<mode>3_highpart"
4932 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4934 (lshiftrt:<ssedoublemode>
4935 (mult:<ssedoublemode>
4936 (any_extend:<ssedoublemode>
4937 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4938 (any_extend:<ssedoublemode>
4939 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4941 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4943 pmulh<u>w\t{%2, %0|%0, %2}
4944 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4945 [(set_attr "isa" "noavx,avx")
4946 (set_attr "type" "sseimul")
4947 (set_attr "prefix_data16" "1,*")
4948 (set_attr "prefix" "orig,vex")
4949 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of elements 0, 2, 4 and 6 of two
;; V8SI operands into a V4DI result.  The operands are legitimised in place
;; by ix86_fixup_binary_operands_no_copy.
4951 (define_expand "avx2_umulv4siv4di3"
4952 [(set (match_operand:V4DI 0 "register_operand" "")
4956 (match_operand:V8SI 1 "nonimmediate_operand" "")
4957 (parallel [(const_int 0) (const_int 2)
4958 (const_int 4) (const_int 6)])))
4961 (match_operand:V8SI 2 "nonimmediate_operand" "")
4962 (parallel [(const_int 0) (const_int 2)
4963 (const_int 4) (const_int 6)])))))]
4965 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on 256-bit operands: multiplies elements 0, 2, 4 and 6 of two
;; V8SI operands into a V4DI result.  Requires AVX2, so the pattern is
;; named after that ISA like its expander avx2_umulv4siv4di3; the leading
;; "*" means the name is used for identification only.  Operand 1 is
;; marked commutative with "%".
4967 (define_insn "*avx2_umulv4siv4di3"
4968 [(set (match_operand:V4DI 0 "register_operand" "=x")
4972 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4973 (parallel [(const_int 0) (const_int 2)
4974 (const_int 4) (const_int 6)])))
4977 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4978 (parallel [(const_int 0) (const_int 2)
4979 (const_int 4) (const_int 6)])))))]
4980 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4981 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4982 [(set_attr "type" "sseimul")
4983 (set_attr "prefix" "vex")
4984 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result.  The operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy.
4986 (define_expand "sse2_umulv2siv2di3"
4987 [(set (match_operand:V2DI 0 "register_operand" "")
4991 (match_operand:V4SI 1 "nonimmediate_operand" "")
4992 (parallel [(const_int 0) (const_int 2)])))
4995 (match_operand:V4SI 2 "nonimmediate_operand" "")
4996 (parallel [(const_int 0) (const_int 2)])))))]
4998 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq: multiplies elements 0 and 2 of two V4SI operands
;; into a V2DI result.  Operand 1 is marked commutative with "%";
;; alternative 0 is the legacy two-operand form and alternative 1 the VEX
;; three-operand form.
5000 (define_insn "*sse2_umulv2siv2di3"
5001 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5005 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5006 (parallel [(const_int 0) (const_int 2)])))
5009 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5010 (parallel [(const_int 0) (const_int 2)])))))]
5011 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5013 pmuludq\t{%2, %0|%0, %2}
5014 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5015 [(set_attr "isa" "noavx,avx")
5016 (set_attr "type" "sseimul")
5017 (set_attr "prefix_data16" "1,*")
5018 (set_attr "prefix" "orig,vex")
5019 (set_attr "mode" "TI")])
;; Expander for the widening multiply of elements 0, 2, 4 and 6 of two
;; V8SI operands into a V4DI result (matched by the vpmuldq pattern).  The
;; operands are legitimised in place by ix86_fixup_binary_operands_no_copy.
5021 (define_expand "avx2_mulv4siv4di3"
5022 [(set (match_operand:V4DI 0 "register_operand" "")
5026 (match_operand:V8SI 1 "nonimmediate_operand" "")
5027 (parallel [(const_int 0) (const_int 2)
5028 (const_int 4) (const_int 6)])))
5031 (match_operand:V8SI 2 "nonimmediate_operand" "")
5032 (parallel [(const_int 0) (const_int 2)
5033 (const_int 4) (const_int 6)])))))]
5035 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on 256-bit operands: multiplies elements 0, 2, 4 and 6 of two
;; V8SI operands into a V4DI result.  Operand 1 carries the commutative
;; marker "%": the insn condition goes through ix86_binary_operator_ok for
;; MULT and operand 1 uses the nonimmediate_operand predicate, so the
;; register allocator must be allowed to swap the inputs when operand 1 is
;; the one in memory, exactly as the sibling vpmuludq and pmuldq patterns
;; permit.
5037 (define_insn "*avx2_mulv4siv4di3"
5038 [(set (match_operand:V4DI 0 "register_operand" "=x")
5042 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5043 (parallel [(const_int 0) (const_int 2)
5044 (const_int 4) (const_int 6)])))
5047 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5048 (parallel [(const_int 0) (const_int 2)
5049 (const_int 4) (const_int 6)])))))]
5050 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5051 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5052 [(set_attr "isa" "avx")
5053 (set_attr "type" "sseimul")
5054 (set_attr "prefix_extra" "1")
5055 (set_attr "prefix" "vex")
5056 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (matched by the pmuldq pattern).  The
;; operands are legitimised in place by ix86_fixup_binary_operands_no_copy.
5058 (define_expand "sse4_1_mulv2siv2di3"
5059 [(set (match_operand:V2DI 0 "register_operand" "")
5063 (match_operand:V4SI 1 "nonimmediate_operand" "")
5064 (parallel [(const_int 0) (const_int 2)])))
5067 (match_operand:V4SI 2 "nonimmediate_operand" "")
5068 (parallel [(const_int 0) (const_int 2)])))))]
5070 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq: multiplies elements 0 and 2 of two V4SI operands into
;; a V2DI result; requires SSE4.1.  Operand 1 is marked commutative with
;; "%"; alternative 0 is the legacy two-operand form and alternative 1 the
;; VEX three-operand form.
5072 (define_insn "*sse4_1_mulv2siv2di3"
5073 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5077 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5078 (parallel [(const_int 0) (const_int 2)])))
5081 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5082 (parallel [(const_int 0) (const_int 2)])))))]
5083 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5085 pmuldq\t{%2, %0|%0, %2}
5086 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5087 [(set_attr "isa" "noavx,avx")
5088 (set_attr "type" "sseimul")
5089 (set_attr "prefix_data16" "1,*")
5090 (set_attr "prefix_extra" "1")
5091 (set_attr "prefix" "orig,vex")
5092 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd form: V16HI operands 1 and 2 are each
;; referenced twice, through a selection starting at index 0 and through a
;; V8HI selection starting at index 1 and ending at index 15, producing a
;; V8SI result.  The operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy.
5094 (define_expand "avx2_pmaddwd"
5095 [(set (match_operand:V8SI 0 "register_operand" "")
5100 (match_operand:V16HI 1 "nonimmediate_operand" "")
5101 (parallel [(const_int 0)
5111 (match_operand:V16HI 2 "nonimmediate_operand" "")
5112 (parallel [(const_int 0)
5122 (vec_select:V8HI (match_dup 1)
5123 (parallel [(const_int 1)
5132 (vec_select:V8HI (match_dup 2)
5133 (parallel [(const_int 1)
5140 (const_int 15)]))))))]
5142 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the 128-bit pmaddwd form: V8HI operands 1 and 2 are each
;; referenced twice, through a selection starting at index 0 and through a
;; V4HI selection starting at index 1 and ending at index 7, producing a
;; V4SI result.  The operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy.
5144 (define_expand "sse2_pmaddwd"
5145 [(set (match_operand:V4SI 0 "register_operand" "")
5150 (match_operand:V8HI 1 "nonimmediate_operand" "")
5151 (parallel [(const_int 0)
5157 (match_operand:V8HI 2 "nonimmediate_operand" "")
5158 (parallel [(const_int 0)
5164 (vec_select:V4HI (match_dup 1)
5165 (parallel [(const_int 1)
5170 (vec_select:V4HI (match_dup 2)
5171 (parallel [(const_int 1)
5174 (const_int 7)]))))))]
5176 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; vpmaddwd on 256-bit operands (V16HI inputs, V8SI result); requires AVX2.
;; Single VEX alternative; operand 1 is marked commutative with "%" and
;; operand 2 may be in memory.
5178 (define_insn "*avx2_pmaddwd"
5179 [(set (match_operand:V8SI 0 "register_operand" "=x")
5184 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5185 (parallel [(const_int 0)
5195 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5196 (parallel [(const_int 0)
5206 (vec_select:V8HI (match_dup 1)
5207 (parallel [(const_int 1)
5216 (vec_select:V8HI (match_dup 2)
5217 (parallel [(const_int 1)
5224 (const_int 15)]))))))]
5225 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5226 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5227 [(set_attr "type" "sseiadd")
5228 (set_attr "prefix" "vex")
5229 (set_attr "mode" "OI")])
;; pmaddwd / vpmaddwd on 128-bit operands (V8HI inputs, V4SI result).
;; Operand 1 is marked commutative with "%"; alternative 0 is the legacy
;; two-operand form and alternative 1 the VEX three-operand form.
5231 (define_insn "*sse2_pmaddwd"
5232 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5237 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5238 (parallel [(const_int 0)
5244 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5245 (parallel [(const_int 0)
5251 (vec_select:V4HI (match_dup 1)
5252 (parallel [(const_int 1)
5257 (vec_select:V4HI (match_dup 2)
5258 (parallel [(const_int 1)
5261 (const_int 7)]))))))]
5262 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5264 pmaddwd\t{%2, %0|%0, %2}
5265 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5266 [(set_attr "isa" "noavx,avx")
5267 (set_attr "type" "sseiadd")
5268 (set_attr "atom_unit" "simul")
5269 (set_attr "prefix_data16" "1,*")
5270 (set_attr "prefix" "orig,vex")
5271 (set_attr "mode" "TI")])
;; Doubleword-vector multiply expander over the VI4_AVX2 modes.  Operand
;; fix-up through ix86_fixup_binary_operands_no_copy is only performed when
;; SSE4.1 or AVX is enabled.
5273 (define_expand "mul<mode>3"
5274 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5275 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5276 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5279 if (TARGET_SSE4_1 || TARGET_AVX)
5280 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Doubleword-vector multiply instruction (pmulld / vpmulld) for the
;; VI4_AVX2 modes; requires SSE4.1.  Operand 1 is marked commutative with
;; "%"; alternative 0 is the legacy two-operand form and alternative 1 the
;; VEX three-operand form.
5283 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5284 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5285 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5286 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5287 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5289 pmulld\t{%2, %0|%0, %2}
5290 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5291 [(set_attr "isa" "noavx,avx")
5292 (set_attr "type" "sseimul")
5293 (set_attr "prefix_extra" "1")
5294 (set_attr "prefix" "orig,vex")
5295 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX, split
;; while pseudos can still be created.  The product is assembled from two
;; sse2_umulv2siv2di3 multiplies: one on the inputs as they are and one on
;; copies shifted with sse2_lshrv1ti3.  Each result is rearranged with
;; pshufd and the two are merged by vec_interleave_lowv4si.  A REG_EQUAL
;; note recording the original multiplication is attached to the last
;; emitted insn.
5297 (define_insn_and_split "*sse2_mulv4si3"
5298 [(set (match_operand:V4SI 0 "register_operand" "")
5299 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5300 (match_operand:V4SI 2 "register_operand" "")))]
5301 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5302 && can_create_pseudo_p ()"
5307 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5313 t1 = gen_reg_rtx (V4SImode);
5314 t2 = gen_reg_rtx (V4SImode);
5315 t3 = gen_reg_rtx (V4SImode);
5316 t4 = gen_reg_rtx (V4SImode);
5317 t5 = gen_reg_rtx (V4SImode);
5318 t6 = gen_reg_rtx (V4SImode);
5319 thirtytwo = GEN_INT (32);
5321 /* Multiply elements 2 and 0. */
5322 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5325 /* Shift both input vectors down one element, so that elements 3
5326 and 1 are now in the slots for elements 2 and 0. For K8, at
5327 least, this is faster than using a shuffle. */
5328 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5329 gen_lowpart (V1TImode, op1),
5331 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5332 gen_lowpart (V1TImode, op2),
5334 /* Multiply elements 3 and 1. */
5335 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5338 /* Move the results in element 2 down to element 1; we don't care
5339 what goes in elements 2 and 3. */
5340 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5341 const0_rtx, const0_rtx));
5342 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5343 const0_rtx, const0_rtx));
5345 /* Merge the parts back together. */
5346 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5348 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5349 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; V2DI multiply synthesised from 32-bit pieces, split while pseudos can
;; still be created.  Two code paths are visible.  The first uses XOP
;; patterns: pshufd, mulv4si3, xop_phadddq, a left shift by 32 and
;; xop_pmacsdql.  The second uses sse2_umulv2siv2di3 three times (low*low
;; and the two high*low cross products), shifts the cross products left by
;; 32 and adds the three parts.  A REG_EQUAL note recording the original
;; multiplication is attached to the last emitted insn.
5353 (define_insn_and_split "mulv2di3"
5354 [(set (match_operand:V2DI 0 "register_operand" "")
5355 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5356 (match_operand:V2DI 2 "register_operand" "")))]
5358 && can_create_pseudo_p ()"
5363 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5372 /* op1: A,B,C,D, op2: E,F,G,H */
5373 op1 = gen_lowpart (V4SImode, op1);
5374 op2 = gen_lowpart (V4SImode, op2);
5376 t1 = gen_reg_rtx (V4SImode);
5377 t2 = gen_reg_rtx (V4SImode);
5378 t3 = gen_reg_rtx (V2DImode);
5379 t4 = gen_reg_rtx (V2DImode);
5382 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5388 /* t2: (B*E),(A*F),(D*G),(C*H) */
5389 emit_insn (gen_mulv4si3 (t2, t1, op2));
5391 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5392 emit_insn (gen_xop_phadddq (t3, t2));
5394 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5395 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5397 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5398 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5402 t1 = gen_reg_rtx (V2DImode);
5403 t2 = gen_reg_rtx (V2DImode);
5404 t3 = gen_reg_rtx (V2DImode);
5405 t4 = gen_reg_rtx (V2DImode);
5406 t5 = gen_reg_rtx (V2DImode);
5407 t6 = gen_reg_rtx (V2DImode);
5408 thirtytwo = GEN_INT (32);
5410 /* Multiply low parts. */
5411 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5412 gen_lowpart (V4SImode, op2)));
5414 /* Shift input vectors right 32 bits so we can multiply high parts. */
5415 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5416 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5418 /* Multiply high parts by low parts. */
5419 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5420 gen_lowpart (V4SImode, t3)));
5421 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5422 gen_lowpart (V4SImode, t2)));
5424 /* Shift them back. */
5425 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5426 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5428 /* Add the three parts together. */
5429 emit_insn (gen_addv2di3 (t6, t1, t4));
5430 emit_insn (gen_addv2di3 (op0, t6, t5));
5433 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5434 gen_rtx_MULT (V2DImode, operands[1], operands[2]));
;; Signed widening multiply, "hi" half, V8HI -> V4SI.  Computes the low
;; words of every product with mulv8hi3 and the high words with
;; smulv8hi3_highpart, then combines them with vec_interleave_highv8hi into
;; operand 0 viewed as V8HI.
5438 (define_expand "vec_widen_smult_hi_v8hi"
5439 [(match_operand:V4SI 0 "register_operand" "")
5440 (match_operand:V8HI 1 "register_operand" "")
5441 (match_operand:V8HI 2 "register_operand" "")]
5444 rtx op1, op2, t1, t2, dest;
5448 t1 = gen_reg_rtx (V8HImode);
5449 t2 = gen_reg_rtx (V8HImode);
5450 dest = gen_lowpart (V8HImode, operands[0]);
5452 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5453 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5454 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, "lo" half, V8HI -> V4SI.  Computes the low
;; words of every product with mulv8hi3 and the high words with
;; smulv8hi3_highpart, then combines them with vec_interleave_lowv8hi into
;; operand 0 viewed as V8HI.
5458 (define_expand "vec_widen_smult_lo_v8hi"
5459 [(match_operand:V4SI 0 "register_operand" "")
5460 (match_operand:V8HI 1 "register_operand" "")
5461 (match_operand:V8HI 2 "register_operand" "")]
5464 rtx op1, op2, t1, t2, dest;
5468 t1 = gen_reg_rtx (V8HImode);
5469 t2 = gen_reg_rtx (V8HImode);
5470 dest = gen_lowpart (V8HImode, operands[0]);
5472 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5473 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5474 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, "hi" half, V8HI -> V4SI.  Computes the low
;; words of every product with mulv8hi3 and the high words with
;; umulv8hi3_highpart, then combines them with vec_interleave_highv8hi into
;; operand 0 viewed as V8HI.
5478 (define_expand "vec_widen_umult_hi_v8hi"
5479 [(match_operand:V4SI 0 "register_operand" "")
5480 (match_operand:V8HI 1 "register_operand" "")
5481 (match_operand:V8HI 2 "register_operand" "")]
5484 rtx op1, op2, t1, t2, dest;
5488 t1 = gen_reg_rtx (V8HImode);
5489 t2 = gen_reg_rtx (V8HImode);
5490 dest = gen_lowpart (V8HImode, operands[0]);
5492 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5493 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5494 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, "lo" half, V8HI -> V4SI.  Computes the low
;; words of every product with mulv8hi3 and the high words with
;; umulv8hi3_highpart, then combines them with vec_interleave_lowv8hi into
;; operand 0 viewed as V8HI.
5498 (define_expand "vec_widen_umult_lo_v8hi"
5499 [(match_operand:V4SI 0 "register_operand" "")
5500 (match_operand:V8HI 1 "register_operand" "")
5501 (match_operand:V8HI 2 "register_operand" "")]
5504 rtx op1, op2, t1, t2, dest;
5508 t1 = gen_reg_rtx (V8HImode);
5509 t2 = gen_reg_rtx (V8HImode);
5510 dest = gen_lowpart (V8HImode, operands[0]);
5512 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5513 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5514 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, "hi" half, V4SI -> V2DI.  Both inputs are
;; rearranged with pshufd into fresh V4SI registers and then multiplied by
;; the XOP pattern xop_mulv2div2di3_high.  The shuffle indices and the
;; enabling condition are not visible in this excerpt.
5518 (define_expand "vec_widen_smult_hi_v4si"
5519 [(match_operand:V2DI 0 "register_operand" "")
5520 (match_operand:V4SI 1 "register_operand" "")
5521 (match_operand:V4SI 2 "register_operand" "")]
5526 t1 = gen_reg_rtx (V4SImode);
5527 t2 = gen_reg_rtx (V4SImode);
5529 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5534 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5539 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, "lo" half, V4SI -> V2DI.  Both inputs are
;; rearranged with pshufd into fresh V4SI registers and then multiplied by
;; the XOP pattern xop_mulv2div2di3_low.  The shuffle indices and the
;; enabling condition are not visible in this excerpt.
5543 (define_expand "vec_widen_smult_lo_v4si"
5544 [(match_operand:V2DI 0 "register_operand" "")
5545 (match_operand:V4SI 1 "register_operand" "")
5546 (match_operand:V4SI 2 "register_operand" "")]
5551 t1 = gen_reg_rtx (V4SImode);
5552 t2 = gen_reg_rtx (V4SImode);
5554 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5559 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5564 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Expander "vec_widen_umult_hi_v4si": V4SI inputs (operands 1 and 2),
;; V2DI destination (operand 0).  Each input is interleaved with itself
;; using vec_interleave_highv4si (T1 from OP1, T2 from OP2), and the
;; results are multiplied by sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): presumably the self-interleave places the two high source
;; elements where the multiply reads them -- confirm.
5568 (define_expand "vec_widen_umult_hi_v4si"
5569 [(match_operand:V2DI 0 "register_operand" "")
5570 (match_operand:V4SI 1 "register_operand" "")
5571 (match_operand:V4SI 2 "register_operand" "")]
5574 rtx op1, op2, t1, t2;
5578 t1 = gen_reg_rtx (V4SImode);
5579 t2 = gen_reg_rtx (V4SImode);
5581 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5582 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5583 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "vec_widen_umult_lo_v4si": V4SI inputs (operands 1 and 2),
;; V2DI destination (operand 0).  Same sequence as the "hi" variant but
;; with vec_interleave_lowv4si: each input is interleaved with itself into
;; T1/T2, then sse2_umulv2siv2di3 writes operand 0.
5587 (define_expand "vec_widen_umult_lo_v4si"
5588 [(match_operand:V2DI 0 "register_operand" "")
5589 (match_operand:V4SI 1 "register_operand" "")
5590 (match_operand:V4SI 2 "register_operand" "")]
5593 rtx op1, op2, t1, t2;
5597 t1 = gen_reg_rtx (V4SImode);
5598 t2 = gen_reg_rtx (V4SImode);
5600 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5601 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5602 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "sdot_prodv8hi": operands 1 and 2 are V8HI registers, operand 3
;; is a V4SI register, and operand 0 is the V4SI result.  The body emits
;; sse2_pmaddwd of operands 1 and 2 into a V4SI temporary T, then
;; addv4si3 (operand 0, operand 3, T), i.e. operand 0 = operand 3 + T.
5606 (define_expand "sdot_prodv8hi"
5607 [(match_operand:V4SI 0 "register_operand" "")
5608 (match_operand:V8HI 1 "register_operand" "")
5609 (match_operand:V8HI 2 "register_operand" "")
5610 (match_operand:V4SI 3 "register_operand" "")]
5613 rtx t = gen_reg_rtx (V4SImode);
5614 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5615 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander "udot_prodv4si": operands 1 and 2 are V4SI registers, operand 3
;; is a V2DI register, and operand 0 is the V2DI result.  Steps emitted:
;;   T1 = sse2_umulv2siv2di3 (operand 1, operand 2)
;;   T1 = T1 + operand 3                            (addv2di3)
;;   T2, T3 = operands 1 and 2 shifted right as V1TI values
;;            (sse2_lshrv1ti3 on gen_lowpart views)
;;   T4 = sse2_umulv2siv2di3 (T2, T3)
;;   operand 0 = T1 + T4                            (addv2di3)
;; NOTE(review): presumably the V1TI shift brings the elements skipped by
;; the first multiply into position for the second one -- confirm.
5619 (define_expand "udot_prodv4si"
5620 [(match_operand:V2DI 0 "register_operand" "")
5621 (match_operand:V4SI 1 "register_operand" "")
5622 (match_operand:V4SI 2 "register_operand" "")
5623 (match_operand:V2DI 3 "register_operand" "")]
5628 t1 = gen_reg_rtx (V2DImode);
5629 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5630 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5632 t2 = gen_reg_rtx (V4SImode);
5633 t3 = gen_reg_rtx (V4SImode);
5634 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5635 gen_lowpart (V1TImode, operands[1]),
5637 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5638 gen_lowpart (V1TImode, operands[2]),
5641 t4 = gen_reg_rtx (V2DImode);
5642 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5644 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Insn "ashr<mode>3" over the VI24_AVX2 modes.  Operand 1 is the vector
;; source; operand 2 is an SImode nonmemory_operand count.
;; Alternative 0 (isa noavx) is the two-operand psra form with operand 1
;; tied to operand 0; alternative 1 (isa avx) is the three-operand vpsra
;; form.  length_immediate depends on whether operand 2 is a const_int.
5648 (define_insn "ashr<mode>3"
5649 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5651 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5652 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5655 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5656 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5657 [(set_attr "isa" "noavx,avx")
5658 (set_attr "type" "sseishft")
5659 (set (attr "length_immediate")
5660 (if_then_else (match_operand 2 "const_int_operand" "")
5662 (const_string "0")))
5663 (set_attr "prefix_data16" "1,*")
5664 (set_attr "prefix" "orig,vex")
5665 (set_attr "mode" "<sseinsnmode>")])
;; Insn "lshr<mode>3": lshiftrt over the VI248_AVX2 modes.  Operand 2 is an
;; SImode nonmemory_operand count.  Alternative 0 (isa noavx) emits the
;; two-operand psrl form with operand 1 tied to operand 0; alternative 1
;; (isa avx) emits the three-operand vpsrl form.  length_immediate depends
;; on whether operand 2 is a const_int.
5667 (define_insn "lshr<mode>3"
5668 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5669 (lshiftrt:VI248_AVX2
5670 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5671 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5674 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5675 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5676 [(set_attr "isa" "noavx,avx")
5677 (set_attr "type" "sseishft")
5678 (set (attr "length_immediate")
5679 (if_then_else (match_operand 2 "const_int_operand" "")
5681 (const_string "0")))
5682 (set_attr "prefix_data16" "1,*")
5683 (set_attr "prefix" "orig,vex")
5684 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_lshl<mode>3" over the VI248_256 modes.  It has a single
;; alternative and emits the three-operand vpsll<ssemodesuffix> form with
;; a VEX prefix and mode OI.  Operand 2 is an SImode nonmemory_operand
;; count; length_immediate depends on whether it is a const_int.
5686 (define_insn "avx2_lshl<mode>3"
5687 [(set (match_operand:VI248_256 0 "register_operand" "=x")
5689 (match_operand:VI248_256 1 "register_operand" "x")
5690 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5692 "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5693 [(set_attr "type" "sseishft")
5694 (set_attr "prefix" "vex")
5695 (set (attr "length_immediate")
5696 (if_then_else (match_operand 2 "const_int_operand" "")
5698 (const_string "0")))
5699 (set_attr "mode" "OI")])
;; Insn "ashl<mode>3" over the VI248_128 modes.  Operand 2 is an SImode
;; nonmemory_operand count.  Alternative 0 (isa noavx) emits the
;; two-operand psll form with operand 1 tied to operand 0; alternative 1
;; (isa avx) emits the three-operand vpsll form.  The insn mode is TI.
5701 (define_insn "ashl<mode>3"
5702 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5704 (match_operand:VI248_128 1 "register_operand" "0,x")
5705 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5708 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5709 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5710 [(set_attr "isa" "noavx,avx")
5711 (set_attr "type" "sseishft")
5712 (set (attr "length_immediate")
5713 (if_then_else (match_operand 2 "const_int_operand" "")
5715 (const_string "0")))
5716 (set_attr "prefix_data16" "1,*")
5717 (set_attr "prefix" "orig,vex")
5718 (set_attr "mode" "TI")])
;; Expander "vec_shl_<mode>" over the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands 0
;; and 1 with their V1TImode views (gen_lowpart).
;; NOTE(review): presumably this lets the V1TI whole-register shift insn
;; match the expanded pattern -- confirm.
5720 (define_expand "vec_shl_<mode>"
5721 [(set (match_operand:VI_128 0 "register_operand" "")
5723 (match_operand:VI_128 1 "register_operand" "")
5724 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5727 operands[0] = gen_lowpart (V1TImode, operands[0]);
5728 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn "<sse2_avx2>_ashl<mode>3" over the VIMAX_AVX2 modes.  Operand 2 is
;; a constant accepted by const_0_to_255_mul_8_operand; before output it is
;; replaced by its value divided by 8, and that quotient is what gets
;; printed as the immediate.  The output is selected by which_alternative:
;; pslldq (two-operand form) or vpslldq (three-operand form).
5731 (define_insn "<sse2_avx2>_ashl<mode>3"
5732 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5734 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5735 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5738 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5740 switch (which_alternative)
5743 return "pslldq\t{%2, %0|%0, %2}";
5745 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5750 [(set_attr "isa" "noavx,avx")
5751 (set_attr "type" "sseishft")
5752 (set_attr "length_immediate" "1")
5753 (set_attr "prefix_data16" "1,*")
5754 (set_attr "prefix" "orig,vex")
5755 (set_attr "mode" "<sseinsnmode>")])
;; Expander "vec_shr_<mode>" over the VI_128 modes; counterpart of
;; "vec_shl_<mode>".  Operand 2 must satisfy const_0_to_255_mul_8_operand.
;; The preparation code replaces operands 0 and 1 with their V1TImode
;; views (gen_lowpart).
5757 (define_expand "vec_shr_<mode>"
5758 [(set (match_operand:VI_128 0 "register_operand" "")
5760 (match_operand:VI_128 1 "register_operand" "")
5761 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5764 operands[0] = gen_lowpart (V1TImode, operands[0]);
5765 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn "<sse2_avx2>_lshr<mode>3": lshiftrt over the VIMAX_AVX2 modes.
;; Operand 2 is a constant accepted by const_0_to_255_mul_8_operand; it is
;; divided by 8 before output and the quotient is printed as the
;; immediate.  The output is selected by which_alternative: psrldq
;; (two-operand form) or vpsrldq (three-operand form).
5768 (define_insn "<sse2_avx2>_lshr<mode>3"
5769 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5770 (lshiftrt:VIMAX_AVX2
5771 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5772 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5775 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5777 switch (which_alternative)
5780 return "psrldq\t{%2, %0|%0, %2}";
5782 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5787 [(set_attr "isa" "noavx,avx")
5788 (set_attr "type" "sseishft")
5789 (set_attr "length_immediate" "1")
5790 (set_attr "atom_unit" "sishuf")
5791 (set_attr "prefix_data16" "1,*")
5792 (set_attr "prefix" "orig,vex")
5793 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<code><mode>3" for a binary operation over the VI124_256
;; modes.  Both inputs are nonimmediate_operands; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy on the operand array.
;; NOTE(review): presumably the max/min expander paired with
;; "*avx2_<code><mode>3" -- confirm against the code iterator used.
5796 (define_expand "<code><mode>3"
5797 [(set (match_operand:VI124_256 0 "register_operand" "")
5799 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5800 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5802 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn "*avx2_<code><mode>3" over the VI124_256 modes.  It requires
;; TARGET_AVX2 and ix86_binary_operator_ok, and emits the three-operand
;; vp<maxmin_int><ssemodesuffix> form.  Operand 1 carries the "%"
;; (commutative) constraint modifier; operand 2 may be a register or
;; memory ("xm").
5804 (define_insn "*avx2_<code><mode>3"
5805 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5807 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5808 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5809 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5810 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5811 [(set_attr "type" "sseiadd")
5812 (set_attr "prefix_extra" "1")
5813 (set_attr "prefix" "vex")
5814 (set_attr "mode" "OI")])
;; Expander "<code><mode>3" for maxmin over the VI8_AVX2 modes, implemented
;; as a vector conditional.  It fills an XOPS array for
;; ix86_expand_int_vcond:
;;   xops[0]    destination (operand 0)
;;   xops[1..2] operands 1, 2 for SMAX/UMAX, swapped (2, 1) otherwise
;;   xops[3]    comparison of operand 1 with operand 2, using GTU for
;;              UMAX/UMIN and GT for the signed codes
;;   xops[4..5] the compared values (operands 1 and 2)
;; The return value of ix86_expand_int_vcond is stored in OK.
5816 (define_expand "<code><mode>3"
5817 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5818 (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5819 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5826 xops[0] = operands[0];
5828 if (<CODE> == SMAX || <CODE> == UMAX)
5830 xops[1] = operands[1];
5831 xops[2] = operands[2];
5835 xops[1] = operands[2];
5836 xops[2] = operands[1];
5839 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5841 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5842 xops[4] = operands[1];
5843 xops[5] = operands[2];
5845 ok = ix86_expand_int_vcond (xops);
;; Expander "<code><mode>3" for smaxmin over the VI124_128 modes.
;; When TARGET_SSE4_1 is set or the mode is V8HImode, it only calls
;; ix86_fixup_binary_operands_no_copy.  The remaining code forces
;; operands 1 and 2 into registers and builds an XOPS array for
;; ix86_expand_int_vcond: xops[0] is the destination, xops[1..2] are
;; operands 1 and 2 in one order or the other, xops[3] is a GT comparison
;; of operand 1 with operand 2, and xops[4..5] are the compared values.
5850 (define_expand "<code><mode>3"
5851 [(set (match_operand:VI124_128 0 "register_operand" "")
5852 (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5853 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5856 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5857 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5863 xops[0] = operands[0];
5864 operands[1] = force_reg (<MODE>mode, operands[1]);
5865 operands[2] = force_reg (<MODE>mode, operands[2]);
5869 xops[1] = operands[1];
5870 xops[2] = operands[2];
5874 xops[1] = operands[2];
5875 xops[2] = operands[1];
5878 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5879 xops[4] = operands[1];
5880 xops[5] = operands[2];
5882 ok = ix86_expand_int_vcond (xops);
;; Insn "*sse4_1_<code><mode>3" over the VI14_128 modes.  It requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.  Alternative 0 (isa noavx)
;; emits the two-operand p<maxmin_int> form; alternative 1 (isa avx) emits
;; the three-operand vp<maxmin_int> form.  Operand 1 carries the "%"
;; (commutative) constraint modifier.
5888 (define_insn "*sse4_1_<code><mode>3"
5889 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5891 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5892 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5893 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5895 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5896 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5897 [(set_attr "isa" "noavx,avx")
5898 (set_attr "type" "sseiadd")
5899 (set_attr "prefix_extra" "1,*")
5900 (set_attr "prefix" "orig,vex")
5901 (set_attr "mode" "TI")])
;; Insn "*<code>v8hi3": V8HI max/min available with TARGET_SSE2 when
;; ix86_binary_operator_ok holds.  Alternative 0 (isa noavx) emits
;; p<maxmin_int>w in two-operand form; alternative 1 (isa avx) emits
;; vp<maxmin_int>w in three-operand form.
5903 (define_insn "*<code>v8hi3"
5904 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5906 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5907 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5908 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5910 p<maxmin_int>w\t{%2, %0|%0, %2}
5911 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5912 [(set_attr "isa" "noavx,avx")
5913 (set_attr "type" "sseiadd")
5914 (set_attr "prefix_data16" "1,*")
5915 (set_attr "prefix_extra" "*,1")
5916 (set_attr "prefix" "orig,vex")
5917 (set_attr "mode" "TI")])
;; Expander "<code><mode>3" for umaxmin over the VI124_128 modes.  Three
;; strategies appear in the preparation code:
;;  - TARGET_SSE4_1 or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  - UMAX in V8HImode: emit sse2_ussubv8hi3 (OP3 = operand 1 - OP2), then
;;    addv8hi3 (OP0 = OP3 + OP2).  OP3 starts as OP0 and is replaced by a
;;    fresh register when OP0 is rtx_equal_p to OP2, so OP2 is not
;;    overwritten before the add.
;;  - Remaining code: force operands 1 and 2 into registers and call
;;    ix86_expand_int_vcond with a GTU comparison in xops[3], the
;;    destination in xops[0], the selected values in xops[1..2] and the
;;    compared values in xops[4..5].
5919 (define_expand "<code><mode>3"
5920 [(set (match_operand:VI124_128 0 "register_operand" "")
5921 (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5922 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5925 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
5926 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5927 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
5929 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5930 operands[1] = force_reg (<MODE>mode, operands[1]);
5931 if (rtx_equal_p (op3, op2))
5932 op3 = gen_reg_rtx (V8HImode);
5933 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5934 emit_insn (gen_addv8hi3 (op0, op3, op2));
5942 operands[1] = force_reg (<MODE>mode, operands[1]);
5943 operands[2] = force_reg (<MODE>mode, operands[2]);
5945 xops[0] = operands[0];
5949 xops[1] = operands[1];
5950 xops[2] = operands[2];
5954 xops[1] = operands[2];
5955 xops[2] = operands[1];
5958 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5959 xops[4] = operands[1];
5960 xops[5] = operands[2];
5962 ok = ix86_expand_int_vcond (xops);
;; Insn "*sse4_1_<code><mode>3" over the VI24_128 modes.  It requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.  Alternative 0 (isa noavx)
;; emits the two-operand p<maxmin_int> form; alternative 1 (isa avx) emits
;; the three-operand vp<maxmin_int> form.
5968 (define_insn "*sse4_1_<code><mode>3"
5969 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5971 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5972 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5973 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5975 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5976 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5977 [(set_attr "isa" "noavx,avx")
5978 (set_attr "type" "sseiadd")
5979 (set_attr "prefix_extra" "1,*")
5980 (set_attr "prefix" "orig,vex")
5981 (set_attr "mode" "TI")])
;; Insn "*<code>v16qi3": V16QI max/min available with TARGET_SSE2 when
;; ix86_binary_operator_ok holds.  Alternative 0 (isa noavx) emits
;; p<maxmin_int>b in two-operand form; alternative 1 (isa avx) emits
;; vp<maxmin_int>b in three-operand form.
5983 (define_insn "*<code>v16qi3"
5984 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5986 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5987 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5988 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5990 p<maxmin_int>b\t{%2, %0|%0, %2}
5991 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5992 [(set_attr "isa" "noavx,avx")
5993 (set_attr "type" "sseiadd")
5994 (set_attr "prefix_data16" "1,*")
5995 (set_attr "prefix_extra" "*,1")
5996 (set_attr "prefix" "orig,vex")
5997 (set_attr "mode" "TI")])
5999 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6001 ;; Parallel integral comparisons
6003 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "avx2_eq<mode>3" over the VI_256 modes.  Both inputs are
;; nonimmediate_operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code EQ.
6005 (define_expand "avx2_eq<mode>3"
6006 [(set (match_operand:VI_256 0 "register_operand" "")
6008 (match_operand:VI_256 1 "nonimmediate_operand" "")
6009 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6011 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn "*avx2_eq<mode>3" over the VI_256 modes.  It requires TARGET_AVX2
;; and ix86_binary_operator_ok for EQ, and emits the three-operand
;; vpcmpeq<ssemodesuffix> form.  Operand 1 carries the "%" (commutative)
;; constraint modifier; operand 2 may be a register or memory.
6013 (define_insn "*avx2_eq<mode>3"
6014 [(set (match_operand:VI_256 0 "register_operand" "=x")
6016 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6017 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6018 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6019 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6020 [(set_attr "type" "ssecmp")
6021 (set_attr "prefix_extra" "1")
6022 (set_attr "prefix" "vex")
6023 (set_attr "mode" "OI")])
;; Insn "*sse4_1_eqv2di3": V2DI equality compare.  It requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok for EQ.  Alternative 0 (isa
;; noavx) emits pcmpeqq in two-operand form; alternative 1 (isa avx) emits
;; vpcmpeqq in three-operand form.
6025 (define_insn "*sse4_1_eqv2di3"
6026 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6028 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6029 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6030 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6032 pcmpeqq\t{%2, %0|%0, %2}
6033 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6034 [(set_attr "isa" "noavx,avx")
6035 (set_attr "type" "ssecmp")
6036 (set_attr "prefix_extra" "1")
6037 (set_attr "prefix" "orig,vex")
6038 (set_attr "mode" "TI")])
;; Insn "*sse2_eq<mode>3" over the VI124_128 modes.  It is enabled for
;; TARGET_SSE2 without TARGET_XOP, and requires ix86_binary_operator_ok
;; for EQ.  Alternative 0 (isa noavx) emits pcmpeq in two-operand form;
;; alternative 1 (isa avx) emits vpcmpeq in three-operand form.
6040 (define_insn "*sse2_eq<mode>3"
6041 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6043 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6044 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6045 "TARGET_SSE2 && !TARGET_XOP
6046 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6048 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6049 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6050 [(set_attr "isa" "noavx,avx")
6051 (set_attr "type" "ssecmp")
6052 (set_attr "prefix_data16" "1,*")
6053 (set_attr "prefix" "orig,vex")
6054 (set_attr "mode" "TI")])
;; Expander "sse2_eq<mode>3" over the VI124_128 modes, enabled for
;; TARGET_SSE2 without TARGET_XOP.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code EQ.
6056 (define_expand "sse2_eq<mode>3"
6057 [(set (match_operand:VI124_128 0 "register_operand" "")
6059 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6060 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6061 "TARGET_SSE2 && !TARGET_XOP "
6062 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander "sse4_1_eqv2di3": V2DI equality compare.  Both inputs are
;; nonimmediate_operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code EQ and V2DImode.
6064 (define_expand "sse4_1_eqv2di3"
6065 [(set (match_operand:V2DI 0 "register_operand" "")
6067 (match_operand:V2DI 1 "nonimmediate_operand" "")
6068 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6070 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn "sse4_2_gtv2di3": V2DI compare emitted as pcmpgtq (alternative 0,
;; isa noavx, two-operand form) or vpcmpgtq (alternative 1, isa avx,
;; three-operand form).  Operand 1 must be a register and has no "%"
;; modifier, so the inputs are not treated as commutative.
6072 (define_insn "sse4_2_gtv2di3"
6073 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6075 (match_operand:V2DI 1 "register_operand" "0,x")
6076 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6079 pcmpgtq\t{%2, %0|%0, %2}
6080 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6081 [(set_attr "isa" "noavx,avx")
6082 (set_attr "type" "ssecmp")
6083 (set_attr "prefix_extra" "1")
6084 (set_attr "prefix" "orig,vex")
6085 (set_attr "mode" "TI")])
;; Insn "avx2_gt<mode>3" over the VI_256 modes.  It has a single
;; alternative and emits the three-operand vpcmpgt<ssemodesuffix> form with
;; a VEX prefix and mode OI.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
6087 (define_insn "avx2_gt<mode>3"
6088 [(set (match_operand:VI_256 0 "register_operand" "=x")
6090 (match_operand:VI_256 1 "register_operand" "x")
6091 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6093 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6094 [(set_attr "type" "ssecmp")
6095 (set_attr "prefix_extra" "1")
6096 (set_attr "prefix" "vex")
6097 (set_attr "mode" "OI")])
;; Insn "sse2_gt<mode>3" over the VI124_128 modes, enabled for TARGET_SSE2
;; without TARGET_XOP.  Alternative 0 (isa noavx) emits pcmpgt in
;; two-operand form; alternative 1 (isa avx) emits vpcmpgt in
;; three-operand form.  Operand 1 must be a register.
6099 (define_insn "sse2_gt<mode>3"
6100 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6102 (match_operand:VI124_128 1 "register_operand" "0,x")
6103 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6104 "TARGET_SSE2 && !TARGET_XOP"
6106 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6107 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6108 [(set_attr "isa" "noavx,avx")
6109 (set_attr "type" "ssecmp")
6110 (set_attr "prefix_data16" "1,*")
6111 (set_attr "prefix" "orig,vex")
6112 (set_attr "mode" "TI")])
;; Expander "vcond<V_256:mode><VI_256:mode>": vector conditional whose
;; result and data operands (0, 1, 2) are in a V_256 mode and whose
;; comparison (operator 3 on operands 4 and 5) is in a VI_256 mode.  The
;; condition requires both modes to have the same number of units.
;; Expansion is delegated to ix86_expand_int_vcond; its result is kept
;; in OK.
6114 (define_expand "vcond<V_256:mode><VI_256:mode>"
6115 [(set (match_operand:V_256 0 "register_operand" "")
6117 (match_operator 3 ""
6118 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6119 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6120 (match_operand:V_256 1 "general_operand" "")
6121 (match_operand:V_256 2 "general_operand" "")))]
6123 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6124 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6126 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_128:mode><VI124_128:mode>": 128-bit vector
;; conditional.  Operands 0, 1, 2 are in a V_128 mode; the comparison
;; (operator 3 on operands 4 and 5) is in a VI124_128 mode.  The condition
;; requires both modes to have the same number of units.  Expansion is
;; delegated to ix86_expand_int_vcond; its result is kept in OK.
6131 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6132 [(set (match_operand:V_128 0 "register_operand" "")
6134 (match_operator 3 ""
6135 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6136 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6137 (match_operand:V_128 1 "general_operand" "")
6138 (match_operand:V_128 2 "general_operand" "")))]
6140 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6141 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6143 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<VI8F_128:mode>v2di": vector conditional with a V2DI
;; comparison (operator 3 on operands 4 and 5) selecting between operands
;; 1 and 2, which are in a VI8F_128 mode like the destination.  Expansion
;; is delegated to ix86_expand_int_vcond; its result is kept in OK.
6148 (define_expand "vcond<VI8F_128:mode>v2di"
6149 [(set (match_operand:VI8F_128 0 "register_operand" "")
6150 (if_then_else:VI8F_128
6151 (match_operator 3 ""
6152 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6153 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6154 (match_operand:VI8F_128 1 "general_operand" "")
6155 (match_operand:VI8F_128 2 "general_operand" "")))]
6158 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_256:mode><VI_256:mode>": the "vcondu" counterpart of
;; the 256-bit vcond expander, with the same operand layout (data in
;; V_256, comparison operands 4 and 5 in VI_256) and the same
;; equal-unit-count requirement.  Expansion is delegated to
;; ix86_expand_int_vcond.
6163 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6164 [(set (match_operand:V_256 0 "register_operand" "")
6166 (match_operator 3 ""
6167 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6168 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6169 (match_operand:V_256 1 "general_operand" "")
6170 (match_operand:V_256 2 "general_operand" "")))]
6172 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6173 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6175 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_128:mode><VI124_128:mode>": the "vcondu" counterpart
;; of the 128-bit vcond expander, with data operands in V_128, comparison
;; operands 4 and 5 in VI124_128, and the same equal-unit-count
;; requirement.  Expansion is delegated to ix86_expand_int_vcond.
6180 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6181 [(set (match_operand:V_128 0 "register_operand" "")
6183 (match_operator 3 ""
6184 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6185 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6186 (match_operand:V_128 1 "general_operand" "")
6187 (match_operand:V_128 2 "general_operand" "")))]
6189 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6190 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6192 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<VI8F_128:mode>v2di": the "vcondu" counterpart of
;; "vcond<VI8F_128:mode>v2di".  A V2DI comparison (operator 3 on operands
;; 4 and 5) selects between VI8F_128 operands 1 and 2.  Expansion is
;; delegated to ix86_expand_int_vcond.
6197 (define_expand "vcondu<VI8F_128:mode>v2di"
6198 [(set (match_operand:VI8F_128 0 "register_operand" "")
6199 (if_then_else:VI8F_128
6200 (match_operator 3 ""
6201 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6202 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6203 (match_operand:VI8F_128 1 "general_operand" "")
6204 (match_operand:VI8F_128 2 "general_operand" "")))]
6207 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the vec_perm expander: the six 128-bit vector modes
;; unconditionally, plus the six 256-bit vector modes when TARGET_AVX2.
6212 (define_mode_iterator VEC_PERM_AVX2
6213 [V16QI V8HI V4SI V2DI V4SF V2DF
6214 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6215 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6216 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Expander "vec_perm<mode>" over VEC_PERM_AVX2.  Operands 0, 1, 2 are
;; registers in the vector mode; operand 3 is a register in the
;; corresponding <sseintvecmode>.  It is available with TARGET_SSSE3,
;; TARGET_AVX or TARGET_XOP, and all work is delegated to
;; ix86_expand_vec_perm.
6218 (define_expand "vec_perm<mode>"
6219 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6220 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6221 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6222 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6223 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6225 ix86_expand_vec_perm (operands);
6229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6231 ;; Parallel bitwise logical operations
6233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "one_cmpl<mode>2" over the VI modes, expressed as an xor of
;; operand 1 with operand 2.  The preparation code builds operand 2: an
;; rtvec of GET_MODE_NUNITS elements, each constm1_rtx, wrapped in a
;; CONST_VECTOR of the vector mode and forced into a register.
6235 (define_expand "one_cmpl<mode>2"
6236 [(set (match_operand:VI 0 "register_operand" "")
6237 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6241 int i, n = GET_MODE_NUNITS (<MODE>mode);
6242 rtvec v = rtvec_alloc (n);
6244 for (i = 0; i < n; ++i)
6245 RTVEC_ELT (v, i) = constm1_rtx;
6247 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander "<sse2_avx2>_andnot<mode>3" over the VI_AVX2 modes.  Operand 1
;; (a register) appears under a NOT and is combined with operand 2, which
;; may be a register or memory.
6250 (define_expand "<sse2_avx2>_andnot<mode>3"
6251 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6253 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6254 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn "*andnot<mode>3" over the VI modes.  Operand 1 is complemented and
;; combined with operand 2.  The output code builds the assembler template
;; at run time: it switches on get_attr_mode (insn), asserting the
;; matching TARGET_* flag in each case, then picks a two-operand format
;; string or a "v"-prefixed three-operand one by which_alternative, and
;; formats it into the static buffer BUF with snprintf.
;; The "mode" attribute is V8SF for modes wider than 16 bytes without
;; TARGET_AVX2, V4SF without TARGET_SSE2, and <sseinsnmode> otherwise.
;; NOTE(review): BUF is static, so the returned template is only valid
;; until the next call of this output routine -- confirm callers consume
;; it immediately.
6257 (define_insn "*andnot<mode>3"
6258 [(set (match_operand:VI 0 "register_operand" "=x,x")
6260 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6261 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6264 static char buf[32];
6268 switch (get_attr_mode (insn))
6271 gcc_assert (TARGET_AVX2);
6273 gcc_assert (TARGET_SSE2);
6279 gcc_assert (TARGET_AVX);
6281 gcc_assert (TARGET_SSE);
6290 switch (which_alternative)
6293 ops = "%s\t{%%2, %%0|%%0, %%2}";
6296 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6302 snprintf (buf, sizeof (buf), ops, tmp);
6305 [(set_attr "isa" "noavx,avx")
6306 (set_attr "type" "sselog")
6307 (set (attr "prefix_data16")
6309 (and (eq_attr "alternative" "0")
6310 (eq_attr "mode" "TI"))
6312 (const_string "*")))
6313 (set_attr "prefix" "orig,vex")
6315 (cond [(and (not (match_test "TARGET_AVX2"))
6316 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6317 (const_string "V8SF")
6318 (not (match_test "TARGET_SSE2"))
6319 (const_string "V4SF")
6321 (const_string "<sseinsnmode>")))])
;; Expander "<code><mode>3" for a binary operation over the VI modes.
;; Both inputs are nonimmediate_operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operand array.
;; NOTE(review): located in the bitwise-logical section, so presumably
;; iterated over the logic codes -- confirm.
6323 (define_expand "<code><mode>3"
6324 [(set (match_operand:VI 0 "register_operand" "")
6326 (match_operand:VI 1 "nonimmediate_operand" "")
6327 (match_operand:VI 2 "nonimmediate_operand" "")))]
6329 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn "*<code><mode>3" over the VI modes; requires
;; ix86_binary_operator_ok.  Operand 1 carries the "%" (commutative)
;; constraint modifier.  As in "*andnot<mode>3", the assembler template is
;; built at run time: a switch on get_attr_mode (insn) asserts the
;; matching TARGET_* flag, a switch on which_alternative picks the
;; two-operand or "v"-prefixed three-operand format string, and snprintf
;; writes the result into the static buffer BUF.
;; The "mode" attribute is V8SF for modes wider than 16 bytes without
;; TARGET_AVX2, V4SF without TARGET_SSE2, and <sseinsnmode> otherwise.
6331 (define_insn "*<code><mode>3"
6332 [(set (match_operand:VI 0 "register_operand" "=x,x")
6334 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6335 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6337 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6339 static char buf[32];
6343 switch (get_attr_mode (insn))
6346 gcc_assert (TARGET_AVX2);
6348 gcc_assert (TARGET_SSE2);
6354 gcc_assert (TARGET_AVX);
6356 gcc_assert (TARGET_SSE);
6365 switch (which_alternative)
6368 ops = "%s\t{%%2, %%0|%%0, %%2}";
6371 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6377 snprintf (buf, sizeof (buf), ops, tmp);
6380 [(set_attr "isa" "noavx,avx")
6381 (set_attr "type" "sselog")
6382 (set (attr "prefix_data16")
6384 (and (eq_attr "alternative" "0")
6385 (eq_attr "mode" "TI"))
6387 (const_string "*")))
6388 (set_attr "prefix" "orig,vex")
6390 (cond [(and (not (match_test "TARGET_AVX2"))
6391 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6392 (const_string "V8SF")
6393 (not (match_test "TARGET_SSE2"))
6394 (const_string "V4SF")
6396 (const_string "<sseinsnmode>")))])
;; Insn "*andnottf3": and-not on TFmode values held in SSE registers.
;; Operand 1 is complemented and combined with operand 2.  Alternative 0
;; (isa noavx) emits pandn in two-operand form; alternative 1 (isa avx)
;; emits vpandn in three-operand form.  The insn mode is TI.
6398 (define_insn "*andnottf3"
6399 [(set (match_operand:TF 0 "register_operand" "=x,x")
6401 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6402 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6405 pandn\t{%2, %0|%0, %2}
6406 vpandn\t{%2, %1, %0|%0, %1, %2}"
6407 [(set_attr "isa" "noavx,avx")
6408 (set_attr "type" "sselog")
6409 (set_attr "prefix_data16" "1,*")
6410 (set_attr "prefix" "orig,vex")
6411 (set_attr "mode" "TI")])
;; Expander "<code>tf3": binary operation on TFmode operands.  Both inputs
;; are nonimmediate_operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with TFmode.
6413 (define_expand "<code>tf3"
6414 [(set (match_operand:TF 0 "register_operand" "")
6416 (match_operand:TF 1 "nonimmediate_operand" "")
6417 (match_operand:TF 2 "nonimmediate_operand" "")))]
6419 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn "*<code>tf3": TFmode logical operation; requires
;; ix86_binary_operator_ok.  Alternative 0 (isa noavx) emits p<logic> in
;; two-operand form; alternative 1 (isa avx) emits vp<logic> in
;; three-operand form.  Operand 1 carries the "%" (commutative) constraint
;; modifier.  The insn mode is TI.
6421 (define_insn "*<code>tf3"
6422 [(set (match_operand:TF 0 "register_operand" "=x,x")
6424 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6425 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6427 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6429 p<logic>\t{%2, %0|%0, %2}
6430 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6431 [(set_attr "isa" "noavx,avx")
6432 (set_attr "type" "sselog")
6433 (set_attr "prefix_data16" "1,*")
6434 (set_attr "prefix" "orig,vex")
6435 (set_attr "mode" "TI")])
6437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6439 ;; Parallel integral element swizzling
6441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "vec_pack_trunc_<mode>" over the VI248_AVX2 modes; the result
;; (operand 0) is in <ssepackmode>.  Operands 1 and 2 are reinterpreted in
;; <ssepackmode> with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even elements, i.e. the low half
;; of every wide element on this little-endian target -- confirm.
6443 (define_expand "vec_pack_trunc_<mode>"
6444 [(match_operand:<ssepackmode> 0 "register_operand" "")
6445 (match_operand:VI248_AVX2 1 "register_operand" "")
6446 (match_operand:VI248_AVX2 2 "register_operand" "")]
6449 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6450 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6451 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn "<sse2_avx2>_packsswb" over the VI1_AVX2 modes.  The result is the
;; vec_concat of the ss_truncate (signed saturating truncation) of
;; operands 1 and 2, each of mode <sseunpackmode>, to <ssehalfvecmode>.
;; Alternative 0 (isa noavx) emits packsswb; alternative 1 (isa avx) emits
;; vpacksswb.
6455 (define_insn "<sse2_avx2>_packsswb"
6456 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6457 (vec_concat:VI1_AVX2
6458 (ss_truncate:<ssehalfvecmode>
6459 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6460 (ss_truncate:<ssehalfvecmode>
6461 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6464 packsswb\t{%2, %0|%0, %2}
6465 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6466 [(set_attr "isa" "noavx,avx")
6467 (set_attr "type" "sselog")
6468 (set_attr "prefix_data16" "1,*")
6469 (set_attr "prefix" "orig,vex")
6470 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packssdw" over the VI2_AVX2 modes.  The result is the
;; vec_concat of the ss_truncate (signed saturating truncation) of
;; operands 1 and 2, each of mode <sseunpackmode>, to <ssehalfvecmode>.
;; Alternative 0 (isa noavx) emits packssdw; alternative 1 (isa avx) emits
;; vpackssdw.
6472 (define_insn "<sse2_avx2>_packssdw"
6473 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6474 (vec_concat:VI2_AVX2
6475 (ss_truncate:<ssehalfvecmode>
6476 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6477 (ss_truncate:<ssehalfvecmode>
6478 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6481 packssdw\t{%2, %0|%0, %2}
6482 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6483 [(set_attr "isa" "noavx,avx")
6484 (set_attr "type" "sselog")
6485 (set_attr "prefix_data16" "1,*")
6486 (set_attr "prefix" "orig,vex")
6487 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packuswb" over the VI1_AVX2 modes.  The result is the
;; vec_concat of the us_truncate (unsigned saturating truncation) of
;; operands 1 and 2, each of mode <sseunpackmode>, to <ssehalfvecmode>.
;; Alternative 0 (isa noavx) emits packuswb; alternative 1 (isa avx) emits
;; vpackuswb.
6489 (define_insn "<sse2_avx2>_packuswb"
6490 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6491 (vec_concat:VI1_AVX2
6492 (us_truncate:<ssehalfvecmode>
6493 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6494 (us_truncate:<ssehalfvecmode>
6495 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6498 packuswb\t{%2, %0|%0, %2}
6499 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6500 [(set_attr "isa" "noavx,avx")
6501 (set_attr "type" "sselog")
6502 (set_attr "prefix_data16" "1,*")
6503 (set_attr "prefix" "orig,vex")
6504 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_interleave_highv32qi": emits vpunpckhbw on V32QI operands
;; (VEX prefix, mode OI).  The selection list pairs index i with index
;; i + 32 for i = 8..15 and then for i = 24..31.
;; NOTE(review): indices of 32 and above presumably address operand 2 in
;; the concatenation of operands 1 and 2, so this takes the upper eight
;; bytes of each 128-bit lane from both inputs -- confirm.
6506 (define_insn "avx2_interleave_highv32qi"
6507 [(set (match_operand:V32QI 0 "register_operand" "=x")
6510 (match_operand:V32QI 1 "register_operand" "x")
6511 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6512 (parallel [(const_int 8) (const_int 40)
6513 (const_int 9) (const_int 41)
6514 (const_int 10) (const_int 42)
6515 (const_int 11) (const_int 43)
6516 (const_int 12) (const_int 44)
6517 (const_int 13) (const_int 45)
6518 (const_int 14) (const_int 46)
6519 (const_int 15) (const_int 47)
6520 (const_int 24) (const_int 56)
6521 (const_int 25) (const_int 57)
6522 (const_int 26) (const_int 58)
6523 (const_int 27) (const_int 59)
6524 (const_int 28) (const_int 60)
6525 (const_int 29) (const_int 61)
6526 (const_int 30) (const_int 62)
6527 (const_int 31) (const_int 63)])))]
6529 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6530 [(set_attr "type" "sselog")
6531 (set_attr "prefix" "vex")
6532 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv16qi": V16QI high interleave.  The selection
;; list pairs index i with index i + 16 for i = 8..15.  Alternative 0 (isa
;; noavx) emits punpckhbw in two-operand form; alternative 1 (isa avx)
;; emits vpunpckhbw in three-operand form.
;; NOTE(review): indices of 16 and above presumably address operand 2 in
;; the concatenation of operands 1 and 2 -- confirm.
6534 (define_insn "vec_interleave_highv16qi"
6535 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6538 (match_operand:V16QI 1 "register_operand" "0,x")
6539 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6540 (parallel [(const_int 8) (const_int 24)
6541 (const_int 9) (const_int 25)
6542 (const_int 10) (const_int 26)
6543 (const_int 11) (const_int 27)
6544 (const_int 12) (const_int 28)
6545 (const_int 13) (const_int 29)
6546 (const_int 14) (const_int 30)
6547 (const_int 15) (const_int 31)])))]
6550 punpckhbw\t{%2, %0|%0, %2}
6551 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6552 [(set_attr "isa" "noavx,avx")
6553 (set_attr "type" "sselog")
6554 (set_attr "prefix_data16" "1,*")
6555 (set_attr "prefix" "orig,vex")
6556 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv32qi": emits vpunpcklbw on V32QI operands
;; (VEX prefix, mode OI).  The selection list pairs index i with index
;; i + 32 for i = 0..7 and then for i = 16..23.
;; NOTE(review): presumably this takes the lower eight bytes of each
;; 128-bit lane from both inputs -- confirm.
6558 (define_insn "avx2_interleave_lowv32qi"
6559 [(set (match_operand:V32QI 0 "register_operand" "=x")
6562 (match_operand:V32QI 1 "register_operand" "x")
6563 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6564 (parallel [(const_int 0) (const_int 32)
6565 (const_int 1) (const_int 33)
6566 (const_int 2) (const_int 34)
6567 (const_int 3) (const_int 35)
6568 (const_int 4) (const_int 36)
6569 (const_int 5) (const_int 37)
6570 (const_int 6) (const_int 38)
6571 (const_int 7) (const_int 39)
6572 (const_int 16) (const_int 48)
6573 (const_int 17) (const_int 49)
6574 (const_int 18) (const_int 50)
6575 (const_int 19) (const_int 51)
6576 (const_int 20) (const_int 52)
6577 (const_int 21) (const_int 53)
6578 (const_int 22) (const_int 54)
6579 (const_int 23) (const_int 55)])))]
6581 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6582 [(set_attr "type" "sselog")
6583 (set_attr "prefix" "vex")
6584 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv16qi": V16QI low interleave.  The selection
;; list pairs index i with index i + 16 for i = 0..7.  Alternative 0 (isa
;; noavx) emits punpcklbw in two-operand form; alternative 1 (isa avx)
;; emits vpunpcklbw in three-operand form.
6586 (define_insn "vec_interleave_lowv16qi"
6587 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6590 (match_operand:V16QI 1 "register_operand" "0,x")
6591 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6592 (parallel [(const_int 0) (const_int 16)
6593 (const_int 1) (const_int 17)
6594 (const_int 2) (const_int 18)
6595 (const_int 3) (const_int 19)
6596 (const_int 4) (const_int 20)
6597 (const_int 5) (const_int 21)
6598 (const_int 6) (const_int 22)
6599 (const_int 7) (const_int 23)])))]
6602 punpcklbw\t{%2, %0|%0, %2}
6603 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6604 [(set_attr "isa" "noavx,avx")
6605 (set_attr "type" "sselog")
6606 (set_attr "prefix_data16" "1,*")
6607 (set_attr "prefix" "orig,vex")
6608 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_highv16hi": emits vpunpckhwd on V16HI operands
;; (VEX prefix, mode OI).  The selection list pairs index i with index
;; i + 16 for i = 4..7 and then for i = 12..15.
;; NOTE(review): presumably this takes the upper four words of each
;; 128-bit lane from both inputs -- confirm.
6610 (define_insn "avx2_interleave_highv16hi"
6611 [(set (match_operand:V16HI 0 "register_operand" "=x")
6614 (match_operand:V16HI 1 "register_operand" "x")
6615 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6616 (parallel [(const_int 4) (const_int 20)
6617 (const_int 5) (const_int 21)
6618 (const_int 6) (const_int 22)
6619 (const_int 7) (const_int 23)
6620 (const_int 12) (const_int 28)
6621 (const_int 13) (const_int 29)
6622 (const_int 14) (const_int 30)
6623 (const_int 15) (const_int 31)])))]
6625 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6626 [(set_attr "type" "sselog")
6627 (set_attr "prefix" "vex")
6628 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv8hi": V8HI high interleave.  The selection
;; list pairs index i with index i + 8 for i = 4..7.  Alternative 0 (isa
;; noavx) emits punpckhwd in two-operand form; alternative 1 (isa avx)
;; emits vpunpckhwd in three-operand form.
6630 (define_insn "vec_interleave_highv8hi"
6631 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6634 (match_operand:V8HI 1 "register_operand" "0,x")
6635 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6636 (parallel [(const_int 4) (const_int 12)
6637 (const_int 5) (const_int 13)
6638 (const_int 6) (const_int 14)
6639 (const_int 7) (const_int 15)])))]
6642 punpckhwd\t{%2, %0|%0, %2}
6643 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6644 [(set_attr "isa" "noavx,avx")
6645 (set_attr "type" "sselog")
6646 (set_attr "prefix_data16" "1,*")
6647 (set_attr "prefix" "orig,vex")
6648 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv16hi": emits vpunpcklwd on V16HI operands
;; (VEX prefix, mode OI).  The selection list pairs index i with index
;; i + 16 for i = 0..3 and then for i = 8..11.
;; NOTE(review): presumably this takes the lower four words of each
;; 128-bit lane from both inputs -- confirm.
6650 (define_insn "avx2_interleave_lowv16hi"
6651 [(set (match_operand:V16HI 0 "register_operand" "=x")
6654 (match_operand:V16HI 1 "register_operand" "x")
6655 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6656 (parallel [(const_int 0) (const_int 16)
6657 (const_int 1) (const_int 17)
6658 (const_int 2) (const_int 18)
6659 (const_int 3) (const_int 19)
6660 (const_int 8) (const_int 24)
6661 (const_int 9) (const_int 25)
6662 (const_int 10) (const_int 26)
6663 (const_int 11) (const_int 27)])))]
6665 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6666 [(set_attr "type" "sselog")
6667 (set_attr "prefix" "vex")
6668 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv8hi": V8HI low interleave.  The selection list
;; pairs index i with index i + 8 for i = 0..3.  Alternative 0 (isa noavx)
;; emits punpcklwd in two-operand form; alternative 1 (isa avx) emits
;; vpunpcklwd in three-operand form.
6670 (define_insn "vec_interleave_lowv8hi"
6671 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6674 (match_operand:V8HI 1 "register_operand" "0,x")
6675 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6676 (parallel [(const_int 0) (const_int 8)
6677 (const_int 1) (const_int 9)
6678 (const_int 2) (const_int 10)
6679 (const_int 3) (const_int 11)])))]
6682 punpcklwd\t{%2, %0|%0, %2}
6683 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6684 [(set_attr "isa" "noavx,avx")
6685 (set_attr "type" "sselog")
6686 (set_attr "prefix_data16" "1,*")
6687 (set_attr "prefix" "orig,vex")
6688 (set_attr "mode" "TI")])
;; High-half doubleword interleave on V8SI; emits vpunpckhdq.
;; The selector pairs indices 2-3 with 10-11 and 6-7 with 14-15.
;; NOTE(review): indices >= 8 presumably address operand 2 -- confirm
;; against the wrapping selection expression.
6690 (define_insn "avx2_interleave_highv8si"
6691 [(set (match_operand:V8SI 0 "register_operand" "=x")
6694 (match_operand:V8SI 1 "register_operand" "x")
6695 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6696 (parallel [(const_int 2) (const_int 10)
6697 (const_int 3) (const_int 11)
6698 (const_int 6) (const_int 14)
6699 (const_int 7) (const_int 15)])))]
6701 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6702 [(set_attr "type" "sselog")
6703 (set_attr "prefix" "vex")
6704 (set_attr "mode" "OI")])
;; High-half doubleword interleave on V4SI: selector pairs indices 2-3
;; with 6-7.  Alternative 0 (isa noavx) ties operand 1 to the destination
;; and emits punpckhdq; alternative 1 (isa avx) emits vpunpckhdq.
6706 (define_insn "vec_interleave_highv4si"
6707 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6710 (match_operand:V4SI 1 "register_operand" "0,x")
6711 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6712 (parallel [(const_int 2) (const_int 6)
6713 (const_int 3) (const_int 7)])))]
6716 punpckhdq\t{%2, %0|%0, %2}
6717 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6718 [(set_attr "isa" "noavx,avx")
6719 (set_attr "type" "sselog")
6720 (set_attr "prefix_data16" "1,*")
6721 (set_attr "prefix" "orig,vex")
6722 (set_attr "mode" "TI")])
;; Low-half doubleword interleave on V8SI; emits vpunpckldq.
;; The selector pairs indices 0-1 with 8-9 and 4-5 with 12-13.
;; NOTE(review): indices >= 8 presumably address operand 2 -- confirm
;; against the wrapping selection expression.
6724 (define_insn "avx2_interleave_lowv8si"
6725 [(set (match_operand:V8SI 0 "register_operand" "=x")
6728 (match_operand:V8SI 1 "register_operand" "x")
6729 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6730 (parallel [(const_int 0) (const_int 8)
6731 (const_int 1) (const_int 9)
6732 (const_int 4) (const_int 12)
6733 (const_int 5) (const_int 13)])))]
6735 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6736 [(set_attr "type" "sselog")
6737 (set_attr "prefix" "vex")
6738 (set_attr "mode" "OI")])
;; Low-half doubleword interleave on V4SI: selector pairs indices 0-1
;; with 4-5.  Alternative 0 (isa noavx) ties operand 1 to the destination
;; and emits punpckldq; alternative 1 (isa avx) emits vpunpckldq.
6740 (define_insn "vec_interleave_lowv4si"
6741 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6744 (match_operand:V4SI 1 "register_operand" "0,x")
6745 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6746 (parallel [(const_int 0) (const_int 4)
6747 (const_int 1) (const_int 5)])))]
6750 punpckldq\t{%2, %0|%0, %2}
6751 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6752 [(set_attr "isa" "noavx,avx")
6753 (set_attr "type" "sselog")
6754 (set_attr "prefix_data16" "1,*")
6755 (set_attr "prefix" "orig,vex")
6756 (set_attr "mode" "TI")])
6758 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1, and V2DI
;; additionally requires TARGET_64BIT.
6759 (define_mode_iterator PINSR_MODE
6760 [(V16QI "TARGET_SSE4_1") V8HI
6761 (V4SI "TARGET_SSE4_1")
6762 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix used in the pinsr pattern name: "sse2" for V8HI,
;; "sse4_1" for V16QI, V4SI and V2DI.
6764 (define_mode_attr sse2p4_1
6765 [(V16QI "sse4_1") (V8HI "sse2")
6766 (V4SI "sse4_1") (V2DI "sse4_1")])
6768 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert a scalar (operand 2, general register or memory) into one
;; element of vector operand 1, expressed as a vec_merge of a
;; vec_duplicate.  Operand 3 arrives as a merge mask: the condition
;; requires exact_log2 of it to be below the number of vector units, and
;; the output code replaces it with that log2 value (the element index)
;; before printing.  When the scalar mode is narrower than SImode the
;; register operand is printed with %k2.  Alternatives 0-1 are the
;; non-AVX two-operand forms (operand 1 tied to the destination);
;; alternatives 2-3 are the AVX three-operand forms.
;; prefix_rex, prefix_data16 and prefix_extra depend on TARGET_AVX and
;; on the vector mode.
6769 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6770 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6771 (vec_merge:PINSR_MODE
6772 (vec_duplicate:PINSR_MODE
6773 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6774 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6775 (match_operand:SI 3 "const_int_operand" "")))]
6777 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6778 < GET_MODE_NUNITS (<MODE>mode))"
6780 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6782 switch (which_alternative)
6785 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6786 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6789 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6791 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6792 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6795 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6800 [(set_attr "isa" "noavx,noavx,avx,avx")
6801 (set_attr "type" "sselog")
6802 (set (attr "prefix_rex")
6804 (and (not (match_test "TARGET_AVX"))
6805 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6807 (const_string "*")))
6808 (set (attr "prefix_data16")
6810 (and (not (match_test "TARGET_AVX"))
6811 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6813 (const_string "*")))
6814 (set (attr "prefix_extra")
6816 (and (not (match_test "TARGET_AVX"))
6817 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6819 (const_string "1")))
6820 (set_attr "length_immediate" "1")
6821 (set_attr "prefix" "orig,orig,vex,vex")
6822 (set_attr "mode" "TI")])
;; Extract one byte of a V16QI register (index 0-15, operand 2) into a
;; general register of an SWI48 mode.  The destination is printed with
;; %k0, so pextrb/vpextrb is always given the 32-bit register name.
6824 (define_insn "*sse4_1_pextrb_<mode>"
6825 [(set (match_operand:SWI48 0 "register_operand" "=r")
6828 (match_operand:V16QI 1 "register_operand" "x")
6829 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6831 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6832 [(set_attr "type" "sselog")
6833 (set_attr "prefix_extra" "1")
6834 (set_attr "length_immediate" "1")
6835 (set_attr "prefix" "maybe_vex")
6836 (set_attr "mode" "TI")])
;; Extract one byte of a V16QI register (index 0-15, operand 2) directly
;; to a QImode memory destination with pextrb/vpextrb.
6838 (define_insn "*sse4_1_pextrb_memory"
6839 [(set (match_operand:QI 0 "memory_operand" "=m")
6841 (match_operand:V16QI 1 "register_operand" "x")
6842 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6844 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6845 [(set_attr "type" "sselog")
6846 (set_attr "prefix_extra" "1")
6847 (set_attr "length_immediate" "1")
6848 (set_attr "prefix" "maybe_vex")
6849 (set_attr "mode" "TI")])
;; Extract one word of a V8HI register (index 0-7, operand 2) into a
;; general register of an SWI48 mode; the destination is printed with
;; %k0 (32-bit register name).  Uses prefix_data16 rather than
;; prefix_extra, unlike the memory-destination pextrw pattern.
6851 (define_insn "*sse2_pextrw_<mode>"
6852 [(set (match_operand:SWI48 0 "register_operand" "=r")
6855 (match_operand:V8HI 1 "register_operand" "x")
6856 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6858 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6859 [(set_attr "type" "sselog")
6860 (set_attr "prefix_data16" "1")
6861 (set_attr "length_immediate" "1")
6862 (set_attr "prefix" "maybe_vex")
6863 (set_attr "mode" "TI")])
;; Extract one word of a V8HI register (index 0-7, operand 2) directly
;; to an HImode memory destination with pextrw/vpextrw.
6865 (define_insn "*sse4_1_pextrw_memory"
6866 [(set (match_operand:HI 0 "memory_operand" "=m")
6868 (match_operand:V8HI 1 "register_operand" "x")
6869 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6871 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6872 [(set_attr "type" "sselog")
6873 (set_attr "prefix_extra" "1")
6874 (set_attr "length_immediate" "1")
6875 (set_attr "prefix" "maybe_vex")
6876 (set_attr "mode" "TI")])
;; Extract one doubleword of a V4SI register (index 0-3, operand 2) to
;; an SImode general register or memory with pextrd/vpextrd.
6878 (define_insn "*sse4_1_pextrd"
6879 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6881 (match_operand:V4SI 1 "register_operand" "x")
6882 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6884 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix_extra" "1")
6887 (set_attr "length_immediate" "1")
6888 (set_attr "prefix" "maybe_vex")
6889 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd with a DImode register destination.
;; The instruction is still printed with the 32-bit register name (%k0).
;; NOTE(review): the name suggests the DImode result is the zero
;; extension of the extracted element -- confirm against the full RTL.
6891 (define_insn "*sse4_1_pextrd_zext"
6892 [(set (match_operand:DI 0 "register_operand" "=r")
6895 (match_operand:V4SI 1 "register_operand" "x")
6896 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6897 "TARGET_64BIT && TARGET_SSE4_1"
6898 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6899 [(set_attr "type" "sselog")
6900 (set_attr "prefix_extra" "1")
6901 (set_attr "length_immediate" "1")
6902 (set_attr "prefix" "maybe_vex")
6903 (set_attr "mode" "TI")])
6905 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract one quadword of a V2DI register (index 0-1, operand 2) to a
;; DImode general register or memory.  Requires SSE4.1 and a 64-bit
;; target; prefix_rex is always 1.
6906 (define_insn "*sse4_1_pextrq"
6907 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6909 (match_operand:V2DI 1 "register_operand" "x")
6910 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6911 "TARGET_SSE4_1 && TARGET_64BIT"
6912 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6913 [(set_attr "type" "sselog")
6914 (set_attr "prefix_rex" "1")
6915 (set_attr "prefix_extra" "1")
6916 (set_attr "length_immediate" "1")
6917 (set_attr "prefix" "maybe_vex")
6918 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0-255).
;; It splits the mask into four 2-bit selectors and hands them to
;; avx2_pshufd_1 twice: once as-is (values 0-3) and once offset by 4
;; (values 4-7), so the same selection is described for both groups of
;; four elements.
6920 (define_expand "avx2_pshufdv3"
6921 [(match_operand:V8SI 0 "register_operand" "")
6922 (match_operand:V8SI 1 "nonimmediate_operand" "")
6923 (match_operand:SI 2 "const_0_to_255_operand" "")]
6926 int mask = INTVAL (operands[2]);
6927 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
6928 GEN_INT ((mask >> 0) & 3),
6929 GEN_INT ((mask >> 2) & 3),
6930 GEN_INT ((mask >> 4) & 3),
6931 GEN_INT ((mask >> 6) & 3),
6932 GEN_INT (((mask >> 0) & 3) + 4),
6933 GEN_INT (((mask >> 2) & 3) + 4),
6934 GEN_INT (((mask >> 4) & 3) + 4),
6935 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle with explicit per-element selectors: operands 2-5 are in
;; 0-3 and operands 6-9 are in 4-7.  The condition only accepts selector
;; sets where operands 6-9 equal operands 2-5 plus 4, i.e. both groups
;; of four use the same pattern.  The output code packs operands 2-5
;; back into one 8-bit immediate (2 bits each, operand 2 in the low
;; bits), stores it in operands[2] and emits vpshufd.
6939 (define_insn "avx2_pshufd_1"
6940 [(set (match_operand:V8SI 0 "register_operand" "=x")
6942 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
6943 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6944 (match_operand 3 "const_0_to_3_operand" "")
6945 (match_operand 4 "const_0_to_3_operand" "")
6946 (match_operand 5 "const_0_to_3_operand" "")
6947 (match_operand 6 "const_4_to_7_operand" "")
6948 (match_operand 7 "const_4_to_7_operand" "")
6949 (match_operand 8 "const_4_to_7_operand" "")
6950 (match_operand 9 "const_4_to_7_operand" "")])))]
6952 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
6953 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
6954 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
6955 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
6958 mask |= INTVAL (operands[2]) << 0;
6959 mask |= INTVAL (operands[3]) << 2;
6960 mask |= INTVAL (operands[4]) << 4;
6961 mask |= INTVAL (operands[5]) << 6;
6962 operands[2] = GEN_INT (mask);
6964 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
6966 [(set_attr "type" "sselog1")
6967 (set_attr "prefix" "vex")
6968 (set_attr "length_immediate" "1")
6969 (set_attr "mode" "OI")])
;; Expander for the V4SI shuffle: splits the immediate in operand 2 into
;; four 2-bit selectors (low bits first) and emits sse2_pshufd_1.
;; Operand 2 is only checked as const_int_operand here; bits above the
;; low 8 are ignored by the masking.
6971 (define_expand "sse2_pshufd"
6972 [(match_operand:V4SI 0 "register_operand" "")
6973 (match_operand:V4SI 1 "nonimmediate_operand" "")
6974 (match_operand:SI 2 "const_int_operand" "")]
6977 int mask = INTVAL (operands[2]);
6978 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6979 GEN_INT ((mask >> 0) & 3),
6980 GEN_INT ((mask >> 2) & 3),
6981 GEN_INT ((mask >> 4) & 3),
6982 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit selectors (operands 2-5, each 0-3).
;; The output code packs them into one 8-bit immediate (2 bits each,
;; operand 2 in the low bits), stores it in operands[2] and emits
;; pshufd/vpshufd.
6986 (define_insn "sse2_pshufd_1"
6987 [(set (match_operand:V4SI 0 "register_operand" "=x")
6989 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6990 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6991 (match_operand 3 "const_0_to_3_operand" "")
6992 (match_operand 4 "const_0_to_3_operand" "")
6993 (match_operand 5 "const_0_to_3_operand" "")])))]
6997 mask |= INTVAL (operands[2]) << 0;
6998 mask |= INTVAL (operands[3]) << 2;
6999 mask |= INTVAL (operands[4]) << 4;
7000 mask |= INTVAL (operands[5]) << 6;
7001 operands[2] = GEN_INT (mask);
7003 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7005 [(set_attr "type" "sselog1")
7006 (set_attr "prefix_data16" "1")
7007 (set_attr "prefix" "maybe_vex")
7008 (set_attr "length_immediate" "1")
7009 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0-255) for the
;; V16HI low-word shuffle.  The four 2-bit selectors are passed to
;; avx2_pshuflw_1 as-is (0-3) and again offset by 8 (8-11).
7011 (define_expand "avx2_pshuflwv3"
7012 [(match_operand:V16HI 0 "register_operand" "")
7013 (match_operand:V16HI 1 "nonimmediate_operand" "")
7014 (match_operand:SI 2 "const_0_to_255_operand" "")]
7017 int mask = INTVAL (operands[2]);
7018 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7019 GEN_INT ((mask >> 0) & 3),
7020 GEN_INT ((mask >> 2) & 3),
7021 GEN_INT ((mask >> 4) & 3),
7022 GEN_INT ((mask >> 6) & 3),
7023 GEN_INT (((mask >> 0) & 3) + 8),
7024 GEN_INT (((mask >> 2) & 3) + 8),
7025 GEN_INT (((mask >> 4) & 3) + 8),
7026 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI low-word shuffle with explicit selectors: operands 2-5 are in
;; 0-3 and operands 6-9 are in 8-11.  The condition requires operands
;; 6-9 to equal operands 2-5 plus 8.  The output code packs operands 2-5
;; into one 8-bit immediate (2 bits each, operand 2 in the low bits),
;; stores it in operands[2] and emits vpshuflw.
7030 (define_insn "avx2_pshuflw_1"
7031 [(set (match_operand:V16HI 0 "register_operand" "=x")
7033 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7034 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7035 (match_operand 3 "const_0_to_3_operand" "")
7036 (match_operand 4 "const_0_to_3_operand" "")
7037 (match_operand 5 "const_0_to_3_operand" "")
7042 (match_operand 6 "const_8_to_11_operand" "")
7043 (match_operand 7 "const_8_to_11_operand" "")
7044 (match_operand 8 "const_8_to_11_operand" "")
7045 (match_operand 9 "const_8_to_11_operand" "")
7051 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7052 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7053 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7054 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7057 mask |= INTVAL (operands[2]) << 0;
7058 mask |= INTVAL (operands[3]) << 2;
7059 mask |= INTVAL (operands[4]) << 4;
7060 mask |= INTVAL (operands[5]) << 6;
7061 operands[2] = GEN_INT (mask);
7063 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7065 [(set_attr "type" "sselog")
7066 (set_attr "prefix" "vex")
7067 (set_attr "length_immediate" "1")
7068 (set_attr "mode" "OI")])
;; Expander for the V8HI low-word shuffle: splits the immediate in
;; operand 2 into four 2-bit selectors (low bits first) and emits
;; sse2_pshuflw_1.
7070 (define_expand "sse2_pshuflw"
7071 [(match_operand:V8HI 0 "register_operand" "")
7072 (match_operand:V8HI 1 "nonimmediate_operand" "")
7073 (match_operand:SI 2 "const_int_operand" "")]
7076 int mask = INTVAL (operands[2]);
7077 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7078 GEN_INT ((mask >> 0) & 3),
7079 GEN_INT ((mask >> 2) & 3),
7080 GEN_INT ((mask >> 4) & 3),
7081 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word shuffle with four explicit selectors (operands 2-5,
;; each 0-3).  The output code packs them into one 8-bit immediate
;; (2 bits each, operand 2 in the low bits), stores it in operands[2]
;; and emits pshuflw/vpshuflw.  Encoded with a rep prefix and no data16
;; prefix (prefix_rep 1, prefix_data16 0).
7085 (define_insn "sse2_pshuflw_1"
7086 [(set (match_operand:V8HI 0 "register_operand" "=x")
7088 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7089 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7090 (match_operand 3 "const_0_to_3_operand" "")
7091 (match_operand 4 "const_0_to_3_operand" "")
7092 (match_operand 5 "const_0_to_3_operand" "")
7100 mask |= INTVAL (operands[2]) << 0;
7101 mask |= INTVAL (operands[3]) << 2;
7102 mask |= INTVAL (operands[4]) << 4;
7103 mask |= INTVAL (operands[5]) << 6;
7104 operands[2] = GEN_INT (mask);
7106 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7108 [(set_attr "type" "sselog")
7109 (set_attr "prefix_data16" "0")
7110 (set_attr "prefix_rep" "1")
7111 (set_attr "prefix" "maybe_vex")
7112 (set_attr "length_immediate" "1")
7113 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0-255) for the
;; V16HI high-word shuffle.  The four 2-bit selectors are passed to
;; avx2_pshufhw_1 offset by 4 (values 4-7) and again offset by 12
;; (values 12-15).
7115 (define_expand "avx2_pshufhwv3"
7116 [(match_operand:V16HI 0 "register_operand" "")
7117 (match_operand:V16HI 1 "nonimmediate_operand" "")
7118 (match_operand:SI 2 "const_0_to_255_operand" "")]
7121 int mask = INTVAL (operands[2]);
7122 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7123 GEN_INT (((mask >> 0) & 3) + 4),
7124 GEN_INT (((mask >> 2) & 3) + 4),
7125 GEN_INT (((mask >> 4) & 3) + 4),
7126 GEN_INT (((mask >> 6) & 3) + 4),
7127 GEN_INT (((mask >> 0) & 3) + 12),
7128 GEN_INT (((mask >> 2) & 3) + 12),
7129 GEN_INT (((mask >> 4) & 3) + 12),
7130 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI high-word shuffle with explicit selectors: operands 2-5 are in
;; 4-7 and operands 6-9 are in 12-15.  The condition requires operands
;; 6-9 to equal operands 2-5 plus 8.  The output code subtracts 4 from
;; operands 2-5 to recover the 2-bit fields, packs them into one 8-bit
;; immediate (operand 2 in the low bits), stores it in operands[2] and
;; emits vpshufhw.
7134 (define_insn "avx2_pshufhw_1"
7135 [(set (match_operand:V16HI 0 "register_operand" "=x")
7137 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7138 (parallel [(const_int 0)
7142 (match_operand 2 "const_4_to_7_operand" "")
7143 (match_operand 3 "const_4_to_7_operand" "")
7144 (match_operand 4 "const_4_to_7_operand" "")
7145 (match_operand 5 "const_4_to_7_operand" "")
7150 (match_operand 6 "const_12_to_15_operand" "")
7151 (match_operand 7 "const_12_to_15_operand" "")
7152 (match_operand 8 "const_12_to_15_operand" "")
7153 (match_operand 9 "const_12_to_15_operand" "")])))]
7155 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7156 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7157 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7158 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7161 mask |= (INTVAL (operands[2]) - 4) << 0;
7162 mask |= (INTVAL (operands[3]) - 4) << 2;
7163 mask |= (INTVAL (operands[4]) - 4) << 4;
7164 mask |= (INTVAL (operands[5]) - 4) << 6;
7165 operands[2] = GEN_INT (mask);
7167 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7169 [(set_attr "type" "sselog")
7170 (set_attr "prefix" "vex")
7171 (set_attr "length_immediate" "1")
7172 (set_attr "mode" "OI")])
;; Expander for the V8HI high-word shuffle: splits the immediate in
;; operand 2 into four 2-bit selectors, adds 4 to each (giving element
;; indices 4-7) and emits sse2_pshufhw_1.
7174 (define_expand "sse2_pshufhw"
7175 [(match_operand:V8HI 0 "register_operand" "")
7176 (match_operand:V8HI 1 "nonimmediate_operand" "")
7177 (match_operand:SI 2 "const_int_operand" "")]
7180 int mask = INTVAL (operands[2]);
7181 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7182 GEN_INT (((mask >> 0) & 3) + 4),
7183 GEN_INT (((mask >> 2) & 3) + 4),
7184 GEN_INT (((mask >> 4) & 3) + 4),
7185 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word shuffle with four explicit selectors (operands 2-5,
;; each 4-7).  The output code subtracts 4 from each, packs the results
;; into one 8-bit immediate (2 bits each, operand 2 in the low bits),
;; stores it in operands[2] and emits pshufhw/vpshufhw.
7189 (define_insn "sse2_pshufhw_1"
7190 [(set (match_operand:V8HI 0 "register_operand" "=x")
7192 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7193 (parallel [(const_int 0)
7197 (match_operand 2 "const_4_to_7_operand" "")
7198 (match_operand 3 "const_4_to_7_operand" "")
7199 (match_operand 4 "const_4_to_7_operand" "")
7200 (match_operand 5 "const_4_to_7_operand" "")])))]
7204 mask |= (INTVAL (operands[2]) - 4) << 0;
7205 mask |= (INTVAL (operands[3]) - 4) << 2;
7206 mask |= (INTVAL (operands[4]) - 4) << 4;
7207 mask |= (INTVAL (operands[5]) - 4) << 6;
7208 operands[2] = GEN_INT (mask);
7210 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7212 [(set_attr "type" "sselog")
7213 (set_attr "prefix_rep" "1")
7214 (set_attr "prefix_data16" "0")
7215 (set_attr "prefix" "maybe_vex")
7216 (set_attr "length_immediate" "1")
7217 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from an SImode register-or-memory
;; operand 1.  The preparation statement sets operands[2] to the V4SI
;; zero constant for use by the pattern.
7219 (define_expand "sse2_loadd"
7220 [(set (match_operand:V4SI 0 "register_operand" "")
7223 (match_operand:SI 1 "nonimmediate_operand" ""))
7227 "operands[2] = CONST0_RTX (V4SImode);")
;; Combine an SImode value (operand 2) with V4SI operand 1, which is
;; either zero ("C") or a register.  Alternatives:
;;   0: memory source, zero operand 1          -> movd/vmovd (isa sse2)
;;   1: general-register source into "Yi", zero operand 1 -> movd/vmovd
;;   2: memory source, zero operand 1          -> movss (non-AVX)
;;   3: SSE-register source, operand 1 tied to destination -> movss
;;   4: SSE-register source, separate operand 1 -> three-operand vmovss
7229 (define_insn "sse2_loadld"
7230 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7233 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7234 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7238 %vmovd\t{%2, %0|%0, %2}
7239 %vmovd\t{%2, %0|%0, %2}
7240 movss\t{%2, %0|%0, %2}
7241 movss\t{%2, %0|%0, %2}
7242 vmovss\t{%2, %1, %0|%0, %1, %2}"
7243 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7244 (set_attr "type" "ssemov")
7245 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7246 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination
;; (SSE register/memory, or a general register from a "Yi" source).
;; After reload the insn is split into a plain SImode move when
;; inter-unit moves are enabled, the destination is memory, or the
;; destination is not a general register; operand 1 is rewritten as an
;; SImode REG with the same register number.
7248 (define_insn_and_split "sse2_stored"
7249 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7251 (match_operand:V4SI 1 "register_operand" "x,Yi")
7252 (parallel [(const_int 0)])))]
7255 "&& reload_completed
7256 && (TARGET_INTER_UNIT_MOVES
7257 || MEM_P (operands [0])
7258 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7259 [(set (match_dup 0) (match_dup 1))]
7260 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0-3) of a V4SI value held in offsettable
;; memory into an SImode general register.  The split code emits a plain
;; SImode move from the source address advanced by 4 * index bytes.
7262 (define_insn_and_split "*vec_ext_v4si_mem"
7263 [(set (match_operand:SI 0 "register_operand" "=r")
7265 (match_operand:V4SI 1 "memory_operand" "o")
7266 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7272 int i = INTVAL (operands[2]);
7274 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander storing element 0 of a V2DI register into a DImode
;; register-or-memory destination.
7278 (define_expand "sse_storeq"
7279 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7281 (match_operand:V2DI 1 "register_operand" "")
7282 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI store.  Rejects the case where
;; both operands are memory.  Three alternatives: SSE register source to
;; SSE register/memory, "Yi" source to a general register, and
;; offsettable-memory source to a general register; the last is a plain
;; mov{q} with type imov, the first two carry type "*".
7285 (define_insn "*sse2_storeq_rex64"
7286 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7288 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7289 (parallel [(const_int 0)])))]
7290 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7294 mov{q}\t{%1, %0|%0, %1}"
7295 [(set_attr "type" "*,*,imov")
7296 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI store from an SSE register to an SSE register or
;; memory, followed by the matching split.  The split applies when
;; inter-unit moves are enabled, the destination is memory, or the
;; destination is not a general register; it rewrites operand 1 as a
;; DImode REG with the same register number and emits a plain move.
7298 (define_insn "*sse2_storeq"
7299 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7301 (match_operand:V2DI 1 "register_operand" "x")
7302 (parallel [(const_int 0)])))]
7307 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7309 (match_operand:V2DI 1 "register_operand" "")
7310 (parallel [(const_int 0)])))]
7313 && (TARGET_INTER_UNIT_MOVES
7314 || MEM_P (operands [0])
7315 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7316 [(set (match_dup 0) (match_dup 1))]
7317 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 of a V2DI value; rejects
;; memory-to-memory.  Alternatives:
;;   0: register -> memory           movhps/vmovhps
;;   1: in place (operand 1 tied)    psrldq $8        (non-AVX)
;;   2: register -> register         vpsrldq $8       (AVX)
;;   3: memory -> SSE register       movq/vmovq from %H1
;;   4: memory -> general register   mov{q} from %H1
7319 (define_insn "*vec_extractv2di_1_rex64"
7320 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7322 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7323 (parallel [(const_int 1)])))]
7324 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7326 %vmovhps\t{%1, %0|%0, %1}
7327 psrldq\t{$8, %0|%0, 8}
7328 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7329 %vmovq\t{%H1, %0|%0, %H1}
7330 mov{q}\t{%H1, %0|%0, %H1}"
7331 [(set_attr "isa" "*,noavx,avx,*,*")
7332 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7333 (set_attr "length_immediate" "*,1,1,*,*")
7334 (set_attr "memory" "*,none,none,*,*")
7335 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7336 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (non-TARGET_64BIT, TARGET_SSE) extraction of element 1 of a
;; V2DI value; rejects memory-to-memory.  Alternatives:
;;   0: register -> memory           movhps/vmovhps
;;   1: in place (operand 1 tied)    psrldq $8   (sse2_noavx)
;;   2: register -> register         vpsrldq $8  (avx)
;;   3: memory -> register           movq/vmovq from %H1 (sse2)
;;   4: register -> register         movhlps     (noavx)
;;   5: memory -> register           movlps from %H1 (noavx)
7338 (define_insn "*vec_extractv2di_1"
7339 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7341 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7342 (parallel [(const_int 1)])))]
7343 "!TARGET_64BIT && TARGET_SSE
7344 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7346 %vmovhps\t{%1, %0|%0, %1}
7347 psrldq\t{$8, %0|%0, 8}
7348 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7349 %vmovq\t{%H1, %0|%0, %H1}
7350 movhlps\t{%1, %0|%0, %1}
7351 movlps\t{%H1, %0|%0, %H1}"
7352 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7353 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7354 (set_attr "length_immediate" "*,1,1,*,*,*")
7355 (set_attr "memory" "*,none,none,*,*,*")
7356 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7357 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI broadcast of an SImode value: a register source uses
;; vpshufd with immediate 0, a memory source uses vbroadcastss.
7359 (define_insn "*vec_dupv4si_avx"
7360 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7362 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7365 vpshufd\t{$0, %1, %0|%0, %1, 0}
7366 vbroadcastss\t{%1, %0|%0, %1}"
7367 [(set_attr "type" "sselog1,ssemov")
7368 (set_attr "length_immediate" "1,0")
7369 (set_attr "prefix_extra" "0,1")
7370 (set_attr "prefix" "vex")
7371 (set_attr "mode" "TI,V4SF")])
;; V4SI broadcast of an SImode register.  Alternative 0 (isa sse2) uses
;; pshufd with immediate 0; alternative 1 ties the source to the
;; destination and uses shufps with immediate 0 on that register.
7373 (define_insn "*vec_dupv4si"
7374 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7376 (match_operand:SI 1 "register_operand" " x,0")))]
7379 pshufd\t{$0, %1, %0|%0, %1, 0}
7380 shufps\t{$0, %0, %0|%0, %0, 0}"
7381 [(set_attr "isa" "sse2,*")
7382 (set_attr "type" "sselog1")
7383 (set_attr "length_immediate" "1")
7384 (set_attr "mode" "TI,V4SF")])
;; V2DI broadcast of a DImode value with three alternatives (isa
;; noavx, avx, any): source tied to the destination, source in another
;; SSE register (vpunpcklqdq, operand printed with %d1), and source in
;; memory (movddup/vmovddup).
7386 (define_insn "*vec_dupv2di_sse3"
7387 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7389 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7393 vpunpcklqdq\t{%d1, %0|%0, %d1}
7394 %vmovddup\t{%1, %0|%0, %1}"
7395 [(set_attr "isa" "noavx,avx,*")
7396 (set_attr "type" "sselog1")
7397 (set_attr "prefix" "orig,vex,maybe_vex")
7398 (set_attr "mode" "TI,TI,DF")])
;; V2DI broadcast of a DImode register; both alternatives tie the source
;; to the destination.  Alternative 0 is restricted to isa sse2 (type
;; sselog1, mode TI); alternative 1 has no isa restriction (type ssemov,
;; mode V4SF).
7400 (define_insn "*vec_dupv2di"
7401 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7403 (match_operand:DI 1 "register_operand" " 0,0")))]
7408 [(set_attr "isa" "sse2,*")
7409 (set_attr "type" "sselog1,ssemov")
7410 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from two SImode values (operands 1 and 2).  Alternatives:
;;   0/1: operand 2 in general register or memory -> pinsrd/vpinsrd $1
;;   2/3: operand 2 in an SSE register            -> punpckldq/vpunpckldq
;;   4:   operand 2 constant ("C")                -> movd/vmovd of operand 1
;;   5:   MMX registers ("*y")                    -> punpckldq
;;   6:   MMX destination, operand 2 constant     -> movd of operand 1
7412 (define_insn "*vec_concatv2si_sse4_1"
7413 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7415 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7416 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7419 pinsrd\t{$1, %2, %0|%0, %2, 1}
7420 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7421 punpckldq\t{%2, %0|%0, %2}
7422 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7423 %vmovd\t{%1, %0|%0, %1}
7424 punpckldq\t{%2, %0|%0, %2}
7425 movd\t{%1, %0|%0, %1}"
7426 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7427 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7428 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7429 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7430 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7431 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7433 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7434 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7435 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values; operand 2 is a register or zero.
;; Alternatives 0-1 target SSE registers (punpckldq, or movd when
;; operand 2 is the "C" constant); alternatives 2-3 are the same two
;; forms on MMX registers ("*y").
7436 (define_insn "*vec_concatv2si_sse2"
7437 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7439 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7440 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7443 punpckldq\t{%2, %0|%0, %2}
7444 movd\t{%1, %0|%0, %1}
7445 punpckldq\t{%2, %0|%0, %2}
7446 movd\t{%1, %0|%0, %1}"
7447 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7448 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from two SImode values; operand 2 is a register or zero.
;; The SSE alternatives use unpcklps (register pair) and movss (memory
;; source with the "C" constant); the MMX alternatives ("*y") use
;; punpckldq and movd.
7450 (define_insn "*vec_concatv2si_sse"
7451 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7453 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7454 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7457 unpcklps\t{%2, %0|%0, %2}
7458 movss\t{%1, %0|%0, %1}
7459 punpckldq\t{%2, %0|%0, %2}
7460 movd\t{%1, %0|%0, %1}"
7461 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7462 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves.  Register second halves use
;; punpcklqdq (sse2_noavx), vpunpcklqdq (avx) or movlhps (noavx); memory
;; second halves use movhps (noavx) or vmovhps (avx).  Non-AVX
;; alternatives tie operand 1 to the destination.
7464 (define_insn "*vec_concatv4si"
7465 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7467 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7468 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7471 punpcklqdq\t{%2, %0|%0, %2}
7472 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7473 movlhps\t{%2, %0|%0, %2}
7474 movhps\t{%2, %0|%0, %2}
7475 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7476 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7477 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7478 (set_attr "prefix" "orig,vex,orig,orig,vex")
7479 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7481 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode values.  Alternatives:
;;   0/1: operand 2 in general register or memory -> pinsrq/vpinsrq $1
;;   2:   operand 2 constant, SSE/memory source   -> movq/vmovq
;;   3:   operand 2 constant, general register source into "Yi"
;;        -> movd/vmovd
;;   4:   operand 2 constant, MMX source          -> movq2dq
;;   5/6: operand 2 in an SSE register            -> punpcklqdq/vpunpcklqdq
;;   7/8: operand 2 in memory                     -> movhps/vmovhps
;; The type attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise; prefix_rex is special-cased for alternatives 0 and
;; 3 when TARGET_AVX is off.
7482 (define_insn "*vec_concatv2di_rex64"
7483 [(set (match_operand:V2DI 0 "register_operand"
7484 "=x,x ,x ,Yi,!x,x,x,x,x")
7486 (match_operand:DI 1 "nonimmediate_operand"
7487 " 0,x ,xm,r ,*y,0,x,0,x")
7488 (match_operand:DI 2 "vector_move_operand"
7489 "rm,rm,C ,C ,C ,x,x,m,m")))]
7492 pinsrq\t{$1, %2, %0|%0, %2, 1}
7493 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7494 %vmovq\t{%1, %0|%0, %1}
7495 %vmovd\t{%1, %0|%0, %1}
7496 movq2dq\t{%1, %0|%0, %1}
7497 punpcklqdq\t{%2, %0|%0, %2}
7498 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7499 movhps\t{%2, %0|%0, %2}
7500 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7501 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7504 (eq_attr "alternative" "0,1,5,6")
7505 (const_string "sselog")
7506 (const_string "ssemov")))
7507 (set (attr "prefix_rex")
7509 (and (eq_attr "alternative" "0,3")
7510 (not (match_test "TARGET_AVX")))
7512 (const_string "*")))
7513 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7514 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7515 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7516 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (non-TARGET_64BIT, TARGET_SSE) construction of a V2DI from two
;; DImode values.  Alternatives:
;;   0: operand 2 constant, SSE/memory source -> movq/vmovq (sse2)
;;   1: operand 2 constant, MMX source        -> movq2dq (sse2)
;;   2/3: operand 2 in an SSE register        -> punpcklqdq/vpunpcklqdq
;;   4: operand 2 in an SSE register          -> movlhps (noavx)
;;   5/6: operand 2 in memory                 -> movhps/vmovhps
7518 (define_insn "vec_concatv2di"
7519 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7521 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7522 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7523 "!TARGET_64BIT && TARGET_SSE"
7525 %vmovq\t{%1, %0|%0, %1}
7526 movq2dq\t{%1, %0|%0, %1}
7527 punpcklqdq\t{%2, %0|%0, %2}
7528 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7529 movlhps\t{%2, %0|%0, %2}
7530 movhps\t{%2, %0|%0, %2}
7531 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7532 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7533 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7534 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7535 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for VI124_128 modes; all work is delegated to
;; ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably mean (unsigned, high half) --
;; confirm against ix86_expand_sse_unpack.
7537 (define_expand "vec_unpacks_lo_<mode>"
7538 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7539 (match_operand:VI124_128 1 "register_operand" "")]
7541 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for VI124_128 modes; all work is delegated to
;; ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two flags presumably mean (unsigned, high half) --
;; confirm against ix86_expand_sse_unpack.
7543 (define_expand "vec_unpacks_hi_<mode>"
7544 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7545 (match_operand:VI124_128 1 "register_operand" "")]
7547 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for VI124_128 modes; all work is delegated to
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two flags presumably mean (unsigned, high half) --
;; confirm against ix86_expand_sse_unpack.
7549 (define_expand "vec_unpacku_lo_<mode>"
7550 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7551 (match_operand:VI124_128 1 "register_operand" "")]
7553 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for VI124_128 modes; all work is delegated to
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two flags presumably mean (unsigned, high half) --
;; confirm against ix86_expand_sse_unpack.
7555 (define_expand "vec_unpacku_hi_<mode>"
7556 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7557 (match_operand:VI124_128 1 "register_operand" "")]
7559 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI byte average.  The pattern combines operands 1
;; and 2 with a constant vector of 32 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with PLUS to legitimize the
;; operand pair.
7567 (define_expand "avx2_uavgv32qi3"
7568 [(set (match_operand:V32QI 0 "register_operand" "")
7574 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7576 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7577 (const_vector:V32QI [(const_int 1) (const_int 1)
7578 (const_int 1) (const_int 1)
7579 (const_int 1) (const_int 1)
7580 (const_int 1) (const_int 1)
7581 (const_int 1) (const_int 1)
7582 (const_int 1) (const_int 1)
7583 (const_int 1) (const_int 1)
7584 (const_int 1) (const_int 1)
7585 (const_int 1) (const_int 1)
7586 (const_int 1) (const_int 1)
7587 (const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)
7589 (const_int 1) (const_int 1)
7590 (const_int 1) (const_int 1)
7591 (const_int 1) (const_int 1)
7592 (const_int 1) (const_int 1)]))
7595 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI byte average.  The pattern combines operands 1
;; and 2 with a constant vector of 16 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with PLUS to legitimize the
;; operand pair.
7597 (define_expand "sse2_uavgv16qi3"
7598 [(set (match_operand:V16QI 0 "register_operand" "")
7604 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7606 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7607 (const_vector:V16QI [(const_int 1) (const_int 1)
7608 (const_int 1) (const_int 1)
7609 (const_int 1) (const_int 1)
7610 (const_int 1) (const_int 1)
7611 (const_int 1) (const_int 1)
7612 (const_int 1) (const_int 1)
7613 (const_int 1) (const_int 1)
7614 (const_int 1) (const_int 1)]))
7617 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI byte average; emits vpavgb.  Requires
;; TARGET_AVX2 and an operand pair accepted by ix86_binary_operator_ok
;; for PLUS.  The "%" in operand 1's constraint marks operands 1 and 2
;; as commutative.
7619 (define_insn "*avx2_uavgv32qi3"
7620 [(set (match_operand:V32QI 0 "register_operand" "=x")
7626 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7628 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7629 (const_vector:V32QI [(const_int 1) (const_int 1)
7630 (const_int 1) (const_int 1)
7631 (const_int 1) (const_int 1)
7632 (const_int 1) (const_int 1)
7633 (const_int 1) (const_int 1)
7634 (const_int 1) (const_int 1)
7635 (const_int 1) (const_int 1)
7636 (const_int 1) (const_int 1)
7637 (const_int 1) (const_int 1)
7638 (const_int 1) (const_int 1)
7639 (const_int 1) (const_int 1)
7640 (const_int 1) (const_int 1)
7641 (const_int 1) (const_int 1)
7642 (const_int 1) (const_int 1)
7643 (const_int 1) (const_int 1)
7644 (const_int 1) (const_int 1)]))
7646 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7647 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7648 [(set_attr "type" "sseiadd")
7649 (set_attr "prefix" "vex")
7650 (set_attr "mode" "OI")])
;; Matching insn for the V16QI byte average.  Requires TARGET_SSE2 and
;; an operand pair accepted by ix86_binary_operator_ok for PLUS.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; pavgb; alternative 1 (avx) emits the three-operand vpavgb.  Operands
;; 1 and 2 are commutative ("%").
7652 (define_insn "*sse2_uavgv16qi3"
7653 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7659 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7661 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7662 (const_vector:V16QI [(const_int 1) (const_int 1)
7663 (const_int 1) (const_int 1)
7664 (const_int 1) (const_int 1)
7665 (const_int 1) (const_int 1)
7666 (const_int 1) (const_int 1)
7667 (const_int 1) (const_int 1)
7668 (const_int 1) (const_int 1)
7669 (const_int 1) (const_int 1)]))
7671 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7673 pavgb\t{%2, %0|%0, %2}
7674 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7675 [(set_attr "isa" "noavx,avx")
7676 (set_attr "type" "sseiadd")
7677 (set_attr "prefix_data16" "1,*")
7678 (set_attr "prefix" "orig,vex")
7679 (set_attr "mode" "TI")])
;; Expander for the V16HI word average.  The pattern combines operands 1
;; and 2 with a constant vector of 16 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with PLUS to legitimize the
;; operand pair.
7681 (define_expand "avx2_uavgv16hi3"
7682 [(set (match_operand:V16HI 0 "register_operand" "")
7688 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7690 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7691 (const_vector:V16HI [(const_int 1) (const_int 1)
7692 (const_int 1) (const_int 1)
7693 (const_int 1) (const_int 1)
7694 (const_int 1) (const_int 1)
7695 (const_int 1) (const_int 1)
7696 (const_int 1) (const_int 1)
7697 (const_int 1) (const_int 1)
7698 (const_int 1) (const_int 1)]))
7701 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI word average.  The pattern combines operands 1
;; and 2 with a constant vector of 8 ones; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with PLUS to legitimize the
;; operand pair.
7703 (define_expand "sse2_uavgv8hi3"
7704 [(set (match_operand:V8HI 0 "register_operand" "")
7710 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7712 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7713 (const_vector:V8HI [(const_int 1) (const_int 1)
7714 (const_int 1) (const_int 1)
7715 (const_int 1) (const_int 1)
7716 (const_int 1) (const_int 1)]))
7719 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI "uavg" pattern; emits three-operand
;; vpavgw.  Single alternative, VEX prefix only, condition requires
;; TARGET_AVX2 and ix86_binary_operator_ok for PLUS.  The "%" on
;; operand 1 declares operands 1 and 2 commutative; operand 2 may be
;; memory ("xm").
7721 (define_insn "*avx2_uavgv16hi3"
7722 [(set (match_operand:V16HI 0 "register_operand" "=x")
7728 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7730 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7731 (const_vector:V16HI [(const_int 1) (const_int 1)
7732 (const_int 1) (const_int 1)
7733 (const_int 1) (const_int 1)
7734 (const_int 1) (const_int 1)
7735 (const_int 1) (const_int 1)
7736 (const_int 1) (const_int 1)
7737 (const_int 1) (const_int 1)
7738 (const_int 1) (const_int 1)]))
7740 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7741 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7742 [(set_attr "type" "sseiadd")
7743 (set_attr "prefix" "vex")
7744 (set_attr "mode" "OI")])
;; Matching insn for the V8HI "uavg" pattern (pavgw / vpavgw).
;; Alternative 0 (isa noavx) is the two-operand legacy form with
;; operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  "%" on operand 1 declares operands 1 and 2
;; commutative.  Condition: TARGET_SSE2 and ix86_binary_operator_ok
;; for PLUS in V8HImode.
7746 (define_insn "*sse2_uavgv8hi3"
7747 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7753 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7755 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7756 (const_vector:V8HI [(const_int 1) (const_int 1)
7757 (const_int 1) (const_int 1)
7758 (const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)]))
7761 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7763 pavgw\t{%2, %0|%0, %2}
7764 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7765 [(set_attr "isa" "noavx,avx")
7766 (set_attr "type" "sseiadd")
7767 (set_attr "prefix_data16" "1,*")
7768 (set_attr "prefix" "orig,vex")
7769 (set_attr "mode" "TI")])
7771 ;; The correct representation for this is absolutely enormous, and
7772 ;; surely not generally useful.
;; psadbw / vpsadbw, written as an unspec rather than explicit RTL.
;; The result mode iterates over VI8_AVX2 while both inputs use the
;; matching <ssebytemode>.  Operand 1 must be a register (tied to the
;; destination in the noavx alternative); operand 2 may be memory.
7773 (define_insn "<sse2_avx2>_psadbw"
7774 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7775 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7776 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7780 psadbw\t{%2, %0|%0, %2}
7781 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7782 [(set_attr "isa" "noavx,avx")
7783 (set_attr "type" "sseiadd")
7784 (set_attr "atom_unit" "simul")
7785 (set_attr "prefix_data16" "1,*")
7786 (set_attr "prefix" "orig,vex")
7787 (set_attr "mode" "<sseinsnmode>")])
;; movmskps / movmskpd family, iterated over the VF float vector modes.
;; The source is a vector register ("x"); the result lands in an SImode
;; general register ("r").  The template starts with %v and the prefix
;; attribute is "maybe_vex", so one pattern serves both encodings.
7789 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7790 [(set (match_operand:SI 0 "register_operand" "=r")
7792 [(match_operand:VF 1 "register_operand" "x")]
7795 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7796 [(set_attr "type" "ssemov")
7797 (set_attr "prefix" "maybe_vex")
7798 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register source, expressed as an SImode unspec.
;; The result is written to a general register ("r"); VEX prefix only.
7800 (define_insn "avx2_pmovmskb"
7801 [(set (match_operand:SI 0 "register_operand" "=r")
7802 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7805 "vpmovmskb\t{%1, %0|%0, %1}"
7806 [(set_attr "type" "ssemov")
7807 (set_attr "prefix" "vex")
7808 (set_attr "mode" "DI")])
;; pmovmskb / vpmovmskb on a V16QI register source, expressed as an
;; SImode unspec with the result in a general register ("r").  The %v
;; template prefix and "maybe_vex" attribute cover both encodings;
;; prefix_data16 is set to 1.
7810 (define_insn "sse2_pmovmskb"
7811 [(set (match_operand:SI 0 "register_operand" "=r")
7812 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7815 "%vpmovmskb\t{%1, %0|%0, %1}"
7816 [(set_attr "type" "ssemov")
7817 (set_attr "prefix_data16" "1")
7818 (set_attr "prefix" "maybe_vex")
7819 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: a set of a V16QI memory destination
;; (operand 0) from an unspec whose visible inputs are two V16QI
;; registers (operands 1 and 2).
;; NOTE(review): presumably matched by *sse2_maskmovdqu below - confirm.
7821 (define_expand "sse2_maskmovdqu"
7822 [(set (match_operand:V16QI 0 "memory_operand" "")
7823 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7824 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The store address is operand 0, a
;; pointer-mode (P iterator) register forced into the "D" class; it is
;; implicit in the instruction, so the template prints only operands 1
;; and 2.  The old contents of the destination (mem of match_dup 0) are
;; listed as an unspec input alongside the two vector registers.
;; length_vex is computed by hand: 3 plus REX_SSE_REGNO_P of operand 2's
;; register number.
7829 (define_insn "*sse2_maskmovdqu"
7830 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7831 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7832 (match_operand:V16QI 2 "register_operand" "x")
7833 (mem:V16QI (match_dup 0))]
7836 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7837 [(set_attr "type" "ssemov")
7838 (set_attr "prefix_data16" "1")
7839 ;; The implicit %rdi operand confuses default length_vex computation.
7840 (set (attr "length_vex")
7841 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7842 (set_attr "prefix" "maybe_vex")
7843 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile taking one SImode memory operand ("m").
;; Attributes mark it as a memory load that touches MXCSR
;; (atom_sse_attr "mxcsr"), with a "maybe_vex" prefix.
7845 (define_insn "sse_ldmxcsr"
7846 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7850 [(set_attr "type" "sse")
7851 (set_attr "atom_sse_attr" "mxcsr")
7852 (set_attr "prefix" "maybe_vex")
7853 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand ("=m") from the
;; UNSPECV_STMXCSR unspec_volatile.  The (const_int 0) inside the unspec
;; is only a placeholder operand.  Attributes mark it as a memory store
;; that touches MXCSR.
7855 (define_insn "sse_stmxcsr"
7856 [(set (match_operand:SI 0 "memory_operand" "=m")
7857 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7860 [(set_attr "type" "sse")
7861 (set_attr "atom_sse_attr" "mxcsr")
7862 (set_attr "prefix" "maybe_vex")
7863 (set_attr "memory" "store")])
;; Named expander for sfence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  It takes no caller operands: the preparation code
;; builds operand 0 as a BLKmode MEM whose address is a Pmode SCRATCH
;; and flags it volatile, so the fence is an UNSPEC_SFENCE of that MEM.
7865 (define_expand "sse_sfence"
7867 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7868 "TARGET_SSE || TARGET_3DNOW_A"
7870 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7871 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for sfence: a BLKmode operand set from an
;; UNSPEC_SFENCE of itself.  The operand has an empty predicate and
;; constraint, so it accepts whatever the expander built.
;; length_address is 0 and the memory attribute is "unknown".
7874 (define_insn "*sse_sfence"
7875 [(set (match_operand:BLK 0 "" "")
7876 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7877 "TARGET_SSE || TARGET_3DNOW_A"
7879 [(set_attr "type" "sse")
7880 (set_attr "length_address" "0")
7881 (set_attr "atom_sse_attr" "fence")
7882 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile whose only operand is an address
;; (predicate address_operand, constraint "p"), not a MEM.  Classified
;; as a fence for Atom scheduling, with memory effects "unknown".
7884 (define_insn "sse2_clflush"
7885 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7889 [(set_attr "type" "sse")
7890 (set_attr "atom_sse_attr" "fence")
7891 (set_attr "memory" "unknown")])
;; Named expander for mfence.  The preparation code builds operand 0 as
;; a volatile BLKmode MEM with a Pmode SCRATCH address, and the fence is
;; an UNSPEC_MFENCE of that MEM.
7893 (define_expand "sse2_mfence"
7895 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7898 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7899 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for mfence: a BLKmode operand set from an
;; UNSPEC_MFENCE of itself.  Enabled for TARGET_64BIT or TARGET_SSE2.
;; length_address is 0 and the memory attribute is "unknown".
7902 (define_insn "*sse2_mfence"
7903 [(set (match_operand:BLK 0 "" "")
7904 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7905 "TARGET_64BIT || TARGET_SSE2"
7907 [(set_attr "type" "sse")
7908 (set_attr "length_address" "0")
7909 (set_attr "atom_sse_attr" "fence")
7910 (set_attr "memory" "unknown")])
;; Named expander for lfence.  The preparation code builds operand 0 as
;; a volatile BLKmode MEM with a Pmode SCRATCH address, and the fence is
;; an UNSPEC_LFENCE of that MEM.
7912 (define_expand "sse2_lfence"
7914 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7917 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7918 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for lfence: a BLKmode operand set from an
;; UNSPEC_LFENCE of itself.  Unlike the sfence and mfence insns, its
;; atom_sse_attr value is "lfence" rather than "fence".
7921 (define_insn "*sse2_lfence"
7922 [(set (match_operand:BLK 0 "" "")
7923 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7926 [(set_attr "type" "sse")
7927 (set_attr "length_address" "0")
7928 (set_attr "atom_sse_attr" "lfence")
7929 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile with two SImode register operands pinned by
;; constraint to the "a" and "c" registers.  The same SImode pattern is
;; used in 64-bit mode for the reason given in the comment inside the
;; pattern.  The encoded length is fixed at 3 bytes.
7931 (define_insn "sse3_mwait"
7932 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7933 (match_operand:SI 1 "register_operand" "c")]
7936 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7937 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7938 ;; we only need to set up 32bit registers.
7940 [(set_attr "length" "3")])
;; monitor for 32-bit targets (TARGET_SSE3 && !TARGET_64BIT).  All three
;; operands are SImode registers pinned to the "a", "c" and "d"
;; registers, and the template prints them explicitly.  The encoded
;; length is fixed at 3 bytes.
7942 (define_insn "sse3_monitor"
7943 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7944 (match_operand:SI 1 "register_operand" "c")
7945 (match_operand:SI 2 "register_operand" "d")]
7947 "TARGET_SSE3 && !TARGET_64BIT"
7948 "monitor\t%0, %1, %2"
7949 [(set_attr "length" "3")])
;; monitor for 64-bit targets (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; is DImode in the "a" register; operands 1 and 2 stay SImode in the
;; "c" and "d" registers, as the comment inside the pattern explains.
;; The encoded length is fixed at 3 bytes.
7951 (define_insn "sse3_monitor64"
7952 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7953 (match_operand:SI 1 "register_operand" "c")
7954 (match_operand:SI 2 "register_operand" "d")]
7956 "TARGET_SSE3 && TARGET_64BIT"
7957 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7958 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7959 ;; zero extended to 64bit, we only need to set up 32bit registers.
7961 [(set_attr "length" "3")])
7963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7965 ;; SSSE3 instructions
7967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vphaddw on V16HI (256-bit, VEX only).  Operand 1 must be a register;
;; operand 2 may be a register or memory.  The RTL names every HI
;; element of operand 1 (indices 0-15) and then of operand 2 through
;; scalar vec_selects, taken in adjacent pairs (0,1), (2,3), ...
;; Because the element order is explicit, the operands are not marked
;; commutative.
7969 (define_insn "avx2_phaddwv16hi3"
7970 [(set (match_operand:V16HI 0 "register_operand" "=x")
7977 (match_operand:V16HI 1 "register_operand" "x")
7978 (parallel [(const_int 0)]))
7979 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7981 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7982 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7985 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7986 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7988 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7989 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7993 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
7994 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
7996 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
7997 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8000 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8001 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8003 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8004 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8010 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8011 (parallel [(const_int 0)]))
8012 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8014 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8015 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8018 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8019 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8021 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8022 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8026 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8027 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8030 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8033 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8034 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8036 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8037 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8039 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8040 [(set_attr "type" "sseiadd")
8041 (set_attr "prefix_extra" "1")
8042 (set_attr "prefix" "vex")
8043 (set_attr "mode" "OI")])
;; phaddw / vphaddw on V8HI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination for the two-operand legacy form;
;; alternative 1 (isa avx) is the three-operand VEX form.  The RTL names
;; HI elements 0-7 of operand 1, then of operand 2, in adjacent pairs.
8045 (define_insn "ssse3_phaddwv8hi3"
8046 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8052 (match_operand:V8HI 1 "register_operand" "0,x")
8053 (parallel [(const_int 0)]))
8054 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8056 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8057 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8060 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8061 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8063 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8064 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8069 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8070 (parallel [(const_int 0)]))
8071 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8073 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8074 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8077 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8078 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8080 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8084 phaddw\t{%2, %0|%0, %2}
8085 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8086 [(set_attr "isa" "noavx,avx")
8087 (set_attr "type" "sseiadd")
8088 (set_attr "atom_unit" "complex")
8089 (set_attr "prefix_data16" "1,*")
8090 (set_attr "prefix_extra" "1")
8091 (set_attr "prefix" "orig,vex")
8092 (set_attr "mode" "TI")])
;; phaddw on V4HI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination ("0") and
;; operand 2 may be memory ("ym").  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p rather than being a constant.
8094 (define_insn "ssse3_phaddwv4hi3"
8095 [(set (match_operand:V4HI 0 "register_operand" "=y")
8100 (match_operand:V4HI 1 "register_operand" "0")
8101 (parallel [(const_int 0)]))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8104 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8109 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8110 (parallel [(const_int 0)]))
8111 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8113 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8114 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8116 "phaddw\t{%2, %0|%0, %2}"
8117 [(set_attr "type" "sseiadd")
8118 (set_attr "atom_unit" "complex")
8119 (set_attr "prefix_extra" "1")
8120 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8121 (set_attr "mode" "DI")])
;; vphaddd on V8SI (256-bit, VEX only).  Operand 1 must be a register;
;; operand 2 may be memory.  The RTL names SI elements 0-7 of operand 1,
;; then of operand 2, through scalar vec_selects in adjacent pairs.
8123 (define_insn "avx2_phadddv8si3"
8124 [(set (match_operand:V8SI 0 "register_operand" "=x")
8130 (match_operand:V8SI 1 "register_operand" "x")
8131 (parallel [(const_int 0)]))
8132 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8134 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8135 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8138 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8139 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8141 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8142 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8147 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8148 (parallel [(const_int 0)]))
8149 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8151 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8152 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8155 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8156 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8158 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8159 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8161 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8162 [(set_attr "type" "sseiadd")
8163 (set_attr "prefix_extra" "1")
8164 (set_attr "prefix" "vex")
8165 (set_attr "mode" "OI")])
;; phaddd / vphaddd on V4SI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  The RTL names SI elements 0-3 of each
;; source operand in adjacent pairs.
8167 (define_insn "ssse3_phadddv4si3"
8168 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8173 (match_operand:V4SI 1 "register_operand" "0,x")
8174 (parallel [(const_int 0)]))
8175 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8177 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8178 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8182 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8183 (parallel [(const_int 0)]))
8184 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8186 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8187 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8190 phaddd\t{%2, %0|%0, %2}
8191 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8192 [(set_attr "isa" "noavx,avx")
8193 (set_attr "type" "sseiadd")
8194 (set_attr "atom_unit" "complex")
8195 (set_attr "prefix_data16" "1,*")
8196 (set_attr "prefix_extra" "1")
8197 (set_attr "prefix" "orig,vex")
8198 (set_attr "mode" "TI")])
;; phaddd on V2SI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination and operand 2
;; may be memory.  Each source contributes its elements 0 and 1.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8200 (define_insn "ssse3_phadddv2si3"
8201 [(set (match_operand:V2SI 0 "register_operand" "=y")
8205 (match_operand:V2SI 1 "register_operand" "0")
8206 (parallel [(const_int 0)]))
8207 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8210 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8211 (parallel [(const_int 0)]))
8212 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8214 "phaddd\t{%2, %0|%0, %2}"
8215 [(set_attr "type" "sseiadd")
8216 (set_attr "atom_unit" "complex")
8217 (set_attr "prefix_extra" "1")
8218 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8219 (set_attr "mode" "DI")])
;; vphaddsw on V16HI (256-bit, VEX only).  Operand layout matches
;; avx2_phaddwv16hi3: operand 1 is a register, operand 2 may be memory,
;; and all 16 HI elements of each are named in adjacent pairs.
;; NOTE(review): the "s" in the mnemonic presumably means a saturating
;; per-pair operation - confirm against the full pattern.
8221 (define_insn "avx2_phaddswv16hi3"
8222 [(set (match_operand:V16HI 0 "register_operand" "=x")
8229 (match_operand:V16HI 1 "register_operand" "x")
8230 (parallel [(const_int 0)]))
8231 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8233 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8234 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8237 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8238 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8240 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8246 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8248 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8249 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8252 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8253 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8255 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8262 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8263 (parallel [(const_int 0)]))
8264 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8266 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8267 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8270 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8271 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8278 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8285 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8286 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8288 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8289 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8291 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8292 [(set_attr "type" "sseiadd")
8293 (set_attr "prefix_extra" "1")
8294 (set_attr "prefix" "vex")
8295 (set_attr "mode" "OI")])
;; phaddsw / vphaddsw on V8HI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  HI elements 0-7 of each source are named in
;; adjacent pairs.
8297 (define_insn "ssse3_phaddswv8hi3"
8298 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8304 (match_operand:V8HI 1 "register_operand" "0,x")
8305 (parallel [(const_int 0)]))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8309 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8312 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8313 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8315 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8316 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8321 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8322 (parallel [(const_int 0)]))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8326 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8329 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8330 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8332 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8333 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8336 phaddsw\t{%2, %0|%0, %2}
8337 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8338 [(set_attr "isa" "noavx,avx")
8339 (set_attr "type" "sseiadd")
8340 (set_attr "atom_unit" "complex")
8341 (set_attr "prefix_data16" "1,*")
8342 (set_attr "prefix_extra" "1")
8343 (set_attr "prefix" "orig,vex")
8344 (set_attr "mode" "TI")])
;; phaddsw on V4HI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination and operand 2
;; may be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8346 (define_insn "ssse3_phaddswv4hi3"
8347 [(set (match_operand:V4HI 0 "register_operand" "=y")
8352 (match_operand:V4HI 1 "register_operand" "0")
8353 (parallel [(const_int 0)]))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8361 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8362 (parallel [(const_int 0)]))
8363 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8365 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8366 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8368 "phaddsw\t{%2, %0|%0, %2}"
8369 [(set_attr "type" "sseiadd")
8370 (set_attr "atom_unit" "complex")
8371 (set_attr "prefix_extra" "1")
8372 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8373 (set_attr "mode" "DI")])
;; vphsubw on V16HI (256-bit, VEX only).  Operand 1 must be a register;
;; operand 2 may be memory.  The RTL names all 16 HI elements of
;; operand 1, then of operand 2, in adjacent pairs; within each pair the
;; even-indexed element is listed first.
8375 (define_insn "avx2_phsubwv16hi3"
8376 [(set (match_operand:V16HI 0 "register_operand" "=x")
8383 (match_operand:V16HI 1 "register_operand" "x")
8384 (parallel [(const_int 0)]))
8385 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8388 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8392 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8394 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8395 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8399 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8403 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8406 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8407 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8409 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8410 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8416 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8417 (parallel [(const_int 0)]))
8418 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8420 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8421 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8424 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8425 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8427 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8428 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8433 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8436 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8439 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8445 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8446 [(set_attr "type" "sseiadd")
8447 (set_attr "prefix_extra" "1")
8448 (set_attr "prefix" "vex")
8449 (set_attr "mode" "OI")])
;; phsubw / vphsubw on V8HI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  HI elements 0-7 of each source are named in
;; adjacent pairs, even-indexed element first.
8451 (define_insn "ssse3_phsubwv8hi3"
8452 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8458 (match_operand:V8HI 1 "register_operand" "0,x")
8459 (parallel [(const_int 0)]))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8462 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8466 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8469 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8475 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8476 (parallel [(const_int 0)]))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8479 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8486 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8490 phsubw\t{%2, %0|%0, %2}
8491 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8492 [(set_attr "isa" "noavx,avx")
8493 (set_attr "type" "sseiadd")
8494 (set_attr "atom_unit" "complex")
8495 (set_attr "prefix_data16" "1,*")
8496 (set_attr "prefix_extra" "1")
8497 (set_attr "prefix" "orig,vex")
8498 (set_attr "mode" "TI")])
;; phsubw on V4HI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination and operand 2
;; may be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8500 (define_insn "ssse3_phsubwv4hi3"
8501 [(set (match_operand:V4HI 0 "register_operand" "=y")
8506 (match_operand:V4HI 1 "register_operand" "0")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8515 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8516 (parallel [(const_int 0)]))
8517 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8519 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8520 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8522 "phsubw\t{%2, %0|%0, %2}"
8523 [(set_attr "type" "sseiadd")
8524 (set_attr "atom_unit" "complex")
8525 (set_attr "prefix_extra" "1")
8526 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8527 (set_attr "mode" "DI")])
;; vphsubd on V8SI (256-bit, VEX only).  Operand 1 must be a register;
;; operand 2 may be memory.  SI elements 0-7 of each source are named in
;; adjacent pairs, even-indexed element first.
8529 (define_insn "avx2_phsubdv8si3"
8530 [(set (match_operand:V8SI 0 "register_operand" "=x")
8536 (match_operand:V8SI 1 "register_operand" "x")
8537 (parallel [(const_int 0)]))
8538 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8540 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8541 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8544 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8545 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8547 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8548 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8553 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8554 (parallel [(const_int 0)]))
8555 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8557 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8558 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8561 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8562 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8564 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8565 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8567 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8568 [(set_attr "type" "sseiadd")
8569 (set_attr "prefix_extra" "1")
8570 (set_attr "prefix" "vex")
8571 (set_attr "mode" "OI")])
;; phsubd / vphsubd on V4SI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  SI elements 0-3 of each source are named in
;; adjacent pairs, even-indexed element first.
8573 (define_insn "ssse3_phsubdv4si3"
8574 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8579 (match_operand:V4SI 1 "register_operand" "0,x")
8580 (parallel [(const_int 0)]))
8581 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8583 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8584 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8588 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8589 (parallel [(const_int 0)]))
8590 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8592 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8593 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8596 phsubd\t{%2, %0|%0, %2}
8597 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8599 [(set_attr "isa" "noavx,avx")
8600 (set_attr "type" "sseiadd")
8601 (set_attr "atom_unit" "complex")
8602 (set_attr "prefix_data16" "1,*")
8603 (set_attr "prefix_extra" "1")
8604 (set_attr "prefix" "orig,vex")
8605 (set_attr "mode" "TI")])
;; phsubd on V2SI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination and operand 2
;; may be memory.  Each source contributes its elements 0 and 1, in that
;; order.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8607 (define_insn "ssse3_phsubdv2si3"
8608 [(set (match_operand:V2SI 0 "register_operand" "=y")
8612 (match_operand:V2SI 1 "register_operand" "0")
8613 (parallel [(const_int 0)]))
8614 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8617 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8618 (parallel [(const_int 0)]))
8619 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8621 "phsubd\t{%2, %0|%0, %2}"
8622 [(set_attr "type" "sseiadd")
8623 (set_attr "atom_unit" "complex")
8624 (set_attr "prefix_extra" "1")
8625 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8626 (set_attr "mode" "DI")])
;; vphsubsw on V16HI (256-bit, VEX only).  Operand layout matches
;; avx2_phsubwv16hi3: operand 1 is a register, operand 2 may be memory,
;; and all 16 HI elements of each are named in adjacent pairs.
;; NOTE(review): the "s" in the mnemonic presumably means a saturating
;; per-pair operation - confirm against the full pattern.
8628 (define_insn "avx2_phsubswv16hi3"
8629 [(set (match_operand:V16HI 0 "register_operand" "=x")
8636 (match_operand:V16HI 1 "register_operand" "x")
8637 (parallel [(const_int 0)]))
8638 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8640 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8641 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8644 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8645 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8647 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8652 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8653 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8655 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8656 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8669 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8670 (parallel [(const_int 0)]))
8671 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8673 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8674 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8678 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8680 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8686 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8689 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8692 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8693 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8695 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8696 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8698 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8699 [(set_attr "type" "sseiadd")
8700 (set_attr "prefix_extra" "1")
8701 (set_attr "prefix" "vex")
8702 (set_attr "mode" "OI")])
;; phsubsw / vphsubsw on V8HI (128-bit).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  HI elements 0-7 of each source are named in
;; adjacent pairs, even-indexed element first.
8704 (define_insn "ssse3_phsubswv8hi3"
8705 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8711 (match_operand:V8HI 1 "register_operand" "0,x")
8712 (parallel [(const_int 0)]))
8713 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8715 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8716 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8719 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8720 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8722 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8723 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8728 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8729 (parallel [(const_int 0)]))
8730 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8732 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8733 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8736 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8737 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8739 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8743 phsubsw\t{%2, %0|%0, %2}
8744 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8745 [(set_attr "isa" "noavx,avx")
8746 (set_attr "type" "sseiadd")
8747 (set_attr "atom_unit" "complex")
8748 (set_attr "prefix_data16" "1,*")
8749 (set_attr "prefix_extra" "1")
8750 (set_attr "prefix" "orig,vex")
8751 (set_attr "mode" "TI")])
;; phsubsw on V4HI using the "y" register class (64-bit MMX form).
;; Two-operand only: operand 1 is tied to the destination and operand 2
;; may be memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8753 (define_insn "ssse3_phsubswv4hi3"
8754 [(set (match_operand:V4HI 0 "register_operand" "=y")
8759 (match_operand:V4HI 1 "register_operand" "0")
8760 (parallel [(const_int 0)]))
8761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8768 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8769 (parallel [(const_int 0)]))
8770 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8772 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8773 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8775 "phsubsw\t{%2, %0|%0, %2}"
8776 [(set_attr "type" "sseiadd")
8777 (set_attr "atom_unit" "complex")
8778 (set_attr "prefix_extra" "1")
8779 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8780 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw: two V32QI inputs, V16HI result.
;; The RTL picks V16QI sub-vectors of each input using element lists that
;; start at index 0 and at index 1.  VEX-only, three-operand form.
8782 (define_insn "avx2_pmaddubsw256"
8783 [(set (match_operand:V16HI 0 "register_operand" "=x")
8788 (match_operand:V32QI 1 "register_operand" "x")
8789 (parallel [(const_int 0)
8807 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8808 (parallel [(const_int 0)
8826 (vec_select:V16QI (match_dup 1)
8827 (parallel [(const_int 1)
8844 (vec_select:V16QI (match_dup 2)
8845 (parallel [(const_int 1)
8860 (const_int 31)]))))))]
8862 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8863 [(set_attr "type" "sseiadd")
8864 (set_attr "prefix_extra" "1")
8865 (set_attr "prefix" "vex")
8866 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: two V16QI inputs, V8HI result.
;; Two alternatives, selected through the "isa" attribute:
;;   noavx - legacy two-operand form, operand 1 tied to the destination;
;;   avx   - VEX three-operand form.
;; The RTL picks V8QI sub-vectors via element lists starting at 0 and at 1.
8868 (define_insn "ssse3_pmaddubsw128"
8869 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8874 (match_operand:V16QI 1 "register_operand" "0,x")
8875 (parallel [(const_int 0)
8885 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8886 (parallel [(const_int 0)
8896 (vec_select:V8QI (match_dup 1)
8897 (parallel [(const_int 1)
8906 (vec_select:V8QI (match_dup 2)
8907 (parallel [(const_int 1)
8914 (const_int 15)]))))))]
8917 pmaddubsw\t{%2, %0|%0, %2}
8918 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8919 [(set_attr "isa" "noavx,avx")
8920 (set_attr "type" "sseiadd")
8921 (set_attr "atom_unit" "simul")
8922 (set_attr "prefix_data16" "1,*")
8923 (set_attr "prefix_extra" "1")
8924 (set_attr "prefix" "orig,vex")
8925 (set_attr "mode" "TI")])
;; MMX-register ("y" constraint) pmaddubsw: two V8QI inputs, V4HI result.
;; Operand 1 is tied to the destination.  The RTL picks V4QI sub-vectors via
;; element lists starting at index 0 and at index 1.
8927 (define_insn "ssse3_pmaddubsw"
8928 [(set (match_operand:V4HI 0 "register_operand" "=y")
8933 (match_operand:V8QI 1 "register_operand" "0")
8934 (parallel [(const_int 0)
8940 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8941 (parallel [(const_int 0)
8947 (vec_select:V4QI (match_dup 1)
8948 (parallel [(const_int 1)
8953 (vec_select:V4QI (match_dup 2)
8954 (parallel [(const_int 1)
8957 (const_int 7)]))))))]
8959 "pmaddubsw\t{%2, %0|%0, %2}"
8960 [(set_attr "type" "sseiadd")
8961 (set_attr "atom_unit" "simul")
8962 (set_attr "prefix_extra" "1")
8963 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8964 (set_attr "mode" "DI")])
;; Named expander for the V16HI multiply pattern emitted by
;; *avx2_umulhrswv16hi3.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy to
;; legitimize them for a MULT in V16HImode.
;; The template carries an all-ones V16HI const_vector (presumably the
;; rounding term of the operation).
8966 (define_expand "avx2_umulhrswv16hi3"
8967 [(set (match_operand:V16HI 0 "register_operand" "")
8974 (match_operand:V16HI 1 "nonimmediate_operand" ""))
8976 (match_operand:V16HI 2 "nonimmediate_operand" "")))
8978 (const_vector:V16HI [(const_int 1) (const_int 1)
8979 (const_int 1) (const_int 1)
8980 (const_int 1) (const_int 1)
8981 (const_int 1) (const_int 1)
8982 (const_int 1) (const_int 1)
8983 (const_int 1) (const_int 1)
8984 (const_int 1) (const_int 1)
8985 (const_int 1) (const_int 1)]))
8988 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; 256-bit vpmulhrsw on V16HI.  Operands 1 and 2 are commutative ("%").
;; Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the operand
;; combination for MULT.  VEX-only, three-operand form.
8990 (define_insn "*avx2_umulhrswv16hi3"
8991 [(set (match_operand:V16HI 0 "register_operand" "=x")
8998 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9000 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9002 (const_vector:V16HI [(const_int 1) (const_int 1)
9003 (const_int 1) (const_int 1)
9004 (const_int 1) (const_int 1)
9005 (const_int 1) (const_int 1)
9006 (const_int 1) (const_int 1)
9007 (const_int 1) (const_int 1)
9008 (const_int 1) (const_int 1)
9009 (const_int 1) (const_int 1)]))
9011 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9012 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9013 [(set_attr "type" "sseimul")
9014 (set_attr "prefix_extra" "1")
9015 (set_attr "prefix" "vex")
9016 (set_attr "mode" "OI")])
;; Named expander for the V8HI pmulhrsw pattern (*ssse3_pmulhrswv8hi3).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy to
;; legitimize the two inputs for a MULT in V8HImode.
9018 (define_expand "ssse3_pmulhrswv8hi3"
9019 [(set (match_operand:V8HI 0 "register_operand" "")
9026 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9028 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9030 (const_vector:V8HI [(const_int 1) (const_int 1)
9031 (const_int 1) (const_int 1)
9032 (const_int 1) (const_int 1)
9033 (const_int 1) (const_int 1)]))
9036 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; 128-bit pmulhrsw on V8HI; operands 1 and 2 are commutative ("%").
;; Alternatives (see "isa"): legacy form with operand 1 tied to the
;; destination, and the VEX three-operand form.
;; Requires TARGET_SSSE3 and an operand combination accepted by
;; ix86_binary_operator_ok for MULT.
9038 (define_insn "*ssse3_pmulhrswv8hi3"
9039 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9046 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9048 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9050 (const_vector:V8HI [(const_int 1) (const_int 1)
9051 (const_int 1) (const_int 1)
9052 (const_int 1) (const_int 1)
9053 (const_int 1) (const_int 1)]))
9055 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9057 pmulhrsw\t{%2, %0|%0, %2}
9058 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9059 [(set_attr "isa" "noavx,avx")
9060 (set_attr "type" "sseimul")
9061 (set_attr "prefix_data16" "1,*")
9062 (set_attr "prefix_extra" "1")
9063 (set_attr "prefix" "orig,vex")
9064 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw pattern (*ssse3_pmulhrswv4hi3).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy to
;; legitimize the two inputs for a MULT in V4HImode.
9066 (define_expand "ssse3_pmulhrswv4hi3"
9067 [(set (match_operand:V4HI 0 "register_operand" "")
9074 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9076 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9078 (const_vector:V4HI [(const_int 1) (const_int 1)
9079 (const_int 1) (const_int 1)]))
9082 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; MMX-register ("y" constraint) pmulhrsw on V4HI; operands 1 and 2 are
;; commutative ("%") and operand 1 is tied to the destination.
;; Requires TARGET_SSSE3 and an operand combination accepted by
;; ix86_binary_operator_ok for MULT.
9084 (define_insn "*ssse3_pmulhrswv4hi3"
9085 [(set (match_operand:V4HI 0 "register_operand" "=y")
9092 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9094 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9096 (const_vector:V4HI [(const_int 1) (const_int 1)
9097 (const_int 1) (const_int 1)]))
9099 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9100 "pmulhrsw\t{%2, %0|%0, %2}"
9101 [(set_attr "type" "sseimul")
9102 (set_attr "prefix_extra" "1")
9103 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9104 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX2 mode iterator, modelled as an unspec of
;; operands 1 and 2.  Alternatives (see "isa"): legacy two-operand form with
;; operand 1 tied to the destination, and the VEX three-operand form.
9106 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9107 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9108 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9109 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9113 pshufb\t{%2, %0|%0, %2}
9114 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9115 [(set_attr "isa" "noavx,avx")
9116 (set_attr "type" "sselog1")
9117 (set_attr "prefix_data16" "1,*")
9118 (set_attr "prefix_extra" "1")
9119 (set_attr "prefix" "orig,vex")
9120 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y" constraint) pshufb on V8QI, modelled as an unspec.
;; Operand 1 is tied to the destination.
;; NOTE(review): the ';' after the output template starts an empty md
;; comment; it has no effect on the pattern.
9122 (define_insn "ssse3_pshufbv8qi3"
9123 [(set (match_operand:V8QI 0 "register_operand" "=y")
9124 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9125 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9128 "pshufb\t{%2, %0|%0, %2}";
9129 [(set_attr "type" "sselog1")
9130 (set_attr "prefix_extra" "1")
9131 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9132 (set_attr "mode" "DI")])
;; psign{b,w,d} / vpsign{b,w,d} over the VI124_AVX2 mode iterator; the
;; element-size suffix comes from <ssemodesuffix>.  Alternatives (see "isa"):
;; legacy form with operand 1 tied to the destination, and the VEX
;; three-operand form.
9134 (define_insn "<ssse3_avx2>_psign<mode>3"
9135 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9137 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9138 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9142 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9143 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9144 [(set_attr "isa" "noavx,avx")
9145 (set_attr "type" "sselog1")
9146 (set_attr "prefix_data16" "1,*")
9147 (set_attr "prefix_extra" "1")
9148 (set_attr "prefix" "orig,vex")
9149 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y" constraint) psign over the MMXMODEI iterator; the
;; element-size suffix comes from <mmxvecsize>.  Operand 1 is tied to the
;; destination.
;; NOTE(review): the ';' after the output template starts an empty md
;; comment; it has no effect on the pattern.
9151 (define_insn "ssse3_psign<mode>3"
9152 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9154 [(match_operand:MMXMODEI 1 "register_operand" "0")
9155 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9158 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9159 [(set_attr "type" "sselog1")
9160 (set_attr "prefix_extra" "1")
9161 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9162 (set_attr "mode" "DI")])
;; palignr / vpalignr over the SSESCALARMODE iterator, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL immediate is eight times the
;; value that appears in the assembly.
;; which_alternative chooses between the legacy two-operand template and the
;; VEX three-operand template.
9164 (define_insn "<ssse3_avx2>_palignr<mode>"
9165 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9166 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9167 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9168 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9172 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9174 switch (which_alternative)
9177 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9179 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9184 [(set_attr "isa" "noavx,avx")
9185 (set_attr "type" "sseishft")
9186 (set_attr "atom_unit" "sishuf")
9187 (set_attr "prefix_data16" "1,*")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "length_immediate" "1")
9190 (set_attr "prefix" "orig,vex")
9191 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y" constraint) palignr on DImode, modelled as an unspec.
;; Operand 1 is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before it is printed.
9193 (define_insn "ssse3_palignrdi"
9194 [(set (match_operand:DI 0 "register_operand" "=y")
9195 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9196 (match_operand:DI 2 "nonimmediate_operand" "ym")
9197 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9201 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9202 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9204 [(set_attr "type" "sseishft")
9205 (set_attr "atom_unit" "sishuf")
9206 (set_attr "prefix_extra" "1")
9207 (set_attr "length_immediate" "1")
9208 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9209 (set_attr "mode" "DI")])
;; Standard-named abs<mode>2 for the VI124_AVX2 vector modes, emitted as
;; pabs{b,w,d}.  The "%v" template prefix together with prefix "maybe_vex"
;; selects the VEX-encoded mnemonic when AVX is enabled.
;; The source may be a register or memory.
9211 (define_insn "abs<mode>2"
9212 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9214 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9216 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9217 [(set_attr "type" "sselog1")
9218 (set_attr "prefix_data16" "1")
9219 (set_attr "prefix_extra" "1")
9220 (set_attr "prefix" "maybe_vex")
9221 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named abs<mode>2 for the MMXMODEI modes, using MMX registers
;; ("y" constraint).  It shares the name template with the VI124_AVX2
;; pattern but iterates over a different mode set.
;; NOTE(review): the ';' after the output template starts an empty md
;; comment; it has no effect on the pattern.
9223 (define_insn "abs<mode>2"
9224 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9226 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9228 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9229 [(set_attr "type" "sselog1")
9230 (set_attr "prefix_rep" "0")
9231 (set_attr "prefix_extra" "1")
9232 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9233 (set_attr "mode" "DI")])
9235 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9237 ;; AMD SSE4A instructions
9239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntss / movntsd: store a scalar MODEF value from an SSE register
;; (operand 1) to memory (operand 0).  The store is wrapped in an unspec.
9241 (define_insn "sse4a_movnt<mode>"
9242 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9244 [(match_operand:MODEF 1 "register_operand" "x")]
9247 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9248 [(set_attr "type" "ssemov")
9249 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A movnt store: element 0 of a VF_128
;; register (operand 1) is selected and stored to scalar memory (operand 0).
9251 (define_insn "sse4a_vmmovnt<mode>"
9252 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9253 (unspec:<ssescalarmode>
9254 [(vec_select:<ssescalarmode>
9255 (match_operand:VF_128 1 "register_operand" "x")
9256 (parallel [(const_int 0)]))]
9259 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9260 [(set_attr "type" "ssemov")
9261 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are modeless constants in the range 0..255, which is why
;; length_immediate is 2.
9263 (define_insn "sse4a_extrqi"
9264 [(set (match_operand:V2DI 0 "register_operand" "=x")
9265 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9266 (match_operand 2 "const_0_to_255_operand" "")
9267 (match_operand 3 "const_0_to_255_operand" "")]
9270 "extrq\t{%3, %2, %0|%0, %2, %3}"
9271 [(set_attr "type" "sse")
9272 (set_attr "prefix_data16" "1")
9273 (set_attr "length_immediate" "2")
9274 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the destination and the
;; control value comes from a V16QI register (operand 2).
9276 (define_insn "sse4a_extrq"
9277 [(set (match_operand:V2DI 0 "register_operand" "=x")
9278 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9279 (match_operand:V16QI 2 "register_operand" "x")]
9282 "extrq\t{%2, %0|%0, %2}"
9283 [(set_attr "type" "sse")
9284 (set_attr "prefix_data16" "1")
9285 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is the second V2DI register source, and operands 3 and 4 are
;; modeless constants in the range 0..255 (length_immediate is 2).
9287 (define_insn "sse4a_insertqi"
9288 [(set (match_operand:V2DI 0 "register_operand" "=x")
9289 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9290 (match_operand:V2DI 2 "register_operand" "x")
9291 (match_operand 3 "const_0_to_255_operand" "")
9292 (match_operand 4 "const_0_to_255_operand" "")]
9295 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9296 [(set_attr "type" "sseins")
9297 (set_attr "prefix_data16" "0")
9298 (set_attr "prefix_rep" "1")
9299 (set_attr "length_immediate" "2")
9300 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is the second V2DI register source.
9302 (define_insn "sse4a_insertq"
9303 [(set (match_operand:V2DI 0 "register_operand" "=x")
9304 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9305 (match_operand:V2DI 2 "register_operand" "x")]
9308 "insertq\t{%2, %0|%0, %2}"
9309 [(set_attr "type" "sseins")
9310 (set_attr "prefix_data16" "0")
9311 (set_attr "prefix_rep" "1")
9312 (set_attr "mode" "TI")])
9314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9316 ;; Intel SSE4.1 instructions
9318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendps/blendpd (and VEX forms) over the VF float vector modes.
;; Note the operand order in the RTL: operand 2 precedes operand 1, followed
;; by the immediate selector (operand 3), whose upper bound depends on the
;; mode through <blendbits>.
;; Alternatives (see "isa"): legacy form with operand 1 tied to the
;; destination, and the VEX three-operand form.
9320 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9321 [(set (match_operand:VF 0 "register_operand" "=x,x")
9323 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9324 (match_operand:VF 1 "register_operand" "0,x")
9325 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9328 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9329 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9330 [(set_attr "isa" "noavx,avx")
9331 (set_attr "type" "ssemov")
9332 (set_attr "length_immediate" "1")
9333 (set_attr "prefix_data16" "1,*")
9334 (set_attr "prefix_extra" "1")
9335 (set_attr "prefix" "orig,vex")
9336 (set_attr "mode" "<MODE>")])
;; blendvps/blendvpd (and VEX forms) over the VF float vector modes.
;; The mask is operand 3: constraint "Yz" in the legacy alternative and any
;; SSE register ("x") in the VEX alternative.
;; The *_not_xmm0_*_maybe_avx predicates on operands 0-2 presumably keep
;; those operands out of xmm0 unless AVX is enabled.
9338 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9339 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9341 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9342 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9343 (match_operand:VF 3 "register_operand" "Yz,x")]
9347 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9348 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9349 [(set_attr "isa" "noavx,avx")
9350 (set_attr "type" "ssemov")
9351 (set_attr "length_immediate" "1")
9352 (set_attr "prefix_data16" "1,*")
9353 (set_attr "prefix_extra" "1")
9354 (set_attr "prefix" "orig,vex")
9355 (set_attr "mode" "<MODE>")])
;; dpps/dppd (and VEX forms) over the VF float vector modes.
;; Operands 1 and 2 are commutative ("%"); operand 3 is an 8-bit immediate.
;; Alternatives (see "isa"): legacy form with operand 1 tied to the
;; destination, and the VEX three-operand form.
9357 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9358 [(set (match_operand:VF 0 "register_operand" "=x,x")
9360 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9361 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9362 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9366 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9367 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9368 [(set_attr "isa" "noavx,avx")
9369 (set_attr "type" "ssemul")
9370 (set_attr "length_immediate" "1")
9371 (set_attr "prefix_data16" "1,*")
9372 (set_attr "prefix_extra" "1")
9373 (set_attr "prefix" "orig,vex")
9374 (set_attr "mode" "<MODE>")])
;; movntdqa / vmovntdqa: load from memory (operand 1, memory only) into an
;; SSE register, modelled as an unspec over the VI8_AVX2 modes.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9376 (define_insn "<sse4_1_avx2>_movntdqa"
9377 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9378 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9381 "%vmovntdqa\t{%1, %0|%0, %1}"
9382 [(set_attr "type" "ssemov")
9383 (set_attr "prefix_extra" "1")
9384 (set_attr "prefix" "maybe_vex")
9385 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 modes, modelled as an unspec with an
;; 8-bit immediate (operand 3).  Alternatives (see "isa"): legacy form with
;; operand 1 tied to the destination, and the VEX three-operand form.
9387 (define_insn "<sse4_1_avx2>_mpsadbw"
9388 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9389 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9390 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9391 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9395 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9396 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9397 [(set_attr "isa" "noavx,avx")
9398 (set_attr "type" "sselog1")
9399 (set_attr "length_immediate" "1")
9400 (set_attr "prefix_extra" "1")
9401 (set_attr "prefix" "orig,vex")
9402 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce one V16HI result.
;; VEX-only, three-operand form; operand 2 may be memory.
9404 (define_insn "avx2_packusdw"
9405 [(set (match_operand:V16HI 0 "register_operand" "=x")
9408 (match_operand:V8SI 1 "register_operand" "x"))
9410 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9412 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9413 [(set_attr "type" "sselog")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "prefix" "vex")
9416 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce one V8HI result.
;; Alternatives (see "isa"): legacy form with operand 1 tied to the
;; destination, and the VEX three-operand form.
9418 (define_insn "sse4_1_packusdw"
9419 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9422 (match_operand:V4SI 1 "register_operand" "0,x"))
9424 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9427 packusdw\t{%2, %0|%0, %2}
9428 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9429 [(set_attr "isa" "noavx,avx")
9430 (set_attr "type" "sselog")
9431 (set_attr "prefix_extra" "1")
9432 (set_attr "prefix" "orig,vex")
9433 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 modes.
;; The mask is operand 3: constraint "Yz" in the legacy alternative and any
;; SSE register ("x") in the VEX alternative.
;; The destination uses the same *_maybe_avx predicate as operand 1 (and as
;; the blendv float pattern), so it is not needlessly rejected when AVX is
;; enabled; without AVX the predicate behaves as before.
;; length_immediate is "*" for the legacy form and 1 for the VEX form.
9435 (define_insn "<sse4_1_avx2>_pblendvb"
9436 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9438 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9439 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9440 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9444 pblendvb\t{%3, %2, %0|%0, %2, %3}
9445 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9446 [(set_attr "isa" "noavx,avx")
9447 (set_attr "type" "ssemov")
9448 (set_attr "prefix_extra" "1")
9449 (set_attr "length_immediate" "*,1")
9450 (set_attr "prefix" "orig,vex")
9451 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw / vpblendw on V8HI with an 8-bit immediate selector.
;; Note the operand order in the RTL: operand 2 precedes operand 1.
;; Alternatives (see "isa"): legacy form with operand 1 tied to the
;; destination, and the VEX three-operand form.
9453 (define_insn "sse4_1_pblendw"
9454 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9456 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9457 (match_operand:V8HI 1 "register_operand" "0,x")
9458 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9461 pblendw\t{%3, %2, %0|%0, %2, %3}
9462 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9463 [(set_attr "isa" "noavx,avx")
9464 (set_attr "type" "ssemov")
9465 (set_attr "prefix_extra" "1")
9466 (set_attr "length_immediate" "1")
9467 (set_attr "prefix" "orig,vex")
9468 (set_attr "mode" "TI")])
9470 ;; The builtin passes an 8-bit immediate.  Expand it to the 16-bit mask
;; used by the V16HI pattern: the low 8 bits of operand 3 are copied into
;; bits 8-15 (val << 8 | val) before the insn is generated.
9471 (define_expand "avx2_pblendw"
9472 [(set (match_operand:V16HI 0 "register_operand" "")
9474 (match_operand:V16HI 2 "nonimmediate_operand" "")
9475 (match_operand:V16HI 1 "register_operand" "")
9476 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9479 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9480 operands[3] = GEN_INT (val << 8 | val);
;; 256-bit vpblendw on V16HI.  Operand 3 must satisfy avx2_pblendw_operand;
;; the output code masks it with 0xff so that only the low 8 bits are
;; printed as the instruction's immediate.
;; Note the operand order in the RTL: operand 2 precedes operand 1.
9483 (define_insn "*avx2_pblendw"
9484 [(set (match_operand:V16HI 0 "register_operand" "=x")
9486 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9487 (match_operand:V16HI 1 "register_operand" "x")
9488 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9491 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9492 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9494 [(set_attr "type" "ssemov")
9495 (set_attr "prefix_extra" "1")
9496 (set_attr "length_immediate" "1")
9497 (set_attr "prefix" "vex")
9498 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 modes with an 8-bit immediate selector.
;; VEX-only, three-operand form.  Note the operand order in the RTL:
;; operand 2 precedes operand 1.
9500 (define_insn "avx2_pblendd<mode>"
9501 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9503 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9504 (match_operand:VI4_AVX2 1 "register_operand" "x")
9505 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9507 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9508 [(set_attr "type" "ssemov")
9509 (set_attr "prefix_extra" "1")
9510 (set_attr "length_immediate" "1")
9511 (set_attr "prefix" "vex")
9512 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw / vphminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a
;; single register-or-memory source.  "%v" with prefix "maybe_vex" selects
;; the VEX mnemonic when AVX is enabled.
9514 (define_insn "sse4_1_phminposuw"
9515 [(set (match_operand:V8HI 0 "register_operand" "=x")
9516 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9517 UNSPEC_PHMINPOSUW))]
9519 "%vphminposuw\t{%1, %0|%0, %1}"
9520 [(set_attr "type" "sselog1")
9521 (set_attr "prefix_extra" "1")
9522 (set_attr "prefix" "maybe_vex")
9523 (set_attr "mode" "TI")])
;; AVX2 V16QI -> V16HI conversion emitted as vpmov<extsuffix>bw; the
;; <code>/<extsuffix> iterators choose the extension flavour.
;; The whole source vector is converted.  VEX-only.
9525 (define_insn "avx2_<code>v16qiv16hi2"
9526 [(set (match_operand:V16HI 0 "register_operand" "=x")
9528 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9530 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9531 [(set_attr "type" "ssemov")
9532 (set_attr "prefix_extra" "1")
9533 (set_attr "prefix" "vex")
9534 (set_attr "mode" "OI")])
;; SSE4.1 byte -> halfword conversion producing V8HI, emitted as
;; pmov<extsuffix>bw.  The source is a V16QI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9536 (define_insn "sse4_1_<code>v8qiv8hi2"
9537 [(set (match_operand:V8HI 0 "register_operand" "=x")
9540 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9541 (parallel [(const_int 0)
9550 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9551 [(set_attr "type" "ssemov")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "prefix" "maybe_vex")
9554 (set_attr "mode" "TI")])
;; AVX2 byte -> word conversion producing V8SI, emitted as
;; vpmov<extsuffix>bd.  The source is a V16QI operand from which the RTL
;; selects elements starting at index 0.  VEX-only.
9556 (define_insn "avx2_<code>v8qiv8si2"
9557 [(set (match_operand:V8SI 0 "register_operand" "=x")
9560 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9561 (parallel [(const_int 0)
9570 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9571 [(set_attr "type" "ssemov")
9572 (set_attr "prefix_extra" "1")
9573 (set_attr "prefix" "vex")
9574 (set_attr "mode" "OI")])
;; SSE4.1 byte -> word conversion producing V4SI, emitted as
;; pmov<extsuffix>bd.  The source is a V16QI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9576 (define_insn "sse4_1_<code>v4qiv4si2"
9577 [(set (match_operand:V4SI 0 "register_operand" "=x")
9580 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9581 (parallel [(const_int 0)
9586 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9587 [(set_attr "type" "ssemov")
9588 (set_attr "prefix_extra" "1")
9589 (set_attr "prefix" "maybe_vex")
9590 (set_attr "mode" "TI")])
;; AVX2 V8HI -> V8SI conversion emitted as vpmov<extsuffix>wd; the
;; <code>/<extsuffix> iterators choose the extension flavour.
;; The whole source vector is converted.  VEX-only.
9592 (define_insn "avx2_<code>v8hiv8si2"
9593 [(set (match_operand:V8SI 0 "register_operand" "=x")
9595 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9597 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9598 [(set_attr "type" "ssemov")
9599 (set_attr "prefix_extra" "1")
9600 (set_attr "prefix" "vex")
9601 (set_attr "mode" "OI")])
;; SSE4.1 halfword -> word conversion producing V4SI, emitted as
;; pmov<extsuffix>wd.  The source is a V8HI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9603 (define_insn "sse4_1_<code>v4hiv4si2"
9604 [(set (match_operand:V4SI 0 "register_operand" "=x")
9607 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9608 (parallel [(const_int 0)
9613 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9614 [(set_attr "type" "ssemov")
9615 (set_attr "prefix_extra" "1")
9616 (set_attr "prefix" "maybe_vex")
9617 (set_attr "mode" "TI")])
;; AVX2 byte -> quadword conversion producing V4DI, emitted as
;; vpmov<extsuffix>bq.  The source is a V16QI operand from which the RTL
;; selects elements starting at index 0.  VEX-only.
9619 (define_insn "avx2_<code>v4qiv4di2"
9620 [(set (match_operand:V4DI 0 "register_operand" "=x")
9623 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9624 (parallel [(const_int 0)
9629 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9630 [(set_attr "type" "ssemov")
9631 (set_attr "prefix_extra" "1")
9632 (set_attr "prefix" "vex")
9633 (set_attr "mode" "OI")])
;; SSE4.1 byte -> quadword conversion producing V2DI, emitted as
;; pmov<extsuffix>bq.  The source is a V16QI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9635 (define_insn "sse4_1_<code>v2qiv2di2"
9636 [(set (match_operand:V2DI 0 "register_operand" "=x")
9639 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9640 (parallel [(const_int 0)
9643 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9644 [(set_attr "type" "ssemov")
9645 (set_attr "prefix_extra" "1")
9646 (set_attr "prefix" "maybe_vex")
9647 (set_attr "mode" "TI")])
;; AVX2 halfword -> quadword conversion producing V4DI, emitted as
;; vpmov<extsuffix>wq.  The source is a V8HI operand from which the RTL
;; selects elements starting at index 0.  VEX-only.
9649 (define_insn "avx2_<code>v4hiv4di2"
9650 [(set (match_operand:V4DI 0 "register_operand" "=x")
9653 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9654 (parallel [(const_int 0)
9659 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9660 [(set_attr "type" "ssemov")
9661 (set_attr "prefix_extra" "1")
9662 (set_attr "prefix" "vex")
9663 (set_attr "mode" "OI")])
;; SSE4.1 halfword -> quadword conversion producing V2DI, emitted as
;; pmov<extsuffix>wq.  The source is a V8HI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9665 (define_insn "sse4_1_<code>v2hiv2di2"
9666 [(set (match_operand:V2DI 0 "register_operand" "=x")
9669 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9670 (parallel [(const_int 0)
9673 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9674 [(set_attr "type" "ssemov")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "prefix" "maybe_vex")
9677 (set_attr "mode" "TI")])
;; AVX2 V4SI -> V4DI conversion emitted as vpmov<extsuffix>dq; the
;; <code>/<extsuffix> iterators choose the extension flavour.
;; The mnemonic exists only in VEX-encoded form, so the "prefix" attribute
;; is set to "vex" (as in the other avx2_<code>* conversion patterns);
;; without it the insn would be treated as legacy-encoded.
9679 (define_insn "avx2_<code>v4siv4di2"
9680 [(set (match_operand:V4DI 0 "register_operand" "=x")
9682 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9684 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9685 [(set_attr "type" "ssemov")
9686 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9687 (set_attr "mode" "OI")])
;; SSE4.1 word -> quadword conversion producing V2DI, emitted as
;; pmov<extsuffix>dq.  The source is a V4SI operand from which the RTL
;; selects elements starting at index 0.
;; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX is enabled.
9689 (define_insn "sse4_1_<code>v2siv2di2"
9690 [(set (match_operand:V2DI 0 "register_operand" "=x")
9693 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9694 (parallel [(const_int 0)
9697 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9698 [(set_attr "type" "ssemov")
9699 (set_attr "prefix_extra" "1")
9700 (set_attr "prefix" "maybe_vex")
9701 (set_attr "mode" "TI")])
9703 ;; vtestps/vtestpd are very similar to comiss and ucomiss in how they set
9704 ;; FLAGS_REG, but they are not really compare instructions.
;; The pattern sets FLAGS_REG (CCmode) from an unspec of the two VF inputs
;; and has no register destination; operand 1 may be memory.
9705 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9706 [(set (reg:CC FLAGS_REG)
9707 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9708 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9711 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9712 [(set_attr "type" "ssecomi")
9713 (set_attr "prefix_extra" "1")
9714 (set_attr "prefix" "vex")
9715 (set_attr "mode" "<MODE>")])
9717 ;; ptest is very similar to comiss and ucomiss in how it sets FLAGS_REG,
9718 ;; but it is not really a compare instruction.
;; 256-bit form: both inputs are V4DI, operand 1 may be memory.  VEX-only.
9719 (define_insn "avx_ptest256"
9720 [(set (reg:CC FLAGS_REG)
9721 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9722 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9725 "vptest\t{%1, %0|%0, %1}"
9726 [(set_attr "type" "ssecomi")
9727 (set_attr "prefix_extra" "1")
9728 (set_attr "prefix" "vex")
9729 (set_attr "mode" "OI")])
;; 128-bit ptest / vptest: sets FLAGS_REG (CCmode) from an unspec of two
;; V2DI inputs; operand 1 may be memory.  "%v" with prefix "maybe_vex"
;; selects the VEX mnemonic when AVX is enabled.
9731 (define_insn "sse4_1_ptest"
9732 [(set (reg:CC FLAGS_REG)
9733 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9734 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9737 "%vptest\t{%1, %0|%0, %1}"
9738 [(set_attr "type" "ssecomi")
9739 (set_attr "prefix_extra" "1")
9740 (set_attr "prefix" "maybe_vex")
9741 (set_attr "mode" "TI")])
;; Packed roundps/roundpd (and VEX forms) over the VF float vector modes.
;; Operand 2 is the rounding-control immediate, restricted to 0..15.
;; prefix_data16 is computed from a TARGET_AVX test rather than given as a
;; constant; "%v" with prefix "maybe_vex" selects the VEX mnemonic when AVX
;; is enabled.
9743 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9744 [(set (match_operand:VF 0 "register_operand" "=x")
9746 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9747 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9750 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9751 [(set_attr "type" "ssecvt")
9752 (set (attr "prefix_data16")
9754 (match_test "TARGET_AVX")
9756 (const_string "1")))
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "length_immediate" "1")
9759 (set_attr "prefix" "maybe_vex")
9760 (set_attr "mode" "<MODE>")])
;; Scalar roundss/roundsd (and VEX forms) on VF_128 registers.
;; Operand 2 is the value being rounded, operand 3 the rounding-control
;; immediate (0..15), and operand 1 the other vector input (tied to the
;; destination in the legacy alternative).
9762 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9763 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9766 [(match_operand:VF_128 2 "register_operand" "x,x")
9767 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9769 (match_operand:VF_128 1 "register_operand" "0,x")
9773 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9774 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9775 [(set_attr "isa" "noavx,avx")
9776 (set_attr "type" "ssecvt")
9777 (set_attr "length_immediate" "1")
9778 (set_attr "prefix_data16" "1,*")
9779 (set_attr "prefix_extra" "1")
9780 (set_attr "prefix" "orig,vex")
9781 (set_attr "mode" "<MODE>")])
;; Standard-named round<mode>2 expander for the VF float vector modes.
;; Only enabled when TARGET_ROUND holds and trapping math is disabled.
;; Preparation code:
;;   - builds pred_half = 0.5 - 2**(-p-1), i.e. nextafter (0.5, 0.0) for the
;;     element format, and broadcasts it into a vector register;
;;   - emits copysign of that vector with operand 1 into operands[3];
;;   - allocates operands[4] as a fresh register and sets operands[5] to
;;     ROUND_TRUNC, which the second set in the template consumes via
;;     match_dup 4 / match_dup 5.
9783 (define_expand "round<mode>2"
9786 (match_operand:VF 1 "nonimmediate_operand" "")
9788 (set (match_operand:VF 0 "register_operand" "")
9790 [(match_dup 4) (match_dup 5)]
9792 "TARGET_ROUND && !flag_trapping_math"
9794 enum machine_mode scalar_mode;
9795 const struct real_format *fmt;
9796 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9799 scalar_mode = GET_MODE_INNER (<MODE>mode);
9801 /* load nextafter (0.5, 0.0) */
9802 fmt = REAL_MODE_FORMAT (scalar_mode);
9803 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9804 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9805 half = const_double_from_real_value (pred_half, scalar_mode);
9807 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9808 vec_half = force_reg (<MODE>mode, vec_half);
9810 operands[3] = gen_reg_rtx (<MODE>mode);
9811 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9813 operands[4] = gen_reg_rtx (<MODE>mode);
9814 operands[5] = GEN_INT (ROUND_TRUNC);
9817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9819 ;; Intel SSE4.2 string/text processing instructions
9821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestri/pcmpestrm pattern with three results: operand 0
;; (SImode, constraint "c"), operand 1 (V16QI, constraint "Yz") and
;; FLAGS_REG.  Split only while pseudos can still be created.
;; The split code checks REG_UNUSED notes on curr_insn to see which results
;; are live, then emits the pcmpestri and/or pcmpestrm insns (presumably
;; guarded by the matching liveness flag), the cconly insn when only the
;; flags are needed, or a NOTE_INSN_DELETED when nothing is used.
9823 (define_insn_and_split "sse4_2_pcmpestr"
9824 [(set (match_operand:SI 0 "register_operand" "=c,c")
9826 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9827 (match_operand:SI 3 "register_operand" "a,a")
9828 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9829 (match_operand:SI 5 "register_operand" "d,d")
9830 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9832 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9840 (set (reg:CC FLAGS_REG)
9849 && can_create_pseudo_p ()"
9854 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9855 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9856 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9859 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9860 operands[3], operands[4],
9861 operands[5], operands[6]));
9863 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9864 operands[3], operands[4],
9865 operands[5], operands[6]));
9866 if (flags && !(ecx || xmm0))
9867 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9868 operands[2], operands[3],
9869 operands[4], operands[5],
9871 if (!(flags || ecx || xmm0))
9872 emit_note (NOTE_INSN_DELETED);
9876 [(set_attr "type" "sselog")
9877 (set_attr "prefix_data16" "1")
9878 (set_attr "prefix_extra" "1")
9879 (set_attr "length_immediate" "1")
9880 (set_attr "memory" "none,load")
9881 (set_attr "mode" "TI")])
;; pcmpestri / vpcmpestri: the result goes to operand 0 (SImode, constraint
;; "c") and FLAGS_REG is set as well.  The SImode inputs use constraints "a"
;; (operand 2) and "d" (operand 4); they are not printed in the template.
;; Operand 5 is the 8-bit control immediate.
9883 (define_insn "sse4_2_pcmpestri"
9884 [(set (match_operand:SI 0 "register_operand" "=c,c")
9886 [(match_operand:V16QI 1 "register_operand" "x,x")
9887 (match_operand:SI 2 "register_operand" "a,a")
9888 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9889 (match_operand:SI 4 "register_operand" "d,d")
9890 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9892 (set (reg:CC FLAGS_REG)
9901 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9902 [(set_attr "type" "sselog")
9903 (set_attr "prefix_data16" "1")
9904 (set_attr "prefix_extra" "1")
9905 (set_attr "prefix" "maybe_vex")
9906 (set_attr "length_immediate" "1")
9907 (set_attr "memory" "none,load")
9908 (set_attr "mode" "TI")])
;; pcmpestrm / vpcmpestrm: the result goes to operand 0 (V16QI, constraint
;; "Yz") and FLAGS_REG is set as well.  The SImode inputs use constraints
;; "a" (operand 2) and "d" (operand 4); they are not printed in the template.
;; Operand 5 is the 8-bit control immediate.
9910 (define_insn "sse4_2_pcmpestrm"
9911 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9913 [(match_operand:V16QI 1 "register_operand" "x,x")
9914 (match_operand:SI 2 "register_operand" "a,a")
9915 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9916 (match_operand:SI 4 "register_operand" "d,d")
9917 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9919 (set (reg:CC FLAGS_REG)
9928 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9929 [(set_attr "type" "sselog")
9930 (set_attr "prefix_data16" "1")
9931 (set_attr "prefix_extra" "1")
9932 (set_attr "length_immediate" "1")
9933 (set_attr "prefix" "maybe_vex")
9934 (set_attr "memory" "none,load")
9935 (set_attr "mode" "TI")])
;; Flags-only variant of pcmpestr: FLAGS_REG is the only real result.
;; Operands 0 and 1 are scratch clobbers.  Alternatives 0-1 clobber a "Yz"
;; vector register and emit pcmpestrm; alternatives 2-3 clobber a "c"
;; register and emit pcmpestri.  Within each pair the second alternative
;; takes operand 4 from memory (see the "memory" attribute).
9937 (define_insn "sse4_2_pcmpestr_cconly"
9938 [(set (reg:CC FLAGS_REG)
9940 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9941 (match_operand:SI 3 "register_operand" "a,a,a,a")
9942 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9943 (match_operand:SI 5 "register_operand" "d,d,d,d")
9944 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9946 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9947 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9950 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9951 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9952 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9953 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9954 [(set_attr "type" "sselog")
9955 (set_attr "prefix_data16" "1")
9956 (set_attr "prefix_extra" "1")
9957 (set_attr "length_immediate" "1")
9958 (set_attr "memory" "none,load,none,load")
9959 (set_attr "prefix" "maybe_vex")
9960 (set_attr "mode" "TI")])
;; Combined pcmpistri/pcmpistrm pattern with three results: operand 0
;; (SImode, constraint "c"), operand 1 (V16QI, constraint "Yz") and
;; FLAGS_REG.  Split only while pseudos can still be created.
;; The split code checks REG_UNUSED notes on curr_insn to see which results
;; are live, then emits the pcmpistri and/or pcmpistrm insns (presumably
;; guarded by the matching liveness flag), the cconly insn when only the
;; flags are needed, or a NOTE_INSN_DELETED when nothing is used.
9962 (define_insn_and_split "sse4_2_pcmpistr"
9963 [(set (match_operand:SI 0 "register_operand" "=c,c")
9965 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9966 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9967 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9969 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9975 (set (reg:CC FLAGS_REG)
9982 && can_create_pseudo_p ()"
9987 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9988 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9989 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9992 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9993 operands[3], operands[4]));
9995 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9996 operands[3], operands[4]));
9997 if (flags && !(ecx || xmm0))
9998 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9999 operands[2], operands[3],
10001 if (!(flags || ecx || xmm0))
10002 emit_note (NOTE_INSN_DELETED);
10006 [(set_attr "type" "sselog")
10007 (set_attr "prefix_data16" "1")
10008 (set_attr "prefix_extra" "1")
10009 (set_attr "length_immediate" "1")
10010 (set_attr "memory" "none,load")
10011 (set_attr "mode" "TI")])
;; SI-result form of the implicit-length string compare: sets operand 0
;; (SI, constraint "c") and FLAGS_REG from V16QI operands 1 and 2 plus the
;; const_0_to_255_operand immediate operand 3, printing pcmpistri.
;; Alternative 1 takes operand 2 from memory ("memory" attr "none,load").
10013 (define_insn "sse4_2_pcmpistri"
10014 [(set (match_operand:SI 0 "register_operand" "=c,c")
10016 [(match_operand:V16QI 1 "register_operand" "x,x")
10017 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10018 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10020 (set (reg:CC FLAGS_REG)
10027 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10028 [(set_attr "type" "sselog")
10029 (set_attr "prefix_data16" "1")
10030 (set_attr "prefix_extra" "1")
10031 (set_attr "length_immediate" "1")
10032 (set_attr "prefix" "maybe_vex")
10033 (set_attr "memory" "none,load")
10034 (set_attr "mode" "TI")])
;; V16QI-result form of the implicit-length string compare: sets operand 0
;; (V16QI, constraint "Yz") and FLAGS_REG from V16QI operands 1 and 2 plus
;; the const_0_to_255_operand immediate operand 3, printing pcmpistrm.
;; Alternative 1 takes operand 2 from memory ("memory" attr "none,load").
10036 (define_insn "sse4_2_pcmpistrm"
10037 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10039 [(match_operand:V16QI 1 "register_operand" "x,x")
10040 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10041 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10043 (set (reg:CC FLAGS_REG)
10050 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10051 [(set_attr "type" "sselog")
10052 (set_attr "prefix_data16" "1")
10053 (set_attr "prefix_extra" "1")
10054 (set_attr "length_immediate" "1")
10055 (set_attr "prefix" "maybe_vex")
10056 (set_attr "memory" "none,load")
10057 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length string compare.  Only FLAGS_REG
;; is set; the V16QI (operand 0) and SI (operand 1) results are
;; match_scratch clobbers.  Alternatives 0/1 constrain the V16QI scratch to
;; "Yz" and print pcmpistrm; alternatives 2/3 constrain the SI scratch to
;; "c" and print pcmpistri.  Operand 3 may be memory in alternatives 1 and 3.
10059 (define_insn "sse4_2_pcmpistr_cconly"
10060 [(set (reg:CC FLAGS_REG)
10062 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10063 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10064 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10066 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10067 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10070 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10071 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10072 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10073 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10074 [(set_attr "type" "sselog")
10075 (set_attr "prefix_data16" "1")
10076 (set_attr "prefix_extra" "1")
10077 (set_attr "length_immediate" "1")
10078 (set_attr "memory" "none,load,none,load")
10079 (set_attr "prefix" "maybe_vex")
10080 (set_attr "mode" "TI")])
10082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10084 ;; XOP instructions
10086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10088 ;; XOP parallel integer multiply/add instructions.
10089 ;; Note the XOP multiply/add instructions
10090 ;; a[i] = b[i] * c[i] + d[i];
10091 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add printed as vpmacsww.  Operands 1 and 2 are the
;; multiplicands (operand 1 is marked commutative with "%", operand 2 may be
;; memory); operand 3 is the addend and its "x" constraint keeps it in a
;; register even though the predicate is nonimmediate_operand.
10092 (define_insn "xop_pmacsww"
10093 [(set (match_operand:V8HI 0 "register_operand" "=x")
10096 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10097 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10098 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10100 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10101 [(set_attr "type" "ssemuladd")
10102 (set_attr "mode" "TI")])
;; V8HI multiply/add printed as vpmacssww.  The product is a mult:V8HI of
;; operands 1 and 2 (operand 1 commutative, operand 2 may be memory);
;; operand 3 is the register-only addend.
10104 (define_insn "xop_pmacssww"
10105 [(set (match_operand:V8HI 0 "register_operand" "=x")
10107 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10108 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10109 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10111 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10112 [(set_attr "type" "ssemuladd")
10113 (set_attr "mode" "TI")])
;; V4SI multiply/add printed as vpmacsdd.  Operands 1 and 2 are the
;; multiplicands (operand 1 commutative, operand 2 may be memory); operand 3
;; is the register-only addend.
10115 (define_insn "xop_pmacsdd"
10116 [(set (match_operand:V4SI 0 "register_operand" "=x")
10119 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10120 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10121 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10123 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10124 [(set_attr "type" "ssemuladd")
10125 (set_attr "mode" "TI")])
;; V4SI multiply/add printed as vpmacssdd.  The product is a mult:V4SI of
;; operands 1 and 2 (operand 1 commutative, operand 2 may be memory);
;; operand 3 is the register-only addend.
10127 (define_insn "xop_pmacssdd"
10128 [(set (match_operand:V4SI 0 "register_operand" "=x")
10130 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10131 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10132 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10134 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10135 [(set_attr "type" "ssemuladd")
10136 (set_attr "mode" "TI")])
;; Widening multiply/add printed as vpmacssdql: V4SI operands 1 and 2 feed a
;; V2DI result with V2DI addend operand 3 (register only).  Both V4SI inputs
;; are narrowed by a parallel index list that starts at element 1.
10138 (define_insn "xop_pmacssdql"
10139 [(set (match_operand:V2DI 0 "register_operand" "=x")
10144 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10145 (parallel [(const_int 1)
10148 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10149 (parallel [(const_int 1)
10151 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10153 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10154 [(set_attr "type" "ssemuladd")
10155 (set_attr "mode" "TI")])
;; Widening multiply/add printed as vpmacssdqh: V4SI operands 1 and 2 feed a
;; V2DI result with V2DI addend operand 3 (register only).  Both V4SI inputs
;; are narrowed by a parallel index list that starts at element 0.
10157 (define_insn "xop_pmacssdqh"
10158 [(set (match_operand:V2DI 0 "register_operand" "=x")
10163 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10164 (parallel [(const_int 0)
10168 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10169 (parallel [(const_int 0)
10171 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10173 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10174 [(set_attr "type" "ssemuladd")
10175 (set_attr "mode" "TI")])
;; Widening multiply/add printed as vpmacsdql: V4SI operands 1 and 2 feed a
;; V2DI result with V2DI addend operand 3 (register only).  Both V4SI inputs
;; are narrowed by a parallel index list that starts at element 1.
10177 (define_insn "xop_pmacsdql"
10178 [(set (match_operand:V2DI 0 "register_operand" "=x")
10183 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10184 (parallel [(const_int 1)
10188 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10189 (parallel [(const_int 1)
10191 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10193 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10194 [(set_attr "type" "ssemuladd")
10195 (set_attr "mode" "TI")])
10197 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10198 ;; fake it with a multiply/add. In general, we expect the define_split to
10199 ;; occur before register allocation, so we have to handle the corner case where
10200 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of V4SI operands 1 and 2 into a
;; V2DI result.  The output is earlyclobber ("=&x"), so it cannot share a
;; register with either input.  The split fires once reload_completed holds
;; and sets operands[3] to the V2DI zero vector for the replacement pattern
;; (presumably as the addend of a multiply/add -- confirm against the
;; replacement RTL).
10201 (define_insn_and_split "xop_mulv2div2di3_low"
10202 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10206 (match_operand:V4SI 1 "register_operand" "%x")
10207 (parallel [(const_int 1)
10211 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10212 (parallel [(const_int 1)
10213 (const_int 3)])))))]
10216 "&& reload_completed"
10217 [(set (match_dup 0)
10225 (parallel [(const_int 1)
10230 (parallel [(const_int 1)
10234 operands[3] = CONST0_RTX (V2DImode);
10236 [(set_attr "type" "ssemul")
10237 (set_attr "mode" "TI")])
;; Widening multiply/add printed as vpmacsdqh: V4SI operands 1 and 2 feed a
;; V2DI result with V2DI addend operand 3 (register only).  Both V4SI inputs
;; are narrowed by a parallel index list that starts at element 0.
10239 (define_insn "xop_pmacsdqh"
10240 [(set (match_operand:V2DI 0 "register_operand" "=x")
10245 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10246 (parallel [(const_int 0)
10250 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10251 (parallel [(const_int 0)
10253 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10255 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10256 [(set_attr "type" "ssemuladd")
10257 (set_attr "mode" "TI")])
10259 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10260 ;; fake it with a multiply/add. In general, we expect the define_split to
10261 ;; occur before register allocation, so we have to handle the corner case where
10262 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of V4SI operands 1 and 2 into a
;; V2DI result.  The output is earlyclobber ("=&x"), so it cannot share a
;; register with either input.  The split fires once reload_completed holds
;; and sets operands[3] to the V2DI zero vector for the replacement pattern
;; (presumably as the addend of a multiply/add -- confirm against the
;; replacement RTL).
10263 (define_insn_and_split "xop_mulv2div2di3_high"
10264 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10268 (match_operand:V4SI 1 "register_operand" "%x")
10269 (parallel [(const_int 0)
10273 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10274 (parallel [(const_int 0)
10275 (const_int 2)])))))]
10278 "&& reload_completed"
10279 [(set (match_dup 0)
10287 (parallel [(const_int 0)
10292 (parallel [(const_int 0)
10296 operands[3] = CONST0_RTX (V2DImode);
10298 [(set_attr "type" "ssemul")
10299 (set_attr "mode" "TI")])
10301 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/add printed as vpmacsswd: V8HI operands 1 and 2 feed a
;; V4SI result with V4SI addend operand 3 (register only).  Both V8HI inputs
;; are narrowed by a parallel index list that starts at element 1.
10302 (define_insn "xop_pmacsswd"
10303 [(set (match_operand:V4SI 0 "register_operand" "=x")
10308 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10309 (parallel [(const_int 1)
10315 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10316 (parallel [(const_int 1)
10320 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10322 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10323 [(set_attr "type" "ssemuladd")
10324 (set_attr "mode" "TI")])
;; Widening multiply/add printed as vpmacswd: V8HI operands 1 and 2 feed a
;; V4SI result with V4SI addend operand 3 (register only).  Both V8HI inputs
;; are narrowed by a parallel index list that starts at element 1.
10326 (define_insn "xop_pmacswd"
10327 [(set (match_operand:V4SI 0 "register_operand" "=x")
10332 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10333 (parallel [(const_int 1)
10339 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10340 (parallel [(const_int 1)
10344 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10346 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10347 [(set_attr "type" "ssemuladd")
10348 (set_attr "mode" "TI")])
;; Multiply/add/accumulate printed as vpmadcsswd: V8HI operands 1 and 2 feed
;; a V4SI result with V4SI addend operand 3 (register only).  The RTL uses
;; two pairs of parallel index lists over the inputs, one pair starting at
;; element 0 and one pair starting at element 1 (ending at element 7).
10350 (define_insn "xop_pmadcsswd"
10351 [(set (match_operand:V4SI 0 "register_operand" "=x")
10357 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10358 (parallel [(const_int 0)
10364 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10365 (parallel [(const_int 0)
10373 (parallel [(const_int 1)
10380 (parallel [(const_int 1)
10383 (const_int 7)])))))
10384 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10386 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10387 [(set_attr "type" "ssemuladd")
10388 (set_attr "mode" "TI")])
;; Multiply/add/accumulate printed as vpmadcswd: V8HI operands 1 and 2 feed
;; a V4SI result with V4SI addend operand 3 (register only).  The RTL uses
;; two pairs of parallel index lists over the inputs, one pair starting at
;; element 0 and one pair starting at element 1 (ending at element 7).
10390 (define_insn "xop_pmadcswd"
10391 [(set (match_operand:V4SI 0 "register_operand" "=x")
10397 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10398 (parallel [(const_int 0)
10404 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10405 (parallel [(const_int 0)
10413 (parallel [(const_int 1)
10420 (parallel [(const_int 1)
10423 (const_int 7)])))))
10424 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10426 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10427 [(set_attr "type" "ssemuladd")
10428 (set_attr "mode" "TI")])
10430 ;; XOP parallel XMM conditional moves
;; Conditional move over every mode of the V iterator, printed as vpcmov.
;; Operand 3 comes first in the RTL, followed by operands 1 and 2.  The two
;; alternatives allow either operand 3 (alternative 1) or operand 2
;; (alternative 0) to be in memory, never both; operand 1 is always a
;; register.
10431 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10432 [(set (match_operand:V 0 "register_operand" "=x,x")
10434 (match_operand:V 3 "nonimmediate_operand" "x,m")
10435 (match_operand:V 1 "register_operand" "x,x")
10436 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10438 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10439 [(set_attr "type" "sse4arg")])
10441 ;; XOP horizontal add/subtract instructions
;; Single-input horizontal add printed as vphaddbw: V16QI operand 1
;; (register or memory) produces V8HI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, one starting at element 0 and
;; one starting at element 1 and ending at element 15.
10442 (define_insn "xop_phaddbw"
10443 [(set (match_operand:V8HI 0 "register_operand" "=x")
10447 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10448 (parallel [(const_int 0)
10459 (parallel [(const_int 1)
10466 (const_int 15)])))))]
10468 "vphaddbw\t{%1, %0|%0, %1}"
10469 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddbd: V16QI operand 1
;; (register or memory) produces V4SI operand 0.  The RTL picks elements of
;; operand 1 with four parallel index lists starting at elements 0, 1, 2
;; and 3.
10471 (define_insn "xop_phaddbd"
10472 [(set (match_operand:V4SI 0 "register_operand" "=x")
10477 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10478 (parallel [(const_int 0)
10485 (parallel [(const_int 1)
10488 (const_int 13)]))))
10493 (parallel [(const_int 2)
10500 (parallel [(const_int 3)
10503 (const_int 15)]))))))]
10505 "vphaddbd\t{%1, %0|%0, %1}"
10506 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddbq: V16QI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with eight parallel index lists; the visible starting elements
;; are 0, 1, 2, 3, 8, 9, 10 and 11.
10508 (define_insn "xop_phaddbq"
10509 [(set (match_operand:V2DI 0 "register_operand" "=x")
10515 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10516 (parallel [(const_int 0)
10521 (parallel [(const_int 1)
10527 (parallel [(const_int 2)
10532 (parallel [(const_int 3)
10533 (const_int 7)])))))
10539 (parallel [(const_int 8)
10544 (parallel [(const_int 9)
10545 (const_int 13)]))))
10550 (parallel [(const_int 10)
10555 (parallel [(const_int 11)
10556 (const_int 15)])))))))]
10558 "vphaddbq\t{%1, %0|%0, %1}"
10559 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddwd: V8HI operand 1
;; (register or memory) produces V4SI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 7).
10561 (define_insn "xop_phaddwd"
10562 [(set (match_operand:V4SI 0 "register_operand" "=x")
10566 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10567 (parallel [(const_int 0)
10574 (parallel [(const_int 1)
10577 (const_int 7)])))))]
10579 "vphaddwd\t{%1, %0|%0, %1}"
10580 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddwq: V8HI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with four parallel index lists starting at elements 0, 1, 2
;; and 3.
10582 (define_insn "xop_phaddwq"
10583 [(set (match_operand:V2DI 0 "register_operand" "=x")
10588 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10589 (parallel [(const_int 0)
10594 (parallel [(const_int 1)
10600 (parallel [(const_int 2)
10605 (parallel [(const_int 3)
10606 (const_int 7)]))))))]
10608 "vphaddwq\t{%1, %0|%0, %1}"
10609 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphadddq: V4SI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 3).
10611 (define_insn "xop_phadddq"
10612 [(set (match_operand:V2DI 0 "register_operand" "=x")
10616 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10617 (parallel [(const_int 0)
10622 (parallel [(const_int 1)
10623 (const_int 3)])))))]
10625 "vphadddq\t{%1, %0|%0, %1}"
10626 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddubw: V16QI operand 1
;; (register or memory) produces V8HI operand 0.  Operand shapes and the
;; visible index lists are the same as for xop_phaddbw; only the mnemonic
;; differs in the lines shown here.
10628 (define_insn "xop_phaddubw"
10629 [(set (match_operand:V8HI 0 "register_operand" "=x")
10633 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10634 (parallel [(const_int 0)
10645 (parallel [(const_int 1)
10652 (const_int 15)])))))]
10654 "vphaddubw\t{%1, %0|%0, %1}"
10655 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddubd: V16QI operand 1
;; (register or memory) produces V4SI operand 0.  The RTL picks elements of
;; operand 1 with four parallel index lists starting at elements 0, 1, 2
;; and 3.
10657 (define_insn "xop_phaddubd"
10658 [(set (match_operand:V4SI 0 "register_operand" "=x")
10663 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10664 (parallel [(const_int 0)
10671 (parallel [(const_int 1)
10674 (const_int 13)]))))
10679 (parallel [(const_int 2)
10686 (parallel [(const_int 3)
10689 (const_int 15)]))))))]
10691 "vphaddubd\t{%1, %0|%0, %1}"
10692 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddubq: V16QI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with eight parallel index lists; the visible starting elements
;; are 0, 1, 2, 3, 8, 9, 10 and 11.
10694 (define_insn "xop_phaddubq"
10695 [(set (match_operand:V2DI 0 "register_operand" "=x")
10701 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10702 (parallel [(const_int 0)
10707 (parallel [(const_int 1)
10713 (parallel [(const_int 2)
10718 (parallel [(const_int 3)
10719 (const_int 7)])))))
10725 (parallel [(const_int 8)
10730 (parallel [(const_int 9)
10731 (const_int 13)]))))
10736 (parallel [(const_int 10)
10741 (parallel [(const_int 11)
10742 (const_int 15)])))))))]
10744 "vphaddubq\t{%1, %0|%0, %1}"
10745 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphadduwd: V8HI operand 1
;; (register or memory) produces V4SI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 7).
10747 (define_insn "xop_phadduwd"
10748 [(set (match_operand:V4SI 0 "register_operand" "=x")
10752 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10753 (parallel [(const_int 0)
10760 (parallel [(const_int 1)
10763 (const_int 7)])))))]
10765 "vphadduwd\t{%1, %0|%0, %1}"
10766 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphadduwq: V8HI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with four parallel index lists starting at elements 0, 1, 2
;; and 3.
10768 (define_insn "xop_phadduwq"
10769 [(set (match_operand:V2DI 0 "register_operand" "=x")
10774 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10775 (parallel [(const_int 0)
10780 (parallel [(const_int 1)
10786 (parallel [(const_int 2)
10791 (parallel [(const_int 3)
10792 (const_int 7)]))))))]
10794 "vphadduwq\t{%1, %0|%0, %1}"
10795 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal add printed as vphaddudq: V4SI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 3).
10797 (define_insn "xop_phaddudq"
10798 [(set (match_operand:V2DI 0 "register_operand" "=x")
10802 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10803 (parallel [(const_int 0)
10808 (parallel [(const_int 1)
10809 (const_int 3)])))))]
10811 "vphaddudq\t{%1, %0|%0, %1}"
10812 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal subtract printed as vphsubbw: V16QI operand 1
;; (register or memory) produces V8HI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 15).
10814 (define_insn "xop_phsubbw"
10815 [(set (match_operand:V8HI 0 "register_operand" "=x")
10819 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10820 (parallel [(const_int 0)
10831 (parallel [(const_int 1)
10838 (const_int 15)])))))]
10840 "vphsubbw\t{%1, %0|%0, %1}"
10841 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal subtract printed as vphsubwd: V8HI operand 1
;; (register or memory) produces V4SI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 7).
10843 (define_insn "xop_phsubwd"
10844 [(set (match_operand:V4SI 0 "register_operand" "=x")
10848 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10849 (parallel [(const_int 0)
10856 (parallel [(const_int 1)
10859 (const_int 7)])))))]
10861 "vphsubwd\t{%1, %0|%0, %1}"
10862 [(set_attr "type" "sseiadd1")])
;; Single-input horizontal subtract printed as vphsubdq: V4SI operand 1
;; (register or memory) produces V2DI operand 0.  The RTL picks elements of
;; operand 1 with two parallel index lists, starting at element 0 and at
;; element 1 (ending at element 3).
10864 (define_insn "xop_phsubdq"
10865 [(set (match_operand:V2DI 0 "register_operand" "=x")
10869 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10870 (parallel [(const_int 0)
10875 (parallel [(const_int 1)
10876 (const_int 3)])))))]
10878 "vphsubdq\t{%1, %0|%0, %1}"
10879 [(set_attr "type" "sseiadd1")])
10881 ;; XOP permute instructions
;; Byte permute modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs and
;; printed as vpperm.  Operand 1 is always a register; operands 2 and 3 may
;; each be memory, but the insn condition (together with the "x,m"/"xm,x"
;; alternatives) rejects the case where both are MEMs.
10882 (define_insn "xop_pperm"
10883 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10885 [(match_operand:V16QI 1 "register_operand" "x,x")
10886 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10887 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10888 UNSPEC_XOP_PERMUTE))]
10889 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10890 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10891 [(set_attr "type" "sse4arg")
10892 (set_attr "mode" "TI")])
10894 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack of two V2DI inputs (operands 1 and 2) into one V4SI result, printed
;; as vpperm.  The V16QI operand 3 appears only in a `use' and is passed to
;; the instruction as its last source.  Operands 2 and 3 may not both be in
;; memory.
10895 (define_insn "xop_pperm_pack_v2di_v4si"
10896 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10899 (match_operand:V2DI 1 "register_operand" "x,x"))
10901 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10902 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10903 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10904 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10905 [(set_attr "type" "sse4arg")
10906 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs (operands 1 and 2) into one V8HI result, printed
;; as vpperm.  The V16QI operand 3 appears only in a `use' and is passed to
;; the instruction as its last source.  Operands 2 and 3 may not both be in
;; memory.
10908 (define_insn "xop_pperm_pack_v4si_v8hi"
10909 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10912 (match_operand:V4SI 1 "register_operand" "x,x"))
10914 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10915 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10916 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10917 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10918 [(set_attr "type" "sse4arg")
10919 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs (operands 1 and 2) into one V16QI result, printed
;; as vpperm.  The V16QI operand 3 appears only in a `use' and is passed to
;; the instruction as its last source.  Operands 2 and 3 may not both be in
;; memory.
10921 (define_insn "xop_pperm_pack_v8hi_v16qi"
10922 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10925 (match_operand:V8HI 1 "register_operand" "x,x"))
10927 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10928 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10929 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10930 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10931 [(set_attr "type" "sse4arg")
10932 (set_attr "mode" "TI")])
10934 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; C body converts it to <ssescalarmode> if its mode differs, replicates it
;; into a PARALLEL of <ssescalarnum> elements, materializes that with
;; gen_vec_init<mode> and emits the per-element variable rotate
;; xop_vrotl<mode>3.  A count that does satisfy the predicate skips this
;; path (presumably falling through to the immediate pattern -- confirm).
10935 (define_expand "rotl<mode>3"
10936 [(set (match_operand:VI_128 0 "register_operand" "")
10938 (match_operand:VI_128 1 "nonimmediate_operand" "")
10939 (match_operand:SI 2 "general_operand")))]
10942 /* If we were given a scalar, convert it to parallel */
10943 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10945 rtvec vs = rtvec_alloc (<ssescalarnum>);
10946 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10947 rtx reg = gen_reg_rtx (<MODE>mode);
10948 rtx op2 = operands[2];
10951 if (GET_MODE (op2) != <ssescalarmode>mode)
10953 op2 = gen_reg_rtx (<ssescalarmode>mode);
10954 convert_move (op2, operands[2], false);
10957 for (i = 0; i < <ssescalarnum>; i++)
10958 RTVEC_ELT (vs, i) = op2;
10960 emit_insn (gen_vec_init<mode> (reg, par));
10961 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; C body converts it to <ssescalarmode> if its mode differs, replicates it
;; into a vector with gen_vec_init<mode>, negates that vector with
;; gen_neg<mode>2 and emits xop_vrotl<mode>3 with the negated counts, i.e.
;; the right rotate is expressed through the variable rotate-left pattern.
10966 (define_expand "rotr<mode>3"
10967 [(set (match_operand:VI_128 0 "register_operand" "")
10969 (match_operand:VI_128 1 "nonimmediate_operand" "")
10970 (match_operand:SI 2 "general_operand")))]
10973 /* If we were given a scalar, convert it to parallel */
10974 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10976 rtvec vs = rtvec_alloc (<ssescalarnum>);
10977 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10978 rtx neg = gen_reg_rtx (<MODE>mode);
10979 rtx reg = gen_reg_rtx (<MODE>mode);
10980 rtx op2 = operands[2];
10983 if (GET_MODE (op2) != <ssescalarmode>mode)
10985 op2 = gen_reg_rtx (<ssescalarmode>mode);
10986 convert_move (op2, operands[2], false);
10989 for (i = 0; i < <ssescalarnum>; i++)
10990 RTVEC_ELT (vs, i) = op2;
10992 emit_insn (gen_vec_init<mode> (reg, par));
10993 emit_insn (gen_neg<mode>2 (neg, reg));
10994 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate for the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the count of
;; vprot<ssemodesuffix>; operand 1 may be a register or memory.
10999 (define_insn "xop_rotl<mode>3"
11000 [(set (match_operand:VI_128 0 "register_operand" "=x")
11002 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11003 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11005 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11006 [(set_attr "type" "sseishft")
11007 (set_attr "length_immediate" "1")
11008 (set_attr "mode" "TI")])
;; Rotate right by an immediate for the VI_128 modes.  The immediate form of
;; vprot (see xop_rotl<mode>3) takes a left-rotate count, so the template
;; prints operand 3 = element width in bits minus operand 2.
;; The width comes from GET_MODE_BITSIZE of the scalar element mode.  The
;; previous expression, <ssescalarnum> * 8, is the lane count times 8; it
;; equals the element width only for V4SI and gave e.g. 16 - n instead of
;; 64 - n for V2DI, i.e. a wrong (possibly negative) rotate count.
11010 (define_insn "xop_rotr<mode>3"
11011 [(set (match_operand:VI_128 0 "register_operand" "=x")
11013 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11014 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11017 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11018 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11020 [(set_attr "type" "sseishft")
11021 (set_attr "length_immediate" "1")
11022 (set_attr "mode" "TI")])
;; Per-element variable rotate right for the VI_128 modes: negates the count
;; vector (operand 2) into a fresh register with gen_neg<mode>2 and emits
;; xop_vrotl<mode>3 with the negated counts.
11024 (define_expand "vrotr<mode>3"
11025 [(match_operand:VI_128 0 "register_operand" "")
11026 (match_operand:VI_128 1 "register_operand" "")
11027 (match_operand:VI_128 2 "register_operand" "")]
11030 rtx reg = gen_reg_rtx (<MODE>mode);
11031 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11032 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left for the VI_128 modes: forwards all three
;; operands unchanged to xop_vrotl<mode>3.
11036 (define_expand "vrotl<mode>3"
11037 [(match_operand:VI_128 0 "register_operand" "")
11038 (match_operand:VI_128 1 "register_operand" "")
11039 (match_operand:VI_128 2 "register_operand" "")]
11042 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate for the VI_128 modes, printed as
;; vprot<ssemodesuffix> with a vector count (operand 2).  The RTL is an
;; if_then_else on operand 2 whose other arm uses (neg (match_dup 2)), so
;; one pattern covers both count signs.  Either operand 1 or operand 2 may
;; be memory, but the insn condition rejects both being MEMs.
11046 (define_insn "xop_vrotl<mode>3"
11047 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11048 (if_then_else:VI_128
11050 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11053 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11057 (neg:VI_128 (match_dup 2)))))]
11058 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11059 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11060 [(set_attr "type" "sseishft")
11061 (set_attr "prefix_data16" "0")
11062 (set_attr "prefix_extra" "2")
11063 (set_attr "mode" "TI")])
11065 ;; XOP packed shift instructions.
11066 ;; FIXME: add V2DI back in
;; Per-element logical shift right for the VI124_128 modes: negates the
;; count vector (operand 2) with gen_neg<mode>2 and emits xop_lshl<mode>3
;; with the negated counts.
11067 (define_expand "vlshr<mode>3"
11068 [(match_operand:VI124_128 0 "register_operand" "")
11069 (match_operand:VI124_128 1 "register_operand" "")
11070 (match_operand:VI124_128 2 "register_operand" "")]
11073 rtx neg = gen_reg_rtx (<MODE>mode);
11074 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11075 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for the VI124_128 modes: negates the
;; count vector (operand 2) with gen_neg<mode>2 and emits xop_ashl<mode>3
;; with the negated counts.
11079 (define_expand "vashr<mode>3"
11080 [(match_operand:VI124_128 0 "register_operand" "")
11081 (match_operand:VI124_128 1 "register_operand" "")
11082 (match_operand:VI124_128 2 "register_operand" "")]
11085 rtx neg = gen_reg_rtx (<MODE>mode);
11086 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11087 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left for the VI124_128 modes: forwards all three
;; operands unchanged to xop_ashl<mode>3.
11091 (define_expand "vashl<mode>3"
11092 [(match_operand:VI124_128 0 "register_operand" "")
11093 (match_operand:VI124_128 1 "register_operand" "")
11094 (match_operand:VI124_128 2 "register_operand" "")]
11097 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable arithmetic shift for the VI_128 modes, printed as
;; vpsha<ssemodesuffix> with a vector count (operand 2).  The RTL is an
;; if_then_else on operand 2 whose other arm uses (neg (match_dup 2)); the
;; right-shift expanders rely on this by passing negated counts.  Operands 1
;; and 2 may not both be in memory.
11101 (define_insn "xop_ashl<mode>3"
11102 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11103 (if_then_else:VI_128
11105 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11108 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11112 (neg:VI_128 (match_dup 2)))))]
11113 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11114 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11115 [(set_attr "type" "sseishft")
11116 (set_attr "prefix_data16" "0")
11117 (set_attr "prefix_extra" "2")
11118 (set_attr "mode" "TI")])
;; Per-element variable logical shift for the VI_128 modes, printed as
;; vpshl<ssemodesuffix> with a vector count (operand 2).  The RTL is an
;; if_then_else on operand 2 whose other arm uses (neg (match_dup 2)); the
;; vlshr expander relies on this by passing negated counts.  Operands 1 and
;; 2 may not both be in memory.
11120 (define_insn "xop_lshl<mode>3"
11121 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11122 (if_then_else:VI_128
11124 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11127 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11131 (neg:VI_128 (match_dup 2)))))]
11132 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11133 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11134 [(set_attr "type" "sseishft")
11135 (set_attr "prefix_data16" "0")
11136 (set_attr "prefix_extra" "2")
11137 (set_attr "mode" "TI")])
11139 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count: places operands[2] in all 16
;; slots of a PARALLEL, materializes it into a V16QI register with
;; gen_vec_initv16qi and emits xop_ashlv16qi3 with that count vector.
11140 (define_expand "ashlv16qi3"
11141 [(match_operand:V16QI 0 "register_operand" "")
11142 (match_operand:V16QI 1 "register_operand" "")
11143 (match_operand:SI 2 "nonmemory_operand" "")]
11146 rtvec vs = rtvec_alloc (16);
11147 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11148 rtx reg = gen_reg_rtx (V16QImode);
11150 for (i = 0; i < 16; i++)
11151 RTVEC_ELT (vs, i) = operands[2];
11153 emit_insn (gen_vec_initv16qi (reg, par));
11154 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count: places operands[2] in all
;; 16 slots of a PARALLEL, materializes it into a V16QI register with
;; gen_vec_initv16qi and emits xop_lshlv16qi3 with that count vector.
11158 (define_expand "lshlv16qi3"
11159 [(match_operand:V16QI 0 "register_operand" "")
11160 (match_operand:V16QI 1 "register_operand" "")
11161 (match_operand:SI 2 "nonmemory_operand" "")]
11164 rtvec vs = rtvec_alloc (16);
11165 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11166 rtx reg = gen_reg_rtx (V16QImode);
11168 for (i = 0; i < 16; i++)
11169 RTVEC_ELT (vs, i) = operands[2];
11171 emit_insn (gen_vec_initv16qi (reg, par));
11172 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, expressed through
;; xop_ashlv16qi3 with negated counts.  A CONST_INT count is negated at
;; expand time (GEN_INT (- INTVAL ...)) before being replicated into the
;; count vector.  A non-constant count is replicated first and the whole
;; vector is then negated with gen_negv16qi2.  The two final emit_insn calls
;; are presumably the two arms of that CONST_INT test -- confirm.
11176 (define_expand "ashrv16qi3"
11177 [(match_operand:V16QI 0 "register_operand" "")
11178 (match_operand:V16QI 1 "register_operand" "")
11179 (match_operand:SI 2 "nonmemory_operand" "")]
11182 rtvec vs = rtvec_alloc (16);
11183 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11184 rtx reg = gen_reg_rtx (V16QImode);
11186 rtx ele = ((CONST_INT_P (operands[2]))
11187 ? GEN_INT (- INTVAL (operands[2]))
11190 for (i = 0; i < 16; i++)
11191 RTVEC_ELT (vs, i) = ele;
11193 emit_insn (gen_vec_initv16qi (reg, par));
11195 if (!CONST_INT_P (operands[2]))
11197 rtx neg = gen_reg_rtx (V16QImode);
11198 emit_insn (gen_negv16qi2 (neg, reg));
11199 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11202 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, expressed through
;; xop_ashlv2di3 with a negated count.  The negated scalar `ele' is built in
;; one of three ways: GEN_INT of the negated value for a CONST_INT count;
;; convert_move to DImode followed by gen_negdi2 when the count's mode is
;; not DImode; gen_negdi2 applied directly otherwise.  `ele' then fills both
;; slots of the count vector initialized with gen_vec_initv2di.
11207 (define_expand "ashrv2di3"
11208 [(match_operand:V2DI 0 "register_operand" "")
11209 (match_operand:V2DI 1 "register_operand" "")
11210 (match_operand:DI 2 "nonmemory_operand" "")]
11213 rtvec vs = rtvec_alloc (2);
11214 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11215 rtx reg = gen_reg_rtx (V2DImode);
11218 if (CONST_INT_P (operands[2]))
11219 ele = GEN_INT (- INTVAL (operands[2]));
11220 else if (GET_MODE (operands[2]) != DImode)
11222 rtx move = gen_reg_rtx (DImode);
11223 ele = gen_reg_rtx (DImode);
11224 convert_move (move, operands[2], false);
11225 emit_insn (gen_negdi2 (ele, move));
11229 ele = gen_reg_rtx (DImode);
11230 emit_insn (gen_negdi2 (ele, operands[2]));
11233 RTVEC_ELT (vs, 0) = ele;
11234 RTVEC_ELT (vs, 1) = ele;
11235 emit_insn (gen_vec_initv2di (reg, par));
11236 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11240 ;; XOP FRCZ support
;; One-input FRCZ pattern over the FMAMODE iterator, printed as
;; vfrcz<ssemodesuffix>; operand 1 may be a register or memory and the
;; "mode" attribute follows the iterator mode.
11241 (define_insn "xop_frcz<mode>2"
11242 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11244 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11247 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11248 [(set_attr "type" "ssecvt1")
11249 (set_attr "mode" "<MODE>")])
;; Expander for the FRCZ form over the VF_128 modes.  Its preparation code
;; sets operands[3] to the zero vector of the mode (CONST0_RTX), presumably
;; for a const0 operand of the matching insn pattern -- confirm.
11252 (define_expand "xop_vmfrcz<mode>2"
11253 [(set (match_operand:VF_128 0 "register_operand")
11256 [(match_operand:VF_128 1 "nonimmediate_operand")]
11262 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the FRCZ form over the VF_128 modes, printed with the scalar
;; suffix as vfrcz<ssescalarmodesuffix>.  Operand 2 must satisfy
;; const0_operand; only operands 0 and 1 appear in the output template.
11265 (define_insn "*xop_vmfrcz_<mode>"
11266 [(set (match_operand:VF_128 0 "register_operand" "=x")
11269 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11271 (match_operand:VF_128 2 "const0_operand")
11274 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11275 [(set_attr "type" "ssecvt1")
11276 (set_attr "mode" "<MODE>")])
;; Vector integer compare for the VI_128 modes.  Operand 1 is a
;; match_operator restricted to ix86_comparison_int_operator; the template
;; prints it through the %Y1 modifier inside the vpcom mnemonic.  Operand 2
;; is a register and operand 3 may be memory.
11278 (define_insn "xop_maskcmp<mode>3"
11279 [(set (match_operand:VI_128 0 "register_operand" "=x")
11280 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11281 [(match_operand:VI_128 2 "register_operand" "x")
11282 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11284 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11285 [(set_attr "type" "sse4arg")
11286 (set_attr "prefix_data16" "0")
11287 (set_attr "prefix_rep" "0")
11288 (set_attr "prefix_extra" "2")
11289 (set_attr "length_immediate" "1")
11290 (set_attr "mode" "TI")])
;; Vector integer compare for the VI_128 modes using
;; ix86_comparison_uns_operator for operand 1; the template prints the
;; comparison through %Y1 followed by a literal "u" in the vpcom mnemonic.
;; Operand 2 is a register and operand 3 may be memory.
11292 (define_insn "xop_maskcmp_uns<mode>3"
11293 [(set (match_operand:VI_128 0 "register_operand" "=x")
11294 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11295 [(match_operand:VI_128 2 "register_operand" "x")
11296 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11298 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11299 [(set_attr "type" "ssecmp")
11300 (set_attr "prefix_data16" "0")
11301 (set_attr "prefix_rep" "0")
11302 (set_attr "prefix_extra" "2")
11303 (set_attr "length_immediate" "1")
11304 (set_attr "mode" "TI")])
11306 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11307 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11308 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, which keeps the
;; operator opaque to RTL simplification so the printed instruction matches
;; the one requested.
11309 (define_insn "xop_maskcmp_uns2<mode>3"
11310 [(set (match_operand:VI_128 0 "register_operand" "=x")
11312 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11313 [(match_operand:VI_128 2 "register_operand" "x")
11314 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11315 UNSPEC_XOP_UNSIGNED_CMP))]
11317 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11318 [(set_attr "type" "ssecmp")
11319 (set_attr "prefix_data16" "0")
11320 (set_attr "prefix_extra" "2")
11321 (set_attr "length_immediate" "1")
11322 (set_attr "mode" "TI")])
11324 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11325 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern for the VI_128 modes.  The constant integer
;; operand 3 selects the mnemonic at output time: nonzero prints
;; vpcomtrue<ssemodesuffix>, zero prints vpcomfalse<ssemodesuffix>.
;; Operand 3 itself is not printed.
11326 (define_insn "xop_pcom_tf<mode>3"
11327 [(set (match_operand:VI_128 0 "register_operand" "=x")
11329 [(match_operand:VI_128 1 "register_operand" "x")
11330 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11331 (match_operand:SI 3 "const_int_operand" "n")]
11332 UNSPEC_XOP_TRUEFALSE))]
11335 return ((INTVAL (operands[3]) != 0)
11336 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11337 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11339 [(set_attr "type" "ssecmp")
11340 (set_attr "prefix_data16" "0")
11341 (set_attr "prefix_extra" "2")
11342 (set_attr "length_immediate" "1")
11343 (set_attr "mode" "TI")])
;; Two-source permute over the VF float vector modes, printed as
;; vpermil2<ssemodesuffix>.  Operands 1 and 2 are the VF sources, operand 3
;; is the selector in the matching integer vector mode (<sseintvecmode>,
;; register or memory) and operand 4 is an immediate satisfying
;; const_0_to_3_operand.
11345 (define_insn "xop_vpermil2<mode>3"
11346 [(set (match_operand:VF 0 "register_operand" "=x")
11348 [(match_operand:VF 1 "register_operand" "x")
11349 (match_operand:VF 2 "nonimmediate_operand" "%x")
11350 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11351 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11354 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11355 [(set_attr "type" "sse4arg")
11356 (set_attr "length_immediate" "1")
11357 (set_attr "mode" "<MODE>")])
11359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encrypt round on V2DI, with two alternatives selected by the "isa"
;; attribute: "noavx" ties operand 1 to the destination ("0") and prints the
;; two-operand aesenc; "avx" prints the three-operand vaesenc.  Operand 2
;; may be a register or memory in both.
11361 (define_insn "aesenc"
11362 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11363 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11364 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11368 aesenc\t{%2, %0|%0, %2}
11369 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11370 [(set_attr "isa" "noavx,avx")
11371 (set_attr "type" "sselog1")
11372 (set_attr "prefix_extra" "1")
11373 (set_attr "prefix" "orig,vex")
11374 (set_attr "mode" "TI")])
;; UNSPEC_AESENCLAST on V2DI, with two alternatives selected by the "isa"
;; attribute: "noavx" ties operand 1 to the destination ("0") and prints the
;; two-operand aesenclast; "avx" prints the three-operand vaesenclast.
11376 (define_insn "aesenclast"
11377 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11378 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11379 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11380 UNSPEC_AESENCLAST))]
11383 aesenclast\t{%2, %0|%0, %2}
11384 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11385 [(set_attr "isa" "noavx,avx")
11386 (set_attr "type" "sselog1")
11387 (set_attr "prefix_extra" "1")
11388 (set_attr "prefix" "orig,vex")
11389 (set_attr "mode" "TI")])
;; AES decrypt round on V2DI, with two alternatives selected by the "isa"
;; attribute: "noavx" ties operand 1 to the destination ("0") and prints the
;; two-operand aesdec; "avx" prints the three-operand vaesdec.
11391 (define_insn "aesdec"
11392 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11393 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11394 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11398 aesdec\t{%2, %0|%0, %2}
11399 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11400 [(set_attr "isa" "noavx,avx")
11401 (set_attr "type" "sselog1")
11402 (set_attr "prefix_extra" "1")
11403 (set_attr "prefix" "orig,vex")
11404 (set_attr "mode" "TI")])
;; V2DI pattern wrapping UNSPEC_AESDECLAST; emits aesdeclast / vaesdeclast.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and uses the
;; two-operand form; alternative 1 (isa avx) uses the three-operand VEX form.
11406 (define_insn "aesdeclast"
11407 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11408 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11409 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11410 UNSPEC_AESDECLAST))]
11413 aesdeclast\t{%2, %0|%0, %2}
11414 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11415 [(set_attr "isa" "noavx,avx")
11416 (set_attr "type" "sselog1")
11417 (set_attr "prefix_extra" "1")
11418 (set_attr "prefix" "orig,vex")
11419 (set_attr "mode" "TI")])
;; V2DI pattern with a single source (register or memory); emits aesimc.
;; There is only one alternative: the template begins with "%v" and the
;; prefix attribute is maybe_vex.
;; NOTE(review): "%v" is expanded by the i386 output code, which is not
;; shown here; presumably it yields the "v" mnemonic prefix under AVX.
11421 (define_insn "aesimc"
11422 [(set (match_operand:V2DI 0 "register_operand" "=x")
11423 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11426 "%vaesimc\t{%1, %0|%0, %1}"
11427 [(set_attr "type" "sselog1")
11428 (set_attr "prefix_extra" "1")
11429 (set_attr "prefix" "maybe_vex")
11430 (set_attr "mode" "TI")])
;; V2DI pattern wrapping UNSPEC_AESKEYGENASSIST; emits aeskeygenassist.
;; Operand 1 is the source (register or memory) and operand 2 is an
;; immediate accepted by const_0_to_255_operand, hence length_immediate 1.
;; Single alternative using the "%v" template prefix with prefix=maybe_vex.
11432 (define_insn "aeskeygenassist"
11433 [(set (match_operand:V2DI 0 "register_operand" "=x")
11434 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11435 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11436 UNSPEC_AESKEYGENASSIST))]
11438 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11439 [(set_attr "type" "sselog1")
11440 (set_attr "prefix_extra" "1")
11441 (set_attr "length_immediate" "1")
11442 (set_attr "prefix" "maybe_vex")
11443 (set_attr "mode" "TI")])
;; V2DI pattern emitting pclmulqdq / vpclmulqdq.
;; Operands 1 and 2 are the V2DI inputs (operand 2 may be memory) and
;; operand 3 is an immediate accepted by const_0_to_255_operand.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; the two-source form; alternative 1 (isa avx) prints the VEX form with
;; operand 1 as an explicit extra source.
11445 (define_insn "pclmulqdq"
11446 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11447 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11448 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11449 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11453 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11454 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11455 [(set_attr "isa" "noavx,avx")
11456 (set_attr "type" "sselog1")
11457 (set_attr "prefix_extra" "1")
11458 (set_attr "length_immediate" "1")
11459 (set_attr "prefix" "orig,vex")
11460 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL used for vzeroall by hand.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.  The PARALLEL has
;; nregs + 1 elements: element 0 is an UNSPEC_VOLATILE wrapping const0_rtx,
;; and element regno + 1 is a SET of SSE register SSE_REGNO (regno), viewed
;; in V8SImode, to the V8SImode zero constant.
11462 (define_expand "avx_vzeroall"
11463 [(match_par_dup 0 [(const_int 0)])]
11466 int nregs = TARGET_64BIT ? 16 : 8;
11469 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11471 XVECEXP (operands[0], 0, 0)
11472 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11475 for (regno = 0; regno < nregs; regno++)
11476 XVECEXP (operands[0], 0, regno + 1)
11477 = gen_rtx_SET (VOIDmode,
11478 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11479 CONST0_RTX (V8SImode));
;; Matcher for a vzeroall PARALLEL: the whole parallel is checked by the
;; vzeroall_operation predicate and its first element must be the
;; UNSPECV_VZEROALL unspec_volatile.  The attributes mark the instruction
;; as having no ModRM byte and no memory access.
11482 (define_insn "*avx_vzeroall"
11483 [(match_parallel 0 "vzeroall_operation"
11484 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11487 [(set_attr "type" "sse")
11488 (set_attr "modrm" "0")
11489 (set_attr "memory" "none")
11490 (set_attr "prefix" "vex")
11491 (set_attr "mode" "OI")])
11493 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11494 ;; if the upper 128bits are unused.
;; The pattern is an UNSPECV_VZEROUPPER unspec_volatile whose only operand
;; is a const_int (operand 0).  Like vzeroall it is marked as having no
;; ModRM byte and no memory access.
11495 (define_insn "avx_vzeroupper"
11496 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11497 UNSPECV_VZEROUPPER)]
11500 [(set_attr "type" "sse")
11501 (set_attr "modrm" "0")
11502 (set_attr "memory" "none")
11503 (set_attr "prefix" "vex")
11504 (set_attr "mode" "OI")])
;; Mode attribute mapping an integer vector mode to the 128-bit vector mode
;; with the same element type: each 256-bit mode maps to its half-width
;; counterpart and each 128-bit mode maps to itself.
11506 (define_mode_attr AVXTOSSEMODE
11507 [(V4DI "V2DI") (V2DI "V2DI")
11508 (V8SI "V4SI") (V4SI "V4SI")
11509 (V16HI "V8HI") (V8HI "V8HI")
11510 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast for the VI integer vector modes.
;; The source (operand 1) is always a 128-bit vector in <AVXTOSSEMODE>,
;; register or memory, and only its element 0 is read (vec_select with
;; parallel [0]).
11512 (define_insn "avx2_pbroadcast<mode>"
11513 [(set (match_operand:VI 0 "register_operand" "=x")
11515 (vec_select:<ssescalarmode>
11516 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11517 (parallel [(const_int 0)]))))]
11519 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11520 [(set_attr "type" "ssemov")
11521 (set_attr "prefix_extra" "1")
11522 (set_attr "prefix" "vex")
11523 (set_attr "mode" "<sseinsnmode>")])
;; V8SI pattern emitting vpermd.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Both are V8SI.
11525 (define_insn "avx2_permvarv8si"
11526 [(set (match_operand:V8SI 0 "register_operand" "=x")
11528 [(match_operand:V8SI 1 "register_operand" "x")
11529 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11532 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11533 [(set_attr "type" "sselog")
11534 (set_attr "prefix" "vex")
11535 (set_attr "mode" "OI")])
;; V4DF pattern emitting vpermpd with an 8-bit immediate (operand 2,
;; accepted by const_0_to_255_operand).
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint: with register_operand a memory source could never
;; be matched directly, even though the constraint allows one.  This also
;; matches avx2_permv4di_1, which accepts a memory source the same way.
11537 (define_insn "avx2_permv4df"
11538 [(set (match_operand:V4DF 0 "register_operand" "=x")
11540 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11541 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11544 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11545 [(set_attr "type" "sselog")
11546 (set_attr "prefix_extra" "1")
11547 (set_attr "prefix" "vex")
11548 (set_attr "mode" "OI")])
;; V8SF pattern emitting vpermps.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Both are declared V8SF here.
11550 (define_insn "avx2_permvarv8sf"
11551 [(set (match_operand:V8SF 0 "register_operand" "=x")
11553 [(match_operand:V8SF 1 "register_operand" "x")
11554 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11557 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11558 [(set_attr "type" "sselog")
11559 (set_attr "prefix" "vex")
11560 (set_attr "mode" "OI")])
;; Expander taking a V4DI destination, a V4DI source and an 8-bit immediate.
;; The immediate is split into four 2-bit fields (bits 1:0, 3:2, 5:4, 7:6),
;; which are handed as separate constants to gen_avx2_permv4di_1.
11562 (define_expand "avx2_permv4di"
11563 [(match_operand:V4DI 0 "register_operand" "")
11564 (match_operand:V4DI 1 "nonimmediate_operand" "")
11565 (match_operand:SI 2 "const_0_to_255_operand" "")]
11568 int mask = INTVAL (operands[2]);
11569 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11570 GEN_INT ((mask >> 0) & 3),
11571 GEN_INT ((mask >> 2) & 3),
11572 GEN_INT ((mask >> 4) & 3),
11573 GEN_INT ((mask >> 6) & 3)));
;; V4DI permute with the selection spelled out as four separate constants
;; (operands 2..5, each accepted by const_0_to_3_operand).
;; The output code packs them back into one immediate, operand N going to
;; bits starting at 2 * (N - 2), stores it in operands[2] and prints vpermq.
11577 (define_insn "avx2_permv4di_1"
11578 [(set (match_operand:V4DI 0 "register_operand" "=x")
11580 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11581 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11582 (match_operand 3 "const_0_to_3_operand" "")
11583 (match_operand 4 "const_0_to_3_operand" "")
11584 (match_operand 5 "const_0_to_3_operand" "")])))]
11588 mask |= INTVAL (operands[2]) << 0;
11589 mask |= INTVAL (operands[3]) << 2;
11590 mask |= INTVAL (operands[4]) << 4;
11591 mask |= INTVAL (operands[5]) << 6;
11592 operands[2] = GEN_INT (mask);
11593 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11595 [(set_attr "type" "sselog")
11596 (set_attr "prefix" "vex")
11597 (set_attr "mode" "OI")])
;; V4DI pattern emitting vperm2i128.  Operand 1 is a register, operand 2 a
;; register or memory, and operand 3 an immediate accepted by
;; const_0_to_255_operand.
11599 (define_insn "avx2_permv2ti"
11600 [(set (match_operand:V4DI 0 "register_operand" "=x")
11602 [(match_operand:V4DI 1 "register_operand" "x")
11603 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11604 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11607 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11608 [(set_attr "type" "sselog")
11609 (set_attr "prefix" "vex")
11610 (set_attr "mode" "OI")])
;; Duplicate element 0 of a V2DF register (operand 1) into all elements of
;; a V4DF destination; emits vbroadcastsd with a register source.
11612 (define_insn "avx2_vec_dupv4df"
11613 [(set (match_operand:V4DF 0 "register_operand" "=x")
11614 (vec_duplicate:V4DF
11616 (match_operand:V2DF 1 "register_operand" "x")
11617 (parallel [(const_int 0)]))))]
11619 "vbroadcastsd\t{%1, %0|%0, %1}"
11620 [(set_attr "type" "sselog1")
11621 (set_attr "prefix" "vex")
11622 (set_attr "mode" "V4DF")])
11624 ;; Modes handled by AVX vec_dup patterns.
;; These are the four 256-bit modes with 32-bit or 64-bit elements.
11625 (define_mode_iterator AVX_VEC_DUP_MODE
11626 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar (operand 1, in <ssescalarmode>) into every
;; element of an AVX_VEC_DUP_MODE vector.
;; Alternative 0 takes the scalar from memory and prints
;; vbroadcast<ssescalarmodesuffix>.  Alternative 1 takes it from an SSE
;; register and is slightly disparaged ("?x").
;; The "mode" attribute is V8SF for every iterated mode.
11628 (define_insn "vec_dup<mode>"
11629 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11630 (vec_duplicate:AVX_VEC_DUP_MODE
11631 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11634 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11636 [(set_attr "type" "ssemov")
11637 (set_attr "prefix_extra" "1")
11638 (set_attr "prefix" "vex")
11639 (set_attr "mode" "V8SF")])
;; vbroadcasti128 for the VI_256 integer modes.  The source (operand 1) is
;; a half-width vector (<ssehalfvecmode>) and must be in memory: both the
;; predicate (memory_operand) and the constraint ("m") require it.
11641 (define_insn "avx2_vbroadcasti128_<mode>"
11642 [(set (match_operand:VI_256 0 "register_operand" "=x")
11644 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11647 "vbroadcasti128\t{%1, %0|%0, %1}"
11648 [(set_attr "type" "ssemov")
11649 (set_attr "prefix_extra" "1")
11650 (set_attr "prefix" "vex")
11651 (set_attr "mode" "OI")])
;; Post-reload split (condition: TARGET_AVX && reload_completed) for a
;; vec_duplicate whose scalar source is a register.
;; operands[2] is created as the half-width (<ssehalfvecmode>) view of the
;; destination, using the same hard register number as operand 0.  The
;; replacement sequence first duplicates the scalar into that half-width
;; register and then forms the full vector as vec_concat of the half with
;; itself.
11654 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11655 (vec_duplicate:AVX_VEC_DUP_MODE
11656 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11657 "TARGET_AVX && reload_completed"
11658 [(set (match_dup 2)
11659 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11661 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11662 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a 256-bit (V_256) register from one half-width source (operand 1).
;; Three alternatives, chosen by where the source lives:
;;   "m"  - memory: vbroadcast<i128>.
;;   "0"  - already in the destination register: vinsert<i128> with
;;          immediate 1, using the destination as both sources.
;;   "?x" - some other SSE register (disparaged): vperm2<i128> with
;;          immediate 0 on the %t1 form of the source, given twice.
;; length_immediate is 0,1,1 to match (only the last two carry an imm8).
11664 (define_insn "avx_vbroadcastf128_<mode>"
11665 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11667 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11671 vbroadcast<i128>\t{%1, %0|%0, %1}
11672 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11673 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11674 [(set_attr "type" "ssemov,sselog1,sselog1")
11675 (set_attr "prefix_extra" "1")
11676 (set_attr "length_immediate" "0,1,1")
11677 (set_attr "prefix" "vex")
11678 (set_attr "mode" "<sseinsnmode>")])
11680 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11681 ;; If it so happens that the input is in memory, use vbroadcast.
11682 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant.  operands[3] is the first index of the selection parallel
;; (the parallel as a whole is checked by avx_vbroadcast_operand) and is
;; read as the element number "elt".
;; Memory source: the address is re-based to SFmode at byte offset elt * 4,
;; so vbroadcastss loads the wanted element directly.
;; Register source: the immediate is elt * 0x55, i.e. the element number
;; repeated in each of the four 2-bit fields, and vpermilps is printed.
11683 (define_insn "*avx_vperm_broadcast_v4sf"
11684 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11686 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11687 (match_parallel 2 "avx_vbroadcast_operand"
11688 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11691 int elt = INTVAL (operands[3]);
11692 switch (which_alternative)
11696 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11697 return "vbroadcastss\t{%1, %0|%0, %1}";
11699 operands[2] = GEN_INT (elt * 0x55);
11700 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11702 gcc_unreachable ();
11705 [(set_attr "type" "ssemov,ssemov,sselog1")
11706 (set_attr "prefix_extra" "1")
11707 (set_attr "length_immediate" "0,0,1")
11708 (set_attr "prefix" "vex")
11709 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) counterpart of the V4SF broadcast-by-vec_select
;; pattern, implemented as an insn that is split after reload.
;; operands[3] gives the element number "elt".  The split code contains two
;; strategies:
;;   * an in-register sequence: gen_avx_vpermil<mode> replicates the wanted
;;     element within each 128-bit lane (mask 0 or 15 for V4DF, otherwise
;;     (elt & 3) * 0x55), then gen_avx_vperm2f128<mode>3 copies the lane
;;     that holds it, elt / (<ssescalarnum> / 2), into both lanes using
;;     mask lane * 0x11;
;;   * a memory form: operand 1 is re-based to the scalar mode at byte
;;     offset elt * GET_MODE_SIZE (<ssescalarmode>mode), leaving the
;;     vec_duplicate replacement pattern to load it.
;; NOTE(review): the test that picks between the two is not shown in this
;; block; presumably it is whether operand 1 is a register.
11711 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11712 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11714 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11715 (match_parallel 2 "avx_vbroadcast_operand"
11716 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11719 "&& reload_completed"
11720 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11722 rtx op0 = operands[0], op1 = operands[1];
11723 int elt = INTVAL (operands[3]);
11729 /* Shuffle element we care about into all elements of the 128-bit lane.
11730 The other lane gets shuffled too, but we don't care. */
11731 if (<MODE>mode == V4DFmode)
11732 mask = (elt & 1 ? 15 : 0);
11734 mask = (elt & 3) * 0x55;
11735 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11737 /* Shuffle the lane we care about into both lanes of the dest. */
11738 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11739 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11743 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11744 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on the DF vector modes (VF2).
;; It decodes the immediate (operand 2) into an explicit index list:
;; one mask bit per element, bit i choosing index 0 or 1 for element i.
;; For V4DF, elements 2 and 3 use bits 2 and 3 and have 2 added, so they
;; select index 2 or 3.
;; The list is wrapped in a PARALLEL of <ssescalarnum> entries.
11747 (define_expand "avx_vpermil<mode>"
11748 [(set (match_operand:VF2 0 "register_operand" "")
11750 (match_operand:VF2 1 "nonimmediate_operand" "")
11751 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11754 int mask = INTVAL (operands[2]);
11755 rtx perm[<ssescalarnum>];
11757 perm[0] = GEN_INT (mask & 1);
11758 perm[1] = GEN_INT ((mask >> 1) & 1);
11759 if (<MODE>mode == V4DFmode)
11761 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11762 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11766 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on the SF vector modes (VF1).
;; The immediate (operand 2) holds four 2-bit fields; field i becomes the
;; index for element i (0..3).  For V8SF the same four fields are reused
;; for elements 4..7 with 4 added, so each half selects within its own
;; group of four.
;; The list is wrapped in a PARALLEL of <ssescalarnum> entries.
11769 (define_expand "avx_vpermil<mode>"
11770 [(set (match_operand:VF1 0 "register_operand" "")
11772 (match_operand:VF1 1 "nonimmediate_operand" "")
11773 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11776 int mask = INTVAL (operands[2]);
11777 rtx perm[<ssescalarnum>];
11779 perm[0] = GEN_INT (mask & 3);
11780 perm[1] = GEN_INT ((mask >> 2) & 3);
11781 perm[2] = GEN_INT ((mask >> 4) & 3);
11782 perm[3] = GEN_INT ((mask >> 6) & 3);
11783 if (<MODE>mode == V8SFmode)
11785 perm[4] = GEN_INT ((mask & 3) + 4);
11786 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11787 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11788 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11792 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for a selection (parallel, operand 2) that vpermil with an
;; immediate can implement.  The insn condition requires
;; avx_vpermilp_parallel (operands[2], <MODE>mode) to be nonzero.
;; At output time the same helper is called again and its result minus one
;; is used as the immediate, which replaces operands[2] before printing.
11795 (define_insn "*avx_vpermilp<mode>"
11796 [(set (match_operand:VF 0 "register_operand" "=x")
11798 (match_operand:VF 1 "nonimmediate_operand" "xm")
11799 (match_parallel 2 ""
11800 [(match_operand 3 "const_int_operand" "")])))]
11802 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11804 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11805 operands[2] = GEN_INT (mask);
11806 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11808 [(set_attr "type" "sselog")
11809 (set_attr "prefix_extra" "1")
11810 (set_attr "length_immediate" "1")
11811 (set_attr "prefix" "vex")
11812 (set_attr "mode" "<MODE>")])
;; Variable form of vpermil for the VF modes: operand 1 is the VF-mode
;; data register and operand 2 is a vector in the matching integer mode
;; (<sseintvecmode>), register or memory.  No immediate is involved, so
;; there is no length_immediate attribute.
11814 (define_insn "avx_vpermilvar<mode>3"
11815 [(set (match_operand:VF 0 "register_operand" "=x")
11817 [(match_operand:VF 1 "register_operand" "x")
11818 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11821 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11822 [(set_attr "type" "sselog")
11823 (set_attr "prefix_extra" "1")
11824 (set_attr "prefix" "vex")
11825 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 on the AVX256MODE2P modes.  The default pattern
;; is the UNSPEC_VPERMIL2F128 unspec.
;; When the immediate has neither bit 3 nor bit 7 set ((mask & 0x88) == 0),
;; the C code instead builds an explicit RTL form: operands 1 and 2 are
;; joined with VEC_CONCAT into <ssedoublevecmode>, and a VEC_SELECT picks
;; two runs of nelt / 2 consecutive elements from it.
;; The low run starts at (mask & 3) * nelt2 and the high run at
;; ((mask >> 4) & 3) * nelt2.  The result is wrapped in a SET of operand 0.
11827 (define_expand "avx_vperm2f128<mode>3"
11828 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11829 (unspec:AVX256MODE2P
11830 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11831 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11832 (match_operand:SI 3 "const_0_to_255_operand" "")]
11833 UNSPEC_VPERMIL2F128))]
11836 int mask = INTVAL (operands[3]);
11837 if ((mask & 0x88) == 0)
11839 rtx perm[<ssescalarnum>], t1, t2;
11840 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11842 base = (mask & 3) * nelt2;
11843 for (i = 0; i < nelt2; ++i)
11844 perm[i] = GEN_INT (base + i);
11846 base = ((mask >> 4) & 3) * nelt2;
11847 for (i = 0; i < nelt2; ++i)
11848 perm[i + nelt2] = GEN_INT (base + i);
11850 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11851 operands[1], operands[2]);
11852 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11853 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11854 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11860 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11861 ;; means that in order to represent this properly in rtl we'd have to
11862 ;; nest *another* vec_concat with a zero operand and do the select from
11863 ;; a 4x wide vector. That doesn't seem very nice.
;; This insn therefore keeps the general case as an opaque
;; UNSPEC_VPERMIL2F128 with the raw immediate (operand 3), printed directly
;; as vperm2<i128>.
11864 (define_insn "*avx_vperm2f128<mode>_full"
11865 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11866 (unspec:AVX256MODE2P
11867 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11868 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11869 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11870 UNSPEC_VPERMIL2F128))]
11872 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11873 [(set_attr "type" "sselog")
11874 (set_attr "prefix_extra" "1")
11875 (set_attr "length_immediate" "1")
11876 (set_attr "prefix" "vex")
11877 (set_attr "mode" "<sseinsnmode>")])
;; Explicit-RTL form of vperm2<i128>: a vec_select (parallel in operand 3)
;; from the vec_concat of operands 1 and 2.
;; The insn condition requires avx_vperm2f128_parallel (operands[3],
;; <MODE>mode) to be nonzero.  At output time the helper's result minus one
;; is used as the immediate, which replaces operands[3] before printing.
11879 (define_insn "*avx_vperm2f128<mode>_nozero"
11880 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11881 (vec_select:AVX256MODE2P
11882 (vec_concat:<ssedoublevecmode>
11883 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11884 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11885 (match_parallel 3 ""
11886 [(match_operand 4 "const_int_operand" "")])))]
11888 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11890 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11891 operands[3] = GEN_INT (mask);
11892 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11894 [(set_attr "type" "sselog")
11895 (set_attr "prefix_extra" "1")
11896 (set_attr "length_immediate" "1")
11897 (set_attr "prefix" "vex")
11898 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a half-width vector (operand 2) into a V_256
;; vector (operand 1), writing operand 0.  Operand 3 is accepted by
;; const_0_to_1_operand and is switched on to pick either
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; the chosen generator is
;; then called with operands 0, 1 and 2.  Any other value is unreachable.
11900 (define_expand "avx_vinsertf128<mode>"
11901 [(match_operand:V_256 0 "register_operand" "")
11902 (match_operand:V_256 1 "register_operand" "")
11903 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
11904 (match_operand:SI 3 "const_0_to_1_operand" "")]
11907 rtx (*insn)(rtx, rtx, rtx);
11909 switch (INTVAL (operands[3]))
11912 insn = gen_vec_set_lo_<mode>;
11915 insn = gen_vec_set_hi_<mode>;
11918 gcc_unreachable ();
11921 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a V4DI value: operand 2 (V2DI, register or
;; memory) comes first, followed by elements 2 and 3 selected from
;; operand 1.  Printed as vinserti128 with immediate 0.
11925 (define_insn "avx2_vec_set_lo_v4di"
11926 [(set (match_operand:V4DI 0 "register_operand" "=x")
11928 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11930 (match_operand:V4DI 1 "register_operand" "x")
11931 (parallel [(const_int 2) (const_int 3)]))))]
11933 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11934 [(set_attr "type" "sselog")
11935 (set_attr "prefix_extra" "1")
11936 (set_attr "length_immediate" "1")
11937 (set_attr "prefix" "vex")
11938 (set_attr "mode" "OI")])
;; Replace the high half of a V4DI value: elements 0 and 1 selected from
;; operand 1 come first, followed by operand 2 (V2DI, register or memory).
;; Printed as vinserti128 with immediate 1.
11940 (define_insn "avx2_vec_set_hi_v4di"
11941 [(set (match_operand:V4DI 0 "register_operand" "=x")
11944 (match_operand:V4DI 1 "register_operand" "x")
11945 (parallel [(const_int 0) (const_int 1)]))
11946 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11948 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11949 [(set_attr "type" "sselog")
11950 (set_attr "prefix_extra" "1")
11951 (set_attr "length_immediate" "1")
11952 (set_attr "prefix" "vex")
11953 (set_attr "mode" "OI")])
;; Replace the low half of a four-element 256-bit vector (VI8F_256 modes).
;; The result is the vec_concat of operand 2 (new low half, register or
;; memory) with elements 2 and 3 of operand 1 (the preserved high half).
;; Printed as vinsert<i128> with immediate 0.
11955 (define_insn "vec_set_lo_<mode>"
11956 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11957 (vec_concat:VI8F_256
11958 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11959 (vec_select:<ssehalfvecmode>
11960 (match_operand:VI8F_256 1 "register_operand" "x")
11961 (parallel [(const_int 2) (const_int 3)]))))]
11963 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11964 [(set_attr "type" "sselog")
11965 (set_attr "prefix_extra" "1")
11966 (set_attr "length_immediate" "1")
11967 (set_attr "prefix" "vex")
11968 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a four-element 256-bit vector (VI8F_256 modes).
;; The result is the vec_concat of elements 0 and 1 of operand 1 (the
;; preserved low half) with operand 2 (new high half, register or memory).
;; Printed as vinsert<i128> with immediate 1.
11970 (define_insn "vec_set_hi_<mode>"
11971 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11972 (vec_concat:VI8F_256
11973 (vec_select:<ssehalfvecmode>
11974 (match_operand:VI8F_256 1 "register_operand" "x")
11975 (parallel [(const_int 0) (const_int 1)]))
11976 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11978 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11979 [(set_attr "type" "sselog")
11980 (set_attr "prefix_extra" "1")
11981 (set_attr "length_immediate" "1")
11982 (set_attr "prefix" "vex")
11983 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an eight-element 256-bit vector (VI4F_256 modes).
;; The result is the vec_concat of operand 2 (new low half, register or
;; memory) with elements 4..7 of operand 1 (the preserved high half).
;; Printed as vinsert<i128> with immediate 0.
11985 (define_insn "vec_set_lo_<mode>"
11986 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11987 (vec_concat:VI4F_256
11988 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11989 (vec_select:<ssehalfvecmode>
11990 (match_operand:VI4F_256 1 "register_operand" "x")
11991 (parallel [(const_int 4) (const_int 5)
11992 (const_int 6) (const_int 7)]))))]
11994 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11995 [(set_attr "type" "sselog")
11996 (set_attr "prefix_extra" "1")
11997 (set_attr "length_immediate" "1")
11998 (set_attr "prefix" "vex")
11999 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an eight-element 256-bit vector (VI4F_256).
;; The result is the vec_concat of elements 0..3 of operand 1 (the
;; preserved low half) with operand 2 (new high half, register or memory).
;; Printed as vinsert<i128> with immediate 1.
12001 (define_insn "vec_set_hi_<mode>"
12002 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12003 (vec_concat:VI4F_256
12004 (vec_select:<ssehalfvecmode>
12005 (match_operand:VI4F_256 1 "register_operand" "x")
12006 (parallel [(const_int 0) (const_int 1)
12007 (const_int 2) (const_int 3)]))
12008 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12010 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12011 [(set_attr "type" "sselog")
12012 (set_attr "prefix_extra" "1")
12013 (set_attr "length_immediate" "1")
12014 (set_attr "prefix" "vex")
12015 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI value: operand 2 (V8HI, register or
;; memory) is combined with elements 8..15 selected from operand 1.
;; Printed as vinsert%~128 with immediate 0.
;; NOTE(review): "%~" is expanded by the i386 operand printer, which is not
;; shown here; presumably it picks the "i" or "f" mnemonic letter.
12017 (define_insn "vec_set_lo_v16hi"
12018 [(set (match_operand:V16HI 0 "register_operand" "=x")
12020 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12022 (match_operand:V16HI 1 "register_operand" "x")
12023 (parallel [(const_int 8) (const_int 9)
12024 (const_int 10) (const_int 11)
12025 (const_int 12) (const_int 13)
12026 (const_int 14) (const_int 15)]))))]
12028 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12029 [(set_attr "type" "sselog")
12030 (set_attr "prefix_extra" "1")
12031 (set_attr "length_immediate" "1")
12032 (set_attr "prefix" "vex")
12033 (set_attr "mode" "OI")])
;; Replace the high half of a V16HI value: elements 0..7 selected from
;; operand 1 are combined with operand 2 (V8HI, register or memory).
;; Printed as vinsert%~128 with immediate 1.
12035 (define_insn "vec_set_hi_v16hi"
12036 [(set (match_operand:V16HI 0 "register_operand" "=x")
12039 (match_operand:V16HI 1 "register_operand" "x")
12040 (parallel [(const_int 0) (const_int 1)
12041 (const_int 2) (const_int 3)
12042 (const_int 4) (const_int 5)
12043 (const_int 6) (const_int 7)]))
12044 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12046 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12047 [(set_attr "type" "sselog")
12048 (set_attr "prefix_extra" "1")
12049 (set_attr "length_immediate" "1")
12050 (set_attr "prefix" "vex")
12051 (set_attr "mode" "OI")])
;; Replace the low half of a V32QI value: operand 2 (V16QI, register or
;; memory) is combined with elements 16..31 selected from operand 1.
;; Printed as vinsert%~128 with immediate 0.
12053 (define_insn "vec_set_lo_v32qi"
12054 [(set (match_operand:V32QI 0 "register_operand" "=x")
12056 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12058 (match_operand:V32QI 1 "register_operand" "x")
12059 (parallel [(const_int 16) (const_int 17)
12060 (const_int 18) (const_int 19)
12061 (const_int 20) (const_int 21)
12062 (const_int 22) (const_int 23)
12063 (const_int 24) (const_int 25)
12064 (const_int 26) (const_int 27)
12065 (const_int 28) (const_int 29)
12066 (const_int 30) (const_int 31)]))))]
12068 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12069 [(set_attr "type" "sselog")
12070 (set_attr "prefix_extra" "1")
12071 (set_attr "length_immediate" "1")
12072 (set_attr "prefix" "vex")
12073 (set_attr "mode" "OI")])
;; Replace the high half of a V32QI value: elements 0..15 selected from
;; operand 1 are combined with operand 2 (V16QI, register or memory).
;; Printed as vinsert%~128 with immediate 1.
12075 (define_insn "vec_set_hi_v32qi"
12076 [(set (match_operand:V32QI 0 "register_operand" "=x")
12079 (match_operand:V32QI 1 "register_operand" "x")
12080 (parallel [(const_int 0) (const_int 1)
12081 (const_int 2) (const_int 3)
12082 (const_int 4) (const_int 5)
12083 (const_int 6) (const_int 7)
12084 (const_int 8) (const_int 9)
12085 (const_int 10) (const_int 11)
12086 (const_int 12) (const_int 13)
12087 (const_int 14) (const_int 15)]))
12088 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12090 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12091 [(set_attr "type" "sselog")
12092 (set_attr "prefix_extra" "1")
12093 (set_attr "length_immediate" "1")
12094 (set_attr "prefix" "vex")
12095 (set_attr "mode" "OI")])
;; Named expander for a masked load over the V48_AVX2 modes.
;; Operand 0 is the destination register, operand 1 the memory source and
;; operand 2 a register in the matching integer vector mode.
;; Note the operand order inside the vector: operand 2 precedes operand 1.
12097 (define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12098 [(set (match_operand:V48_AVX2 0 "register_operand" "")
12100 [(match_operand:<sseintvecmode> 2 "register_operand" "")
12101 (match_operand:V48_AVX2 1 "memory_operand" "")
;; Named expander for a masked store over the V48_AVX2 modes.
;; Operand 0 is the memory destination, operand 1 a register in the
;; matching integer vector mode and operand 2 the data register.
12106 (define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12107 [(set (match_operand:V48_AVX2 0 "memory_operand" "")
12109 [(match_operand:<sseintvecmode> 1 "register_operand" "")
12110 (match_operand:V48_AVX2 2 "register_operand" "")
;; vpmaskmov for the VI48_AVX2 integer modes, covering both directions:
;;   alternative 0 - register destination, memory source (load);
;;   alternative 1 - memory destination, register source (store).
;; Operand 1 is always a register in the matching integer vector mode.
;; The condition REG_P (operands[0]) == MEM_P (operands[2]) rejects
;; register-to-register and memory-to-memory combinations.
12115 (define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
12116 [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
12118 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12119 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
12123 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12124 "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12125 [(set_attr "type" "sselog1")
12126 (set_attr "prefix_extra" "1")
12127 (set_attr "prefix" "vex")
12128 (set_attr "mode" "<sseinsnmode>")])
;; vmaskmov for the VF float modes, covering both directions:
;;   alternative 0 - register destination, memory source (load);
;;   alternative 1 - memory destination, register source (store).
;; Operand 1 is always a register in the matching integer vector mode.
;; The condition REG_P (operands[0]) == MEM_P (operands[2]) rejects
;; register-to-register and memory-to-memory combinations.
12130 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
12131 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
12133 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12134 (match_operand:VF 2 "nonimmediate_operand" "m,x")
12138 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12139 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12140 [(set_attr "type" "sselog1")
12141 (set_attr "prefix_extra" "1")
12142 (set_attr "prefix" "vex")
12143 (set_attr "mode" "<MODE>")])
;; Cast from a half-width vector (operand 1, <ssehalfvecmode>) to the full
;; 256-bit AVX256MODE2P mode, kept as an unspec until after reload and then
;; split into a plain move.
;; The split code re-expresses one side as a hard register in the other
;; side's mode, keeping the same register number: op0 as <ssehalfvecmode>
;; or op1 as <MODE>.  It then calls emit_move_insn (op0, op1).
;; NOTE(review): the test choosing which side is rewritten is not shown in
;; this block; presumably it depends on which operand is a register.
12145 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12146 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12147 (unspec:AVX256MODE2P
12148 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12152 "&& reload_completed"
12155 rtx op0 = operands[0];
12156 rtx op1 = operands[1];
12158 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12160 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12161 emit_move_insn (op0, op1);
;; vec_init expander for the 256-bit V_256 modes.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination (operand 0) and the initializer (operand 1, no predicate).
12165 (define_expand "vec_init<mode>"
12166 [(match_operand:V_256 0 "register_operand" "")
12167 (match_operand 1 "" "")]
12170 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander extracting a V2DI half (operand 0, register or memory) from a
;; V4DI register (operand 1).  Operand 2 is accepted by
;; const_0_to_1_operand and is switched on to pick either
;; gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di; the chosen generator
;; is called with operands 0 and 1.  Any other value is unreachable.
12174 (define_expand "avx2_extracti128"
12175 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12176 (match_operand:V4DI 1 "register_operand" "")
12177 (match_operand:SI 2 "const_0_to_1_operand" "")]
12180 rtx (*insn)(rtx, rtx);
12182 switch (INTVAL (operands[2]))
12185 insn = gen_vec_extract_lo_v4di;
12188 insn = gen_vec_extract_hi_v4di;
12191 gcc_unreachable ();
12194 emit_insn (insn (operands[0], operands[1]));
;; Expander inserting a V2DI value (operand 2) into a V4DI register
;; (operand 1), writing operand 0.  Operand 3 is accepted by
;; const_0_to_1_operand and is switched on to pick either
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di; the chosen
;; generator is called with operands 0, 1 and 2.  Any other value is
;; unreachable.
12198 (define_expand "avx2_inserti128"
12199 [(match_operand:V4DI 0 "register_operand" "")
12200 (match_operand:V4DI 1 "register_operand" "")
12201 (match_operand:V2DI 2 "nonimmediate_operand" "")
12202 (match_operand:SI 3 "const_0_to_1_operand" "")]
12205 rtx (*insn)(rtx, rtx, rtx);
12207 switch (INTVAL (operands[3]))
12210 insn = gen_avx2_vec_set_lo_v4di;
12213 insn = gen_avx2_vec_set_hi_v4di;
12216 gcc_unreachable ();
12219 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V8SI pattern printed as vpsravd.  Operand 1 is a register and operand 2
;; a register or memory; the RTL is written element by element, pairing
;; same-numbered elements of the two operands via single-index parallels.
12223 (define_insn "avx2_ashrvv8si"
12224 [(set (match_operand:V8SI 0 "register_operand" "=x")
12230 (match_operand:V8SI 1 "register_operand" "x")
12231 (parallel [(const_int 0)]))
12233 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12234 (parallel [(const_int 0)])))
12238 (parallel [(const_int 1)]))
12241 (parallel [(const_int 1)]))))
12246 (parallel [(const_int 2)]))
12249 (parallel [(const_int 2)])))
12253 (parallel [(const_int 3)]))
12256 (parallel [(const_int 3)])))))
12262 (parallel [(const_int 0)]))
12265 (parallel [(const_int 0)])))
12269 (parallel [(const_int 1)]))
12272 (parallel [(const_int 1)]))))
12277 (parallel [(const_int 2)]))
12280 (parallel [(const_int 2)])))
12284 (parallel [(const_int 3)]))
12287 (parallel [(const_int 3)])))))))]
12289 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12290 [(set_attr "type" "sseishft")
12291 (set_attr "prefix" "vex")
12292 (set_attr "mode" "OI")])
;; V4SI pattern printed as vpsravd (mode attribute TI).  Operand 1 is a
;; register and operand 2 a register or memory; the RTL pairs elements
;; 0..3 of the two operands one index at a time.
12294 (define_insn "avx2_ashrvv4si"
12295 [(set (match_operand:V4SI 0 "register_operand" "=x")
12300 (match_operand:V4SI 1 "register_operand" "x")
12301 (parallel [(const_int 0)]))
12303 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12304 (parallel [(const_int 0)])))
12308 (parallel [(const_int 1)]))
12311 (parallel [(const_int 1)]))))
12316 (parallel [(const_int 2)]))
12319 (parallel [(const_int 2)])))
12323 (parallel [(const_int 3)]))
12326 (parallel [(const_int 3)]))))))]
12328 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12329 [(set_attr "type" "sseishft")
12330 (set_attr "prefix" "vex")
12331 (set_attr "mode" "TI")])
;; V8SI pattern parameterized by the <lshift> iterator; the mnemonic is
;; built as vp<lshift_insn>vd.  Operand 1 is a register and operand 2 a
;; register or memory; the RTL is written element by element, pairing
;; same-numbered elements of the two operands via single-index parallels.
12333 (define_insn "avx2_<lshift>vv8si"
12334 [(set (match_operand:V8SI 0 "register_operand" "=x")
12340 (match_operand:V8SI 1 "register_operand" "x")
12341 (parallel [(const_int 0)]))
12343 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12344 (parallel [(const_int 0)])))
12348 (parallel [(const_int 1)]))
12351 (parallel [(const_int 1)]))))
12356 (parallel [(const_int 2)]))
12359 (parallel [(const_int 2)])))
12363 (parallel [(const_int 3)]))
12366 (parallel [(const_int 3)])))))
12372 (parallel [(const_int 0)]))
12375 (parallel [(const_int 0)])))
12379 (parallel [(const_int 1)]))
12382 (parallel [(const_int 1)]))))
12387 (parallel [(const_int 2)]))
12390 (parallel [(const_int 2)])))
12394 (parallel [(const_int 3)]))
12397 (parallel [(const_int 3)])))))))]
12399 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12400 [(set_attr "type" "sseishft")
12401 (set_attr "prefix" "vex")
12402 (set_attr "mode" "OI")])
;; Four-element variable shift for the VI4SD_AVX2 modes, parameterized by
;; the <lshift> code iterator; printed as vp<lshift_insn>v<ssemodesuffix>.
;; The result is a vec_concat of two half vectors, each itself a vec_concat
;; of two scalar shifts.  Each scalar shift pairs two single-element
;; selections with the same index (0, 1, 2, 3 in turn); for index 0 these
;; are taken from operand 1 (shifted value) and operand 2 (shift count).
12404 (define_insn "avx2_<lshift>v<mode>"
12405 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12406 (vec_concat:VI4SD_AVX2
12407 (vec_concat:<ssehalfvecmode>
12408 (lshift:<ssescalarmode>
12409 (vec_select:<ssescalarmode>
12410 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12411 (parallel [(const_int 0)]))
12412 (vec_select:<ssescalarmode>
12413 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12414 (parallel [(const_int 0)])))
12415 (lshift:<ssescalarmode>
12416 (vec_select:<ssescalarmode>
12418 (parallel [(const_int 1)]))
12419 (vec_select:<ssescalarmode>
12421 (parallel [(const_int 1)]))))
12422 (vec_concat:<ssehalfvecmode>
12423 (lshift:<ssescalarmode>
12424 (vec_select:<ssescalarmode>
12426 (parallel [(const_int 2)]))
12427 (vec_select:<ssescalarmode>
12429 (parallel [(const_int 2)])))
12430 (lshift:<ssescalarmode>
12431 (vec_select:<ssescalarmode>
12433 (parallel [(const_int 3)]))
12434 (vec_select:<ssescalarmode>
12436 (parallel [(const_int 3)]))))))]
12438 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12439 [(set_attr "type" "sseishft")
12440 (set_attr "prefix" "vex")
12441 (set_attr "mode" "<sseinsnmode>")])
;; V2DI pattern parameterized by the <lshift> iterator; the mnemonic is
;; built as vp<lshift_insn>vq (mode attribute TI).  Operand 1 is a register
;; and operand 2 a register or memory; elements 0 and 1 of the two operands
;; are paired one index at a time.
12443 (define_insn "avx2_<lshift>vv2di"
12444 [(set (match_operand:V2DI 0 "register_operand" "=x")
12448 (match_operand:V2DI 1 "register_operand" "x")
12449 (parallel [(const_int 0)]))
12451 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12452 (parallel [(const_int 0)])))
12456 (parallel [(const_int 1)]))
12459 (parallel [(const_int 1)])))))]
12461 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12462 [(set_attr "type" "sseishft")
12463 (set_attr "prefix" "vex")
12464 (set_attr "mode" "TI")])
;; Build a 256-bit (V_256) value from two half-width vectors.
;; Operand 1 is always a register.  Operand 2 is either a register/memory
;; ("xm", alternative 0) or a constant matching constraint "C"
;; (alternative 1).
;; Output is chosen by which_alternative:
;;   * vinsert<i128> with immediate 1, inserting operand 2 into the %t1
;;     form of operand 1;
;;   * otherwise a move of operand 1 into the %x0 form of the destination,
;;     using vmovaps, vmovapd or vmovdqa according to get_attr_mode (insn).
12466 (define_insn "avx_vec_concat<mode>"
12467 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12469 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12470 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12473 switch (which_alternative)
12476 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12478 switch (get_attr_mode (insn))
12481 return "vmovaps\t{%1, %x0|%x0, %1}";
12483 return "vmovapd\t{%1, %x0|%x0, %1}";
12485 return "vmovdqa\t{%1, %x0|%x0, %1}";
12488 gcc_unreachable ();
12491 [(set_attr "type" "sselog,ssemov")
12492 (set_attr "prefix_extra" "1,*")
12493 (set_attr "length_immediate" "1,*")
12494 (set_attr "prefix" "vex")
12495 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtph2ps with a register source.  The conversion is modelled
;; as a V8SF unspec of the whole V8HI register (operand 1), from which the
;; V4SF result keeps four elements.
;; The selector must name elements 0, 1, 2, 3 - the low four, each exactly
;; once.  The previous selector "0 1 1 2" described a result that repeats
;; element 1 and drops element 3, which is not what the instruction
;; computes and could mislead RTL simplification.
12497 (define_insn "vcvtph2ps"
12498 [(set (match_operand:V4SF 0 "register_operand" "=x")
12500 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12502 (parallel [(const_int 0) (const_int 1)
12503 (const_int 2) (const_int 3)])))]
12505 "vcvtph2ps\t{%1, %0|%0, %1}"
12506 [(set_attr "type" "ssecvt")
12507 (set_attr "prefix" "vex")
12508 (set_attr "mode" "V4SF")])
;; 128-bit vcvtph2ps with a memory source: operand 1 is a V4HI memory
;; operand (predicate memory_operand, constraint "m") and the V4SF result
;; is expressed directly as UNSPEC_VCVTPH2PS of it.
12510 (define_insn "*vcvtph2ps_load"
12511 [(set (match_operand:V4SF 0 "register_operand" "=x")
12512 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12513 UNSPEC_VCVTPH2PS))]
12515 "vcvtph2ps\t{%1, %0|%0, %1}"
12516 [(set_attr "type" "ssecvt")
12517 (set_attr "prefix" "vex")
12518 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: a V8HI source (register or memory) is converted to a
;; V8SF result, expressed as UNSPEC_VCVTPH2PS.
12520 (define_insn "vcvtph2ps256"
12521 [(set (match_operand:V8SF 0 "register_operand" "=x")
12522 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12523 UNSPEC_VCVTPH2PS))]
12525 "vcvtph2ps\t{%1, %0|%0, %1}"
12526 [(set_attr "type" "ssecvt")
12527 (set_attr "prefix" "vex")
12528 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph with a register destination.
;; The V4SF source (operand 1) and the immediate (operand 2) feed a V4HI
;; unspec; the preparation statement supplies operands[3] as the V4HImode
;; zero constant for the rest of the V8HI destination.
12530 (define_expand "vcvtps2ph"
12531 [(set (match_operand:V8HI 0 "register_operand" "")
12533 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12534 (match_operand:SI 2 "const_0_to_255_operand" "")]
12538 "operands[3] = CONST0_RTX (V4HImode);")
;; Matcher for the 128-bit vcvtps2ph with a register destination.
;; The V8HI result combines the V4HI unspec of operand 1 (V4SF register)
;; and operand 2 (immediate, constraint "N") with operand 3, which must
;; satisfy const0_operand.
12540 (define_insn "*vcvtps2ph"
12541 [(set (match_operand:V8HI 0 "register_operand" "=x")
12543 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12544 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12546 (match_operand:V4HI 3 "const0_operand" "")))]
12548 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12549 [(set_attr "type" "ssecvt")
12550 (set_attr "prefix" "vex")
12551 (set_attr "mode" "V4SF")])
;; 128-bit vcvtps2ph storing straight to memory: the destination is a V4HI
;; memory operand, so no zero upper half is modelled.  Operand 1 is the
;; V4SF source register and operand 2 the immediate.
12553 (define_insn "*vcvtps2ph_store"
12554 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12555 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12556 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12557 UNSPEC_VCVTPS2PH))]
12559 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12560 [(set_attr "type" "ssecvt")
12561 (set_attr "prefix" "vex")
12562 (set_attr "mode" "V4SF")])
;; 256-bit-source form of vcvtps2ph.  Operand 1 is a V8SF register ("x") and
;; operand 2 an SImode immediate (constraint "N"); the V8HI destination
;; (operand 0) may be a register or memory ("=xm") and is set to the
;; V8HI-mode unspec UNSPEC_VCVTPS2PH of the two inputs.
;; NOTE(review): embedded number 12569 is skipped, so the insn condition is
;; presumably not visible here.
12564 (define_insn "vcvtps2ph256"
12565 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12566 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12567 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12568 UNSPEC_VCVTPS2PH))]
12570 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12571 [(set_attr "type" "ssecvt")
12572 (set_attr "prefix" "vex")
12573 (set_attr "mode" "V8SF")])
12575 ;; For gather* insn patterns
;; Mode iterator over the eight data modes used by the gather patterns:
;; the DI/DF and SI/SF element modes with 2, 4 or 8 elements listed below.
12576 (define_mode_iterator VEC_GATHER_MODE
12577 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode attribute with the same name as the iterator.  Written as
;; <VEC_GATHER_MODE> it maps a data mode to the SImode vector mode used for
;; the extra vector register operand of the avx2_gathersi<mode> patterns:
;; V8SI for the eight-element modes (V8SI, V8SF), V4SI for every other mode.
12578 (define_mode_attr VEC_GATHER_MODE
12579 [(V2DI "V4SI") (V2DF "V4SI")
12580 (V4DI "V4SI") (V4DF "V4SI")
12581 (V4SI "V4SI") (V4SF "V4SI")
12582 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for the gather patterns using an SImode index vector, one
;; instance per mode in VEC_GATHER_MODE.  It builds a parallel of
;;   - a set of operand 0 from an unspec over operand 1 (same mode as the
;;     destination), operand 2 (a register with no mode specified),
;;     a (mem:BLK (scratch)) placeholder, operand 3 (mode <VEC_GATHER_MODE>),
;;     operand 4 (same mode as the destination) and operand 5 (SImode,
;;     predicate const1248_operand), and
;;   - a clobber of a scratch register (operand 6) in the destination mode.
;; The predicate name of operand 5 no longer carries a trailing space, so it
;; is spelled exactly like the predicate used by *avx2_gathersi<mode>.
;; NOTE(review): embedded numbers 12593, 12595 and 12596 are skipped, so the
;; unspec name, the expander condition and the end of the definition are
;; presumably not visible here.
12584 (define_expand "avx2_gathersi<mode>"
12585 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12586 (unspec:VEC_GATHER_MODE
12587 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12588 (match_operand 2 "register_operand" "")
12589 (mem:BLK (scratch))
12590 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12591 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12592 (match_operand:SI 5 "const1248_operand" "")]
12594 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
;; Insn for the SImode-index gather ("v<gthrfirstp>gatherd<gthrlastp>").
;; Operand roles as written in the pattern:
;;   0  destination, earlyclobber vector register ("=&x")
;;   1  scratch in the destination mode, earlyclobber ("=&x"), clobbered
;;   2  input tied to operand 0 (constraint "0")
;;   3  Pmode general register ("r"), first element of the printed address
;;   4  <VEC_GATHER_MODE> vector register ("x"), second element of the address
;;   5  input tied to the scratch operand 1 (constraint "1")
;;   6  SImode constant (const1248_operand), printed with the "p" modifier
;; The template prints operand 1, the address "(%3, %4, %p6)" and operand 0;
;; operand 1 is presumably the mask register of the instruction -- confirm.
;; NOTE(review): embedded numbers 12606 and 12608 are skipped, so the unspec
;; name and the insn condition are presumably not visible here.
12597 (define_insn "*avx2_gathersi<mode>"
12598 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12599 (unspec:VEC_GATHER_MODE
12600 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12601 (match_operand:P 3 "register_operand" "r")
12602 (mem:BLK (scratch))
12603 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12604 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")
12605 (match_operand:SI 6 "const1248_operand" "n")]
12607 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12609 "v<gthrfirstp>gatherd<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12610 [(set_attr "type" "ssemov")
12611 (set_attr "prefix" "vex")
12612 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the gather patterns whose vector register operand 3 has mode
;; <AVXMODE48P_DI>.  The shape is the same as avx2_gathersi<mode>: a parallel
;; of a set of operand 0 from an unspec over operands 1-5 plus a
;; (mem:BLK (scratch)) placeholder, and a clobber of scratch operand 6.
;; The predicate name of operand 5 no longer carries a trailing space, so it
;; is spelled exactly like the predicate used by *avx2_gatherdi<mode>.
;; NOTE(review): this expander iterates over VEC_GATHER_MODE while
;; *avx2_gatherdi<mode> iterates over AVXMODE48P_DI, and the definition of
;; the <AVXMODE48P_DI> attribute is not visible here -- confirm they agree.
;; NOTE(review): embedded numbers 12623, 12625 and 12626 are skipped, so the
;; unspec name, the expander condition and the end of the definition are
;; presumably not visible here.
12614 (define_expand "avx2_gatherdi<mode>"
12615 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12616 (unspec:VEC_GATHER_MODE
12617 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12618 (match_operand 2 "register_operand" "")
12619 (mem:BLK (scratch))
12620 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12621 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12622 (match_operand:SI 5 "const1248_operand" "")]
12624 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
;; Insn for the "v<gthrfirstp>gatherq<gthrlastp>" gather, iterated over
;; AVXMODE48P_DI.  Operand roles as written in the pattern:
;;   0  destination, earlyclobber vector register ("=&x")
;;   1  scratch in the destination mode, earlyclobber ("=&x"), clobbered
;;   2  input tied to operand 0 (constraint "0")
;;   3  Pmode general register ("r"), first element of the printed address
;;   4  <AVXMODE48P_DI> vector register ("x"), second element of the address
;;   5  input tied to the scratch operand 1 (constraint "1")
;;   6  SImode constant (const1248_operand), printed with the "p" modifier
;; NOTE(review): embedded numbers 12636 and 12638 are skipped, so the unspec
;; name and the insn condition are presumably not visible here.
12627 (define_insn "*avx2_gatherdi<mode>"
12628 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12629 (unspec:AVXMODE48P_DI
12630 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12631 (match_operand:P 3 "register_operand" "r")
12632 (mem:BLK (scratch))
12633 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12634 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")
12635 (match_operand:SI 6 "const1248_operand" "n")]
12637 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12639 "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12640 [(set_attr "type" "ssemov")
12641 (set_attr "prefix" "vex")
12642 (set_attr "mode" "<sseinsnmode>")])
12644 ;; Special handling for VEX.256 with float arguments,
12645 ;; since the data operands are still xmm registers.
;; Expander iterated over VI4F_128: operands 0, 1 and 4 and the scratch
;; operand 6 use the VI4F_128 mode, while the vector register operand 3 is
;; V4DI.  Operand 2 is a register with no mode specified and operand 5 an
;; SImode constant accepted by const1248_operand.
;; The predicate name of operand 5 no longer carries a trailing space, so it
;; is spelled exactly like the predicate used by *avx2_gatherdi<mode>256.
;; NOTE(review): embedded numbers 12648, 12655, 12657 and 12658 are skipped,
;; so the unspec header and name, the expander condition and the end of the
;; definition are presumably not visible here.
12646 (define_expand "avx2_gatherdi<mode>256"
12647 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12649 [(match_operand:VI4F_128 1 "register_operand" "")
12650 (match_operand 2 "register_operand" "")
12651 (mem:BLK (scratch))
12652 (match_operand:V4DI 3 "register_operand" "")
12653 (match_operand:VI4F_128 4 "register_operand" "")
12654 (match_operand:SI 5 "const1248_operand" "")]
12656 (clobber (match_scratch:VI4F_128 6 ""))])]
;; Insn for the "v<gthrfirstp>gatherq<gthrlastp>" gather with a V4DI vector
;; register in the address and VI4F_128 data operands.  Operand roles:
;;   0  destination vector register, earlyclobber ("=&x")
;;   1  scratch in the destination mode, earlyclobber ("=&x"), clobbered
;;   2  input tied to operand 0 (constraint "0")
;;   3  Pmode general register ("r"), first element of the printed address
;;   4  V4DI vector register ("x"), second element of the address
;;   5  input tied to the scratch operand 1 (constraint "1")
;;   6  SImode constant (const1248_operand), printed with the "p" modifier
;; Operand 0 is marked earlyclobber like the destinations of
;; *avx2_gathersi<mode> and *avx2_gatherdi<mode>, so the allocator keeps it
;; apart from the untied input registers (operand 4 in particular).
;; NOTE(review): embedded numbers 12661, 12668 and 12670 are skipped, so the
;; unspec header and name and the insn condition are presumably not visible
;; here.
12659 (define_insn "*avx2_gatherdi<mode>256"
12660 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12662 [(match_operand:VI4F_128 2 "register_operand" "0")
12663 (match_operand:P 3 "register_operand" "r")
12664 (mem:BLK (scratch))
12665 (match_operand:V4DI 4 "register_operand" "x")
12666 (match_operand:VI4F_128 5 "register_operand" "1")
12667 (match_operand:SI 6 "const1248_operand" "n")]
12669 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12671 "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12672 [(set_attr "type" "ssemov")
12673 (set_attr "prefix" "vex")
12674 (set_attr "mode" "<sseinsnmode>")])