1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; C enum constants for the codes carried by (unspec ...) expressions in
;; the RTL templates of this file; the "unspecv" enum is declared
;; separately right after the "unspec" one.
21 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
90 (define_c_enum "unspecv" [
100 ;; All vector modes including V?TImode, used in move patterns.
;; Every 256-bit mode is gated on TARGET_AVX; the 128-bit modes carry no
;; per-mode condition.
101 (define_mode_iterator V16
102 [(V32QI "TARGET_AVX") V16QI
103 (V16HI "TARGET_AVX") V8HI
104 (V8SI "TARGET_AVX") V4SI
105 (V4DI "TARGET_AVX") V2DI
106 (V2TI "TARGET_AVX") V1TI
107 (V8SF "TARGET_AVX") V4SF
108 (V4DF "TARGET_AVX") V2DF])
;; Integer and float vector modes without any V?TImode entry.  The
;; 256-bit modes require TARGET_AVX and V2DF requires TARGET_SSE2; the
;; other 128-bit modes carry no per-mode condition.
111 (define_mode_iterator V
112 [(V32QI "TARGET_AVX") V16QI
113 (V16HI "TARGET_AVX") V8HI
114 (V8SI "TARGET_AVX") V4SI
115 (V4DI "TARGET_AVX") V2DI
116 (V8SF "TARGET_AVX") V4SF
117 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
119 ;; All 128bit vector modes
;; Only V2DF carries a per-mode condition (TARGET_SSE2).
120 (define_mode_iterator V_128
121 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
123 ;; All 256bit vector modes
;; No per-mode conditions are attached here; presumably every pattern
;; using this iterator tests for AVX in its own condition -- TODO confirm.
124 (define_mode_iterator V_256
125 [V32QI V16HI V8SI V4DI V8SF V4DF])
127 ;; All vector float modes
;; V8SF/V4DF need TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is
;; unconditional.
128 (define_mode_iterator VF
129 [(V8SF "TARGET_AVX") V4SF
130 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
132 ;; All SFmode vector float modes
133 (define_mode_iterator VF1
134 [(V8SF "TARGET_AVX") V4SF])
136 ;; All DFmode vector float modes
;; Note that V2DF has no TARGET_SSE2 condition in this iterator, unlike
;; in VF and VF_128.
137 (define_mode_iterator VF2
138 [(V4DF "TARGET_AVX") V2DF])
140 ;; All 128bit vector float modes
141 (define_mode_iterator VF_128
142 [V4SF (V2DF "TARGET_SSE2")])
144 ;; All 256bit vector float modes
145 (define_mode_iterator VF_256
148 ;; All vector integer modes
;; The 256-bit modes are enabled by TARGET_AVX in this iterator.
149 (define_mode_iterator VI
150 [(V32QI "TARGET_AVX") V16QI
151 (V16HI "TARGET_AVX") V8HI
152 (V8SI "TARGET_AVX") V4SI
153 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the 256-bit modes require TARGET_AVX2
;; instead of TARGET_AVX.
155 (define_mode_iterator VI_AVX2
156 [(V32QI "TARGET_AVX2") V16QI
157 (V16HI "TARGET_AVX2") V8HI
158 (V8SI "TARGET_AVX2") V4SI
159 (V4DI "TARGET_AVX2") V2DI])
161 ;; All QImode vector integer modes
162 (define_mode_iterator VI1
163 [(V32QI "TARGET_AVX") V16QI])
165 ;; All DImode vector integer modes
166 (define_mode_iterator VI8
167 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size integer iterators (the digit is the element size
;; in bytes) whose 256-bit mode requires TARGET_AVX2; the 128-bit mode is
;; unconditional.
169 (define_mode_iterator VI1_AVX2
170 [(V32QI "TARGET_AVX2") V16QI])
172 (define_mode_iterator VI2_AVX2
173 [(V16HI "TARGET_AVX2") V8HI])
175 (define_mode_iterator VI4_AVX2
176 [(V8SI "TARGET_AVX2") V4SI])
178 (define_mode_iterator VI8_AVX2
179 [(V4DI "TARGET_AVX2") V2DI])
181 ;; ??? We should probably use TImode instead.
;; Whole-register integer modes: V2TI under TARGET_AVX2, V1TI
;; unconditionally.
182 (define_mode_iterator VIMAX_AVX2
183 [(V2TI "TARGET_AVX2") V1TI])
185 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in listing scalar TI where that iterator
;; lists V1TI.
186 (define_mode_iterator SSESCALARMODE
187 [(V2TI "TARGET_AVX2") TI])
;; Integer iterators covering several element sizes.  The digits in each
;; name list the element sizes in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI);
;; in the VI*_AVX2 iterators the 256-bit mode of each pair requires
;; TARGET_AVX2 and the 128-bit mode is unconditional.
189 (define_mode_iterator VI12_AVX2
190 [(V32QI "TARGET_AVX2") V16QI
191 (V16HI "TARGET_AVX2") V8HI])
193 (define_mode_iterator VI24_AVX2
194 [(V16HI "TARGET_AVX2") V8HI
195 (V8SI "TARGET_AVX2") V4SI])
197 (define_mode_iterator VI124_AVX2
198 [(V32QI "TARGET_AVX2") V16QI
199 (V16HI "TARGET_AVX2") V8HI
200 (V8SI "TARGET_AVX2") V4SI])
202 (define_mode_iterator VI248_AVX2
203 [(V16HI "TARGET_AVX2") V8HI
204 (V8SI "TARGET_AVX2") V4SI
205 (V4DI "TARGET_AVX2") V2DI])
207 (define_mode_iterator VI48_AVX2
208 [(V8SI "TARGET_AVX2") V4SI
209 (V4DI "TARGET_AVX2") V2DI])
;; In V48_AVX2 the 128-bit integer modes are gated on TARGET_AVX2 as
;; well, not only the 256-bit ones.
211 (define_mode_iterator V48_AVX2
214 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name attributes.  Each maps a mode to the lower-case name of an
;; instruction-set extension; presumably they are substituted into
;; pattern names -- TODO confirm against the patterns that use them.
;; sse2_avx2: 128-bit integer modes -> "sse2", 256-bit -> "avx2".
217 (define_mode_attr sse2_avx2
218 [(V16QI "sse2") (V32QI "avx2")
219 (V8HI "sse2") (V16HI "avx2")
220 (V4SI "sse2") (V8SI "avx2")
221 (V2DI "sse2") (V4DI "avx2")
222 (V1TI "sse2") (V2TI "avx2")])
;; ssse3_avx2: as above with "ssse3"; note the last row is keyed on
;; scalar TI rather than V1TI.
224 (define_mode_attr ssse3_avx2
225 [(V16QI "ssse3") (V32QI "avx2")
226 (V8HI "ssse3") (V16HI "avx2")
227 (V4SI "ssse3") (V8SI "avx2")
228 (V2DI "ssse3") (V4DI "avx2")
229 (TI "ssse3") (V2TI "avx2")])
231 (define_mode_attr sse4_1_avx2
232 [(V16QI "sse4_1") (V32QI "avx2")
233 (V8HI "sse4_1") (V16HI "avx2")
234 (V4SI "sse4_1") (V8SI "avx2")
235 (V2DI "sse4_1") (V4DI "avx2")])
;; avx_avx2: float vector modes -> "avx", SI/DI integer vector modes ->
;; "avx2", independent of vector width.
237 (define_mode_attr avx_avx2
238 [(V4SF "avx") (V2DF "avx")
239 (V8SF "avx") (V4DF "avx")
240 (V4SI "avx2") (V2DI "avx2")
241 (V8SI "avx2") (V4DI "avx2")])
;; vec_avx2: 128-bit integer modes -> "vec", 256-bit -> "avx2".
243 (define_mode_attr vec_avx2
244 [(V16QI "vec") (V32QI "avx2")
245 (V8HI "vec") (V16HI "avx2")
246 (V4SI "vec") (V8SI "avx2")
247 (V2DI "vec") (V4DI "avx2")])
;; ssedoublemode: HImode vector -> SImode vector with the same number of
;; elements (each element twice as wide).
249 (define_mode_attr ssedoublemode
250 [(V16HI "V16SI") (V8HI "V8SI")])
;; ssebytemode: DImode vector -> QImode vector of the same total size.
252 (define_mode_attr ssebytemode
253 [(V4DI "V32QI") (V2DI "V16QI")])
255 ;; All 128bit vector integer modes
256 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
258 ;; All 256bit vector integer modes
259 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
261 ;; Random 128bit vector integer mode combinations
;; The digits before the underscore list the element sizes in bytes that
;; the iterator contains (e.g. VI128_128 = QI, HI and DI elements); none
;; of these carries a per-mode condition.
262 (define_mode_iterator VI12_128 [V16QI V8HI])
263 (define_mode_iterator VI14_128 [V16QI V4SI])
264 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266 (define_mode_iterator VI24_128 [V8HI V4SI])
267 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268 (define_mode_iterator VI48_128 [V4SI V2DI])
270 ;; Random 256bit vector integer mode combinations
271 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272 (define_mode_iterator VI48_256 [V8SI V4DI])
274 ;; Int-float size matches
;; Each iterator pairs the integer and the float vector mode that share
;; an element size (4 or 8 bytes) and a total width (128 or 256 bits).
275 (define_mode_iterator VI4F_128 [V4SI V4SF])
276 (define_mode_iterator VI8F_128 [V2DI V2DF])
277 (define_mode_iterator VI4F_256 [V8SI V8SF])
278 (define_mode_iterator VI8F_256 [V4DI V4DF])
280 ;; Mapping from float mode to required SSE level
;; <sse>, <sse2> and <sse3> appear as name prefixes of patterns in this
;; file (e.g. "<sse>_movu...", "<sse2>_movdqu...", "<sse3>_lddqu...").
;; In all four attributes the 256-bit modes map to "avx".
281 (define_mode_attr sse
282 [(SF "sse") (DF "sse2")
283 (V4SF "sse") (V2DF "sse2")
284 (V8SF "avx") (V4DF "avx")])
286 (define_mode_attr sse2
287 [(V16QI "sse2") (V32QI "avx")
288 (V2DI "sse2") (V4DI "avx")])
290 (define_mode_attr sse3
291 [(V16QI "sse3") (V32QI "avx")])
293 (define_mode_attr sse4_1
294 [(V4SF "sse4_1") (V2DF "sse4_1")
295 (V8SF "avx") (V4DF "avx")])
;; Name suffix distinguishing vector widths: "256" for the 256-bit modes
;; and the empty string for the 128-bit modes.  It is appended to pattern
;; names such as "<sse>_movu<ssemodesuffix><avxsizesuffix>".
297 (define_mode_attr avxsizesuffix
298 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300 (V8SF "256") (V4DF "256")
301 (V4SF "") (V2DF "")])
303 ;; SSE instruction mode
;; Value for the insn "mode" attribute: integer vector modes map to OI
;; (256-bit) or TI (128-bit), float vector modes map to themselves.
304 (define_mode_attr sseinsnmode
305 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307 (V8SF "V8SF") (V4DF "V4DF")
308 (V4SF "V4SF") (V2DF "V2DF")
311 ;; Mapping of vector float modes to an integer mode of the same size
;; Integer vector modes are listed too and map to themselves, so the
;; attribute can be used with iterators that mix float and integer modes.
312 (define_mode_attr sseintvecmode
313 [(V8SF "V8SI") (V4DF "V4DI")
314 (V4SF "V4SI") (V2DF "V2DI")
315 (V8SI "V8SI") (V4DI "V4DI")
316 (V4SI "V4SI") (V2DI "V2DI")
317 (V16HI "V16HI") (V8HI "V8HI")
318 (V32QI "V32QI") (V16QI "V16QI")])
;; Same mapping as sseintvecmode with the result spelled in lower case.
320 (define_mode_attr sseintvecmodelower
321 [(V8SF "v8si") (V4DF "v4di")
322 (V4SF "v4si") (V2DF "v2di")
323 (V8SI "v8si") (V4DI "v4di")
324 (V4SI "v4si") (V2DI "v2di")
325 (V16HI "v16hi") (V8HI "v8hi")
326 (V32QI "v32qi") (V16QI "v16qi")])
328 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled.
329 (define_mode_attr ssedoublevecmode
330 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332 (V8SF "V16SF") (V4DF "V8DF")
333 (V4SF "V8SF") (V2DF "V4DF")])
335 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.
336 (define_mode_attr ssehalfvecmode
337 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
339 (V8SF "V4SF") (V4DF "V2DF")
342 ;; Mapping of vector modes back to the scalar modes
;; <ssescalarmode> is used below as the "mode" attribute of the scalar
;; (vm*) arithmetic patterns.
343 (define_mode_attr ssescalarmode
344 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346 (V8SF "SF") (V4DF "DF")
347 (V4SF "SF") (V2DF "DF")])
349 ;; Number of scalar elements in each vector type
;; The count is given as a decimal string.
350 (define_mode_attr ssescalarnum
351 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353 (V8SF "8") (V4DF "4")
354 (V4SF "4") (V2DF "2")])
356 ;; SSE prefix for integer vector modes
;; "p" for the integer modes listed, empty for the float modes.
357 (define_mode_attr sseintprefix
358 [(V2DI "p") (V2DF "")
361 (V8SI "p") (V8SF "")])
363 ;; SSE scalar suffix for vector modes
;; Appended to mnemonics in the scalar patterns of this file, e.g.
;; "mul<ssescalarmodesuffix>".  Integer vectors with SI/DI elements map
;; to the same "ss"/"sd" suffix as the float vectors of that element size.
364 (define_mode_attr ssescalarmodesuffix
366 (V8SF "ss") (V4DF "sd")
367 (V4SF "ss") (V2DF "sd")
368 (V8SI "ss") (V4DI "sd")
371 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, elements twice as wide and half as
;; many.  ssepackmode is the inverse mapping.
372 (define_mode_attr sseunpackmode
373 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
376 (define_mode_attr ssepackmode
377 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
380 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
381 (define_mode_attr sserotatemax
382 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
384 ;; Mapping of mode to cast intrinsic name
385 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
387 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code), not a mode attribute.
388 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
390 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; For the integer modes the table stores "%~128"; presumably "%~" is
;; expanded to "i" or "f" when the assembler template is printed --
;; TODO confirm in the target's print_operand code.
391 (define_mode_attr i128
392 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes (the same modes castmode is defined for).
396 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Scalar and vector float modes, with no per-mode conditions.
398 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
400 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element vector, i.e. an immediate with
;; one set bit per element.
401 (define_mode_attr blendbits
402 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
404 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
412 ;; All of these patterns are enabled for SSE1 as well as SSE2.
413 ;; This is essential for maintaining stable calling conventions.
;; Standard-named move expander for every V16 mode.  The preparation
;; code passes the mode and the operand array to
;; ix86_expand_vector_move.
415 (define_expand "mov<mode>"
416 [(set (match_operand:V16 0 "nonimmediate_operand" "")
417 (match_operand:V16 1 "nonimmediate_operand" ""))]
420 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every V16 mode.
;; Alternatives: 0 = SSE constant (C) into a register, 1 = register or
;; memory into a register, 2 = register into memory.  The condition
;; requires at least one of the two operands to be a register.
;; Output: the constant case returns standard_sse_constant_opcode;
;; otherwise the mnemonic is chosen from the insn "mode" attribute.  A
;; misaligned operand is one of the tests guarding the unaligned
;; vmovups/vmovupd/vmovdqu forms, and
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL selects movaps in place of
;; movapd/movdqa.
;; "mode" attribute: <sseinsnmode> under TARGET_AVX; V4SF when
;; optimizing for size, without SSE2, or for the store alternative with
;; TARGET_SSE_TYPELESS_STORES; V4SF/V2DF for those two modes; TI
;; otherwise.
424 (define_insn "*mov<mode>_internal"
425 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
426 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
428 && (register_operand (operands[0], <MODE>mode)
429 || register_operand (operands[1], <MODE>mode))"
431 switch (which_alternative)
434 return standard_sse_constant_opcode (insn, operands[1]);
437 switch (get_attr_mode (insn))
442 && (misaligned_operand (operands[0], <MODE>mode)
443 || misaligned_operand (operands[1], <MODE>mode)))
444 return "vmovups\t{%1, %0|%0, %1}";
446 return "%vmovaps\t{%1, %0|%0, %1}";
451 && (misaligned_operand (operands[0], <MODE>mode)
452 || misaligned_operand (operands[1], <MODE>mode)))
453 return "vmovupd\t{%1, %0|%0, %1}";
454 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
455 return "%vmovaps\t{%1, %0|%0, %1}";
457 return "%vmovapd\t{%1, %0|%0, %1}";
462 && (misaligned_operand (operands[0], <MODE>mode)
463 || misaligned_operand (operands[1], <MODE>mode)))
464 return "vmovdqu\t{%1, %0|%0, %1}";
465 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
466 return "%vmovaps\t{%1, %0|%0, %1}";
468 return "%vmovdqa\t{%1, %0|%0, %1}";
477 [(set_attr "type" "sselog1,ssemov,ssemov")
478 (set_attr "prefix" "maybe_vex")
480 (cond [(match_test "TARGET_AVX")
481 (const_string "<sseinsnmode>")
482 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
483 (not (match_test "TARGET_SSE2")))
484 (and (eq_attr "alternative" "2")
485 (match_test "TARGET_SSE_TYPELESS_STORES")))
486 (const_string "V4SF")
487 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
488 (const_string "V4SF")
489 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
490 (const_string "V2DF")
492 (const_string "TI")))])
;; movq with a V2DI register destination and a V2DI register-or-memory
;; source; the template selects element 0 of operand 1.
494 (define_insn "sse2_movq128"
495 [(set (match_operand:V2DI 0 "register_operand" "=x")
498 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
499 (parallel [(const_int 0)]))
502 "%vmovq\t{%1, %0|%0, %1}"
503 [(set_attr "type" "ssemov")
504 (set_attr "prefix" "maybe_vex")
505 (set_attr "mode" "TI")])
507 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
508 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
509 ;; from memory, we'd prefer to load the memory directly into the %xmm
510 ;; register. To facilitate this happy circumstance, this pattern won't
511 ;; split until after register allocation. If the 64-bit value didn't
512 ;; come from memory, this is the best we can do. This is much better
513 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from that temporary.
;; Operand 1 is a DImode value in general registers (alternative 0,
;; disparaged with "?") or in memory (alternative 1); operand 0 is the
;; V4SI SSE register receiving it.  Operand 2 is a V4SI scratch: an
;; early-clobbered SSE register in alternative 0, unused ("X") in
;; alternative 1.
;; Only enabled for !TARGET_64BIT with SSE2 and inter-unit moves, and
;; only split once reload has completed:
;;  - register source: load the SImode subregs at byte offsets 0 and 4
;;    into operand 0 and the scratch with sse2_loadld, then combine them
;;    with vec_interleave_lowv4si;
;;  - memory source: vec_concatv2di of the DImode memory and const0_rtx
;;    into the V2DImode low part of operand 0.
516 (define_insn_and_split "movdi_to_sse"
518 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
519 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
520 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
521 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
523 "&& reload_completed"
526 if (register_operand (operands[1], DImode))
528 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
529 Assemble the 64-bit DImode value in an xmm register. */
530 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
531 gen_rtx_SUBREG (SImode, operands[1], 0)));
532 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
533 gen_rtx_SUBREG (SImode, operands[1], 4)));
534 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
537 else if (memory_operand (operands[1], DImode))
538 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
539 operands[1], const0_rtx));
;; After reload (TARGET_SSE): a V4SF register set from a
;; zero_extended_scalar_load_operand is rewritten in terms of the scalar.
;; Operand 1 is replaced by its SFmode subreg at offset 0 and fed to a
;; vec_duplicate; operand 2 is set to the V4SF zero vector.
545 [(set (match_operand:V4SF 0 "register_operand" "")
546 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
547 "TARGET_SSE && reload_completed"
550 (vec_duplicate:V4SF (match_dup 1))
554 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
555 operands[2] = CONST0_RTX (V4SFmode);
;; After reload (TARGET_SSE2): a V2DF register set from a
;; zero_extended_scalar_load_operand becomes a vec_concat of the DFmode
;; subreg of operand 1 (offset 0) and a DFmode zero (operand 2).
559 [(set (match_operand:V2DF 0 "register_operand" "")
560 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
561 "TARGET_SSE2 && reload_completed"
562 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
564 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
565 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every V16 mode; the register operand is handed to
;; ix86_expand_push together with the mode.
568 (define_expand "push<mode>1"
569 [(match_operand:V16 0 "register_operand" "")]
572 ix86_expand_push (<MODE>mode, operands[0]);
;; Standard-named misaligned-move expander for every V16 mode; the work
;; is delegated to ix86_expand_vector_move_misalign.
576 (define_expand "movmisalign<mode>"
577 [(set (match_operand:V16 0 "nonimmediate_operand" "")
578 (match_operand:V16 1 "nonimmediate_operand" ""))]
581 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Named expander for the unaligned float vector move.  When both
;; operands are memory, the source is first forced into a register
;; (the matching insn rejects memory-to-memory).
585 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
586 [(set (match_operand:VF 0 "nonimmediate_operand" "")
588 [(match_operand:VF 1 "nonimmediate_operand" "")]
592 if (MEM_P (operands[0]) && MEM_P (operands[1]))
593 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float vector move (movu<ssemodesuffix>).  Alternatives:
;; register-or-memory into register, and register into memory; the
;; condition excludes memory-to-memory.  The "movu" attribute is set to 1.
596 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
597 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
599 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
601 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
602 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssemov")
604 (set_attr "movu" "1")
605 (set_attr "prefix" "maybe_vex")
606 (set_attr "mode" "<MODE>")])
;; Named expander for the unaligned integer vector move on the QImode
;; vector modes (VI1).  When both operands are memory, the source is
;; first forced into a register.
608 (define_expand "<sse2>_movdqu<avxsizesuffix>"
609 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
610 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
614 if (MEM_P (operands[0]) && MEM_P (operands[1]))
615 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned integer vector move (movdqu), expressed as an unspec of
;; operand 1.  Alternatives: register-or-memory into register, and
;; register into memory; memory-to-memory is excluded by the condition.
;; The prefix_data16 attribute is computed from a TARGET_AVX test.
618 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
619 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
620 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
622 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
623 "%vmovdqu\t{%1, %0|%0, %1}"
624 [(set_attr "type" "ssemov")
625 (set_attr "movu" "1")
626 (set (attr "prefix_data16")
628 (match_test "TARGET_AVX")
631 (set_attr "prefix" "maybe_vex")
632 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: load-only pattern (memory source, register destination) on the
;; QImode vector modes, expressed as an unspec of the memory operand.
;; Both prefix_data16 and prefix_rep are computed from a TARGET_AVX test.
634 (define_insn "<sse3>_lddqu<avxsizesuffix>"
635 [(set (match_operand:VI1 0 "register_operand" "=x")
636 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
639 "%vlddqu\t{%1, %0|%0, %1}"
640 [(set_attr "type" "ssemov")
641 (set_attr "movu" "1")
642 (set (attr "prefix_data16")
644 (match_test "TARGET_AVX")
647 (set (attr "prefix_rep")
649 (match_test "TARGET_AVX")
652 (set_attr "prefix" "maybe_vex")
653 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store of a general register ("r") to memory, expressed as an
;; unspec, for the modes of the SWI48 iterator (defined outside this
;; file section).  The mnemonic has no %v prefix and prefix_data16 is 0.
655 (define_insn "sse2_movnti<mode>"
656 [(set (match_operand:SWI48 0 "memory_operand" "=m")
657 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
660 "movnti\t{%1, %0|%0, %1}"
661 [(set_attr "type" "ssemov")
662 (set_attr "prefix_data16" "0")
663 (set_attr "mode" "<MODE>")])
;; movnt<ssemodesuffix>: store of an SSE register to memory for the
;; float vector modes, expressed as an unspec of the source register.
665 (define_insn "<sse>_movnt<mode>"
666 [(set (match_operand:VF 0 "memory_operand" "=m")
667 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
670 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
671 [(set_attr "type" "ssemov")
672 (set_attr "prefix" "maybe_vex")
673 (set_attr "mode" "<MODE>")])
;; movntdq: store of an SSE register to memory for the DImode vector
;; modes (VI8), expressed as an unspec of the source register.
;; NOTE(review): "type" is ssecvt here, whereas the other movnt patterns
;; in this file use ssemov -- confirm this is intended.
675 (define_insn "<sse2>_movnt<mode>"
676 [(set (match_operand:VI8 0 "memory_operand" "=m")
677 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
680 "%vmovntdq\t{%1, %0|%0, %1}"
681 [(set_attr "type" "ssecvt")
682 (set (attr "prefix_data16")
684 (match_test "TARGET_AVX")
687 (set_attr "prefix" "maybe_vex")
688 (set_attr "mode" "<sseinsnmode>")])
690 ; Expand patterns for non-temporal stores. At the moment, only those
691 ; that directly map to insns are defined; it would be possible to
692 ; define patterns for other modes that would expand to several insns.
694 ;; Modes handled by storent patterns.
;; Per-mode conditions: DI needs SSE2 and a 64-bit target, SI needs SSE2,
;; scalar SF/DF need SSE4A, the 256-bit vector modes need AVX, V2DI and
;; V2DF need SSE2, and V4SF is unconditional.
695 (define_mode_iterator STORENT_MODE
696 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
697 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
698 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
699 (V8SF "TARGET_AVX") V4SF
700 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent expander, instantiated once per STORENT_MODE mode: a memory
;; destination set from a register source.
702 (define_expand "storent<mode>"
703 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
705 [(match_operand:STORENT_MODE 1 "register_operand" "")]
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
711 ;; Parallel floating point arithmetic
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on the float vector modes, instantiated per <code>.
;; All the work is done by ix86_expand_fp_absneg_operator, after which
;; the expansion is finished with DONE.
715 (define_expand "<code><mode>2"
716 [(set (match_operand:VF 0 "register_operand" "")
718 (match_operand:VF 1 "register_operand" "")))]
720 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Vector abs/neg matched through absneg_operator (operand 3), with
;; operand 2 attached by a (use ...).  Operand 2 is presumably the mask
;; prepared by the expander -- TODO confirm.
;; After reload the insn is split into one logical operation on
;; operands 1 and 2: XOR when operand 3 is NEG, AND otherwise.  If
;; operand 1 is memory the two inputs are swapped, so the memory operand
;; becomes the second input.
;; Alternatives 0-1 are non-AVX (one input tied to the destination),
;; alternatives 2-3 are AVX.
722 (define_insn_and_split "*absneg<mode>2"
723 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
724 (match_operator:VF 3 "absneg_operator"
725 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
726 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
729 "&& reload_completed"
732 enum rtx_code absneg_op;
738 if (MEM_P (operands[1]))
739 op1 = operands[2], op2 = operands[1];
741 op1 = operands[1], op2 = operands[2];
746 if (rtx_equal_p (operands[0], operands[1]))
752 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
753 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
754 t = gen_rtx_SET (VOIDmode, operands[0], t);
758 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander for the float vector modes.  The operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
760 (define_expand "<plusminus_insn><mode>3"
761 [(set (match_operand:VF 0 "register_operand" "")
763 (match_operand:VF 1 "nonimmediate_operand" "")
764 (match_operand:VF 2 "nonimmediate_operand" "")))]
766 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract.  Alternative 0 is the two-operand non-AVX
;; form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.  <comm> supplies the commutativity marker for
;; operand 1.
768 (define_insn "*<plusminus_insn><mode>3"
769 [(set (match_operand:VF 0 "register_operand" "=x,x")
771 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
772 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
773 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
775 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
776 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
777 [(set_attr "isa" "noavx,avx")
778 (set_attr "type" "sseadd")
779 (set_attr "prefix" "orig,vex")
780 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector add/subtract on the 128-bit float modes: the
;; mnemonic takes <ssescalarmodesuffix> and the insn "mode" attribute is
;; the scalar mode.  Operand 1 must be a register (tied to the
;; destination in the non-AVX alternative).
782 (define_insn "<sse>_vm<plusminus_insn><mode>3"
783 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
786 (match_operand:VF_128 1 "register_operand" "0,x")
787 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
792 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
793 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "sseadd")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the float vector modes; the operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
799 (define_expand "mul<mode>3"
800 [(set (match_operand:VF 0 "register_operand" "")
802 (match_operand:VF 1 "nonimmediate_operand" "")
803 (match_operand:VF 2 "nonimmediate_operand" "")))]
805 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply.  Operand 1 is marked commutative ("%") and is
;; tied to the destination in the non-AVX alternative; alternative 1 is
;; the three-operand VEX form.
807 (define_insn "*mul<mode>3"
808 [(set (match_operand:VF 0 "register_operand" "=x,x")
810 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
811 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
812 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
814 mul<ssemodesuffix>\t{%2, %0|%0, %2}
815 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssemul")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector multiply on the 128-bit float modes (mulss/mulsd
;; style mnemonic via <ssescalarmodesuffix>); the "mode" attribute is the
;; scalar mode.
821 (define_insn "<sse>_vmmul<mode>3"
822 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
825 (match_operand:VF_128 1 "register_operand" "0,x")
826 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
831 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
832 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
833 [(set_attr "isa" "noavx,avx")
834 (set_attr "type" "ssemul")
835 (set_attr "prefix" "orig,vex")
836 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DFmode vector modes (VF2): only the usual
;; binary-operand fix-up is applied.
838 (define_expand "div<mode>3"
839 [(set (match_operand:VF2 0 "register_operand" "")
840 (div:VF2 (match_operand:VF2 1 "register_operand" "")
841 (match_operand:VF2 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SFmode vector modes (VF1).  After the
;; operand fix-up, a guarded branch calls ix86_emit_swdivsf; the visible
;; guards are TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
845 (define_expand "div<mode>3"
846 [(set (match_operand:VF1 0 "register_operand" "")
847 (div:VF1 (match_operand:VF1 1 "register_operand" "")
848 (match_operand:VF1 2 "nonimmediate_operand" "")))]
851 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
854 && TARGET_RECIP_VEC_DIV
855 && !optimize_insn_for_size_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed float divide.  Operand 1 (the dividend) must be a register and
;; is tied to the destination in the non-AVX alternative; operand 2 may
;; be a register or memory.
864 (define_insn "<sse>_div<mode>3"
865 [(set (match_operand:VF 0 "register_operand" "=x,x")
867 (match_operand:VF 1 "register_operand" "0,x")
868 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
871 div<ssemodesuffix>\t{%2, %0|%0, %2}
872 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
873 [(set_attr "isa" "noavx,avx")
874 (set_attr "type" "ssediv")
875 (set_attr "prefix" "orig,vex")
876 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector divide on the 128-bit float modes; mnemonic suffix
;; from <ssescalarmodesuffix>, "mode" attribute is the scalar mode.
878 (define_insn "<sse>_vmdiv<mode>3"
879 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
882 (match_operand:VF_128 1 "register_operand" "0,x")
883 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
888 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
889 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
890 [(set_attr "isa" "noavx,avx")
891 (set_attr "type" "ssediv")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "<ssescalarmode>")])
;; rcpps on the SFmode vector modes, represented as UNSPEC_RCP of
;; operand 1 (register or memory).
895 (define_insn "<sse>_rcp<mode>2"
896 [(set (match_operand:VF1 0 "register_operand" "=x")
898 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
900 "%vrcpps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "atom_sse_attr" "rcp")
903 (set_attr "prefix" "maybe_vex")
904 (set_attr "mode" "<MODE>")])
;; rcpss on V4SF.  Operand 1 is the unspec input (register or memory);
;; operand 2 is a register that is tied to the destination in the
;; non-AVX alternative and is the extra source register of the VEX form.
906 (define_insn "sse_vmrcpv4sf2"
907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
911 (match_operand:V4SF 2 "register_operand" "0,x")
915 rcpss\t{%1, %0|%0, %1}
916 vrcpss\t{%1, %2, %0|%0, %2, %1}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sse")
919 (set_attr "atom_sse_attr" "rcp")
920 (set_attr "prefix" "orig,vex")
921 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vector modes (VF2); no
;; preparation code is visible for this variant.
923 (define_expand "sqrt<mode>2"
924 [(set (match_operand:VF2 0 "register_operand" "")
925 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vector modes (VF1).  A guarded
;; branch calls ix86_emit_swsqrtsf with its last argument false; the
;; visible guards are TARGET_RECIP_VEC_SQRT, not optimizing the insn for
;; size, -ffinite-math-only, no trapping math, and unsafe math
;; optimizations.
928 (define_expand "sqrt<mode>2"
929 [(set (match_operand:VF1 0 "register_operand" "")
930 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
934 && TARGET_RECIP_VEC_SQRT
935 && !optimize_insn_for_size_p ()
936 && flag_finite_math_only && !flag_trapping_math
937 && flag_unsafe_math_optimizations)
939 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed float square root (sqrt<ssemodesuffix>) with a register
;; destination and a register-or-memory source.
944 (define_insn "<sse>_sqrt<mode>2"
945 [(set (match_operand:VF 0 "register_operand" "=x")
946 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
948 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
949 [(set_attr "type" "sse")
950 (set_attr "atom_sse_attr" "sqrt")
951 (set_attr "prefix" "maybe_vex")
952 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector square root on the 128-bit float modes.  Operand 1
;; is the value operated on (register or memory); operand 2 is a register
;; tied to the destination in the non-AVX alternative and passed as the
;; extra source register in the VEX form.
954 (define_insn "<sse>_vmsqrt<mode>2"
955 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
958 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
959 (match_operand:VF_128 2 "register_operand" "0,x")
963 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
964 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
965 [(set_attr "isa" "noavx,avx")
966 (set_attr "type" "sse")
967 (set_attr "atom_sse_attr" "sqrt")
968 (set_attr "prefix" "orig,vex")
969 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SFmode vector modes
;; (UNSPEC_RSQRT).  Expansion goes through ix86_emit_swsqrtsf with its
;; last argument true.
971 (define_expand "rsqrt<mode>2"
972 [(set (match_operand:VF1 0 "register_operand" "")
974 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
977 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; rsqrtps on the SFmode vector modes, represented as UNSPEC_RSQRT of
;; operand 1 (register or memory).
981 (define_insn "<sse>_rsqrt<mode>2"
982 [(set (match_operand:VF1 0 "register_operand" "=x")
984 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
986 "%vrsqrtps\t{%1, %0|%0, %1}"
987 [(set_attr "type" "sse")
988 (set_attr "prefix" "maybe_vex")
989 (set_attr "mode" "<MODE>")])
;; rsqrtss on V4SF.  Operand 1 is the unspec input (register or memory);
;; operand 2 is a register tied to the destination in the non-AVX
;; alternative and the extra source register of the VEX form.
991 (define_insn "sse_vmrsqrtv4sf2"
992 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
994 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
996 (match_operand:V4SF 2 "register_operand" "0,x")
1000 rsqrtss\t{%1, %0|%0, %1}
1001 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1002 [(set_attr "isa" "noavx,avx")
1003 (set_attr "type" "sse")
1004 (set_attr "prefix" "orig,vex")
1005 (set_attr "mode" "SF")])
1007 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1008 ;; isn't really correct, as those rtl operators aren't defined when
1009 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander for the float vector modes.  Unless
;; -ffinite-math-only is in effect, operand 1 is forced into a register
;; first (the non-finite insn below only accepts a register there); the
;; usual binary-operand fix-up follows.
1011 (define_expand "<code><mode>3"
1012 [(set (match_operand:VF 0 "register_operand" "")
1014 (match_operand:VF 1 "nonimmediate_operand" "")
1015 (match_operand:VF 2 "nonimmediate_operand" "")))]
1018 if (!flag_finite_math_only)
1019 operands[1] = force_reg (<MODE>mode, operands[1]);
1020 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed float min/max used only under flag_finite_math_only.  Operand 1
;; is marked commutative ("%") and may be memory, subject to
;; ix86_binary_operator_ok.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:VF 0 "register_operand" "=x,x")
1026 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1027 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1028 "TARGET_SSE && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1031 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1032 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1033 [(set_attr "isa" "noavx,avx")
1034 (set_attr "type" "sseadd")
1035 (set_attr "prefix" "orig,vex")
1036 (set_attr "mode" "<MODE>")])
;; Packed float min/max used when flag_finite_math_only is off.  Unlike
;; the _finite variant, operand 1 must be a register and carries no
;; commutativity marker, so the operand order is preserved.
1038 (define_insn "*<code><mode>3"
1039 [(set (match_operand:VF 0 "register_operand" "=x,x")
1041 (match_operand:VF 1 "register_operand" "0,x")
1042 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1043 "TARGET_SSE && !flag_finite_math_only"
1045 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1046 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "isa" "noavx,avx")
1048 (set_attr "type" "sseadd")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector min/max on the 128-bit float modes; mnemonic suffix
;; from <ssescalarmodesuffix>, "mode" attribute is the scalar mode.
;; NOTE(review): "type" is sse here while the packed min/max patterns use
;; sseadd -- confirm this is intended.
1052 (define_insn "<sse>_vm<code><mode>3"
1053 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1056 (match_operand:VF_128 1 "register_operand" "0,x")
1057 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1062 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1063 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1064 [(set_attr "isa" "noavx,avx")
1065 (set_attr "type" "sse")
1066 (set_attr "prefix" "orig,vex")
1067 (set_attr "mode" "<ssescalarmode>")])
1069 ;; These versions of the min/max patterns implement exactly the operations
1070 ;; min = (op1 < op2 ? op1 : op2)
1071 ;; max = (!(op1 < op2) ? op1 : op2)
1072 ;; Their operands are not commutative, and thus they may be used in the
1073 ;; presence of -0.0 and NaN.
;; Non-commutative packed minimum, written as an unspec over operands 1
;; and 2 so that their order is kept.  Operand 1 must be a register
;; (tied to the destination without AVX); operand 2 may be memory.
1075 (define_insn "*ieee_smin<mode>3"
1076 [(set (match_operand:VF 0 "register_operand" "=x,x")
1078 [(match_operand:VF 1 "register_operand" "0,x")
1079 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1083 min<ssemodesuffix>\t{%2, %0|%0, %2}
1084 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1085 [(set_attr "isa" "noavx,avx")
1086 (set_attr "type" "sseadd")
1087 (set_attr "prefix" "orig,vex")
1088 (set_attr "mode" "<MODE>")])
;; Non-commutative packed maximum; same operand layout and alternatives
;; as *ieee_smin<mode>3, emitting max/vmax instead.
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:VF 0 "register_operand" "=x,x")
1093 [(match_operand:VF 1 "register_operand" "0,x")
1094 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1098 max<ssemodesuffix>\t{%2, %0|%0, %2}
1099 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1100 [(set_attr "isa" "noavx,avx")
1101 (set_attr "type" "sseadd")
1102 (set_attr "prefix" "orig,vex")
1103 (set_attr "mode" "<MODE>")])
;; 256-bit vaddsubpd (VEX three-operand form only).  The RTL pairs an
;; expression on operands 1 and 2 with their difference (minus:V4DF) on
;; the same two operands.
1105 (define_insn "avx_addsubv4df3"
1106 [(set (match_operand:V4DF 0 "register_operand" "=x")
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1111 (minus:V4DF (match_dup 1) (match_dup 2))
1114 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1115 [(set_attr "type" "sseadd")
1116 (set_attr "prefix" "vex")
1117 (set_attr "mode" "V4DF")])
;; 128-bit addsubpd with a non-AVX alternative (operand 1 tied to the
;; destination) and a VEX alternative.  This is the only addsub pattern
;; in this group that sets atom_unit ("complex").
1119 (define_insn "sse3_addsubv2df3"
1120 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1123 (match_operand:V2DF 1 "register_operand" "0,x")
1124 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1125 (minus:V2DF (match_dup 1) (match_dup 2))
1129 addsubpd\t{%2, %0|%0, %2}
1130 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "isa" "noavx,avx")
1132 (set_attr "type" "sseadd")
1133 (set_attr "atom_unit" "complex")
1134 (set_attr "prefix" "orig,vex")
1135 (set_attr "mode" "V2DF")])
;; 256-bit vaddsubps (VEX three-operand form only); same RTL shape as
;; avx_addsubv4df3 with V8SF operands.
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
;; 128-bit addsubps with a non-AVX and a VEX alternative.  prefix_rep is
;; "1" for the non-AVX alternative and left at its default ("*") for the
;; VEX one.
1151 (define_insn "sse3_addsubv4sf3"
1152 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1155 (match_operand:V4SF 1 "register_operand" "0,x")
1156 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1157 (minus:V4SF (match_dup 1) (match_dup 2))
1161 addsubps\t{%2, %0|%0, %2}
1162 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "isa" "noavx,avx")
1164 (set_attr "type" "sseadd")
1165 (set_attr "prefix" "orig,vex")
1166 (set_attr "prefix_rep" "1,*")
1167 (set_attr "mode" "V4SF")])
;; 256-bit horizontal add/subtract (vhaddpd/vhsubpd style mnemonic).
;; The template combines adjacent element pairs in this order:
;; elements (0,1) of operand 1, (0,1) of operand 2, (2,3) of operand 1,
;; (2,3) of operand 2.
1169 (define_insn "avx_h<plusminus_insn>v4df3"
1170 [(set (match_operand:V4DF 0 "register_operand" "=x")
1175 (match_operand:V4DF 1 "register_operand" "x")
1176 (parallel [(const_int 0)]))
1177 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1180 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1181 (parallel [(const_int 0)]))
1182 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1185 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1186 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1189 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1191 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1192 [(set_attr "type" "sseadd")
1193 (set_attr "prefix" "vex")
1194 (set_attr "mode" "V4DF")])
;; 128-bit double-precision horizontal add/subtract.  Elements 0 and 1
;; of operand 1 are combined, as are elements 0 and 1 of operand 2.
;; Alternative 0 (noavx) is the two-operand h<plusminus_mnemonic>pd with
;; operand 1 tied to the destination; alternative 1 (avx) is the
;; three-operand VEX form.
1196 (define_insn "sse3_h<plusminus_insn>v2df3"
1197 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1201 (match_operand:V2DF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1206 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1211 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1212 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1213 [(set_attr "isa" "noavx,avx")
1214 (set_attr "type" "sseadd")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "mode" "V2DF")])
;; AVX 256-bit single-precision horizontal add/subtract.  The RTL
;; combines adjacent element pairs in this order: {0,1},{2,3} of
;; operand 1, {0,1},{2,3} of operand 2, then {4,5},{6,7} of operand 1
;; and {4,5},{6,7} of operand 2.  Emits vh<plusminus_mnemonic>ps.
1218 (define_insn "avx_h<plusminus_insn>v8sf3"
1219 [(set (match_operand:V8SF 0 "register_operand" "=x")
1225 (match_operand:V8SF 1 "register_operand" "x")
1226 (parallel [(const_int 0)]))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1229 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1230 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1234 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1238 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1243 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1250 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1253 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1256 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract.  Element pairs
;; {0,1} and {2,3} of operand 1 are combined, followed by the same
;; pairs of operand 2.  Alternative 0 (noavx) is the two-operand legacy
;; form with operand 1 tied to the destination; alternative 1 (avx) is
;; the three-operand VEX form.  Marked "complex" for Atom scheduling.
1261 (define_insn "sse3_h<plusminus_insn>v4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1267 (match_operand:V4SF 1 "register_operand" "0,x")
1268 (parallel [(const_int 0)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1276 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1284 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1285 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1286 [(set_attr "isa" "noavx,avx")
1287 (set_attr "type" "sseadd")
1288 (set_attr "atom_unit" "complex")
1289 (set_attr "prefix" "orig,vex")
1290 (set_attr "prefix_rep" "1,*")
1291 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF.  Sequence emitted:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = vperm2f128 (tmp, tmp, selector 1)
;;   op0  = tmp + tmp2
;; NOTE(review): selector 1 presumably swaps the two 128-bit halves so
;; the final add combines both lanes -- confirm against the ISA manual.
1293 (define_expand "reduc_splus_v4df"
1294 [(match_operand:V4DF 0 "register_operand" "")
1295 (match_operand:V4DF 1 "register_operand" "")]
1298 rtx tmp = gen_reg_rtx (V4DFmode);
1299 rtx tmp2 = gen_reg_rtx (V4DFmode);
1300 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1301 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1302 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of
;; operand 1 with itself, written directly to operand 0.
1306 (define_expand "reduc_splus_v2df"
1307 [(match_operand:V2DF 0 "register_operand" "")
1308 (match_operand:V2DF 1 "register_operand" "")]
1311 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V8SF.  Sequence emitted:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = hadd (tmp, tmp)
;;   tmp  = vperm2f128 (tmp2, tmp2, selector 1)   ; tmp is reused here
;;   op0  = tmp + tmp2
;; NOTE(review): selector 1 presumably swaps the 128-bit halves --
;; confirm against the ISA manual.
1315 (define_expand "reduc_splus_v8sf"
1316 [(match_operand:V8SF 0 "register_operand" "")
1317 (match_operand:V8SF 1 "register_operand" "")]
1320 rtx tmp = gen_reg_rtx (V8SFmode);
1321 rtx tmp2 = gen_reg_rtx (V8SFmode);
1322 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1323 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1324 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1325 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  Two strategies appear in the body:
;;   - two chained horizontal adds through a temporary
;;     (hadd (op1, op1) -> tmp, hadd (tmp, tmp) -> op0), or
;;   - the generic ix86_expand_reduc helper driven by gen_addv4sf3.
;; NOTE(review): the choice is presumably keyed on SSE3 availability,
;; since the first path uses gen_sse3_haddv4sf3 -- confirm.
1329 (define_expand "reduc_splus_v4sf"
1330 [(match_operand:V4SF 0 "register_operand" "")
1331 (match_operand:V4SF 1 "register_operand" "")]
1336 rtx tmp = gen_reg_rtx (V4SFmode);
1337 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1338 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1341 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1345 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled only with TARGET_AVX2, the
;; 256-bit float modes with TARGET_AVX, and V4SF with TARGET_SSE.
1346 (define_mode_iterator REDUC_SMINMAX_MODE
1347 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1348 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1349 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1350 (V4SF "TARGET_SSE")])
;; Min/max reduction expander, iterated over the smaxmin codes and
;; REDUC_SMINMAX_MODE.  All work is delegated to ix86_expand_reduc,
;; which is handed the generator for the matching element-wise
;; <code><mode>3 pattern.
1352 (define_expand "reduc_<code>_<mode>"
1353 [(smaxmin:REDUC_SMINMAX_MODE
1354 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1355 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1358 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 integer modes; delegates to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
;; NOTE(review): presumably the unsigned min/max counterpart of the
;; REDUC_SMINMAX_MODE expander of the same name -- confirm which code
;; iterator drives <code> here.
1362 (define_expand "reduc_<code>_<mode>"
1364 (match_operand:VI_256 0 "register_operand" "")
1365 (match_operand:VI_256 1 "register_operand" ""))]
1368 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction for V8HI; delegates to ix86_expand_reduc
;; with gen_uminv8hi3 as the element-wise operation.
1372 (define_expand "reduc_umin_v8hi"
1374 (match_operand:V8HI 0 "register_operand" "")
1375 (match_operand:V8HI 1 "register_operand" ""))]
1378 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1384 ;; Parallel floating point comparisons
1386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an explicit predicate.
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand
;; and is emitted as the immediate of vcmp<ssemodesuffix>; it accounts
;; for the one-byte length_immediate.
1388 (define_insn "avx_cmp<mode>3"
1389 [(set (match_operand:VF 0 "register_operand" "=x")
1391 [(match_operand:VF 1 "register_operand" "x")
1392 (match_operand:VF 2 "nonimmediate_operand" "xm")
1393 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1396 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1397 [(set_attr "type" "ssecmp")
1398 (set_attr "length_immediate" "1")
1399 (set_attr "prefix" "vex")
1400 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") variant of the AVX compare-with-predicate pattern for
;; the 128-bit float modes.  Uses the scalar mnemonic suffix
;; <ssescalarmodesuffix> and reports the scalar mode in its "mode"
;; attribute; operand 3 is again a 0..31 immediate.
1402 (define_insn "avx_vmcmp<mode>3"
1403 [(set (match_operand:VF_128 0 "register_operand" "=x")
1406 [(match_operand:VF_128 1 "register_operand" "x")
1407 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1408 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1413 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1414 [(set_attr "type" "ssecmp")
1415 (set_attr "length_immediate" "1")
1416 (set_attr "prefix" "vex")
1417 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparison
;; codes: the insn condition requires operator 3 to be of class
;; RTX_COMM_COMPARE, and the "%" on operand 1 lets the operands be
;; swapped.  The predicate is printed via %D3.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
1419 (define_insn "*<sse>_maskcmp<mode>3_comm"
1420 [(set (match_operand:VF 0 "register_operand" "=x,x")
1421 (match_operator:VF 3 "sse_comparison_operator"
1422 [(match_operand:VF 1 "register_operand" "%0,x")
1423 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1425 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1427 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1428 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1429 [(set_attr "isa" "noavx,avx")
1430 (set_attr "type" "ssecmp")
1431 (set_attr "length_immediate" "1")
1432 (set_attr "prefix" "orig,vex")
1433 (set_attr "mode" "<MODE>")])
;; General mask-producing packed compare for any operator accepted by
;; sse_comparison_operator.  Unlike the "_comm" variant, operand 1
;; carries no "%" marker, so operand order is fixed.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination), alternative 1 the VEX three-operand form.
1435 (define_insn "<sse>_maskcmp<mode>3"
1436 [(set (match_operand:VF 0 "register_operand" "=x,x")
1437 (match_operator:VF 3 "sse_comparison_operator"
1438 [(match_operand:VF 1 "register_operand" "0,x")
1439 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1442 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1443 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1444 [(set_attr "isa" "noavx,avx")
1445 (set_attr "type" "ssecmp")
1446 (set_attr "length_immediate" "1")
1447 (set_attr "prefix" "orig,vex")
1448 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") mask-producing compare for the 128-bit float modes,
;; using the scalar mnemonic suffix.  length_immediate is given
;; explicitly only for the legacy alternative ("1,*").
1450 (define_insn "<sse>_vmmaskcmp<mode>3"
1451 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1453 (match_operator:VF_128 3 "sse_comparison_operator"
1454 [(match_operand:VF_128 1 "register_operand" "0,x")
1455 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1460 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1461 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1462 [(set_attr "isa" "noavx,avx")
1463 (set_attr "type" "ssecmp")
1464 (set_attr "length_immediate" "1,*")
1465 (set_attr "prefix" "orig,vex")
1466 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFPmode from
;; element 0 of each vector operand.  Enabled when the scalar mode is
;; an SSE float mode.  The "%v" prefix in the template selects the VEX
;; mnemonic when applicable (prefix attribute "maybe_vex").
;; prefix_data16 is computed from the mode (DF vs. everything else).
1468 (define_insn "<sse>_comi"
1469 [(set (reg:CCFP FLAGS_REG)
1472 (match_operand:<ssevecmode> 0 "register_operand" "x")
1473 (parallel [(const_int 0)]))
1475 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1476 (parallel [(const_int 0)]))))]
1477 "SSE_FLOAT_MODE_P (<MODE>mode)"
1478 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssecomi")
1480 (set_attr "prefix" "maybe_vex")
1481 (set_attr "prefix_rep" "0")
1482 (set (attr "prefix_data16")
1483 (if_then_else (eq_attr "mode" "DF")
1485 (const_string "0")))
1486 (set_attr "mode" "<MODE>")])
;; Counterpart of <sse>_comi that sets the flags register in CCFPUmode
;; and emits ucomi instead of comi.  Operands, enabling condition and
;; attributes otherwise mirror <sse>_comi.
1488 (define_insn "<sse>_ucomi"
1489 [(set (reg:CCFPU FLAGS_REG)
1492 (match_operand:<ssevecmode> 0 "register_operand" "x")
1493 (parallel [(const_int 0)]))
1495 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1496 (parallel [(const_int 0)]))))]
1497 "SSE_FLOAT_MODE_P (<MODE>mode)"
1498 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "ssecomi")
1500 (set_attr "prefix" "maybe_vex")
1501 (set_attr "prefix_rep" "0")
1502 (set (attr "prefix_data16")
1503 (if_then_else (eq_attr "mode" "DF")
1505 (const_string "0")))
1506 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit data modes, driven by a
;; 256-bit floating-point comparison (operator 3 on operands 4 and 5;
;; operands 1 and 2 are the values selected between).  The condition
;; requires the data mode and the comparison mode to have the same
;; number of elements.  Expansion is done by ix86_expand_fp_vcond.
1508 (define_expand "vcond<V_256:mode><VF_256:mode>"
1509 [(set (match_operand:V_256 0 "register_operand" "")
1511 (match_operator 3 ""
1512 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1513 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1514 (match_operand:V_256 1 "general_operand" "")
1515 (match_operand:V_256 2 "general_operand" "")))]
1517 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1518 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1520 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander: data modes come from
;; V_128 and the comparison operands from VF_128.  The element counts
;; of the two modes must match; expansion is done by
;; ix86_expand_fp_vcond.
1525 (define_expand "vcond<V_128:mode><VF_128:mode>"
1526 [(set (match_operand:V_128 0 "register_operand" "")
1528 (match_operator 3 ""
1529 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1530 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1531 (match_operand:V_128 1 "general_operand" "")
1532 (match_operand:V_128 2 "general_operand" "")))]
1534 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1535 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1537 bool ok = ix86_expand_fp_vcond (operands);
1542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1544 ;; Parallel floating point logical operations
1546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not.  The assembler template is built at
;; output time into a static buffer: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode's own
;; suffix otherwise.  which_alternative selects between the legacy
;; two-operand "andn" form and the VEX three-operand "vandn" form.
1548 (define_insn "<sse>_andnot<mode>3"
1549 [(set (match_operand:VF 0 "register_operand" "=x,x")
1552 (match_operand:VF 1 "register_operand" "0,x"))
1553 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1556 static char buf[32];
1559 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1561 switch (which_alternative)
1564 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1567 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1573 snprintf (buf, sizeof (buf), insn, suffix);
1576 [(set_attr "isa" "noavx,avx")
1577 (set_attr "type" "sselog")
1578 (set_attr "prefix" "orig,vex")
1579 (set_attr "mode" "<MODE>")])
;; Expander for the packed floating-point logical operations, iterated
;; over <code>.  Both inputs may be registers or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy on
;; the operands before the insn is emitted.
1581 (define_expand "<code><mode>3"
1582 [(set (match_operand:VF 0 "register_operand" "")
1584 (match_operand:VF 1 "nonimmediate_operand" "")
1585 (match_operand:VF 2 "nonimmediate_operand" "")))]
1587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed floating-point logical operations.  Requires
;; TARGET_SSE and ix86_binary_operator_ok; "%" marks operands 1 and 2
;; as commutative.  As with the and-not pattern, the mnemonic is
;; assembled at output time into a static buffer, with suffix "ps"
;; under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and the mode's own
;; suffix otherwise.
1589 (define_insn "*<code><mode>3"
1590 [(set (match_operand:VF 0 "register_operand" "=x,x")
1592 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1593 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1594 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1596 static char buf[32];
1599 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1601 switch (which_alternative)
1604 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1607 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1613 snprintf (buf, sizeof (buf), insn, suffix);
1616 [(set_attr "isa" "noavx,avx")
1617 (set_attr "type" "sselog")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is a mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh pseudos for the
;; two intermediate results.  The inverted mask is paired with
;; operand 1 and the plain mask with operand 2, and the two
;; intermediates are ORed together into operand 0.
;; NOTE(review): this presumably takes the magnitude from operand 1
;; and the sign from operand 2 -- confirm against the mask polarity
;; produced by ix86_build_signbit_mask.
1621 (define_expand "copysign<mode>3"
1624 (not:VF (match_dup 3))
1625 (match_operand:VF 1 "nonimmediate_operand" "")))
1627 (and:VF (match_dup 3)
1628 (match_operand:VF 2 "nonimmediate_operand" "")))
1629 (set (match_operand:VF 0 "register_operand" "")
1630 (ior:VF (match_dup 4) (match_dup 5)))]
1633 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1635 operands[4] = gen_reg_rtx (<MODE>mode);
1636 operands[5] = gen_reg_rtx (<MODE>mode);
1639 ;; Also define scalar versions. These are used for abs, neg, and
1640 ;; conditional move. Using subregs into vector modes causes register
1641 ;; allocation lossage. These patterns do not allow memory operands
1642 ;; because the native instructions read the full 128-bits.
;;
;; Scalar (MODEF) and-not.  All three operands are registers.  The
;; packed instruction is emitted; its suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix>
;; otherwise, and the "mode" attribute reports the vector mode.
1644 (define_insn "*andnot<mode>3"
1645 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1648 (match_operand:MODEF 1 "register_operand" "0,x"))
1649 (match_operand:MODEF 2 "register_operand" "x,x")))]
1650 "SSE_FLOAT_MODE_P (<MODE>mode)"
1652 static char buf[32];
1655 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1657 switch (which_alternative)
1660 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1663 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1669 snprintf (buf, sizeof (buf), insn, suffix);
1672 [(set_attr "isa" "noavx,avx")
1673 (set_attr "type" "sselog")
1674 (set_attr "prefix" "orig,vex")
1675 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations on values held in SSE registers.
;; Register operands only; "%" marks operands 1 and 2 as commutative.
;; The packed instruction is emitted, with suffix "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix>
;; otherwise.
1677 (define_insn "*<code><mode>3"
1678 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1680 (match_operand:MODEF 1 "register_operand" "%0,x")
1681 (match_operand:MODEF 2 "register_operand" "x,x")))]
1682 "SSE_FLOAT_MODE_P (<MODE>mode)"
1684 static char buf[32];
1687 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1689 switch (which_alternative)
1692 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1695 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1701 snprintf (buf, sizeof (buf), insn, suffix);
1704 [(set_attr "isa" "noavx,avx")
1705 (set_attr "type" "sselog")
1706 (set_attr "prefix" "orig,vex")
1707 (set_attr "mode" "<ssevecmode>")])
1709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1711 ;; FMA4 floating point multiply/accumulate instructions. This
1712 ;; includes the scalar version of the instructions as well as the
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1718 ;; combine to generate a multiply/add with two memory references. We then
1719 ;; split this insn, into loading up the destination register with one of the
1720 ;; memory operations. If we don't manage to split the insn, reload will
1721 ;; generate the appropriate moves. The reason this is needed is that combine
1722 ;; has already folded one of the memory references into both the multiply and
1723 ;; add insns, and it can't generate a new pseudo. I.e.:
1724 ;; (set (reg1) (mem (addr1)))
1725 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1726 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1728 ;; ??? This is historic, pre-dating the gimple fma transformation.
1729 ;; We could now properly represent that only one memory operand is
1730 ;; allowed and not be penalized during optimization.
1732 ;; Intrinsic FMA operations.
1734 ;; The standard names for fma are only available with SSE math enabled.
;; fma<mode>4: standard-name expander over FMAMODE with three
;; register-or-memory inputs and no negated operands.  Enabled when
;; either TARGET_FMA or TARGET_FMA4 is set together with
;; TARGET_SSE_MATH.
1735 (define_expand "fma<mode>4"
1736 [(set (match_operand:FMAMODE 0 "register_operand")
1738 (match_operand:FMAMODE 1 "nonimmediate_operand")
1739 (match_operand:FMAMODE 2 "nonimmediate_operand")
1740 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1741 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fms<mode>4: as fma<mode>4, but operand 3 is wrapped in (neg ...).
;; Same enabling condition as fma<mode>4.
1743 (define_expand "fms<mode>4"
1744 [(set (match_operand:FMAMODE 0 "register_operand")
1746 (match_operand:FMAMODE 1 "nonimmediate_operand")
1747 (match_operand:FMAMODE 2 "nonimmediate_operand")
1748 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1749 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fnma<mode>4: as fma<mode>4, but operand 1 is wrapped in (neg ...).
;; Same enabling condition as fma<mode>4.
1751 (define_expand "fnma<mode>4"
1752 [(set (match_operand:FMAMODE 0 "register_operand")
1754 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1755 (match_operand:FMAMODE 2 "nonimmediate_operand")
1756 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1757 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; fnms<mode>4: as fma<mode>4, but both operand 1 and operand 3 are
;; wrapped in (neg ...).  Same enabling condition as fma<mode>4.
1759 (define_expand "fnms<mode>4"
1760 [(set (match_operand:FMAMODE 0 "register_operand")
1762 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1763 (match_operand:FMAMODE 2 "nonimmediate_operand")
1764 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1765 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1767 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Accordingly this expander requires only TARGET_FMA or TARGET_FMA4,
;; without the TARGET_SSE_MATH term used by the standard-name
;; expanders.
1768 (define_expand "fma4i_fmadd_<mode>"
1769 [(set (match_operand:FMAMODE 0 "register_operand")
1771 (match_operand:FMAMODE 1 "nonimmediate_operand")
1772 (match_operand:FMAMODE 2 "nonimmediate_operand")
1773 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1774 "TARGET_FMA || TARGET_FMA4")
;; Four-operand fused multiply-add (separate destination).  The two
;; alternatives allow a memory operand in either position 3
;; (alternative 0) or position 2 (alternative 1), never both; "%"
;; marks operands 1 and 2 as commutative.
1776 (define_insn "*fma4i_fmadd_<mode>"
1777 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1779 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1781 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1783 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1784 [(set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
;; Four-operand fused multiply-subtract; emits vfmsub<ssemodesuffix>.
;; Constraints match *fma4i_fmadd_<mode>: at most one of operands 2
;; and 3 may be in memory.
1787 (define_insn "*fma4i_fmsub_<mode>"
1788 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1790 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1791 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1793 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1795 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1796 [(set_attr "type" "ssemuladd")
1797 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-add; emits
;; vfnmadd<ssemodesuffix>.  At most one of operands 2 and 3 may be in
;; memory.
1799 (define_insn "*fma4i_fnmadd_<mode>"
1800 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1803 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1804 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1805 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1807 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1808 [(set_attr "type" "ssemuladd")
1809 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-subtract; emits
;; vfnmsub<ssemodesuffix>.  At most one of operands 2 and 3 may be in
;; memory.
1811 (define_insn "*fma4i_fnmsub_<mode>"
1812 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1815 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1816 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1818 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1820 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 [(set_attr "type" "ssemuladd")
1822 (set_attr "mode" "<MODE>")])
1824 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1825 ;; entire destination register, with the high-order elements zeroed.
;; The preparation code sets operand 4 to the all-zero constant of the
;; vector mode (CONST0_RTX).
1827 (define_expand "fma4i_vmfmadd_<mode>"
1828 [(set (match_operand:VF_128 0 "register_operand")
1831 (match_operand:VF_128 1 "nonimmediate_operand")
1832 (match_operand:VF_128 2 "nonimmediate_operand")
1833 (match_operand:VF_128 3 "nonimmediate_operand"))
1838 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar fused multiply-add expander for the 128-bit float modes with
;; three register-or-memory inputs.
;; NOTE(review): by its name this is presumably the FMA3 intrinsic
;; counterpart of fma4i_vmfmadd_<mode> -- confirm.
1841 (define_expand "fmai_vmfmadd_<mode>"
1842 [(set (match_operand:VF_128 0 "register_operand")
1845 (match_operand:VF_128 1 "nonimmediate_operand")
1846 (match_operand:VF_128 2 "nonimmediate_operand")
1847 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand fused multiply-add, where the destination is
;; also one of the inputs.  The three alternatives differ in which
;; input is tied to operand 0 (the "0" constraint) and which may be in
;; memory, and emit the 132, 213 and 231 forms respectively.
1852 (define_insn "*fmai_fmadd_<mode>"
1853 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1856 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1857 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1858 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1863 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1864 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1865 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1866 [(set_attr "type" "ssemuladd")
1867 (set_attr "mode" "<MODE>")])
;; Scalar three-operand fused multiply-subtract.  Same constraint
;; layout as *fmai_fmadd_<mode>; emits the vfmsub 132/213/231 forms.
1869 (define_insn "*fmai_fmsub_<mode>"
1870 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1873 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1874 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1876 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1881 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1882 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1883 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated fused multiply-add.  Same constraint
;; layout as *fmai_fmadd_<mode>; emits the vfnmadd 132/213/231 forms.
1887 (define_insn "*fmai_fnmadd_<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1892 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1893 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1894 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1899 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1900 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1901 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated fused multiply-subtract.  Same
;; constraint layout as *fmai_fmadd_<mode>; emits the vfnmsub
;; 132/213/231 forms.
1905 (define_insn "*fmai_fnmsub_<mode>"
1906 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1910 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1911 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1913 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1918 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1919 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1920 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1921 [(set_attr "type" "ssemuladd")
1922 (set_attr "mode" "<MODE>")])
;; Scalar four-operand fused multiply-add.  Operand 4 must satisfy
;; const0_operand (the zero vector supplied by the
;; fma4i_vmfmadd_<mode> expander).  At most one of operands 2 and 3
;; may be in memory.
1924 (define_insn "*fma4i_vmfmadd_<mode>"
1925 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1928 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1929 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1930 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1931 (match_operand:VF_128 4 "const0_operand" "")
1934 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1935 [(set_attr "type" "ssemuladd")
1936 (set_attr "mode" "<MODE>")])
;; Scalar four-operand fused multiply-subtract; operand 4 must be the
;; zero vector (const0_operand).  Emits vfmsub with the scalar suffix.
1938 (define_insn "*fma4i_vmfmsub_<mode>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1942 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1943 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1945 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1946 (match_operand:VF_128 4 "const0_operand" "")
1949 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated fused multiply-add; operand 4 must be
;; the zero vector (const0_operand).  Emits vfnmadd with the scalar
;; suffix.
1953 (define_insn "*fma4i_vmfnmadd_<mode>"
1954 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1958 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1959 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1960 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1961 (match_operand:VF_128 4 "const0_operand" "")
1964 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1965 [(set_attr "type" "ssemuladd")
1966 (set_attr "mode" "<MODE>")])
;; Scalar four-operand negated fused multiply-subtract; operand 4 must
;; be the zero vector (const0_operand).  Emits vfnmsub with the scalar
;; suffix.
1968 (define_insn "*fma4i_vmfnmsub_<mode>"
1969 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1973 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1974 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1976 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1977 (match_operand:VF_128 4 "const0_operand" "")
1980 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1981 [(set_attr "type" "ssemuladd")
1982 (set_attr "mode" "<MODE>")])
1984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1986 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1990 ;; It would be possible to represent these without the UNSPEC as
1993 ;; (fma op1 op2 op3)
1994 ;; (fma op1 op2 (neg op3))
1997 ;; But this doesn't seem useful in practice.
;; Expander for the packed fused multiply with alternating add and
;; subtract, over all vector float modes (VF).  Takes three
;; register-or-memory inputs and is enabled by either TARGET_FMA or
;; TARGET_FMA4.
1999 (define_expand "fmaddsub_<mode>"
2000 [(set (match_operand:VF 0 "register_operand")
2002 [(match_operand:VF 1 "nonimmediate_operand")
2003 (match_operand:VF 2 "nonimmediate_operand")
2004 (match_operand:VF 3 "nonimmediate_operand")]
2006 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub.  At most one of operands 2 and 3 may be in
;; memory; "%" marks operands 1 and 2 as commutative.
2008 (define_insn "*fma4_fmaddsub_<mode>"
2009 [(set (match_operand:VF 0 "register_operand" "=x,x")
2011 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2012 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2013 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
2016 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd.  Same constraint layout as
;; *fma4_fmaddsub_<mode>.
2020 (define_insn "*fma4_fmsubadd_<mode>"
2021 [(set (match_operand:VF 0 "register_operand" "=x,x")
2023 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2024 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2026 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
2029 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2030 [(set_attr "type" "ssemuladd")
2031 (set_attr "mode" "<MODE>")])
2033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2035 ;; FMA3 floating point multiply/accumulate instructions.
2037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand fused multiply-add over FMAMODE, where the
;; destination is also one of the inputs.  Alternatives:
;;   0: operand 1 tied to dest, operand 2 reg/mem  -> vfmadd132
;;   1: operand 1 tied to dest, operand 3 reg/mem  -> vfmadd213
;;   2: operand 3 tied to dest, operand 2 reg/mem  -> vfmadd231
2039 (define_insn "*fma_fmadd_<mode>"
2040 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2042 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2043 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2044 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2047 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2048 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2049 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "type" "ssemuladd")
2051 (set_attr "mode" "<MODE>")])
;; Three-operand fused multiply-subtract.  Same alternative layout as
;; *fma_fmadd_<mode>; emits the vfmsub 132/213/231 forms.
2053 (define_insn "*fma_fmsub_<mode>"
2054 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2056 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2057 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2059 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2062 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2063 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2064 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2065 [(set_attr "type" "ssemuladd")
2066 (set_attr "mode" "<MODE>")])
;; Three-operand negated fused multiply-add.  Same alternative layout
;; as *fma_fmadd_<mode>; emits the vfnmadd 132/213/231 forms.
2068 (define_insn "*fma_fnmadd_<mode>"
2069 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2072 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2073 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2074 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2077 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2078 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2079 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2080 [(set_attr "type" "ssemuladd")
2081 (set_attr "mode" "<MODE>")])
;; Three-operand negated fused multiply-subtract.  Same alternative
;; layout as *fma_fmadd_<mode>; emits the vfnmsub 132/213/231 forms.
2083 (define_insn "*fma_fnmsub_<mode>"
2084 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2087 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2088 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2090 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2093 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2094 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2095 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2096 [(set_attr "type" "ssemuladd")
2097 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over the vector float modes.  Same
;; alternative layout as *fma_fmadd_<mode> (132/213/231 forms chosen
;; by which input is tied to the destination).
2099 (define_insn "*fma_fmaddsub_<mode>"
2100 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2102 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2103 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2104 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2108 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2109 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2110 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2111 [(set_attr "type" "ssemuladd")
2112 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over the vector float modes.  Same
;; alternative layout as *fma_fmaddsub_<mode>.
2114 (define_insn "*fma_fmsubadd_<mode>"
2115 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2117 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2118 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2120 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2124 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2125 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2126 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2127 [(set_attr "type" "ssemuladd")
2128 (set_attr "mode" "<MODE>")])
2130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2132 ;; Parallel single-precision floating point conversion operations
2134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI value in operand 2 (constraint "ym") to
;; V2SF.  Operand 1 is tied to the destination ("0") and is the V4SF
;; value the converted elements are combined with.
;; NOTE(review): "y" is presumably the MMX register class -- confirm
;; against the i386 constraint definitions.
2136 (define_insn "sse_cvtpi2ps"
2137 [(set (match_operand:V4SF 0 "register_operand" "=x")
2140 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2141 (match_operand:V4SF 1 "register_operand" "0")
2144 "cvtpi2ps\t{%2, %0|%0, %2}"
2145 [(set_attr "type" "ssecvt")
2146 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts the V4SF operand 1 through an unspec yielding
;; V4SI and keeps elements 0 and 1 as the V2SI result in a "y"
;; register.  Scheduled on the mmx unit.
;; NOTE(review): the unspec is presumably the non-truncating
;; (current rounding mode) conversion, in contrast to the (fix ...)
;; used by sse_cvttps2pi -- confirm.
2148 (define_insn "sse_cvtps2pi"
2149 [(set (match_operand:V2SI 0 "register_operand" "=y")
2151 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2153 (parallel [(const_int 0) (const_int 1)])))]
2155 "cvtps2pi\t{%1, %0|%0, %1}"
2156 [(set_attr "type" "ssecvt")
2157 (set_attr "unit" "mmx")
2158 (set_attr "mode" "DI")])
;; cvttps2pi: converts the V4SF operand 1 with (fix:V4SI ...) and keeps
;; elements 0 and 1 as the V2SI result in a "y" register.  Scheduled
;; on the mmx unit; prefix_rep is forced to 0.
2160 (define_insn "sse_cvttps2pi"
2161 [(set (match_operand:V2SI 0 "register_operand" "=y")
2163 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2164 (parallel [(const_int 0) (const_int 1)])))]
2166 "cvttps2pi\t{%1, %0|%0, %1}"
2167 [(set_attr "type" "ssecvt")
2168 (set_attr "unit" "mmx")
2169 (set_attr "prefix_rep" "0")
2170 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode integer in operand 2 to SF; operand 1
;; is the V4SF source the result is combined with.  Alternatives:
;;   0 (noavx): integer in a general register, operand 1 tied to dest
;;   1 (noavx): integer in memory, operand 1 tied to dest
;;   2 (avx):   three-operand vcvtsi2ss, integer in register or memory
;; The register and memory forms are split so the per-CPU decode
;; attributes can differ between them.
2172 (define_insn "sse_cvtsi2ss"
2173 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2176 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2177 (match_operand:V4SF 1 "register_operand" "0,0,x")
2181 cvtsi2ss\t{%2, %0|%0, %2}
2182 cvtsi2ss\t{%2, %0|%0, %2}
2183 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2184 [(set_attr "isa" "noavx,noavx,avx")
2185 (set_attr "type" "sseicvt")
2186 (set_attr "athlon_decode" "vector,double,*")
2187 (set_attr "amdfam10_decode" "vector,double,*")
2188 (set_attr "bdver1_decode" "double,direct,*")
2189 (set_attr "prefix" "orig,orig,vex")
2190 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: operand 2 is DImode and the pattern
;; requires TARGET_SSE && TARGET_64BIT.  The legacy alternatives set
;; prefix_rex to 1; the VEX alternative instead fixes length_vex at 4.
2192 (define_insn "sse_cvtsi2ssq"
2193 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2196 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2197 (match_operand:V4SF 1 "register_operand" "0,0,x")
2199 "TARGET_SSE && TARGET_64BIT"
2201 cvtsi2ssq\t{%2, %0|%0, %2}
2202 cvtsi2ssq\t{%2, %0|%0, %2}
2203 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2204 [(set_attr "isa" "noavx,noavx,avx")
2205 (set_attr "type" "sseicvt")
2206 (set_attr "athlon_decode" "vector,double,*")
2207 (set_attr "amdfam10_decode" "vector,double,*")
2208 (set_attr "bdver1_decode" "double,direct,*")
2209 (set_attr "length_vex" "*,*,4")
2210 (set_attr "prefix_rex" "1,1,*")
2211 (set_attr "prefix" "orig,orig,vex")
2212 (set_attr "mode" "SF")])
;; cvtss2si on a vector operand: element 0 of the V4SF operand 1 is
;; converted to SImode through UNSPEC_FIX_NOTRUNC.  The two
;; alternatives (register / memory source) exist so the decode
;; attributes can differ.  "%v" emits the VEX mnemonic when applicable.
2214 (define_insn "sse_cvtss2si"
2215 [(set (match_operand:SI 0 "register_operand" "=r,r")
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))]
2220 UNSPEC_FIX_NOTRUNC))]
2222 "%vcvtss2si\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "sseicvt")
2224 (set_attr "athlon_decode" "double,vector")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "SI")])
;; Variant of the cvtss2si pattern whose input is a plain SFmode scalar
;; (register or memory) instead of an element selected from a V4SF vector.
;; It emits the same cvtss2si template and uses UNSPEC_FIX_NOTRUNC.
2230 (define_insn "sse_cvtss2si_2"
2231 [(set (match_operand:SI 0 "register_operand" "=r,r")
2232 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2233 UNSPEC_FIX_NOTRUNC))]
2235 "%vcvtss2si\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "sseicvt")
2237 (set_attr "athlon_decode" "double,vector")
2238 (set_attr "amdfam10_decode" "double,double")
2239 (set_attr "bdver1_decode" "double,double")
2240 (set_attr "prefix_rep" "1")
2241 (set_attr "prefix" "maybe_vex")
2242 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: converts element 0 of the V4SF operand to a DImode
;; general register via UNSPEC_FIX_NOTRUNC.  Enabled only for
;; TARGET_SSE && TARGET_64BIT; the template adds the {q} suffix in the
;; AT&T-dialect spelling.
2244 (define_insn "sse_cvtss2siq"
2245 [(set (match_operand:DI 0 "register_operand" "=r,r")
2248 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2249 (parallel [(const_int 0)]))]
2250 UNSPEC_FIX_NOTRUNC))]
2251 "TARGET_SSE && TARGET_64BIT"
2252 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2253 [(set_attr "type" "sseicvt")
2254 (set_attr "athlon_decode" "double,vector")
2255 (set_attr "bdver1_decode" "double,double")
2256 (set_attr "prefix_rep" "1")
2257 (set_attr "prefix" "maybe_vex")
2258 (set_attr "mode" "DI")])
;; 64-bit cvtss2si taking a plain SFmode scalar (register or memory) rather
;; than a vector element.  DImode result, UNSPEC_FIX_NOTRUNC conversion,
;; enabled only for TARGET_SSE && TARGET_64BIT.
2260 (define_insn "sse_cvtss2siq_2"
2261 [(set (match_operand:DI 0 "register_operand" "=r,r")
2262 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2263 UNSPEC_FIX_NOTRUNC))]
2264 "TARGET_SSE && TARGET_64BIT"
2265 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2266 [(set_attr "type" "sseicvt")
2267 (set_attr "athlon_decode" "double,vector")
2268 (set_attr "amdfam10_decode" "double,double")
2269 (set_attr "bdver1_decode" "double,double")
2270 (set_attr "prefix_rep" "1")
2271 (set_attr "prefix" "maybe_vex")
2272 (set_attr "mode" "DI")])
;; cvttss2si (convert with truncation): converts element 0 of the V4SF
;; operand (SSE register or memory) to an SImode general register.  Unlike
;; the cvtss2si patterns, no UNSPEC_FIX_NOTRUNC wrapper appears here.
2274 (define_insn "sse_cvttss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))))]
2281 "%vcvttss2si\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "sseicvt")
2283 (set_attr "athlon_decode" "double,vector")
2284 (set_attr "amdfam10_decode" "double,double")
2285 (set_attr "bdver1_decode" "double,double")
2286 (set_attr "prefix_rep" "1")
2287 (set_attr "prefix" "maybe_vex")
2288 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: converts element 0 of the V4SF operand to a DImode
;; general register.  Enabled only for TARGET_SSE && TARGET_64BIT; the
;; template adds the {q} suffix in the AT&T-dialect spelling.
2290 (define_insn "sse_cvttss2siq"
2291 [(set (match_operand:DI 0 "register_operand" "=r,r")
2294 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2295 (parallel [(const_int 0)]))))]
2296 "TARGET_SSE && TARGET_64BIT"
2297 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "sseicvt")
2299 (set_attr "athlon_decode" "double,vector")
2300 (set_attr "amdfam10_decode" "double,double")
2301 (set_attr "bdver1_decode" "double,double")
2302 (set_attr "prefix_rep" "1")
2303 (set_attr "prefix" "maybe_vex")
2304 (set_attr "mode" "DI")])
;; cvtdq2ps over the VF1 iterator (V4SF, plus V8SF when TARGET_AVX).
;; Operand 1 is the matching integer vector mode <sseintvecmode>, taken
;; from an SSE register or memory; the result goes to an SSE register.
2306 (define_insn "float<sseintvecmodelower><mode>2"
2307 [(set (match_operand:VF1 0 "register_operand" "=x")
2309 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2311 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "ssecvt")
2313 (set_attr "prefix" "maybe_vex")
2314 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer-vector to SFmode-vector conversion.  Requires
;; TARGET_SSE2, and additionally TARGET_AVX2 unless the mode is V4SF.
;; The whole expansion is delegated to
;; ix86_expand_vector_convert_uns_vsivsf (destination, source).
2316 (define_expand "floatuns<sseintvecmodelower><mode>2"
2317 [(match_operand:VF1 0 "register_operand" "")
2318 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2319 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2321 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit vcvtps2dq: converts a V8SF operand (register or memory) to V8SI
;; through UNSPEC_FIX_NOTRUNC.  Always VEX-encoded (prefix "vex").
2325 (define_insn "avx_cvtps2dq256"
2326 [(set (match_operand:V8SI 0 "register_operand" "=x")
2327 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2328 UNSPEC_FIX_NOTRUNC))]
2330 "vcvtps2dq\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "prefix" "vex")
2333 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: converts a V4SF operand (register or memory) to V4SI
;; through UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute is computed
;; from a TARGET_AVX test instead of being a fixed string; "1" is one of
;; the two possible values.
2335 (define_insn "sse2_cvtps2dq"
2336 [(set (match_operand:V4SI 0 "register_operand" "=x")
2337 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2338 UNSPEC_FIX_NOTRUNC))]
2340 "%vcvtps2dq\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set (attr "prefix_data16")
2344 (match_test "TARGET_AVX")
2346 (const_string "1")))
2347 (set_attr "prefix" "maybe_vex")
2348 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq (convert with truncation): RTL `fix' of a V8SF
;; operand (register or memory) into a V8SI register.  Always VEX-encoded.
2350 (define_insn "fix_truncv8sfv8si2"
2351 [(set (match_operand:V8SI 0 "register_operand" "=x")
2352 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2354 "vcvttps2dq\t{%1, %0|%0, %1}"
2355 [(set_attr "type" "ssecvt")
2356 (set_attr "prefix" "vex")
2357 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq (convert with truncation): RTL `fix' of a V4SF operand
;; (register or memory) into a V4SI register.  prefix_rep and prefix_data16
;; are both computed from a TARGET_AVX test.
;; NOTE(review): prefix_data16 is given twice in this attribute list, once
;; through the conditional (set (attr ...)) form and once more through the
;; plain set_attr "0" that follows it.  One of the two looks redundant;
;; confirm which definition is intended before removing either.
2359 (define_insn "fix_truncv4sfv4si2"
2360 [(set (match_operand:V4SI 0 "register_operand" "=x")
2361 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2363 "%vcvttps2dq\t{%1, %0|%0, %1}"
2364 [(set_attr "type" "ssecvt")
2365 (set (attr "prefix_rep")
2367 (match_test "TARGET_AVX")
2369 (const_string "1")))
2370 (set (attr "prefix_data16")
2372 (match_test "TARGET_AVX")
2374 (const_string "0")))
2375 (set_attr "prefix_data16" "0")
2376 (set_attr "prefix" "maybe_vex")
2377 (set_attr "mode" "TI")])
;; Unsigned float-to-integer vector truncation built on the signed insn.
;; ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input (tmp[0])
;; and stores a second value into tmp[2].  The signed fix_trunc pattern
;; converts tmp[0] into tmp[1], and the final result is tmp[1] XOR tmp[2].
2379 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2380 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2381 (match_operand:VF1 1 "register_operand" "")]
2385 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2386 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2387 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2388 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2394 ;; Parallel double-precision floating point conversion operations
2396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: RTL `float' of a V2SI operand into a V2DF SSE register.
;; Alternative 0 reads the source through the "y" constraint and is tagged
;; unit "mmx" with prefix_data16 "1"; alternative 1 reads it from memory.
2398 (define_insn "sse2_cvtpi2pd"
2399 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2400 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2402 "cvtpi2pd\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "unit" "mmx,*")
2405 (set_attr "prefix_data16" "1,*")
2406 (set_attr "mode" "V2DF")])
;; cvtpd2pi: converts a V2DF operand (SSE register or memory) to V2SI
;; through UNSPEC_FIX_NOTRUNC.  The destination uses the "y" constraint
;; and the insn is tagged with unit "mmx".
2408 (define_insn "sse2_cvtpd2pi"
2409 [(set (match_operand:V2SI 0 "register_operand" "=y")
2410 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2411 UNSPEC_FIX_NOTRUNC))]
2413 "cvtpd2pi\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "unit" "mmx")
2416 (set_attr "bdver1_decode" "double")
2417 (set_attr "prefix_data16" "1")
2418 (set_attr "mode" "DI")])
;; cvttpd2pi (convert with truncation): RTL `fix' of a V2DF operand (SSE
;; register or memory) into a V2SI destination that uses the "y"
;; constraint; tagged with unit "mmx".
2420 (define_insn "sse2_cvttpd2pi"
2421 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2424 "cvttpd2pi\t{%1, %0|%0, %1}"
2425 [(set_attr "type" "ssecvt")
2426 (set_attr "unit" "mmx")
2427 (set_attr "bdver1_decode" "double")
2428 (set_attr "prefix_data16" "1")
2429 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SImode operand 2 (general register or memory) to
;; DFmode with the RTL `float' code; operand 1 is a V2DF register input.
;; Alternatives 0 and 1 are the non-AVX two-operand forms (operand 1 tied
;; to the destination); alternative 2 is the three-operand VEX form.
2431 (define_insn "sse2_cvtsi2sd"
2432 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2435 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2436 (match_operand:V2DF 1 "register_operand" "0,0,x")
2440 cvtsi2sd\t{%2, %0|%0, %2}
2441 cvtsi2sd\t{%2, %0|%0, %2}
2442 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2443 [(set_attr "isa" "noavx,noavx,avx")
2444 (set_attr "type" "sseicvt")
2445 (set_attr "athlon_decode" "double,direct,*")
2446 (set_attr "amdfam10_decode" "vector,double,*")
2447 (set_attr "bdver1_decode" "double,direct,*")
2448 (set_attr "prefix" "orig,orig,vex")
2449 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source counterpart of the SImode int-to-double
;; conversion, enabled only for TARGET_SSE2 && TARGET_64BIT.  The two
;; non-AVX alternatives set prefix_rex to "1"; the VEX alternative fixes
;; length_vex at 4.
2451 (define_insn "sse2_cvtsi2sdq"
2452 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2455 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2456 (match_operand:V2DF 1 "register_operand" "0,0,x")
2458 "TARGET_SSE2 && TARGET_64BIT"
2460 cvtsi2sdq\t{%2, %0|%0, %2}
2461 cvtsi2sdq\t{%2, %0|%0, %2}
2462 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "isa" "noavx,noavx,avx")
2464 (set_attr "type" "sseicvt")
2465 (set_attr "athlon_decode" "double,direct,*")
2466 (set_attr "amdfam10_decode" "vector,double,*")
2467 (set_attr "bdver1_decode" "double,direct,*")
2468 (set_attr "length_vex" "*,*,4")
2469 (set_attr "prefix_rex" "1,1,*")
2470 (set_attr "prefix" "orig,orig,vex")
2471 (set_attr "mode" "DF")])
;; cvtsd2si: converts element 0 of the V2DF operand (SSE register or
;; memory) to an SImode general register via UNSPEC_FIX_NOTRUNC.
2473 (define_insn "sse2_cvtsd2si"
2474 [(set (match_operand:SI 0 "register_operand" "=r,r")
2477 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2478 (parallel [(const_int 0)]))]
2479 UNSPEC_FIX_NOTRUNC))]
2481 "%vcvtsd2si\t{%1, %0|%0, %1}"
2482 [(set_attr "type" "sseicvt")
2483 (set_attr "athlon_decode" "double,vector")
2484 (set_attr "bdver1_decode" "double,double")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix" "maybe_vex")
2487 (set_attr "mode" "SI")])
;; Variant of the cvtsd2si pattern whose input is a plain DFmode scalar
;; (register or memory) instead of an element selected from a V2DF vector.
;; Same template, same UNSPEC_FIX_NOTRUNC conversion.
2489 (define_insn "sse2_cvtsd2si_2"
2490 [(set (match_operand:SI 0 "register_operand" "=r,r")
2491 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2492 UNSPEC_FIX_NOTRUNC))]
2494 "%vcvtsd2si\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: converts element 0 of the V2DF operand to a DImode
;; general register via UNSPEC_FIX_NOTRUNC.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2503 (define_insn "sse2_cvtsd2siq"
2504 [(set (match_operand:DI 0 "register_operand" "=r,r")
2507 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2508 (parallel [(const_int 0)]))]
2509 UNSPEC_FIX_NOTRUNC))]
2510 "TARGET_SSE2 && TARGET_64BIT"
2511 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2512 [(set_attr "type" "sseicvt")
2513 (set_attr "athlon_decode" "double,vector")
2514 (set_attr "bdver1_decode" "double,double")
2515 (set_attr "prefix_rep" "1")
2516 (set_attr "prefix" "maybe_vex")
2517 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si taking a plain DFmode scalar (register or memory).
;; DImode result, UNSPEC_FIX_NOTRUNC conversion, enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2519 (define_insn "sse2_cvtsd2siq_2"
2520 [(set (match_operand:DI 0 "register_operand" "=r,r")
2521 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2522 UNSPEC_FIX_NOTRUNC))]
2523 "TARGET_SSE2 && TARGET_64BIT"
2524 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2525 [(set_attr "type" "sseicvt")
2526 (set_attr "athlon_decode" "double,vector")
2527 (set_attr "amdfam10_decode" "double,double")
2528 (set_attr "bdver1_decode" "double,double")
2529 (set_attr "prefix_rep" "1")
2530 (set_attr "prefix" "maybe_vex")
2531 (set_attr "mode" "DI")])
;; cvttsd2si (convert with truncation): converts element 0 of the V2DF
;; operand (SSE register or memory) to an SImode general register.
2533 (define_insn "sse2_cvttsd2si"
2534 [(set (match_operand:SI 0 "register_operand" "=r,r")
2537 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2538 (parallel [(const_int 0)]))))]
2540 "%vcvttsd2si\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "sseicvt")
2542 (set_attr "athlon_decode" "double,vector")
2543 (set_attr "amdfam10_decode" "double,double")
2544 (set_attr "bdver1_decode" "double,double")
2545 (set_attr "prefix_rep" "1")
2546 (set_attr "prefix" "maybe_vex")
2547 (set_attr "mode" "SI")])
;; 64-bit cvttsd2si: converts element 0 of the V2DF operand to a DImode
;; general register.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2549 (define_insn "sse2_cvttsd2siq"
2550 [(set (match_operand:DI 0 "register_operand" "=r,r")
2553 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2554 (parallel [(const_int 0)]))))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "bdver1_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "DI")])
;; vcvtdq2pd, 256-bit result: RTL `float' of a V4SI operand (register or
;; memory) into a V4DF register.  Always VEX-encoded.
2565 (define_insn "floatv4siv4df2"
2566 [(set (match_operand:V4DF 0 "register_operand" "=x")
2567 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2569 "vcvtdq2pd\t{%1, %0|%0, %1}"
2570 [(set_attr "type" "ssecvt")
2571 (set_attr "prefix" "vex")
2572 (set_attr "mode" "V4DF")])
;; vcvtdq2pd fed from a V8SI operand: only elements 0-3 of the source are
;; selected for conversion to V4DF.  The template prints operand 1 with
;; the `x' modifier.
2574 (define_insn "avx_cvtdq2pd256_2"
2575 [(set (match_operand:V4DF 0 "register_operand" "=x")
2578 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2579 (parallel [(const_int 0) (const_int 1)
2580 (const_int 2) (const_int 3)]))))]
2582 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2583 [(set_attr "type" "ssecvt")
2584 (set_attr "prefix" "vex")
2585 (set_attr "mode" "V4DF")])
;; cvtdq2pd, 128-bit result: elements 0 and 1 of the V4SI operand
;; (register or memory) are selected for conversion to V2DF.  The
;; Intel-dialect half of the template prints operand 1 with the `q'
;; modifier.
2587 (define_insn "sse2_cvtdq2pd"
2588 [(set (match_operand:V2DF 0 "register_operand" "=x")
2591 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2592 (parallel [(const_int 0) (const_int 1)]))))]
2594 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2595 [(set_attr "type" "ssecvt")
2596 (set_attr "prefix" "maybe_vex")
2597 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: converts a V4DF operand (register or
;; memory) to V4SI through UNSPEC_FIX_NOTRUNC.  The {y} suffix appears in
;; the AT&T-dialect mnemonic.
2599 (define_insn "avx_cvtpd2dq256"
2600 [(set (match_operand:V4SI 0 "register_operand" "=x")
2601 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2602 UNSPEC_FIX_NOTRUNC))]
2604 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "prefix" "vex")
2607 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from the UNSPEC conversion of a V4DF
;; operand.  The preparation statement sets operands[2] to the V4SI zero
;; constant (presumably the other half of the V8SI result).
2609 (define_expand "avx_cvtpd2dq256_2"
2610 [(set (match_operand:V8SI 0 "register_operand" "")
2612 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2616 "operands[2] = CONST0_RTX (V4SImode);")
;; Matcher for the V8SI form built by the avx_cvtpd2dq256_2 expander: the
;; V4DF conversion is paired with operand 2, which must satisfy
;; const0_operand.  The template writes the destination with the `x'
;; modifier (%x0).
2618 (define_insn "*avx_cvtpd2dq256_2"
2619 [(set (match_operand:V8SI 0 "register_operand" "=x")
2621 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2623 (match_operand:V4SI 2 "const0_operand" "")))]
2625 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2626 [(set_attr "type" "ssecvt")
2627 (set_attr "prefix" "vex")
2628 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from the UNSPEC conversion of a V2DF
;; operand (a V2SI value).  The preparation statement sets operands[2] to
;; the V2SI zero constant (presumably the other half of the result).
2630 (define_expand "sse2_cvtpd2dq"
2631 [(set (match_operand:V4SI 0 "register_operand" "")
2633 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2637 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the V4SI form built by the sse2_cvtpd2dq expander: the
;; V2DF conversion is paired with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX template
;; (vcvtpd2dq with an {x} suffix) or the legacy cvtpd2dq template.
2639 (define_insn "*sse2_cvtpd2dq"
2640 [(set (match_operand:V4SI 0 "register_operand" "=x")
2642 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2644 (match_operand:V2SI 2 "const0_operand" "")))]
2648 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2650 return "cvtpd2dq\t{%1, %0|%0, %1}";
2652 [(set_attr "type" "ssecvt")
2653 (set_attr "prefix_rep" "1")
2654 (set_attr "prefix_data16" "0")
2655 (set_attr "prefix" "maybe_vex")
2656 (set_attr "mode" "TI")
2657 (set_attr "amdfam10_decode" "double")
2658 (set_attr "athlon_decode" "vector")
2659 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq (convert with truncation) with a 256-bit source: RTL `fix'
;; of a V4DF operand (register or memory) into a V4SI register.
2661 (define_insn "fix_truncv4dfv4si2"
2662 [(set (match_operand:V4SI 0 "register_operand" "=x")
2663 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2665 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2666 [(set_attr "type" "ssecvt")
2667 (set_attr "prefix" "vex")
2668 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from the `fix' conversion of a V4DF
;; operand.  The preparation statement sets operands[2] to the V4SI zero
;; constant (presumably the other half of the V8SI result).
2670 (define_expand "avx_cvttpd2dq256_2"
2671 [(set (match_operand:V8SI 0 "register_operand" "")
2673 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2676 "operands[2] = CONST0_RTX (V4SImode);")
;; Matcher for the V8SI form built by the avx_cvttpd2dq256_2 expander: the
;; `fix' of the V4DF operand is paired with operand 2, which must satisfy
;; const0_operand.  The destination is printed with the `x' modifier.
2678 (define_insn "*avx_cvttpd2dq256_2"
2679 [(set (match_operand:V8SI 0 "register_operand" "=x")
2681 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2682 (match_operand:V4SI 2 "const0_operand" "")))]
2684 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "vex")
2687 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from the `fix' conversion of a V2DF
;; operand (a V2SI value).  The preparation statement sets operands[2] to
;; the V2SI zero constant (presumably the other half of the result).
2689 (define_expand "sse2_cvttpd2dq"
2690 [(set (match_operand:V4SI 0 "register_operand" "")
2692 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2695 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the V4SI form built by the sse2_cvttpd2dq expander: the
;; `fix' of the V2DF operand is paired with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX template
;; (vcvttpd2dq with an {x} suffix) or the legacy cvttpd2dq template.
2697 (define_insn "*sse2_cvttpd2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2700 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2701 (match_operand:V2SI 2 "const0_operand" "")))]
2705 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2707 return "cvttpd2dq\t{%1, %0|%0, %1}";
2709 [(set_attr "type" "ssecvt")
2710 (set_attr "amdfam10_decode" "double")
2711 (set_attr "athlon_decode" "vector")
2712 (set_attr "bdver1_decode" "double")
2713 (set_attr "prefix" "maybe_vex")
2714 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 (register or memory) to
;; V2SF; operand 1 is a V4SF register input.  Alternatives 0 and 1 are
;; the non-AVX two-operand forms (operand 1 tied to the destination);
;; alternative 2 is the three-operand VEX form.
2716 (define_insn "sse2_cvtsd2ss"
2717 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2720 (float_truncate:V2SF
2721 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2722 (match_operand:V4SF 1 "register_operand" "0,0,x")
2726 cvtsd2ss\t{%2, %0|%0, %2}
2727 cvtsd2ss\t{%2, %0|%0, %2}
2728 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2729 [(set_attr "isa" "noavx,noavx,avx")
2730 (set_attr "type" "ssecvt")
2731 (set_attr "athlon_decode" "vector,double,*")
2732 (set_attr "amdfam10_decode" "vector,double,*")
2733 (set_attr "bdver1_decode" "direct,direct,*")
2734 (set_attr "prefix" "orig,orig,vex")
2735 (set_attr "mode" "SF")])
;; cvtss2sd: works on elements 0 and 1 selected from the V4SF operand 2
;; (register or memory); operand 1 is a V2DF register input.
;; Alternatives 0 and 1 are the non-AVX two-operand forms (operand 1 tied
;; to the destination); alternative 2 is the three-operand VEX form.
2737 (define_insn "sse2_cvtss2sd"
2738 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2742 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2743 (parallel [(const_int 0) (const_int 1)])))
2744 (match_operand:V2DF 1 "register_operand" "0,0,x")
2748 cvtss2sd\t{%2, %0|%0, %2}
2749 cvtss2sd\t{%2, %0|%0, %2}
2750 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2751 [(set_attr "isa" "noavx,noavx,avx")
2752 (set_attr "type" "ssecvt")
2753 (set_attr "amdfam10_decode" "vector,double,*")
2754 (set_attr "athlon_decode" "direct,direct,*")
2755 (set_attr "bdver1_decode" "direct,direct,*")
2756 (set_attr "prefix" "orig,orig,vex")
2757 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of a V4DF operand
;; (register or memory) into a V4SF register.  Always VEX-encoded.
2759 (define_insn "avx_cvtpd2ps256"
2760 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V4SF
2762 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2764 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "ssecvt")
2766 (set_attr "prefix" "vex")
2767 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from the float_truncate of a V2DF
;; operand (a V2SF value).  The preparation statement sets operands[2] to
;; the V2SF zero constant (presumably the other half of the result).
2769 (define_expand "sse2_cvtpd2ps"
2770 [(set (match_operand:V4SF 0 "register_operand" "")
2772 (float_truncate:V2SF
2773 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2776 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the V4SF form built by the sse2_cvtpd2ps expander: the
;; float_truncate of the V2DF operand is paired with operand 2, which must
;; satisfy const0_operand.  The output code returns either the VEX
;; template (vcvtpd2ps with an {x} suffix) or the legacy cvtpd2ps template.
2778 (define_insn "*sse2_cvtpd2ps"
2779 [(set (match_operand:V4SF 0 "register_operand" "=x")
2781 (float_truncate:V2SF
2782 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2783 (match_operand:V2SF 2 "const0_operand" "")))]
2787 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2789 return "cvtpd2ps\t{%1, %0|%0, %1}";
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "amdfam10_decode" "double")
2793 (set_attr "athlon_decode" "vector")
2794 (set_attr "bdver1_decode" "double")
2795 (set_attr "prefix_data16" "1")
2796 (set_attr "prefix" "maybe_vex")
2797 (set_attr "mode" "V4SF")])
;; vcvtps2pd with a 256-bit result: converts a V4SF operand (register or
;; memory) into a V4DF register.  Always VEX-encoded.
2799 (define_insn "avx_cvtps2pd256"
2800 [(set (match_operand:V4DF 0 "register_operand" "=x")
2802 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2804 "vcvtps2pd\t{%1, %0|%0, %1}"
2805 [(set_attr "type" "ssecvt")
2806 (set_attr "prefix" "vex")
2807 (set_attr "mode" "V4DF")])
;; vcvtps2pd fed from a V8SF operand: only elements 0-3 of the source are
;; selected for conversion to V4DF.  The template prints operand 1 with
;; the `x' modifier.
2809 (define_insn "*avx_cvtps2pd256_2"
2810 [(set (match_operand:V4DF 0 "register_operand" "=x")
2813 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2814 (parallel [(const_int 0) (const_int 1)
2815 (const_int 2) (const_int 3)]))))]
2817 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2818 [(set_attr "type" "ssecvt")
2819 (set_attr "prefix" "vex")
2820 (set_attr "mode" "V4DF")])
;; cvtps2pd, 128-bit result: elements 0 and 1 of the V4SF operand
;; (register or memory) are selected for conversion to V2DF.  The
;; Intel-dialect half of the template prints operand 1 with the `q'
;; modifier.
2822 (define_insn "sse2_cvtps2pd"
2823 [(set (match_operand:V2DF 0 "register_operand" "=x")
2826 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2827 (parallel [(const_int 0) (const_int 1)]))))]
2829 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2830 [(set_attr "type" "ssecvt")
2831 (set_attr "amdfam10_decode" "direct")
2832 (set_attr "athlon_decode" "double")
2833 (set_attr "bdver1_decode" "double")
2834 (set_attr "prefix_data16" "0")
2835 (set_attr "prefix" "maybe_vex")
2836 (set_attr "mode" "V2DF")])
;; Two-step expander for widening the high half of a V4SF vector to V2DF.
;; The first set uses the selector [6 7 2 3] on an expression involving
;; operand 1; the second produces the V2DF result from elements [0 1].
;; operands[2] is a fresh V4SF pseudo (presumably the intermediate that
;; holds the shuffled vector between the two steps).
2838 (define_expand "vec_unpacks_hi_v4sf"
2843 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2844 (parallel [(const_int 6) (const_int 7)
2845 (const_int 2) (const_int 3)])))
2846 (set (match_operand:V2DF 0 "register_operand" "")
2850 (parallel [(const_int 0) (const_int 1)]))))]
2852 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for widening the high half of a V8SF vector to V4DF.
;; The first step selects elements 4-7 of operand 1; the second set
;; produces the V4DF result.  operands[2] is a fresh V4SF pseudo
;; (presumably the intermediate that holds the selected half).
2854 (define_expand "vec_unpacks_hi_v8sf"
2857 (match_operand:V8SF 1 "nonimmediate_operand" "")
2858 (parallel [(const_int 4) (const_int 5)
2859 (const_int 6) (const_int 7)])))
2860 (set (match_operand:V4DF 0 "register_operand" "")
2864 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF vector: elements 0 and 1 of operand 1 are
;; selected to form the V2DF result in operand 0.
2866 (define_expand "vec_unpacks_lo_v4sf"
2867 [(set (match_operand:V2DF 0 "register_operand" "")
2870 (match_operand:V4SF 1 "nonimmediate_operand" "")
2871 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF vector: elements 0-3 of operand 1 are
;; selected to form the V4DF result in operand 0.
2874 (define_expand "vec_unpacks_lo_v8sf"
2875 [(set (match_operand:V4DF 0 "register_operand" "")
2878 (match_operand:V8SF 1 "nonimmediate_operand" "")
2879 (parallel [(const_int 0) (const_int 1)
2880 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode used as the
;; destination of the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders:
;; V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
2883 (define_mode_attr sseunpackfltmode
2884 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Signed unpack-and-convert of the high half of a VI2_AVX2 vector.
;; The input is first widened with vec_unpacks_hi_<mode> into a fresh
;; <sseunpackmode> pseudo, then a FLOAT rtx of that pseudo is stored into
;; operand 0 in <sseunpackfltmode>.
2886 (define_expand "vec_unpacks_float_hi_<mode>"
2887 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2888 (match_operand:VI2_AVX2 1 "register_operand" "")]
2891 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2893 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2894 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2895 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-and-convert of the low half of a VI2_AVX2 vector.
;; The input is first widened with vec_unpacks_lo_<mode> into a fresh
;; <sseunpackmode> pseudo, then a FLOAT rtx of that pseudo is stored into
;; operand 0 in <sseunpackfltmode>.
2899 (define_expand "vec_unpacks_float_lo_<mode>"
2900 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2901 (match_operand:VI2_AVX2 1 "register_operand" "")]
2904 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2906 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2907 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2908 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the high half of a VI2_AVX2 vector.
;; The input is first widened with vec_unpacku_hi_<mode> into a fresh
;; <sseunpackmode> pseudo, then a FLOAT rtx of that pseudo is stored into
;; operand 0 in <sseunpackfltmode>.
2912 (define_expand "vec_unpacku_float_hi_<mode>"
2913 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2914 (match_operand:VI2_AVX2 1 "register_operand" "")]
2917 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2919 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2920 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2921 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the low half of a VI2_AVX2 vector.
;; The input is first widened with vec_unpacku_lo_<mode> into a fresh
;; <sseunpackmode> pseudo, then a FLOAT rtx of that pseudo is stored into
;; operand 0 in <sseunpackfltmode>.
2925 (define_expand "vec_unpacku_float_lo_<mode>"
2926 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2927 (match_operand:VI2_AVX2 1 "register_operand" "")]
2930 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2932 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2933 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2934 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander converting the high half of a V4SI vector to V2DF.
;; The first step applies the selector [2 3 2 3] to operand 1; the second
;; produces the V2DF result from elements [0 1].  operands[2] is a fresh
;; V4SI pseudo (presumably the intermediate between the two steps).
2938 (define_expand "vec_unpacks_float_hi_v4si"
2941 (match_operand:V4SI 1 "nonimmediate_operand" "")
2942 (parallel [(const_int 2) (const_int 3)
2943 (const_int 2) (const_int 3)])))
2944 (set (match_operand:V2DF 0 "register_operand" "")
2948 (parallel [(const_int 0) (const_int 1)]))))]
2950 "operands[2] = gen_reg_rtx (V4SImode);")
;; Convert the low half of a V4SI vector: elements 0 and 1 of operand 1
;; are selected to form the V2DF result in operand 0.
2952 (define_expand "vec_unpacks_float_lo_v4si"
2953 [(set (match_operand:V2DF 0 "register_operand" "")
2956 (match_operand:V4SI 1 "nonimmediate_operand" "")
2957 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander converting the high half of a V8SI vector to V4DF.
;; The first step selects elements 4-7 of operand 1; the second set
;; produces the V4DF result.  operands[2] is a fresh V4SI pseudo
;; (presumably the intermediate that holds the selected half).
2960 (define_expand "vec_unpacks_float_hi_v8si"
2963 (match_operand:V8SI 1 "nonimmediate_operand" "")
2964 (parallel [(const_int 4) (const_int 5)
2965 (const_int 6) (const_int 7)])))
2966 (set (match_operand:V4DF 0 "register_operand" "")
2970 "operands[2] = gen_reg_rtx (V4SImode);")
;; Convert the low half of a V8SI vector: elements 0-3 of operand 1 are
;; selected to form the V4DF result in operand 0.
2972 (define_expand "vec_unpacks_float_lo_v8si"
2973 [(set (match_operand:V4DF 0 "register_operand" "")
2976 (match_operand:V8SI 1 "nonimmediate_operand" "")
2977 (parallel [(const_int 0) (const_int 1)
2978 (const_int 2) (const_int 3)]))))]
;; Unsigned conversion of the high half of a V4SI vector to V2DF.
;; Visible steps: shuffle operand 1 with selector [2 3 2 3], take elements
;; [0 1], compare operand 6 against operand 3 with `lt', AND operand 7
;; with operand 4, and store operand 6 + operand 8 into operand 0.
;; Setup: operand 3 is a zero V2DF vector; operand 4 is a V2DF constant
;; built from 2**32 (real_ldexp of dconst1 by 32); operand 5 is a V4SI
;; pseudo; operands 6-8 are V2DF pseudos.
;; NOTE(review): operands 7 and 8 presumably receive the `lt' mask and the
;; masked 2**32 correction respectively -- confirm against the full pattern.
2981 (define_expand "vec_unpacku_float_hi_v4si"
2984 (match_operand:V4SI 1 "nonimmediate_operand" "")
2985 (parallel [(const_int 2) (const_int 3)
2986 (const_int 2) (const_int 3)])))
2991 (parallel [(const_int 0) (const_int 1)]))))
2993 (lt:V2DF (match_dup 6) (match_dup 3)))
2995 (and:V2DF (match_dup 7) (match_dup 4)))
2996 (set (match_operand:V2DF 0 "register_operand" "")
2997 (plus:V2DF (match_dup 6) (match_dup 8)))]
3000 REAL_VALUE_TYPE TWO32r;
3004 real_ldexp (&TWO32r, &dconst1, 32);
3005 x = const_double_from_real_value (TWO32r, DFmode);
3007 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3008 operands[4] = force_reg (V2DFmode,
3009 ix86_build_const_vector (V2DFmode, 1, x));
3011 operands[5] = gen_reg_rtx (V4SImode);
3013 for (i = 6; i < 9; i++)
3014 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of a V4SI vector to V2DF.
;; Visible steps: take elements [0 1] of operand 1, compare operand 5
;; against operand 3 with `lt', AND operand 6 with operand 4, and store
;; operand 5 + operand 7 into operand 0.
;; Setup: operand 3 is a zero V2DF vector; operand 4 is a V2DF constant
;; built from 2**32 (real_ldexp of dconst1 by 32); operands 5-7 are V2DF
;; pseudos.
;; NOTE(review): operands 6 and 7 presumably receive the `lt' mask and the
;; masked 2**32 correction respectively -- confirm against the full pattern.
3017 (define_expand "vec_unpacku_float_lo_v4si"
3021 (match_operand:V4SI 1 "nonimmediate_operand" "")
3022 (parallel [(const_int 0) (const_int 1)]))))
3024 (lt:V2DF (match_dup 5) (match_dup 3)))
3026 (and:V2DF (match_dup 6) (match_dup 4)))
3027 (set (match_operand:V2DF 0 "register_operand" "")
3028 (plus:V2DF (match_dup 5) (match_dup 7)))]
3031 REAL_VALUE_TYPE TWO32r;
3035 real_ldexp (&TWO32r, &dconst1, 32);
3036 x = const_double_from_real_value (TWO32r, DFmode);
3038 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3039 operands[4] = force_reg (V2DFmode,
3040 ix86_build_const_vector (V2DFmode, 1, x));
3042 for (i = 5; i < 8; i++)
3043 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the high half of a V8SI vector to V4DF, emitted
;; entirely from C code:
;;   tmp[5] = high V4SI half of operand 1 (vec_extract_hi_v8si)
;;   tmp[2] = signed int->double conversion of tmp[5] (floatv4siv4df2)
;;   tmp[3] = tmp[2] < 0            (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] & 2**32        (tmp[1] is built from ldexp (1.0, 32))
;;   operand 0 = tmp[2] + tmp[4]
3046 (define_expand "vec_unpacku_float_hi_v8si"
3047 [(match_operand:V4DF 0 "register_operand" "")
3048 (match_operand:V8SI 1 "register_operand" "")]
3051 REAL_VALUE_TYPE TWO32r;
3055 real_ldexp (&TWO32r, &dconst1, 32);
3056 x = const_double_from_real_value (TWO32r, DFmode);
3058 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3059 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3060 tmp[5] = gen_reg_rtx (V4SImode);
3062 for (i = 2; i < 5; i++)
3063 tmp[i] = gen_reg_rtx (V4DFmode);
3064 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3065 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3066 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3067 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3068 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3069 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the low half of a V8SI vector to V4DF, emitted
;; entirely from C code:
;;   tmp[2] = avx_cvtdq2pd256_2 applied to operand 1
;;   tmp[3] = tmp[2] < 0            (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] & 2**32        (tmp[1] is built from ldexp (1.0, 32))
;;   operand 0 = tmp[2] + tmp[4]
3073 (define_expand "vec_unpacku_float_lo_v8si"
3074 [(match_operand:V4DF 0 "register_operand" "")
3075 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3078 REAL_VALUE_TYPE TWO32r;
3082 real_ldexp (&TWO32r, &dconst1, 32);
3083 x = const_double_from_real_value (TWO32r, DFmode);
3085 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3086 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3088 for (i = 2; i < 5; i++)
3089 tmp[i] = gen_reg_rtx (V4DFmode);
3090 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3091 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3092 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3093 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3094 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Pack two V4DF vectors into one V8SF vector.  Operands 1 and 2 are each
;; narrowed with float_truncate to V4SF, and a final set produces the V8SF
;; operand 0.  operands[3] and operands[4] are fresh V4SF pseudos
;; (presumably the two narrowed halves).
3098 (define_expand "vec_pack_trunc_v4df"
3100 (float_truncate:V4SF
3101 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3103 (float_truncate:V4SF
3104 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3105 (set (match_operand:V8SF 0 "register_operand" "")
3111 operands[3] = gen_reg_rtx (V4SFmode);
3112 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF vectors into one V4SF vector.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo (operand 1 is forced into a register first) and narrow it
;; with a single avx_cvtpd2ps256.
;; The other visible sequence narrows each input separately with
;; sse2_cvtpd2ps into V4SF pseudos and joins them with sse_movlhps.
3115 (define_expand "vec_pack_trunc_v2df"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V2DF 1 "nonimmediate_operand" "")
3118 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3123 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3125 tmp0 = gen_reg_rtx (V4DFmode);
3126 tmp1 = force_reg (V2DFmode, operands[1]);
3128 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3129 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3133 tmp0 = gen_reg_rtx (V4SFmode);
3134 tmp1 = gen_reg_rtx (V4SFmode);
3136 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3137 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3138 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V4DF vectors into one V8SI vector with truncating conversion:
;; each input is converted to a V4SI pseudo with fix_truncv4dfv4si2, and
;; the two pseudos are joined with avx_vec_concatv8si (r1 first, then r2).
3143 (define_expand "vec_pack_sfix_trunc_v4df"
3144 [(match_operand:V8SI 0 "register_operand" "")
3145 (match_operand:V4DF 1 "nonimmediate_operand" "")
3146 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3151 r1 = gen_reg_rtx (V4SImode);
3152 r2 = gen_reg_rtx (V4SImode);
3154 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3155 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3156 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI vector with truncating conversion.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo and convert it with a single fix_truncv4dfv4si2.
;; The other visible sequence converts each input with sse2_cvttpd2dq and
;; merges the results with vec_interleave_lowv2di, viewing the
;; destination and both temporaries as V2DI through gen_lowpart.
3160 (define_expand "vec_pack_sfix_trunc_v2df"
3161 [(match_operand:V4SI 0 "register_operand" "")
3162 (match_operand:V2DF 1 "nonimmediate_operand" "")
3163 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3168 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3170 tmp0 = gen_reg_rtx (V4DFmode);
3171 tmp1 = force_reg (V2DFmode, operands[1]);
3173 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3174 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3178 tmp0 = gen_reg_rtx (V4SImode);
3179 tmp1 = gen_reg_rtx (V4SImode);
3181 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3182 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3184 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3185 gen_lowpart (V2DImode, tmp0),
3186 gen_lowpart (V2DImode, tmp1)));
;; Maps a DFmode vector mode to the integer vector mode obtained by
;; packing two such vectors: V4DF -> V8SI, V2DF -> V4SI.
3191 (define_mode_attr ssepackfltmode
3192 [(V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned pack-and-truncate of two VF2 vectors into <ssepackfltmode>.
;; Each input goes through ix86_expand_adjust_ufix_to_sfix_si, which
;; returns an adjusted input (tmp[0], tmp[1]) and stores a second value
;; (tmp[2], tmp[3]).  The adjusted inputs are packed with the signed
;; vec_pack_sfix_trunc_<mode> into tmp[4].  The even elements of
;; tmp[2]/tmp[3] are extracted into tmp[5]: directly in <ssepackfltmode>
;; when that mode is V4SI or TARGET_AVX2 holds, and via V8SF lowparts in
;; the other visible sequence.  The result is tmp[4] XOR tmp[5], moved
;; into operand 0 if expand_simple_binop returned a different rtx.
3194 (define_expand "vec_pack_ufix_trunc_<mode>"
3195 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3196 (match_operand:VF2 1 "register_operand" "")
3197 (match_operand:VF2 2 "register_operand" "")]
3201 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3202 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3203 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3204 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3205 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3207 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3208 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3212 tmp[5] = gen_reg_rtx (V8SFmode);
3213 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3214 gen_lowpart (V8SFmode, tmp[3]), 0);
3215 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3217 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3218 operands[0], 0, OPTAB_DIRECT);
3219 if (tmp[6] != operands[0])
3220 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF vectors into one V8SI vector using the non-truncating
;; conversion: each input is converted to a V4SI pseudo with
;; avx_cvtpd2dq256, and the two pseudos are joined with
;; avx_vec_concatv8si (r1 first, then r2).
3224 (define_expand "vec_pack_sfix_v4df"
3225 [(match_operand:V8SI 0 "register_operand" "")
3226 (match_operand:V4DF 1 "nonimmediate_operand" "")
3227 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3232 r1 = gen_reg_rtx (V4SImode);
3233 r2 = gen_reg_rtx (V4SImode);
3235 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3236 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3237 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI vector using the non-truncating
;; conversion.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo and convert it with a single avx_cvtpd2dq256.
;; The other visible sequence converts each input with sse2_cvtpd2dq and
;; merges the results with vec_interleave_lowv2di, viewing the
;; destination and both temporaries as V2DI through gen_lowpart.
3241 (define_expand "vec_pack_sfix_v2df"
3242 [(match_operand:V4SI 0 "register_operand" "")
3243 (match_operand:V2DF 1 "nonimmediate_operand" "")
3244 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3249 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3251 tmp0 = gen_reg_rtx (V4DFmode);
3252 tmp1 = force_reg (V2DFmode, operands[1]);
3254 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3255 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3259 tmp0 = gen_reg_rtx (V4SImode);
3260 tmp1 = gen_reg_rtx (V4SImode);
3262 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3263 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3265 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3266 gen_lowpart (V2DImode, tmp0),
3267 gen_lowpart (V2DImode, tmp1)));
3272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3274 ;; Parallel single-precision floating point element swizzling
3276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands picks the destination rtx to use
;; (dst); sse_movhlps is emitted into dst, and dst is then copied to
;; operand 0 when the two differ.
3278 (define_expand "sse_movhlps_exp"
3279 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3282 (match_operand:V4SF 1 "nonimmediate_operand" "")
3283 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3284 (parallel [(const_int 6)
3290 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3292 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3294 /* Fix up the destination if needed. */
3295 if (dst != operands[0])
3296 emit_move_insn (operands[0], dst);
;; movhlps and its memory-operand equivalents, five alternatives:
;;   0/1: register-register movhlps (legacy two-operand / VEX three-operand)
;;   2/3: operand 2 in offsettable memory, loaded with movlps/vmovlps from
;;        the %H2 address
;;   4:   memory destination, emitted as a movhps store of operand 2
;; The condition rejects operands 1 and 2 both being memory.
3301 (define_insn "sse_movhlps"
3302 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3305 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3306 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3307 (parallel [(const_int 6)
3311 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3313 movhlps\t{%2, %0|%0, %2}
3314 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3315 movlps\t{%H2, %0|%0, %H2}
3316 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3317 %vmovhps\t{%2, %0|%0, %2}"
3318 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3319 (set_attr "type" "ssemov")
3320 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3321 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps.  Mirrors sse_movhlps_exp: operands go
;; through ix86_fixup_binary_operands (defined elsewhere), sse_movlhps is
;; emitted into the destination it returns, and operand 0 is updated with a
;; move when that destination is a different rtx.
3323 (define_expand "sse_movlhps_exp"
3324 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3327 (match_operand:V4SF 1 "nonimmediate_operand" "")
3328 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3329 (parallel [(const_int 0)
3335 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3337 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3339 /* Fix up the destination if needed. */
3340 if (dst != operands[0])
3341 emit_move_insn (operands[0], dst);
;; Five-alternative insn, enabled when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  Alternatives 0/1 emit
;; movlhps/vmovlhps, alternatives 2/3 emit movhps/vmovhps, and alternative 4
;; (offsettable memory destination) emits %vmovlps to the %H0 form of
;; operand 0.
;; NOTE(review): alternative 3 (vmovhps, mode V2SF) gives operand 2 an "x"
;; register constraint, whereas its non-AVX twin (alternative 2) uses "m"
;; and sse_movhlps uses "o" for both -- confirm this is intended.
3346 (define_insn "sse_movlhps"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3350 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3351 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3352 (parallel [(const_int 0)
3356 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3358 movlhps\t{%2, %0|%0, %2}
3359 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3360 movhps\t{%2, %0|%0, %2}
3361 vmovhps\t{%2, %1, %0|%0, %1, %2}
3362 %vmovlps\t{%2, %H0|%H0, %2}"
3363 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3364 (set_attr "type" "ssemov")
3365 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3366 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3368 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Single-alternative V8SF insn emitting vunpckhps.  The selector lists
;; indices 2,10,3,11 followed by 6,14,7,15, i.e. the same pattern repeated
;; with an offset of 4 for the second group.
3369 (define_insn "avx_unpckhps256"
3370 [(set (match_operand:V8SF 0 "register_operand" "=x")
3373 (match_operand:V8SF 1 "register_operand" "x")
3374 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3375 (parallel [(const_int 2) (const_int 10)
3376 (const_int 3) (const_int 11)
3377 (const_int 6) (const_int 14)
3378 (const_int 7) (const_int 15)])))]
3380 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3381 [(set_attr "type" "sselog")
3382 (set_attr "prefix" "vex")
3383 (set_attr "mode" "V8SF")])
;; Expander producing a three-step sequence.  The first two selector lists
;; are the ones used by avx_unpcklps256 and avx_unpckhps256; their results
;; presumably land in the fresh V8SF pseudos allocated for operands 3 and 4
;; (the intermediate set destinations are not visible in this excerpt).
;; The final set of operand 0 uses selector 4-7,12-15.
3385 (define_expand "vec_interleave_highv8sf"
3389 (match_operand:V8SF 1 "register_operand" "x")
3390 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3391 (parallel [(const_int 0) (const_int 8)
3392 (const_int 1) (const_int 9)
3393 (const_int 4) (const_int 12)
3394 (const_int 5) (const_int 13)])))
3400 (parallel [(const_int 2) (const_int 10)
3401 (const_int 3) (const_int 11)
3402 (const_int 6) (const_int 14)
3403 (const_int 7) (const_int 15)])))
3404 (set (match_operand:V8SF 0 "register_operand" "")
3409 (parallel [(const_int 4) (const_int 5)
3410 (const_int 6) (const_int 7)
3411 (const_int 12) (const_int 13)
3412 (const_int 14) (const_int 15)])))]
3415 operands[3] = gen_reg_rtx (V8SFmode);
3416 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF high interleave (selector 2,6,3,7).  Alternative 0 is the two-operand
;; unpckhps with operand 1 tied to the destination; alternative 1 is the
;; three-operand vunpckhps.  Operand 2 may be a register or memory.
3419 (define_insn "vec_interleave_highv4sf"
3420 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3423 (match_operand:V4SF 1 "register_operand" "0,x")
3424 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3425 (parallel [(const_int 2) (const_int 6)
3426 (const_int 3) (const_int 7)])))]
3429 unpckhps\t{%2, %0|%0, %2}
3430 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3431 [(set_attr "isa" "noavx,avx")
3432 (set_attr "type" "sselog")
3433 (set_attr "prefix" "orig,vex")
3434 (set_attr "mode" "V4SF")])
3436 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Single-alternative V8SF insn emitting vunpcklps.  The selector lists
;; indices 0,8,1,9 followed by 4,12,5,13, i.e. the same pattern repeated
;; with an offset of 4 for the second group.
3437 (define_insn "avx_unpcklps256"
3438 [(set (match_operand:V8SF 0 "register_operand" "=x")
3441 (match_operand:V8SF 1 "register_operand" "x")
3442 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3443 (parallel [(const_int 0) (const_int 8)
3444 (const_int 1) (const_int 9)
3445 (const_int 4) (const_int 12)
3446 (const_int 5) (const_int 13)])))]
3448 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "type" "sselog")
3450 (set_attr "prefix" "vex")
3451 (set_attr "mode" "V8SF")])
;; Expander producing a three-step sequence.  The first two selector lists
;; are the ones used by avx_unpcklps256 and avx_unpckhps256; their results
;; presumably land in the fresh V8SF pseudos allocated for operands 3 and 4
;; (the intermediate set destinations are not visible in this excerpt).
;; The final set of operand 0 uses selector 0-3,8-11.
3453 (define_expand "vec_interleave_lowv8sf"
3457 (match_operand:V8SF 1 "register_operand" "x")
3458 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3459 (parallel [(const_int 0) (const_int 8)
3460 (const_int 1) (const_int 9)
3461 (const_int 4) (const_int 12)
3462 (const_int 5) (const_int 13)])))
3468 (parallel [(const_int 2) (const_int 10)
3469 (const_int 3) (const_int 11)
3470 (const_int 6) (const_int 14)
3471 (const_int 7) (const_int 15)])))
3472 (set (match_operand:V8SF 0 "register_operand" "")
3477 (parallel [(const_int 0) (const_int 1)
3478 (const_int 2) (const_int 3)
3479 (const_int 8) (const_int 9)
3480 (const_int 10) (const_int 11)])))]
3483 operands[3] = gen_reg_rtx (V8SFmode);
3484 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave (selector 0,4,1,5).  Alternative 0 is the two-operand
;; unpcklps with operand 1 tied to the destination; alternative 1 is the
;; three-operand vunpcklps.  Operand 2 may be a register or memory.
3487 (define_insn "vec_interleave_lowv4sf"
3488 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3491 (match_operand:V4SF 1 "register_operand" "0,x")
3492 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3493 (parallel [(const_int 0) (const_int 4)
3494 (const_int 1) (const_int 5)])))]
3497 unpcklps\t{%2, %0|%0, %2}
3498 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3499 [(set_attr "isa" "noavx,avx")
3500 (set_attr "type" "sselog")
3501 (set_attr "prefix" "orig,vex")
3502 (set_attr "mode" "V4SF")])
3504 ;; These are modeled with the same vec_concat as the others so that we
3505 ;; capture users of shufps that can use the new instructions
;; avx_movshdup256: V8SF insn emitting vmovshdup from a register or memory
;; source.  The selector repeats each odd index twice (1,1,3,3,5,5,7,7).
3506 (define_insn "avx_movshdup256"
3507 [(set (match_operand:V8SF 0 "register_operand" "=x")
3510 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3512 (parallel [(const_int 1) (const_int 1)
3513 (const_int 3) (const_int 3)
3514 (const_int 5) (const_int 5)
3515 (const_int 7) (const_int 7)])))]
3517 "vmovshdup\t{%1, %0|%0, %1}"
3518 [(set_attr "type" "sse")
3519 (set_attr "prefix" "vex")
3520 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movshdup256: emits %vmovshdup from a register or
;; memory source.  Only the first selector entry (index 1) is visible in
;; this excerpt.  The insn is marked as carrying a rep prefix (prefix_rep 1).
3522 (define_insn "sse3_movshdup"
3523 [(set (match_operand:V4SF 0 "register_operand" "=x")
3526 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3528 (parallel [(const_int 1)
3533 "%vmovshdup\t{%1, %0|%0, %1}"
3534 [(set_attr "type" "sse")
3535 (set_attr "prefix_rep" "1")
3536 (set_attr "prefix" "maybe_vex")
3537 (set_attr "mode" "V4SF")])
;; V8SF insn emitting vmovsldup from a register or memory source.  The
;; selector repeats each even index twice (0,0,2,2,4,4,6,6).
3539 (define_insn "avx_movsldup256"
3540 [(set (match_operand:V8SF 0 "register_operand" "=x")
3543 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3545 (parallel [(const_int 0) (const_int 0)
3546 (const_int 2) (const_int 2)
3547 (const_int 4) (const_int 4)
3548 (const_int 6) (const_int 6)])))]
3550 "vmovsldup\t{%1, %0|%0, %1}"
3551 [(set_attr "type" "sse")
3552 (set_attr "prefix" "vex")
3553 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movsldup256: emits %vmovsldup from a register or
;; memory source.  Only the first selector entry (index 0) is visible in
;; this excerpt.  The insn is marked as carrying a rep prefix (prefix_rep 1).
3555 (define_insn "sse3_movsldup"
3556 [(set (match_operand:V4SF 0 "register_operand" "=x")
3559 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3561 (parallel [(const_int 0)
3566 "%vmovsldup\t{%1, %0|%0, %1}"
3567 [(set_attr "type" "sse")
3568 (set_attr "prefix_rep" "1")
3569 (set_attr "prefix" "maybe_vex")
3570 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3.  The immediate is
;; split into four 2-bit fields and handed to avx_shufps256_1 as eight
;; explicit element indices: fields 0 and 1 are passed as-is and again with
;; +4, fields 2 and 3 are passed with +8 and again with +12.
3572 (define_expand "avx_shufps256"
3573 [(match_operand:V8SF 0 "register_operand" "")
3574 (match_operand:V8SF 1 "register_operand" "")
3575 (match_operand:V8SF 2 "nonimmediate_operand" "")
3576 (match_operand:SI 3 "const_int_operand" "")]
3579 int mask = INTVAL (operands[3]);
3580 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3581 GEN_INT ((mask >> 0) & 3),
3582 GEN_INT ((mask >> 2) & 3),
3583 GEN_INT (((mask >> 4) & 3) + 8),
3584 GEN_INT (((mask >> 6) & 3) + 8),
3585 GEN_INT (((mask >> 0) & 3) + 4),
3586 GEN_INT (((mask >> 2) & 3) + 4),
3587 GEN_INT (((mask >> 4) & 3) + 12),
3588 GEN_INT (((mask >> 6) & 3) + 12)));
3592 ;; Each 2-bit field of the mask selects 2 elements (the index and the index plus 4).
;; Operands 3-10 are the eight selector indices.  The insn condition requires
;; operands 7-10 to equal operands 3-6 plus 4.  The output code packs
;; operands 3-6 back into one immediate (operands 5 and 6 after subtracting
;; 8), stores it in operands[3] and emits vshufps.
3593 (define_insn "avx_shufps256_1"
3594 [(set (match_operand:V8SF 0 "register_operand" "=x")
3597 (match_operand:V8SF 1 "register_operand" "x")
3598 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3599 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3600 (match_operand 4 "const_0_to_3_operand" "")
3601 (match_operand 5 "const_8_to_11_operand" "")
3602 (match_operand 6 "const_8_to_11_operand" "")
3603 (match_operand 7 "const_4_to_7_operand" "")
3604 (match_operand 8 "const_4_to_7_operand" "")
3605 (match_operand 9 "const_12_to_15_operand" "")
3606 (match_operand 10 "const_12_to_15_operand" "")])))]
3608 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3609 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3610 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3611 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3614 mask = INTVAL (operands[3]);
3615 mask |= INTVAL (operands[4]) << 2;
3616 mask |= (INTVAL (operands[5]) - 8) << 4;
3617 mask |= (INTVAL (operands[6]) - 8) << 6;
3618 operands[3] = GEN_INT (mask);
3620 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3622 [(set_attr "type" "sselog")
3623 (set_attr "length_immediate" "1")
3624 (set_attr "prefix" "vex")
3625 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle immediate as operand 3.  The immediate is
;; split into four 2-bit fields and handed to sse_shufps_v4sf as explicit
;; element indices: fields 0 and 1 unchanged, fields 2 and 3 with +4.
3627 (define_expand "sse_shufps"
3628 [(match_operand:V4SF 0 "register_operand" "")
3629 (match_operand:V4SF 1 "register_operand" "")
3630 (match_operand:V4SF 2 "nonimmediate_operand" "")
3631 (match_operand:SI 3 "const_int_operand" "")]
3634 int mask = INTVAL (operands[3]);
3635 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3636 GEN_INT ((mask >> 0) & 3),
3637 GEN_INT ((mask >> 2) & 3),
3638 GEN_INT (((mask >> 4) & 3) + 4),
3639 GEN_INT (((mask >> 6) & 3) + 4)));
;; Shuffle insn over the VI4F_128 modes.  The result selects four elements
;; from the concatenation of operands 1 and 2: operands 3/4 index 0-3 and
;; operands 5/6 index 4-7.  The output code packs the four indices into one
;; immediate (operands 5 and 6 after subtracting 4), stores it in operands[3]
;; and returns the shufps or vshufps template depending on the alternative
;; (the case labels are not visible in this excerpt).
3643 (define_insn "sse_shufps_<mode>"
3644 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3645 (vec_select:VI4F_128
3646 (vec_concat:<ssedoublevecmode>
3647 (match_operand:VI4F_128 1 "register_operand" "0,x")
3648 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3649 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3650 (match_operand 4 "const_0_to_3_operand" "")
3651 (match_operand 5 "const_4_to_7_operand" "")
3652 (match_operand 6 "const_4_to_7_operand" "")])))]
3656 mask |= INTVAL (operands[3]) << 0;
3657 mask |= INTVAL (operands[4]) << 2;
3658 mask |= (INTVAL (operands[5]) - 4) << 4;
3659 mask |= (INTVAL (operands[6]) - 4) << 6;
3660 operands[3] = GEN_INT (mask);
3662 switch (which_alternative)
3665 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3667 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3672 [(set_attr "isa" "noavx,avx")
3673 (set_attr "type" "sselog")
3674 (set_attr "length_immediate" "1")
3675 (set_attr "prefix" "orig,vex")
3676 (set_attr "mode" "V4SF")])
;; Produces a V2SF value from elements 2 and 3 of V4SF operand 1.
;; Alternative 0 stores to memory with %vmovhps, alternative 1 is
;; register-to-register via %vmovhlps, and alternative 2 loads from an
;; offsettable memory source with %vmovlps on the %H1 form of the operand.
3678 (define_insn "sse_storehps"
3679 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3681 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3682 (parallel [(const_int 2) (const_int 3)])))]
3685 %vmovhps\t{%1, %0|%0, %1}
3686 %vmovhlps\t{%1, %d0|%d0, %1}
3687 %vmovlps\t{%H1, %d0|%d0, %H1}"
3688 [(set_attr "type" "ssemov")
3689 (set_attr "prefix" "maybe_vex")
3690 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps.  Operands are passed through
;; ix86_fixup_binary_operands (defined elsewhere); sse_loadhps is emitted
;; into the destination it returns, and operand 0 is updated with a move
;; when that destination is a different rtx.
3692 (define_expand "sse_loadhps_exp"
3693 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3696 (match_operand:V4SF 1 "nonimmediate_operand" "")
3697 (parallel [(const_int 0) (const_int 1)]))
3698 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3701 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3703 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3705 /* Fix up the destination if needed. */
3706 if (dst != operands[0])
3707 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps/vmovhps),
;; alternatives 2/3 take it from a register (movlhps/vmovlhps), and
;; alternative 4 handles an offsettable memory destination by storing
;; operand 2 with %vmovlps to the %H0 form of operand 0.
3712 (define_insn "sse_loadhps"
3713 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3716 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3717 (parallel [(const_int 0) (const_int 1)]))
3718 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3721 movhps\t{%2, %0|%0, %2}
3722 vmovhps\t{%2, %1, %0|%0, %1, %2}
3723 movlhps\t{%2, %0|%0, %2}
3724 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3725 %vmovlps\t{%2, %H0|%H0, %2}"
3726 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3727 (set_attr "type" "ssemov")
3728 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3729 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Produces a V2SF value from elements 0 and 1 of V4SF operand 1.
;; Alternative 0 stores to memory with %vmovlps, alternative 1 is a
;; register-to-register %vmovaps, and alternative 2 loads from memory with
;; %vmovlps into the %d0 form of the destination.
3731 (define_insn "sse_storelps"
3732 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3734 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3735 (parallel [(const_int 0) (const_int 1)])))]
3738 %vmovlps\t{%1, %0|%0, %1}
3739 %vmovaps\t{%1, %0|%0, %1}
3740 %vmovlps\t{%1, %d0|%d0, %1}"
3741 [(set_attr "type" "ssemov")
3742 (set_attr "prefix" "maybe_vex")
3743 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps.  Operands are passed through
;; ix86_fixup_binary_operands (defined elsewhere); sse_loadlps is emitted
;; into the destination it returns, and operand 0 is updated with a move
;; when that destination is a different rtx.
3745 (define_expand "sse_loadlps_exp"
3746 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3748 (match_operand:V2SF 2 "nonimmediate_operand" "")
3750 (match_operand:V4SF 1 "nonimmediate_operand" "")
3751 (parallel [(const_int 2) (const_int 3)]))))]
3754 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3756 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3758 /* Fix up the destination if needed. */
3759 if (dst != operands[0])
3760 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 emit shufps/vshufps with immediate 0xe4, alternatives
;; 2/3 emit movlps/vmovlps, and alternative 4 (memory destination) stores
;; operand 2 with %vmovlps.
;; NOTE(review): alternative 3 (vmovlps, mode V2SF) gives operand 2 an "x"
;; register constraint, whereas its non-AVX twin (alternative 2) uses "m"
;; -- confirm this is intended.
3765 (define_insn "sse_loadlps"
3766 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3768 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3770 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3771 (parallel [(const_int 2) (const_int 3)]))))]
3774 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3775 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3776 movlps\t{%2, %0|%0, %2}
3777 vmovlps\t{%2, %1, %0|%0, %1, %2}
3778 %vmovlps\t{%2, %0|%0, %2}"
3779 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3780 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3781 (set_attr "length_immediate" "1,1,*,*,*")
3782 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3783 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only V4SF insn with two alternatives: movss with operand 1 tied
;; to the destination, and the three-operand vmovss.  The insn mode
;; attribute is SF.  The rtx operator combining operands 2 and 1 is not
;; visible in this excerpt.
3785 (define_insn "sse_movss"
3786 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3788 (match_operand:V4SF 2 "register_operand" " x,x")
3789 (match_operand:V4SF 1 "register_operand" " 0,x")
3793 movss\t{%2, %0|%0, %2}
3794 vmovss\t{%2, %1, %0|%0, %1, %2}"
3795 [(set_attr "isa" "noavx,avx")
3796 (set_attr "type" "ssemov")
3797 (set_attr "prefix" "orig,vex")
3798 (set_attr "mode" "SF")])
;; Emits vbroadcastss for each VF1 destination mode, taking element 0 of a
;; V4SF register as the source.  The insn condition is not visible in this
;; excerpt.
3800 (define_insn "avx2_vec_dup<mode>"
3801 [(set (match_operand:VF1 0 "register_operand" "=x")
3804 (match_operand:V4SF 1 "register_operand" "x")
3805 (parallel [(const_int 0)]))))]
3807 "vbroadcastss\t{%1, %0|%0, %1}"
3808 [(set_attr "type" "sselog1")
3809 (set_attr "prefix" "vex")
3810 (set_attr "mode" "<MODE>")])
;; Builds a V4SF from scalar SF operand 1 with three alternatives: AVX with
;; a register source (vshufps with immediate 0), AVX with a memory source
;; (vbroadcastss), and non-AVX with the source tied to the destination
;; (shufps with immediate 0 on operand 0 itself).
3812 (define_insn "vec_dupv4sf"
3813 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3815 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3818 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3819 vbroadcastss\t{%1, %0|%0, %1}
3820 shufps\t{$0, %0, %0|%0, %0, 0}"
3821 [(set_attr "isa" "avx,avx,noavx")
3822 (set_attr "type" "sselog1,ssemov,sselog1")
3823 (set_attr "length_immediate" "1,0,1")
3824 (set_attr "prefix_extra" "0,1,*")
3825 (set_attr "prefix" "vex,vex,orig")
3826 (set_attr "mode" "V4SF")])
3828 ;; Although insertps takes register source, we prefer
3829 ;; unpcklps with register source since it is shorter.
;; Seven alternatives forming a V2SF from two SF operands:
;;   0/1  register operand 2: unpcklps / vunpcklps
;;   2/3  memory operand 2: insertps / vinsertps with immediate 0x10
;;   4    operand 2 matching constraint "C": %vmovss load of operand 1
;;   5/6  "*y" destination: punpckldq, or movd when operand 2 matches "C"
3830 (define_insn "*vec_concatv2sf_sse4_1"
3831 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3833 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3834 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3837 unpcklps\t{%2, %0|%0, %2}
3838 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3839 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3840 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3841 %vmovss\t{%1, %0|%0, %1}
3842 punpckldq\t{%2, %0|%0, %2}
3843 movd\t{%1, %0|%0, %1}"
3844 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3845 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3846 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3847 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3848 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3849 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3850 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3852 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3853 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3854 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives forming a V2SF from two SF operands: unpcklps for two
;; registers, a movss load when operand 2 matches "C", and the "*y"
;; destination forms punpckldq and movd.
3855 (define_insn "*vec_concatv2sf_sse"
3856 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3858 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3859 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3862 unpcklps\t{%2, %0|%0, %2}
3863 movss\t{%1, %0|%0, %1}
3864 punpckldq\t{%2, %0|%0, %2}
3865 movd\t{%1, %0|%0, %1}"
3866 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3867 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF operands.  With operand 2 in a register the
;; insn emits movlhps/vmovlhps; with operand 2 in memory it emits
;; movhps/vmovhps.  Operand 1 is always a register, tied to the destination
;; in the non-AVX alternatives.
3869 (define_insn "*vec_concatv4sf"
3870 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3872 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3873 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3876 movlhps\t{%2, %0|%0, %2}
3877 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3878 movhps\t{%2, %0|%0, %2}
3879 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3880 [(set_attr "isa" "noavx,avx,noavx,avx")
3881 (set_attr "type" "ssemov")
3882 (set_attr "prefix" "orig,vex,orig,vex")
3883 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for every V_128 mode.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination register (operand 0) and the initializer (operand 1).
3885 (define_expand "vec_init<mode>"
3886 [(match_operand:V_128 0 "register_operand" "")
3887 (match_operand 1 "" "")]
3890 ix86_expand_vector_init (false, operands[0], operands[1]);
3894 ;; Avoid combining registers from different units in a single alternative,
3895 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven-alternative insn over the VI4F_128 modes that merges a duplicated
;; scalar (operand 2) with vector operand 1.
;;   0-3  operand 1 matches "C": %vinsertps 0xe, %vmov<ssescalarmodesuffix>,
;;        %vmovd, movss
;;   4-5  scalar in an SSE register: movss / vmovss
;;   6-7  scalar in "*rm": pinsrd / vpinsrd with immediate 0
;;   8-10 memory destination; their templates are not visible in this excerpt
;; The type attribute is sselog for alternatives 0, 6 and 7, imov for 9,
;; fmov for 10 and ssemov otherwise.
3896 (define_insn "vec_set<mode>_0"
3897 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3898 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
3900 (vec_duplicate:VI4F_128
3901 (match_operand:<ssescalarmode> 2 "general_operand"
3902 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3903 (match_operand:VI4F_128 1 "vector_move_operand"
3904 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
3908 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3909 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3910 %vmovd\t{%2, %0|%0, %2}
3911 movss\t{%2, %0|%0, %2}
3912 movss\t{%2, %0|%0, %2}
3913 vmovss\t{%2, %1, %0|%0, %1, %2}
3914 pinsrd\t{$0, %2, %0|%0, %2, 0}
3915 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3919 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3921 (cond [(eq_attr "alternative" "0,6,7")
3922 (const_string "sselog")
3923 (eq_attr "alternative" "9")
3924 (const_string "imov")
3925 (eq_attr "alternative" "10")
3926 (const_string "fmov")
3928 (const_string "ssemov")))
3929 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3930 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3931 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3932 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3934 ;; A subset is vec_setv4sf.
;; Operand 3 must be a constant whose exact_log2 is a valid V4SF element
;; index (the unsigned compare against GET_MODE_NUNITS also rejects the -1
;; returned for non-powers of two).  The output code converts that index
;; into the insertps immediate by shifting it left by 4, then returns the
;; insertps or vinsertps template depending on the alternative.
3935 (define_insn "*vec_setv4sf_sse4_1"
3936 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3939 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3940 (match_operand:V4SF 1 "register_operand" "0,x")
3941 (match_operand:SI 3 "const_int_operand" "")))]
3943 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3944 < GET_MODE_NUNITS (V4SFmode))"
3946 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3947 switch (which_alternative)
3950 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3952 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3957 [(set_attr "isa" "noavx,avx")
3958 (set_attr "type" "sselog")
3959 (set_attr "prefix_data16" "1,*")
3960 (set_attr "prefix_extra" "1")
3961 (set_attr "length_immediate" "1")
3962 (set_attr "prefix" "orig,vex")
3963 (set_attr "mode" "V4SF")])
;; insertps/vinsertps modelled as an unspec with an 8-bit immediate
;; (operand 3).  When operand 2 is in memory, the output code takes the top
;; two bits of the immediate (imm >> 6) as an element count, clears them
;; from the immediate (& 0x3f) and instead re-addresses operand 2 as an SFmode
;; reference at byte offset count * 4.
3965 (define_insn "sse4_1_insertps"
3966 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3967 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3968 (match_operand:V4SF 1 "register_operand" "0,x")
3969 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3973 if (MEM_P (operands[2]))
3975 unsigned count_s = INTVAL (operands[3]) >> 6;
3977 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3978 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3980 switch (which_alternative)
3983 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3985 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3990 [(set_attr "isa" "noavx,avx")
3991 (set_attr "type" "sselog")
3992 (set_attr "prefix_data16" "1,*")
3993 (set_attr "prefix_extra" "1")
3994 (set_attr "length_immediate" "1")
3995 (set_attr "prefix" "orig,vex")
3996 (set_attr "mode" "V4SF")])
;; NOTE(review): the header line of this definition is not visible in this
;; excerpt; it is presumably a define_split.  It matches a VI4F_128 memory
;; destination receiving a duplicated non-memory scalar, applies only for
;; TARGET_SSE after reload, and emits a scalar-mode move to offset 0 of the
;; memory operand.
3999 [(set (match_operand:VI4F_128 0 "memory_operand" "")
4001 (vec_duplicate:VI4F_128
4002 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4005 "TARGET_SSE && reload_completed"
4008 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set expander for every mode in iterator V.  All work is delegated to
;; ix86_expand_vector_set, called with false, the vector register
;; (operand 0), the scalar register (operand 1) and the constant element
;; index taken from operand 2.
4013 (define_expand "vec_set<mode>"
4014 [(match_operand:V 0 "register_operand" "")
4015 (match_operand:<ssescalarmode> 1 "register_operand" "")
4016 (match_operand 2 "const_int_operand" "")]
4019 ix86_expand_vector_set (false, operands[0], operands[1],
4020 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF, allowed unless both operands are
;; memory.  After reload it is split into a plain SFmode move: operand 1 is
;; re-expressed in SFmode either as a hard register with the same REGNO or
;; via gen_lowpart (the condition choosing between the two is not visible
;; in this excerpt).
4024 (define_insn_and_split "*vec_extractv4sf_0"
4025 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4027 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4028 (parallel [(const_int 0)])))]
4029 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4031 "&& reload_completed"
4034 rtx op1 = operands[1];
4036 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4038 op1 = gen_lowpart (SFmode, op1);
4039 emit_move_insn (operands[0], op1);
;; Extraction of element 0-3 (operand 2) of a V4SF register.  Alternative 0
;; ("rm" destination) emits %vextractps.  When the destination is an SSE
;; register the insn is split after reload: the destination is viewed as a
;; V4SF register and, depending on operand 2 (case labels not visible in
;; this excerpt), the split emits either sse_shufps_v4sf with the selected
;; index replicated into all four selector slots or vec_interleave_highv4sf
;; of operand 1 with itself.
4043 (define_insn_and_split "*sse4_1_extractps"
4044 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4046 (match_operand:V4SF 1 "register_operand" "x,0,x")
4047 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4050 %vextractps\t{%2, %1, %0|%0, %1, %2}
4053 "&& reload_completed && SSE_REG_P (operands[0])"
4056 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4057 switch (INTVAL (operands[2]))
4061 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4062 operands[2], operands[2],
4063 GEN_INT (INTVAL (operands[2]) + 4),
4064 GEN_INT (INTVAL (operands[2]) + 4)));
4067 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4070 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4075 [(set_attr "isa" "*,noavx,avx")
4076 (set_attr "type" "sselog,*,*")
4077 (set_attr "prefix_data16" "1,*,*")
4078 (set_attr "prefix_extra" "1,*,*")
4079 (set_attr "length_immediate" "1,*,*")
4080 (set_attr "prefix" "maybe_vex,*,*")
4081 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element 0-3 from a V4SF in offsettable memory into an SSE,
;; general or x87 register.  After reload it is split into a plain SFmode
;; load from operand 1 at byte offset 4 * index.
4083 (define_insn_and_split "*vec_extract_v4sf_mem"
4084 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4086 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4087 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4090 "&& reload_completed"
4093 int i = INTVAL (operands[2]);
4095 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for every V_256 mode.  Operand 2 (constant 0 or 1) selects a
;; generator through a switch: the visible assignments pick
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode> (case labels are
;; not visible in this excerpt).  The chosen generator is then emitted with
;; operands 0 and 1.
4099 (define_expand "avx_vextractf128<mode>"
4100 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4101 (match_operand:V_256 1 "register_operand" "")
4102 (match_operand:SI 2 "const_0_to_1_operand" "")]
4105 rtx (*insn)(rtx, rtx);
4107 switch (INTVAL (operands[2]))
4110 insn = gen_vec_extract_lo_<mode>;
4113 insn = gen_vec_extract_hi_<mode>;
4119 emit_insn (insn (operands[0], operands[1]));
;; Low-half extraction (elements 0 and 1) for the VI8F_256 modes, allowed
;; under TARGET_AVX unless both operands are memory.  After reload it is
;; split into a plain <ssehalfvecmode> move: operand 1 is re-expressed in
;; the half-width mode either as a hard register with the same REGNO or via
;; gen_lowpart (the selecting condition is not visible in this excerpt).
4123 (define_insn_and_split "vec_extract_lo_<mode>"
4124 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4125 (vec_select:<ssehalfvecmode>
4126 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4127 (parallel [(const_int 0) (const_int 1)])))]
4128 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4130 "&& reload_completed"
4133 rtx op1 = operands[1];
4135 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4137 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4138 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 2 and 3) for the VI8F_256 modes.  Emits
;; vextract<i128> with immediate 1 into a register (alternative 0) or to
;; memory (alternative 1; "memory" attribute "store").
4142 (define_insn "vec_extract_hi_<mode>"
4143 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4144 (vec_select:<ssehalfvecmode>
4145 (match_operand:VI8F_256 1 "register_operand" "x,x")
4146 (parallel [(const_int 2) (const_int 3)])))]
4148 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4149 [(set_attr "type" "sselog")
4150 (set_attr "prefix_extra" "1")
4151 (set_attr "length_immediate" "1")
4152 (set_attr "memory" "none,store")
4153 (set_attr "prefix" "vex")
4154 (set_attr "mode" "<sseinsnmode>")])
;; Low-half extraction (elements 0-3) for the VI4F_256 modes, allowed under
;; TARGET_AVX unless both operands are memory.  After reload it is split
;; into a plain <ssehalfvecmode> move: operand 1 is re-expressed in the
;; half-width mode either as a hard register with the same REGNO or via
;; gen_lowpart (the selecting condition is not visible in this excerpt).
4156 (define_insn_and_split "vec_extract_lo_<mode>"
4157 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4158 (vec_select:<ssehalfvecmode>
4159 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4160 (parallel [(const_int 0) (const_int 1)
4161 (const_int 2) (const_int 3)])))]
4162 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4164 "&& reload_completed"
4167 rtx op1 = operands[1];
4169 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4171 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4172 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 4-7) for the VI4F_256 modes.  Emits
;; vextract<i128> with immediate 1 into a register (alternative 0) or to
;; memory (alternative 1; "memory" attribute "store").
4176 (define_insn "vec_extract_hi_<mode>"
4177 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4178 (vec_select:<ssehalfvecmode>
4179 (match_operand:VI4F_256 1 "register_operand" "x,x")
4180 (parallel [(const_int 4) (const_int 5)
4181 (const_int 6) (const_int 7)])))]
4183 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_extra" "1")
4186 (set_attr "length_immediate" "1")
4187 (set_attr "memory" "none,store")
4188 (set_attr "prefix" "vex")
4189 (set_attr "mode" "<sseinsnmode>")])
;; Low-half extraction (elements 0-7) of a V16HI into a V8HI, allowed under
;; TARGET_AVX unless both operands are memory.  After reload it is split
;; into a plain V8HI move: operand 1 is re-expressed in V8HImode either as
;; a hard register with the same REGNO or via gen_lowpart (the selecting
;; condition is not visible in this excerpt).
4191 (define_insn_and_split "vec_extract_lo_v16hi"
4192 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4194 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4195 (parallel [(const_int 0) (const_int 1)
4196 (const_int 2) (const_int 3)
4197 (const_int 4) (const_int 5)
4198 (const_int 6) (const_int 7)])))]
4199 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4201 "&& reload_completed"
4204 rtx op1 = operands[1];
4206 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4208 op1 = gen_lowpart (V8HImode, op1);
4209 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 8-15) of a V16HI register into a V8HI
;; register or memory.  Emits vextract%~128 with immediate 1; the insn mode
;; attribute is OI.
4213 (define_insn "vec_extract_hi_v16hi"
4214 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4216 (match_operand:V16HI 1 "register_operand" "x,x")
4217 (parallel [(const_int 8) (const_int 9)
4218 (const_int 10) (const_int 11)
4219 (const_int 12) (const_int 13)
4220 (const_int 14) (const_int 15)])))]
4222 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_extra" "1")
4225 (set_attr "length_immediate" "1")
4226 (set_attr "memory" "none,store")
4227 (set_attr "prefix" "vex")
4228 (set_attr "mode" "OI")])
;; Low-half extraction (elements 0-15) of a V32QI into a V16QI, allowed
;; under TARGET_AVX unless both operands are memory.  After reload it is
;; split into a plain V16QI move: operand 1 is re-expressed in V16QImode
;; either as a hard register with the same REGNO or via gen_lowpart (the
;; selecting condition is not visible in this excerpt).
4230 (define_insn_and_split "vec_extract_lo_v32qi"
4231 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4233 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4234 (parallel [(const_int 0) (const_int 1)
4235 (const_int 2) (const_int 3)
4236 (const_int 4) (const_int 5)
4237 (const_int 6) (const_int 7)
4238 (const_int 8) (const_int 9)
4239 (const_int 10) (const_int 11)
4240 (const_int 12) (const_int 13)
4241 (const_int 14) (const_int 15)])))]
4242 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4244 "&& reload_completed"
4247 rtx op1 = operands[1];
4249 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4251 op1 = gen_lowpart (V16QImode, op1);
4252 emit_move_insn (operands[0], op1);
;; High-half extraction (elements 16-31) of a V32QI register into a V16QI
;; register or memory.  Emits vextract%~128 with immediate 1; the insn mode
;; attribute is OI.
4256 (define_insn "vec_extract_hi_v32qi"
4257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4259 (match_operand:V32QI 1 "register_operand" "x,x")
4260 (parallel [(const_int 16) (const_int 17)
4261 (const_int 18) (const_int 19)
4262 (const_int 20) (const_int 21)
4263 (const_int 22) (const_int 23)
4264 (const_int 24) (const_int 25)
4265 (const_int 26) (const_int 27)
4266 (const_int 28) (const_int 29)
4267 (const_int 30) (const_int 31)])))]
4269 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4270 [(set_attr "type" "sselog")
4271 (set_attr "prefix_extra" "1")
4272 (set_attr "length_immediate" "1")
4273 (set_attr "memory" "none,store")
4274 (set_attr "prefix" "vex")
4275 (set_attr "mode" "OI")])
4277 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with its
;; unconditional 128-bit counterpart.
4278 (define_mode_iterator VEC_EXTRACT_MODE
4279 [(V32QI "TARGET_AVX") V16QI
4280 (V16HI "TARGET_AVX") V8HI
4281 (V8SI "TARGET_AVX") V4SI
4282 (V4DI "TARGET_AVX") V2DI
4283 (V8SF "TARGET_AVX") V4SF
4284 (V4DF "TARGET_AVX") V2DF])
;; vec_extract expander for every VEC_EXTRACT_MODE mode.  All work is
;; delegated to ix86_expand_vector_extract, called with false, the scalar
;; destination (operand 0), the vector register (operand 1) and the
;; constant element index taken from operand 2.
4286 (define_expand "vec_extract<mode>"
4287 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4288 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4289 (match_operand 2 "const_int_operand" "")]
4292 ix86_expand_vector_extract (false, operands[0], operands[1],
4293 INTVAL (operands[2]));
4297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4299 ;; Parallel double-precision floating point element swizzling
4301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4303 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Single-alternative V4DF insn emitting vunpckhpd.  The selector lists
;; indices 1,5 followed by 3,7.
4304 (define_insn "avx_unpckhpd256"
4305 [(set (match_operand:V4DF 0 "register_operand" "=x")
4308 (match_operand:V4DF 1 "register_operand" "x")
4309 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4310 (parallel [(const_int 1) (const_int 5)
4311 (const_int 3) (const_int 7)])))]
4313 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4314 [(set_attr "type" "sselog")
4315 (set_attr "prefix" "vex")
4316 (set_attr "mode" "V4DF")])
;; Expander producing a three-step sequence.  The first two selector lists
;; (0,4,2,6 and 1,5,3,7) are the ones used by the unpcklpd256 and
;; unpckhpd256 patterns; their results presumably land in the fresh V4DF
;; pseudos allocated for operands 3 and 4 (the intermediate set
;; destinations are not visible in this excerpt).  The final set of
;; operand 0 uses selector 2,3,6,7.
4318 (define_expand "vec_interleave_highv4df"
4322 (match_operand:V4DF 1 "register_operand" "x")
4323 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4324 (parallel [(const_int 0) (const_int 4)
4325 (const_int 2) (const_int 6)])))
4331 (parallel [(const_int 1) (const_int 5)
4332 (const_int 3) (const_int 7)])))
4333 (set (match_operand:V4DF 0 "register_operand" "")
4338 (parallel [(const_int 2) (const_int 3)
4339 (const_int 6) (const_int 7)])))]
4342 operands[3] = gen_reg_rtx (V4DFmode);
4343 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register before the pattern is
;; emitted.
4347 (define_expand "vec_interleave_highv2df"
4348 [(set (match_operand:V2DF 0 "register_operand" "")
4351 (match_operand:V2DF 1 "nonimmediate_operand" "")
4352 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4353 (parallel [(const_int 1)
4357 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4358 operands[2] = force_reg (V2DFmode, operands[2]);
;; Six-alternative insn, enabled for TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;;   0/1  register operands: unpckhpd / vunpckhpd
;;   2    operand 2 identical to memory operand 1: %vmovddup of %H1 (sse3)
;;   3/4  memory operand 1: movlpd / vmovlpd loading %H1
;;   5    memory destination: %vmovhpd store of operand 1
4361 (define_insn "*vec_interleave_highv2df"
4362 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4365 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4366 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4367 (parallel [(const_int 1)
4369 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4371 unpckhpd\t{%2, %0|%0, %2}
4372 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4373 %vmovddup\t{%H1, %0|%0, %H1}
4374 movlpd\t{%H1, %0|%0, %H1}
4375 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4376 %vmovhpd\t{%1, %0|%0, %1}"
4377 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4378 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4379 (set_attr "prefix_data16" "*,*,*,1,*,1")
4380 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4381 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4383 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF source (operand 1, register or memory) and
;; selector 0,4,2,6 -- the same selector as the unpcklpd256 patterns.  No
;; preparation code is visible for it in this excerpt.
4384 (define_expand "avx_movddup256"
4385 [(set (match_operand:V4DF 0 "register_operand" "")
4388 (match_operand:V4DF 1 "nonimmediate_operand" "")
4390 (parallel [(const_int 0) (const_int 4)
4391 (const_int 2) (const_int 6)])))]
;; Named expander for the V4DF low unpack (selector 0,4,2,6) with a register
;; operand 1 and a register-or-memory operand 2.  No preparation code is
;; visible for it in this excerpt.
4394 (define_expand "avx_unpcklpd256"
4395 [(set (match_operand:V4DF 0 "register_operand" "")
4398 (match_operand:V4DF 1 "register_operand" "")
4399 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4400 (parallel [(const_int 0) (const_int 4)
4401 (const_int 2) (const_int 6)])))]
;; Matcher for the V4DF low unpack (selector 0,4,2,6).  Alternative 0 emits
;; vunpcklpd with a register operand 1.  Alternative 1 applies when operand
;; 1 is in memory and operand 2 is the same operand (constraint "1"), and
;; emits vmovddup from that memory location instead.
4404 (define_insn "*avx_unpcklpd256"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4408 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4409 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4410 (parallel [(const_int 0) (const_int 4)
4411 (const_int 2) (const_int 6)])))]
4414 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4415 vmovddup\t{%1, %0|%0, %1}"
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix" "vex")
4418 (set_attr "mode" "V4DF")])
;; Expander producing a three-step sequence.  The first two selector lists
;; (0,4,2,6 and 1,5,3,7) are the ones used by the unpcklpd256 and
;; unpckhpd256 patterns; their results presumably land in the fresh V4DF
;; pseudos allocated for operands 3 and 4 (the intermediate set
;; destinations are not visible in this excerpt).  The final set of
;; operand 0 uses selector 0,1,4,5.
4420 (define_expand "vec_interleave_lowv4df"
4424 (match_operand:V4DF 1 "register_operand" "x")
4425 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4426 (parallel [(const_int 0) (const_int 4)
4427 (const_int 2) (const_int 6)])))
4433 (parallel [(const_int 1) (const_int 5)
4434 (const_int 3) (const_int 7)])))
4435 (set (match_operand:V4DF 0 "register_operand" "")
4440 (parallel [(const_int 0) (const_int 1)
4441 (const_int 4) (const_int 5)])))]
4444 operands[3] = gen_reg_rtx (V4DFmode);
4445 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander vec_interleave_lowv2df: V2DF selection starting at element 0 of
;; operands 1 and 2 (both may be memory at expand time).  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
4448 (define_expand "vec_interleave_lowv2df"
4449 [(set (match_operand:V2DF 0 "register_operand" "")
4452 (match_operand:V2DF 1 "nonimmediate_operand" "")
4453 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4454 (parallel [(const_int 0)
4458 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4459 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF low interleave; requires SSE2 and an operand
;; combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives (see the "isa" attribute for which target each needs):
;;   0/1: unpcklpd / vunpcklpd, both inputs in registers
;;   2:   movddup, operand 1 in memory with operand 2 tied to it (SSE3)
;;   3/4: movhpd / vmovhpd, operand 2 loaded from memory
;;   5:   movlpd of operand 2 to the %H0 address of an offsettable
;;        memory destination whose other input is tied to it.
4462 (define_insn "*vec_interleave_lowv2df"
4463 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4466 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4467 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4468 (parallel [(const_int 0)
4470 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4472 unpcklpd\t{%2, %0|%0, %2}
4473 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4474 %vmovddup\t{%1, %0|%0, %1}
4475 movhpd\t{%2, %0|%0, %2}
4476 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4477 %vmovlpd\t{%2, %H0|%H0, %2}"
4478 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4479 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4480 (set_attr "prefix_data16" "*,*,*,1,*,1")
4481 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4482 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (SSE3): a V2DF memory destination built from
;; register operand 1 is replaced by two DFmode stores of the same low DF
;; register, at byte offsets 0 and 8 of the destination.
;; NOTE(review): the definition header and part of the pattern are not
;; visible in this excerpt; presumably this is a define_split -- confirm.
4485 [(set (match_operand:V2DF 0 "memory_operand" "")
4488 (match_operand:V2DF 1 "register_operand" "")
4490 (parallel [(const_int 0)
4492 "TARGET_SSE3 && reload_completed"
4495 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4496 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4497 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (SSE3) of a two-element selection from memory operand 1 whose
;; second index equals the first index plus 2: the result becomes a
;; vec_duplicate of the single DF element found at byte offset
;; (operand 2) * 8 of that memory operand.
;; NOTE(review): the definition header is not visible in this excerpt;
;; presumably this is a define_split -- confirm.
4502 [(set (match_operand:V2DF 0 "register_operand" "")
4505 (match_operand:V2DF 1 "memory_operand" "")
4507 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4508 (match_operand:SI 3 "const_int_operand" "")])))]
4509 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4510 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4512 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander avx_shufpd256: takes the shuffle immediate as operand 3 and
;; decodes it bit by bit into explicit element indices for
;; gen_avx_shufpd256_1 (bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6).
;; NOTE(review): the argument derived from bit 0 is on a line not visible
;; in this excerpt.
4515 (define_expand "avx_shufpd256"
4516 [(match_operand:V4DF 0 "register_operand" "")
4517 (match_operand:V4DF 1 "register_operand" "")
4518 (match_operand:V4DF 2 "nonimmediate_operand" "")
4519 (match_operand:SI 3 "const_int_operand" "")]
4522 int mask = INTVAL (operands[3]);
4523 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4525 GEN_INT (mask & 2 ? 5 : 4),
4526 GEN_INT (mask & 4 ? 3 : 2),
4527 GEN_INT (mask & 8 ? 7 : 6)));
;; Insn avx_shufpd256_1: V4DF shuffle described by four explicit indices
;; (operands 3..6, restricted to 0-1, 4-5, 2-3 and 6-7 respectively).
;; The output code folds the indices back into a 4-bit immediate
;; (bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6),
;; stores it in operands[3] and emits vshufpd.
4531 (define_insn "avx_shufpd256_1"
4532 [(set (match_operand:V4DF 0 "register_operand" "=x")
4535 (match_operand:V4DF 1 "register_operand" "x")
4536 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4537 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4538 (match_operand 4 "const_4_to_5_operand" "")
4539 (match_operand 5 "const_2_to_3_operand" "")
4540 (match_operand 6 "const_6_to_7_operand" "")])))]
4544 mask = INTVAL (operands[3]);
4545 mask |= (INTVAL (operands[4]) - 4) << 1;
4546 mask |= (INTVAL (operands[5]) - 2) << 2;
4547 mask |= (INTVAL (operands[6]) - 6) << 3;
4548 operands[3] = GEN_INT (mask);
4550 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4552 [(set_attr "type" "sselog")
4553 (set_attr "length_immediate" "1")
4554 (set_attr "prefix" "vex")
4555 (set_attr "mode" "V4DF")])
;; Expander sse2_shufpd: takes the shuffle immediate as operand 3 and
;; passes decoded element indices to gen_sse2_shufpd_v2df (bit 1 selects
;; index 3 or 2).  NOTE(review): the argument derived from bit 0 is on a
;; line not visible in this excerpt.
4557 (define_expand "sse2_shufpd"
4558 [(match_operand:V2DF 0 "register_operand" "")
4559 (match_operand:V2DF 1 "register_operand" "")
4560 (match_operand:V2DF 2 "nonimmediate_operand" "")
4561 (match_operand:SI 3 "const_int_operand" "")]
4564 int mask = INTVAL (operands[3]);
4565 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4567 GEN_INT (mask & 2 ? 3 : 2)));
4571 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Insn avx2_interleave_highv4di: V4DI selection beginning at element 1 of
;; register operand 1 and register-or-memory operand 2, emitted as
;; vpunpckhqdq (VEX, mode OI).  NOTE(review): the remaining selector
;; indices and the enabling condition are not visible in this excerpt.
4572 (define_insn "avx2_interleave_highv4di"
4573 [(set (match_operand:V4DI 0 "register_operand" "=x")
4576 (match_operand:V4DI 1 "register_operand" "x")
4577 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4578 (parallel [(const_int 1)
4583 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4584 [(set_attr "type" "sselog")
4585 (set_attr "prefix" "vex")
4586 (set_attr "mode" "OI")])
;; Insn vec_interleave_highv2di: V2DI selection beginning at element 1.
;; Alternative 0 (non-AVX) is the two-operand punpckhqdq with operand 1
;; tied to the destination; alternative 1 (AVX) is three-operand
;; vpunpckhqdq.  NOTE(review): enabling condition not visible here.
4588 (define_insn "vec_interleave_highv2di"
4589 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4592 (match_operand:V2DI 1 "register_operand" "0,x")
4593 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4594 (parallel [(const_int 1)
4598 punpckhqdq\t{%2, %0|%0, %2}
4599 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4600 [(set_attr "isa" "noavx,avx")
4601 (set_attr "type" "sselog")
4602 (set_attr "prefix_data16" "1,*")
4603 (set_attr "prefix" "orig,vex")
4604 (set_attr "mode" "TI")])
;; Insn avx2_interleave_lowv4di: V4DI selection beginning at element 0 of
;; register operand 1 and register-or-memory operand 2, emitted as
;; vpunpcklqdq (VEX, mode OI).  NOTE(review): the remaining selector
;; indices and the enabling condition are not visible in this excerpt.
4606 (define_insn "avx2_interleave_lowv4di"
4607 [(set (match_operand:V4DI 0 "register_operand" "=x")
4610 (match_operand:V4DI 1 "register_operand" "x")
4611 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4612 (parallel [(const_int 0)
4617 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4618 [(set_attr "type" "sselog")
4619 (set_attr "prefix" "vex")
4620 (set_attr "mode" "OI")])
;; Insn vec_interleave_lowv2di: V2DI selection beginning at element 0.
;; Alternative 0 (non-AVX) is the two-operand punpcklqdq with operand 1
;; tied to the destination; alternative 1 (AVX) is three-operand
;; vpunpcklqdq.  NOTE(review): enabling condition not visible here.
4622 (define_insn "vec_interleave_lowv2di"
4623 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4626 (match_operand:V2DI 1 "register_operand" "0,x")
4627 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4628 (parallel [(const_int 0)
4632 punpcklqdq\t{%2, %0|%0, %2}
4633 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "isa" "noavx,avx")
4635 (set_attr "type" "sselog")
4636 (set_attr "prefix_data16" "1,*")
4637 (set_attr "prefix" "orig,vex")
4638 (set_attr "mode" "TI")])
;; Insn sse2_shufpd_<mode>: two-element selection from the concatenation of
;; operands 1 and 2, for every mode in the VI8F_128 iterator.  Operand 3
;; picks index 0 or 1, operand 4 picks index 2 or 3.  The output code packs
;; them into the immediate (bit 0 = op3, bit 1 = op4 - 2), stores it in
;; operands[3] and returns shufpd or vshufpd depending on the alternative.
4640 (define_insn "sse2_shufpd_<mode>"
4641 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4642 (vec_select:VI8F_128
4643 (vec_concat:<ssedoublevecmode>
4644 (match_operand:VI8F_128 1 "register_operand" "0,x")
4645 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4646 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4647 (match_operand 4 "const_2_to_3_operand" "")])))]
4651 mask = INTVAL (operands[3]);
4652 mask |= (INTVAL (operands[4]) - 2) << 1;
4653 operands[3] = GEN_INT (mask);
4655 switch (which_alternative)
4658 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4660 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4665 [(set_attr "isa" "noavx,avx")
4666 (set_attr "type" "sselog")
4667 (set_attr "length_immediate" "1")
4668 (set_attr "prefix" "orig,vex")
4669 (set_attr "mode" "V2DF")])
4671 ;; Avoid combining registers from different units in a single alternative,
4672 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn sse2_storehpd: extracts element 1 of V2DF operand 1 into DF
;; operand 0 (SSE2; source and destination may not both be memory).
;; Six alternatives cover an SSE register stored to memory, register to
;; register (non-AVX tied / AVX), and an offsettable memory source going to
;; an SSE, x87 (*f) or general (r) register.  prefix_data16 is set only
;; for alternative 0 when not compiling for AVX.
;; NOTE(review): only two of the six output templates are visible in this
;; excerpt.
4673 (define_insn "sse2_storehpd"
4674 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4676 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4677 (parallel [(const_int 1)])))]
4678 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4680 %vmovhpd\t{%1, %0|%0, %1}
4682 vunpckhpd\t{%d1, %0|%0, %d1}
4686 [(set_attr "isa" "*,noavx,avx,*,*,*")
4687 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4688 (set (attr "prefix_data16")
4690 (and (eq_attr "alternative" "0")
4691 (not (match_test "TARGET_AVX")))
4693 (const_string "*")))
4694 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4695 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (SSE2): extracting element 1 of a V2DF memory
;; operand into a DF register becomes a plain DF move from byte offset 8
;; of that memory.
;; NOTE(review): the definition header is not visible in this excerpt;
;; presumably this is a define_split -- confirm.
4698 [(set (match_operand:DF 0 "register_operand" "")
4700 (match_operand:V2DF 1 "memory_operand" "")
4701 (parallel [(const_int 1)])))]
4702 "TARGET_SSE2 && reload_completed"
4703 [(set (match_dup 0) (match_dup 1))]
4704 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE-without-SSE2 extraction of element 1 of a V2DF into a DF.
;; Alternatives: movhps register -> memory, movhlps register -> register,
;; movlps from the %H1 address of an offsettable memory source.
;; Source and destination may not both be memory.
4706 (define_insn "*vec_extractv2df_1_sse"
4707 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4709 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4710 (parallel [(const_int 1)])))]
4711 "!TARGET_SSE2 && TARGET_SSE
4712 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4714 movhps\t{%1, %0|%0, %1}
4715 movhlps\t{%1, %0|%0, %1}
4716 movlps\t{%H1, %0|%0, %H1}"
4717 [(set_attr "type" "ssemov")
4718 (set_attr "mode" "V2SF,V4SF,V2SF")])
4720 ;; Avoid combining registers from different units in a single alternative,
4721 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn sse2_storelpd: extracts element 0 of V2DF operand 1 into DF
;; operand 0 (SSE2; source and destination may not both be memory).
;; Five alternatives: SSE register to memory, SSE register to SSE
;; register, and memory to an SSE, x87 (*f) or general (r) register.
;; NOTE(review): only the first output template is visible in this excerpt.
4722 (define_insn "sse2_storelpd"
4723 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4725 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4726 (parallel [(const_int 0)])))]
4727 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4729 %vmovlpd\t{%1, %0|%0, %1}
4734 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4735 (set_attr "prefix_data16" "1,*,*,*,*")
4736 (set_attr "prefix" "maybe_vex")
4737 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (SSE2): extracting element 0 of V2DF operand 1 into
;; a DF register becomes a plain DF move.  The source is re-expressed in
;; DFmode either as the same hard register (gen_rtx_REG) or through
;; gen_lowpart.
;; NOTE(review): the definition header and the test choosing between the
;; two forms are not visible here; presumably register vs. memory -- confirm.
4740 [(set (match_operand:DF 0 "register_operand" "")
4742 (match_operand:V2DF 1 "nonimmediate_operand" "")
4743 (parallel [(const_int 0)])))]
4744 "TARGET_SSE2 && reload_completed"
4747 rtx op1 = operands[1];
4749 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4751 op1 = gen_lowpart (DFmode, op1);
4752 emit_move_insn (operands[0], op1);
;; SSE-without-SSE2 extraction of element 0 of a V2DF into a DF.
;; Alternatives: movlps register -> memory, movaps register -> register,
;; movlps memory -> register.  Source and destination may not both be
;; memory.
4756 (define_insn "*vec_extractv2df_0_sse"
4757 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4759 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4760 (parallel [(const_int 0)])))]
4761 "!TARGET_SSE2 && TARGET_SSE
4762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4764 movlps\t{%1, %0|%0, %1}
4765 movaps\t{%1, %0|%0, %1}
4766 movlps\t{%1, %0|%0, %1}"
4767 [(set_attr "type" "ssemov")
4768 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander sse2_loadhpd_exp: front end for sse2_loadhpd.  It combines
;; element 0 of V2DF operand 1 with DF operand 2.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted into
;; the returned destination, and that destination is copied to operand 0
;; if the fixup substituted a different one.
4770 (define_expand "sse2_loadhpd_exp"
4771 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4774 (match_operand:V2DF 1 "nonimmediate_operand" "")
4775 (parallel [(const_int 0)]))
4776 (match_operand:DF 2 "nonimmediate_operand" "")))]
4779 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4781 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4783 /* Fix up the destination if needed. */
4784 if (dst != operands[0])
4785 emit_move_insn (operands[0], dst);
4790 ;; Avoid combining registers from different units in a single alternative,
4791 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn sse2_loadhpd: builds a V2DF from element 0 of operand 1 and DF
;; operand 2 (SSE2; operands 1 and 2 may not both be memory).
;; Seven alternatives: movhpd/vmovhpd with operand 2 in memory,
;; unpcklpd/vunpcklpd with operand 2 in an SSE register, and three more
;; taking operand 2 from an SSE, x87 (*f) or general (r) register.
;; NOTE(review): the constraint strings for operands 0 and 1 and the last
;; three output templates are not visible in this excerpt.
4792 (define_insn "sse2_loadhpd"
4793 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4797 (match_operand:V2DF 1 "nonimmediate_operand"
4799 (parallel [(const_int 0)]))
4800 (match_operand:DF 2 "nonimmediate_operand"
4801 " m,m,x,x,x,*f,r")))]
4802 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4804 movhpd\t{%2, %0|%0, %2}
4805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4806 unpcklpd\t{%2, %0|%0, %2}
4807 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4811 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4812 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4813 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4814 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4815 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (SSE2): when a V2DF memory destination keeps its own
;; element 0 and takes DF register operand 1 as the other element, the
;; whole operation becomes a DF store of operand 1 at byte offset 8.
;; NOTE(review): the definition header is not visible in this excerpt;
;; presumably this is a define_split -- confirm.
4818 [(set (match_operand:V2DF 0 "memory_operand" "")
4820 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4821 (match_operand:DF 1 "register_operand" "")))]
4822 "TARGET_SSE2 && reload_completed"
4823 [(set (match_dup 0) (match_dup 1))]
4824 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander sse2_loadlpd_exp: front end for sse2_loadlpd.  It combines DF
;; operand 2 with element 1 of V2DF operand 1.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted into
;; the returned destination, and that destination is copied to operand 0
;; if the fixup substituted a different one.
4826 (define_expand "sse2_loadlpd_exp"
4827 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4829 (match_operand:DF 2 "nonimmediate_operand" "")
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 1)]))))]
4835 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4837 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4839 /* Fix up the destination if needed. */
4840 if (dst != operands[0])
4841 emit_move_insn (operands[0], dst);
4846 ;; Avoid combining registers from different units in a single alternative,
4847 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn sse2_loadlpd: builds a V2DF from DF operand 2 and element 1 of
;; operand 1 (SSE2; operands 1 and 2 may not both be memory).
;; Eleven alternatives, in order: movsd load when operand 1 satisfies
;; constraint "C"; movlpd/vmovlpd from memory; movsd/vmovsd from a
;; register; shufpd with operand 2 tied to the destination; movhpd/vmovhpd
;; taking %H1 of an offsettable memory operand 1; and three alternatives
;; with a memory destination tied to operand 1, fed from an SSE, x87 (*f)
;; or general (r) register.
;; NOTE(review): the last three output templates are not visible in this
;; excerpt.
4848 (define_insn "sse2_loadlpd"
4849 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4850 "=x,x,x,x,x,x,x,x,m,m ,m")
4852 (match_operand:DF 2 "nonimmediate_operand"
4853 " m,m,m,x,x,0,0,x,x,*f,r")
4855 (match_operand:V2DF 1 "vector_move_operand"
4856 " C,0,x,0,x,x,o,o,0,0 ,0")
4857 (parallel [(const_int 1)]))))]
4858 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4860 %vmovsd\t{%2, %0|%0, %2}
4861 movlpd\t{%2, %0|%0, %2}
4862 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4863 movsd\t{%2, %0|%0, %2}
4864 vmovsd\t{%2, %1, %0|%0, %1, %2}
4865 shufpd\t{$2, %1, %0|%0, %1, 2}
4866 movhpd\t{%H1, %0|%0, %H1}
4867 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4871 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4873 (cond [(eq_attr "alternative" "5")
4874 (const_string "sselog")
4875 (eq_attr "alternative" "9")
4876 (const_string "fmov")
4877 (eq_attr "alternative" "10")
4878 (const_string "imov")
4880 (const_string "ssemov")))
4881 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4882 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4883 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4884 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (SSE2): when a V2DF memory destination keeps its own
;; element 1 and takes DF register operand 1 as the other element, the
;; whole operation becomes a DF store of operand 1 at byte offset 0.
;; NOTE(review): the definition header is not visible in this excerpt;
;; presumably this is a define_split -- confirm.
4887 [(set (match_operand:V2DF 0 "memory_operand" "")
4889 (match_operand:DF 1 "register_operand" "")
4890 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4891 "TARGET_SSE2 && reload_completed"
4892 [(set (match_dup 0) (match_dup 1))]
4893 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Insn sse2_movsd: combines V2DF operands 2 and 1 into operand 0.
;; Nine alternatives, in order: movsd/vmovsd register forms; movlpd/vmovlpd
;; loading operand 2 from memory; movlpd storing operand 2 to a memory
;; destination; shufpd with operand 2 tied to the destination;
;; movhps/vmovhps taking %H1 of an offsettable memory operand 1; and
;; movhps storing operand 1 to %H0 of an offsettable memory destination.
;; prefix_data16 is set for alternatives 2 and 4 when not compiling for AVX.
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
4895 (define_insn "sse2_movsd"
4896 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4898 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4899 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4903 movsd\t{%2, %0|%0, %2}
4904 vmovsd\t{%2, %1, %0|%0, %1, %2}
4905 movlpd\t{%2, %0|%0, %2}
4906 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4907 %vmovlpd\t{%2, %0|%0, %2}
4908 shufpd\t{$2, %1, %0|%0, %1, 2}
4909 movhps\t{%H1, %0|%0, %H1}
4910 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4911 %vmovhps\t{%1, %H0|%H0, %1}"
4912 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4915 (eq_attr "alternative" "5")
4916 (const_string "sselog")
4917 (const_string "ssemov")))
4918 (set (attr "prefix_data16")
4920 (and (eq_attr "alternative" "2,4")
4921 (not (match_test "TARGET_AVX")))
4923 (const_string "*")))
4924 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4925 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4926 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Insn vec_dupv2df: produces a V2DF register from DF operand 1.
;; Alternative 0 (non-AVX) has operand 1 tied to the destination;
;; alternative 1 (SSE3) uses movddup from a register or memory.
;; NOTE(review): the RTL code, the enabling condition and the first output
;; template are not visible here; presumably a vec_duplicate -- confirm.
4928 (define_insn "vec_dupv2df"
4929 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4931 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4935 %vmovddup\t{%1, %0|%0, %1}"
4936 [(set_attr "isa" "noavx,sse3")
4937 (set_attr "type" "sselog1")
4938 (set_attr "prefix" "orig,maybe_vex")
4939 (set_attr "mode" "V2DF")])
;; Insn *vec_concatv2df: builds a V2DF register from DF operands 1 and 2.
;; Eight alternatives, in order: unpcklpd/vunpcklpd (both in registers);
;; movddup (operand 1 in memory, operand 2 tied to it, SSE3);
;; movhpd/vmovhpd (operand 2 in memory); movsd (operand 2 satisfies
;; constraint "C"); and the non-SSE2-specific movlhps / movhps forms.
;; NOTE(review): the RTL code and the enabling condition are not visible
;; in this excerpt.
4941 (define_insn "*vec_concatv2df"
4942 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4944 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4945 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4948 unpcklpd\t{%2, %0|%0, %2}
4949 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4950 %vmovddup\t{%1, %0|%0, %1}
4951 movhpd\t{%2, %0|%0, %2}
4952 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4953 %vmovsd\t{%1, %0|%0, %1}
4954 movlhps\t{%2, %0|%0, %2}
4955 movhps\t{%2, %0|%0, %2}"
4956 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4959 (eq_attr "alternative" "0,1,2")
4960 (const_string "sselog")
4961 (const_string "ssemov")))
4962 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4963 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4964 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4968 ;; Parallel integral arithmetic
4970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander neg<mode>2 for the VI_AVX2 integer vector modes.  The
;; preparation statement loads an all-zero vector of the same mode into a
;; register as operand 2.
;; NOTE(review): the RTL code using operand 2 is not visible in this
;; excerpt; presumably the negation is emitted as 0 - operand 1 -- confirm.
4972 (define_expand "neg<mode>2"
4973 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4976 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4978 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/sub over the VI_AVX2 modes (one pattern
;; per value of <plusminus_insn>).  Both inputs may be memory at expand
;; time; ix86_fixup_binary_operands_no_copy adjusts them to a form the
;; matching insn accepts.
4980 (define_expand "<plusminus_insn><mode>3"
4981 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4983 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4984 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4986 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for vector integer add/sub (SSE2, operands validated by
;; ix86_binary_operator_ok).  Alternative 0 is the two-operand legacy form
;; with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.  <comm> supplies the commutative marker where
;; the operation allows it; the mnemonic is assembled from
;; <plusminus_mnemonic> and <ssemodesuffix>.
4988 (define_insn "*<plusminus_insn><mode>3"
4989 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4991 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4992 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4993 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4995 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4996 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4997 [(set_attr "isa" "noavx,avx")
4998 (set_attr "type" "sseiadd")
4999 (set_attr "prefix_data16" "1,*")
5000 (set_attr "prefix" "orig,vex")
5001 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the saturating add/sub codes (sat_plusminus iterator) over
;; the VI12_AVX2 modes.  Both inputs may be memory at expand time;
;; ix86_fixup_binary_operands_no_copy adjusts them to a form the matching
;; insn accepts.
5003 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5004 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
5005 (sat_plusminus:VI12_AVX2
5006 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
5007 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
5009 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for saturating vector add/sub (SSE2, operands validated
;; by ix86_binary_operator_ok).  Alternative 0 is the two-operand legacy
;; form, alternative 1 the three-operand VEX form.
;; NOTE(review): the "mode" attribute is fixed at "TI" here, whereas the
;; non-saturating *<plusminus_insn><mode>3 insn uses "<sseinsnmode>".  If
;; VI12_AVX2 includes 256-bit modes (as its name suggests), "TI" looks
;; wrong for them -- confirm and consider "<sseinsnmode>".
5011 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5012 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5013 (sat_plusminus:VI12_AVX2
5014 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5015 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5016 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5018 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5019 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5020 [(set_attr "isa" "noavx,avx")
5021 (set_attr "type" "sseiadd")
5022 (set_attr "prefix_data16" "1,*")
5023 (set_attr "prefix" "orig,vex")
5024 (set_attr "mode" "TI")])
;; Byte-vector multiply (VI1_AVX2 modes), implemented as an insn that is
;; split while pseudos can still be created.  Strategy visible below:
;;   1. interleave each input with the high/low interleave patterns so
;;      every source byte sits in the low byte of a word (t[0]..t[3]);
;;   2. multiply as words in <sseunpackmode> (t[4] = high part products,
;;      t[5] = low part products);
;;   3. gather the even bytes back into operand 0 -- directly for V16QI,
;;      through a constant permutation otherwise;
;;   4. attach a REG_EQUAL note describing the result as a plain MULT.
;; NOTE(review): several lines (second interleave arguments, parts of the
;; permutation setup, the split condition's first half) are not visible in
;; this excerpt.
5026 (define_insn_and_split "mul<mode>3"
5027 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
5028 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
5029 (match_operand:VI1_AVX2 2 "register_operand" "")))]
5031 && can_create_pseudo_p ()"
5038 enum machine_mode mulmode = <sseunpackmode>mode;
5040 for (i = 0; i < 6; ++i)
5041 t[i] = gen_reg_rtx (<MODE>mode);
5043 /* Unpack data such that we've got a source byte in each low byte of
5044 each word. We don't care what goes into the high byte of each word.
5045 Rather than trying to get zero in there, most convenient is to let
5046 it be a copy of the low byte. */
5047 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
5049 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
5051 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
5053 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
5056 /* Multiply words. The end-of-line annotations here give a picture of what
5057 the output of that instruction looks like. Dot means don't care; the
5058 letters are the bytes of the result with A being the most significant. */
5059 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
5060 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
5061 gen_lowpart (mulmode, t[0]),
5062 gen_lowpart (mulmode, t[1]))));
5063 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
5064 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
5065 gen_lowpart (mulmode, t[2]),
5066 gen_lowpart (mulmode, t[3]))));
5068 /* Extract the even bytes and merge them back together. */
5069 if (<MODE>mode == V16QImode)
5070 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5073 /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
5074 this can't be normal even extraction, but one where additionally
5075 the second and third quarter are swapped. That is even one insn
5076 shorter than even extraction. */
5077 rtvec v = rtvec_alloc (32);
5078 for (i = 0; i < 32; ++i)
5080 = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
5084 t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
5085 ix86_expand_vec_perm_const (t);
5088 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5089 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Expander for word-vector multiply over the VI2_AVX2 modes.  Both inputs
;; may be memory at expand time; ix86_fixup_binary_operands_no_copy
;; adjusts them to a form the matching insn accepts.
5093 (define_expand "mul<mode>3"
5094 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5095 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5096 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5098 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for word-vector multiply (SSE2): pmullw in the legacy
;; two-operand form (operand 1 tied to the destination) or vpmullw in the
;; VEX three-operand form.  Operand 1 carries the "%" commutative marker.
5100 (define_insn "*mul<mode>3"
5101 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5102 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5103 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5104 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5106 pmullw\t{%2, %0|%0, %2}
5107 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5108 [(set_attr "isa" "noavx,avx")
5109 (set_attr "type" "sseimul")
5110 (set_attr "prefix_data16" "1,*")
5111 (set_attr "prefix" "orig,vex")
5112 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the signed/unsigned high-part word multiply over the
;; VI2_AVX2 modes: both inputs are extended (any_extend) to
;; <ssedoublemode>, multiplied, and the product is shifted right
;; logically.  ix86_fixup_binary_operands_no_copy legitimizes the inputs.
;; NOTE(review): the shift count and the outer narrowing code are not
;; visible in this excerpt.
5114 (define_expand "<s>mul<mode>3_highpart"
5115 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5117 (lshiftrt:<ssedoublemode>
5118 (mult:<ssedoublemode>
5119 (any_extend:<ssedoublemode>
5120 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5121 (any_extend:<ssedoublemode>
5122 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5125 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part word multiply (SSE2): pmulh<u>w in the
;; legacy two-operand form or vpmulh<u>w in the VEX three-operand form,
;; where <u> follows the extension code.  Operand 1 carries the "%"
;; commutative marker.
5127 (define_insn "*<s>mul<mode>3_highpart"
5128 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5130 (lshiftrt:<ssedoublemode>
5131 (mult:<ssedoublemode>
5132 (any_extend:<ssedoublemode>
5133 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5134 (any_extend:<ssedoublemode>
5135 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5137 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5139 pmulh<u>w\t{%2, %0|%0, %2}
5140 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5141 [(set_attr "isa" "noavx,avx")
5142 (set_attr "type" "sseimul")
5143 (set_attr "prefix_data16" "1,*")
5144 (set_attr "prefix" "orig,vex")
5145 (set_attr "mode" "<sseinsnmode>")])
;; Expander avx2_umulv4siv4di3: V4DI result computed from elements
;; {0, 2, 4, 6} of each of the two V8SI inputs.
;; ix86_fixup_binary_operands_no_copy legitimizes the inputs.
;; NOTE(review): the extension/multiply RTL codes and the enabling
;; condition are not visible here; the name indicates an unsigned
;; widening multiply -- confirm.
5147 (define_expand "avx2_umulv4siv4di3"
5148 [(set (match_operand:V4DI 0 "register_operand" "")
5152 (match_operand:V8SI 1 "nonimmediate_operand" "")
5153 (parallel [(const_int 0) (const_int 2)
5154 (const_int 4) (const_int 6)])))
5157 (match_operand:V8SI 2 "nonimmediate_operand" "")
5158 (parallel [(const_int 0) (const_int 2)
5159 (const_int 4) (const_int 6)])))))]
5161 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn emitting vpmuludq on elements {0, 2, 4, 6} of two V8SI
;; inputs (requires TARGET_AVX2; operand 1 is marked commutative with "%").
;; NOTE(review): the pattern name says "avx_" although the condition is
;; TARGET_AVX2 and the expander is named avx2_umulv4siv4di3; the name is
;; internal ("*" prefix), so renaming to "*avx2_..." would be harmless.
5163 (define_insn "*avx_umulv4siv4di3"
5164 [(set (match_operand:V4DI 0 "register_operand" "=x")
5168 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5169 (parallel [(const_int 0) (const_int 2)
5170 (const_int 4) (const_int 6)])))
5173 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5174 (parallel [(const_int 0) (const_int 2)
5175 (const_int 4) (const_int 6)])))))]
5176 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5177 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5178 [(set_attr "type" "sseimul")
5179 (set_attr "prefix" "vex")
5180 (set_attr "mode" "OI")])
;; Expander sse2_umulv2siv2di3: V2DI result computed from elements {0, 2}
;; of each of the two V4SI inputs.  ix86_fixup_binary_operands_no_copy
;; legitimizes the inputs.
;; NOTE(review): the extension/multiply RTL codes and the enabling
;; condition are not visible here; the name indicates an unsigned
;; widening multiply -- confirm.
5182 (define_expand "sse2_umulv2siv2di3"
5183 [(set (match_operand:V2DI 0 "register_operand" "")
5187 (match_operand:V4SI 1 "nonimmediate_operand" "")
5188 (parallel [(const_int 0) (const_int 2)])))
5191 (match_operand:V4SI 2 "nonimmediate_operand" "")
5192 (parallel [(const_int 0) (const_int 2)])))))]
5194 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn emitting pmuludq (legacy, operand 1 tied to the
;; destination) or vpmuludq (VEX) on elements {0, 2} of two V4SI inputs.
;; Requires SSE2; operand 1 is marked commutative with "%".
5196 (define_insn "*sse2_umulv2siv2di3"
5197 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5201 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5202 (parallel [(const_int 0) (const_int 2)])))
5205 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5206 (parallel [(const_int 0) (const_int 2)])))))]
5207 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5209 pmuludq\t{%2, %0|%0, %2}
5210 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5211 [(set_attr "isa" "noavx,avx")
5212 (set_attr "type" "sseimul")
5213 (set_attr "prefix_data16" "1,*")
5214 (set_attr "prefix" "orig,vex")
5215 (set_attr "mode" "TI")])
;; Expander avx2_mulv4siv4di3: V4DI result computed from elements
;; {0, 2, 4, 6} of each of the two V8SI inputs.
;; ix86_fixup_binary_operands_no_copy legitimizes the inputs.
;; NOTE(review): the extension/multiply RTL codes and the enabling
;; condition are not visible here; presumably the signed counterpart of
;; avx2_umulv4siv4di3 -- confirm.
5217 (define_expand "avx2_mulv4siv4di3"
5218 [(set (match_operand:V4DI 0 "register_operand" "")
5222 (match_operand:V8SI 1 "nonimmediate_operand" "")
5223 (parallel [(const_int 0) (const_int 2)
5224 (const_int 4) (const_int 6)])))
5227 (match_operand:V8SI 2 "nonimmediate_operand" "")
5228 (parallel [(const_int 0) (const_int 2)
5229 (const_int 4) (const_int 6)])))))]
5231 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn emitting vpmuldq on elements {0, 2, 4, 6} of two V8SI
;; inputs (requires TARGET_AVX2).
;; NOTE(review): operand 1 uses constraint "x" without the "%" commutative
;; marker, unlike *avx_umulv4siv4di3 ("%x") and *sse4_1_mulv2siv2di3
;; ("%0,x").  The predicate still admits memory for operand 1, so without
;; "%" the allocator presumably has to reload it instead of swapping the
;; inputs -- consider "%x".
;; NOTE(review): "isa" "avx" looks redundant given the TARGET_AVX2
;; condition; the unsigned sibling does not set it.
5233 (define_insn "*avx2_mulv4siv4di3"
5234 [(set (match_operand:V4DI 0 "register_operand" "=x")
5238 (match_operand:V8SI 1 "nonimmediate_operand" "x")
5239 (parallel [(const_int 0) (const_int 2)
5240 (const_int 4) (const_int 6)])))
5243 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5244 (parallel [(const_int 0) (const_int 2)
5245 (const_int 4) (const_int 6)])))))]
5246 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5247 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5248 [(set_attr "isa" "avx")
5249 (set_attr "type" "sseimul")
5250 (set_attr "prefix_extra" "1")
5251 (set_attr "prefix" "vex")
5252 (set_attr "mode" "OI")])
;; Expander sse4_1_mulv2siv2di3: V2DI result computed from elements {0, 2}
;; of each of the two V4SI inputs.  ix86_fixup_binary_operands_no_copy
;; legitimizes the inputs.
;; NOTE(review): the extension/multiply RTL codes and the enabling
;; condition are not visible here; presumably the signed counterpart of
;; sse2_umulv2siv2di3 -- confirm.
5254 (define_expand "sse4_1_mulv2siv2di3"
5255 [(set (match_operand:V2DI 0 "register_operand" "")
5259 (match_operand:V4SI 1 "nonimmediate_operand" "")
5260 (parallel [(const_int 0) (const_int 2)])))
5263 (match_operand:V4SI 2 "nonimmediate_operand" "")
5264 (parallel [(const_int 0) (const_int 2)])))))]
5266 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn emitting pmuldq (legacy, operand 1 tied to the
;; destination) or vpmuldq (VEX) on elements {0, 2} of two V4SI inputs.
;; Requires SSE4.1; operand 1 is marked commutative with "%".
5268 (define_insn "*sse4_1_mulv2siv2di3"
5269 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5273 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5274 (parallel [(const_int 0) (const_int 2)])))
5277 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5278 (parallel [(const_int 0) (const_int 2)])))))]
5279 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5281 pmuldq\t{%2, %0|%0, %2}
5282 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5283 [(set_attr "isa" "noavx,avx")
5284 (set_attr "type" "sseimul")
5285 (set_attr "prefix_data16" "1,*")
5286 (set_attr "prefix_extra" "1")
5287 (set_attr "prefix" "orig,vex")
5288 (set_attr "mode" "TI")])
;; Expander avx2_pmaddwd: V8SI result formed from V8HI selections of the
;; two V16HI inputs -- one pair of selections starting at element 0 and
;; one pair starting at element 1 (ending at element 15).
;; ix86_fixup_binary_operands_no_copy legitimizes the inputs.
;; NOTE(review): most selector indices, the combining RTL codes and the
;; enabling condition are not visible in this excerpt.
5290 (define_expand "avx2_pmaddwd"
5291 [(set (match_operand:V8SI 0 "register_operand" "")
5296 (match_operand:V16HI 1 "nonimmediate_operand" "")
5297 (parallel [(const_int 0)
5307 (match_operand:V16HI 2 "nonimmediate_operand" "")
5308 (parallel [(const_int 0)
5318 (vec_select:V8HI (match_dup 1)
5319 (parallel [(const_int 1)
5328 (vec_select:V8HI (match_dup 2)
5329 (parallel [(const_int 1)
5336 (const_int 15)]))))))]
5338 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander sse2_pmaddwd: V4SI result formed from V4HI selections of the
;; two V8HI inputs -- one pair of selections starting at element 0 and one
;; pair starting at element 1 (ending at element 7).
;; ix86_fixup_binary_operands_no_copy legitimizes the inputs.
;; NOTE(review): most selector indices, the combining RTL codes and the
;; enabling condition are not visible in this excerpt.
5340 (define_expand "sse2_pmaddwd"
5341 [(set (match_operand:V4SI 0 "register_operand" "")
5346 (match_operand:V8HI 1 "nonimmediate_operand" "")
5347 (parallel [(const_int 0)
5353 (match_operand:V8HI 2 "nonimmediate_operand" "")
5354 (parallel [(const_int 0)
5360 (vec_select:V4HI (match_dup 1)
5361 (parallel [(const_int 1)
5366 (vec_select:V4HI (match_dup 2)
5367 (parallel [(const_int 1)
5370 (const_int 7)]))))))]
5372 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn emitting vpmaddwd for the V16HI -> V8SI pattern
;; (requires TARGET_AVX2; operand 1 is marked commutative with "%").
;; NOTE(review): most selector indices and the combining RTL codes are not
;; visible in this excerpt.
5374 (define_insn "*avx2_pmaddwd"
5375 [(set (match_operand:V8SI 0 "register_operand" "=x")
5380 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5381 (parallel [(const_int 0)
5391 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5392 (parallel [(const_int 0)
5402 (vec_select:V8HI (match_dup 1)
5403 (parallel [(const_int 1)
5412 (vec_select:V8HI (match_dup 2)
5413 (parallel [(const_int 1)
5420 (const_int 15)]))))))]
5421 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5422 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5423 [(set_attr "type" "sseiadd")
5424 (set_attr "prefix" "vex")
5425 (set_attr "mode" "OI")])
;; Matching insn emitting pmaddwd (legacy, operand 1 tied to the
;; destination) or vpmaddwd (VEX) for the V8HI -> V4SI pattern.
;; Requires SSE2; operand 1 is marked commutative with "%".
;; NOTE(review): most selector indices and the combining RTL codes are not
;; visible in this excerpt.
5427 (define_insn "*sse2_pmaddwd"
5428 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5433 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5434 (parallel [(const_int 0)
5440 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5441 (parallel [(const_int 0)
5447 (vec_select:V4HI (match_dup 1)
5448 (parallel [(const_int 1)
5453 (vec_select:V4HI (match_dup 2)
5454 (parallel [(const_int 1)
5457 (const_int 7)]))))))]
5458 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5460 pmaddwd\t{%2, %0|%0, %2}
5461 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5462 [(set_attr "isa" "noavx,avx")
5463 (set_attr "type" "sseiadd")
5464 (set_attr "atom_unit" "simul")
5465 (set_attr "prefix_data16" "1,*")
5466 (set_attr "prefix" "orig,vex")
5467 (set_attr "mode" "TI")])
;; Expander for doubleword-vector multiply over the VI4_AVX2 modes.  Both
;; inputs are registers at expand time.  Only when SSE4.1 or AVX is
;; available are the operands passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5469 (define_expand "mul<mode>3"
5470 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5471 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5472 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5475 if (TARGET_SSE4_1 || TARGET_AVX)
5476 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for doubleword-vector multiply (SSE4.1): pmulld in the
;; legacy two-operand form (operand 1 tied to the destination) or vpmulld
;; in the VEX three-operand form.  Operand 1 carries the "%" commutative
;; marker.
5479 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5480 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5481 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5482 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5483 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5485 pmulld\t{%2, %0|%0, %2}
5486 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5487 [(set_attr "isa" "noavx,avx")
5488 (set_attr "type" "sseimul")
5489 (set_attr "prefix_extra" "1")
5490 (set_attr "prefix" "orig,vex")
5491 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2-only targets (no SSE4.1, no AVX), implemented as
;; an insn that is split while pseudos can still be created.  Strategy
;; visible below:
;;   1. multiply elements 2 and 0 with sse2_umulv2siv2di3 (t1);
;;   2. shift both inputs right as V1TI values so elements 3 and 1 land in
;;      the slots of elements 2 and 0 (t2, t3), then multiply those (t4);
;;   3. move each product from element 2 down to element 1 with pshufd
;;      (t5, t6) and interleave the low halves into operand 0;
;;   4. attach a REG_EQUAL note describing the result as a plain MULT.
;; NOTE(review): the op0/op1/op2 setup and several call arguments are on
;; lines not visible in this excerpt.
5493 (define_insn_and_split "*sse2_mulv4si3"
5494 [(set (match_operand:V4SI 0 "register_operand" "")
5495 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5496 (match_operand:V4SI 2 "register_operand" "")))]
5497 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5498 && can_create_pseudo_p ()"
5503 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5509 t1 = gen_reg_rtx (V4SImode);
5510 t2 = gen_reg_rtx (V4SImode);
5511 t3 = gen_reg_rtx (V4SImode);
5512 t4 = gen_reg_rtx (V4SImode);
5513 t5 = gen_reg_rtx (V4SImode);
5514 t6 = gen_reg_rtx (V4SImode);
5515 thirtytwo = GEN_INT (32);
5517 /* Multiply elements 2 and 0. */
5518 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5521 /* Shift both input vectors down one element, so that elements 3
5522 and 1 are now in the slots for elements 2 and 0. For K8, at
5523 least, this is faster than using a shuffle. */
5524 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5525 gen_lowpart (V1TImode, op1),
5527 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5528 gen_lowpart (V1TImode, op2),
5530 /* Multiply elements 3 and 1. */
5531 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5534 /* Move the results in element 2 down to element 1; we don't care
5535 what goes in elements 2 and 3. */
5536 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5537 const0_rtx, const0_rtx));
5538 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5539 const0_rtx, const0_rtx));
5541 /* Merge the parts back together. */
5542 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5544 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5545 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; Quadword-vector multiply (VI8_AVX2 modes), implemented as an insn that
;; is split while pseudos can still be created.  Two strategies:
;;   * XOP with V2DI: view the inputs as V4SI, shuffle operand 1 (t1),
;;     multiply 32-bit elements (t2), add adjacent products (t3), shift
;;     the sums left by 32 (t4), and finish with a multiply-accumulate
;;     that adds the low-part products.
;;   * otherwise: multiply the low 32-bit parts (t1), shift each input
;;     right by 32 (t2, t3), form the two cross products (t4, t5), shift
;;     them left by 32 and add the three parts into operand 0.
;; A REG_EQUAL note describing the result as a plain MULT is attached.
;; NOTE(review): the op0/op1/op2 setup, the pshufd selector arguments and
;; the first half of the split condition are not visible in this excerpt.
5549 (define_insn_and_split "mul<mode>3"
5550 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5551 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5552 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5554 && can_create_pseudo_p ()"
5559 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5566 if (TARGET_XOP && <MODE>mode == V2DImode)
5568 /* op1: A,B,C,D, op2: E,F,G,H */
5569 op1 = gen_lowpart (V4SImode, op1);
5570 op2 = gen_lowpart (V4SImode, op2);
5572 t1 = gen_reg_rtx (V4SImode);
5573 t2 = gen_reg_rtx (V4SImode);
5574 t3 = gen_reg_rtx (V2DImode);
5575 t4 = gen_reg_rtx (V2DImode);
5578 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5584 /* t2: (B*E),(A*F),(D*G),(C*H) */
5585 emit_insn (gen_mulv4si3 (t2, t1, op2));
5587 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5588 emit_insn (gen_xop_phadddq (t3, t2));
5590 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5591 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5593 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5594 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5598 t1 = gen_reg_rtx (<MODE>mode);
5599 t2 = gen_reg_rtx (<MODE>mode);
5600 t3 = gen_reg_rtx (<MODE>mode);
5601 t4 = gen_reg_rtx (<MODE>mode);
5602 t5 = gen_reg_rtx (<MODE>mode);
5603 t6 = gen_reg_rtx (<MODE>mode);
5604 thirtytwo = GEN_INT (32);
5606 /* Multiply low parts. */
5607 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5608 (t1, gen_lowpart (<ssepackmode>mode, op1),
5609 gen_lowpart (<ssepackmode>mode, op2)));
5611 /* Shift input vectors right 32 bits so we can multiply high parts. */
5612 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5613 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5615 /* Multiply high parts by low parts. */
5616 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5617 (t4, gen_lowpart (<ssepackmode>mode, op1),
5618 gen_lowpart (<ssepackmode>mode, t3)));
5619 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5620 (t5, gen_lowpart (<ssepackmode>mode, op2),
5621 gen_lowpart (<ssepackmode>mode, t2)));
5623 /* Shift them back. */
5624 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5625 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5627 /* Add the three parts together. */
5628 emit_insn (gen_add<mode>3 (t6, t1, t4));
5629 emit_insn (gen_add<mode>3 (op0, t6, t5));
5632 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5633 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, "hi" variant, for the VI2_AVX2 modes.  Emits the
;; ordinary element-wise product (mul<mode>3) into t1 and the high-part
;; product (<s>mul<mode>3_highpart) into t2, then interleaves the high
;; elements of t1 and t2 into operand 0 viewed in <MODE>mode.
5637 (define_expand "vec_widen_<s>mult_hi_<mode>"
5638 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5639 (any_extend:<sseunpackmode>
5640 (match_operand:VI2_AVX2 1 "register_operand" ""))
5641 (match_operand:VI2_AVX2 2 "register_operand" "")]
5644 rtx op1, op2, t1, t2, dest;
5648 t1 = gen_reg_rtx (<MODE>mode);
5649 t2 = gen_reg_rtx (<MODE>mode);
5650 dest = gen_lowpart (<MODE>mode, operands[0]);
5652 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5653 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5654 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, "lo" variant, for the VI2_AVX2 modes.  Emits the
;; ordinary element-wise product (mul<mode>3) into t1 and the high-part
;; product (<s>mul<mode>3_highpart) into t2, then interleaves the low
;; elements of t1 and t2 into operand 0 viewed in <MODE>mode.
5658 (define_expand "vec_widen_<s>mult_lo_<mode>"
5659 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5660 (any_extend:<sseunpackmode>
5661 (match_operand:VI2_AVX2 1 "register_operand" ""))
5662 (match_operand:VI2_AVX2 2 "register_operand" "")]
5665 rtx op1, op2, t1, t2, dest;
5669 t1 = gen_reg_rtx (<MODE>mode);
5670 t2 = gen_reg_rtx (<MODE>mode);
5671 dest = gen_lowpart (<MODE>mode, operands[0]);
5673 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5674 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5675 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening multiply V8SI -> V4DI, "hi" variant.  Each input is first
;; permuted as V4DImode with selector (0, 2, 1, 3), then shuffled as V8SImode
;; with a pshufd immediate that picks elements 2, 2, 3, 3, and the two
;; shuffled vectors are fed to avx2_<u>mulv4siv4di3.
5679 (define_expand "vec_widen_<s>mult_hi_v8si"
5680 [(match_operand:V4DI 0 "register_operand" "")
5681 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5682 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5687 t1 = gen_reg_rtx (V4DImode);
5688 t2 = gen_reg_rtx (V4DImode);
5689 t3 = gen_reg_rtx (V8SImode);
5690 t4 = gen_reg_rtx (V8SImode);
5691 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5692 const0_rtx, const2_rtx,
5693 const1_rtx, GEN_INT (3)));
5694 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5695 const0_rtx, const2_rtx,
5696 const1_rtx, GEN_INT (3)));
5697 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5698 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5699 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5700 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5701 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Widening multiply V8SI -> V4DI, "lo" variant.  Each input is first
;; permuted as V4DImode with selector (0, 2, 1, 3), then shuffled as V8SImode
;; with a pshufd immediate that picks elements 0, 0, 1, 1, and the two
;; shuffled vectors are fed to avx2_<u>mulv4siv4di3.
5705 (define_expand "vec_widen_<s>mult_lo_v8si"
5706 [(match_operand:V4DI 0 "register_operand" "")
5707 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5708 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5713 t1 = gen_reg_rtx (V4DImode);
5714 t2 = gen_reg_rtx (V4DImode);
5715 t3 = gen_reg_rtx (V8SImode);
5716 t4 = gen_reg_rtx (V8SImode);
5717 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5718 const0_rtx, const2_rtx,
5719 const1_rtx, GEN_INT (3)));
5720 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5721 const0_rtx, const2_rtx,
5722 const1_rtx, GEN_INT (3)));
5723 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5724 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5725 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5726 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5727 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed widening multiply V4SI -> V2DI, "hi" variant.  Two expansions are
;; emitted by the C fragment:
;;  - pshufd both inputs with selector (0, 2, 1, 3) and use
;;    xop_mulv2div2di3_high;
;;  - interleave the high elements of each input with itself and use
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two paths is not visible
;; here; presumably TARGET_XOP selects the first -- confirm.
5731 (define_expand "vec_widen_smult_hi_v4si"
5732 [(match_operand:V2DI 0 "register_operand" "")
5733 (match_operand:V4SI 1 "register_operand" "")
5734 (match_operand:V4SI 2 "register_operand" "")]
5737 rtx op1, op2, t1, t2;
5741 t1 = gen_reg_rtx (V4SImode);
5742 t2 = gen_reg_rtx (V4SImode);
5746 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5747 GEN_INT (1), GEN_INT (3)));
5748 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5749 GEN_INT (1), GEN_INT (3)));
5750 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5754 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5755 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5756 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply V4SI -> V2DI, "lo" variant.  Two expansions are
;; emitted by the C fragment:
;;  - pshufd both inputs with selector (0, 2, 1, 3) and use
;;    xop_mulv2div2di3_low;
;;  - interleave the low elements of each input with itself and use
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two paths is not visible
;; here; presumably TARGET_XOP selects the first -- confirm.
5760 (define_expand "vec_widen_smult_lo_v4si"
5761 [(match_operand:V2DI 0 "register_operand" "")
5762 (match_operand:V4SI 1 "register_operand" "")
5763 (match_operand:V4SI 2 "register_operand" "")]
5766 rtx op1, op2, t1, t2;
5770 t1 = gen_reg_rtx (V4SImode);
5771 t2 = gen_reg_rtx (V4SImode);
5775 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5776 GEN_INT (1), GEN_INT (3)));
5777 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5778 GEN_INT (1), GEN_INT (3)));
5779 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5783 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5784 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5785 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI -> V2DI, "hi" variant.  Each input is
;; interleaved with itself (high elements) so the wanted elements land
;; where sse2_umulv2siv2di3 reads them; that multiply writes operand 0.
5789 (define_expand "vec_widen_umult_hi_v4si"
5790 [(match_operand:V2DI 0 "register_operand" "")
5791 (match_operand:V4SI 1 "register_operand" "")
5792 (match_operand:V4SI 2 "register_operand" "")]
5795 rtx op1, op2, t1, t2;
5799 t1 = gen_reg_rtx (V4SImode);
5800 t2 = gen_reg_rtx (V4SImode);
5802 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5803 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5804 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI -> V2DI, "lo" variant.  Each input is
;; interleaved with itself (low elements) so the wanted elements land
;; where sse2_umulv2siv2di3 reads them; that multiply writes operand 0.
5808 (define_expand "vec_widen_umult_lo_v4si"
5809 [(match_operand:V2DI 0 "register_operand" "")
5810 (match_operand:V4SI 1 "register_operand" "")
5811 (match_operand:V4SI 2 "register_operand" "")]
5814 rtx op1, op2, t1, t2;
5818 t1 = gen_reg_rtx (V4SImode);
5819 t2 = gen_reg_rtx (V4SImode);
5821 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5822 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5823 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for the VI2_AVX2 modes.  Operands 1 and 2 are the
;; narrow inputs; operands 0 and 3 use the wider <sseunpackmode>.
;; The pmaddwd result goes into a fresh temporary and operand 0 is then set
;; to a PLUS in the wide mode.
;; NOTE(review): the PLUS arguments are not visible here; presumably the
;; temporary and accumulator operand 3 -- confirm.
5827 (define_expand "sdot_prod<mode>"
5828 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5829 (match_operand:VI2_AVX2 1 "register_operand" "")
5830 (match_operand:VI2_AVX2 2 "register_operand" "")
5831 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5834 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5835 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5836 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5837 gen_rtx_PLUS (<sseunpackmode>mode,
;; Maps the extension code to an ISA name prefix: "sse2" for zero_extend,
;; "sse4_1" for sign_extend.  Used to spell multiply generator names such as
;; gen_<sse2_sse4_1>_<u>mulv2siv2di3.
5842 (define_code_attr sse2_sse4_1
5843 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot product V4SI x V4SI accumulated into V2DI.  Enabled for the unsigned
;; form with TARGET_SSE2 and for the signed form with TARGET_SSE4_1.
;; Steps emitted:
;;   t1 = widening multiply of operands 1 and 2, plus accumulator operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V1TImode values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the shift amount is not visible here; presumably 32 bits
;; so the other two elements reach the multiplied slots -- confirm.
5845 (define_expand "<s>dot_prodv4si"
5846 [(match_operand:V2DI 0 "register_operand" "")
5847 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5848 (match_operand:V4SI 2 "register_operand" "")
5849 (match_operand:V2DI 3 "register_operand" "")]
5850 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5854 t1 = gen_reg_rtx (V2DImode);
5855 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5856 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5858 t2 = gen_reg_rtx (V4SImode);
5859 t3 = gen_reg_rtx (V4SImode);
5860 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5861 gen_lowpart (V1TImode, operands[1]),
5863 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5864 gen_lowpart (V1TImode, operands[2]),
5867 t4 = gen_reg_rtx (V2DImode);
5868 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5870 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot product V8SI x V8SI accumulated into V4DI, using the AVX2 widening
;; multiply.  Steps emitted:
;;   t1 = widening multiply of operands 1 and 2, plus accumulator operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V2TImode values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the shift amount is not visible here; presumably 32 bits --
;; confirm.
5874 (define_expand "<s>dot_prodv8si"
5875 [(match_operand:V4DI 0 "register_operand" "")
5876 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5877 (match_operand:V8SI 2 "register_operand" "")
5878 (match_operand:V4DI 3 "register_operand" "")]
5883 t1 = gen_reg_rtx (V4DImode);
5884 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5885 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5887 t2 = gen_reg_rtx (V8SImode);
5888 t3 = gen_reg_rtx (V8SImode);
5889 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5890 gen_lowpart (V2TImode, operands[1]),
5892 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5893 gen_lowpart (V2TImode, operands[2]),
5896 t4 = gen_reg_rtx (V4DImode);
5897 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5899 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Shift insn for the VI24_AVX2 modes, emitted as psra / vpsra.
;; Alternative 0 is the two-operand non-AVX form (operand 1 tied to operand
;; 0); alternative 1 is the three-operand VEX form.  The shift count is an
;; SImode operand under constraint "xN".  The length_immediate attribute
;; depends on whether the count is a const_int.
5903 (define_insn "ashr<mode>3"
5904 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5906 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5907 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5910 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5911 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5912 [(set_attr "isa" "noavx,avx")
5913 (set_attr "type" "sseishft")
5914 (set (attr "length_immediate")
5915 (if_then_else (match_operand 2 "const_int_operand" "")
5917 (const_string "0")))
5918 (set_attr "prefix_data16" "1,*")
5919 (set_attr "prefix" "orig,vex")
5920 (set_attr "mode" "<sseinsnmode>")])
;; Shift insns for the VI248_AVX2 modes, one per code in any_lshift, emitted
;; as p<vshift> / vp<vshift>.  Alternative 0 is the two-operand non-AVX form
;; (operand 1 tied to operand 0); alternative 1 is the three-operand VEX
;; form.  The length_immediate attribute depends on whether the count is a
;; const_int.
5922 (define_insn "<shift_insn><mode>3"
5923 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5924 (any_lshift:VI248_AVX2
5925 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5926 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5929 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5930 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5931 [(set_attr "isa" "noavx,avx")
5932 (set_attr "type" "sseishft")
5933 (set (attr "length_immediate")
5934 (if_then_else (match_operand 2 "const_int_operand" "")
5936 (const_string "0")))
5937 (set_attr "prefix_data16" "1,*")
5938 (set_attr "prefix" "orig,vex")
5939 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift for the VI_128 modes.  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-expresses operands
;; 0 and 1 as V1TImode lowparts so the shift is performed on the full
;; 128-bit value.
5941 (define_expand "vec_shl_<mode>"
5942 [(set (match_operand:VI_128 0 "register_operand" "")
5944 (match_operand:VI_128 1 "register_operand" "")
5945 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5948 operands[0] = gen_lowpart (V1TImode, operands[0]);
5949 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register shift for the VIMAX_AVX2 modes, emitted as pslldq (non-AVX
;; alternative) or vpslldq (AVX alternative).  The RTL count operand is
;; divided by 8 before the instruction is printed, so the immediate in the
;; assembly output is the RTL count / 8.
5952 (define_insn "<sse2_avx2>_ashl<mode>3"
5953 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5955 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5956 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5959 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5961 switch (which_alternative)
5964 return "pslldq\t{%2, %0|%0, %2}";
5966 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5971 [(set_attr "isa" "noavx,avx")
5972 (set_attr "type" "sseishft")
5973 (set_attr "length_immediate" "1")
5974 (set_attr "prefix_data16" "1,*")
5975 (set_attr "prefix" "orig,vex")
5976 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift for the VI_128 modes, counterpart of vec_shl_<mode>.
;; The count must satisfy const_0_to_255_mul_8_operand.  The preparation
;; code re-expresses operands 0 and 1 as V1TImode lowparts so the shift is
;; performed on the full 128-bit value.
5978 (define_expand "vec_shr_<mode>"
5979 [(set (match_operand:VI_128 0 "register_operand" "")
5981 (match_operand:VI_128 1 "register_operand" "")
5982 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5985 operands[0] = gen_lowpart (V1TImode, operands[0]);
5986 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register logical right shift for the VIMAX_AVX2 modes, emitted as
;; psrldq (non-AVX alternative) or vpsrldq (AVX alternative).  The RTL count
;; operand is divided by 8 before the instruction is printed, so the
;; immediate in the assembly output is the RTL count / 8.
5989 (define_insn "<sse2_avx2>_lshr<mode>3"
5990 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5991 (lshiftrt:VIMAX_AVX2
5992 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5993 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5996 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5998 switch (which_alternative)
6001 return "psrldq\t{%2, %0|%0, %2}";
6003 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6008 [(set_attr "isa" "noavx,avx")
6009 (set_attr "type" "sseishft")
6010 (set_attr "length_immediate" "1")
6011 (set_attr "atom_unit" "sishuf")
6012 (set_attr "prefix_data16" "1,*")
6013 (set_attr "prefix" "orig,vex")
6014 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for the VI124_256 modes.  It only legitimizes
;; the operands through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code iterator is not visible here; presumably the
;; integer min/max codes handled by *avx2_<code><mode>3 -- confirm.
6017 (define_expand "<code><mode>3"
6018 [(set (match_operand:VI124_256 0 "register_operand" "")
6020 (match_operand:VI124_256 1 "nonimmediate_operand" "")
6021 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
6023 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 insn for the VI124_256 modes, emitted as vp<maxmin_int> with the
;; mode suffix.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is marked commutative ("%"); operand
;; 2 may be a register or memory.
6025 (define_insn "*avx2_<code><mode>3"
6026 [(set (match_operand:VI124_256 0 "register_operand" "=x")
6028 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
6029 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
6030 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6031 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6032 [(set_attr "type" "sseiadd")
6033 (set_attr "prefix_extra" "1")
6034 (set_attr "prefix" "vex")
6035 (set_attr "mode" "OI")])
;; Min/max expander for the VI8_AVX2 modes, implemented as a vector
;; conditional select.  An xops[] array is built for ix86_expand_int_vcond:
;;   xops[0]    destination;
;;   xops[1..2] operands 1, 2 for SMAX/UMAX, swapped for the other codes;
;;   xops[3]    comparison operands[1] > operands[2], GTU for UMAX/UMIN and
;;              GT otherwise;
;;   xops[4..5] the compared operands.
6037 (define_expand "<code><mode>3"
6038 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
6040 (match_operand:VI8_AVX2 1 "register_operand" "")
6041 (match_operand:VI8_AVX2 2 "register_operand" "")))]
6048 xops[0] = operands[0];
6050 if (<CODE> == SMAX || <CODE> == UMAX)
6052 xops[1] = operands[1];
6053 xops[2] = operands[2];
6057 xops[1] = operands[2];
6058 xops[2] = operands[1];
6061 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
6063 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6064 xops[4] = operands[1];
6065 xops[5] = operands[2];
6067 ok = ix86_expand_int_vcond (xops);
;; Expander for the VI124_128 modes that falls back to a GT-based select.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only legitimized so
;; that a direct insn can match.  Otherwise both inputs are forced into
;; registers and a conditional select on operands[1] > operands[2] is built
;; through ix86_expand_int_vcond.
;; NOTE(review): the test ordering xops[1]/xops[2] is not visible here;
;; presumably it distinguishes max from min -- confirm.
6072 (define_expand "<code><mode>3"
6073 [(set (match_operand:VI124_128 0 "register_operand" "")
6075 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6076 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6079 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6080 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6086 xops[0] = operands[0];
6087 operands[1] = force_reg (<MODE>mode, operands[1]);
6088 operands[2] = force_reg (<MODE>mode, operands[2]);
6092 xops[1] = operands[1];
6093 xops[2] = operands[2];
6097 xops[1] = operands[2];
6098 xops[2] = operands[1];
6101 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6102 xops[4] = operands[1];
6103 xops[5] = operands[2];
6105 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI14_128 modes, emitted as p<maxmin_int> (non-AVX,
;; destination tied to operand 1) or vp<maxmin_int> (AVX, three operands).
;; Requires TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
6111 (define_insn "*sse4_1_<code><mode>3"
6112 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6114 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6115 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6116 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6118 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6119 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6120 [(set_attr "isa" "noavx,avx")
6121 (set_attr "type" "sseiadd")
6122 (set_attr "prefix_extra" "1,*")
6123 (set_attr "prefix" "orig,vex")
6124 (set_attr "mode" "TI")])
;; SSE2 insn for V8HImode, emitted as p<maxmin_int>w (non-AVX, destination
;; tied to operand 1) or vp<maxmin_int>w (AVX, three operands).  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
6126 (define_insn "*<code>v8hi3"
6127 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6129 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6130 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6131 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6133 p<maxmin_int>w\t{%2, %0|%0, %2}
6134 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6135 [(set_attr "isa" "noavx,avx")
6136 (set_attr "type" "sseiadd")
6137 (set_attr "prefix_data16" "1,*")
6138 (set_attr "prefix_extra" "*,1")
6139 (set_attr "prefix" "orig,vex")
6140 (set_attr "mode" "TI")])
;; Expander for the VI124_128 modes that falls back to a GTU-based select.
;; Three strategies appear in the C fragment:
;;  - TARGET_SSE4_1, or V16QImode: only legitimize the operands so that a
;;    direct insn can match;
;;  - UMAX on V8HImode: compute op3 = operands[1] -us op2 (saturating
;;    subtract) and then op0 = op3 + op2.  op3 normally aliases op0; a fresh
;;    pseudo is used when op0 equals op2, because op2 is still read by the
;;    add after op3 has been written;
;;  - otherwise: force both inputs into registers and build a conditional
;;    select on operands[1] >u operands[2] through ix86_expand_int_vcond.
;; NOTE(review): the test ordering xops[1]/xops[2] is not visible here;
;; presumably it distinguishes max from min -- confirm.
6142 (define_expand "<code><mode>3"
6143 [(set (match_operand:VI124_128 0 "register_operand" "")
6145 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6146 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6149 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6150 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6151 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6153 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6154 operands[1] = force_reg (<MODE>mode, operands[1]);
6155 if (rtx_equal_p (op3, op2))
6156 op3 = gen_reg_rtx (V8HImode);
6157 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6158 emit_insn (gen_addv8hi3 (op0, op3, op2));
6166 operands[1] = force_reg (<MODE>mode, operands[1]);
6167 operands[2] = force_reg (<MODE>mode, operands[2]);
6169 xops[0] = operands[0];
6173 xops[1] = operands[1];
6174 xops[2] = operands[2];
6178 xops[1] = operands[2];
6179 xops[2] = operands[1];
6182 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6183 xops[4] = operands[1];
6184 xops[5] = operands[2];
6186 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI24_128 modes, emitted as p<maxmin_int> (non-AVX,
;; destination tied to operand 1) or vp<maxmin_int> (AVX, three operands).
;; Requires TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
6192 (define_insn "*sse4_1_<code><mode>3"
6193 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6195 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6196 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6197 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6199 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6200 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6201 [(set_attr "isa" "noavx,avx")
6202 (set_attr "type" "sseiadd")
6203 (set_attr "prefix_extra" "1,*")
6204 (set_attr "prefix" "orig,vex")
6205 (set_attr "mode" "TI")])
;; SSE2 insn for V16QImode, emitted as p<maxmin_int>b (non-AVX, destination
;; tied to operand 1) or vp<maxmin_int>b (AVX, three operands).  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
6207 (define_insn "*<code>v16qi3"
6208 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6210 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6211 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6212 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6214 p<maxmin_int>b\t{%2, %0|%0, %2}
6215 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6216 [(set_attr "isa" "noavx,avx")
6217 (set_attr "type" "sseiadd")
6218 (set_attr "prefix_data16" "1,*")
6219 (set_attr "prefix_extra" "*,1")
6220 (set_attr "prefix" "orig,vex")
6221 (set_attr "mode" "TI")])
6223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6225 ;; Parallel integral comparisons
6227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison on the VI_256 modes.  It only
;; legitimizes the operands for EQ through
;; ix86_fixup_binary_operands_no_copy; the matching insn is *avx2_eq<mode>3.
6229 (define_expand "avx2_eq<mode>3"
6230 [(set (match_operand:VI_256 0 "register_operand" "")
6232 (match_operand:VI_256 1 "nonimmediate_operand" "")
6233 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6235 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality comparison insn for the VI_256 modes, emitted as vpcmpeq
;; with the mode suffix.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for EQ; operand 1 is marked commutative.
6237 (define_insn "*avx2_eq<mode>3"
6238 [(set (match_operand:VI_256 0 "register_operand" "=x")
6240 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6241 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6242 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6243 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6244 [(set_attr "type" "ssecmp")
6245 (set_attr "prefix_extra" "1")
6246 (set_attr "prefix" "vex")
6247 (set_attr "mode" "OI")])
;; SSE4.1 equality comparison insn for V2DImode, emitted as pcmpeqq
;; (non-AVX) or vpcmpeqq (AVX).  Requires TARGET_SSE4_1 and operands
;; accepted by ix86_binary_operator_ok for EQ.
6249 (define_insn "*sse4_1_eqv2di3"
6250 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6252 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6253 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6254 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6256 pcmpeqq\t{%2, %0|%0, %2}
6257 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6258 [(set_attr "isa" "noavx,avx")
6259 (set_attr "type" "ssecmp")
6260 (set_attr "prefix_extra" "1")
6261 (set_attr "prefix" "orig,vex")
6262 (set_attr "mode" "TI")])
;; SSE2 equality comparison insn for the VI124_128 modes, emitted as pcmpeq
;; (non-AVX) or vpcmpeq (AVX) with the mode suffix.  Enabled only when
;; TARGET_SSE2 is set and TARGET_XOP is not, and the operands are accepted
;; by ix86_binary_operator_ok for EQ.
6264 (define_insn "*sse2_eq<mode>3"
6265 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6267 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6268 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6269 "TARGET_SSE2 && !TARGET_XOP
6270 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6272 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6273 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6274 [(set_attr "isa" "noavx,avx")
6275 (set_attr "type" "ssecmp")
6276 (set_attr "prefix_data16" "1,*")
6277 (set_attr "prefix" "orig,vex")
6278 (set_attr "mode" "TI")])
;; Named expander for equality comparison on the VI124_128 modes, enabled
;; when TARGET_SSE2 is set and TARGET_XOP is not.  It only legitimizes the
;; operands for EQ; the matching insn is *sse2_eq<mode>3.
6280 (define_expand "sse2_eq<mode>3"
6281 [(set (match_operand:VI124_128 0 "register_operand" "")
6283 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6284 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6285 "TARGET_SSE2 && !TARGET_XOP "
6286 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DImode equality comparison.  It only legitimizes the
;; operands for EQ; the matching insn is *sse4_1_eqv2di3.
6288 (define_expand "sse4_1_eqv2di3"
6289 [(set (match_operand:V2DI 0 "register_operand" "")
6291 (match_operand:V2DI 1 "nonimmediate_operand" "")
6292 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6294 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DImode comparison insn, emitted as pcmpgtq (non-AVX) or vpcmpgtq
;; (AVX).  Operand 1 must be a register and carries no commutative marker;
;; operand 2 may be a register or memory.
6296 (define_insn "sse4_2_gtv2di3"
6297 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6299 (match_operand:V2DI 1 "register_operand" "0,x")
6300 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6303 pcmpgtq\t{%2, %0|%0, %2}
6304 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6305 [(set_attr "isa" "noavx,avx")
6306 (set_attr "type" "ssecmp")
6307 (set_attr "prefix_extra" "1")
6308 (set_attr "prefix" "orig,vex")
6309 (set_attr "mode" "TI")])
;; Comparison insn for the VI_256 modes, emitted as vpcmpgt with the mode
;; suffix.  Single VEX-encoded alternative: operand 1 in a register, operand
;; 2 in a register or memory.
6311 (define_insn "avx2_gt<mode>3"
6312 [(set (match_operand:VI_256 0 "register_operand" "=x")
6314 (match_operand:VI_256 1 "register_operand" "x")
6315 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6317 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6318 [(set_attr "type" "ssecmp")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "prefix" "vex")
6321 (set_attr "mode" "OI")])
;; Comparison insn for the VI124_128 modes, emitted as pcmpgt (non-AVX) or
;; vpcmpgt (AVX) with the mode suffix.  Enabled when TARGET_SSE2 is set and
;; TARGET_XOP is not.
6323 (define_insn "sse2_gt<mode>3"
6324 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6326 (match_operand:VI124_128 1 "register_operand" "0,x")
6327 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6328 "TARGET_SSE2 && !TARGET_XOP"
6330 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6331 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6332 [(set_attr "isa" "noavx,avx")
6333 (set_attr "type" "ssecmp")
6334 (set_attr "prefix_data16" "1,*")
6335 (set_attr "prefix" "orig,vex")
6336 (set_attr "mode" "TI")])
;; Vector conditional select for 256-bit modes.  Operand 3 is the comparison
;; operator applied to integer vectors 4 and 5; operands 1 and 2 are the
;; values selected between.  The data mode (V_256) and the comparison mode
;; (VI_256) must have the same number of elements.  Expansion is delegated
;; to ix86_expand_int_vcond.
6338 (define_expand "vcond<V_256:mode><VI_256:mode>"
6339 [(set (match_operand:V_256 0 "register_operand" "")
6341 (match_operator 3 ""
6342 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6343 (match_operand:VI_256 5 "general_operand" "")])
6344 (match_operand:V_256 1 "" "")
6345 (match_operand:V_256 2 "" "")))]
6347 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6348 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6350 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit modes with a VI124_128 comparison.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the values selected between.  The data mode and the
;; comparison mode must have the same number of elements.  Expansion is
;; delegated to ix86_expand_int_vcond.
6355 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6356 [(set (match_operand:V_128 0 "register_operand" "")
6358 (match_operator 3 ""
6359 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6360 (match_operand:VI124_128 5 "general_operand" "")])
6361 (match_operand:V_128 1 "" "")
6362 (match_operand:V_128 2 "" "")))]
6364 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6365 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6367 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select where the comparison is done on V2DImode
;; operands 4 and 5 and the selected values (operands 1 and 2) are in a
;; VI8F_128 mode.  Expansion is delegated to ix86_expand_int_vcond.
6372 (define_expand "vcond<VI8F_128:mode>v2di"
6373 [(set (match_operand:VI8F_128 0 "register_operand" "")
6374 (if_then_else:VI8F_128
6375 (match_operator 3 ""
6376 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6377 (match_operand:V2DI 5 "general_operand" "")])
6378 (match_operand:VI8F_128 1 "" "")
6379 (match_operand:VI8F_128 2 "" "")))]
6382 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander.  Operand 3 is the
;; comparison operator applied to VI_256 operands 4 and 5; operands 1 and 2
;; are the values selected between.  The data mode and the comparison mode
;; must have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
6387 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6388 [(set (match_operand:V_256 0 "register_operand" "")
6390 (match_operator 3 ""
6391 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6392 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6393 (match_operand:V_256 1 "general_operand" "")
6394 (match_operand:V_256 2 "general_operand" "")))]
6396 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6397 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6399 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander.  Operand 3 is the
;; comparison operator applied to VI124_128 operands 4 and 5; operands 1 and
;; 2 are the values selected between.  The data mode and the comparison mode
;; must have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
6404 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6405 [(set (match_operand:V_128 0 "register_operand" "")
6407 (match_operator 3 ""
6408 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6409 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6410 (match_operand:V_128 1 "general_operand" "")
6411 (match_operand:V_128 2 "general_operand" "")))]
6413 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6414 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6416 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<VI8F_128:mode>v2di: the comparison is done
;; on V2DImode operands 4 and 5 and the selected values (operands 1 and 2)
;; are in a VI8F_128 mode.  Expansion is delegated to ix86_expand_int_vcond.
6421 (define_expand "vcondu<VI8F_128:mode>v2di"
6422 [(set (match_operand:VI8F_128 0 "register_operand" "")
6423 (if_then_else:VI8F_128
6424 (match_operator 3 ""
6425 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6426 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6427 (match_operand:VI8F_128 1 "general_operand" "")
6428 (match_operand:VI8F_128 2 "general_operand" "")))]
6431 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by vec_perm<mode>: the six 128-bit modes unconditionally,
;; and the six 256-bit modes only when TARGET_AVX2 is set.
6436 (define_mode_iterator VEC_PERM_AVX2
6437 [V16QI V8HI V4SI V2DI V4SF V2DF
6438 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6439 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6440 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Permutation of two vectors (operands 1 and 2) under a selector held in a
;; register of the corresponding integer vector mode (operand 3).  Available
;; with TARGET_SSSE3, TARGET_AVX or TARGET_XOP; all the work is done by
;; ix86_expand_vec_perm.
6442 (define_expand "vec_perm<mode>"
6443 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6444 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6445 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6446 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6447 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6449 ix86_expand_vec_perm (operands);
;; Modes handled by vec_perm_const<mode>, each guarded by the target flag
;; listed next to it (TARGET_SSE, TARGET_SSE2, TARGET_AVX or TARGET_AVX2).
6453 (define_mode_iterator VEC_PERM_CONST
6454 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6455 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6456 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6457 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6458 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6459 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation of two vectors (operands 1 and 2) under selector operand 3,
;; which has no predicate here.  Expansion is attempted through
;; ix86_expand_vec_perm_const.
;; NOTE(review): what follows each outcome of that call is not visible
;; here; presumably DONE on success and FAIL otherwise -- confirm.
6461 (define_expand "vec_perm_const<mode>"
6462 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6463 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6464 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6465 (match_operand:<sseintvecmode> 3 "" "")]
6468 if (ix86_expand_vec_perm_const (operands))
6474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6476 ;; Parallel bitwise logical operations
6478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR.  The preparation
;; code builds a constant vector with every element set to constm1_rtx,
;; forces it into a register and stores it as operands[2].
6480 (define_expand "one_cmpl<mode>2"
6481 [(set (match_operand:VI 0 "register_operand" "")
6482 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6486 int i, n = GET_MODE_NUNITS (<MODE>mode);
6487 rtvec v = rtvec_alloc (n);
6489 for (i = 0; i < n; ++i)
6490 RTVEC_ELT (v, i) = constm1_rtx;
6492 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the VI_AVX2 modes combining the complement of operand
;; 1 (a register) with operand 2 (register or memory).
;; NOTE(review): the outer RTL code is not visible here; presumably AND, as
;; the pattern name suggests -- confirm.
6495 (define_expand "<sse2_avx2>_andnot<mode>3"
6496 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6498 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6499 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn for the VI modes that combines the complement of operand 1 with
;; operand 2.  The output code formats the instruction text into a static
;; buffer: it switches on the insn's "mode" attribute (asserting the target
;; flag each case needs) and then picks the operand layout by alternative,
;; two-operand for non-AVX and "v"-prefixed three-operand for AVX.
;; The "mode" attribute is V8SF when the vector is wider than 16 bytes and
;; TARGET_AVX2 is off, V4SF when TARGET_SSE2 is off, and <sseinsnmode>
;; otherwise.
6502 (define_insn "*andnot<mode>3"
6503 [(set (match_operand:VI 0 "register_operand" "=x,x")
6505 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6506 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6509 static char buf[32];
6513 switch (get_attr_mode (insn))
6516 gcc_assert (TARGET_AVX2);
6518 gcc_assert (TARGET_SSE2);
6524 gcc_assert (TARGET_AVX);
6526 gcc_assert (TARGET_SSE);
6535 switch (which_alternative)
6538 ops = "%s\t{%%2, %%0|%%0, %%2}";
6541 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6547 snprintf (buf, sizeof (buf), ops, tmp);
6550 [(set_attr "isa" "noavx,avx")
6551 (set_attr "type" "sselog")
6552 (set (attr "prefix_data16")
6554 (and (eq_attr "alternative" "0")
6555 (eq_attr "mode" "TI"))
6557 (const_string "*")))
6558 (set_attr "prefix" "orig,vex")
6560 (cond [(and (not (match_test "TARGET_AVX2"))
6561 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6562 (const_string "V8SF")
6563 (not (match_test "TARGET_SSE2"))
6564 (const_string "V4SF")
6566 (const_string "<sseinsnmode>")))])
;; Binary-operation expander for the VI modes.  It only legitimizes the
;; operands through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code iterator is not visible here; given the
;; enclosing section, presumably the bitwise logic codes -- confirm.
6568 (define_expand "<code><mode>3"
6569 [(set (match_operand:VI 0 "register_operand" "")
6571 (match_operand:VI 1 "nonimmediate_operand" "")
6572 (match_operand:VI 2 "nonimmediate_operand" "")))]
6574 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Commutative binary insn for the VI modes (operand 1 is marked "%").
;; The output code formats the instruction text into a static buffer: it
;; switches on the insn's "mode" attribute (asserting the target flag each
;; case needs) and then picks the operand layout by alternative,
;; two-operand for non-AVX and "v"-prefixed three-operand for AVX.
;; The "mode" attribute is V8SF when the vector is wider than 16 bytes and
;; TARGET_AVX2 is off, V4SF when TARGET_SSE2 is off, and <sseinsnmode>
;; otherwise.
6576 (define_insn "*<code><mode>3"
6577 [(set (match_operand:VI 0 "register_operand" "=x,x")
6579 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6580 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6582 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6584 static char buf[32];
6588 switch (get_attr_mode (insn))
6591 gcc_assert (TARGET_AVX2);
6593 gcc_assert (TARGET_SSE2);
6599 gcc_assert (TARGET_AVX);
6601 gcc_assert (TARGET_SSE);
6610 switch (which_alternative)
6613 ops = "%s\t{%%2, %%0|%%0, %%2}";
6616 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6622 snprintf (buf, sizeof (buf), ops, tmp);
6625 [(set_attr "isa" "noavx,avx")
6626 (set_attr "type" "sselog")
6627 (set (attr "prefix_data16")
6629 (and (eq_attr "alternative" "0")
6630 (eq_attr "mode" "TI"))
6632 (const_string "*")))
6633 (set_attr "prefix" "orig,vex")
6635 (cond [(and (not (match_test "TARGET_AVX2"))
6636 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6637 (const_string "V8SF")
6638 (not (match_test "TARGET_SSE2"))
6639 (const_string "V4SF")
6641 (const_string "<sseinsnmode>")))])
;; TFmode insn combining the complement of operand 1 with operand 2,
;; emitted as pandn (non-AVX, destination tied to operand 1) or vpandn (AVX,
;; three operands).
6643 (define_insn "*andnottf3"
6644 [(set (match_operand:TF 0 "register_operand" "=x,x")
6646 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6647 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6650 pandn\t{%2, %0|%0, %2}
6651 vpandn\t{%2, %1, %0|%0, %1, %2}"
6652 [(set_attr "isa" "noavx,avx")
6653 (set_attr "type" "sselog")
6654 (set_attr "prefix_data16" "1,*")
6655 (set_attr "prefix" "orig,vex")
6656 (set_attr "mode" "TI")])
;; Binary-operation expander for TFmode.  It only legitimizes the operands
;; through ix86_fixup_binary_operands_no_copy; the matching insn is
;; *<code>tf3.
6658 (define_expand "<code>tf3"
6659 [(set (match_operand:TF 0 "register_operand" "")
6661 (match_operand:TF 1 "nonimmediate_operand" "")
6662 (match_operand:TF 2 "nonimmediate_operand" "")))]
6664 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Commutative binary insn for TFmode, emitted as p<logic> (non-AVX,
;; destination tied to operand 1) or vp<logic> (AVX, three operands).  The
;; operands must be accepted by ix86_binary_operator_ok.
6666 (define_insn "*<code>tf3"
6667 [(set (match_operand:TF 0 "register_operand" "=x,x")
6669 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6670 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6672 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6674 p<logic>\t{%2, %0|%0, %2}
6675 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "isa" "noavx,avx")
6677 (set_attr "type" "sselog")
6678 (set_attr "prefix_data16" "1,*")
6679 (set_attr "prefix" "orig,vex")
6680 (set_attr "mode" "TI")])
6682 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6684 ;; Parallel integral element swizzling
6686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2 vectors into one <ssepackmode> vector.
;; Both inputs are reinterpreted in the narrower-element pack mode and
;; handed to ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): that 0 presumably selects the even elements, i.e. the low
;; half of every wide element -- confirm against the helper.
6688 (define_expand "vec_pack_trunc_<mode>"
6689 [(match_operand:<ssepackmode> 0 "register_operand" "")
6690 (match_operand:VI248_AVX2 1 "register_operand" "")
6691 (match_operand:VI248_AVX2 2 "register_operand" "")]
6694 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6695 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6696 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation into a VI1_AVX2 result: the destination is
;; the concatenation of the ss_truncate of operand 1 and the ss_truncate of
;; operand 2.  Emitted as packsswb (non-AVX) or vpacksswb (AVX).
6700 (define_insn "<sse2_avx2>_packsswb"
6701 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6702 (vec_concat:VI1_AVX2
6703 (ss_truncate:<ssehalfvecmode>
6704 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6705 (ss_truncate:<ssehalfvecmode>
6706 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6709 packsswb\t{%2, %0|%0, %2}
6710 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6711 [(set_attr "isa" "noavx,avx")
6712 (set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1,*")
6714 (set_attr "prefix" "orig,vex")
6715 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation into a VI2_AVX2 result: the destination is
;; the concatenation of the ss_truncate of operand 1 and the ss_truncate of
;; operand 2.  Emitted as packssdw (non-AVX) or vpackssdw (AVX).
6717 (define_insn "<sse2_avx2>_packssdw"
6718 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6719 (vec_concat:VI2_AVX2
6720 (ss_truncate:<ssehalfvecmode>
6721 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6722 (ss_truncate:<ssehalfvecmode>
6723 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6726 packssdw\t{%2, %0|%0, %2}
6727 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6728 [(set_attr "isa" "noavx,avx")
6729 (set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1,*")
6731 (set_attr "prefix" "orig,vex")
6732 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation into a VI1_AVX2 vector: the result is the
;; vec_concat of us_truncate of operand 1 and us_truncate of operand 2.
;; Alternative 0 (noavx) is the two-operand packuswb with operand 1 tied to
;; the destination; alternative 1 (avx) is the three-operand vpackuswb.
6734 (define_insn "<sse2_avx2>_packuswb"
6735 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6736 (vec_concat:VI1_AVX2
6737 (us_truncate:<ssehalfvecmode>
6738 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6739 (us_truncate:<ssehalfvecmode>
6740 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6743 packuswb\t{%2, %0|%0, %2}
6744 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6745 [(set_attr "isa" "noavx,avx")
6746 (set_attr "type" "sselog")
6747 (set_attr "prefix_data16" "1,*")
6748 (set_attr "prefix" "orig,vex")
6749 (set_attr "mode" "<sseinsnmode>")])
;; V32QI high byte interleave, printed as vpunpckhbw.  The parallel lists
;; the selected element indices in result order: 8..15 alternate with
;; 40..47, then 24..31 alternate with 56..63.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 (original
;; 6753-6754) and the condition are not in this listing; indices of 32 and
;; above presumably address operand 2 -- confirm.
6751 (define_insn "avx2_interleave_highv32qi"
6752 [(set (match_operand:V32QI 0 "register_operand" "=x")
6755 (match_operand:V32QI 1 "register_operand" "x")
6756 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6757 (parallel [(const_int 8) (const_int 40)
6758 (const_int 9) (const_int 41)
6759 (const_int 10) (const_int 42)
6760 (const_int 11) (const_int 43)
6761 (const_int 12) (const_int 44)
6762 (const_int 13) (const_int 45)
6763 (const_int 14) (const_int 46)
6764 (const_int 15) (const_int 47)
6765 (const_int 24) (const_int 56)
6766 (const_int 25) (const_int 57)
6767 (const_int 26) (const_int 58)
6768 (const_int 27) (const_int 59)
6769 (const_int 28) (const_int 60)
6770 (const_int 29) (const_int 61)
6771 (const_int 30) (const_int 62)
6772 (const_int 31) (const_int 63)])))]
6774 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6775 [(set_attr "type" "sselog")
6776 (set_attr "prefix" "vex")
6777 (set_attr "mode" "OI")])
;; V16QI high byte interleave: the parallel selects indices 8..15
;; alternating with 24..31.  Alternative 0 (noavx) prints punpckhbw with
;; operand 1 tied to the destination; alternative 1 (avx) prints the
;; three-operand vpunpckhbw.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 16 and above presumably address operand 2.
6779 (define_insn "vec_interleave_highv16qi"
6780 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6783 (match_operand:V16QI 1 "register_operand" "0,x")
6784 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6785 (parallel [(const_int 8) (const_int 24)
6786 (const_int 9) (const_int 25)
6787 (const_int 10) (const_int 26)
6788 (const_int 11) (const_int 27)
6789 (const_int 12) (const_int 28)
6790 (const_int 13) (const_int 29)
6791 (const_int 14) (const_int 30)
6792 (const_int 15) (const_int 31)])))]
6795 punpckhbw\t{%2, %0|%0, %2}
6796 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6797 [(set_attr "isa" "noavx,avx")
6798 (set_attr "type" "sselog")
6799 (set_attr "prefix_data16" "1,*")
6800 (set_attr "prefix" "orig,vex")
6801 (set_attr "mode" "TI")])
;; V32QI low byte interleave, printed as vpunpcklbw.  The parallel lists
;; the selected element indices in result order: 0..7 alternate with
;; 32..39, then 16..23 alternate with 48..55.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 (original
;; 6805-6806) and the condition are not in this listing; indices of 32 and
;; above presumably address operand 2 -- confirm.
6803 (define_insn "avx2_interleave_lowv32qi"
6804 [(set (match_operand:V32QI 0 "register_operand" "=x")
6807 (match_operand:V32QI 1 "register_operand" "x")
6808 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6809 (parallel [(const_int 0) (const_int 32)
6810 (const_int 1) (const_int 33)
6811 (const_int 2) (const_int 34)
6812 (const_int 3) (const_int 35)
6813 (const_int 4) (const_int 36)
6814 (const_int 5) (const_int 37)
6815 (const_int 6) (const_int 38)
6816 (const_int 7) (const_int 39)
6817 (const_int 16) (const_int 48)
6818 (const_int 17) (const_int 49)
6819 (const_int 18) (const_int 50)
6820 (const_int 19) (const_int 51)
6821 (const_int 20) (const_int 52)
6822 (const_int 21) (const_int 53)
6823 (const_int 22) (const_int 54)
6824 (const_int 23) (const_int 55)])))]
6826 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6827 [(set_attr "type" "sselog")
6828 (set_attr "prefix" "vex")
6829 (set_attr "mode" "OI")])
;; V16QI low byte interleave: the parallel selects indices 0..7 alternating
;; with 16..23.  Alternative 0 (noavx) prints punpcklbw with operand 1 tied
;; to the destination; alternative 1 (avx) prints the three-operand
;; vpunpcklbw.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 16 and above presumably address operand 2.
6831 (define_insn "vec_interleave_lowv16qi"
6832 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6835 (match_operand:V16QI 1 "register_operand" "0,x")
6836 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6837 (parallel [(const_int 0) (const_int 16)
6838 (const_int 1) (const_int 17)
6839 (const_int 2) (const_int 18)
6840 (const_int 3) (const_int 19)
6841 (const_int 4) (const_int 20)
6842 (const_int 5) (const_int 21)
6843 (const_int 6) (const_int 22)
6844 (const_int 7) (const_int 23)])))]
6847 punpcklbw\t{%2, %0|%0, %2}
6848 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6849 [(set_attr "isa" "noavx,avx")
6850 (set_attr "type" "sselog")
6851 (set_attr "prefix_data16" "1,*")
6852 (set_attr "prefix" "orig,vex")
6853 (set_attr "mode" "TI")])
;; V16HI high word interleave, printed as vpunpckhwd.  The parallel selects
;; indices 4..7 alternating with 20..23, then 12..15 alternating with
;; 28..31.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 and the condition
;; are not in this listing; indices of 16 and above presumably address
;; operand 2 -- confirm.
6855 (define_insn "avx2_interleave_highv16hi"
6856 [(set (match_operand:V16HI 0 "register_operand" "=x")
6859 (match_operand:V16HI 1 "register_operand" "x")
6860 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6861 (parallel [(const_int 4) (const_int 20)
6862 (const_int 5) (const_int 21)
6863 (const_int 6) (const_int 22)
6864 (const_int 7) (const_int 23)
6865 (const_int 12) (const_int 28)
6866 (const_int 13) (const_int 29)
6867 (const_int 14) (const_int 30)
6868 (const_int 15) (const_int 31)])))]
6870 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6871 [(set_attr "type" "sselog")
6872 (set_attr "prefix" "vex")
6873 (set_attr "mode" "OI")])
;; V8HI high word interleave: the parallel selects indices 4..7 alternating
;; with 12..15.  Alternative 0 (noavx) prints punpckhwd with operand 1 tied
;; to the destination; alternative 1 (avx) prints the three-operand
;; vpunpckhwd.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 8 and above presumably address operand 2.
6875 (define_insn "vec_interleave_highv8hi"
6876 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6879 (match_operand:V8HI 1 "register_operand" "0,x")
6880 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6881 (parallel [(const_int 4) (const_int 12)
6882 (const_int 5) (const_int 13)
6883 (const_int 6) (const_int 14)
6884 (const_int 7) (const_int 15)])))]
6887 punpckhwd\t{%2, %0|%0, %2}
6888 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6889 [(set_attr "isa" "noavx,avx")
6890 (set_attr "type" "sselog")
6891 (set_attr "prefix_data16" "1,*")
6892 (set_attr "prefix" "orig,vex")
6893 (set_attr "mode" "TI")])
;; V16HI low word interleave, printed as vpunpcklwd.  The parallel selects
;; indices 0..3 alternating with 16..19, then 8..11 alternating with
;; 24..27.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 and the condition
;; are not in this listing; indices of 16 and above presumably address
;; operand 2 -- confirm.
6895 (define_insn "avx2_interleave_lowv16hi"
6896 [(set (match_operand:V16HI 0 "register_operand" "=x")
6899 (match_operand:V16HI 1 "register_operand" "x")
6900 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6901 (parallel [(const_int 0) (const_int 16)
6902 (const_int 1) (const_int 17)
6903 (const_int 2) (const_int 18)
6904 (const_int 3) (const_int 19)
6905 (const_int 8) (const_int 24)
6906 (const_int 9) (const_int 25)
6907 (const_int 10) (const_int 26)
6908 (const_int 11) (const_int 27)])))]
6910 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6911 [(set_attr "type" "sselog")
6912 (set_attr "prefix" "vex")
6913 (set_attr "mode" "OI")])
;; V8HI low word interleave: the parallel selects indices 0..3 alternating
;; with 8..11.  Alternative 0 (noavx) prints punpcklwd with operand 1 tied
;; to the destination; alternative 1 (avx) prints the three-operand
;; vpunpcklwd.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 8 and above presumably address operand 2.
6915 (define_insn "vec_interleave_lowv8hi"
6916 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6919 (match_operand:V8HI 1 "register_operand" "0,x")
6920 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6921 (parallel [(const_int 0) (const_int 8)
6922 (const_int 1) (const_int 9)
6923 (const_int 2) (const_int 10)
6924 (const_int 3) (const_int 11)])))]
6927 punpcklwd\t{%2, %0|%0, %2}
6928 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6929 [(set_attr "isa" "noavx,avx")
6930 (set_attr "type" "sselog")
6931 (set_attr "prefix_data16" "1,*")
6932 (set_attr "prefix" "orig,vex")
6933 (set_attr "mode" "TI")])
;; V8SI high doubleword interleave, printed as vpunpckhdq.  The parallel
;; selects indices 2, 10, 3, 11 and then 6, 14, 7, 15.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 and the condition
;; are not in this listing; indices of 8 and above presumably address
;; operand 2 -- confirm.
6935 (define_insn "avx2_interleave_highv8si"
6936 [(set (match_operand:V8SI 0 "register_operand" "=x")
6939 (match_operand:V8SI 1 "register_operand" "x")
6940 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6941 (parallel [(const_int 2) (const_int 10)
6942 (const_int 3) (const_int 11)
6943 (const_int 6) (const_int 14)
6944 (const_int 7) (const_int 15)])))]
6946 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6947 [(set_attr "type" "sselog")
6948 (set_attr "prefix" "vex")
6949 (set_attr "mode" "OI")])
;; V4SI high doubleword interleave: the parallel selects indices 2, 6, 3, 7.
;; Alternative 0 (noavx) prints punpckhdq with operand 1 tied to the
;; destination; alternative 1 (avx) prints the three-operand vpunpckhdq.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 4 and above presumably address operand 2.
6951 (define_insn "vec_interleave_highv4si"
6952 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6955 (match_operand:V4SI 1 "register_operand" "0,x")
6956 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6957 (parallel [(const_int 2) (const_int 6)
6958 (const_int 3) (const_int 7)])))]
6961 punpckhdq\t{%2, %0|%0, %2}
6962 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6963 [(set_attr "isa" "noavx,avx")
6964 (set_attr "type" "sselog")
6965 (set_attr "prefix_data16" "1,*")
6966 (set_attr "prefix" "orig,vex")
6967 (set_attr "mode" "TI")])
;; V8SI low doubleword interleave, printed as vpunpckldq.  The parallel
;; selects indices 0, 8, 1, 9 and then 4, 12, 5, 13.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 and the condition
;; are not in this listing; indices of 8 and above presumably address
;; operand 2 -- confirm.
6969 (define_insn "avx2_interleave_lowv8si"
6970 [(set (match_operand:V8SI 0 "register_operand" "=x")
6973 (match_operand:V8SI 1 "register_operand" "x")
6974 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6975 (parallel [(const_int 0) (const_int 8)
6976 (const_int 1) (const_int 9)
6977 (const_int 4) (const_int 12)
6978 (const_int 5) (const_int 13)])))]
6980 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6981 [(set_attr "type" "sselog")
6982 (set_attr "prefix" "vex")
6983 (set_attr "mode" "OI")])
;; V4SI low doubleword interleave: the parallel selects indices 0, 4, 1, 5.
;; Alternative 0 (noavx) prints punpckldq with operand 1 tied to the
;; destination; alternative 1 (avx) prints the three-operand vpunpckldq.
;; NOTE(review): the RTL lines wrapping operands 1 and 2 are not in this
;; listing; indices of 4 and above presumably address operand 2.
6985 (define_insn "vec_interleave_lowv4si"
6986 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6989 (match_operand:V4SI 1 "register_operand" "0,x")
6990 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6991 (parallel [(const_int 0) (const_int 4)
6992 (const_int 1) (const_int 5)])))]
6995 punpckldq\t{%2, %0|%0, %2}
6996 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6997 [(set_attr "isa" "noavx,avx")
6998 (set_attr "type" "sselog")
6999 (set_attr "prefix_data16" "1,*")
7000 (set_attr "prefix" "orig,vex")
7001 (set_attr "mode" "TI")])
;; Expander vec_interleave_high<mode> for the VI_256 modes.  It emits the
;; avx2 low and high interleaves of operands 1 and 2 into fresh pseudos t1
;; and t2, then combines them with avx2_permv2ti on V4DImode views using
;; the immediate 1 + (3 << 4), i.e. 0x31.
;; NOTE(review): presumably this immediate picks the upper 128-bit halves
;; of t1 and t2 -- confirm against the VPERM2I128 immediate encoding.
7003 (define_expand "vec_interleave_high<mode>"
7004 [(match_operand:VI_256 0 "register_operand" "=x")
7005 (match_operand:VI_256 1 "register_operand" "x")
7006 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7009 rtx t1 = gen_reg_rtx (<MODE>mode);
7010 rtx t2 = gen_reg_rtx (<MODE>mode);
7011 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7012 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7013 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
7014 gen_lowpart (V4DImode, t1),
7015 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander vec_interleave_low<mode> for the VI_256 modes.  It emits the
;; avx2 low and high interleaves of operands 1 and 2 into fresh pseudos t1
;; and t2, then combines them with avx2_permv2ti on V4DImode views using
;; the immediate 0 + (2 << 4), i.e. 0x20.
;; NOTE(review): presumably this immediate picks the lower 128-bit halves
;; of t1 and t2 -- confirm against the VPERM2I128 immediate encoding.
7019 (define_expand "vec_interleave_low<mode>"
7020 [(match_operand:VI_256 0 "register_operand" "=x")
7021 (match_operand:VI_256 1 "register_operand" "x")
7022 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7025 rtx t1 = gen_reg_rtx (<MODE>mode);
7026 rtx t2 = gen_reg_rtx (<MODE>mode);
7027 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7028 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7029 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
7030 gen_lowpart (V4DImode, t1),
7031 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
7035 ;; Modes handled by pinsr patterns.
;; V8HI is always enabled; V16QI and V4SI require TARGET_SSE4_1; V2DI
;; additionally requires TARGET_64BIT.
7036 (define_mode_iterator PINSR_MODE
7037 [(V16QI "TARGET_SSE4_1") V8HI
7038 (V4SI "TARGET_SSE4_1")
7039 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per vector mode: sse2 for V8HI, sse4_1 for V16QI,
;; V4SI and V2DI.  Used below to build the pinsr pattern name.
7041 (define_mode_attr sse2p4_1
7042 [(V16QI "sse4_1") (V8HI "sse2")
7043 (V4SI "sse4_1") (V2DI "sse4_1")])
7045 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Element insert for the PINSR_MODE modes: a vec_merge of the duplicated
;; scalar operand 2 into vector operand 1 under the constant mask operand 3.
;; The visible condition requires exact_log2 of the mask to be below the
;; element count of the mode, and the output code replaces operand 3 with
;; that log2 before printing.  When the scalar mode is narrower than SImode
;; the source is printed as %k2.  The isa attribute marks alternatives 0-1
;; as noavx and 2-3 as avx; the templates shown are pinsr<ssemodesuffix>
;; and vpinsr<ssemodesuffix>.
;; NOTE(review): the case labels of the switch and the values chosen by the
;; three conditional attributes fall on lines missing from this listing.
7046 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
7047 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
7048 (vec_merge:PINSR_MODE
7049 (vec_duplicate:PINSR_MODE
7050 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
7051 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
7052 (match_operand:SI 3 "const_int_operand" "")))]
7054 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7055 < GET_MODE_NUNITS (<MODE>mode))"
7057 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7059 switch (which_alternative)
7062 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7063 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7066 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7068 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7069 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7072 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7077 [(set_attr "isa" "noavx,noavx,avx,avx")
7078 (set_attr "type" "sselog")
7079 (set (attr "prefix_rex")
7081 (and (not (match_test "TARGET_AVX"))
7082 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7084 (const_string "*")))
7085 (set (attr "prefix_data16")
7087 (and (not (match_test "TARGET_AVX"))
7088 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7090 (const_string "*")))
7091 (set (attr "prefix_extra")
7093 (and (not (match_test "TARGET_AVX"))
7094 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7096 (const_string "1")))
7097 (set_attr "length_immediate" "1")
7098 (set_attr "prefix" "orig,orig,vex,vex")
7099 (set_attr "mode" "TI")])
;; Byte extract into a general register of an SWI48 mode.  Operand 2 is a
;; constant in 0..15 selecting the element of V16QI operand 1.  The
;; destination is printed with %k0.
;; NOTE(review): the RTL wrapping the selection (original 7103-7104) and
;; the condition (7107) are not in this listing.
7101 (define_insn "*sse4_1_pextrb_<mode>"
7102 [(set (match_operand:SWI48 0 "register_operand" "=r")
7105 (match_operand:V16QI 1 "register_operand" "x")
7106 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7108 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7109 [(set_attr "type" "sselog")
7110 (set_attr "prefix_extra" "1")
7111 (set_attr "length_immediate" "1")
7112 (set_attr "prefix" "maybe_vex")
7113 (set_attr "mode" "TI")])
;; Byte extract stored directly to a QImode memory destination.  Operand 2
;; is a constant in 0..15 selecting the element of V16QI operand 1; the
;; instruction printed is %vpextrb.
;; NOTE(review): the selection operator and the condition fall on lines
;; missing from this listing.
7115 (define_insn "*sse4_1_pextrb_memory"
7116 [(set (match_operand:QI 0 "memory_operand" "=m")
7118 (match_operand:V16QI 1 "register_operand" "x")
7119 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7121 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7122 [(set_attr "type" "sselog")
7123 (set_attr "prefix_extra" "1")
7124 (set_attr "length_immediate" "1")
7125 (set_attr "prefix" "maybe_vex")
7126 (set_attr "mode" "TI")])
;; Word extract into a general register of an SWI48 mode.  Operand 2 is a
;; constant in 0..7 selecting the element of V8HI operand 1.  The
;; destination is printed with %k0, and the insn sets prefix_data16 rather
;; than prefix_extra.
;; NOTE(review): the RTL wrapping the selection and the condition are not
;; in this listing.
7128 (define_insn "*sse2_pextrw_<mode>"
7129 [(set (match_operand:SWI48 0 "register_operand" "=r")
7132 (match_operand:V8HI 1 "register_operand" "x")
7133 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7135 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7136 [(set_attr "type" "sselog")
7137 (set_attr "prefix_data16" "1")
7138 (set_attr "length_immediate" "1")
7139 (set_attr "prefix" "maybe_vex")
7140 (set_attr "mode" "TI")])
;; Word extract stored directly to an HImode memory destination.  Operand 2
;; is a constant in 0..7 selecting the element of V8HI operand 1; the
;; instruction printed is %vpextrw and prefix_extra is set to 1.
;; NOTE(review): the selection operator and the condition fall on lines
;; missing from this listing.
7142 (define_insn "*sse4_1_pextrw_memory"
7143 [(set (match_operand:HI 0 "memory_operand" "=m")
7145 (match_operand:V8HI 1 "register_operand" "x")
7146 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7148 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7149 [(set_attr "type" "sselog")
7150 (set_attr "prefix_extra" "1")
7151 (set_attr "length_immediate" "1")
7152 (set_attr "prefix" "maybe_vex")
7153 (set_attr "mode" "TI")])
;; Doubleword extract into an SImode register or memory destination (single
;; rm alternative).  Operand 2 is a constant in 0..3 selecting the element
;; of V4SI operand 1; the instruction printed is %vpextrd.
;; NOTE(review): the selection operator and the condition fall on lines
;; missing from this listing.
7155 (define_insn "*sse4_1_pextrd"
7156 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7158 (match_operand:V4SI 1 "register_operand" "x")
7159 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7161 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7162 [(set_attr "type" "sselog")
7163 (set_attr "prefix_extra" "1")
7164 (set_attr "length_immediate" "1")
7165 (set_attr "prefix" "maybe_vex")
7166 (set_attr "mode" "TI")])
;; Doubleword extract whose destination is a DImode general register;
;; enabled only for TARGET_64BIT && TARGET_SSE4_1.  Operand 2 is a constant
;; in 0..3 selecting the element of V4SI operand 1, and the destination is
;; printed with %k0.
;; NOTE(review): the RTL wrapping the selection (original 7170-7171) is not
;; in this listing; the name suggests a zero extension -- confirm.
7168 (define_insn "*sse4_1_pextrd_zext"
7169 [(set (match_operand:DI 0 "register_operand" "=r")
7172 (match_operand:V4SI 1 "register_operand" "x")
7173 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7174 "TARGET_64BIT && TARGET_SSE4_1"
7175 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7176 [(set_attr "type" "sselog")
7177 (set_attr "prefix_extra" "1")
7178 (set_attr "length_immediate" "1")
7179 (set_attr "prefix" "maybe_vex")
7180 (set_attr "mode" "TI")])
7182 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Quadword extract into a DImode register or memory destination; enabled
;; only for TARGET_SSE4_1 && TARGET_64BIT.  Operand 2 is a constant in 0..1
;; selecting the element of V2DI operand 1.  Sets prefix_rex to 1.
7183 (define_insn "*sse4_1_pextrq"
7184 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7186 (match_operand:V2DI 1 "register_operand" "x")
7187 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7188 "TARGET_SSE4_1 && TARGET_64BIT"
7189 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7190 [(set_attr "type" "sselog")
7191 (set_attr "prefix_rex" "1")
7192 (set_attr "prefix_extra" "1")
7193 (set_attr "length_immediate" "1")
7194 (set_attr "prefix" "maybe_vex")
7195 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255) for V8SI.
;; It splits the mask into four 2-bit selectors and passes them to
;; gen_avx2_pshufd_1 twice: unchanged (values 0..3) and offset by 4
;; (values 4..7).
7197 (define_expand "avx2_pshufdv3"
7198 [(match_operand:V8SI 0 "register_operand" "")
7199 (match_operand:V8SI 1 "nonimmediate_operand" "")
7200 (match_operand:SI 2 "const_0_to_255_operand" "")]
7203 int mask = INTVAL (operands[2]);
7204 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7205 GEN_INT ((mask >> 0) & 3),
7206 GEN_INT ((mask >> 2) & 3),
7207 GEN_INT ((mask >> 4) & 3),
7208 GEN_INT ((mask >> 6) & 3),
7209 GEN_INT (((mask >> 0) & 3) + 4),
7210 GEN_INT (((mask >> 2) & 3) + 4),
7211 GEN_INT (((mask >> 4) & 3) + 4),
7212 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle with explicit per-element selectors.  Operands 2..5 must be
;; constants in 0..3 and operands 6..9 constants in 4..7; the visible part
;; of the condition additionally requires each of operands 6..9 to equal
;; the corresponding operand 2..5 plus 4.  The output code folds operands
;; 2..5 back into one 8-bit immediate (2 bits each), stores it in operand 2
;; and prints vpshufd.
7216 (define_insn "avx2_pshufd_1"
7217 [(set (match_operand:V8SI 0 "register_operand" "=x")
7219 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7220 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7221 (match_operand 3 "const_0_to_3_operand" "")
7222 (match_operand 4 "const_0_to_3_operand" "")
7223 (match_operand 5 "const_0_to_3_operand" "")
7224 (match_operand 6 "const_4_to_7_operand" "")
7225 (match_operand 7 "const_4_to_7_operand" "")
7226 (match_operand 8 "const_4_to_7_operand" "")
7227 (match_operand 9 "const_4_to_7_operand" "")])))]
7229 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7230 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7231 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7232 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7235 mask |= INTVAL (operands[2]) << 0;
7236 mask |= INTVAL (operands[3]) << 2;
7237 mask |= INTVAL (operands[4]) << 4;
7238 mask |= INTVAL (operands[5]) << 6;
7239 operands[2] = GEN_INT (mask);
7241 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7243 [(set_attr "type" "sselog1")
7244 (set_attr "prefix" "vex")
7245 (set_attr "length_immediate" "1")
7246 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle immediate (operand 2) for V4SI.  It
;; splits the low 8 bits of the mask into four 2-bit selectors and passes
;; them to gen_sse2_pshufd_1.
7248 (define_expand "sse2_pshufd"
7249 [(match_operand:V4SI 0 "register_operand" "")
7250 (match_operand:V4SI 1 "nonimmediate_operand" "")
7251 (match_operand:SI 2 "const_int_operand" "")]
7254 int mask = INTVAL (operands[2]);
7255 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7256 GEN_INT ((mask >> 0) & 3),
7257 GEN_INT ((mask >> 2) & 3),
7258 GEN_INT ((mask >> 4) & 3),
7259 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with explicit per-element selectors.  Operands 2..5 must be
;; constants in 0..3.  The output code folds them into one 8-bit immediate
;; (2 bits each), stores it in operand 2 and prints %vpshufd.
7263 (define_insn "sse2_pshufd_1"
7264 [(set (match_operand:V4SI 0 "register_operand" "=x")
7266 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7267 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7268 (match_operand 3 "const_0_to_3_operand" "")
7269 (match_operand 4 "const_0_to_3_operand" "")
7270 (match_operand 5 "const_0_to_3_operand" "")])))]
7274 mask |= INTVAL (operands[2]) << 0;
7275 mask |= INTVAL (operands[3]) << 2;
7276 mask |= INTVAL (operands[4]) << 4;
7277 mask |= INTVAL (operands[5]) << 6;
7278 operands[2] = GEN_INT (mask);
7280 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7282 [(set_attr "type" "sselog1")
7283 (set_attr "prefix_data16" "1")
7284 (set_attr "prefix" "maybe_vex")
7285 (set_attr "length_immediate" "1")
7286 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255) for
;; V16HI.  It splits the mask into four 2-bit selectors and passes them to
;; gen_avx2_pshuflw_1 twice: unchanged (values 0..3) and offset by 8
;; (values 8..11).
7288 (define_expand "avx2_pshuflwv3"
7289 [(match_operand:V16HI 0 "register_operand" "")
7290 (match_operand:V16HI 1 "nonimmediate_operand" "")
7291 (match_operand:SI 2 "const_0_to_255_operand" "")]
7294 int mask = INTVAL (operands[2]);
7295 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7296 GEN_INT ((mask >> 0) & 3),
7297 GEN_INT ((mask >> 2) & 3),
7298 GEN_INT ((mask >> 4) & 3),
7299 GEN_INT ((mask >> 6) & 3),
7300 GEN_INT (((mask >> 0) & 3) + 8),
7301 GEN_INT (((mask >> 2) & 3) + 8),
7302 GEN_INT (((mask >> 4) & 3) + 8),
7303 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI low-word shuffle with explicit selectors.  Operands 2..5 must be
;; constants in 0..3 and operands 6..9 constants in 8..11; the visible part
;; of the condition requires each of operands 6..9 to equal the
;; corresponding operand 2..5 plus 8.  The output code folds operands 2..5
;; into one 8-bit immediate, stores it in operand 2 and prints vpshuflw.
;; NOTE(review): the remaining entries of the parallel (original 7315-7318
;; and 7323-7326) are not in this listing.
7307 (define_insn "avx2_pshuflw_1"
7308 [(set (match_operand:V16HI 0 "register_operand" "=x")
7310 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7311 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7312 (match_operand 3 "const_0_to_3_operand" "")
7313 (match_operand 4 "const_0_to_3_operand" "")
7314 (match_operand 5 "const_0_to_3_operand" "")
7319 (match_operand 6 "const_8_to_11_operand" "")
7320 (match_operand 7 "const_8_to_11_operand" "")
7321 (match_operand 8 "const_8_to_11_operand" "")
7322 (match_operand 9 "const_8_to_11_operand" "")
7328 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7329 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7330 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7331 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7334 mask |= INTVAL (operands[2]) << 0;
7335 mask |= INTVAL (operands[3]) << 2;
7336 mask |= INTVAL (operands[4]) << 4;
7337 mask |= INTVAL (operands[5]) << 6;
7338 operands[2] = GEN_INT (mask);
7340 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7342 [(set_attr "type" "sselog")
7343 (set_attr "prefix" "vex")
7344 (set_attr "length_immediate" "1")
7345 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle immediate (operand 2) for V8HI.  It
;; splits the low 8 bits of the mask into four 2-bit selectors and passes
;; them to gen_sse2_pshuflw_1.
7347 (define_expand "sse2_pshuflw"
7348 [(match_operand:V8HI 0 "register_operand" "")
7349 (match_operand:V8HI 1 "nonimmediate_operand" "")
7350 (match_operand:SI 2 "const_int_operand" "")]
7353 int mask = INTVAL (operands[2]);
7354 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7355 GEN_INT ((mask >> 0) & 3),
7356 GEN_INT ((mask >> 2) & 3),
7357 GEN_INT ((mask >> 4) & 3),
7358 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word shuffle with explicit selectors.  Operands 2..5 must be
;; constants in 0..3.  The output code folds them into one 8-bit immediate,
;; stores it in operand 2 and prints %vpshuflw.  Attributes set
;; prefix_data16 to 0 and prefix_rep to 1.
;; NOTE(review): the tail of the parallel (original 7370-7373) is not in
;; this listing.
7362 (define_insn "sse2_pshuflw_1"
7363 [(set (match_operand:V8HI 0 "register_operand" "=x")
7365 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7366 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7367 (match_operand 3 "const_0_to_3_operand" "")
7368 (match_operand 4 "const_0_to_3_operand" "")
7369 (match_operand 5 "const_0_to_3_operand" "")
7377 mask |= INTVAL (operands[2]) << 0;
7378 mask |= INTVAL (operands[3]) << 2;
7379 mask |= INTVAL (operands[4]) << 4;
7380 mask |= INTVAL (operands[5]) << 6;
7381 operands[2] = GEN_INT (mask);
7383 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7385 [(set_attr "type" "sselog")
7386 (set_attr "prefix_data16" "0")
7387 (set_attr "prefix_rep" "1")
7388 (set_attr "prefix" "maybe_vex")
7389 (set_attr "length_immediate" "1")
7390 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255) for
;; V16HI.  It splits the mask into four 2-bit selectors and passes them to
;; gen_avx2_pshufhw_1 twice: offset by 4 (values 4..7) and offset by 12
;; (values 12..15).
7392 (define_expand "avx2_pshufhwv3"
7393 [(match_operand:V16HI 0 "register_operand" "")
7394 (match_operand:V16HI 1 "nonimmediate_operand" "")
7395 (match_operand:SI 2 "const_0_to_255_operand" "")]
7398 int mask = INTVAL (operands[2]);
7399 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7400 GEN_INT (((mask >> 0) & 3) + 4),
7401 GEN_INT (((mask >> 2) & 3) + 4),
7402 GEN_INT (((mask >> 4) & 3) + 4),
7403 GEN_INT (((mask >> 6) & 3) + 4),
7404 GEN_INT (((mask >> 0) & 3) + 12),
7405 GEN_INT (((mask >> 2) & 3) + 12),
7406 GEN_INT (((mask >> 4) & 3) + 12),
7407 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI high-word shuffle with explicit selectors.  Operands 2..5 must be
;; constants in 4..7 and operands 6..9 constants in 12..15; the visible
;; part of the condition requires each of operands 6..9 to equal the
;; corresponding operand 2..5 plus 8.  The output code subtracts 4 from
;; operands 2..5, folds them into one 8-bit immediate, stores it in
;; operand 2 and prints vpshufhw.
;; NOTE(review): the fixed entries of the parallel between the visible
;; lines are not in this listing.
7411 (define_insn "avx2_pshufhw_1"
7412 [(set (match_operand:V16HI 0 "register_operand" "=x")
7414 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7415 (parallel [(const_int 0)
7419 (match_operand 2 "const_4_to_7_operand" "")
7420 (match_operand 3 "const_4_to_7_operand" "")
7421 (match_operand 4 "const_4_to_7_operand" "")
7422 (match_operand 5 "const_4_to_7_operand" "")
7427 (match_operand 6 "const_12_to_15_operand" "")
7428 (match_operand 7 "const_12_to_15_operand" "")
7429 (match_operand 8 "const_12_to_15_operand" "")
7430 (match_operand 9 "const_12_to_15_operand" "")])))]
7432 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7433 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7434 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7435 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7438 mask |= (INTVAL (operands[2]) - 4) << 0;
7439 mask |= (INTVAL (operands[3]) - 4) << 2;
7440 mask |= (INTVAL (operands[4]) - 4) << 4;
7441 mask |= (INTVAL (operands[5]) - 4) << 6;
7442 operands[2] = GEN_INT (mask);
7444 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7446 [(set_attr "type" "sselog")
7447 (set_attr "prefix" "vex")
7448 (set_attr "length_immediate" "1")
7449 (set_attr "mode" "OI")])
;; Expander taking a constant shuffle immediate (operand 2) for V8HI.  It
;; splits the low 8 bits of the mask into four 2-bit selectors, adds 4 to
;; each (giving values 4..7) and passes them to gen_sse2_pshufhw_1.
7451 (define_expand "sse2_pshufhw"
7452 [(match_operand:V8HI 0 "register_operand" "")
7453 (match_operand:V8HI 1 "nonimmediate_operand" "")
7454 (match_operand:SI 2 "const_int_operand" "")]
7457 int mask = INTVAL (operands[2]);
7458 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7459 GEN_INT (((mask >> 0) & 3) + 4),
7460 GEN_INT (((mask >> 2) & 3) + 4),
7461 GEN_INT (((mask >> 4) & 3) + 4),
7462 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word shuffle with explicit selectors.  Operands 2..5 must be
;; constants in 4..7.  The output code subtracts 4 from each, folds them
;; into one 8-bit immediate, stores it in operand 2 and prints %vpshufhw.
;; NOTE(review): the fixed entries of the parallel after (const_int 0)
;; (original 7471-7473) are not in this listing.
7466 (define_insn "sse2_pshufhw_1"
7467 [(set (match_operand:V8HI 0 "register_operand" "=x")
7469 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7470 (parallel [(const_int 0)
7474 (match_operand 2 "const_4_to_7_operand" "")
7475 (match_operand 3 "const_4_to_7_operand" "")
7476 (match_operand 4 "const_4_to_7_operand" "")
7477 (match_operand 5 "const_4_to_7_operand" "")])))]
7481 mask |= (INTVAL (operands[2]) - 4) << 0;
7482 mask |= (INTVAL (operands[3]) - 4) << 2;
7483 mask |= (INTVAL (operands[4]) - 4) << 4;
7484 mask |= (INTVAL (operands[5]) - 4) << 6;
7485 operands[2] = GEN_INT (mask);
7487 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7489 [(set_attr "type" "sselog")
7490 (set_attr "prefix_rep" "1")
7491 (set_attr "prefix_data16" "0")
7492 (set_attr "prefix" "maybe_vex")
7493 (set_attr "length_immediate" "1")
7494 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from SImode operand 1.  The
;; preparation statement sets operand 2 to the V4SImode zero constant.
;; NOTE(review): most of the RTL template (original 7498-7503) is not in
;; this listing, so how operand 2 is used cannot be seen here.
7496 (define_expand "sse2_loadd"
7497 [(set (match_operand:V4SI 0 "register_operand" "")
7500 (match_operand:SI 1 "nonimmediate_operand" ""))
7504 "operands[2] = CONST0_RTX (V4SImode);")
;; Five-alternative insn combining SImode operand 2 with V4SI operand 1.
;; Alternatives 0-2 require operand 1 to match constraint C and print
;; %vmovd (from memory, or from a general register into a Yi destination)
;; or movss from memory.  Alternative 3 ties operand 1 to the destination
;; and prints movss; alternative 4 prints the three-operand vmovss.
;; The isa attribute restricts them to sse2, any, noavx, noavx and avx.
;; NOTE(review): the RTL operators and the condition fall on lines missing
;; from this listing.
7506 (define_insn "sse2_loadld"
7507 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7510 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7511 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7515 %vmovd\t{%2, %0|%0, %2}
7516 %vmovd\t{%2, %0|%0, %2}
7517 movss\t{%2, %0|%0, %2}
7518 movss\t{%2, %0|%0, %2}
7519 vmovss\t{%2, %1, %0|%0, %1, %2}"
7520 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7521 (set_attr "type" "ssemov")
7522 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7523 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store of element 0 of V4SI operand 1 into an SImode destination.  After
;; reload, when TARGET_INTER_UNIT_MOVES holds, or the destination is
;; memory, or it is not a general register, the insn is split into a plain
;; set whose source is operand 1 re-created as an SImode register with the
;; same REGNO.
;; NOTE(review): the insn condition and output template fall on lines
;; missing from this listing.
7525 (define_insn_and_split "sse2_stored"
7526 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7528 (match_operand:V4SI 1 "register_operand" "x,Yi")
7529 (parallel [(const_int 0)])))]
7532 "&& reload_completed
7533 && (TARGET_INTER_UNIT_MOVES
7534 || MEM_P (operands [0])
7535 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7536 [(set (match_dup 0) (match_dup 1))]
7537 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract of the element selected by operand 2 (constant 0..3) from a V4SI
;; in offsettable memory into an SImode register.  The visible C code emits
;; a move from the memory operand re-addressed as SImode at byte offset
;; i*4, where i is the value of operand 2.
;; NOTE(review): the conditions and split template fall on lines missing
;; from this listing.
7539 (define_insn_and_split "*vec_ext_v4si_mem"
7540 [(set (match_operand:SI 0 "register_operand" "=r")
7542 (match_operand:V4SI 1 "memory_operand" "o")
7543 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7549 int i = INTVAL (operands[2]);
7551 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of V2DI register operand 1 into a DImode
;; register or memory destination.
;; NOTE(review): the selection operator and the condition fall on lines
;; missing from this listing.
7555 (define_expand "sse_storeq"
7556 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7558 (match_operand:V2DI 1 "register_operand" "")
7559 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from V2DI.  The condition requires
;; TARGET_64BIT and rejects a memory-to-memory operand pair.  There are
;; three alternatives; only the last template (mov{q}, source constraint o,
;; type imov, mode DI) is visible here, and the first two have type and
;; mode left as *.
;; NOTE(review): the first two output templates (original 7569-7570) are
;; not in this listing.
7562 (define_insn "*sse2_storeq_rex64"
7563 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7565 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7566 (parallel [(const_int 0)])))]
7567 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7571 mov{q}\t{%1, %0|%0, %1}"
7572 [(set_attr "type" "*,*,imov")
7573 (set_attr "mode" "*,*,DI")])
;; Element-0 store from V2DI register operand 1 into a DImode destination
;; with the single constraint alternative xm.
;; NOTE(review): the condition, output template and closing of this insn
;; (original 7580-7582) are not in this listing.
7575 (define_insn "*sse2_storeq"
7576 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7578 (match_operand:V2DI 1 "register_operand" "x")
7579 (parallel [(const_int 0)])))]
;; NOTE(review): the opening line of this pattern (original 7583) is not in
;; this listing; its shape (match template, condition, replacement
;; template, preparation statement) presumably makes it a define_split.
;; It matches the element-0 store from a V2DI register and, when
;; TARGET_INTER_UNIT_MOVES holds or the destination is memory or not a
;; general register, replaces it with a plain set whose source is operand 1
;; re-created as a DImode register with the same REGNO.
7584 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7586 (match_operand:V2DI 1 "register_operand" "")
7587 (parallel [(const_int 0)])))]
7590 && (TARGET_INTER_UNIT_MOVES
7591 || MEM_P (operands [0])
7592 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7593 [(set (match_dup 0) (match_dup 1))]
7594 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extract of element 1 of V2DI operand 1; rejects a
;; memory-to-memory operand pair.  Alternatives in order: %vmovhps store to
;; memory; psrldq by 8 in place (noavx, operand 1 tied to destination);
;; vpsrldq by 8 (avx); %vmovq from %H1 of an offsettable memory source;
;; mov{q} from %H1 into a general register.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-print codes.
7596 (define_insn "*vec_extractv2di_1_rex64"
7597 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7599 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7600 (parallel [(const_int 1)])))]
7601 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7603 %vmovhps\t{%1, %0|%0, %1}
7604 psrldq\t{$8, %0|%0, 8}
7605 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7606 %vmovq\t{%H1, %0|%0, %H1}
7607 mov{q}\t{%H1, %0|%0, %H1}"
7608 [(set_attr "isa" "*,noavx,avx,*,*")
7609 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7610 (set_attr "length_immediate" "*,1,1,*,*")
7611 (set_attr "memory" "*,none,none,*,*")
7612 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7613 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) extract of element 1 of V2DI
;; operand 1; rejects a memory-to-memory operand pair.  Alternatives in
;; order: %vmovhps store to memory; psrldq by 8 in place (sse2_noavx);
;; vpsrldq by 8 (avx); %vmovq from %H1 of memory (sse2); movhlps from a
;; register (noavx); movlps from %H1 of memory (noavx).
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-print codes.
7615 (define_insn "*vec_extractv2di_1"
7616 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7618 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7619 (parallel [(const_int 1)])))]
7620 "!TARGET_64BIT && TARGET_SSE
7621 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7623 %vmovhps\t{%1, %0|%0, %1}
7624 psrldq\t{$8, %0|%0, 8}
7625 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7626 %vmovq\t{%H1, %0|%0, %H1}
7627 movhlps\t{%1, %0|%0, %1}
7628 movlps\t{%H1, %0|%0, %H1}"
7629 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7630 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7631 (set_attr "length_immediate" "*,1,1,*,*,*")
7632 (set_attr "memory" "*,none,none,*,*,*")
7633 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7634 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Pattern producing a V4SI register from SImode operand 1.  Alternatives
;; in order: %vpshufd with immediate 0 from a register (sse2);
;; vbroadcastss from memory (avx); shufps with immediate 0 on the
;; destination itself, operand 1 tied to it (noavx).
;; NOTE(review): the RTL operator (original 7638) and the condition are not
;; in this listing; the name indicates a vec_duplicate.
7636 (define_insn "*vec_dupv4si"
7637 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7639 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7642 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7643 vbroadcastss\t{%1, %0|%0, %1}
7644 shufps\t{$0, %0, %0|%0, %0, 0}"
7645 [(set_attr "isa" "sse2,avx,noavx")
7646 (set_attr "type" "sselog1,ssemov,sselog1")
7647 (set_attr "length_immediate" "1,0,1")
7648 (set_attr "prefix_extra" "0,1,*")
7649 (set_attr "prefix" "maybe_vex,vex,orig")
7650 (set_attr "mode" "TI,V4SF,V4SF")])
;; Pattern producing a V2DI register from DImode operand 1, with four
;; alternatives enabled for sse2_noavx, avx, sse3 and noavx respectively.
;; The two templates visible here are vpunpcklqdq (using %d1) and
;; %vmovddup.
;; NOTE(review): the RTL operator, the condition and the first and last
;; output templates fall on lines missing from this listing.
7652 (define_insn "*vec_dupv2di"
7653 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7655 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7659 vpunpcklqdq\t{%d1, %0|%0, %d1}
7660 %vmovddup\t{%1, %0|%0, %1}
7662 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7663 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7664 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7665 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Seven-alternative pattern building a V2SI from SImode operands 1 and 2.
;; Alternatives in order: pinsrd / vpinsrd of operand 2 at index 1;
;; punpckldq / vpunpckldq of two registers; %vmovd of operand 1 when
;; operand 2 matches constraint C; and two MMX forms (punpckldq, movd) on
;; *y registers.
;; NOTE(review): the RTL operator and the condition fall on lines missing
;; from this listing.
7667 (define_insn "*vec_concatv2si_sse4_1"
7668 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7670 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7671 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7674 pinsrd\t{$1, %2, %0|%0, %2, 1}
7675 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7676 punpckldq\t{%2, %0|%0, %2}
7677 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7678 %vmovd\t{%1, %0|%0, %1}
7679 punpckldq\t{%2, %0|%0, %2}
7680 movd\t{%1, %0|%0, %1}"
7681 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7682 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7683 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7684 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7685 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7686 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7688 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7689 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7690 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives building a V2SI from SImode operands 1 and 2: SSE
;; punpckldq of two registers, SSE movd of operand 1 when operand 2 matches
;; constraint C, and the same two forms on MMX (*y) registers.
;; NOTE(review): the RTL operator and the condition fall on lines missing
;; from this listing.
7691 (define_insn "*vec_concatv2si_sse2"
7692 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7694 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7695 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7698 punpckldq\t{%2, %0|%0, %2}
7699 movd\t{%1, %0|%0, %1}
7700 punpckldq\t{%2, %0|%0, %2}
7701 movd\t{%1, %0|%0, %1}"
7702 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7703 (set_attr "mode" "TI,TI,DI,DI")])
;; Four alternatives building a V2SI from SImode operands 1 and 2 using
;; SSE1-style instructions: unpcklps of two registers, movss from memory
;; when operand 2 matches constraint C, and the MMX punpckldq / movd forms
;; on *y registers.
;; NOTE(review): the RTL operator and the condition fall on lines missing
;; from this listing.
7705 (define_insn "*vec_concatv2si_sse"
7706 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7708 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7709 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7712 unpcklps\t{%2, %0|%0, %2}
7713 movss\t{%1, %0|%0, %1}
7714 punpckldq\t{%2, %0|%0, %2}
7715 movd\t{%1, %0|%0, %1}"
7716 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7717 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Five alternatives building a V4SI from V2SI operands 1 and 2.
;; Register sources: punpcklqdq (sse2_noavx), vpunpcklqdq (avx), movlhps
;; (noavx).  Memory operand 2: movhps (noavx), vmovhps (avx).  The non-AVX
;; alternatives tie operand 1 to the destination.
;; NOTE(review): the RTL operator and the condition fall on lines missing
;; from this listing.
7719 (define_insn "*vec_concatv4si"
7720 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7722 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7723 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7726 punpcklqdq\t{%2, %0|%0, %2}
7727 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7728 movlhps\t{%2, %0|%0, %2}
7729 movhps\t{%2, %0|%0, %2}
7730 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7731 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7732 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7733 (set_attr "prefix" "orig,vex,orig,orig,vex")
7734 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7736 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI register from two DImode operands, with nine alternatives:
;;   0-1  pinsrq / vpinsrq insert operand 2 (register or memory) at index 1;
;;   2-4  operand 2 matches "C": operand 1 is moved in with %vmovq (from
;;        "xm"), %vmovd (from a general register) or movq2dq (from "y");
;;   5-6  punpcklqdq / vpunpcklqdq merge register operand 2;
;;   7-8  movhps / vmovhps load memory operand 2.
;; NOTE(review): the "_rex64" suffix suggests this is the 64-bit counterpart
;; of vec_concatv2di (which is conditioned on !TARGET_64BIT) -- confirm
;; against the insn condition.
7737 (define_insn "*vec_concatv2di_rex64"
7738 [(set (match_operand:V2DI 0 "register_operand"
7739 "=x,x ,x ,Yi,!x,x,x,x,x")
7741 (match_operand:DI 1 "nonimmediate_operand"
7742 " 0,x ,xm,r ,*y,0,x,0,x")
7743 (match_operand:DI 2 "vector_move_operand"
7744 "rm,rm,C ,C ,C ,x,x,m,m")))]
7747 pinsrq\t{$1, %2, %0|%0, %2, 1}
7748 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7749 %vmovq\t{%1, %0|%0, %1}
7750 %vmovd\t{%1, %0|%0, %1}
7751 movq2dq\t{%1, %0|%0, %1}
7752 punpcklqdq\t{%2, %0|%0, %2}
7753 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7754 movhps\t{%2, %0|%0, %2}
7755 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7756 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7759 (eq_attr "alternative" "0,1,5,6")
7760 (const_string "sselog")
7761 (const_string "ssemov")))
7762 (set (attr "prefix_rex")
7764 (and (eq_attr "alternative" "0,3")
7765 (not (match_test "TARGET_AVX")))
7767 (const_string "*")))
7768 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7769 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7770 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7771 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Builds a V2DI register from two DImode operands on 32-bit SSE targets
;; (condition: !TARGET_64BIT && TARGET_SSE).  Alternatives 0-1 handle an
;; operand 2 matching "C" (%vmovq from "xm", movq2dq from "y"); 2-4 merge
;; register operand 2 (punpcklqdq, vpunpcklqdq, movlhps); 5-6 load memory
;; operand 2 (movhps, vmovhps).  The "isa" attribute restricts each
;; alternative to the instruction sets that provide it.
7773 (define_insn "vec_concatv2di"
7774 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7776 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7777 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7778 "!TARGET_64BIT && TARGET_SSE"
7780 %vmovq\t{%1, %0|%0, %1}
7781 movq2dq\t{%1, %0|%0, %1}
7782 punpcklqdq\t{%2, %0|%0, %2}
7783 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7784 movlhps\t{%2, %0|%0, %2}
7785 movhps\t{%2, %0|%0, %2}
7786 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7787 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7788 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7789 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7790 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo over the VI124_AVX2 modes; the result has the
;; mode <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with both flag arguments false, after which DONE ends expansion.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half" --
;; confirm against the helper's definition.
7792 (define_expand "vec_unpacks_lo_<mode>"
7793 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7794 (match_operand:VI124_AVX2 1 "register_operand" "")]
7796 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for vec_unpacks_hi over the VI124_AVX2 modes; the result has the
;; mode <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with flag arguments (false, true), after which DONE ends expansion.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half" --
;; confirm against the helper's definition.
7798 (define_expand "vec_unpacks_hi_<mode>"
7799 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7800 (match_operand:VI124_AVX2 1 "register_operand" "")]
7802 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for vec_unpacku_lo over the VI124_AVX2 modes; the result has the
;; mode <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with flag arguments (true, false), after which DONE ends expansion.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half" --
;; confirm against the helper's definition.
7804 (define_expand "vec_unpacku_lo_<mode>"
7805 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7806 (match_operand:VI124_AVX2 1 "register_operand" "")]
7808 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for vec_unpacku_hi over the VI124_AVX2 modes; the result has the
;; mode <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with both flag arguments true, after which DONE ends expansion.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half" --
;; confirm against the helper's definition.
7810 (define_expand "vec_unpacku_hi_<mode>"
7811 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7812 (match_operand:VI124_AVX2 1 "register_operand" "")]
7814 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7816 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI unsigned-average pattern (the matching insn,
;; *avx2_uavgv32qi3, emits vpavgb).  The pattern carries a 32-element
;; all-ones constant vector; before emission the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for a PLUS in V32QImode.
;; NOTE(review): the all-ones vector is presumably the +1 rounding term of
;; (a + b + 1) >> 1 -- confirm against the full pattern.
7822 (define_expand "avx2_uavgv32qi3"
7823 [(set (match_operand:V32QI 0 "register_operand" "")
7829 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7831 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7832 (const_vector:V32QI [(const_int 1) (const_int 1)
7833 (const_int 1) (const_int 1)
7834 (const_int 1) (const_int 1)
7835 (const_int 1) (const_int 1)
7836 (const_int 1) (const_int 1)
7837 (const_int 1) (const_int 1)
7838 (const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)
7847 (const_int 1) (const_int 1)]))
7850 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI unsigned-average pattern (the matching insn,
;; *sse2_uavgv16qi3, emits pavgb / vpavgb).  The pattern carries a
;; 16-element all-ones constant vector; before emission the operands are
;; adjusted by ix86_fixup_binary_operands_no_copy for a PLUS in V16QImode.
;; NOTE(review): the all-ones vector is presumably the +1 rounding term of
;; (a + b + 1) >> 1 -- confirm against the full pattern.
7852 (define_expand "sse2_uavgv16qi3"
7853 [(set (match_operand:V16QI 0 "register_operand" "")
7859 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7861 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7862 (const_vector:V16QI [(const_int 1) (const_int 1)
7863 (const_int 1) (const_int 1)
7864 (const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)]))
7872 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI unsigned average; emits the three-operand
;; vpavgb.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  The "%" on operand 1 marks operands 1
;; and 2 as commutative; only operand 2 may be in memory.
7874 (define_insn "*avx2_uavgv32qi3"
7875 [(set (match_operand:V32QI 0 "register_operand" "=x")
7881 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7883 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7884 (const_vector:V32QI [(const_int 1) (const_int 1)
7885 (const_int 1) (const_int 1)
7886 (const_int 1) (const_int 1)
7887 (const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)
7898 (const_int 1) (const_int 1)
7899 (const_int 1) (const_int 1)]))
7901 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7902 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7903 [(set_attr "type" "sseiadd")
7904 (set_attr "prefix" "vex")
7905 (set_attr "mode" "OI")])
;; Matching insn for the V16QI unsigned average.  Alternative 0 (noavx) is
;; the two-operand pavgb with operand 1 tied to the destination; alternative
;; 1 (avx) is the three-operand vpavgb.  Requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS; "%" marks operands 1 and 2
;; as commutative.
7907 (define_insn "*sse2_uavgv16qi3"
7908 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7914 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7916 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7917 (const_vector:V16QI [(const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)
7920 (const_int 1) (const_int 1)
7921 (const_int 1) (const_int 1)
7922 (const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)]))
7926 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7928 pavgb\t{%2, %0|%0, %2}
7929 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7930 [(set_attr "isa" "noavx,avx")
7931 (set_attr "type" "sseiadd")
7932 (set_attr "prefix_data16" "1,*")
7933 (set_attr "prefix" "orig,vex")
7934 (set_attr "mode" "TI")])
;; Expander for the V16HI unsigned-average pattern (the matching insn,
;; *avx2_uavgv16hi3, emits vpavgw).  The pattern carries a 16-element
;; all-ones constant vector; before emission the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for a PLUS in V16HImode.
;; NOTE(review): the all-ones vector is presumably the +1 rounding term of
;; (a + b + 1) >> 1 -- confirm against the full pattern.
7936 (define_expand "avx2_uavgv16hi3"
7937 [(set (match_operand:V16HI 0 "register_operand" "")
7943 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7945 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7946 (const_vector:V16HI [(const_int 1) (const_int 1)
7947 (const_int 1) (const_int 1)
7948 (const_int 1) (const_int 1)
7949 (const_int 1) (const_int 1)
7950 (const_int 1) (const_int 1)
7951 (const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)]))
7956 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI unsigned-average pattern (the matching insn,
;; *sse2_uavgv8hi3, emits pavgw / vpavgw).  The pattern carries an 8-element
;; all-ones constant vector; before emission the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for a PLUS in V8HImode.
;; NOTE(review): the all-ones vector is presumably the +1 rounding term of
;; (a + b + 1) >> 1 -- confirm against the full pattern.
7958 (define_expand "sse2_uavgv8hi3"
7959 [(set (match_operand:V8HI 0 "register_operand" "")
7965 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7967 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7968 (const_vector:V8HI [(const_int 1) (const_int 1)
7969 (const_int 1) (const_int 1)
7970 (const_int 1) (const_int 1)
7971 (const_int 1) (const_int 1)]))
7974 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI unsigned average; emits the three-operand
;; vpavgw.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.  The "%" on operand 1 marks operands 1
;; and 2 as commutative; only operand 2 may be in memory.
7976 (define_insn "*avx2_uavgv16hi3"
7977 [(set (match_operand:V16HI 0 "register_operand" "=x")
7983 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7985 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7986 (const_vector:V16HI [(const_int 1) (const_int 1)
7987 (const_int 1) (const_int 1)
7988 (const_int 1) (const_int 1)
7989 (const_int 1) (const_int 1)
7990 (const_int 1) (const_int 1)
7991 (const_int 1) (const_int 1)
7992 (const_int 1) (const_int 1)
7993 (const_int 1) (const_int 1)]))
7995 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7996 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7997 [(set_attr "type" "sseiadd")
7998 (set_attr "prefix" "vex")
7999 (set_attr "mode" "OI")])
;; Matching insn for the V8HI unsigned average.  Alternative 0 (noavx) is
;; the two-operand pavgw with operand 1 tied to the destination; alternative
;; 1 (avx) is the three-operand vpavgw.  Requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS; "%" marks operands 1 and 2
;; as commutative.
8001 (define_insn "*sse2_uavgv8hi3"
8002 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8008 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
8010 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
8011 (const_vector:V8HI [(const_int 1) (const_int 1)
8012 (const_int 1) (const_int 1)
8013 (const_int 1) (const_int 1)
8014 (const_int 1) (const_int 1)]))
8016 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8018 pavgw\t{%2, %0|%0, %2}
8019 vpavgw\t{%2, %1, %0|%0, %1, %2}"
8020 [(set_attr "isa" "noavx,avx")
8021 (set_attr "type" "sseiadd")
8022 (set_attr "prefix_data16" "1,*")
8023 (set_attr "prefix" "orig,vex")
8024 (set_attr "mode" "TI")])
8026 ;; The correct representation for this is absolutely enormous, and
8027 ;; surely not generally useful.
;; psadbw is therefore described as an unspec.  The result mode iterates
;; over VI8_AVX2 while both inputs use the corresponding <ssebytemode>.
;; Alternative 0 (noavx) is the two-operand psadbw with operand 1 tied to
;; the destination; alternative 1 (avx) is the three-operand vpsadbw.
8028 (define_insn "<sse2_avx2>_psadbw"
8029 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
8030 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
8031 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
8035 psadbw\t{%2, %0|%0, %2}
8036 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8037 [(set_attr "isa" "noavx,avx")
8038 (set_attr "type" "sseiadd")
8039 (set_attr "atom_unit" "simul")
8040 (set_attr "prefix_data16" "1,*")
8041 (set_attr "prefix" "orig,vex")
8042 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd family: produces an SImode value in a general register
;; ("r") from a float vector register operand, iterated over the VF modes.
;; The "%v" template prefix pairs with the maybe_vex prefix attribute.
8044 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
8045 [(set (match_operand:SI 0 "register_operand" "=r")
8047 [(match_operand:VF 1 "register_operand" "x")]
8050 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8051 [(set_attr "type" "ssemov")
8052 (set_attr "prefix" "maybe_vex")
8053 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a 256-bit byte vector: produces an SImode value in a general
;; register ("r") from a V32QI register operand, expressed as an unspec.
;; NOTE(review): the "mode" attribute is DI here while sse2_pmovmskb uses
;; SI -- check whether the difference is intentional.
8055 (define_insn "avx2_pmovmskb"
8056 [(set (match_operand:SI 0 "register_operand" "=r")
8057 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
8060 "vpmovmskb\t{%1, %0|%0, %1}"
8061 [(set_attr "type" "ssemov")
8062 (set_attr "prefix" "vex")
8063 (set_attr "mode" "DI")])
;; pmovmskb on a 128-bit byte vector: produces an SImode value in a general
;; register ("r") from a V16QI register operand, expressed as an unspec.
;; The "%v" template prefix pairs with the maybe_vex prefix attribute.
8065 (define_insn "sse2_pmovmskb"
8066 [(set (match_operand:SI 0 "register_operand" "=r")
8067 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8070 "%vpmovmskb\t{%1, %0|%0, %1}"
8071 [(set_attr "type" "ssemov")
8072 (set_attr "prefix_data16" "1")
8073 (set_attr "prefix" "maybe_vex")
8074 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: stores to V16QI memory operand 0 an unspec of
;; the two V16QI register operands 1 and 2.  The matching insn is
;; *sse2_maskmovdqu.
8076 (define_expand "sse2_maskmovdqu"
8077 [(set (match_operand:V16QI 0 "memory_operand" "")
8078 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8079 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The destination is a V16QI memory reference
;; whose address is operand 0, constrained to the "D" register; that address
;; register is implicit in the instruction, so the template prints only
;; operands 1 and 2.  The previous memory contents appear as an unspec input
;; via (match_dup 0).  length_vex is computed explicitly from operand 2.
8084 (define_insn "*sse2_maskmovdqu"
8085 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8086 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8087 (match_operand:V16QI 2 "register_operand" "x")
8088 (mem:V16QI (match_dup 0))]
8091 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8092 [(set_attr "type" "ssemov")
8093 (set_attr "prefix_data16" "1")
8094 ;; The implicit %rdi operand confuses default length_vex computation.
8095 (set (attr "length_vex")
8096 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8097 (set_attr "prefix" "maybe_vex")
8098 (set_attr "mode" "TI")])
;; Loads MXCSR from SImode memory operand 0.  Expressed as an
;; unspec_volatile; the "memory" attribute is "load" and atom_sse_attr is
;; "mxcsr".
8100 (define_insn "sse_ldmxcsr"
8101 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8105 [(set_attr "type" "sse")
8106 (set_attr "atom_sse_attr" "mxcsr")
8107 (set_attr "prefix" "maybe_vex")
8108 (set_attr "memory" "load")])
;; Stores MXCSR to SImode memory operand 0.  The source is the
;; unspec_volatile UNSPECV_STMXCSR; the "memory" attribute is "store" and
;; atom_sse_attr is "mxcsr".
8110 (define_insn "sse_stmxcsr"
8111 [(set (match_operand:SI 0 "memory_operand" "=m")
8112 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8115 [(set_attr "type" "sse")
8116 (set_attr "atom_sse_attr" "mxcsr")
8117 (set_attr "prefix" "maybe_vex")
8118 (set_attr "memory" "store")])
;; clflush on the address given by operand 0 (an address_operand with the
;; "p" constraint).  Expressed as an unspec_volatile; the "memory" attribute
;; is "unknown" and atom_sse_attr is "fence".
8120 (define_insn "sse2_clflush"
8121 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8125 [(set_attr "type" "sse")
8126 (set_attr "atom_sse_attr" "fence")
8127 (set_attr "memory" "unknown")])
;; mwait, expressed as an unspec_volatile.  Its two SImode operands are
;; pinned to the "a" and "c" registers; the encoded length is fixed at 3.
8130 (define_insn "sse3_mwait"
8131 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8132 (match_operand:SI 1 "register_operand" "c")]
8135 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8136 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8137 ;; we only need to set up 32bit registers.
8139 [(set_attr "length" "3")])
;; 32-bit monitor (condition: TARGET_SSE3 && !TARGET_64BIT), expressed as an
;; unspec_volatile.  The three SImode operands are pinned to the "a", "c"
;; and "d" registers; the encoded length is fixed at 3.
8141 (define_insn "sse3_monitor"
8142 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8143 (match_operand:SI 1 "register_operand" "c")
8144 (match_operand:SI 2 "register_operand" "d")]
8146 "TARGET_SSE3 && !TARGET_64BIT"
8147 "monitor\t%0, %1, %2"
8148 [(set_attr "length" "3")])
;; 64-bit monitor (condition: TARGET_SSE3 && TARGET_64BIT), expressed as an
;; unspec_volatile.  Operand 0 is DImode in the "a" register; operands 1 and
;; 2 stay SImode in the "c" and "d" registers.  Encoded length is fixed at 3.
8150 (define_insn "sse3_monitor64"
8151 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8152 (match_operand:SI 1 "register_operand" "c")
8153 (match_operand:SI 2 "register_operand" "d")]
8155 "TARGET_SSE3 && TARGET_64BIT"
8156 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8157 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8158 ;; zero extended to 64bit, we only need to set up 32bit registers.
8160 [(set_attr "length" "3")])
8162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8164 ;; SSSE3 instructions
8166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit horizontal add of 16-bit elements; emits the three-operand
;; vphaddw.  The RTL selects adjacent HImode element pairs (0,1) ... (14,15)
;; from operand 1, followed by the same pairs from operand 2.
;; NOTE(review): the hardware instruction works independently on each
;; 128-bit lane, interleaving results from both sources per lane; this
;; pattern lists all of operand 1's pairs before operand 2's -- verify that
;; the element order matches the instruction.
8168 (define_insn "avx2_phaddwv16hi3"
8169 [(set (match_operand:V16HI 0 "register_operand" "=x")
8176 (match_operand:V16HI 1 "register_operand" "x")
8177 (parallel [(const_int 0)]))
8178 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8180 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8181 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8185 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8187 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8199 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8200 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8209 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8210 (parallel [(const_int 0)]))
8211 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8218 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8220 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8225 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8226 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8228 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8232 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8233 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8235 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8238 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8239 [(set_attr "type" "sseiadd")
8240 (set_attr "prefix_extra" "1")
8241 (set_attr "prefix" "vex")
8242 (set_attr "mode" "OI")])
;; 128-bit horizontal add of 16-bit elements.  The RTL selects adjacent
;; HImode element pairs (0,1) ... (6,7) from operand 1, then the same pairs
;; from operand 2.  Alternative 0 (noavx) is the two-operand phaddw with
;; operand 1 tied to the destination; alternative 1 (avx) is vphaddw.
8244 (define_insn "ssse3_phaddwv8hi3"
8245 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8251 (match_operand:V8HI 1 "register_operand" "0,x")
8252 (parallel [(const_int 0)]))
8253 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8255 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8259 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8260 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8262 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8268 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8269 (parallel [(const_int 0)]))
8270 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8272 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8276 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8277 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8283 phaddw\t{%2, %0|%0, %2}
8284 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8285 [(set_attr "isa" "noavx,avx")
8286 (set_attr "type" "sseiadd")
8287 (set_attr "atom_unit" "complex")
8288 (set_attr "prefix_data16" "1,*")
8289 (set_attr "prefix_extra" "1")
8290 (set_attr "prefix" "orig,vex")
8291 (set_attr "mode" "TI")])
;; 64-bit ("y"-register) horizontal add of 16-bit elements; emits the
;; two-operand phaddw with operand 1 tied to the destination.  The RTL
;; selects HImode pairs (0,1), (2,3) from operand 1, then from operand 2.
;; prefix_rex is computed with x86_extended_reg_mentioned_p (insn).
8293 (define_insn "ssse3_phaddwv4hi3"
8294 [(set (match_operand:V4HI 0 "register_operand" "=y")
8299 (match_operand:V4HI 1 "register_operand" "0")
8300 (parallel [(const_int 0)]))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8303 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8308 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8309 (parallel [(const_int 0)]))
8310 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8312 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8313 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8315 "phaddw\t{%2, %0|%0, %2}"
8316 [(set_attr "type" "sseiadd")
8317 (set_attr "atom_unit" "complex")
8318 (set_attr "prefix_extra" "1")
8319 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8320 (set_attr "mode" "DI")])
;; 256-bit horizontal add of 32-bit elements; emits the three-operand
;; vphaddd.  The RTL selects adjacent SImode element pairs (0,1) ... (6,7)
;; from operand 1, followed by the same pairs from operand 2.
;; NOTE(review): the hardware instruction works independently on each
;; 128-bit lane, interleaving results from both sources per lane; this
;; pattern lists all of operand 1's pairs before operand 2's -- verify that
;; the element order matches the instruction.
8322 (define_insn "avx2_phadddv8si3"
8323 [(set (match_operand:V8SI 0 "register_operand" "=x")
8329 (match_operand:V8SI 1 "register_operand" "x")
8330 (parallel [(const_int 0)]))
8331 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8333 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8334 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8337 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8338 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8340 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8341 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8346 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8347 (parallel [(const_int 0)]))
8348 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8350 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8351 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8354 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8355 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8357 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8358 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8360 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8361 [(set_attr "type" "sseiadd")
8362 (set_attr "prefix_extra" "1")
8363 (set_attr "prefix" "vex")
8364 (set_attr "mode" "OI")])
;; 128-bit horizontal add of 32-bit elements.  The RTL selects SImode pairs
;; (0,1), (2,3) from operand 1, then the same pairs from operand 2.
;; Alternative 0 (noavx) is the two-operand phaddd with operand 1 tied to
;; the destination; alternative 1 (avx) is vphaddd.
8366 (define_insn "ssse3_phadddv4si3"
8367 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8372 (match_operand:V4SI 1 "register_operand" "0,x")
8373 (parallel [(const_int 0)]))
8374 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8376 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8377 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8381 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8382 (parallel [(const_int 0)]))
8383 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8385 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8386 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8389 phaddd\t{%2, %0|%0, %2}
8390 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8391 [(set_attr "isa" "noavx,avx")
8392 (set_attr "type" "sseiadd")
8393 (set_attr "atom_unit" "complex")
8394 (set_attr "prefix_data16" "1,*")
8395 (set_attr "prefix_extra" "1")
8396 (set_attr "prefix" "orig,vex")
8397 (set_attr "mode" "TI")])
;; 64-bit ("y"-register) horizontal add of 32-bit elements; emits the
;; two-operand phaddd with operand 1 tied to the destination.  The RTL
;; selects the SImode pair (0,1) from operand 1, then from operand 2.
;; prefix_rex is computed with x86_extended_reg_mentioned_p (insn).
8399 (define_insn "ssse3_phadddv2si3"
8400 [(set (match_operand:V2SI 0 "register_operand" "=y")
8404 (match_operand:V2SI 1 "register_operand" "0")
8405 (parallel [(const_int 0)]))
8406 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8409 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8410 (parallel [(const_int 0)]))
8411 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8413 "phaddd\t{%2, %0|%0, %2}"
8414 [(set_attr "type" "sseiadd")
8415 (set_attr "atom_unit" "complex")
8416 (set_attr "prefix_extra" "1")
8417 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8418 (set_attr "mode" "DI")])
;; 256-bit horizontal add of 16-bit elements, saturating variant per the
;; vphaddsw mnemonic it emits.  The RTL selects adjacent HImode element
;; pairs (0,1) ... (14,15) from operand 1, followed by the same pairs from
;; operand 2.
;; NOTE(review): the hardware instruction works independently on each
;; 128-bit lane, interleaving results from both sources per lane; this
;; pattern lists all of operand 1's pairs before operand 2's -- verify that
;; the element order matches the instruction.
8420 (define_insn "avx2_phaddswv16hi3"
8421 [(set (match_operand:V16HI 0 "register_operand" "=x")
8428 (match_operand:V16HI 1 "register_operand" "x")
8429 (parallel [(const_int 0)]))
8430 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8432 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8437 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8452 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8454 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8455 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8461 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8462 (parallel [(const_int 0)]))
8463 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8465 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8490 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8491 [(set_attr "type" "sseiadd")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "prefix" "vex")
8494 (set_attr "mode" "OI")])
;; 128-bit horizontal add of 16-bit elements, saturating variant per the
;; phaddsw mnemonic.  The RTL selects adjacent HImode pairs (0,1) ... (6,7)
;; from operand 1, then from operand 2.  Alternative 0 (noavx) is the
;; two-operand phaddsw with operand 1 tied to the destination; alternative 1
;; (avx) is vphaddsw.
8496 (define_insn "ssse3_phaddswv8hi3"
8497 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8503 (match_operand:V8HI 1 "register_operand" "0,x")
8504 (parallel [(const_int 0)]))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8507 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8520 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8521 (parallel [(const_int 0)]))
8522 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8524 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8535 phaddsw\t{%2, %0|%0, %2}
8536 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8537 [(set_attr "isa" "noavx,avx")
8538 (set_attr "type" "sseiadd")
8539 (set_attr "atom_unit" "complex")
8540 (set_attr "prefix_data16" "1,*")
8541 (set_attr "prefix_extra" "1")
8542 (set_attr "prefix" "orig,vex")
8543 (set_attr "mode" "TI")])
;; 64-bit ("y"-register) horizontal add of 16-bit elements, saturating
;; variant per the phaddsw mnemonic; two-operand form with operand 1 tied to
;; the destination.  The RTL selects HImode pairs (0,1), (2,3) from operand
;; 1, then from operand 2.  prefix_rex is computed with
;; x86_extended_reg_mentioned_p (insn).
8545 (define_insn "ssse3_phaddswv4hi3"
8546 [(set (match_operand:V4HI 0 "register_operand" "=y")
8551 (match_operand:V4HI 1 "register_operand" "0")
8552 (parallel [(const_int 0)]))
8553 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8555 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8560 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8561 (parallel [(const_int 0)]))
8562 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8564 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8567 "phaddsw\t{%2, %0|%0, %2}"
8568 [(set_attr "type" "sseiadd")
8569 (set_attr "atom_unit" "complex")
8570 (set_attr "prefix_extra" "1")
8571 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8572 (set_attr "mode" "DI")])
;; 256-bit horizontal subtract of 16-bit elements; emits the three-operand
;; vphsubw.  The RTL selects adjacent HImode element pairs (0,1) ... (14,15)
;; from operand 1, followed by the same pairs from operand 2.
;; NOTE(review): the hardware instruction works independently on each
;; 128-bit lane, interleaving results from both sources per lane; this
;; pattern lists all of operand 1's pairs before operand 2's -- verify that
;; the element order matches the instruction.
8574 (define_insn "avx2_phsubwv16hi3"
8575 [(set (match_operand:V16HI 0 "register_operand" "=x")
8582 (match_operand:V16HI 1 "register_operand" "x")
8583 (parallel [(const_int 0)]))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8586 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8591 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8593 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8594 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8598 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8599 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8605 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8606 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8608 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8609 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8615 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8616 (parallel [(const_int 0)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8619 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8624 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8627 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8631 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8634 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8641 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8642 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8644 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8645 [(set_attr "type" "sseiadd")
8646 (set_attr "prefix_extra" "1")
8647 (set_attr "prefix" "vex")
8648 (set_attr "mode" "OI")])
;; 128-bit horizontal subtract of 16-bit elements.  The RTL selects adjacent
;; HImode pairs (0,1) ... (6,7) from operand 1, then the same pairs from
;; operand 2.  Alternative 0 (noavx) is the two-operand phsubw with operand
;; 1 tied to the destination; alternative 1 (avx) is vphsubw.
8650 (define_insn "ssse3_phsubwv8hi3"
8651 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8657 (match_operand:V8HI 1 "register_operand" "0,x")
8658 (parallel [(const_int 0)]))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8661 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8669 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8674 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8675 (parallel [(const_int 0)]))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8678 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8679 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8682 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8686 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8689 phsubw\t{%2, %0|%0, %2}
8690 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8691 [(set_attr "isa" "noavx,avx")
8692 (set_attr "type" "sseiadd")
8693 (set_attr "atom_unit" "complex")
8694 (set_attr "prefix_data16" "1,*")
8695 (set_attr "prefix_extra" "1")
8696 (set_attr "prefix" "orig,vex")
8697 (set_attr "mode" "TI")])
;; 64-bit ("y"-register) horizontal subtract of 16-bit elements; emits the
;; two-operand phsubw with operand 1 tied to the destination.  The RTL
;; selects HImode pairs (0,1), (2,3) from operand 1, then from operand 2.
;; prefix_rex is computed with x86_extended_reg_mentioned_p (insn).
8699 (define_insn "ssse3_phsubwv4hi3"
8700 [(set (match_operand:V4HI 0 "register_operand" "=y")
8705 (match_operand:V4HI 1 "register_operand" "0")
8706 (parallel [(const_int 0)]))
8707 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8709 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8714 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8715 (parallel [(const_int 0)]))
8716 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8718 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8721 "phsubw\t{%2, %0|%0, %2}"
8722 [(set_attr "type" "sseiadd")
8723 (set_attr "atom_unit" "complex")
8724 (set_attr "prefix_extra" "1")
8725 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8726 (set_attr "mode" "DI")])
;; 256-bit horizontal subtract of 32-bit elements; emits the three-operand
;; vphsubd.  The RTL selects adjacent SImode element pairs (0,1) ... (6,7)
;; from operand 1, followed by the same pairs from operand 2.
;; NOTE(review): the hardware instruction works independently on each
;; 128-bit lane, interleaving results from both sources per lane; this
;; pattern lists all of operand 1's pairs before operand 2's -- verify that
;; the element order matches the instruction.
8728 (define_insn "avx2_phsubdv8si3"
8729 [(set (match_operand:V8SI 0 "register_operand" "=x")
8735 (match_operand:V8SI 1 "register_operand" "x")
8736 (parallel [(const_int 0)]))
8737 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8739 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8740 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8743 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8744 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8746 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8747 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8752 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8753 (parallel [(const_int 0)]))
8754 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8756 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8757 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8760 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8761 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8763 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8764 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8766 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8767 [(set_attr "type" "sseiadd")
8768 (set_attr "prefix_extra" "1")
8769 (set_attr "prefix" "vex")
8770 (set_attr "mode" "OI")])
;; 128-bit horizontal subtract of 32-bit elements.  The RTL selects SImode
;; pairs (0,1), (2,3) from operand 1, then the same pairs from operand 2.
;; Alternative 0 (noavx) is the two-operand phsubd with operand 1 tied to
;; the destination; alternative 1 (avx) is vphsubd.
8772 (define_insn "ssse3_phsubdv4si3"
8773 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8778 (match_operand:V4SI 1 "register_operand" "0,x")
8779 (parallel [(const_int 0)]))
8780 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8782 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8783 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8787 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8788 (parallel [(const_int 0)]))
8789 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8791 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8792 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8795 phsubd\t{%2, %0|%0, %2}
8796 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8798 [(set_attr "isa" "noavx,avx")
8799 (set_attr "type" "sseiadd")
8800 (set_attr "atom_unit" "complex")
8801 (set_attr "prefix_data16" "1,*")
8802 (set_attr "prefix_extra" "1")
8803 (set_attr "prefix" "orig,vex")
8804 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: V2SI pattern using "y"-constraint registers.
;; Operand 1 is tied to the destination; operand 2 may be a "y" register or
;; memory.  Prints the two-operand "phsubd" in DImode.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
8806 (define_insn "ssse3_phsubdv2si3"
8807 [(set (match_operand:V2SI 0 "register_operand" "=y")
8811 (match_operand:V2SI 1 "register_operand" "0")
8812 (parallel [(const_int 0)]))
8813 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8816 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8817 (parallel [(const_int 0)]))
8818 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8820 "phsubd\t{%2, %0|%0, %2}"
8821 [(set_attr "type" "sseiadd")
8822 (set_attr "atom_unit" "complex")
8823 (set_attr "prefix_extra" "1")
8824 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8825 (set_attr "mode" "DI")])
;; avx2_phsubswv16hi3: V16HI pattern with a register destination, a register
;; operand 1 and a register-or-memory operand 2.  The visible RTL selects
;; elements 0..15 of each input by index.  Output is the three-operand
;; "vphsubsw" with a VEX prefix in OImode.
8827 (define_insn "avx2_phsubswv16hi3"
8828 [(set (match_operand:V16HI 0 "register_operand" "=x")
8835 (match_operand:V16HI 1 "register_operand" "x")
8836 (parallel [(const_int 0)]))
8837 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8839 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8851 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8852 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8854 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8858 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8859 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8861 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8862 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8868 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8869 (parallel [(const_int 0)]))
8870 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8872 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8873 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8876 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8877 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8879 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8880 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8884 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8885 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8887 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8888 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8891 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8892 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8894 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8895 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8897 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8898 [(set_attr "type" "sseiadd")
8899 (set_attr "prefix_extra" "1")
8900 (set_attr "prefix" "vex")
8901 (set_attr "mode" "OI")])
;; ssse3_phsubswv8hi3: V8HI pattern with two alternatives.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and prints
;; the two-operand "phsubsw"; alternative 1 (isa "avx") prints the
;; three-operand "vphsubsw".  Operand 2 may be a register or memory.
8903 (define_insn "ssse3_phsubswv8hi3"
8904 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8910 (match_operand:V8HI 1 "register_operand" "0,x")
8911 (parallel [(const_int 0)]))
8912 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8914 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8915 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8918 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8919 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8921 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8922 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8927 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8928 (parallel [(const_int 0)]))
8929 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8931 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8932 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8935 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8936 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8938 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8939 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8942 phsubsw\t{%2, %0|%0, %2}
8943 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8944 [(set_attr "isa" "noavx,avx")
8945 (set_attr "type" "sseiadd")
8946 (set_attr "atom_unit" "complex")
8947 (set_attr "prefix_data16" "1,*")
8948 (set_attr "prefix_extra" "1")
8949 (set_attr "prefix" "orig,vex")
8950 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: V4HI pattern using "y"-constraint registers.
;; Operand 1 is tied to the destination; operand 2 may be a "y" register or
;; memory.  Prints the two-operand "phsubsw" in DImode.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
8952 (define_insn "ssse3_phsubswv4hi3"
8953 [(set (match_operand:V4HI 0 "register_operand" "=y")
8958 (match_operand:V4HI 1 "register_operand" "0")
8959 (parallel [(const_int 0)]))
8960 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8962 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8963 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8967 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8968 (parallel [(const_int 0)]))
8969 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8971 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8972 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8974 "phsubsw\t{%2, %0|%0, %2}"
8975 [(set_attr "type" "sseiadd")
8976 (set_attr "atom_unit" "complex")
8977 (set_attr "prefix_extra" "1")
8978 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8979 (set_attr "mode" "DI")])
;; avx2_pmaddubsw256: takes two V32QI inputs (operand 1 in a register,
;; operand 2 in a register or memory) and produces a V16HI result.  The RTL
;; takes V16QI selections from each input; one index list starts at 0, the
;; other starts at 1 and ends at 31.  Output is the three-operand
;; "vpmaddubsw" with a VEX prefix in OImode.
8981 (define_insn "avx2_pmaddubsw256"
8982 [(set (match_operand:V16HI 0 "register_operand" "=x")
8987 (match_operand:V32QI 1 "register_operand" "x")
8988 (parallel [(const_int 0)
9006 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
9007 (parallel [(const_int 0)
9025 (vec_select:V16QI (match_dup 1)
9026 (parallel [(const_int 1)
9043 (vec_select:V16QI (match_dup 2)
9044 (parallel [(const_int 1)
9059 (const_int 31)]))))))]
9061 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9062 [(set_attr "type" "sseiadd")
9063 (set_attr "prefix_extra" "1")
9064 (set_attr "prefix" "vex")
9065 (set_attr "mode" "OI")])
;; ssse3_pmaddubsw128: takes two V16QI inputs and produces a V8HI result,
;; with V8QI selections from each input (the second index list starts at 1
;; and ends at 15).  Alternative 0 (isa "noavx") ties operand 1 to the
;; destination and prints "pmaddubsw"; alternative 1 (isa "avx") prints the
;; three-operand "vpmaddubsw".
9067 (define_insn "ssse3_pmaddubsw128"
9068 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9073 (match_operand:V16QI 1 "register_operand" "0,x")
9074 (parallel [(const_int 0)
9084 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9085 (parallel [(const_int 0)
9095 (vec_select:V8QI (match_dup 1)
9096 (parallel [(const_int 1)
9105 (vec_select:V8QI (match_dup 2)
9106 (parallel [(const_int 1)
9113 (const_int 15)]))))))]
9116 pmaddubsw\t{%2, %0|%0, %2}
9117 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9118 [(set_attr "isa" "noavx,avx")
9119 (set_attr "type" "sseiadd")
9120 (set_attr "atom_unit" "simul")
9121 (set_attr "prefix_data16" "1,*")
9122 (set_attr "prefix_extra" "1")
9123 (set_attr "prefix" "orig,vex")
9124 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: takes two V8QI inputs and produces a V4HI result in
;; "y"-constraint registers, with V4QI selections from each input (the
;; second index list starts at 1 and ends at 7).  Operand 1 is tied to the
;; destination.  Prints the two-operand "pmaddubsw" in DImode.
9126 (define_insn "ssse3_pmaddubsw"
9127 [(set (match_operand:V4HI 0 "register_operand" "=y")
9132 (match_operand:V8QI 1 "register_operand" "0")
9133 (parallel [(const_int 0)
9139 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9140 (parallel [(const_int 0)
9146 (vec_select:V4QI (match_dup 1)
9147 (parallel [(const_int 1)
9152 (vec_select:V4QI (match_dup 2)
9153 (parallel [(const_int 1)
9156 (const_int 7)]))))))]
9158 "pmaddubsw\t{%2, %0|%0, %2}"
9159 [(set_attr "type" "sseiadd")
9160 (set_attr "atom_unit" "simul")
9161 (set_attr "prefix_extra" "1")
9162 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9163 (set_attr "mode" "DI")])
;; avx2_umulhrswv16hi3: expander for a V16HI operation on two
;; nonimmediate operands that involves a V16HI constant vector of ones.
;; Its preparation statement calls ix86_fixup_binary_operands_no_copy for
;; MULT in V16HImode.
;; NOTE(review): the 128-bit and 64-bit siblings are named "pmulhrsw" while
;; this one is "umulhrsw"; confirm the name is intentional before renaming,
;; since callers reference it.
9165 (define_expand "avx2_umulhrswv16hi3"
9166 [(set (match_operand:V16HI 0 "register_operand" "")
9173 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9175 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9177 (const_vector:V16HI [(const_int 1) (const_int 1)
9178 (const_int 1) (const_int 1)
9179 (const_int 1) (const_int 1)
9180 (const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)]))
9187 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; *avx2_umulhrswv16hi3: anonymous insn for the V16HI form.  Operand 1 is
;; marked commutative ("%x"); operand 2 may be a register or memory.
;; Enabled when TARGET_AVX2 holds and ix86_binary_operator_ok accepts the
;; operands.  Prints the three-operand "vpmulhrsw" with a VEX prefix.
9189 (define_insn "*avx2_umulhrswv16hi3"
9190 [(set (match_operand:V16HI 0 "register_operand" "=x")
9197 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9199 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9201 (const_vector:V16HI [(const_int 1) (const_int 1)
9202 (const_int 1) (const_int 1)
9203 (const_int 1) (const_int 1)
9204 (const_int 1) (const_int 1)
9205 (const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)]))
9210 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9211 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9212 [(set_attr "type" "sseimul")
9213 (set_attr "prefix_extra" "1")
9214 (set_attr "prefix" "vex")
9215 (set_attr "mode" "OI")])
;; ssse3_pmulhrswv8hi3: expander for the V8HI form, on two nonimmediate
;; operands and a V8HI constant vector of ones.  Its preparation statement
;; calls ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
9217 (define_expand "ssse3_pmulhrswv8hi3"
9218 [(set (match_operand:V8HI 0 "register_operand" "")
9225 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9227 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9229 (const_vector:V8HI [(const_int 1) (const_int 1)
9230 (const_int 1) (const_int 1)
9231 (const_int 1) (const_int 1)
9232 (const_int 1) (const_int 1)]))
9235 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *ssse3_pmulhrswv8hi3: anonymous insn for the V8HI form.  Operand 1 is
;; marked commutative.  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 (isa
;; "noavx") prints "pmulhrsw"; alternative 1 (isa "avx") prints the
;; three-operand "vpmulhrsw".
9237 (define_insn "*ssse3_pmulhrswv8hi3"
9238 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9245 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9247 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9249 (const_vector:V8HI [(const_int 1) (const_int 1)
9250 (const_int 1) (const_int 1)
9251 (const_int 1) (const_int 1)
9252 (const_int 1) (const_int 1)]))
9254 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9256 pmulhrsw\t{%2, %0|%0, %2}
9257 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9258 [(set_attr "isa" "noavx,avx")
9259 (set_attr "type" "sseimul")
9260 (set_attr "prefix_data16" "1,*")
9261 (set_attr "prefix_extra" "1")
9262 (set_attr "prefix" "orig,vex")
9263 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: expander for the V4HI form, on two nonimmediate
;; operands and a V4HI constant vector of ones.  Its preparation statement
;; calls ix86_fixup_binary_operands_no_copy for MULT in V4HImode.
9265 (define_expand "ssse3_pmulhrswv4hi3"
9266 [(set (match_operand:V4HI 0 "register_operand" "")
9273 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9275 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9277 (const_vector:V4HI [(const_int 1) (const_int 1)
9278 (const_int 1) (const_int 1)]))
9281 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; *ssse3_pmulhrswv4hi3: anonymous insn for the V4HI form in "y"-constraint
;; registers.  Operand 1 is commutative and tied to the destination.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Prints the two-operand "pmulhrsw" in DImode.
9283 (define_insn "*ssse3_pmulhrswv4hi3"
9284 [(set (match_operand:V4HI 0 "register_operand" "=y")
9291 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9293 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9295 (const_vector:V4HI [(const_int 1) (const_int 1)
9296 (const_int 1) (const_int 1)]))
9298 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9299 "pmulhrsw\t{%2, %0|%0, %2}"
9300 [(set_attr "type" "sseimul")
9301 (set_attr "prefix_extra" "1")
9302 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9303 (set_attr "mode" "DI")])
;; <ssse3_avx2>_pshufb<mode>3: unspec pattern over the VI1_AVX2 mode
;; iterator.  Alternative 0 (isa "noavx") ties operand 1 to the destination
;; and prints "pshufb"; alternative 1 (isa "avx") prints the three-operand
;; "vpshufb".  The insn mode follows <sseinsnmode>.
9305 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9306 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9307 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9308 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9312 pshufb\t{%2, %0|%0, %2}
9313 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9314 [(set_attr "isa" "noavx,avx")
9315 (set_attr "type" "sselog1")
9316 (set_attr "prefix_data16" "1,*")
9317 (set_attr "prefix_extra" "1")
9318 (set_attr "prefix" "orig,vex")
9319 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_pshufbv8qi3: V8QI unspec pattern in "y"-constraint registers.
;; Operand 1 is tied to the destination; operand 2 may be a "y" register or
;; memory.  Prints the two-operand "pshufb" in DImode.
;; The output template no longer carries a trailing ';': in a machine
;; description that character only starts a comment, so it was a misleading
;; C-style leftover.
9321 (define_insn "ssse3_pshufbv8qi3"
9322 [(set (match_operand:V8QI 0 "register_operand" "=y")
9323 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9324 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9327 "pshufb\t{%2, %0|%0, %2}"
9328 [(set_attr "type" "sselog1")
9329 (set_attr "prefix_extra" "1")
9330 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9331 (set_attr "mode" "DI")])
;; <ssse3_avx2>_psign<mode>3: pattern over the VI124_AVX2 mode iterator.
;; The mnemonic suffix comes from <ssemodesuffix>.  Alternative 0 (isa
;; "noavx") ties operand 1 to the destination and prints "psign..";
;; alternative 1 (isa "avx") prints the three-operand "vpsign..".
9333 (define_insn "<ssse3_avx2>_psign<mode>3"
9334 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9336 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9337 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9341 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9342 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9343 [(set_attr "isa" "noavx,avx")
9344 (set_attr "type" "sselog1")
9345 (set_attr "prefix_data16" "1,*")
9346 (set_attr "prefix_extra" "1")
9347 (set_attr "prefix" "orig,vex")
9348 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_psign<mode>3: pattern over the MMXMODEI mode iterator in
;; "y"-constraint registers.  Operand 1 is tied to the destination; the
;; mnemonic suffix comes from <mmxvecsize>.  Prints "psign.." in DImode.
;; The output template no longer carries a trailing ';': in a machine
;; description that character only starts a comment, so it was a misleading
;; C-style leftover.
9350 (define_insn "ssse3_psign<mode>3"
9351 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9353 [(match_operand:MMXMODEI 1 "register_operand" "0")
9354 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9357 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9358 [(set_attr "type" "sselog1")
9359 (set_attr "prefix_extra" "1")
9360 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9361 (set_attr "mode" "DI")])
;; <ssse3_avx2>_palignr<mode>: unspec pattern over the SSESCALARMODE
;; iterator.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate.
;; which_alternative selects between the two-operand "palignr" and the
;; three-operand "vpalignr" forms.
9363 (define_insn "<ssse3_avx2>_palignr<mode>"
9364 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9365 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9366 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9367 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9371 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9373 switch (which_alternative)
9376 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9378 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9383 [(set_attr "isa" "noavx,avx")
9384 (set_attr "type" "sseishft")
9385 (set_attr "atom_unit" "sishuf")
9386 (set_attr "prefix_data16" "1,*")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "length_immediate" "1")
9389 (set_attr "prefix" "orig,vex")
9390 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_palignrdi: DImode unspec pattern in "y"-constraint registers.
;; Operand 1 is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing the two-operand "palignr" with that immediate.
9392 (define_insn "ssse3_palignrdi"
9393 [(set (match_operand:DI 0 "register_operand" "=y")
9394 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9395 (match_operand:DI 2 "nonimmediate_operand" "ym")
9396 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9400 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9401 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9403 [(set_attr "type" "sseishft")
9404 (set_attr "atom_unit" "sishuf")
9405 (set_attr "prefix_extra" "1")
9406 (set_attr "length_immediate" "1")
9407 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9408 (set_attr "mode" "DI")])
;; abs<mode>2 over the VI124_AVX2 mode iterator: one-input pattern whose
;; source may be a register or memory.  The template starts with "%v", and
;; the prefix attribute is "maybe_vex"; the mnemonic suffix comes from
;; <ssemodesuffix>.
9410 (define_insn "abs<mode>2"
9411 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9413 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9415 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9416 [(set_attr "type" "sselog1")
9417 (set_attr "prefix_data16" "1")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "prefix" "maybe_vex")
9420 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 over the MMXMODEI mode iterator: one-input pattern in
;; "y"-constraint registers whose source may be a register or memory.  The
;; mnemonic suffix comes from <mmxvecsize>.  prefix_rep is forced to "0".
;; The output template no longer carries a trailing ';': in a machine
;; description that character only starts a comment, so it was a misleading
;; C-style leftover.
9422 (define_insn "abs<mode>2"
9423 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9425 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9427 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9428 [(set_attr "type" "sselog1")
9429 (set_attr "prefix_rep" "0")
9430 (set_attr "prefix_extra" "1")
9431 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9432 (set_attr "mode" "DI")])
9434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9436 ;; AMD SSE4A instructions
9438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: stores a MODEF register (operand 1) to a memory
;; destination (operand 0) through an unspec.  Prints "movnt" followed by
;; <ssemodesuffix>.
9440 (define_insn "sse4a_movnt<mode>"
9441 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9443 [(match_operand:MODEF 1 "register_operand" "x")]
9446 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9447 [(set_attr "type" "ssemov")
9448 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: stores element 0 of a VF_128 register (operand 1)
;; to a scalar-mode memory destination (operand 0) through an unspec.
;; Prints "movnt" followed by <ssescalarmodesuffix>.
9450 (define_insn "sse4a_vmmovnt<mode>"
9451 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9452 (unspec:<ssescalarmode>
9453 [(vec_select:<ssescalarmode>
9454 (match_operand:VF_128 1 "register_operand" "x")
9455 (parallel [(const_int 0)]))]
9458 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9459 [(set_attr "type" "ssemov")
9460 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: V2DI unspec pattern; operand 1 is tied to the destination
;; and operands 2 and 3 are constants accepted by const_0_to_255_operand.
;; Prints "extrq" with both immediates (length_immediate is "2").
9462 (define_insn "sse4a_extrqi"
9463 [(set (match_operand:V2DI 0 "register_operand" "=x")
9464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9465 (match_operand 2 "const_0_to_255_operand" "")
9466 (match_operand 3 "const_0_to_255_operand" "")]
9469 "extrq\t{%3, %2, %0|%0, %2, %3}"
9470 [(set_attr "type" "sse")
9471 (set_attr "prefix_data16" "1")
9472 (set_attr "length_immediate" "2")
9473 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq.  Operand 1 (V2DI) is tied to the
;; destination; operand 2 is a V16QI register.  Prints the two-operand
;; "extrq".
9475 (define_insn "sse4a_extrq"
9476 [(set (match_operand:V2DI 0 "register_operand" "=x")
9477 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9478 (match_operand:V16QI 2 "register_operand" "x")]
9481 "extrq\t{%2, %0|%0, %2}"
9482 [(set_attr "type" "sse")
9483 (set_attr "prefix_data16" "1")
9484 (set_attr "mode" "TI")])
;; sse4a_insertqi: V2DI unspec pattern; operand 1 is tied to the
;; destination, operand 2 is a register, and operands 3 and 4 are constants
;; accepted by const_0_to_255_operand.  Prints "insertq" with both
;; immediates (length_immediate is "2"); prefix_data16 is "0" and
;; prefix_rep is "1".
9486 (define_insn "sse4a_insertqi"
9487 [(set (match_operand:V2DI 0 "register_operand" "=x")
9488 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9489 (match_operand:V2DI 2 "register_operand" "x")
9490 (match_operand 3 "const_0_to_255_operand" "")
9491 (match_operand 4 "const_0_to_255_operand" "")]
9494 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9495 [(set_attr "type" "sseins")
9496 (set_attr "prefix_data16" "0")
9497 (set_attr "prefix_rep" "1")
9498 (set_attr "length_immediate" "2")
9499 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq.  Operand 1 is tied to the
;; destination; operand 2 is a V2DI register.  Prints the two-operand
;; "insertq"; prefix_data16 is "0" and prefix_rep is "1".
9501 (define_insn "sse4a_insertq"
9502 [(set (match_operand:V2DI 0 "register_operand" "=x")
9503 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9504 (match_operand:V2DI 2 "register_operand" "x")]
9507 "insertq\t{%2, %0|%0, %2}"
9508 [(set_attr "type" "sseins")
9509 (set_attr "prefix_data16" "0")
9510 (set_attr "prefix_rep" "1")
9511 (set_attr "mode" "TI")])
9513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9515 ;; Intel SSE4.1 instructions
9517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; <sse4_1>_blend<ssemodesuffix><avxsizesuffix>: pattern over the VF mode
;; iterator.  Operand 2 (register or memory) is listed before operand 1
;; (register) in the RTL; operand 3 is an immediate bounded by <blendbits>.
;; Alternative 0 (isa "noavx") prints "blend.."; alternative 1 (isa "avx")
;; prints the three-operand "vblend..".
9519 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9520 [(set (match_operand:VF 0 "register_operand" "=x,x")
9522 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9523 (match_operand:VF 1 "register_operand" "0,x")
9524 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9527 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9528 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9529 [(set_attr "isa" "noavx,avx")
9530 (set_attr "type" "ssemov")
9531 (set_attr "length_immediate" "1")
9532 (set_attr "prefix_data16" "1,*")
9533 (set_attr "prefix_extra" "1")
9534 (set_attr "prefix" "orig,vex")
9535 (set_attr "mode" "<MODE>")])
;; <sse4_1>_blendv<ssemodesuffix><avxsizesuffix>: pattern over the VF mode
;; iterator.  Operand 3 is a register operand: alternative 0 (isa "noavx")
;; constrains it with "Yz", alternative 1 (isa "avx") allows any "x"
;; register.  Operands 0-2 use the *_not_xmm0_*_maybe_avx predicates.
;; NOTE(review): length_immediate is "1" for both alternatives here, whereas
;; the pblendvb pattern in this file uses "*,1"; confirm whether the
;; non-AVX alternative really carries an immediate byte.
9537 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9538 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9540 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9541 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9542 (match_operand:VF 3 "register_operand" "Yz,x")]
9546 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9547 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9548 [(set_attr "isa" "noavx,avx")
9549 (set_attr "type" "ssemov")
9550 (set_attr "length_immediate" "1")
9551 (set_attr "prefix_data16" "1,*")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "prefix" "orig,vex")
9554 (set_attr "mode" "<MODE>")])
;; <sse4_1>_dp<ssemodesuffix><avxsizesuffix>: pattern over the VF mode
;; iterator.  Operand 1 is marked commutative; operand 3 is an immediate
;; accepted by const_0_to_255_operand.  Alternative 0 (isa "noavx") prints
;; "dp.."; alternative 1 (isa "avx") prints the three-operand "vdp..".
9556 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9557 [(set (match_operand:VF 0 "register_operand" "=x,x")
9559 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9560 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9561 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9565 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9566 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9567 [(set_attr "isa" "noavx,avx")
9568 (set_attr "type" "ssemul")
9569 (set_attr "length_immediate" "1")
9570 (set_attr "prefix_data16" "1,*")
9571 (set_attr "prefix_extra" "1")
9572 (set_attr "prefix" "orig,vex")
9573 (set_attr "mode" "<MODE>")])
;; <sse4_1_avx2>_movntdqa: unspec load over the VI8_AVX2 mode iterator.
;; The source (operand 1) must be memory; the destination is a register.
;; The template starts with "%v" and the prefix attribute is "maybe_vex".
9575 (define_insn "<sse4_1_avx2>_movntdqa"
9576 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9577 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9580 "%vmovntdqa\t{%1, %0|%0, %1}"
9581 [(set_attr "type" "ssemov")
9582 (set_attr "prefix_extra" "1")
9583 (set_attr "prefix" "maybe_vex")
9584 (set_attr "mode" "<sseinsnmode>")])
;; <sse4_1_avx2>_mpsadbw: unspec pattern over the VI1_AVX2 mode iterator
;; with an immediate operand 3 accepted by const_0_to_255_operand.
;; Alternative 0 (isa "noavx") prints "mpsadbw"; alternative 1 (isa "avx")
;; prints the three-operand "vmpsadbw".
9586 (define_insn "<sse4_1_avx2>_mpsadbw"
9587 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9588 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9589 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9590 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9594 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9595 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9596 [(set_attr "isa" "noavx,avx")
9597 (set_attr "type" "sselog1")
9598 (set_attr "length_immediate" "1")
9599 (set_attr "prefix_extra" "1")
9600 (set_attr "prefix" "orig,vex")
9601 (set_attr "mode" "<sseinsnmode>")])
;; avx2_packusdw: combines two V8SI inputs (operand 1 in a register,
;; operand 2 in a register or memory) into a V16HI destination.  Prints the
;; three-operand "vpackusdw" with a VEX prefix in OImode.
9603 (define_insn "avx2_packusdw"
9604 [(set (match_operand:V16HI 0 "register_operand" "=x")
9607 (match_operand:V8SI 1 "register_operand" "x"))
9609 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9611 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9612 [(set_attr "type" "sselog")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "prefix" "vex")
9615 (set_attr "mode" "OI")])
;; sse4_1_packusdw: combines two V4SI inputs into a V8HI destination.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and prints
;; "packusdw"; alternative 1 (isa "avx") prints the three-operand
;; "vpackusdw".
9617 (define_insn "sse4_1_packusdw"
9618 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9621 (match_operand:V4SI 1 "register_operand" "0,x"))
9623 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9626 packusdw\t{%2, %0|%0, %2}
9627 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9628 [(set_attr "isa" "noavx,avx")
9629 (set_attr "type" "sselog")
9630 (set_attr "prefix_extra" "1")
9631 (set_attr "prefix" "orig,vex")
9632 (set_attr "mode" "TI")])
;; <sse4_1_avx2>_pblendvb: pattern over the VI1_AVX2 mode iterator.
;; Operand 3 is a register operand: alternative 0 (isa "noavx") constrains
;; it with "Yz", alternative 1 (isa "avx") allows any "x" register.
;; length_immediate is "*,1", i.e. only set for the AVX alternative.
;; Operand 0 now uses reg_not_xmm0_operand_maybe_avx, matching operand 1 of
;; this pattern and operand 0 of the blendv<ssemodesuffix> pattern; the
;; plain reg_not_xmm0_operand predicate did not agree with them.
9634 (define_insn "<sse4_1_avx2>_pblendvb"
9635 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9637 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9638 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9639 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9643 pblendvb\t{%3, %2, %0|%0, %2, %3}
9644 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9645 [(set_attr "isa" "noavx,avx")
9646 (set_attr "type" "ssemov")
9647 (set_attr "prefix_extra" "1")
9648 (set_attr "length_immediate" "*,1")
9649 (set_attr "prefix" "orig,vex")
9650 (set_attr "mode" "<sseinsnmode>")])
;; sse4_1_pblendw: V8HI pattern.  Operand 2 (register or memory) is listed
;; before operand 1 (register) in the RTL; operand 3 is an immediate
;; accepted by const_0_to_255_operand.  Alternative 0 (isa "noavx") prints
;; "pblendw"; alternative 1 (isa "avx") prints the three-operand "vpblendw".
9652 (define_insn "sse4_1_pblendw"
9653 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9655 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9656 (match_operand:V8HI 1 "register_operand" "0,x")
9657 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9660 pblendw\t{%3, %2, %0|%0, %2, %3}
9661 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9662 [(set_attr "isa" "noavx,avx")
9663 (set_attr "type" "ssemov")
9664 (set_attr "prefix_extra" "1")
9665 (set_attr "length_immediate" "1")
9666 (set_attr "prefix" "orig,vex")
9667 (set_attr "mode" "TI")])
9669 ;; The builtin uses an 8-bit immediate. Expand that.
;; avx2_pblendw: V16HI expander.  The preparation code keeps the low 8 bits
;; of operand 3 and rewrites operand 3 as (val << 8 | val), i.e. the same
;; byte repeated in bits 0-7 and 8-15.
9670 (define_expand "avx2_pblendw"
9671 [(set (match_operand:V16HI 0 "register_operand" "")
9673 (match_operand:V16HI 2 "nonimmediate_operand" "")
9674 (match_operand:V16HI 1 "register_operand" "")
9675 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9678 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9679 operands[3] = GEN_INT (val << 8 | val);
;; *avx2_pblendw: anonymous V16HI insn.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it to its low 8 bits before
;; printing the three-operand "vpblendw" with that immediate.
9682 (define_insn "*avx2_pblendw"
9683 [(set (match_operand:V16HI 0 "register_operand" "=x")
9685 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9686 (match_operand:V16HI 1 "register_operand" "x")
9687 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9690 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9691 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9693 [(set_attr "type" "ssemov")
9694 (set_attr "prefix_extra" "1")
9695 (set_attr "length_immediate" "1")
9696 (set_attr "prefix" "vex")
9697 (set_attr "mode" "OI")])
;; avx2_pblendd<mode>: pattern over the VI4_AVX2 mode iterator.  Operand 2
;; (register or memory) is listed before operand 1 (register) in the RTL;
;; operand 3 is an immediate accepted by const_0_to_255_operand.  Prints
;; the three-operand "vpblendd" with a VEX prefix.
9699 (define_insn "avx2_pblendd<mode>"
9700 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9702 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9703 (match_operand:VI4_AVX2 1 "register_operand" "x")
9704 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9706 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9707 [(set_attr "type" "ssemov")
9708 (set_attr "prefix_extra" "1")
9709 (set_attr "length_immediate" "1")
9710 (set_attr "prefix" "vex")
9711 (set_attr "mode" "<sseinsnmode>")])
;; sse4_1_phminposuw: V8HI pattern wrapping operand 1 (register or memory)
;; in UNSPEC_PHMINPOSUW.  The template starts with "%v" and the prefix
;; attribute is "maybe_vex".
9713 (define_insn "sse4_1_phminposuw"
9714 [(set (match_operand:V8HI 0 "register_operand" "=x")
9715 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9716 UNSPEC_PHMINPOSUW))]
9718 "%vphminposuw\t{%1, %0|%0, %1}"
9719 [(set_attr "type" "sselog1")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "prefix" "maybe_vex")
9722 (set_attr "mode" "TI")])
;; avx2_<code>v16qiv16hi2: converts a V16QI source (register or memory) to
;; a V16HI destination.  The pattern name and mnemonic are parameterised by
;; <code> and <extsuffix>; prints "vpmov..bw" with a VEX prefix.
9724 (define_insn "avx2_<code>v16qiv16hi2"
9725 [(set (match_operand:V16HI 0 "register_operand" "=x")
9727 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9729 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9730 [(set_attr "type" "ssemov")
9731 (set_attr "prefix_extra" "1")
9732 (set_attr "prefix" "vex")
9733 (set_attr "mode" "OI")])
;; sse4_1_<code>v8qiv8hi2: produces V8HI from a V16QI source; the element
;; selection starts at index 0.  Prints "%vpmov..bw"; in the Intel-syntax
;; half of the template operand 1 is printed with the %q modifier.
9735 (define_insn "sse4_1_<code>v8qiv8hi2"
9736 [(set (match_operand:V8HI 0 "register_operand" "=x")
9739 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9740 (parallel [(const_int 0)
9749 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9750 [(set_attr "type" "ssemov")
9751 (set_attr "prefix_extra" "1")
9752 (set_attr "prefix" "maybe_vex")
9753 (set_attr "mode" "TI")])
;; avx2_<code>v8qiv8si2: produces V8SI from a V16QI source; the element
;; selection starts at index 0.  Prints "vpmov..bd" with a VEX prefix; in
;; the Intel-syntax half operand 1 is printed with the %q modifier.
9755 (define_insn "avx2_<code>v8qiv8si2"
9756 [(set (match_operand:V8SI 0 "register_operand" "=x")
9759 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9760 (parallel [(const_int 0)
9769 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9770 [(set_attr "type" "ssemov")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "prefix" "vex")
9773 (set_attr "mode" "OI")])
;; sse4_1_<code>v4qiv4si2: produces V4SI from a V16QI source; the element
;; selection starts at index 0.  Prints "%vpmov..bd"; in the Intel-syntax
;; half operand 1 is printed with the %k modifier.
9775 (define_insn "sse4_1_<code>v4qiv4si2"
9776 [(set (match_operand:V4SI 0 "register_operand" "=x")
9779 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9780 (parallel [(const_int 0)
9785 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9786 [(set_attr "type" "ssemov")
9787 (set_attr "prefix_extra" "1")
9788 (set_attr "prefix" "maybe_vex")
9789 (set_attr "mode" "TI")])
;; avx2_<code>v8hiv8si2: converts a V8HI source (register or memory) to a
;; V8SI destination.  Prints "vpmov..wd" with a VEX prefix in OImode.
9791 (define_insn "avx2_<code>v8hiv8si2"
9792 [(set (match_operand:V8SI 0 "register_operand" "=x")
9794 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9796 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9797 [(set_attr "type" "ssemov")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "vex")
9800 (set_attr "mode" "OI")])
;; sse4_1_<code>v4hiv4si2: produces V4SI from a V8HI source; the element
;; selection starts at index 0.  Prints "%vpmov..wd"; in the Intel-syntax
;; half operand 1 is printed with the %q modifier.
9802 (define_insn "sse4_1_<code>v4hiv4si2"
9803 [(set (match_operand:V4SI 0 "register_operand" "=x")
9806 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9807 (parallel [(const_int 0)
9812 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "prefix" "maybe_vex")
9816 (set_attr "mode" "TI")])
;; avx2_<code>v4qiv4di2: produces V4DI from a V16QI source; the element
;; selection starts at index 0.  Prints "vpmov..bq" with a VEX prefix; in
;; the Intel-syntax half operand 1 is printed with the %k modifier.
9818 (define_insn "avx2_<code>v4qiv4di2"
9819 [(set (match_operand:V4DI 0 "register_operand" "=x")
9822 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9823 (parallel [(const_int 0)
9828 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9829 [(set_attr "type" "ssemov")
9830 (set_attr "prefix_extra" "1")
9831 (set_attr "prefix" "vex")
9832 (set_attr "mode" "OI")])
;; sse4_1_<code>v2qiv2di2: produces V2DI from a V16QI source; the element
;; selection starts at index 0.  Prints "%vpmov..bq"; in the Intel-syntax
;; half operand 1 is printed with the %w modifier.
9834 (define_insn "sse4_1_<code>v2qiv2di2"
9835 [(set (match_operand:V2DI 0 "register_operand" "=x")
9838 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9839 (parallel [(const_int 0)
9842 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9843 [(set_attr "type" "ssemov")
9844 (set_attr "prefix_extra" "1")
9845 (set_attr "prefix" "maybe_vex")
9846 (set_attr "mode" "TI")])
;; avx2_<code>v4hiv4di2: produces V4DI from a V8HI source; the element
;; selection starts at index 0.  Prints "vpmov..wq" with a VEX prefix; in
;; the Intel-syntax half operand 1 is printed with the %q modifier.
9848 (define_insn "avx2_<code>v4hiv4di2"
9849 [(set (match_operand:V4DI 0 "register_operand" "=x")
9852 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9853 (parallel [(const_int 0)
9858 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9859 [(set_attr "type" "ssemov")
9860 (set_attr "prefix_extra" "1")
9861 (set_attr "prefix" "vex")
9862 (set_attr "mode" "OI")])
;; sse4_1_<code>v2hiv2di2: produces V2DI from a V8HI source; the element
;; selection starts at index 0.  Prints "%vpmov..wq"; in the Intel-syntax
;; half operand 1 is printed with the %k modifier.
9864 (define_insn "sse4_1_<code>v2hiv2di2"
9865 [(set (match_operand:V2DI 0 "register_operand" "=x")
9868 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9869 (parallel [(const_int 0)
9872 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9873 [(set_attr "type" "ssemov")
9874 (set_attr "prefix_extra" "1")
9875 (set_attr "prefix" "maybe_vex")
9876 (set_attr "mode" "TI")])
;; avx2_<code>v4siv4di2: converts a V4SI source (register or memory) to a
;; V4DI destination.  Prints "vpmov..dq" in OImode.
;; The "prefix" attribute is now stated explicitly as "vex", as every other
;; avx2_<code>... extension pattern in this file does; previously it was
;; left to whatever default the attribute definition supplies.
9878 (define_insn "avx2_<code>v4siv4di2"
9879 [(set (match_operand:V4DI 0 "register_operand" "=x")
9881 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9883 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9884 [(set_attr "type" "ssemov")
9885 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9886 (set_attr "mode" "OI")])
;; sse4_1_<code>v2siv2di2: produces V2DI from a V4SI source; the element
;; selection starts at index 0.  Prints "%vpmov..dq"; in the Intel-syntax
;; half operand 1 is printed with the %q modifier.
9888 (define_insn "sse4_1_<code>v2siv2di2"
9889 [(set (match_operand:V2DI 0 "register_operand" "=x")
9892 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9893 (parallel [(const_int 0)
9896 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9897 [(set_attr "type" "ssemov")
9898 (set_attr "prefix_extra" "1")
9899 (set_attr "prefix" "maybe_vex")
9900 (set_attr "mode" "TI")])
9902 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9903 ;; setting FLAGS_REG, but they are not really compare instructions.
;; avx_vtest<ssemodesuffix><avxsizesuffix>: sets the CC-mode FLAGS_REG from
;; an unspec of two VF operands (operand 0 in a register, operand 1 in a
;; register or memory).  No vector result is written.  Prints "vtest.."
;; with a VEX prefix.
9904 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9905 [(set (reg:CC FLAGS_REG)
9906 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9907 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9910 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9911 [(set_attr "type" "ssecomi")
9912 (set_attr "prefix_extra" "1")
9913 (set_attr "prefix" "vex")
9914 (set_attr "mode" "<MODE>")])
9916 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9917 ;; But it is not really a compare instruction.
;; avx_ptest256: sets the CC-mode FLAGS_REG from an unspec of two V4DI
;; operands (operand 0 in a register, operand 1 in a register or memory).
;; Prints "vptest" with a VEX prefix in OImode.
9918 (define_insn "avx_ptest256"
9919 [(set (reg:CC FLAGS_REG)
9920 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9921 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9924 "vptest\t{%1, %0|%0, %1}"
9925 [(set_attr "type" "ssecomi")
9926 (set_attr "prefix_extra" "1")
9927 (set_attr "prefix" "vex")
9928 (set_attr "mode" "OI")])
;; sse4_1_ptest: sets the CC-mode FLAGS_REG from an unspec of two V2DI
;; operands (operand 0 in a register, operand 1 in a register or memory).
;; The template starts with "%v" and the prefix attribute is "maybe_vex".
9930 (define_insn "sse4_1_ptest"
9931 [(set (reg:CC FLAGS_REG)
9932 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9933 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9936 "%vptest\t{%1, %0|%0, %1}"
9937 [(set_attr "type" "ssecomi")
9938 (set_attr "prefix_extra" "1")
9939 (set_attr "prefix" "maybe_vex")
9940 (set_attr "mode" "TI")])
;; <sse4_1>_round<ssemodesuffix><avxsizesuffix>: pattern over the VF mode
;; iterator.  Operand 1 may be a register or memory; operand 2 is an
;; immediate accepted by const_0_to_15_operand.  prefix_data16 is computed
;; from a TARGET_AVX test rather than given as a constant.  Prints
;; "%vround..".
9942 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9943 [(set (match_operand:VF 0 "register_operand" "=x")
9945 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9946 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9949 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9950 [(set_attr "type" "ssecvt")
9951 (set (attr "prefix_data16")
9953 (match_test "TARGET_AVX")
9955 (const_string "1")))
9956 (set_attr "prefix_extra" "1")
9957 (set_attr "length_immediate" "1")
9958 (set_attr "prefix" "maybe_vex")
9959 (set_attr "mode" "<MODE>")])
;; <sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>: expander over the VF1
;; mode iterator.  It rounds operand 1 into a fresh temporary with the
;; matching round pattern, then converts the temporary to the integer
;; vector destination (operand 0) with fix_trunc.
9961 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9962 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9963 (match_operand:VF1 1 "nonimmediate_operand" "")
9964 (match_operand:SI 2 "const_0_to_15_operand" "")]
9967 rtx tmp = gen_reg_rtx (<MODE>mode);
9970 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9973 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; <sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>: expander
;; over the VF2 mode iterator taking two inputs and a rounding immediate.
;; Two code sequences are visible:
;;  - guarded by V2DFmode && TARGET_AVX && !TARGET_PREFER_AVX128: the two
;;    inputs are concatenated into one V4DF temporary, rounded once with
;;    avx_roundpd256, and converted with fix_truncv4dfv4si2;
;;  - a second sequence that rounds each input into its own temporary and
;;    combines them with vec_pack_sfix_trunc_<mode>.
;; NOTE(review): the line joining the two sequences is not present in this
;; listing; presumably the second one is the else arm -- confirm.
9977 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9978 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9979 (match_operand:VF2 1 "nonimmediate_operand" "")
9980 (match_operand:VF2 2 "nonimmediate_operand" "")
9981 (match_operand:SI 3 "const_0_to_15_operand" "")]
9986 if (<MODE>mode == V2DFmode
9987 && TARGET_AVX && !TARGET_PREFER_AVX128)
9989 rtx tmp2 = gen_reg_rtx (V4DFmode);
9991 tmp0 = gen_reg_rtx (V4DFmode);
9992 tmp1 = force_reg (V2DFmode, operands[1]);
9994 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9995 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9996 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10000 tmp0 = gen_reg_rtx (<MODE>mode);
10001 tmp1 = gen_reg_rtx (<MODE>mode);
10004 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
10007 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
10010 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar ROUNDSS/ROUNDSD on 128-bit float vectors (VF_128).
;;   alternative 0 (isa noavx): operand 1 is tied to the destination ("0");
;;                              two-operand legacy form.
;;   alternative 1 (isa avx):   three-operand VEX form with operand 1 free.
;; Operand 2 is the value being rounded, operand 3 the 0..15 immediate.
10015 (define_insn "sse4_1_round<ssescalarmodesuffix>"
10016 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
10019 [(match_operand:VF_128 2 "register_operand" "x,x")
10020 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
10022 (match_operand:VF_128 1 "register_operand" "0,x")
10026 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
10027 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10028 [(set_attr "isa" "noavx,avx")
10029 (set_attr "type" "ssecvt")
10030 (set_attr "length_immediate" "1")
10031 (set_attr "prefix_data16" "1,*")
10032 (set_attr "prefix_extra" "1")
10033 (set_attr "prefix" "orig,vex")
10034 (set_attr "mode" "<MODE>")])
;; Vector round<mode>2, enabled only for TARGET_ROUND && !flag_trapping_math.
;; Preparation code:
;;   * builds the scalar constant 0.5 - 2**(-p-1), i.e. nextafter (0.5, 0.0),
;;     where p is the precision of the element format;
;;   * broadcasts it into a vector register and gives it the sign of
;;     operand 1 via copysign<mode>3 (result in operands[3]);
;;   * allocates operands[4] as the intermediate and sets operands[5] to
;;     ROUND_TRUNC.
;; The template's first set computes operands[4] from operand 1 (presumably
;; adding operands[3] -- the operator is not visible here, confirm); the
;; second set produces operand 0 from operands[4] and operands[5].
10036 (define_expand "round<mode>2"
10037 [(set (match_dup 4)
10039 (match_operand:VF 1 "register_operand" "")
10041 (set (match_operand:VF 0 "register_operand" "")
10043 [(match_dup 4) (match_dup 5)]
10045 "TARGET_ROUND && !flag_trapping_math"
10047 enum machine_mode scalar_mode;
10048 const struct real_format *fmt;
10049 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
10050 rtx half, vec_half;
10052 scalar_mode = GET_MODE_INNER (<MODE>mode);
10054 /* load nextafter (0.5, 0.0) */
10055 fmt = REAL_MODE_FORMAT (scalar_mode);
10056 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
10057 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
10058 half = const_double_from_real_value (pred_half, scalar_mode);
10060 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
10061 vec_half = force_reg (<MODE>mode, vec_half);
10063 operands[3] = gen_reg_rtx (<MODE>mode);
10064 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
10066 operands[4] = gen_reg_rtx (<MODE>mode);
10067 operands[5] = GEN_INT (ROUND_TRUNC);
;; round<mode>2 followed by float-to-integer conversion for SFmode vectors:
;; rounds operand 1 into a temporary, then converts the temporary into the
;; integer vector operand 0 with fix_trunc.
10070 (define_expand "round<mode>2_sfix"
10071 [(match_operand:<sseintvecmode> 0 "register_operand" "")
10072 (match_operand:VF1 1 "register_operand" "")]
10073 "TARGET_ROUND && !flag_trapping_math"
10075 rtx tmp = gen_reg_rtx (<MODE>mode);
10077 emit_insn (gen_round<mode>2 (tmp, operands[1]));
10080 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; round<mode>2 on two DFmode vectors with the results packed into one
;; integer vector (operand 0).  It uses the same two strategies as the
;; immediate-taking vec_pack_sfix expander:
;;   * V2DF with AVX and no 128-bit preference: concatenate into V4DF, run
;;     roundv4df2 once, convert with fix_truncv4dfv4si2;
;;   * otherwise: round each half separately and pack with
;;     vec_pack_sfix_trunc_<mode>.
10084 (define_expand "round<mode>2_vec_pack_sfix"
10085 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
10086 (match_operand:VF2 1 "register_operand" "")
10087 (match_operand:VF2 2 "register_operand" "")]
10088 "TARGET_ROUND && !flag_trapping_math"
10092 if (<MODE>mode == V2DFmode
10093 && TARGET_AVX && !TARGET_PREFER_AVX128)
10095 rtx tmp2 = gen_reg_rtx (V4DFmode);
10097 tmp0 = gen_reg_rtx (V4DFmode);
10098 tmp1 = force_reg (V2DFmode, operands[1]);
10100 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10101 emit_insn (gen_roundv4df2 (tmp2, tmp0));
10102 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10106 tmp0 = gen_reg_rtx (<MODE>mode);
10107 tmp1 = gen_reg_rtx (<MODE>mode);
10109 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10110 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10113 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10118 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10120 ;; Intel SSE4.2 string/text processing instructions
10122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare.  One pattern models all three
;; results: the SI result in operand 0 (constraint "c"), the V16QI result in
;; operand 1 (constraint "Yz") and FLAGS_REG.  Operands 3 ("a") and 5 ("d")
;; are the SI length registers, operand 6 the 8-bit control immediate.
;; The split inspects REG_UNUSED notes on the current insn to learn which
;; results are live, then emits pcmpestri and/or pcmpestrm (their guards are
;; presumably the ecx/xmm0 locals -- confirm).  It emits the flags-only
;; pattern when only the flags are live, and a deleted-insn note when nothing
;; is live.
10124 (define_insn_and_split "sse4_2_pcmpestr"
10125 [(set (match_operand:SI 0 "register_operand" "=c,c")
10127 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10128 (match_operand:SI 3 "register_operand" "a,a")
10129 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10130 (match_operand:SI 5 "register_operand" "d,d")
10131 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10133 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10141 (set (reg:CC FLAGS_REG)
10150 && can_create_pseudo_p ()"
10155 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10156 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10157 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10160 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10161 operands[3], operands[4],
10162 operands[5], operands[6]));
10164 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10165 operands[3], operands[4],
10166 operands[5], operands[6]));
10167 if (flags && !(ecx || xmm0))
10168 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10169 operands[2], operands[3],
10170 operands[4], operands[5],
10172 if (!(flags || ecx || xmm0))
10173 emit_note (NOTE_INSN_DELETED);
10177 [(set_attr "type" "sselog")
10178 (set_attr "prefix_data16" "1")
10179 (set_attr "prefix_extra" "1")
10180 (set_attr "length_immediate" "1")
10181 (set_attr "memory" "none,load")
10182 (set_attr "mode" "TI")])
;; PCMPESTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 3 are the two V16QI strings (operand 3 may be memory in
;; alternative 1), operands 2 ("a") and 4 ("d") the SI lengths, operand 5 the
;; control immediate.  Only operands 1, 3 and 5 appear in the template.
10184 (define_insn "sse4_2_pcmpestri"
10185 [(set (match_operand:SI 0 "register_operand" "=c,c")
10187 [(match_operand:V16QI 1 "register_operand" "x,x")
10188 (match_operand:SI 2 "register_operand" "a,a")
10189 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10190 (match_operand:SI 4 "register_operand" "d,d")
10191 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10193 (set (reg:CC FLAGS_REG)
10202 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10203 [(set_attr "type" "sselog")
10204 (set_attr "prefix_data16" "1")
10205 (set_attr "prefix_extra" "1")
10206 (set_attr "prefix" "maybe_vex")
10207 (set_attr "length_immediate" "1")
10208 (set_attr "memory" "none,load")
10209 (set_attr "mode" "TI")])
;; PCMPESTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Inputs are laid out exactly as in sse4_2_pcmpestri: strings in operands 1
;; and 3, lengths in operands 2 ("a") and 4 ("d"), control immediate in
;; operand 5.
10211 (define_insn "sse4_2_pcmpestrm"
10212 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10214 [(match_operand:V16QI 1 "register_operand" "x,x")
10215 (match_operand:SI 2 "register_operand" "a,a")
10216 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10217 (match_operand:SI 4 "register_operand" "d,d")
10218 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10220 (set (reg:CC FLAGS_REG)
10229 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10230 [(set_attr "type" "sselog")
10231 (set_attr "prefix_data16" "1")
10232 (set_attr "prefix_extra" "1")
10233 (set_attr "length_immediate" "1")
10234 (set_attr "prefix" "maybe_vex")
10235 (set_attr "memory" "none,load")
10236 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Operands 0 and 1 are scratch
;; clobbers rather than results.  Four alternatives:
;;   0/1: emit pcmpestrm, clobbering the "Yz" V16QI scratch (reg / mem source)
;;   2/3: emit pcmpestri, clobbering the "c" SI scratch   (reg / mem source)
;; "X" marks the scratch that a given alternative does not need.
10238 (define_insn "sse4_2_pcmpestr_cconly"
10239 [(set (reg:CC FLAGS_REG)
10241 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10242 (match_operand:SI 3 "register_operand" "a,a,a,a")
10243 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10244 (match_operand:SI 5 "register_operand" "d,d,d,d")
10245 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10247 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10248 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10251 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10252 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10253 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10254 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load,none,load")
10260 (set_attr "prefix" "maybe_vex")
10261 (set_attr "mode" "TI")])
;; Combined implicit-length string compare; the counterpart of
;; sse4_2_pcmpestr without the two SI length operands.  Results: SI in
;; operand 0 ("c"), V16QI in operand 1 ("Yz"), and FLAGS_REG.
;; The split uses REG_UNUSED notes to emit pcmpistri and/or pcmpistrm for the
;; live results (guards presumably the ecx/xmm0 locals -- confirm).  It emits
;; the flags-only pattern when only the flags are live, and a deleted-insn
;; note when nothing is live.
10263 (define_insn_and_split "sse4_2_pcmpistr"
10264 [(set (match_operand:SI 0 "register_operand" "=c,c")
10266 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10267 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10268 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10270 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10276 (set (reg:CC FLAGS_REG)
10283 && can_create_pseudo_p ()"
10288 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10289 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10290 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10293 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10294 operands[3], operands[4]));
10296 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10297 operands[3], operands[4]));
10298 if (flags && !(ecx || xmm0))
10299 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10300 operands[2], operands[3],
10302 if (!(flags || ecx || xmm0))
10303 emit_note (NOTE_INSN_DELETED);
10307 [(set_attr "type" "sselog")
10308 (set_attr "prefix_data16" "1")
10309 (set_attr "prefix_extra" "1")
10310 (set_attr "length_immediate" "1")
10311 (set_attr "memory" "none,load")
10312 (set_attr "mode" "TI")])
;; PCMPISTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 2 are the V16QI strings (operand 2 may be memory in
;; alternative 1); operand 3 is the control immediate.
10314 (define_insn "sse4_2_pcmpistri"
10315 [(set (match_operand:SI 0 "register_operand" "=c,c")
10317 [(match_operand:V16QI 1 "register_operand" "x,x")
10318 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10319 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10321 (set (reg:CC FLAGS_REG)
10328 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10329 [(set_attr "type" "sselog")
10330 (set_attr "prefix_data16" "1")
10331 (set_attr "prefix_extra" "1")
10332 (set_attr "length_immediate" "1")
10333 (set_attr "prefix" "maybe_vex")
10334 (set_attr "memory" "none,load")
10335 (set_attr "mode" "TI")])
;; PCMPISTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Inputs are laid out as in sse4_2_pcmpistri.
10337 (define_insn "sse4_2_pcmpistrm"
10338 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10340 [(match_operand:V16QI 1 "register_operand" "x,x")
10341 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10342 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10344 (set (reg:CC FLAGS_REG)
10351 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10352 [(set_attr "type" "sselog")
10353 (set_attr "prefix_data16" "1")
10354 (set_attr "prefix_extra" "1")
10355 (set_attr "length_immediate" "1")
10356 (set_attr "prefix" "maybe_vex")
10357 (set_attr "memory" "none,load")
10358 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare.  Operands 0 and 1 are scratch
;; clobbers.  Alternatives 0/1 emit pcmpistrm (clobbering the "Yz" scratch);
;; alternatives 2/3 emit pcmpistri (clobbering the "c" scratch).  The odd
;; alternatives take operand 3 from memory.
10360 (define_insn "sse4_2_pcmpistr_cconly"
10361 [(set (reg:CC FLAGS_REG)
10363 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10364 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10365 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10367 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10368 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10371 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10372 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10373 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10374 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10375 [(set_attr "type" "sselog")
10376 (set_attr "prefix_data16" "1")
10377 (set_attr "prefix_extra" "1")
10378 (set_attr "length_immediate" "1")
10379 (set_attr "memory" "none,load,none,load")
10380 (set_attr "prefix" "maybe_vex")
10381 (set_attr "mode" "TI")])
10383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10385 ;; XOP instructions
10387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10389 ;; XOP parallel integer multiply/add instructions.
10390 ;; Note the XOP multiply/add instructions
10391 ;; a[i] = b[i] * c[i] + d[i];
10392 ;; do not allow the value being added to be a memory operation.
;; VPMACSWW on V8HI.  Operands 1 and 2 are the commutative ("%") pair, of
;; which only operand 2 may be memory; operand 3 is restricted to a register
;; by its "x" constraint, matching the note above about the added value.
10393 (define_insn "xop_pmacsww"
10394 [(set (match_operand:V8HI 0 "register_operand" "=x")
10397 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10398 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10399 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10401 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10402 [(set_attr "type" "ssemuladd")
10403 (set_attr "mode" "TI")])
;; VPMACSSWW on V8HI: a V8HI multiply of operands 1 and 2 combined with the
;; register-only operand 3.  By its mnemonic this is the saturating form of
;; vpmacsww (the combining operator is not visible here -- confirm).
10405 (define_insn "xop_pmacssww"
10406 [(set (match_operand:V8HI 0 "register_operand" "=x")
10408 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10409 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10410 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10412 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10413 [(set_attr "type" "ssemuladd")
10414 (set_attr "mode" "TI")])
;; VPMACSDD on V4SI.  Same operand layout as the V8HI patterns: commutative
;; operands 1 and 2 (operand 2 may be memory), register-only operand 3.
10416 (define_insn "xop_pmacsdd"
10417 [(set (match_operand:V4SI 0 "register_operand" "=x")
10420 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10421 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10422 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10424 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10425 [(set_attr "type" "ssemuladd")
10426 (set_attr "mode" "TI")])
;; VPMACSSDD on V4SI: a V4SI multiply of operands 1 and 2 combined with the
;; register-only operand 3.  By its mnemonic this is the saturating form of
;; vpmacsdd (the combining operator is not visible here -- confirm).
10428 (define_insn "xop_pmacssdd"
10429 [(set (match_operand:V4SI 0 "register_operand" "=x")
10431 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10432 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10433 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10435 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10436 [(set_attr "type" "ssemuladd")
10437 (set_attr "mode" "TI")])
;; VPMACSSDQL: V4SI inputs (operands 1 and 2), V2DI accumulator (operand 3)
;; and V2DI result.  Both element selections start at element 1.
10439 (define_insn "xop_pmacssdql"
10440 [(set (match_operand:V2DI 0 "register_operand" "=x")
10445 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10446 (parallel [(const_int 1)
10449 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10450 (parallel [(const_int 1)
10452 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10454 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10455 [(set_attr "type" "ssemuladd")
10456 (set_attr "mode" "TI")])
;; VPMACSSDQH: V4SI inputs (operands 1 and 2), V2DI accumulator (operand 3)
;; and V2DI result.  Both element selections start at element 0.
10458 (define_insn "xop_pmacssdqh"
10459 [(set (match_operand:V2DI 0 "register_operand" "=x")
10464 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10465 (parallel [(const_int 0)
10469 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10470 (parallel [(const_int 0)
10472 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10474 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10475 [(set_attr "type" "ssemuladd")
10476 (set_attr "mode" "TI")])
;; VPMACSDQL: V4SI inputs (operands 1 and 2), V2DI accumulator (operand 3)
;; and V2DI result.  Both element selections start at element 1.
10478 (define_insn "xop_pmacsdql"
10479 [(set (match_operand:V2DI 0 "register_operand" "=x")
10484 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10485 (parallel [(const_int 1)
10489 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10490 (parallel [(const_int 1)
10492 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10494 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10495 [(set_attr "type" "ssemuladd")
10496 (set_attr "mode" "TI")])
10498 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10499 ;; fake it with a multiply/add. In general, we expect the define_split to
10500 ;; occur before register allocation, so we have to handle the corner case where
10501 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 1 and 3 of two V4SI inputs into V2DI.
;; The split sets operands[3] to the V2DI zero constant, so the replacement
;; pattern is presumably the multiply/add form with a zero addend (its
;; operator lines are not visible here -- confirm).
;; The destination is earlyclobber ("=&x"), which keeps it distinct from
;; operands 1 and 2.
;; NOTE(review): the comment above speaks of splitting before register
;; allocation, but the visible split condition is "&& reload_completed".
10502 (define_insn_and_split "xop_mulv2div2di3_low"
10503 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10507 (match_operand:V4SI 1 "register_operand" "%x")
10508 (parallel [(const_int 1)
10512 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10513 (parallel [(const_int 1)
10514 (const_int 3)])))))]
10517 "&& reload_completed"
10518 [(set (match_dup 0)
10526 (parallel [(const_int 1)
10531 (parallel [(const_int 1)
10535 operands[3] = CONST0_RTX (V2DImode);
10537 [(set_attr "type" "ssemul")
10538 (set_attr "mode" "TI")])
;; VPMACSDQH: V4SI inputs (operands 1 and 2), V2DI accumulator (operand 3)
;; and V2DI result.  Both element selections start at element 0.
10540 (define_insn "xop_pmacsdqh"
10541 [(set (match_operand:V2DI 0 "register_operand" "=x")
10546 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10547 (parallel [(const_int 0)
10551 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10552 (parallel [(const_int 0)
10554 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10556 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10557 [(set_attr "type" "ssemuladd")
10558 (set_attr "mode" "TI")])
10560 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10561 ;; fake it with a multiply/add. In general, we expect the define_split to
10562 ;; occur before register allocation, so we have to handle the corner case where
10563 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of two V4SI inputs into V2DI.
;; As in the _low twin, the split sets operands[3] to the V2DI zero constant,
;; and the destination is earlyclobber ("=&x").
;; NOTE(review): the comment above speaks of splitting before register
;; allocation, but the visible split condition is "&& reload_completed".
10564 (define_insn_and_split "xop_mulv2div2di3_high"
10565 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10569 (match_operand:V4SI 1 "register_operand" "%x")
10570 (parallel [(const_int 0)
10574 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10575 (parallel [(const_int 0)
10576 (const_int 2)])))))]
10579 "&& reload_completed"
10580 [(set (match_dup 0)
10588 (parallel [(const_int 0)
10593 (parallel [(const_int 0)
10597 operands[3] = CONST0_RTX (V2DImode);
10599 [(set_attr "type" "ssemul")
10600 (set_attr "mode" "TI")])
10602 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; VPMACSSWD: V8HI inputs (operands 1 and 2), V4SI accumulator (operand 3,
;; register only) and V4SI result.  Both element selections start at
;; element 1.
10603 (define_insn "xop_pmacsswd"
10604 [(set (match_operand:V4SI 0 "register_operand" "=x")
10609 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10610 (parallel [(const_int 1)
10616 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10617 (parallel [(const_int 1)
10621 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10623 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10624 [(set_attr "type" "ssemuladd")
10625 (set_attr "mode" "TI")])
;; VPMACSWD: V8HI inputs (operands 1 and 2), V4SI accumulator (operand 3,
;; register only) and V4SI result.  Both element selections start at
;; element 1.
10627 (define_insn "xop_pmacswd"
10628 [(set (match_operand:V4SI 0 "register_operand" "=x")
10633 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10634 (parallel [(const_int 1)
10640 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10641 (parallel [(const_int 1)
10645 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10647 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10648 [(set_attr "type" "ssemuladd")
10649 (set_attr "mode" "TI")])
;; VPMADCSSWD: V8HI inputs (operands 1 and 2), V4SI accumulator (operand 3)
;; and V4SI result.  The RTL uses two groups of element selections, one
;; starting at element 0 and one starting at element 1 (ending at 7).
10651 (define_insn "xop_pmadcsswd"
10652 [(set (match_operand:V4SI 0 "register_operand" "=x")
10658 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10659 (parallel [(const_int 0)
10665 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10666 (parallel [(const_int 0)
10674 (parallel [(const_int 1)
10681 (parallel [(const_int 1)
10684 (const_int 7)])))))
10685 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10687 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10688 [(set_attr "type" "ssemuladd")
10689 (set_attr "mode" "TI")])
;; VPMADCSWD: same operand layout and element selections as xop_pmadcsswd
;; (one group starting at element 0, one at element 1), with a different
;; mnemonic.
10691 (define_insn "xop_pmadcswd"
10692 [(set (match_operand:V4SI 0 "register_operand" "=x")
10698 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10699 (parallel [(const_int 0)
10705 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10706 (parallel [(const_int 0)
10714 (parallel [(const_int 1)
10721 (parallel [(const_int 1)
10724 (const_int 7)])))))
10725 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10727 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10728 [(set_attr "type" "ssemuladd")
10729 (set_attr "mode" "TI")])
10731 ;; XOP parallel XMM conditional moves
;; VPCMOV for every mode in the V iterator.  Operand 3 is listed first in the
;; RTL, followed by operands 1 and 2.  Across the two alternatives, either
;; operand 3 or operand 2 may come from memory, never both.
10732 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10733 [(set (match_operand:V 0 "register_operand" "=x,x")
10735 (match_operand:V 3 "nonimmediate_operand" "x,m")
10736 (match_operand:V 1 "register_operand" "x,x")
10737 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10739 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10740 [(set_attr "type" "sse4arg")])
10742 ;; XOP horizontal add/subtract instructions
;; VPHADDBW: horizontal add from V16QI (operand 1, register or memory) into
;; V8HI.  The two element selections start at elements 0 and 1.
10743 (define_insn "xop_phaddbw"
10744 [(set (match_operand:V8HI 0 "register_operand" "=x")
10748 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10749 (parallel [(const_int 0)
10760 (parallel [(const_int 1)
10767 (const_int 15)])))))]
10769 "vphaddbw\t{%1, %0|%0, %1}"
10770 [(set_attr "type" "sseiadd1")])
;; VPHADDBD: horizontal add from V16QI into V4SI.  Four element selections
;; are combined, starting at elements 0, 1, 2 and 3.
10772 (define_insn "xop_phaddbd"
10773 [(set (match_operand:V4SI 0 "register_operand" "=x")
10778 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10779 (parallel [(const_int 0)
10786 (parallel [(const_int 1)
10789 (const_int 13)]))))
10794 (parallel [(const_int 2)
10801 (parallel [(const_int 3)
10804 (const_int 15)]))))))]
10806 "vphaddbd\t{%1, %0|%0, %1}"
10807 [(set_attr "type" "sseiadd1")])
;; VPHADDBQ: horizontal add from V16QI into V2DI.  Eight element selections
;; are combined, starting at elements 0..3 and 8..11.
10809 (define_insn "xop_phaddbq"
10810 [(set (match_operand:V2DI 0 "register_operand" "=x")
10816 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10817 (parallel [(const_int 0)
10822 (parallel [(const_int 1)
10828 (parallel [(const_int 2)
10833 (parallel [(const_int 3)
10834 (const_int 7)])))))
10840 (parallel [(const_int 8)
10845 (parallel [(const_int 9)
10846 (const_int 13)]))))
10851 (parallel [(const_int 10)
10856 (parallel [(const_int 11)
10857 (const_int 15)])))))))]
10859 "vphaddbq\t{%1, %0|%0, %1}"
10860 [(set_attr "type" "sseiadd1")])
;; VPHADDWD: horizontal add from V8HI into V4SI.  The two element selections
;; start at elements 0 and 1.
10862 (define_insn "xop_phaddwd"
10863 [(set (match_operand:V4SI 0 "register_operand" "=x")
10867 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10868 (parallel [(const_int 0)
10875 (parallel [(const_int 1)
10878 (const_int 7)])))))]
10880 "vphaddwd\t{%1, %0|%0, %1}"
10881 [(set_attr "type" "sseiadd1")])
;; VPHADDWQ: horizontal add from V8HI into V2DI.  Four element selections
;; are combined, starting at elements 0, 1, 2 and 3.
10883 (define_insn "xop_phaddwq"
10884 [(set (match_operand:V2DI 0 "register_operand" "=x")
10889 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10890 (parallel [(const_int 0)
10895 (parallel [(const_int 1)
10901 (parallel [(const_int 2)
10906 (parallel [(const_int 3)
10907 (const_int 7)]))))))]
10909 "vphaddwq\t{%1, %0|%0, %1}"
10910 [(set_attr "type" "sseiadd1")])
;; VPHADDDQ: horizontal add from V4SI into V2DI.  The two element selections
;; start at elements 0 and 1.
10912 (define_insn "xop_phadddq"
10913 [(set (match_operand:V2DI 0 "register_operand" "=x")
10917 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10918 (parallel [(const_int 0)
10923 (parallel [(const_int 1)
10924 (const_int 3)])))))]
10926 "vphadddq\t{%1, %0|%0, %1}"
10927 [(set_attr "type" "sseiadd1")])
;; VPHADDUBW: same shape as xop_phaddbw (V16QI into V8HI, selections starting
;; at elements 0 and 1); by its mnemonic, the unsigned variant.
10929 (define_insn "xop_phaddubw"
10930 [(set (match_operand:V8HI 0 "register_operand" "=x")
10934 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10935 (parallel [(const_int 0)
10946 (parallel [(const_int 1)
10953 (const_int 15)])))))]
10955 "vphaddubw\t{%1, %0|%0, %1}"
10956 [(set_attr "type" "sseiadd1")])
;; VPHADDUBD: same shape as xop_phaddbd (V16QI into V4SI, four selections
;; starting at elements 0..3); by its mnemonic, the unsigned variant.
10958 (define_insn "xop_phaddubd"
10959 [(set (match_operand:V4SI 0 "register_operand" "=x")
10964 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10965 (parallel [(const_int 0)
10972 (parallel [(const_int 1)
10975 (const_int 13)]))))
10980 (parallel [(const_int 2)
10987 (parallel [(const_int 3)
10990 (const_int 15)]))))))]
10992 "vphaddubd\t{%1, %0|%0, %1}"
10993 [(set_attr "type" "sseiadd1")])
;; VPHADDUBQ: same shape as xop_phaddbq (V16QI into V2DI, eight selections
;; starting at elements 0..3 and 8..11); by its mnemonic, the unsigned
;; variant.
10995 (define_insn "xop_phaddubq"
10996 [(set (match_operand:V2DI 0 "register_operand" "=x")
11002 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11003 (parallel [(const_int 0)
11008 (parallel [(const_int 1)
11014 (parallel [(const_int 2)
11019 (parallel [(const_int 3)
11020 (const_int 7)])))))
11026 (parallel [(const_int 8)
11031 (parallel [(const_int 9)
11032 (const_int 13)]))))
11037 (parallel [(const_int 10)
11042 (parallel [(const_int 11)
11043 (const_int 15)])))))))]
11045 "vphaddubq\t{%1, %0|%0, %1}"
11046 [(set_attr "type" "sseiadd1")])
;; VPHADDUWD: same shape as xop_phaddwd (V8HI into V4SI, selections starting
;; at elements 0 and 1); by its mnemonic, the unsigned variant.
11048 (define_insn "xop_phadduwd"
11049 [(set (match_operand:V4SI 0 "register_operand" "=x")
11053 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11054 (parallel [(const_int 0)
11061 (parallel [(const_int 1)
11064 (const_int 7)])))))]
11066 "vphadduwd\t{%1, %0|%0, %1}"
11067 [(set_attr "type" "sseiadd1")])
;; VPHADDUWQ: same shape as xop_phaddwq (V8HI into V2DI, four selections
;; starting at elements 0..3); by its mnemonic, the unsigned variant.
11069 (define_insn "xop_phadduwq"
11070 [(set (match_operand:V2DI 0 "register_operand" "=x")
11075 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11076 (parallel [(const_int 0)
11081 (parallel [(const_int 1)
11087 (parallel [(const_int 2)
11092 (parallel [(const_int 3)
11093 (const_int 7)]))))))]
11095 "vphadduwq\t{%1, %0|%0, %1}"
11096 [(set_attr "type" "sseiadd1")])
;; VPHADDUDQ: same shape as xop_phadddq (V4SI into V2DI, selections starting
;; at elements 0 and 1); by its mnemonic, the unsigned variant.
11098 (define_insn "xop_phaddudq"
11099 [(set (match_operand:V2DI 0 "register_operand" "=x")
11103 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11104 (parallel [(const_int 0)
11109 (parallel [(const_int 1)
11110 (const_int 3)])))))]
11112 "vphaddudq\t{%1, %0|%0, %1}"
11113 [(set_attr "type" "sseiadd1")])
;; VPHSUBBW: horizontal subtract from V16QI into V8HI.  The two element
;; selections start at elements 0 and 1.
11115 (define_insn "xop_phsubbw"
11116 [(set (match_operand:V8HI 0 "register_operand" "=x")
11120 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11121 (parallel [(const_int 0)
11132 (parallel [(const_int 1)
11139 (const_int 15)])))))]
11141 "vphsubbw\t{%1, %0|%0, %1}"
11142 [(set_attr "type" "sseiadd1")])
;; VPHSUBWD: horizontal subtract from V8HI into V4SI.  The two element
;; selections start at elements 0 and 1.
11144 (define_insn "xop_phsubwd"
11145 [(set (match_operand:V4SI 0 "register_operand" "=x")
11149 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11150 (parallel [(const_int 0)
11157 (parallel [(const_int 1)
11160 (const_int 7)])))))]
11162 "vphsubwd\t{%1, %0|%0, %1}"
11163 [(set_attr "type" "sseiadd1")])
;; VPHSUBDQ: horizontal subtract from V4SI into V2DI.  The two element
;; selections start at elements 0 and 1.
11165 (define_insn "xop_phsubdq"
11166 [(set (match_operand:V2DI 0 "register_operand" "=x")
11170 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11171 (parallel [(const_int 0)
11176 (parallel [(const_int 1)
11177 (const_int 3)])))))]
11179 "vphsubdq\t{%1, %0|%0, %1}"
11180 [(set_attr "type" "sseiadd1")])
11182 ;; XOP permute instructions
;; VPPERM as an UNSPEC_XOP_PERMUTE over three V16QI operands.  Operand 1 is
;; always a register.  Either operand 2 or operand 3 may be memory, and the
;; insn condition rejects the case where both are.
11183 (define_insn "xop_pperm"
11184 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11186 [(match_operand:V16QI 1 "register_operand" "x,x")
11187 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11188 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11189 UNSPEC_XOP_PERMUTE))]
11190 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11191 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11192 [(set_attr "type" "sse4arg")
11193 (set_attr "mode" "TI")])
11195 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors (operands 1 and 2) into one V4SI result using
;; VPPERM.  Operand 3 is the V16QI selector, attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11196 (define_insn "xop_pperm_pack_v2di_v4si"
11197 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11200 (match_operand:V2DI 1 "register_operand" "x,x"))
11202 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11203 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11204 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11205 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11206 [(set_attr "type" "sse4arg")
11207 (set_attr "mode" "TI")])
;; Pack two V4SI vectors (operands 1 and 2) into one V8HI result using
;; VPPERM.  Operand 3 is the V16QI selector, attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11209 (define_insn "xop_pperm_pack_v4si_v8hi"
11210 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11213 (match_operand:V4SI 1 "register_operand" "x,x"))
11215 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11216 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11217 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11218 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11219 [(set_attr "type" "sse4arg")
11220 (set_attr "mode" "TI")])
;; Pack two V8HI vectors (operands 1 and 2) into one V16QI result using
;; VPPERM.  Operand 3 is the V16QI selector, attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11222 (define_insn "xop_pperm_pack_v8hi_v16qi"
11223 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11226 (match_operand:V8HI 1 "register_operand" "x,x"))
11228 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11229 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11230 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11231 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11232 [(set_attr "type" "sse4arg")
11233 (set_attr "mode" "TI")])
11235 ;; XOP packed rotate instructions
;; Rotate-left expander for 128-bit integer vectors with a scalar SI count.
;; If the count is an immediate in 0..<sserotatemax>, nothing is emitted
;; here, presumably leaving the immediate insn to match the template.
;; Otherwise the count is converted to the element mode if needed, broadcast
;; into a vector with vec_init<mode> (one copy per element), and passed to
;; the variable-count pattern xop_vrotl<mode>3.
11236 (define_expand "rotl<mode>3"
11237 [(set (match_operand:VI_128 0 "register_operand" "")
11239 (match_operand:VI_128 1 "nonimmediate_operand" "")
11240 (match_operand:SI 2 "general_operand")))]
11243 /* If we were given a scalar, convert it to parallel */
11244 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11246 rtvec vs = rtvec_alloc (<ssescalarnum>);
11247 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11248 rtx reg = gen_reg_rtx (<MODE>mode);
11249 rtx op2 = operands[2];
11252 if (GET_MODE (op2) != <ssescalarmode>mode)
11254 op2 = gen_reg_rtx (<ssescalarmode>mode);
11255 convert_move (op2, operands[2], false);
11258 for (i = 0; i < <ssescalarnum>; i++)
11259 RTVEC_ELT (vs, i) = op2;
11261 emit_insn (gen_vec_init<mode> (reg, par));
11262 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for 128-bit integer vectors with a scalar SI count.
;; It follows the same scheme as rotl<mode>3, with one extra step: the
;; broadcast count vector is negated with neg<mode>2 before being handed to
;; xop_vrotl<mode>3, so the right rotation is expressed as a left rotation
;; by the negated count.
11267 (define_expand "rotr<mode>3"
11268 [(set (match_operand:VI_128 0 "register_operand" "")
11270 (match_operand:VI_128 1 "nonimmediate_operand" "")
11271 (match_operand:SI 2 "general_operand")))]
11274 /* If we were given a scalar, convert it to parallel */
11275 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11277 rtvec vs = rtvec_alloc (<ssescalarnum>);
11278 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11279 rtx neg = gen_reg_rtx (<MODE>mode);
11280 rtx reg = gen_reg_rtx (<MODE>mode);
11281 rtx op2 = operands[2];
11284 if (GET_MODE (op2) != <ssescalarmode>mode)
11286 op2 = gen_reg_rtx (<ssescalarmode>mode);
11287 convert_move (op2, operands[2], false);
11290 for (i = 0; i < <ssescalarnum>; i++)
11291 RTVEC_ELT (vs, i) = op2;
11293 emit_insn (gen_vec_init<mode> (reg, par));
11294 emit_insn (gen_neg<mode>2 (neg, reg));
11295 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; VPROT with an immediate count.  Operand 2 must be an immediate in
;; 0..<sserotatemax> and is passed to the instruction unchanged.
11300 (define_insn "xop_rotl<mode>3"
11301 [(set (match_operand:VI_128 0 "register_operand" "=x")
11303 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11304 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11306 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11307 [(set_attr "type" "sseishft")
11308 (set_attr "length_immediate" "1")
11309 (set_attr "mode" "TI")])
;; VPROT with an immediate count, rotate-right flavour.  The sibling
;; xop_rotl<mode>3 passes its count straight to vprot, so a right rotation by
;; operand 2 is emitted here as a rotation by
;; (element width in bits - operand 2), stored in operands[3].
;; The element width must come from the scalar mode.  <ssescalarnum> is the
;; number of elements in the vector, so <ssescalarnum> * 8 only equals the
;; element width for V4SI (4 * 8 == 32); for V16QI, V8HI and V2DI it would
;; give 128, 64 and 16 instead of 8, 16 and 64.
11311 (define_insn "xop_rotr<mode>3"
11312 [(set (match_operand:VI_128 0 "register_operand" "=x")
11314 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11315 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11318 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11319 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11321 [(set_attr "type" "sseishft")
11322 (set_attr "length_immediate" "1")
11323 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negates the count vector (operand 2)
;; into a fresh register and emits xop_vrotl<mode>3 with the negated counts.
11325 (define_expand "vrotr<mode>3"
11326 [(match_operand:VI_128 0 "register_operand" "")
11327 (match_operand:VI_128 1 "register_operand" "")
11328 (match_operand:VI_128 2 "register_operand" "")]
11331 rtx reg = gen_reg_rtx (<MODE>mode);
11332 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11333 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
11337 (define_expand "vrotl<mode>3"
11338 [(match_operand:VI_128 0 "register_operand" "")
11339 (match_operand:VI_128 1 "register_operand" "")
11340 (match_operand:VI_128 2 "register_operand" "")]
11343 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; VPROT with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else, one arm of which uses the negation of operand 2.  Either
;; operand 1 or operand 2 may be memory, but the insn condition rejects the
;; case where both are.
11347 (define_insn "xop_vrotl<mode>3"
11348 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11349 (if_then_else:VI_128
11351 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11354 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11358 (neg:VI_128 (match_dup 2)))))]
11359 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11360 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11361 [(set_attr "type" "sseishft")
11362 (set_attr "prefix_data16" "0")
11363 (set_attr "prefix_extra" "2")
11364 (set_attr "mode" "TI")])
11366 ;; XOP packed shift instructions.
;; Variable logical right shift for the VI12_128 modes: negates the count
;; vector and emits xop_shl<mode>3 with the negated counts.
11367 (define_expand "vlshr<mode>3"
11368 [(set (match_operand:VI12_128 0 "register_operand" "")
11370 (match_operand:VI12_128 1 "register_operand" "")
11371 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11374 rtx neg = gen_reg_rtx (<MODE>mode);
11375 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11376 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable logical right shift for the VI48_128 modes, available with
;; either AVX2 or XOP.  The visible preparation code negates the counts and
;; emits xop_shl<mode>3; presumably that path is taken only when AVX2 is not
;; available (the guard is not visible here -- confirm).
11380 (define_expand "vlshr<mode>3"
11381 [(set (match_operand:VI48_128 0 "register_operand" "")
11383 (match_operand:VI48_128 1 "register_operand" "")
11384 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11385 "TARGET_AVX2 || TARGET_XOP"
11389 rtx neg = gen_reg_rtx (<MODE>mode);
11390 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11391 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable logical right shift for the 256-bit VI48_256 modes.  Only the RTL
;; template is given here, with no preparation code to rewrite the operands.
11396 (define_expand "vlshr<mode>3"
11397 [(set (match_operand:VI48_256 0 "register_operand" "")
11399 (match_operand:VI48_256 1 "register_operand" "")
11400 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Variable arithmetic right shift for the VI128_128 modes: negates the count
;; vector and emits xop_sha<mode>3 with the negated counts.
11403 (define_expand "vashr<mode>3"
11404 [(set (match_operand:VI128_128 0 "register_operand" "")
11405 (ashiftrt:VI128_128
11406 (match_operand:VI128_128 1 "register_operand" "")
11407 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11410 rtx neg = gen_reg_rtx (<MODE>mode);
11411 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11412 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V4SI, available with either AVX2 or
;; XOP.  The visible preparation code negates the counts and emits
;; xop_shav4si3; presumably that path is taken only when AVX2 is not
;; available (the guard is not visible here -- confirm).
11416 (define_expand "vashrv4si3"
11417 [(set (match_operand:V4SI 0 "register_operand" "")
11418 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11419 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11420 "TARGET_AVX2 || TARGET_XOP"
11424 rtx neg = gen_reg_rtx (V4SImode);
11425 emit_insn (gen_negv4si2 (neg, operands[2]));
11426 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Expander vashrv8si3: per-element arithmetic right shift (ashiftrt) of a
;; V8SI register by the counts in operand 2 (register or memory).
11431 (define_expand "vashrv8si3"
11432 [(set (match_operand:V8SI 0 "register_operand" "")
11433 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11434 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; Expander vashl<mode>3 for the VI12_128 modes.  Emits xop_sha<mode>3 with
;; the shift counts (operand 2) passed through unchanged, i.e. without the
;; negation that the right-shift expanders apply.
11437 (define_expand "vashl<mode>3"
11438 [(set (match_operand:VI12_128 0 "register_operand" "")
11440 (match_operand:VI12_128 1 "register_operand" "")
11441 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11444 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Expander vashl<mode>3 for the VI48_128 modes, enabled for
;; TARGET_AVX2 || TARGET_XOP.  The preparation statements below force the
;; count vector (operand 2) into a register and emit xop_sha<mode>3.
;; NOTE(review): xop_sha<mode>3 needs TARGET_XOP; confirm this path is only
;; reached when AVX2 is not available.
11448 (define_expand "vashl<mode>3"
11449 [(set (match_operand:VI48_128 0 "register_operand" "")
11451 (match_operand:VI48_128 1 "register_operand" "")
11452 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11453 "TARGET_AVX2 || TARGET_XOP"
11457 operands[2] = force_reg (<MODE>mode, operands[2]);
11458 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Expander vashl<mode>3 for the VI48_256 modes.  Operands 0 and 1 are
;; registers; operand 2 (shift counts) may be a register or memory.
11463 (define_expand "vashl<mode>3"
11464 [(set (match_operand:VI48_256 0 "register_operand" "")
11466 (match_operand:VI48_256 1 "register_operand" "")
11467 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Insn xop_sha<mode>3: emits vpsha<ssemodesuffix> for any VI_128 mode.
;; The RTL is an if_then_else keyed on operand 2 (the count vector) whose
;; other arm uses (neg (match_dup 2)).  Two alternatives: counts in a
;; register with the source in register/memory, or counts in memory with the
;; source in a register.  The condition requires TARGET_XOP and rejects the
;; case where operands 1 and 2 are both memory.
11470 (define_insn "xop_sha<mode>3"
11471 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11472 (if_then_else:VI_128
11474 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11477 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11481 (neg:VI_128 (match_dup 2)))))]
11482 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11483 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11484 [(set_attr "type" "sseishft")
11485 (set_attr "prefix_data16" "0")
11486 (set_attr "prefix_extra" "2")
11487 (set_attr "mode" "TI")])
;; Insn xop_shl<mode>3: emits vpshl<ssemodesuffix> for any VI_128 mode.
;; Same operand layout, alternatives and condition as xop_sha<mode>3:
;; an if_then_else keyed on the count vector (operand 2) with a
;; (neg (match_dup 2)) arm, TARGET_XOP required, and at most one of
;; operands 1 and 2 may be memory.
11489 (define_insn "xop_shl<mode>3"
11490 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11491 (if_then_else:VI_128
11493 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11496 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11500 (neg:VI_128 (match_dup 2)))))]
11501 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11502 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11503 [(set_attr "type" "sseishft")
11504 (set_attr "prefix_data16" "0")
11505 (set_attr "prefix_extra" "2")
11506 (set_attr "mode" "TI")])
11508 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander ashlv16qi3: shift of a V16QI register by a scalar SImode count
;; (operand 2, register or constant).  The count is replicated into all 16
;; lanes by building a PARALLEL and initialising a fresh V16QI register via
;; vec_initv16qi; the shift itself is done by xop_shav16qi3.
11509 (define_expand "ashlv16qi3"
11510 [(set (match_operand:V16QI 0 "register_operand" "")
11512 (match_operand:V16QI 1 "register_operand" "")
11513 (match_operand:SI 2 "nonmemory_operand" "")))]
11516 rtx reg = gen_reg_rtx (V16QImode);
11520 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11521 for (i = 0; i < 16; i++)
11522 XVECEXP (par, 0, i) = operands[2];
11524 emit_insn (gen_vec_initv16qi (reg, par));
11525 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
;; Expander <shift_insn>v16qi3: V16QI shift by a scalar SImode count
;; (operand 2).  A CONST_INT count is negated at expand time with
;; GEN_INT (-INTVAL (...)); the count is then broadcast to all 16 lanes
;; through vec_initv16qi.  gen_negv16qi2 negates the broadcast vector at run
;; time; NOTE(review): presumably only for non-constant counts, tracked by
;; the `negate' flag -- confirm.  The final insn is xop_shlv16qi3 when
;; <CODE> is LSHIFTRT, otherwise xop_shav16qi3.
11529 (define_expand "<shift_insn>v16qi3"
11530 [(set (match_operand:V16QI 0 "register_operand" "")
11532 (match_operand:V16QI 1 "register_operand" "")
11533 (match_operand:SI 2 "nonmemory_operand" "")))]
11536 rtx reg = gen_reg_rtx (V16QImode);
11538 bool negate = false;
11539 rtx (*shift_insn)(rtx, rtx, rtx);
11542 if (CONST_INT_P (operands[2]))
11543 operands[2] = GEN_INT (-INTVAL (operands[2]));
11547 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11548 for (i = 0; i < 16; i++)
11549 XVECEXP (par, 0, i) = operands[2];
11551 emit_insn (gen_vec_initv16qi (reg, par));
11554 emit_insn (gen_negv16qi2 (reg, reg));
11556 if (<CODE> == LSHIFTRT)
11557 shift_insn = gen_xop_shlv16qi3;
11559 shift_insn = gen_xop_shav16qi3;
11561 emit_insn (shift_insn (operands[0], operands[1], reg));
;; Expander ashrv2di3: V2DI shift by a scalar DImode count (operand 2).
;; A CONST_INT count is negated at expand time; the count is broadcast to
;; both lanes through vec_initv2di, and gen_negv2di2 negates the broadcast
;; vector at run time (NOTE(review): presumably only for non-constant
;; counts, tracked by `negate' -- confirm).  The shift is emitted as
;; xop_shav2di3.
11565 (define_expand "ashrv2di3"
11566 [(set (match_operand:V2DI 0 "register_operand" "")
11568 (match_operand:V2DI 1 "register_operand" "")
11569 (match_operand:DI 2 "nonmemory_operand" "")))]
11572 rtx reg = gen_reg_rtx (V2DImode);
11574 bool negate = false;
11577 if (CONST_INT_P (operands[2]))
11578 operands[2] = GEN_INT (-INTVAL (operands[2]));
11582 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11583 for (i = 0; i < 2; i++)
11584 XVECEXP (par, 0, i) = operands[2];
11586 emit_insn (gen_vec_initv2di (reg, par));
11589 emit_insn (gen_negv2di2 (reg, reg));
11591 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11595 ;; XOP FRCZ support
;; Insn xop_frcz<mode>2: emits vfrcz<ssemodesuffix> for the FMAMODE modes.
;; Operand 0 is the destination register; operand 1 is the source in a
;; register or memory.
11596 (define_insn "xop_frcz<mode>2"
11597 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11599 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11602 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11603 [(set_attr "type" "ssecvt1")
11604 (set_attr "mode" "<MODE>")])
;; Expander xop_vmfrcz<mode>2 for the VF_128 modes.  The preparation code
;; supplies operand 3 as the all-zero vector constant of the same mode.
;; NOTE(review): presumably this zero vector feeds the merge in the
;; matching *xop_vmfrcz_<mode> insn -- confirm.
11607 (define_expand "xop_vmfrcz<mode>2"
11608 [(set (match_operand:VF_128 0 "register_operand")
11611 [(match_operand:VF_128 1 "nonimmediate_operand")]
11617 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn *xop_vmfrcz_<mode>: emits vfrcz<ssescalarmodesuffix> (the scalar
;; suffix) for the VF_128 modes.  Operand 1 is the source (register or
;; memory); operand 2 must match const0_operand and does not appear in the
;; output template.
11620 (define_insn "*xop_vmfrcz_<mode>"
11621 [(set (match_operand:VF_128 0 "register_operand" "=x")
11624 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11626 (match_operand:VF_128 2 "const0_operand")
11629 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11630 [(set_attr "type" "ssecvt1")
11631 (set_attr "mode" "<MODE>")])
;; Insn xop_maskcmp<mode>3: integer vector compare for the VI_128 modes.
;; Operand 1 is the comparison operator (ix86_comparison_int_operator) over
;; operand 2 (register) and operand 3 (register or memory); %Y1 prints the
;; condition into the vpcom mnemonic.
;; NOTE(review): "type" is sse4arg here but ssecmp in xop_maskcmp_uns<mode>3
;; -- confirm the difference is intended.
11633 (define_insn "xop_maskcmp<mode>3"
11634 [(set (match_operand:VI_128 0 "register_operand" "=x")
11635 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11636 [(match_operand:VI_128 2 "register_operand" "x")
11637 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11639 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11640 [(set_attr "type" "sse4arg")
11641 (set_attr "prefix_data16" "0")
11642 (set_attr "prefix_rep" "0")
11643 (set_attr "prefix_extra" "2")
11644 (set_attr "length_immediate" "1")
11645 (set_attr "mode" "TI")])
;; Insn xop_maskcmp_uns<mode>3: unsigned counterpart of xop_maskcmp<mode>3.
;; Operand 1 must satisfy ix86_comparison_uns_operator and the mnemonic gets
;; a `u' after the condition (vpcom<cond>u<suffix>).
11647 (define_insn "xop_maskcmp_uns<mode>3"
11648 [(set (match_operand:VI_128 0 "register_operand" "=x")
11649 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11650 [(match_operand:VI_128 2 "register_operand" "x")
11651 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11653 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11654 [(set_attr "type" "ssecmp")
11655 (set_attr "prefix_data16" "0")
11656 (set_attr "prefix_rep" "0")
11657 (set_attr "prefix_extra" "2")
11658 (set_attr "length_immediate" "1")
11659 (set_attr "mode" "TI")])
11661 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11662 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11663 ;; the exact instruction generated for the intrinsic.
;; The unsigned comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the output
;; template is the same vpcom<cond>u<suffix> form as xop_maskcmp_uns<mode>3.
11664 (define_insn "xop_maskcmp_uns2<mode>3"
11665 [(set (match_operand:VI_128 0 "register_operand" "=x")
11667 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11668 [(match_operand:VI_128 2 "register_operand" "x")
11669 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11670 UNSPEC_XOP_UNSIGNED_CMP))]
11672 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11673 [(set_attr "type" "ssecmp")
11674 (set_attr "prefix_data16" "0")
11675 (set_attr "prefix_extra" "2")
11676 (set_attr "length_immediate" "1")
11677 (set_attr "mode" "TI")])
11679 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11680 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selector: a non-zero value emits
;; vpcomtrue<suffix>, zero emits vpcomfalse<suffix>.  Operands 1 (register)
;; and 2 (register or memory) are the compared vectors.
11681 (define_insn "xop_pcom_tf<mode>3"
11682 [(set (match_operand:VI_128 0 "register_operand" "=x")
11684 [(match_operand:VI_128 1 "register_operand" "x")
11685 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11686 (match_operand:SI 3 "const_int_operand" "n")]
11687 UNSPEC_XOP_TRUEFALSE))]
11690 return ((INTVAL (operands[3]) != 0)
11691 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11692 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11694 [(set_attr "type" "ssecmp")
11695 (set_attr "prefix_data16" "0")
11696 (set_attr "prefix_extra" "2")
11697 (set_attr "length_immediate" "1")
11698 (set_attr "mode" "TI")])
;; Insn xop_vpermil2<mode>3: emits vpermil2<ssemodesuffix> for the VF modes.
;; Operands 1 and 2 are the two float source vectors, operand 3 is an
;; integer vector (<sseintvecmode>, register or memory) and operand 4 is an
;; immediate in the range 0..3.
;; NOTE(review): the `%' modifier on operand 2 declares it commutative with
;; operand 3, which has a different mode -- confirm this is intended.
11700 (define_insn "xop_vpermil2<mode>3"
11701 [(set (match_operand:VF 0 "register_operand" "=x")
11703 [(match_operand:VF 1 "register_operand" "x")
11704 (match_operand:VF 2 "nonimmediate_operand" "%x")
11705 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11706 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11709 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11710 [(set_attr "type" "sse4arg")
11711 (set_attr "length_immediate" "1")
11712 (set_attr "mode" "<MODE>")])
11714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn aesenc on V2DI.  Two alternatives selected by the "isa" attribute:
;; the non-AVX form ties operand 1 to the destination ("0") and prints the
;; two-operand aesenc; the AVX form prints the three-operand vaesenc.
;; Operand 2 may be a register or memory in both.
11716 (define_insn "aesenc"
11717 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11718 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11719 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11723 aesenc\t{%2, %0|%0, %2}
11724 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11725 [(set_attr "isa" "noavx,avx")
11726 (set_attr "type" "sselog1")
11727 (set_attr "prefix_extra" "1")
11728 (set_attr "prefix" "orig,vex")
11729 (set_attr "mode" "TI")])
;; Insn aesenclast on V2DI (UNSPEC_AESENCLAST).  Same two-alternative layout
;; as aesenc: destructive two-operand non-AVX form, three-operand VEX form.
11731 (define_insn "aesenclast"
11732 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11733 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11734 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11735 UNSPEC_AESENCLAST))]
11738 aesenclast\t{%2, %0|%0, %2}
11739 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11740 [(set_attr "isa" "noavx,avx")
11741 (set_attr "type" "sselog1")
11742 (set_attr "prefix_extra" "1")
11743 (set_attr "prefix" "orig,vex")
11744 (set_attr "mode" "TI")])
;; Insn aesdec on V2DI.  Alternative 0 is the non-AVX two-operand form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX-encoded vaesdec.
11746 (define_insn "aesdec"
11747 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11748 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11749 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11753 aesdec\t{%2, %0|%0, %2}
11754 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11755 [(set_attr "isa" "noavx,avx")
11756 (set_attr "type" "sselog1")
11757 (set_attr "prefix_extra" "1")
11758 (set_attr "prefix" "orig,vex")
11759 (set_attr "mode" "TI")])
;; Insn aesdeclast on V2DI (UNSPEC_AESDECLAST).  Alternative 0 is the
;; non-AVX two-operand form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX-encoded vaesdeclast.
11761 (define_insn "aesdeclast"
11762 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11763 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11764 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11765 UNSPEC_AESDECLAST))]
11768 aesdeclast\t{%2, %0|%0, %2}
11769 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11770 [(set_attr "isa" "noavx,avx")
11771 (set_attr "type" "sselog1")
11772 (set_attr "prefix_extra" "1")
11773 (set_attr "prefix" "orig,vex")
11774 (set_attr "mode" "TI")])
;; Insn aesimc on V2DI: single source operand (register or memory).  The
;; template starts with %v and "prefix" is maybe_vex, so one pattern covers
;; both the legacy and the VEX encoding.
11776 (define_insn "aesimc"
11777 [(set (match_operand:V2DI 0 "register_operand" "=x")
11778 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11781 "%vaesimc\t{%1, %0|%0, %1}"
11782 [(set_attr "type" "sselog1")
11783 (set_attr "prefix_extra" "1")
11784 (set_attr "prefix" "maybe_vex")
11785 (set_attr "mode" "TI")])
;; Insn aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST).  Operand 1 is the
;; source (register or memory) and operand 2 an 8-bit immediate (0..255).
;; Uses the %v / maybe_vex scheme so one pattern serves both encodings.
11787 (define_insn "aeskeygenassist"
11788 [(set (match_operand:V2DI 0 "register_operand" "=x")
11789 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11790 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11791 UNSPEC_AESKEYGENASSIST))]
11793 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11794 [(set_attr "type" "sselog1")
11795 (set_attr "prefix_extra" "1")
11796 (set_attr "length_immediate" "1")
11797 (set_attr "prefix" "maybe_vex")
11798 (set_attr "mode" "TI")])
;; Insn pclmulqdq on V2DI with an 8-bit immediate (operand 3, 0..255).
;; Alternative 0 is the non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-source-operand VEX form vpclmulqdq.
11800 (define_insn "pclmulqdq"
11801 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11802 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11803 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11804 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11808 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11809 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11810 [(set_attr "isa" "noavx,avx")
11811 (set_attr "type" "sselog1")
11812 (set_attr "prefix_extra" "1")
11813 (set_attr "length_immediate" "1")
11814 (set_attr "prefix" "orig,vex")
11815 (set_attr "mode" "TI")])
;; Expander avx_vzeroall.  Builds operand 0 as a PARALLEL with nregs + 1
;; elements, where nregs is 16 for TARGET_64BIT and 8 otherwise.  Element 0
;; is an UNSPEC_VOLATILE marker; elements 1..nregs are SETs that assign the
;; V8SImode zero constant to each SSE register, so the clobbering of every
;; register is explicit in the RTL.
11817 (define_expand "avx_vzeroall"
11818 [(match_par_dup 0 [(const_int 0)])]
11821 int nregs = TARGET_64BIT ? 16 : 8;
11824 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11826 XVECEXP (operands[0], 0, 0)
11827 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11830 for (regno = 0; regno < nregs; regno++)
11831 XVECEXP (operands[0], 0, regno + 1)
11832 = gen_rtx_SET (VOIDmode,
11833 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11834 CONST0_RTX (V8SImode));
;; Insn *avx_vzeroall: matches a PARALLEL accepted by the
;; vzeroall_operation predicate whose first element is the
;; UNSPECV_VZEROALL unspec_volatile.  Attributes mark it as having no modrm
;; byte and no memory access.
11837 (define_insn "*avx_vzeroall"
11838 [(match_parallel 0 "vzeroall_operation"
11839 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11842 [(set_attr "type" "sse")
11843 (set_attr "modrm" "0")
11844 (set_attr "memory" "none")
11845 (set_attr "prefix" "vex")
11846 (set_attr "mode" "OI")])
11848 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11849 ;; if the upper 128bits are unused.
;; Modelled as an unspec_volatile (UNSPECV_VZEROUPPER) carrying one
;; const_int operand; no modrm byte and no memory access.
11850 (define_insn "avx_vzeroupper"
11851 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11852 UNSPECV_VZEROUPPER)]
11855 [(set_attr "type" "sse")
11856 (set_attr "modrm" "0")
11857 (set_attr "memory" "none")
11858 (set_attr "prefix" "vex")
11859 (set_attr "mode" "OI")])
;; Mode attribute AVXTOSSEMODE: maps each integer vector mode to the 128-bit
;; vector mode with the same element type.  256-bit modes map to their
;; half-width counterpart; 128-bit modes map to themselves.
11861 (define_mode_attr AVXTOSSEMODE
11862 [(V4DI "V2DI") (V2DI "V2DI")
11863 (V8SI "V4SI") (V4SI "V4SI")
11864 (V16HI "V8HI") (V8HI "V8HI")
11865 (V32QI "V16QI") (V16QI "V16QI")])
;; Insn avx2_pbroadcast<mode>: emits vpbroadcast<ssemodesuffix> for the VI
;; modes.  The source is element 0 (vec_select with index 0) of a 128-bit
;; vector of mode <AVXTOSSEMODE>, held in a register or memory.
11867 (define_insn "avx2_pbroadcast<mode>"
11868 [(set (match_operand:VI 0 "register_operand" "=x")
11870 (vec_select:<ssescalarmode>
11871 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11872 (parallel [(const_int 0)]))))]
11874 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11875 [(set_attr "type" "ssemov")
11876 (set_attr "prefix_extra" "1")
11877 (set_attr "prefix" "vex")
11878 (set_attr "mode" "<sseinsnmode>")])
;; Insn avx2_permvarv8si: emits vpermd on V8SI.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
11880 (define_insn "avx2_permvarv8si"
11881 [(set (match_operand:V8SI 0 "register_operand" "=x")
11883 [(match_operand:V8SI 1 "register_operand" "x")
11884 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11887 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11888 [(set_attr "type" "sselog")
11889 (set_attr "prefix" "vex")
11890 (set_attr "mode" "OI")])
;; Insn avx2_permv4df: emits vpermpd on V4DF with an 8-bit immediate
;; selector (operand 2, 0..255).
;; NOTE(review): operand 1 uses the predicate register_operand together
;; with the constraint "xm"; the predicate rejects memory, so the `m' part
;; of the constraint can never be used directly.  Confirm whether
;; nonimmediate_operand was intended.
11892 (define_insn "avx2_permv4df"
11893 [(set (match_operand:V4DF 0 "register_operand" "=x")
11895 [(match_operand:V4DF 1 "register_operand" "xm")
11896 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11899 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11900 [(set_attr "type" "sselog")
11901 (set_attr "prefix_extra" "1")
11902 (set_attr "prefix" "vex")
11903 (set_attr "mode" "OI")])
;; Insn avx2_permvarv8sf: emits vpermps on V8SF.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
11905 (define_insn "avx2_permvarv8sf"
11906 [(set (match_operand:V8SF 0 "register_operand" "=x")
11908 [(match_operand:V8SF 1 "register_operand" "x")
11909 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11912 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11913 [(set_attr "type" "sselog")
11914 (set_attr "prefix" "vex")
11915 (set_attr "mode" "OI")])
;; Expander avx2_permv4di: takes the 8-bit immediate in operand 2 and splits
;; it into four 2-bit element selectors (bits 1:0, 3:2, 5:4 and 7:6), which
;; are passed as separate operands to avx2_permv4di_1.
11917 (define_expand "avx2_permv4di"
11918 [(match_operand:V4DI 0 "register_operand" "")
11919 (match_operand:V4DI 1 "nonimmediate_operand" "")
11920 (match_operand:SI 2 "const_0_to_255_operand" "")]
11923 int mask = INTVAL (operands[2]);
11924 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11925 GEN_INT ((mask >> 0) & 3),
11926 GEN_INT ((mask >> 2) & 3),
11927 GEN_INT ((mask >> 4) & 3),
11928 GEN_INT ((mask >> 6) & 3)));
;; Insn avx2_permv4di_1: V4DI permutation given as four explicit element
;; indices (operands 2..5, each 0..3).  The output code packs them back
;; into one immediate, two bits per index with operand 2 in the low bits,
;; stores it in operands[2] and prints vpermq.
11932 (define_insn "avx2_permv4di_1"
11933 [(set (match_operand:V4DI 0 "register_operand" "=x")
11935 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11936 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11937 (match_operand 3 "const_0_to_3_operand" "")
11938 (match_operand 4 "const_0_to_3_operand" "")
11939 (match_operand 5 "const_0_to_3_operand" "")])))]
11943 mask |= INTVAL (operands[2]) << 0;
11944 mask |= INTVAL (operands[3]) << 2;
11945 mask |= INTVAL (operands[4]) << 4;
11946 mask |= INTVAL (operands[5]) << 6;
11947 operands[2] = GEN_INT (mask);
11948 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11950 [(set_attr "type" "sselog")
11951 (set_attr "prefix" "vex")
11952 (set_attr "mode" "OI")])
;; Insn avx2_permv2ti: emits vperm2i128 on V4DI operands.  Operand 1 must be
;; a register, operand 2 may be a register or memory, and operand 3 is an
;; 8-bit immediate (0..255).
11954 (define_insn "avx2_permv2ti"
11955 [(set (match_operand:V4DI 0 "register_operand" "=x")
11957 [(match_operand:V4DI 1 "register_operand" "x")
11958 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11959 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11962 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11963 [(set_attr "type" "sselog")
11964 (set_attr "prefix" "vex")
11965 (set_attr "mode" "OI")])
;; Insn avx2_vec_dupv4df: duplicates element 0 of a V2DF register into all
;; four lanes of a V4DF register; emits vbroadcastsd with a register source.
11967 (define_insn "avx2_vec_dupv4df"
11968 [(set (match_operand:V4DF 0 "register_operand" "=x")
11969 (vec_duplicate:V4DF
11971 (match_operand:V2DF 1 "register_operand" "x")
11972 (parallel [(const_int 0)]))))]
11974 "vbroadcastsd\t{%1, %0|%0, %1}"
11975 [(set_attr "type" "sselog1")
11976 (set_attr "prefix" "vex")
11977 (set_attr "mode" "V4DF")])
11979 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vector modes with 32-bit or 64-bit elements.
11980 (define_mode_iterator AVX_VEC_DUP_MODE
11981 [V8SI V8SF V4DI V4DF])
;; Insn vec_dup<mode>: broadcasts a scalar (<ssescalarmode>) into every lane
;; of an AVX_VEC_DUP_MODE register.  Alternative 0 takes the scalar from
;; memory and prints vbroadcast<ssescalarmodesuffix>; alternative 1 takes it
;; from a register and is discouraged by the `?' constraint modifier.
;; NOTE(review): the register alternative is presumably handled by the
;; post-reload vec_duplicate split later in this file -- confirm.
11983 (define_insn "vec_dup<mode>"
11984 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11985 (vec_duplicate:AVX_VEC_DUP_MODE
11986 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11989 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11991 [(set_attr "type" "ssemov")
11992 (set_attr "prefix_extra" "1")
11993 (set_attr "prefix" "vex")
11994 (set_attr "mode" "V8SF")])
;; Insn avx2_vbroadcasti128_<mode>: emits vbroadcasti128 for the VI_256
;; modes.  The half-width source (<ssehalfvecmode>) must be in memory.
11996 (define_insn "avx2_vbroadcasti128_<mode>"
11997 [(set (match_operand:VI_256 0 "register_operand" "=x")
11999 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
12002 "vbroadcasti128\t{%1, %0|%0, %1}"
12003 [(set_attr "type" "ssemov")
12004 (set_attr "prefix_extra" "1")
12005 (set_attr "prefix" "vex")
12006 (set_attr "mode" "OI")])
;; Post-reload split (TARGET_AVX && reload_completed) of a vec_duplicate
;; whose scalar source is in a register.  Operand 2 is created as the
;; half-width (<ssehalfvecmode>) view of the destination's hard register.
;; The replacement first duplicates the scalar into operand 2, then forms
;; the full-width result as the vec_concat of operand 2 with itself.
12009 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
12010 (vec_duplicate:AVX_VEC_DUP_MODE
12011 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
12012 "TARGET_AVX && reload_completed"
12013 [(set (match_dup 2)
12014 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
12016 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
12017 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Insn avx_vbroadcastf128_<mode>: replicates a 128-bit value into both
;; halves of a V_256 register.  Three alternatives:
;;   0: source in memory            -> vbroadcast<i128>
;;   1: source tied to operand 0    -> vinsert<i128> with immediate 1
;;   2: source in another register  -> vperm2<i128> with immediate 0, using
;;      the %t1 operand modifier; discouraged by `?'.
12019 (define_insn "avx_vbroadcastf128_<mode>"
12020 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
12022 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
12026 vbroadcast<i128>\t{%1, %0|%0, %1}
12027 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
12028 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
12029 [(set_attr "type" "ssemov,sselog1,sselog1")
12030 (set_attr "prefix_extra" "1")
12031 (set_attr "length_immediate" "0,1,1")
12032 (set_attr "prefix" "vex")
12033 (set_attr "mode" "<sseinsnmode>")])
12035 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
12036 ;; If it so happens that the input is in memory, use vbroadcast.
12037 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant.  Operand 3 is the index of the element being broadcast.
;; For a memory source the address is advanced by elt * 4 bytes and
;; vbroadcastss loads that single SFmode element.  For a register source
;; vpermilps is used with immediate elt * 0x55, which repeats the 2-bit
;; index in all four selector fields.
12038 (define_insn "*avx_vperm_broadcast_v4sf"
12039 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
12041 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
12042 (match_parallel 2 "avx_vbroadcast_operand"
12043 [(match_operand 3 "const_int_operand" "C,n,n")])))]
12046 int elt = INTVAL (operands[3]);
12047 switch (which_alternative)
12051 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
12052 return "vbroadcastss\t{%1, %0|%0, %1}";
12054 operands[2] = GEN_INT (elt * 0x55);
12055 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
12057 gcc_unreachable ();
12060 [(set_attr "type" "ssemov,ssemov,sselog1")
12061 (set_attr "prefix_extra" "1")
12062 (set_attr "length_immediate" "0,0,1")
12063 (set_attr "prefix" "vex")
12064 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast expressed as a vec_select; split after reload.
;; For a register source the split emits two instructions: vpermil to
;; replicate the wanted element within its 128-bit lane, then vperm2f128 to
;; copy that lane into both halves of the destination.  For a memory source
;; operand 1 is narrowed to the single scalar element at byte offset
;; elt * GET_MODE_SIZE (scalar mode), so the replacement pattern becomes a
;; plain vec_duplicate of that element.
12066 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
12067 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
12069 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
12070 (match_parallel 2 "avx_vbroadcast_operand"
12071 [(match_operand 3 "const_int_operand" "C,n,n")])))]
12074 "&& reload_completed"
12075 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
12077 rtx op0 = operands[0], op1 = operands[1];
12078 int elt = INTVAL (operands[3]);
12084 /* Shuffle element we care about into all elements of the 128-bit lane.
12085 The other lane gets shuffled too, but we don't care. */
12086 if (<MODE>mode == V4DFmode)
12087 mask = (elt & 1 ? 15 : 0);
12089 mask = (elt & 3) * 0x55;
12090 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
12092 /* Shuffle the lane we care about into both lanes of the dest. */
12093 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
12094 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
12098 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
12099 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander avx_vpermil<mode> for the DFmode vector modes (VF2).  Converts
;; the immediate in operand 2 into an explicit PARALLEL of element indices,
;; one mask bit per element: bits 0 and 1 select within elements 0..1, and
;; for V4DF bits 2 and 3 select within elements 2..3 (hence the + 2).
12102 (define_expand "avx_vpermil<mode>"
12103 [(set (match_operand:VF2 0 "register_operand" "")
12105 (match_operand:VF2 1 "nonimmediate_operand" "")
12106 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12109 int mask = INTVAL (operands[2]);
12110 rtx perm[<ssescalarnum>];
12112 perm[0] = GEN_INT (mask & 1);
12113 perm[1] = GEN_INT ((mask >> 1) & 1);
12114 if (<MODE>mode == V4DFmode)
12116 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
12117 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
12121 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander avx_vpermil<mode> for the SFmode vector modes (VF1).  Converts
;; the immediate in operand 2 into an explicit PARALLEL of element indices,
;; two mask bits per element.  For V8SF the same four 2-bit selectors are
;; reused for the upper lane, offset by 4.
12124 (define_expand "avx_vpermil<mode>"
12125 [(set (match_operand:VF1 0 "register_operand" "")
12127 (match_operand:VF1 1 "nonimmediate_operand" "")
12128 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12131 int mask = INTVAL (operands[2]);
12132 rtx perm[<ssescalarnum>];
12134 perm[0] = GEN_INT (mask & 3);
12135 perm[1] = GEN_INT ((mask >> 2) & 3);
12136 perm[2] = GEN_INT ((mask >> 4) & 3);
12137 perm[3] = GEN_INT ((mask >> 6) & 3);
12138 if (<MODE>mode == V8SFmode)
12140 perm[4] = GEN_INT ((mask & 3) + 4);
12141 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
12142 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
12143 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
12147 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn *avx_vpermilp<mode>: matches a permutation given as a PARALLEL
;; (operand 2) when avx_vpermilp_parallel accepts it.  That helper returns
;; the immediate plus one, with zero meaning no match, so the output code
;; subtracts 1 to recover the immediate printed with vpermil<ssemodesuffix>.
12150 (define_insn "*avx_vpermilp<mode>"
12151 [(set (match_operand:VF 0 "register_operand" "=x")
12153 (match_operand:VF 1 "nonimmediate_operand" "xm")
12154 (match_parallel 2 ""
12155 [(match_operand 3 "const_int_operand" "")])))]
12157 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12159 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12160 operands[2] = GEN_INT (mask);
12161 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12163 [(set_attr "type" "sselog")
12164 (set_attr "prefix_extra" "1")
12165 (set_attr "length_immediate" "1")
12166 (set_attr "prefix" "vex")
12167 (set_attr "mode" "<MODE>")])
;; Insn avx_vpermilvar<mode>3: variable form of vpermil<ssemodesuffix>.
;; Operand 1 is the float source register; operand 2 is an integer vector
;; (<sseintvecmode>) in a register or memory.
12169 (define_insn "avx_vpermilvar<mode>3"
12170 [(set (match_operand:VF 0 "register_operand" "=x")
12172 [(match_operand:VF 1 "register_operand" "x")
12173 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12176 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12177 [(set_attr "type" "sselog")
12178 (set_attr "prefix_extra" "1")
12179 (set_attr "prefix" "vex")
12180 (set_attr "mode" "<MODE>")])
;; Expander avx_vperm2f128<mode>3.  When neither zeroing bit of the
;; immediate is set ((mask & 0x88) == 0), the operation is rewritten as a
;; vec_select from the vec_concat of operands 1 and 2: bits 1:0 pick the
;; 128-bit lane for the low half of the result, bits 5:4 the lane for the
;; high half, and each lane expands to nelt / 2 consecutive indices.
;; Otherwise the UNSPEC_VPERMIL2F128 form in the pattern is kept.
12182 (define_expand "avx_vperm2f128<mode>3"
12183 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12184 (unspec:AVX256MODE2P
12185 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12186 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12187 (match_operand:SI 3 "const_0_to_255_operand" "")]
12188 UNSPEC_VPERMIL2F128))]
12191 int mask = INTVAL (operands[3]);
12192 if ((mask & 0x88) == 0)
12194 rtx perm[<ssescalarnum>], t1, t2;
12195 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12197 base = (mask & 3) * nelt2;
12198 for (i = 0; i < nelt2; ++i)
12199 perm[i] = GEN_INT (base + i);
12201 base = ((mask >> 4) & 3) * nelt2;
12202 for (i = 0; i < nelt2; ++i)
12203 perm[i + nelt2] = GEN_INT (base + i);
12205 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12206 operands[1], operands[2]);
12207 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12208 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12209 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12215 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12216 ;; means that in order to represent this properly in rtl we'd have to
12217 ;; nest *another* vec_concat with a zero operand and do the select from
12218 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the operation opaque as UNSPEC_VPERMIL2F128 and
;; prints vperm2<i128> with the immediate (operand 3) unchanged.
12219 (define_insn "*avx_vperm2f128<mode>_full"
12220 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12221 (unspec:AVX256MODE2P
12222 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12223 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12224 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12225 UNSPEC_VPERMIL2F128))]
12227 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12228 [(set_attr "type" "sselog")
12229 (set_attr "prefix_extra" "1")
12230 (set_attr "length_immediate" "1")
12231 (set_attr "prefix" "vex")
12232 (set_attr "mode" "<sseinsnmode>")])
;; Insn *avx_vperm2f128<mode>_nozero: the vec_select-from-vec_concat form of
;; vperm2f128 (no lane zeroing).  avx_vperm2f128_parallel validates the
;; PARALLEL in operand 3 and returns the immediate plus one.  The output
;; code has two special cases that print vinsert<i128> (immediate 0 or 1)
;; with the %x2 operand modifier, and otherwise prints the general
;; vperm2<i128> form with the recovered immediate.
12234 (define_insn "*avx_vperm2f128<mode>_nozero"
12235 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12236 (vec_select:AVX256MODE2P
12237 (vec_concat:<ssedoublevecmode>
12238 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12239 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12240 (match_parallel 3 ""
12241 [(match_operand 4 "const_int_operand" "")])))]
12243 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12245 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12247 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12249 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12250 operands[3] = GEN_INT (mask);
12251 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12253 [(set_attr "type" "sselog")
12254 (set_attr "prefix_extra" "1")
12255 (set_attr "length_immediate" "1")
12256 (set_attr "prefix" "vex")
12257 (set_attr "mode" "<sseinsnmode>")])
;; Expander avx_vinsertf128<mode>: inserts the half-width vector in
;; operand 2 into one half of operand 1, giving operand 0.  The selector in
;; operand 3 (0 or 1) chooses between the vec_set_lo_<mode> and
;; vec_set_hi_<mode> generators; any other value is unreachable.
12259 (define_expand "avx_vinsertf128<mode>"
12260 [(match_operand:V_256 0 "register_operand" "")
12261 (match_operand:V_256 1 "register_operand" "")
12262 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12263 (match_operand:SI 3 "const_0_to_1_operand" "")]
12266 rtx (*insn)(rtx, rtx, rtx);
12268 switch (INTVAL (operands[3]))
12271 insn = gen_vec_set_lo_<mode>;
12274 insn = gen_vec_set_hi_<mode>;
12277 gcc_unreachable ();
12280 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Insn avx2_vec_set_lo_v4di: replaces the low half of a V4DI value.  The
;; result combines the new V2DI operand 2 with elements 2..3 (the high
;; half) of operand 1; emitted as vinserti128 with immediate 0.
12284 (define_insn "avx2_vec_set_lo_v4di"
12285 [(set (match_operand:V4DI 0 "register_operand" "=x")
12287 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12289 (match_operand:V4DI 1 "register_operand" "x")
12290 (parallel [(const_int 2) (const_int 3)]))))]
12292 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12293 [(set_attr "type" "sselog")
12294 (set_attr "prefix_extra" "1")
12295 (set_attr "length_immediate" "1")
12296 (set_attr "prefix" "vex")
12297 (set_attr "mode" "OI")])
;; Insn avx2_vec_set_hi_v4di: replaces the high half of a V4DI value.  The
;; result combines elements 0..1 (the low half) of operand 1 with the new
;; V2DI operand 2; emitted as vinserti128 with immediate 1.
12299 (define_insn "avx2_vec_set_hi_v4di"
12300 [(set (match_operand:V4DI 0 "register_operand" "=x")
12303 (match_operand:V4DI 1 "register_operand" "x")
12304 (parallel [(const_int 0) (const_int 1)]))
12305 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12307 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12308 [(set_attr "type" "sselog")
12309 (set_attr "prefix_extra" "1")
12310 (set_attr "length_immediate" "1")
12311 (set_attr "prefix" "vex")
12312 (set_attr "mode" "OI")])
;; Insn vec_set_lo_<mode> for the VI8F_256 modes (four 64-bit elements):
;; result = vec_concat (operand 2, elements 2..3 of operand 1), i.e. the low
;; half is replaced.  Emitted as vinsert<i128> with immediate 0.
12314 (define_insn "vec_set_lo_<mode>"
12315 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12316 (vec_concat:VI8F_256
12317 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12318 (vec_select:<ssehalfvecmode>
12319 (match_operand:VI8F_256 1 "register_operand" "x")
12320 (parallel [(const_int 2) (const_int 3)]))))]
12322 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12323 [(set_attr "type" "sselog")
12324 (set_attr "prefix_extra" "1")
12325 (set_attr "length_immediate" "1")
12326 (set_attr "prefix" "vex")
12327 (set_attr "mode" "<sseinsnmode>")])
;; Insn vec_set_hi_<mode> for the VI8F_256 modes (four 64-bit elements):
;; result = vec_concat (elements 0..1 of operand 1, operand 2), i.e. the
;; high half is replaced.  Emitted as vinsert<i128> with immediate 1.
12329 (define_insn "vec_set_hi_<mode>"
12330 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12331 (vec_concat:VI8F_256
12332 (vec_select:<ssehalfvecmode>
12333 (match_operand:VI8F_256 1 "register_operand" "x")
12334 (parallel [(const_int 0) (const_int 1)]))
12335 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12337 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12338 [(set_attr "type" "sselog")
12339 (set_attr "prefix_extra" "1")
12340 (set_attr "length_immediate" "1")
12341 (set_attr "prefix" "vex")
12342 (set_attr "mode" "<sseinsnmode>")])
;; Insn vec_set_lo_<mode> for the VI4F_256 modes (eight 32-bit elements):
;; result = vec_concat (operand 2, elements 4..7 of operand 1), i.e. the low
;; half is replaced.  Emitted as vinsert<i128> with immediate 0.
12344 (define_insn "vec_set_lo_<mode>"
12345 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12346 (vec_concat:VI4F_256
12347 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12348 (vec_select:<ssehalfvecmode>
12349 (match_operand:VI4F_256 1 "register_operand" "x")
12350 (parallel [(const_int 4) (const_int 5)
12351 (const_int 6) (const_int 7)]))))]
12353 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12354 [(set_attr "type" "sselog")
12355 (set_attr "prefix_extra" "1")
12356 (set_attr "length_immediate" "1")
12357 (set_attr "prefix" "vex")
12358 (set_attr "mode" "<sseinsnmode>")])
;; Insn vec_set_hi_<mode> for the VI4F_256 modes (eight 32-bit elements):
;; result = vec_concat (elements 0..3 of operand 1, operand 2), i.e. the
;; high half is replaced.  Emitted as vinsert<i128> with immediate 1.
12360 (define_insn "vec_set_hi_<mode>"
12361 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12362 (vec_concat:VI4F_256
12363 (vec_select:<ssehalfvecmode>
12364 (match_operand:VI4F_256 1 "register_operand" "x")
12365 (parallel [(const_int 0) (const_int 1)
12366 (const_int 2) (const_int 3)]))
12367 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12369 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12370 [(set_attr "type" "sselog")
12371 (set_attr "prefix_extra" "1")
12372 (set_attr "length_immediate" "1")
12373 (set_attr "prefix" "vex")
12374 (set_attr "mode" "<sseinsnmode>")])
;; Insn vec_set_lo_v16hi: replaces the low half of a V16HI value.  The
;; result combines the new V8HI operand 2 with elements 8..15 of operand 1.
;; Emitted as vinsert%~128 with immediate 0.
;; NOTE(review): %~ presumably prints `i' or `f' depending on AVX2
;; availability -- confirm in the operand-printing code.
12376 (define_insn "vec_set_lo_v16hi"
12377 [(set (match_operand:V16HI 0 "register_operand" "=x")
12379 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12381 (match_operand:V16HI 1 "register_operand" "x")
12382 (parallel [(const_int 8) (const_int 9)
12383 (const_int 10) (const_int 11)
12384 (const_int 12) (const_int 13)
12385 (const_int 14) (const_int 15)]))))]
12387 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12388 [(set_attr "type" "sselog")
12389 (set_attr "prefix_extra" "1")
12390 (set_attr "length_immediate" "1")
12391 (set_attr "prefix" "vex")
12392 (set_attr "mode" "OI")])
;; Insn vec_set_hi_v16hi: replaces the high half of a V16HI value.  The
;; result combines elements 0..7 of operand 1 with the new V8HI operand 2.
;; Emitted as vinsert%~128 with immediate 1.
12394 (define_insn "vec_set_hi_v16hi"
12395 [(set (match_operand:V16HI 0 "register_operand" "=x")
12398 (match_operand:V16HI 1 "register_operand" "x")
12399 (parallel [(const_int 0) (const_int 1)
12400 (const_int 2) (const_int 3)
12401 (const_int 4) (const_int 5)
12402 (const_int 6) (const_int 7)]))
12403 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12405 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12406 [(set_attr "type" "sselog")
12407 (set_attr "prefix_extra" "1")
12408 (set_attr "length_immediate" "1")
12409 (set_attr "prefix" "vex")
12410 (set_attr "mode" "OI")])
;; Insn vec_set_lo_v32qi: replaces the low half of a V32QI value.  The
;; result combines the new V16QI operand 2 with elements 16..31 of
;; operand 1.  Emitted as vinsert%~128 with immediate 0.
12412 (define_insn "vec_set_lo_v32qi"
12413 [(set (match_operand:V32QI 0 "register_operand" "=x")
12415 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12417 (match_operand:V32QI 1 "register_operand" "x")
12418 (parallel [(const_int 16) (const_int 17)
12419 (const_int 18) (const_int 19)
12420 (const_int 20) (const_int 21)
12421 (const_int 22) (const_int 23)
12422 (const_int 24) (const_int 25)
12423 (const_int 26) (const_int 27)
12424 (const_int 28) (const_int 29)
12425 (const_int 30) (const_int 31)]))))]
12427 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12428 [(set_attr "type" "sselog")
12429 (set_attr "prefix_extra" "1")
12430 (set_attr "length_immediate" "1")
12431 (set_attr "prefix" "vex")
12432 (set_attr "mode" "OI")])
;; Insn vec_set_hi_v32qi: replaces the high half of a V32QI value.  The
;; result combines elements 0..15 of operand 1 with the new V16QI
;; operand 2.  Emitted as vinsert%~128 with immediate 1.
12434 (define_insn "vec_set_hi_v32qi"
12435 [(set (match_operand:V32QI 0 "register_operand" "=x")
12438 (match_operand:V32QI 1 "register_operand" "x")
12439 (parallel [(const_int 0) (const_int 1)
12440 (const_int 2) (const_int 3)
12441 (const_int 4) (const_int 5)
12442 (const_int 6) (const_int 7)
12443 (const_int 8) (const_int 9)
12444 (const_int 10) (const_int 11)
12445 (const_int 12) (const_int 13)
12446 (const_int 14) (const_int 15)]))
12447 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12449 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12450 [(set_attr "type" "sselog")
12451 (set_attr "prefix_extra" "1")
12452 (set_attr "length_immediate" "1")
12453 (set_attr "prefix" "vex")
12454 (set_attr "mode" "OI")])
;; v*maskmov* load form: register destination (operand 0), memory source
;; (operand 1) and a register operand 2 in the matching integer vector
;; mode <sseintvecmode> (presumably the per-element mask -- confirm).
;; Note the operand numbering: the memory source is operand 1 but is
;; printed first in AT&T order, i.e. "%1, %2, %0".
12456 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12457 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12459 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12460 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12463 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12464 [(set_attr "type" "sselog1")
12465 (set_attr "prefix_extra" "1")
12466 (set_attr "prefix" "vex")
12467 (set_attr "mode" "<sseinsnmode>")])
;; v*maskmov* store form: memory destination (operand 0), a register
;; operand 1 in the matching integer vector mode <sseintvecmode>
;; (presumably the per-element mask -- confirm) and the register data
;; operand 2.  Printed in AT&T order as "%2, %1, %0".
12469 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12470 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12472 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12473 (match_operand:V48_AVX2 2 "register_operand" "x")
12477 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12478 [(set_attr "type" "sselog1")
12479 (set_attr "prefix_extra" "1")
12480 (set_attr "prefix" "vex")
12481 (set_attr "mode" "<sseinsnmode>")])
;; Cast pattern: a 256-bit destination (register or memory) is set from an
;; unspec of a half-width <ssehalfvecmode> source (memory/register or
;; register, depending on the alternative).  It is split once reload has
;; completed: one of the two operands is re-created as a hard register in
;; the other operand's mode (op0 in the half mode, or op1 in the full
;; mode) and an ordinary move is emitted.  The condition choosing between
;; the two gen_rtx_REG calls is not visible in this excerpt.
12483 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12484 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12485 (unspec:AVX256MODE2P
12486 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12490 "&& reload_completed"
12493 rtx op0 = operands[0];
12494 rtx op1 = operands[1];
12496 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12498 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12499 emit_move_insn (op0, op1);
;; Vector initialisation expander for every 256-bit mode in V_256.
;; Operand 0 is the register destination; operand 1 is unconstrained.
;; All work is delegated to ix86_expand_vector_init, called with `false'
;; as its first argument.
12503 (define_expand "vec_init<mode>"
12504 [(match_operand:V_256 0 "register_operand" "")
12505 (match_operand 1 "" "")]
12508 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract a V2DI half (operand 0, register or memory) from V4DI register
;; operand 1.  Operand 2 is a constant 0 or 1 and is dispatched through a
;; switch to gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di
;; (presumably 0 -> lo and 1 -> hi; the case labels are not visible here).
;; Any other value reaches gcc_unreachable.
12512 (define_expand "avx2_extracti128"
12513 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12514 (match_operand:V4DI 1 "register_operand" "")
12515 (match_operand:SI 2 "const_0_to_1_operand" "")]
12518 rtx (*insn)(rtx, rtx);
12520 switch (INTVAL (operands[2]))
12523 insn = gen_vec_extract_lo_v4di;
12526 insn = gen_vec_extract_hi_v4di;
12529 gcc_unreachable ();
12532 emit_insn (insn (operands[0], operands[1]));
;; Insert V2DI operand 2 (register or memory) into V4DI register operand 1,
;; writing V4DI register operand 0.  Operand 3 is a constant 0 or 1 and is
;; dispatched through a switch to gen_avx2_vec_set_lo_v4di or
;; gen_avx2_vec_set_hi_v4di (presumably 0 -> lo and 1 -> hi; the case
;; labels are not visible here).  Any other value reaches gcc_unreachable.
12536 (define_expand "avx2_inserti128"
12537 [(match_operand:V4DI 0 "register_operand" "")
12538 (match_operand:V4DI 1 "register_operand" "")
12539 (match_operand:V2DI 2 "nonimmediate_operand" "")
12540 (match_operand:SI 3 "const_0_to_1_operand" "")]
12543 rtx (*insn)(rtx, rtx, rtx);
12545 switch (INTVAL (operands[3]))
12548 insn = gen_avx2_vec_set_lo_v4di;
12551 insn = gen_avx2_vec_set_hi_v4di;
12554 gcc_unreachable ();
12557 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable shift emitted as vpsravd for the VI4_AVX2 modes.
;; Operand 1 is the register being shifted; operand 2 (register or memory)
;; is a vector of the same mode, presumably the per-element shift counts.
;; The pattern name indicates an arithmetic right shift; the RTL shift
;; code itself is not visible in this excerpt.
12561 (define_insn "avx2_ashrv<mode>"
12562 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12564 (match_operand:VI4_AVX2 1 "register_operand" "x")
12565 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12567 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12568 [(set_attr "type" "sseishft")
12569 (set_attr "prefix" "vex")
12570 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable shifts for every code in the any_lshift iterator
;; over the VI48_AVX2 modes.  The mnemonic is assembled from <vshift> and
;; <ssemodesuffix> as vp<vshift>v<suffix>.  Operand 1 is the register being
;; shifted; operand 2 (register or memory) is a vector of the same mode.
12572 (define_insn "avx2_<shift_insn>v<mode>"
12573 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12574 (any_lshift:VI48_AVX2
12575 (match_operand:VI48_AVX2 1 "register_operand" "x")
12576 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12578 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12579 [(set_attr "type" "sselog")
12580 (set_attr "prefix" "vex")
12581 (set_attr "mode" "<sseinsnmode>")])
;; Form a 256-bit vector (any V_256 mode) from two half-width operands.
;; Two alternatives, selected on which_alternative:
;;   - operand 2 is a register or memory ("xm"): a vinsert<i128> with
;;     immediate 0x1, printing operand 1 with the %t modifier;
;;   - operand 2 matches constraint "C": a 128-bit-style move of operand 1
;;     into %x0 (vmovaps / vmovapd / vmovdqa chosen by the insn's mode
;;     attribute).
;; The case labels of both switches are not visible in this excerpt, so
;; the mapping of alternatives to templates above follows the order of the
;; "type" attribute ("sselog,ssemov") -- confirm against the full source.
12583 (define_insn "avx_vec_concat<mode>"
12584 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12586 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12587 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12590 switch (which_alternative)
12593 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12595 switch (get_attr_mode (insn))
12598 return "vmovaps\t{%1, %x0|%x0, %1}";
12600 return "vmovapd\t{%1, %x0|%x0, %1}";
12602 return "vmovdqa\t{%1, %x0|%x0, %1}";
12605 gcc_unreachable ();
12608 [(set_attr "type" "sselog,ssemov")
12609 (set_attr "prefix_extra" "1,*")
12610 (set_attr "length_immediate" "1,*")
12611 (set_attr "prefix" "vex")
12612 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtph2ps, register source: the V4SF result is taken from a
;; V8SF unspec of the V8HI register operand 1.  The parallel must name
;; elements 0, 1, 2, 3 in order, since the instruction converts the four
;; low half-precision elements; a selection such as (0 1 1 2) would
;; duplicate element 1 and drop element 3, misdescribing the result to
;; the RTL optimizers.
12614 (define_insn "vcvtph2ps"
12615 [(set (match_operand:V4SF 0 "register_operand" "=x")
12617 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12619 (parallel [(const_int 0) (const_int 1)
12620 (const_int 2) (const_int 3)])))]
12622 "vcvtph2ps\t{%1, %0|%0, %1}"
12623 [(set_attr "type" "ssecvt")
12624 (set_attr "prefix" "vex")
12625 (set_attr "mode" "V4SF")])
;; 128-bit vcvtph2ps, memory source: a V4HI memory operand is converted
;; directly to a V4SF register through UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; (the register-source variant uses V4SF) -- confirm this is intended.
12627 (define_insn "*vcvtph2ps_load"
12628 [(set (match_operand:V4SF 0 "register_operand" "=x")
12629 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12630 UNSPEC_VCVTPH2PS))]
12632 "vcvtph2ps\t{%1, %0|%0, %1}"
12633 [(set_attr "type" "ssecvt")
12634 (set_attr "prefix" "vex")
12635 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: all eight elements of V8HI operand 1 (register or
;; memory) are converted to a V8SF register through UNSPEC_VCVTPH2PS.
12637 (define_insn "vcvtph2ps256"
12638 [(set (match_operand:V8SF 0 "register_operand" "=x")
12639 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12640 UNSPEC_VCVTPH2PS))]
12642 "vcvtph2ps\t{%1, %0|%0, %1}"
12643 [(set_attr "type" "ssecvt")
12644 (set_attr "prefix" "vex")
12645 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph with a register destination.
;; Operand 1 is the V4SF register source and operand 2 an immediate in
;; 0..255; the V4HI unspec result is placed into the V8HI destination.
;; The preparation statement sets operands[3] to the V4HI zero constant,
;; which the matching "*vcvtps2ph" insn accepts as its const0 operand 3
;; (presumably the zeroed upper half of the destination -- confirm).
12647 (define_expand "vcvtps2ph"
12648 [(set (match_operand:V8HI 0 "register_operand" "")
12650 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12651 (match_operand:SI 2 "const_0_to_255_operand" "")]
12655 "operands[3] = CONST0_RTX (V4HImode);")
;; 128-bit vcvtps2ph, register destination.  Operand 1 is the V4SF
;; register source, operand 2 an immediate in 0..255 (constraint "N") and
;; operand 3 must be a V4HI zero constant (const0_operand); it is matched
;; but not printed in the output template.
12657 (define_insn "*vcvtps2ph"
12658 [(set (match_operand:V8HI 0 "register_operand" "=x")
12660 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12661 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12663 (match_operand:V4HI 3 "const0_operand" "")))]
12665 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12666 [(set_attr "type" "ssecvt")
12667 (set_attr "prefix" "vex")
12668 (set_attr "mode" "V4SF")])
;; 128-bit vcvtps2ph, memory destination: the V4HI result of
;; UNSPEC_VCVTPS2PH on V4SF register operand 1 and immediate operand 2
;; (0..255) is stored straight to the V4HI memory operand 0.
12670 (define_insn "*vcvtps2ph_store"
12671 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12672 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12673 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12674 UNSPEC_VCVTPS2PH))]
12676 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12677 [(set_attr "type" "ssecvt")
12678 (set_attr "prefix" "vex")
12679 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF register operand 1 and immediate operand 2
;; (0..255) produce a V8HI result through UNSPEC_VCVTPS2PH.  The
;; destination may be a register or memory (single "=xm" alternative).
12681 (define_insn "vcvtps2ph256"
12682 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12683 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12684 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12685 UNSPEC_VCVTPS2PH))]
12687 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12688 [(set_attr "type" "ssecvt")
12689 (set_attr "prefix" "vex")
12690 (set_attr "mode" "V8SF")])
12692 ;; For gather* insn patterns
;; VEC_GATHER_MODE: the eight result modes the gather patterns iterate over.
12693 (define_mode_iterator VEC_GATHER_MODE
12694 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; VEC_GATHER_IDXSI: mode of the index vector operand in the gathersi
;; patterns (SImode elements).  The 2- and 4-element DI/DF result modes
;; all map to V4SI.
12695 (define_mode_attr VEC_GATHER_IDXSI
12696 [(V2DI "V4SI") (V2DF "V4SI")
12697 (V4DI "V4SI") (V4DF "V4SI")
12698 (V4SI "V4SI") (V4SF "V4SI")
12699 (V8SI "V8SI") (V8SF "V8SI")])
;; VEC_GATHER_IDXDI: mode of the index vector operand in the gatherdi
;; patterns (DImode elements).
12700 (define_mode_attr VEC_GATHER_IDXDI
12701 [(V2DI "V2DI") (V2DF "V2DI")
12702 (V4DI "V4DI") (V4DF "V4DI")
12703 (V4SI "V2DI") (V4SF "V2DI")
12704 (V8SI "V4DI") (V8SF "V4DI")])
;; VEC_GATHER_SRCDI: mode of the two non-index vector register operands in
;; the gatherdi patterns.  It equals the result mode except for V8SI/V8SF,
;; which map to the 128-bit V4SI/V4SF.
12705 (define_mode_attr VEC_GATHER_SRCDI
12706 [(V2DI "V2DI") (V2DF "V2DF")
12707 (V4DI "V4DI") (V4DF "V4DF")
12708 (V4SI "V4SI") (V4SF "V4SF")
12709 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for gathers with SImode indices.
;;   operand 0: result register (gather mode)
;;   operand 1, operand 4: register inputs in the gather mode
;;   operand 2: VSIB base address
;;   operand 3: index vector register, mode <VEC_GATHER_IDXSI>
;;   operand 5: scale, must satisfy const1248_operand
;;   operand 6: scratch in the gather mode, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.  The predicate name for operand 5 is spelled without
;; trailing whitespace so that it agrees with the matching insn patterns.
12711 (define_expand "avx2_gathersi<mode>"
12712 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12713 (unspec:VEC_GATHER_MODE
12714 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12715 (mem:<ssescalarmode>
12717 [(match_operand 2 "vsib_address_operand" "")
12718 (match_operand:<VEC_GATHER_IDXSI>
12719 3 "register_operand" "")
12720 (match_operand:SI 5 "const1248_operand" "")]))
12721 (mem:BLK (scratch))
12722 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12724 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12728 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12729 operands[5]), UNSPEC_VSIBADDR);
;; Gather with SImode indices, general form.
;;   operand 0: early-clobber result register
;;   operand 1: early-clobber scratch register
;;   operand 2: register input tied to the result register (constraint "0")
;;   operand 5: register input tied to the scratch register (constraint "1")
;;   operand 7: the VSIB memory reference, built from base address
;;              operand 3, index vector operand 4 and scale operand 6
;; The template prints the scratch/operand-5 register (%1), the memory
;; reference (%7) and the result register (%0).
12732 (define_insn "*avx2_gathersi<mode>"
12733 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12734 (unspec:VEC_GATHER_MODE
12735 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12736 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12738 [(match_operand:P 3 "vsib_address_operand" "p")
12739 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12740 (match_operand:SI 6 "const1248_operand" "n")]
12742 (mem:BLK (scratch))
12743 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12745 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12747 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12748 [(set_attr "type" "ssemov")
12749 (set_attr "prefix" "vex")
12750 (set_attr "mode" "<sseinsnmode>")])
;; Gather with SImode indices, variant with no visible register input tied
;; to the result (the first unspec element is not shown in this excerpt).
;;   operand 0: early-clobber result register
;;   operand 1: early-clobber scratch register
;;   operand 4: register input tied to the scratch register (constraint "1")
;;   operand 6: the VSIB memory reference, built from base address
;;              operand 2, index vector operand 3 and scale operand 5
12752 (define_insn "*avx2_gathersi<mode>_2"
12753 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12754 (unspec:VEC_GATHER_MODE
12756 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12758 [(match_operand:P 2 "vsib_address_operand" "p")
12759 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12760 (match_operand:SI 5 "const1248_operand" "n")]
12762 (mem:BLK (scratch))
12763 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12765 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12767 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12768 [(set_attr "type" "ssemov")
12769 (set_attr "prefix" "vex")
12770 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers with DImode indices.
;;   operand 0: result register (gather mode)
;;   operand 1, operand 4: register inputs in mode <VEC_GATHER_SRCDI>
;;   operand 2: VSIB base address
;;   operand 3: index vector register, mode <VEC_GATHER_IDXDI>
;;   operand 5: scale, must satisfy const1248_operand
;;   operand 6: scratch in the gather mode, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.  The predicate name for operand 5 is spelled without
;; trailing whitespace so that it agrees with the matching insn patterns.
12772 (define_expand "avx2_gatherdi<mode>"
12773 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12774 (unspec:VEC_GATHER_MODE
12775 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12776 (mem:<ssescalarmode>
12778 [(match_operand 2 "vsib_address_operand" "")
12779 (match_operand:<VEC_GATHER_IDXDI>
12780 3 "register_operand" "")
12781 (match_operand:SI 5 "const1248_operand" "")]))
12782 (mem:BLK (scratch))
12783 (match_operand:<VEC_GATHER_SRCDI>
12784 4 "register_operand" "")]
12786 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12790 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12791 operands[5]), UNSPEC_VSIBADDR);
;; Gather with DImode indices, general form.
;;   operand 0: early-clobber result register (gather mode)
;;   operand 1: early-clobber scratch register (gather mode)
;;   operand 2: <VEC_GATHER_SRCDI> input tied to operand 0 (constraint "0")
;;   operand 5: <VEC_GATHER_SRCDI> input tied to operand 1 (constraint "1")
;;   operand 7: the VSIB memory reference, built from base address
;;              operand 3, index vector operand 4 and scale operand 6
;; The template prints %2 and %5 rather than %0 and %1: they occupy the
;; same registers through the matching constraints but carry the
;; <VEC_GATHER_SRCDI> mode (presumably so the register name is printed at
;; that mode's width -- confirm).
12794 (define_insn "*avx2_gatherdi<mode>"
12795 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12796 (unspec:VEC_GATHER_MODE
12797 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12798 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12800 [(match_operand:P 3 "vsib_address_operand" "p")
12801 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12802 (match_operand:SI 6 "const1248_operand" "n")]
12804 (mem:BLK (scratch))
12805 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12807 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12809 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12810 [(set_attr "type" "ssemov")
12811 (set_attr "prefix" "vex")
12812 (set_attr "mode" "<sseinsnmode>")])
;; Gather with DImode indices, variant with no visible register input tied
;; to the result (the first unspec element is not shown in this excerpt).
;;   operand 0: early-clobber result register (gather mode)
;;   operand 1: early-clobber scratch register (gather mode)
;;   operand 4: <VEC_GATHER_SRCDI> input tied to operand 1 (constraint "1")
;;   operand 6: the VSIB memory reference, built from base address
;;              operand 2, index vector operand 3 and scale operand 5
;; When the gather mode differs from <VEC_GATHER_SRCDI> the result is
;; printed with the %x modifier (%x0); otherwise plain %0 is used.
12814 (define_insn "*avx2_gatherdi<mode>_2"
12815 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12816 (unspec:VEC_GATHER_MODE
12818 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12820 [(match_operand:P 2 "vsib_address_operand" "p")
12821 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12822 (match_operand:SI 5 "const1248_operand" "n")]
12824 (mem:BLK (scratch))
12825 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12827 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12830 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12831 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12832 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12834 [(set_attr "type" "ssemov")
12835 (set_attr "prefix" "vex")
12836 (set_attr "mode" "<sseinsnmode>")])
;; Gather with DImode indices where only elements 0-3 of the gathered
;; value are selected (vec_select) into a <VEC_GATHER_SRCDI> result.
;;   operand 0: early-clobber result register, mode <VEC_GATHER_SRCDI>
;;   operand 1: early-clobber scratch register, iterating over VI4F_256
;;   operand 2: <VEC_GATHER_SRCDI> input tied to operand 0 (constraint "0")
;;   operand 5: <VEC_GATHER_SRCDI> input tied to operand 1 (constraint "1")
;;   operand 7: the VSIB memory reference, built from base address
;;              operand 3, index vector operand 4 and scale operand 6
12838 (define_insn "*avx2_gatherdi<mode>_3"
12839 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12840 (vec_select:<VEC_GATHER_SRCDI>
12842 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12843 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12845 [(match_operand:P 3 "vsib_address_operand" "p")
12846 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12847 (match_operand:SI 6 "const1248_operand" "n")]
12849 (mem:BLK (scratch))
12850 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12852 (parallel [(const_int 0) (const_int 1)
12853 (const_int 2) (const_int 3)])))
12854 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12856 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
12857 [(set_attr "type" "ssemov")
12858 (set_attr "prefix" "vex")
12859 (set_attr "mode" "<sseinsnmode>")])
;; Gather with DImode indices where only elements 0-3 of the gathered
;; value are selected (vec_select) into a <VEC_GATHER_SRCDI> result;
;; variant with no visible register input tied to the result (the first
;; unspec element is not shown in this excerpt).
;;   operand 0: early-clobber result register, mode <VEC_GATHER_SRCDI>
;;   operand 1: early-clobber scratch register, iterating over VI4F_256
;;   operand 4: <VEC_GATHER_SRCDI> input tied to operand 1 (constraint "1")
;;   operand 6: the VSIB memory reference, built from base address
;;              operand 2, index vector operand 3 and scale operand 5
12861 (define_insn "*avx2_gatherdi<mode>_4"
12862 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12863 (vec_select:<VEC_GATHER_SRCDI>
12866 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12868 [(match_operand:P 2 "vsib_address_operand" "p")
12869 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12870 (match_operand:SI 5 "const1248_operand" "n")]
12872 (mem:BLK (scratch))
12873 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12875 (parallel [(const_int 0) (const_int 1)
12876 (const_int 2) (const_int 3)])))
12877 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12879 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
12880 [(set_attr "type" "ssemov")
12881 (set_attr "prefix" "vex")
12882 (set_attr "mode" "<sseinsnmode>")])