1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
90 (define_c_enum "unspecv" [
100 ;; All vector modes including V?TImode, used in move patterns.
;; The first mode of each pair below is enabled only under TARGET_AVX;
;; the mode that follows it carries no condition of its own.
101 (define_mode_iterator V16
102 [(V32QI "TARGET_AVX") V16QI
103 (V16HI "TARGET_AVX") V8HI
104 (V8SI "TARGET_AVX") V4SI
105 (V4DI "TARGET_AVX") V2DI
106 (V2TI "TARGET_AVX") V1TI
107 (V8SF "TARGET_AVX") V4SF
108 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V?TImode entries that V16 has.  The first mode
;; of each pair requires TARGET_AVX, and V2DF requires TARGET_SSE2.
111 (define_mode_iterator V
112 [(V32QI "TARGET_AVX") V16QI
113 (V16HI "TARGET_AVX") V8HI
114 (V8SI "TARGET_AVX") V4SI
115 (V4DI "TARGET_AVX") V2DI
116 (V8SF "TARGET_AVX") V4SF
117 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
119 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the other modes are
;; listed unconditionally.
120 (define_mode_iterator V_128
121 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
123 ;; All 256bit vector modes
;; No per-mode condition is attached in this list.
124 (define_mode_iterator V_256
125 [V32QI V16HI V8SI V4DI V8SF V4DF])
127 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
128 (define_mode_iterator VF
129 [(V8SF "TARGET_AVX") V4SF
130 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
132 ;; All SFmode vector float modes
;; V8SF requires TARGET_AVX; V4SF is unconditional.
133 (define_mode_iterator VF1
134 [(V8SF "TARGET_AVX") V4SF])
136 ;; All DFmode vector float modes
;; V4DF requires TARGET_AVX.  V2DF carries no condition in this list
;; (unlike in VF, where it is guarded by TARGET_SSE2).
137 (define_mode_iterator VF2
138 [(V4DF "TARGET_AVX") V2DF])
140 ;; All 128bit vector float modes
;; V2DF requires TARGET_SSE2; V4SF is unconditional.
141 (define_mode_iterator VF_128
142 [V4SF (V2DF "TARGET_SSE2")])
144 ;; All 256bit vector float modes
145 (define_mode_iterator VF_256
148 ;; All vector integer modes
;; The first mode of each pair requires TARGET_AVX; the second mode is
;; unconditional.
149 (define_mode_iterator VI
150 [(V32QI "TARGET_AVX") V16QI
151 (V16HI "TARGET_AVX") V8HI
152 (V8SI "TARGET_AVX") V4SI
153 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the first mode of each pair is guarded by
;; TARGET_AVX2 instead of TARGET_AVX.
155 (define_mode_iterator VI_AVX2
156 [(V32QI "TARGET_AVX2") V16QI
157 (V16HI "TARGET_AVX2") V8HI
158 (V8SI "TARGET_AVX2") V4SI
159 (V4DI "TARGET_AVX2") V2DI])
161 ;; All QImode vector integer modes
;; V32QI requires TARGET_AVX; V16QI is unconditional.
162 (define_mode_iterator VI1
163 [(V32QI "TARGET_AVX") V16QI])
165 ;; All DImode vector integer modes
;; V4DI requires TARGET_AVX; V2DI is unconditional.
166 (define_mode_iterator VI8
167 [(V4DI "TARGET_AVX") V2DI])
;; Per-element-mode integer iterators (QI, HI, SI and DI elements in that
;; order).  In each one the first mode requires TARGET_AVX2 and the second
;; mode is unconditional.
169 (define_mode_iterator VI1_AVX2
170 [(V32QI "TARGET_AVX2") V16QI])
172 (define_mode_iterator VI2_AVX2
173 [(V16HI "TARGET_AVX2") V8HI])
175 (define_mode_iterator VI4_AVX2
176 [(V8SI "TARGET_AVX2") V4SI])
178 (define_mode_iterator VI8_AVX2
179 [(V4DI "TARGET_AVX2") V2DI])
;; TImode-element vector modes: V2TI requires TARGET_AVX2, V1TI is
;; unconditional.
181 ;; ??? We should probably use TImode instead.
182 (define_mode_iterator VIMAX_AVX2
183 [(V2TI "TARGET_AVX2") V1TI])
;; Same shape as VIMAX_AVX2, except that the unconditional entry is the
;; scalar mode TI rather than the vector mode V1TI.
185 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
186 (define_mode_iterator SSESCALARMODE
187 [(V2TI "TARGET_AVX2") TI])
;; Combinations of integer element modes.  The digits in each name select
;; the element modes included (1 = QI, 2 = HI, 4 = SI, 8 = DI).  In every
;; pair the first mode requires TARGET_AVX2 and the second mode is
;; unconditional.
189 (define_mode_iterator VI12_AVX2
190 [(V32QI "TARGET_AVX2") V16QI
191 (V16HI "TARGET_AVX2") V8HI])
193 (define_mode_iterator VI24_AVX2
194 [(V16HI "TARGET_AVX2") V8HI
195 (V8SI "TARGET_AVX2") V4SI])
197 (define_mode_iterator VI124_AVX2
198 [(V32QI "TARGET_AVX2") V16QI
199 (V16HI "TARGET_AVX2") V8HI
200 (V8SI "TARGET_AVX2") V4SI])
202 (define_mode_iterator VI248_AVX2
203 [(V16HI "TARGET_AVX2") V8HI
204 (V8SI "TARGET_AVX2") V4SI
205 (V4DI "TARGET_AVX2") V2DI])
207 (define_mode_iterator VI48_AVX2
208 [(V8SI "TARGET_AVX2") V4SI
209 (V4DI "TARGET_AVX2") V2DI])
211 (define_mode_iterator V48_AVX2
214 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Mode attributes that map a vector mode to an ISA name string.
;; NOTE(review): presumably these are substituted into pattern names, the
;; way <sse> and <sse2> are elsewhere in this file -- confirm at the uses.

;; "sse2" for the modes in the left column (including V1TI), "avx2" for
;; the modes in the right column (including V2TI).
217 (define_mode_attr sse2_avx2
218 [(V16QI "sse2") (V32QI "avx2")
219 (V8HI "sse2") (V16HI "avx2")
220 (V4SI "sse2") (V8SI "avx2")
221 (V2DI "sse2") (V4DI "avx2")
222 (V1TI "sse2") (V2TI "avx2")])
;; "ssse3" for the left column, "avx2" for the right column.  Note that
;; the last row is keyed on TI here, whereas sse2_avx2 is keyed on V1TI.
224 (define_mode_attr ssse3_avx2
225 [(V16QI "ssse3") (V32QI "avx2")
226 (V8HI "ssse3") (V16HI "avx2")
227 (V4SI "ssse3") (V8SI "avx2")
228 (V2DI "ssse3") (V4DI "avx2")
229 (TI "ssse3") (V2TI "avx2")])
;; "sse4_1" for the left column, "avx2" for the right column.  There is
;; no TImode-sized row in this attribute.
231 (define_mode_attr sse4_1_avx2
232 [(V16QI "sse4_1") (V32QI "avx2")
233 (V8HI "sse4_1") (V16HI "avx2")
234 (V4SI "sse4_1") (V8SI "avx2")
235 (V2DI "sse4_1") (V4DI "avx2")])
;; "avx" for the four float vector modes, "avx2" for the SI- and
;; DI-element integer vector modes.
237 (define_mode_attr avx_avx2
238 [(V4SF "avx") (V2DF "avx")
239 (V8SF "avx") (V4DF "avx")
240 (V4SI "avx2") (V2DI "avx2")
241 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for the left column, "avx2" for the right column.
243 (define_mode_attr vec_avx2
244 [(V16QI "vec") (V32QI "avx2")
245 (V8HI "vec") (V16HI "avx2")
246 (V4SI "vec") (V8SI "avx2")
247 (V2DI "vec") (V4DI "avx2")])
;; Maps an HImode-element vector to the SImode-element vector with the
;; same number of elements.
249 (define_mode_attr ssedoublemode
250 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DImode-element vector to a QImode-element vector (V4DI to V32QI,
;; V2DI to V16QI).
252 (define_mode_attr ssebytemode
253 [(V4DI "V32QI") (V2DI "V16QI")])
255 ;; All 128bit vector integer modes
;; Listed without per-mode conditions.
256 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
258 ;; All 256bit vector integer modes
;; Listed without per-mode conditions.
259 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
261 ;; Random 128bit vector integer mode combinations
;; The digits before "_128" select the element modes included
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).  No per-mode conditions are attached.
262 (define_mode_iterator VI12_128 [V16QI V8HI])
263 (define_mode_iterator VI14_128 [V16QI V4SI])
264 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266 (define_mode_iterator VI24_128 [V8HI V4SI])
267 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268 (define_mode_iterator VI48_128 [V4SI V2DI])
270 ;; Random 256bit vector integer mode combinations
;; The digits before "_256" select the element modes included
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).  No per-mode conditions are attached.
271 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272 (define_mode_iterator VI48_256 [V8SI V4DI])
274 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode
;; that has the same element count (SI with SF, DI with DF).
275 (define_mode_iterator VI4F_128 [V4SI V4SF])
276 (define_mode_iterator VI8F_128 [V2DI V2DF])
277 (define_mode_iterator VI4F_256 [V8SI V8SF])
278 (define_mode_iterator VI8F_256 [V4DI V4DF])
280 ;; Mapping from float mode to required SSE level
;; Covers the scalar modes SF and DF as well as the vector float modes.
;; Used as a pattern-name prefix in this file, e.g.
;; "<sse>_movu<ssemodesuffix><avxsizesuffix>".
281 (define_mode_attr sse
282 [(SF "sse") (DF "sse2")
283 (V4SF "sse") (V2DF "sse2")
284 (V8SF "avx") (V4DF "avx")])
;; ISA name for QI- and DI-element integer vectors: "sse2" for V16QI and
;; V2DI, "avx" for V32QI and V4DI.  Used as a pattern-name prefix in this
;; file, e.g. "<sse2>_movdqu<avxsizesuffix>" and "<sse2>_movnt<mode>".
286 (define_mode_attr sse2
287 [(V16QI "sse2") (V32QI "avx")
288 (V2DI "sse2") (V4DI "avx")])
;; ISA name for QI-element vectors: "sse3" for V16QI, "avx" for V32QI.
;; Used as the prefix of "<sse3>_lddqu<avxsizesuffix>".
290 (define_mode_attr sse3
291 [(V16QI "sse3") (V32QI "avx")])
;; ISA name for float vectors: "sse4_1" for V4SF and V2DF, "avx" for V8SF
;; and V4DF.
293 (define_mode_attr sse4_1
294 [(V4SF "sse4_1") (V2DF "sse4_1")
295 (V8SF "avx") (V4DF "avx")])
;; Name suffix by vector size: "256" for the modes on the first and third
;; rows, the empty string for the modes on the second and fourth rows.
;; Used in pattern names in this file, e.g. "<sse2>_movdqu<avxsizesuffix>".
297 (define_mode_attr avxsizesuffix
298 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300 (V8SF "256") (V4DF "256")
301 (V4SF "") (V2DF "")])
303 ;; SSE instruction mode
304 (define_mode_attr sseinsnmode
305 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307 (V8SF "V8SF") (V4DF "V4DF")
308 (V4SF "V4SF") (V2DF "V2DF")
311 ;; Mapping of vector modes to an integer vector mode of the same size.
;; Float vector modes map to the integer vector mode with the same element
;; count (V8SF to V8SI, V2DF to V2DI, ...); integer vector modes map to
;; themselves.
312 (define_mode_attr sseintvecmode
313 [(V8SF "V8SI") (V4DF "V4DI")
314 (V4SF "V4SI") (V2DF "V2DI")
315 (V8SI "V8SI") (V4DI "V4DI")
316 (V4SI "V4SI") (V2DI "V2DI")
317 (V16HI "V16HI") (V8HI "V8HI")
318 (V32QI "V32QI") (V16QI "V16QI")])
;; Same mapping as sseintvecmode, with the resulting mode name spelled in
;; lower case.
320 (define_mode_attr sseintvecmodelower
321 [(V8SF "v8si") (V4DF "v4di")
322 (V4SF "v4si") (V2DF "v2di")
323 (V8SI "v8si") (V4DI "v4di")
324 (V4SI "v4si") (V2DI "v2di")
325 (V16HI "v16hi") (V8HI "v8hi")
326 (V32QI "v32qi") (V16QI "v16qi")])
328 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled
;; (e.g. V4SI to V8SI, V8SF to V16SF).
329 (define_mode_attr ssedoublevecmode
330 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332 (V8SF "V16SF") (V4DF "V8DF")
333 (V4SF "V8SF") (V2DF "V4DF")])
335 ;; Mapping of vector modes to a vector mode of half size
336 (define_mode_attr ssehalfvecmode
337 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
339 (V8SF "V4SF") (V4DF "V2DF")
342 ;; Mapping of vector modes back to the scalar modes
;; Each vector mode maps to its element mode (e.g. V8HI to HI, V4DF to DF).
;; Used for the "mode" attribute of the scalar (vm) arithmetic insns in
;; this file.
343 (define_mode_attr ssescalarmode
344 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346 (V8SF "SF") (V4DF "DF")
347 (V4SF "SF") (V2DF "DF")])
349 ;; Number of scalar elements in each vector type
;; The value is the element count that appears in the mode name, given as
;; a decimal string.
350 (define_mode_attr ssescalarnum
351 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353 (V8SF "8") (V4DF "4")
354 (V4SF "4") (V2DF "2")])
356 ;; SSE prefix for integer vector modes
357 (define_mode_attr sseintprefix
358 [(V2DI "p") (V2DF "")
361 (V8SI "p") (V8SF "")])
363 ;; SSE scalar suffix for vector modes
364 (define_mode_attr ssescalarmodesuffix
366 (V8SF "ss") (V4DF "sd")
367 (V4SF "ss") (V2DF "sd")
368 (V8SI "ss") (V4DI "sd")
371 ;; Pack/unpack vector modes
;; sseunpackmode maps a vector to the mode with half as many elements of
;; the next wider integer element mode (QI to HI, HI to SI, SI to DI).
372 (define_mode_attr sseunpackmode
373 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; ssepackmode is the inverse mapping: twice as many elements of the next
;; narrower integer element mode.
376 (define_mode_attr ssepackmode
377 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
380 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The limits are 7, 15, 31 and 63 for QI, HI, SI and DI elements
;; respectively; only the four modes listed here are covered.
381 (define_mode_attr sserotatemax
382 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
384 ;; Mapping of mode to cast intrinsic name
;; Yields "si" for V8SI, "ps" for V8SF and "pd" for V4DF; no other modes
;; are covered.
385 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
387 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code, not on a mode):
;; sign_extend yields "sx" and zero_extend yields "zx".
388 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
390 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes yield the literal "f128"; integer modes yield "%~128".
;; NOTE(review): "%~" is presumably an output-template modifier that
;; prints "i" or "f" depending on TARGET_AVX2 -- confirm against the
;; i386 operand-printing code.
391 (define_mode_attr i128
392 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393 (V8SI "%~128") (V4DI "%~128")])
;; The three modes V8SI, V8SF and V4DF, with no per-mode conditions.
;; These are the same modes that castmode is defined for.
396 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Scalar and vector float modes (SF, DF and their vector forms), with no
;; per-mode conditions.
398 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
400 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1, where N is the element count of the mode
;; (8, 4, 4 and 2 elements give 255, 15, 15 and 3).
401 (define_mode_attr blendbits
402 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
404 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
412 ;; All of these patterns are enabled for SSE1 as well as SSE2.
413 ;; This is essential for maintaining stable calling conventions.
415 (define_expand "mov<mode>"
416 [(set (match_operand:V16 0 "nonimmediate_operand" "")
417 (match_operand:V16 1 "nonimmediate_operand" ""))]
420 ix86_expand_vector_move (<MODE>mode, operands);
424 (define_insn "*mov<mode>_internal"
425 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
426 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
428 && (register_operand (operands[0], <MODE>mode)
429 || register_operand (operands[1], <MODE>mode))"
431 switch (which_alternative)
434 return standard_sse_constant_opcode (insn, operands[1]);
437 switch (get_attr_mode (insn))
442 && (misaligned_operand (operands[0], <MODE>mode)
443 || misaligned_operand (operands[1], <MODE>mode)))
444 return "vmovups\t{%1, %0|%0, %1}";
446 return "%vmovaps\t{%1, %0|%0, %1}";
451 && (misaligned_operand (operands[0], <MODE>mode)
452 || misaligned_operand (operands[1], <MODE>mode)))
453 return "vmovupd\t{%1, %0|%0, %1}";
454 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
455 return "%vmovaps\t{%1, %0|%0, %1}";
457 return "%vmovapd\t{%1, %0|%0, %1}";
462 && (misaligned_operand (operands[0], <MODE>mode)
463 || misaligned_operand (operands[1], <MODE>mode)))
464 return "vmovdqu\t{%1, %0|%0, %1}";
465 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
466 return "%vmovaps\t{%1, %0|%0, %1}";
468 return "%vmovdqa\t{%1, %0|%0, %1}";
477 [(set_attr "type" "sselog1,ssemov,ssemov")
478 (set_attr "prefix" "maybe_vex")
480 (cond [(match_test "TARGET_AVX")
481 (const_string "<sseinsnmode>")
482 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
483 (not (match_test "TARGET_SSE2")))
484 (and (eq_attr "alternative" "2")
485 (match_test "TARGET_SSE_TYPELESS_STORES")))
486 (const_string "V4SF")
487 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
488 (const_string "V4SF")
489 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
490 (const_string "V2DF")
492 (const_string "TI")))])
494 (define_insn "sse2_movq128"
495 [(set (match_operand:V2DI 0 "register_operand" "=x")
498 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
499 (parallel [(const_int 0)]))
502 "%vmovq\t{%1, %0|%0, %1}"
503 [(set_attr "type" "ssemov")
504 (set_attr "prefix" "maybe_vex")
505 (set_attr "mode" "TI")])
507 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
508 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
509 ;; from memory, we'd prefer to load the memory directly into the %xmm
510 ;; register. To facilitate this happy circumstance, this pattern won't
511 ;; split until after register allocation. If the 64-bit value didn't
512 ;; come from memory, this is the best we can do. This is much better
513 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
516 (define_insn_and_split "movdi_to_sse"
518 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
519 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
520 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
521 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
523 "&& reload_completed"
526 if (register_operand (operands[1], DImode))
528 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
529 Assemble the 64-bit DImode value in an xmm register. */
530 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
531 gen_rtx_SUBREG (SImode, operands[1], 0)));
532 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
533 gen_rtx_SUBREG (SImode, operands[1], 4)));
534 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
537 else if (memory_operand (operands[1], DImode))
538 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
539 operands[1], const0_rtx));
545 [(set (match_operand:V4SF 0 "register_operand" "")
546 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
547 "TARGET_SSE && reload_completed"
550 (vec_duplicate:V4SF (match_dup 1))
554 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
555 operands[2] = CONST0_RTX (V4SFmode);
559 [(set (match_operand:V2DF 0 "register_operand" "")
560 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
561 "TARGET_SSE2 && reload_completed"
562 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
564 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
565 operands[2] = CONST0_RTX (DFmode);
568 (define_expand "push<mode>1"
569 [(match_operand:V16 0 "register_operand" "")]
572 ix86_expand_push (<MODE>mode, operands[0]);
576 (define_expand "movmisalign<mode>"
577 [(set (match_operand:V16 0 "nonimmediate_operand" "")
578 (match_operand:V16 1 "nonimmediate_operand" ""))]
581 ix86_expand_vector_move_misalign (<MODE>mode, operands);
585 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
586 [(set (match_operand:VF 0 "nonimmediate_operand" "")
588 [(match_operand:VF 1 "nonimmediate_operand" "")]
592 if (MEM_P (operands[0]) && MEM_P (operands[1]))
593 operands[1] = force_reg (<MODE>mode, operands[1]);
596 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
597 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
599 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
601 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
602 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssemov")
604 (set_attr "movu" "1")
605 (set_attr "prefix" "maybe_vex")
606 (set_attr "mode" "<MODE>")])
608 (define_expand "<sse2>_movdqu<avxsizesuffix>"
609 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
610 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
614 if (MEM_P (operands[0]) && MEM_P (operands[1]))
615 operands[1] = force_reg (<MODE>mode, operands[1]);
618 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
619 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
620 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
622 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
623 "%vmovdqu\t{%1, %0|%0, %1}"
624 [(set_attr "type" "ssemov")
625 (set_attr "movu" "1")
626 (set (attr "prefix_data16")
628 (match_test "TARGET_AVX")
631 (set_attr "prefix" "maybe_vex")
632 (set_attr "mode" "<sseinsnmode>")])
634 (define_insn "<sse3>_lddqu<avxsizesuffix>"
635 [(set (match_operand:VI1 0 "register_operand" "=x")
636 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
639 "%vlddqu\t{%1, %0|%0, %1}"
640 [(set_attr "type" "ssemov")
641 (set_attr "movu" "1")
642 (set (attr "prefix_data16")
644 (match_test "TARGET_AVX")
647 (set (attr "prefix_rep")
649 (match_test "TARGET_AVX")
652 (set_attr "prefix" "maybe_vex")
653 (set_attr "mode" "<sseinsnmode>")])
655 (define_insn "sse2_movnti<mode>"
656 [(set (match_operand:SWI48 0 "memory_operand" "=m")
657 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
660 "movnti\t{%1, %0|%0, %1}"
661 [(set_attr "type" "ssemov")
662 (set_attr "prefix_data16" "0")
663 (set_attr "mode" "<MODE>")])
665 (define_insn "<sse>_movnt<mode>"
666 [(set (match_operand:VF 0 "memory_operand" "=m")
667 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
670 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
671 [(set_attr "type" "ssemov")
672 (set_attr "prefix" "maybe_vex")
673 (set_attr "mode" "<MODE>")])
675 (define_insn "<sse2>_movnt<mode>"
676 [(set (match_operand:VI8 0 "memory_operand" "=m")
677 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
680 "%vmovntdq\t{%1, %0|%0, %1}"
681 [(set_attr "type" "ssecvt")
682 (set (attr "prefix_data16")
684 (match_test "TARGET_AVX")
687 (set_attr "prefix" "maybe_vex")
688 (set_attr "mode" "<sseinsnmode>")])
690 ; Expand patterns for non-temporal stores. At the moment, only those
691 ; that directly map to insns are defined; it would be possible to
692 ; define patterns for other modes that would expand to several insns.
694 ;; Modes handled by storent patterns.
;; Scalar modes: DI needs TARGET_SSE2 && TARGET_64BIT, SI needs
;; TARGET_SSE2, and SF and DF need TARGET_SSE4A.
;; Vector modes: V4DI, V8SF and V4DF need TARGET_AVX, V2DI and V2DF need
;; TARGET_SSE2, and V4SF is unconditional.
695 (define_mode_iterator STORENT_MODE
696 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
697 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
698 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
699 (V8SF "TARGET_AVX") V4SF
700 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
702 (define_expand "storent<mode>"
703 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
705 [(match_operand:STORENT_MODE 1 "register_operand" "")]
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
711 ;; Parallel floating point arithmetic
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
715 (define_expand "<code><mode>2"
716 [(set (match_operand:VF 0 "register_operand" "")
718 (match_operand:VF 1 "register_operand" "")))]
720 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
722 (define_insn_and_split "*absneg<mode>2"
723 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
724 (match_operator:VF 3 "absneg_operator"
725 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
726 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
729 "&& reload_completed"
732 enum rtx_code absneg_op;
738 if (MEM_P (operands[1]))
739 op1 = operands[2], op2 = operands[1];
741 op1 = operands[1], op2 = operands[2];
746 if (rtx_equal_p (operands[0], operands[1]))
752 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
753 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
754 t = gen_rtx_SET (VOIDmode, operands[0], t);
758 [(set_attr "isa" "noavx,noavx,avx,avx")])
760 (define_expand "<plusminus_insn><mode>3"
761 [(set (match_operand:VF 0 "register_operand" "")
763 (match_operand:VF 1 "nonimmediate_operand" "")
764 (match_operand:VF 2 "nonimmediate_operand" "")))]
766 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
768 (define_insn "*<plusminus_insn><mode>3"
769 [(set (match_operand:VF 0 "register_operand" "=x,x")
771 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
772 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
773 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
775 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
776 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
777 [(set_attr "isa" "noavx,avx")
778 (set_attr "type" "sseadd")
779 (set_attr "prefix" "orig,vex")
780 (set_attr "mode" "<MODE>")])
782 (define_insn "<sse>_vm<plusminus_insn><mode>3"
783 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
786 (match_operand:VF_128 1 "register_operand" "0,x")
787 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
792 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
793 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "sseadd")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "<ssescalarmode>")])
799 (define_expand "mul<mode>3"
800 [(set (match_operand:VF 0 "register_operand" "")
802 (match_operand:VF 1 "nonimmediate_operand" "")
803 (match_operand:VF 2 "nonimmediate_operand" "")))]
805 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
807 (define_insn "*mul<mode>3"
808 [(set (match_operand:VF 0 "register_operand" "=x,x")
810 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
811 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
812 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
814 mul<ssemodesuffix>\t{%2, %0|%0, %2}
815 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssemul")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<MODE>")])
821 (define_insn "<sse>_vmmul<mode>3"
822 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
825 (match_operand:VF_128 1 "register_operand" "0,x")
826 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
831 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
832 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
833 [(set_attr "isa" "noavx,avx")
834 (set_attr "type" "ssemul")
835 (set_attr "prefix" "orig,vex")
836 (set_attr "mode" "<ssescalarmode>")])
838 (define_expand "div<mode>3"
839 [(set (match_operand:VF2 0 "register_operand" "")
840 (div:VF2 (match_operand:VF2 1 "register_operand" "")
841 (match_operand:VF2 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
845 (define_expand "div<mode>3"
846 [(set (match_operand:VF1 0 "register_operand" "")
847 (div:VF1 (match_operand:VF1 1 "register_operand" "")
848 (match_operand:VF1 2 "nonimmediate_operand" "")))]
851 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
854 && TARGET_RECIP_VEC_DIV
855 && !optimize_insn_for_size_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
864 (define_insn "<sse>_div<mode>3"
865 [(set (match_operand:VF 0 "register_operand" "=x,x")
867 (match_operand:VF 1 "register_operand" "0,x")
868 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
871 div<ssemodesuffix>\t{%2, %0|%0, %2}
872 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
873 [(set_attr "isa" "noavx,avx")
874 (set_attr "type" "ssediv")
875 (set_attr "prefix" "orig,vex")
876 (set_attr "mode" "<MODE>")])
878 (define_insn "<sse>_vmdiv<mode>3"
879 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
882 (match_operand:VF_128 1 "register_operand" "0,x")
883 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
888 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
889 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
890 [(set_attr "isa" "noavx,avx")
891 (set_attr "type" "ssediv")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "<ssescalarmode>")])
895 (define_insn "<sse>_rcp<mode>2"
896 [(set (match_operand:VF1 0 "register_operand" "=x")
898 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
900 "%vrcpps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "atom_sse_attr" "rcp")
903 (set_attr "prefix" "maybe_vex")
904 (set_attr "mode" "<MODE>")])
906 (define_insn "sse_vmrcpv4sf2"
907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
911 (match_operand:V4SF 2 "register_operand" "0,x")
915 rcpss\t{%1, %0|%0, %1}
916 vrcpss\t{%1, %2, %0|%0, %2, %1}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sse")
919 (set_attr "atom_sse_attr" "rcp")
920 (set_attr "prefix" "orig,vex")
921 (set_attr "mode" "SF")])
923 (define_expand "sqrt<mode>2"
924 [(set (match_operand:VF2 0 "register_operand" "")
925 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
928 (define_expand "sqrt<mode>2"
929 [(set (match_operand:VF1 0 "register_operand" "")
930 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
934 && TARGET_RECIP_VEC_SQRT
935 && !optimize_insn_for_size_p ()
936 && flag_finite_math_only && !flag_trapping_math
937 && flag_unsafe_math_optimizations)
939 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
944 (define_insn "<sse>_sqrt<mode>2"
945 [(set (match_operand:VF 0 "register_operand" "=x")
946 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
948 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
949 [(set_attr "type" "sse")
950 (set_attr "atom_sse_attr" "sqrt")
951 (set_attr "prefix" "maybe_vex")
952 (set_attr "mode" "<MODE>")])
954 (define_insn "<sse>_vmsqrt<mode>2"
955 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
958 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
959 (match_operand:VF_128 2 "register_operand" "0,x")
963 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
964 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
965 [(set_attr "isa" "noavx,avx")
966 (set_attr "type" "sse")
967 (set_attr "atom_sse_attr" "sqrt")
968 (set_attr "prefix" "orig,vex")
969 (set_attr "mode" "<ssescalarmode>")])
971 (define_expand "rsqrt<mode>2"
972 [(set (match_operand:VF1 0 "register_operand" "")
974 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
977 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
981 (define_insn "<sse>_rsqrt<mode>2"
982 [(set (match_operand:VF1 0 "register_operand" "=x")
984 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
986 "%vrsqrtps\t{%1, %0|%0, %1}"
987 [(set_attr "type" "sse")
988 (set_attr "prefix" "maybe_vex")
989 (set_attr "mode" "<MODE>")])
991 (define_insn "sse_vmrsqrtv4sf2"
992 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
994 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
996 (match_operand:V4SF 2 "register_operand" "0,x")
1000 rsqrtss\t{%1, %0|%0, %1}
1001 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1002 [(set_attr "isa" "noavx,avx")
1003 (set_attr "type" "sse")
1004 (set_attr "prefix" "orig,vex")
1005 (set_attr "mode" "SF")])
1007 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1008 ;; isn't really correct, as those rtl operators aren't defined when
1009 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1011 (define_expand "<code><mode>3"
1012 [(set (match_operand:VF 0 "register_operand" "")
1014 (match_operand:VF 1 "nonimmediate_operand" "")
1015 (match_operand:VF 2 "nonimmediate_operand" "")))]
1018 if (!flag_finite_math_only)
1019 operands[1] = force_reg (<MODE>mode, operands[1]);
1020 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:VF 0 "register_operand" "=x,x")
1026 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1027 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1028 "TARGET_SSE && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1031 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1032 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1033 [(set_attr "isa" "noavx,avx")
1034 (set_attr "type" "sseadd")
1035 (set_attr "prefix" "orig,vex")
1036 (set_attr "mode" "<MODE>")])
1038 (define_insn "*<code><mode>3"
1039 [(set (match_operand:VF 0 "register_operand" "=x,x")
1041 (match_operand:VF 1 "register_operand" "0,x")
1042 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1043 "TARGET_SSE && !flag_finite_math_only"
1045 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1046 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "isa" "noavx,avx")
1048 (set_attr "type" "sseadd")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "mode" "<MODE>")])
1052 (define_insn "<sse>_vm<code><mode>3"
1053 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1056 (match_operand:VF_128 1 "register_operand" "0,x")
1057 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1062 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1063 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1064 [(set_attr "isa" "noavx,avx")
1065 (set_attr "type" "sse")
1066 (set_attr "prefix" "orig,vex")
1067 (set_attr "mode" "<ssescalarmode>")])
1069 ;; These versions of the min/max patterns implement exactly the operations
1070 ;; min = (op1 < op2 ? op1 : op2)
1071 ;; max = (!(op1 < op2) ? op1 : op2)
1072 ;; Their operands are not commutative, and thus they may be used in the
1073 ;; presence of -0.0 and NaN.
1075 (define_insn "*ieee_smin<mode>3"
1076 [(set (match_operand:VF 0 "register_operand" "=x,x")
1078 [(match_operand:VF 1 "register_operand" "0,x")
1079 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1083 min<ssemodesuffix>\t{%2, %0|%0, %2}
1084 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1085 [(set_attr "isa" "noavx,avx")
1086 (set_attr "type" "sseadd")
1087 (set_attr "prefix" "orig,vex")
1088 (set_attr "mode" "<MODE>")])
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:VF 0 "register_operand" "=x,x")
1093 [(match_operand:VF 1 "register_operand" "0,x")
1094 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1098 max<ssemodesuffix>\t{%2, %0|%0, %2}
1099 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1100 [(set_attr "isa" "noavx,avx")
1101 (set_attr "type" "sseadd")
1102 (set_attr "prefix" "orig,vex")
1103 (set_attr "mode" "<MODE>")])
1105 (define_insn "avx_addsubv4df3"
1106 [(set (match_operand:V4DF 0 "register_operand" "=x")
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1111 (minus:V4DF (match_dup 1) (match_dup 2))
1114 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1115 [(set_attr "type" "sseadd")
1116 (set_attr "prefix" "vex")
1117 (set_attr "mode" "V4DF")])
1119 (define_insn "sse3_addsubv2df3"
1120 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1123 (match_operand:V2DF 1 "register_operand" "0,x")
1124 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1125 (minus:V2DF (match_dup 1) (match_dup 2))
1129 addsubpd\t{%2, %0|%0, %2}
1130 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "isa" "noavx,avx")
1132 (set_attr "type" "sseadd")
1133 (set_attr "atom_unit" "complex")
1134 (set_attr "prefix" "orig,vex")
1135 (set_attr "mode" "V2DF")])
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
1151 (define_insn "sse3_addsubv4sf3"
1152 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1155 (match_operand:V4SF 1 "register_operand" "0,x")
1156 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1157 (minus:V4SF (match_dup 1) (match_dup 2))
1161 addsubps\t{%2, %0|%0, %2}
1162 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "isa" "noavx,avx")
1164 (set_attr "type" "sseadd")
1165 (set_attr "prefix" "orig,vex")
1166 (set_attr "prefix_rep" "1,*")
1167 (set_attr "mode" "V4SF")])
;; AVX horizontal add/sub on V4DF; the insn name and mnemonic are filled in
;; through <plusminus_insn> and <plusminus_mnemonic>.  The pattern selects
;; DF elements 0-3 of operand 1 and of operand 2 with vec_select and emits
;; the three-operand vh<plusminus_mnemonic>pd.
1169 (define_insn "avx_h<plusminus_insn>v4df3"
1170 [(set (match_operand:V4DF 0 "register_operand" "=x")
1175 (match_operand:V4DF 1 "register_operand" "x")
1176 (parallel [(const_int 0)]))
1177 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1179 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1180 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1184 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1185 (parallel [(const_int 0)]))
1186 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1189 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1191 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1192 [(set_attr "type" "sseadd")
1193 (set_attr "prefix" "vex")
1194 (set_attr "mode" "V4DF")])
;; Horizontal add/sub on V2DF.  The pattern selects DF elements 0 and 1 of
;; operand 1 and of operand 2.  Two alternatives: two-operand
;; h<plusminus_mnemonic>pd (destination tied to operand 1) and the VEX
;; three-operand vh<plusminus_mnemonic>pd.
1196 (define_insn "sse3_h<plusminus_insn>v2df3"
1197 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1201 (match_operand:V2DF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1206 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1211 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1212 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1213 [(set_attr "isa" "noavx,avx")
1214 (set_attr "type" "sseadd")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "mode" "V2DF")])
;; AVX horizontal add/sub on V8SF.  The pattern lists, in order, SF elements
;; 0-3 of operand 1, 0-3 of operand 2, 4-7 of operand 1 and 4-7 of
;; operand 2, and emits the three-operand vh<plusminus_mnemonic>ps.
1218 (define_insn "avx_h<plusminus_insn>v8sf3"
1219 [(set (match_operand:V8SF 0 "register_operand" "=x")
1225 (match_operand:V8SF 1 "register_operand" "x")
1226 (parallel [(const_int 0)]))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1229 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1230 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1234 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1238 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1243 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1250 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1253 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1256 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF.  The pattern selects SF elements 0-3 of
;; operand 1 and of operand 2.  Two alternatives: two-operand
;; h<plusminus_mnemonic>ps (destination tied to operand 1) and the VEX
;; three-operand vh<plusminus_mnemonic>ps.  "prefix_rep" is 1 only for the
;; first (non-AVX) alternative.
1261 (define_insn "sse3_h<plusminus_insn>v4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1267 (match_operand:V4SF 1 "register_operand" "0,x")
1268 (parallel [(const_int 0)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1276 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1284 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1285 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1286 [(set_attr "isa" "noavx,avx")
1287 (set_attr "type" "sseadd")
1288 (set_attr "atom_unit" "complex")
1289 (set_attr "prefix" "orig,vex")
1290 (set_attr "prefix_rep" "1,*")
1291 (set_attr "mode" "V4SF")])
;; Sum reduction expander for V4DF.  Emits three insns: a horizontal add of
;; operand 1 with itself into tmp, a vperm2f128 of tmp with itself using
;; immediate 1 into tmp2, and finally an add of tmp and tmp2 into operand 0.
1293 (define_expand "reduc_splus_v4df"
1294 [(match_operand:V4DF 0 "register_operand" "")
1295 (match_operand:V4DF 1 "register_operand" "")]
1298 rtx tmp = gen_reg_rtx (V4DFmode);
1299 rtx tmp2 = gen_reg_rtx (V4DFmode);
1300 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1301 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1302 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written straight into operand 0.
1306 (define_expand "reduc_splus_v2df"
1307 [(match_operand:V2DF 0 "register_operand" "")
1308 (match_operand:V2DF 1 "register_operand" "")]
1311 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction expander for V8SF.  Emits two successive horizontal adds
;; (operand 1 with itself into tmp, then tmp with itself into tmp2), a
;; vperm2f128 of tmp2 with itself using immediate 1 back into tmp, and an
;; add of tmp and tmp2 into operand 0.
1315 (define_expand "reduc_splus_v8sf"
1316 [(match_operand:V8SF 0 "register_operand" "")
1317 (match_operand:V8SF 1 "register_operand" "")]
1320 rtx tmp = gen_reg_rtx (V8SFmode);
1321 rtx tmp2 = gen_reg_rtx (V8SFmode);
1322 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1323 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1324 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1325 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V4SF.  Two code paths are visible: one emits
;; two successive sse3 horizontal adds through a temporary, the other
;; delegates to ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the test that chooses between the two paths is not visible
;; in this excerpt -- presumably TARGET_SSE3; confirm against the full file.
1329 (define_expand "reduc_splus_v4sf"
1330 [(match_operand:V4SF 0 "register_operand" "")
1331 (match_operand:V4SF 1 "register_operand" "")]
1336 rtx tmp = gen_reg_rtx (V4SFmode);
1337 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1338 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1341 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1345 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled under TARGET_AVX2, the 256-bit
;; float modes under TARGET_AVX, and V4SF under TARGET_SSE.
1346 (define_mode_iterator REDUC_SMINMAX_MODE
1347 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1348 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1349 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1350 (V4SF "TARGET_SSE")])
;; Signed min/max reduction expander, instantiated over the smaxmin codes
;; and REDUC_SMINMAX_MODE.  Delegates to ix86_expand_reduc, passing the
;; generator for the matching <code><mode>3 pattern.
1352 (define_expand "reduc_<code>_<mode>"
1353 [(smaxmin:REDUC_SMINMAX_MODE
1354 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1355 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1358 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes.  Delegates to
;; ix86_expand_reduc, passing the generator for the matching <code><mode>3
;; pattern.
;; NOTE(review): the code iterator and enabling condition are not visible
;; in this excerpt; confirm against the full file.
1362 (define_expand "reduc_<code>_<mode>"
1364 (match_operand:VI_256 0 "register_operand" "")
1365 (match_operand:VI_256 1 "register_operand" ""))]
1368 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-min reduction expander for V8HI.  Delegates to
;; ix86_expand_reduc with gen_uminv8hi3.
1372 (define_expand "reduc_umin_v8hi"
1374 (match_operand:V8HI 0 "register_operand" "")
1375 (match_operand:V8HI 1 "register_operand" ""))]
1378 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1384 ;; Parallel floating point comparisons
1386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare over the VF modes.  Operand 3 is an immediate that
;; must satisfy const_0_to_31_operand; it is printed as the first AT&T
;; operand of vcmp<ssemodesuffix>.
1388 (define_insn "avx_cmp<mode>3"
1389 [(set (match_operand:VF 0 "register_operand" "=x")
1391 [(match_operand:VF 1 "register_operand" "x")
1392 (match_operand:VF 2 "nonimmediate_operand" "xm")
1393 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1396 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1397 [(set_attr "type" "ssecmp")
1398 (set_attr "length_immediate" "1")
1399 (set_attr "prefix" "vex")
1400 (set_attr "mode" "<MODE>")])
;; AVX scalar compare over the VF_128 modes: same operand layout as the
;; packed form, but emits vcmp<ssescalarmodesuffix> and reports the scalar
;; mode in the "mode" attribute.
1402 (define_insn "avx_vmcmp<mode>3"
1403 [(set (match_operand:VF_128 0 "register_operand" "=x")
1406 [(match_operand:VF_128 1 "register_operand" "x")
1407 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1408 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1413 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1414 [(set_attr "type" "ssecmp")
1415 (set_attr "length_immediate" "1")
1416 (set_attr "prefix" "vex")
1417 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare, commutative variant.  The insn condition
;; requires the comparison code to be in class RTX_COMM_COMPARE, and
;; operand 1 carries the "%" (commutative) constraint modifier.  The
;; condition name is printed through %D3.
1419 (define_insn "*<sse>_maskcmp<mode>3_comm"
1420 [(set (match_operand:VF 0 "register_operand" "=x,x")
1421 (match_operator:VF 3 "sse_comparison_operator"
1422 [(match_operand:VF 1 "register_operand" "%0,x")
1423 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1425 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1427 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1428 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1429 [(set_attr "isa" "noavx,avx")
1430 (set_attr "type" "ssecmp")
1431 (set_attr "length_immediate" "1")
1432 (set_attr "prefix" "orig,vex")
1433 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare over the VF modes (operand order fixed).
;; Two alternatives: two-operand cmp%D3 with the destination tied to
;; operand 1, and VEX three-operand vcmp%D3.
1435 (define_insn "<sse>_maskcmp<mode>3"
1436 [(set (match_operand:VF 0 "register_operand" "=x,x")
1437 (match_operator:VF 3 "sse_comparison_operator"
1438 [(match_operand:VF 1 "register_operand" "0,x")
1439 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1442 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1443 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1444 [(set_attr "isa" "noavx,avx")
1445 (set_attr "type" "ssecmp")
1446 (set_attr "length_immediate" "1")
1447 (set_attr "prefix" "orig,vex")
1448 (set_attr "mode" "<MODE>")])
;; Mask-producing scalar compare over the VF_128 modes.  Same two
;; alternatives as the packed form, using <ssescalarmodesuffix>;
;; "length_immediate" is 1 only for the first (non-AVX) alternative.
1450 (define_insn "<sse>_vmmaskcmp<mode>3"
1451 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1453 (match_operator:VF_128 3 "sse_comparison_operator"
1454 [(match_operand:VF_128 1 "register_operand" "0,x")
1455 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1460 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1461 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1462 [(set_attr "isa" "noavx,avx")
1463 (set_attr "type" "ssecmp")
1464 (set_attr "length_immediate" "1,*")
1465 (set_attr "prefix" "orig,vex")
1466 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of each
;; vector operand.  Enabled when SSE_FLOAT_MODE_P holds for the mode.
;; "prefix_data16" is computed from whether the insn mode is DF.
1468 (define_insn "<sse>_comi"
1469 [(set (reg:CCFP FLAGS_REG)
1472 (match_operand:<ssevecmode> 0 "register_operand" "x")
1473 (parallel [(const_int 0)]))
1475 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1476 (parallel [(const_int 0)]))))]
1477 "SSE_FLOAT_MODE_P (<MODE>mode)"
1478 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssecomi")
1480 (set_attr "prefix" "maybe_vex")
1481 (set_attr "prefix_rep" "0")
1482 (set (attr "prefix_data16")
1483 (if_then_else (eq_attr "mode" "DF")
1485 (const_string "0")))
1486 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but sets FLAGS_REG in CCFPU mode and
;; emits ucomi.  Compares element 0 of each vector operand; enabled when
;; SSE_FLOAT_MODE_P holds for the mode.
1488 (define_insn "<sse>_ucomi"
1489 [(set (reg:CCFPU FLAGS_REG)
1492 (match_operand:<ssevecmode> 0 "register_operand" "x")
1493 (parallel [(const_int 0)]))
1495 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1496 (parallel [(const_int 0)]))))]
1497 "SSE_FLOAT_MODE_P (<MODE>mode)"
1498 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "ssecomi")
1500 (set_attr "prefix" "maybe_vex")
1501 (set_attr "prefix_rep" "0")
1502 (set (attr "prefix_data16")
1503 (if_then_else (eq_attr "mode" "DF")
1505 (const_string "0")))
1506 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: 256-bit data mode (V_256) selected by a
;; 256-bit float comparison (VF_256).  The condition requires both modes to
;; have the same number of units.  Expansion is handed to
;; ix86_expand_fp_vcond, whose boolean result is captured in `ok'.
1508 (define_expand "vcond<V_256:mode><VF_256:mode>"
1509 [(set (match_operand:V_256 0 "register_operand" "")
1511 (match_operator 3 ""
1512 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1513 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1514 (match_operand:V_256 1 "general_operand" "")
1515 (match_operand:V_256 2 "general_operand" "")))]
1517 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1518 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1520 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander: 128-bit data mode (V_128) selected by a
;; 128-bit float comparison (VF_128).  The condition requires both modes to
;; have the same number of units.  Expansion is handed to
;; ix86_expand_fp_vcond, whose boolean result is captured in `ok'.
1525 (define_expand "vcond<V_128:mode><VF_128:mode>"
1526 [(set (match_operand:V_128 0 "register_operand" "")
1528 (match_operator 3 ""
1529 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1530 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1531 (match_operand:V_128 1 "general_operand" "")
1532 (match_operand:V_128 2 "general_operand" "")))]
1534 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1535 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1537 bool ok = ix86_expand_fp_vcond (operands);
1542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1544 ;; Parallel floating point logical operations
1546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output template is built at
;; output time: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and the chosen alternative's format string (andn%s or
;; vandn%s) is rendered into a static 32-byte buffer with snprintf.
1548 (define_insn "<sse>_andnot<mode>3"
1549 [(set (match_operand:VF 0 "register_operand" "=x,x")
1552 (match_operand:VF 1 "register_operand" "0,x"))
1553 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1556 static char buf[32];
1559 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1561 switch (which_alternative)
1564 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1567 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1573 snprintf (buf, sizeof (buf), insn, suffix);
1576 [(set_attr "isa" "noavx,avx")
1577 (set_attr "type" "sselog")
1578 (set_attr "prefix" "orig,vex")
1579 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations over the VF modes.  Both
;; sources may be registers or memory; the preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy.
1581 (define_expand "<code><mode>3"
1582 [(set (match_operand:VF 0 "register_operand" "")
1584 (match_operand:VF 1 "nonimmediate_operand" "")
1585 (match_operand:VF 2 "nonimmediate_operand" "")))]
1587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed logical insn over the VF modes, enabled when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  The mnemonic is built at
;; output time: suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, <ssemodesuffix> otherwise; the chosen format string (<logic>%s or
;; v<logic>%s) is rendered into a static 32-byte buffer with snprintf.
1589 (define_insn "*<code><mode>3"
1590 [(set (match_operand:VF 0 "register_operand" "=x,x")
1592 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1593 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1594 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1596 static char buf[32];
1599 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1601 switch (which_alternative)
1604 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1607 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1613 snprintf (buf, sizeof (buf), insn, suffix);
1616 [(set_attr "isa" "noavx,avx")
1617 (set_attr "type" "sselog")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "mode" "<MODE>")])
;; copysign expander over the VF modes.  Operand 3 is set to the value
;; returned by ix86_build_signbit_mask; operands 4 and 5 are fresh
;; registers.  The visible RTL combines (not operand 3) with operand 1,
;; ands operand 3 with operand 2, and sets operand 0 to the inclusive-or of
;; operands 4 and 5.
;; NOTE(review): presumably operand 4 holds the magnitude bits of operand 1
;; and operand 5 the sign bit of operand 2 -- the lines assigning them are
;; not visible in this excerpt; confirm.
1621 (define_expand "copysign<mode>3"
1624 (not:VF (match_dup 3))
1625 (match_operand:VF 1 "nonimmediate_operand" "")))
1627 (and:VF (match_dup 3)
1628 (match_operand:VF 2 "nonimmediate_operand" "")))
1629 (set (match_operand:VF 0 "register_operand" "")
1630 (ior:VF (match_dup 4) (match_dup 5)))]
1633 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1635 operands[4] = gen_reg_rtx (<MODE>mode);
1636 operands[5] = gen_reg_rtx (<MODE>mode);
1639 ;; Also define scalar versions. These are used for abs, neg, and
1640 ;; conditional move. Using subregs into vector modes causes register
1641 ;; allocation lossage. These patterns do not allow memory operands
1642 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not, enabled when SSE_FLOAT_MODE_P holds.  All
;; operands must be registers.  The mnemonic is built at output time:
;; suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; <ssevecmodesuffix> otherwise.  The "mode" attribute is the vector mode
;; <ssevecmode>.
1644 (define_insn "*andnot<mode>3"
1645 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1648 (match_operand:MODEF 1 "register_operand" "0,x"))
1649 (match_operand:MODEF 2 "register_operand" "x,x")))]
1650 "SSE_FLOAT_MODE_P (<MODE>mode)"
1652 static char buf[32];
1655 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1657 switch (which_alternative)
1660 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1663 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1669 snprintf (buf, sizeof (buf), insn, suffix);
1672 [(set_attr "isa" "noavx,avx")
1673 (set_attr "type" "sselog")
1674 (set_attr "prefix" "orig,vex")
1675 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical insn, enabled when SSE_FLOAT_MODE_P holds.  All
;; operands must be registers; operand 1 carries the "%" (commutative)
;; modifier.  The mnemonic is built at output time: suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, <ssevecmodesuffix>
;; otherwise.  The "mode" attribute is the vector mode <ssevecmode>.
1677 (define_insn "*<code><mode>3"
1678 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1680 (match_operand:MODEF 1 "register_operand" "%0,x")
1681 (match_operand:MODEF 2 "register_operand" "x,x")))]
1682 "SSE_FLOAT_MODE_P (<MODE>mode)"
1684 static char buf[32];
1687 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1689 switch (which_alternative)
1692 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1695 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1701 snprintf (buf, sizeof (buf), insn, suffix);
1704 [(set_attr "isa" "noavx,avx")
1705 (set_attr "type" "sselog")
1706 (set_attr "prefix" "orig,vex")
1707 (set_attr "mode" "<ssevecmode>")])
1709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1711 ;; FMA4 floating point multiply/accumulate instructions. This
1712 ;; includes the scalar version of the instructions as well as the
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1718 ;; combine to generate a multiply/add with two memory references. We then
1719 ;; split this insn into loading up the destination register with one of the
1720 ;; memory operations. If we don't manage to split the insn, reload will
1721 ;; generate the appropriate moves. The reason this is needed is that combine
1722 ;; has already folded one of the memory references into both the multiply and
1723 ;; add insns, and it can't generate a new pseudo. I.e.:
1724 ;; (set (reg1) (mem (addr1)))
1725 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1726 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1728 ;; ??? This is historic, pre-dating the gimple fma transformation.
1729 ;; We could now properly represent that only one memory operand is
1730 ;; allowed and not be penalized during optimization.
1732 ;; Intrinsic FMA operations.
1734 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE.  All three
;; sources may be registers or memory.  Enabled only when
;; (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1735 (define_expand "fma<mode>4"
1736 [(set (match_operand:FMAMODE 0 "register_operand")
1738 (match_operand:FMAMODE 1 "nonimmediate_operand")
1739 (match_operand:FMAMODE 2 "nonimmediate_operand")
1740 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1741 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander over FMAMODE: same shape
;; as the multiply-add form, with operand 3 wrapped in neg.  Enabled only
;; when (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1743 (define_expand "fms<mode>4"
1744 [(set (match_operand:FMAMODE 0 "register_operand")
1746 (match_operand:FMAMODE 1 "nonimmediate_operand")
1747 (match_operand:FMAMODE 2 "nonimmediate_operand")
1748 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1749 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander over FMAMODE:
;; operand 1 is wrapped in neg.  Enabled only when
;; (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1751 (define_expand "fnma<mode>4"
1752 [(set (match_operand:FMAMODE 0 "register_operand")
1754 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1755 (match_operand:FMAMODE 2 "nonimmediate_operand")
1756 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1757 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander over FMAMODE:
;; both operand 1 and operand 3 are wrapped in neg.  Enabled only when
;; (TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH.
1759 (define_expand "fnms<mode>4"
1760 [(set (match_operand:FMAMODE 0 "register_operand")
1762 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1763 (match_operand:FMAMODE 2 "nonimmediate_operand")
1764 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1765 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1767 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Intrinsic fused multiply-add expander over FMAMODE.  Unlike the
;; standard-name expanders, its condition is just TARGET_FMA || TARGET_FMA4
;; (no TARGET_SSE_MATH requirement).
1768 (define_expand "fma4i_fmadd_<mode>"
1769 [(set (match_operand:FMAMODE 0 "register_operand")
1771 (match_operand:FMAMODE 1 "nonimmediate_operand")
1772 (match_operand:FMAMODE 2 "nonimmediate_operand")
1773 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1774 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd over FMAMODE.  The constraints allow memory in
;; operand 3 (first alternative) or in operand 2 (second alternative), but
;; not both in the same alternative; operand 1 is register-constrained and
;; marked commutative ("%") in the first alternative.
1776 (define_insn "*fma4i_fmadd_<mode>"
1777 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1779 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1781 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1783 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1784 [(set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub over FMAMODE.  The constraints allow memory in
;; operand 3 (first alternative) or in operand 2 (second alternative), but
;; not both in the same alternative.
1787 (define_insn "*fma4i_fmsub_<mode>"
1788 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1790 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1791 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1793 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1795 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1796 [(set_attr "type" "ssemuladd")
1797 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd over FMAMODE.  The constraints allow memory in
;; operand 3 (first alternative) or in operand 2 (second alternative), but
;; not both in the same alternative.
1799 (define_insn "*fma4i_fnmadd_<mode>"
1800 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1803 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1804 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1805 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1807 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1808 [(set_attr "type" "ssemuladd")
1809 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub over FMAMODE.  The constraints allow memory in
;; operand 3 (first alternative) or in operand 2 (second alternative), but
;; not both in the same alternative.
1811 (define_insn "*fma4i_fnmsub_<mode>"
1812 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1815 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1816 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1818 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1820 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 [(set_attr "type" "ssemuladd")
1822 (set_attr "mode" "<MODE>")])
1824 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1825 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 multiply-add intrinsic over VF_128.  The
;; preparation code sets operand 4 to the zero constant of the vector mode
;; (CONST0_RTX).
1827 (define_expand "fma4i_vmfmadd_<mode>"
1828 [(set (match_operand:VF_128 0 "register_operand")
1831 (match_operand:VF_128 1 "nonimmediate_operand")
1832 (match_operand:VF_128 2 "nonimmediate_operand")
1833 (match_operand:VF_128 3 "nonimmediate_operand"))
1838 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar multiply-add intrinsic over VF_128 (the "fmai"
;; naming matches the three-operand insns that follow).  No preparation
;; code is visible for it in this excerpt.
1841 (define_expand "fmai_vmfmadd_<mode>"
1842 [(set (match_operand:VF_128 0 "register_operand")
1845 (match_operand:VF_128 1 "nonimmediate_operand")
1846 (match_operand:VF_128 2 "nonimmediate_operand")
1847 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand vfmadd over VF_128 with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
1852 (define_insn "*fmai_fmadd_<mode>"
1853 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1856 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1857 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1858 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1863 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1864 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1865 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1866 [(set_attr "type" "ssemuladd")
1867 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfmsub over VF_128 with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
1869 (define_insn "*fmai_fmsub_<mode>"
1870 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1873 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1874 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1876 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1881 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1882 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1883 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmadd over VF_128 with three alternatives, one
;; per encoding form (132, 213, 231).  The destination is tied to operand 1
;; in the first two alternatives and to operand 3 in the third.
1887 (define_insn "*fmai_fnmadd_<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1892 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1893 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1894 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1899 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1900 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1901 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmsub over VF_128 with three alternatives, one
;; per encoding form (132, 213, 231).  The destination is tied to operand 1
;; in the first two alternatives and to operand 3 in the third.
1905 (define_insn "*fmai_fnmsub_<mode>"
1906 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1910 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1911 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1913 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1918 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1919 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1920 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1921 [(set_attr "type" "ssemuladd")
1922 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd over VF_128.  Operand 4 must satisfy
;; const0_operand.  Memory is allowed in operand 3 (first alternative) or
;; operand 2 (second alternative).
1924 (define_insn "*fma4i_vmfmadd_<mode>"
1925 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1928 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1929 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1930 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1931 (match_operand:VF_128 4 "const0_operand" "")
1934 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1935 [(set_attr "type" "ssemuladd")
1936 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub over VF_128.  Operand 4 must satisfy
;; const0_operand.  Memory is allowed in operand 3 (first alternative) or
;; operand 2 (second alternative).
1938 (define_insn "*fma4i_vmfmsub_<mode>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1942 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1943 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1945 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1946 (match_operand:VF_128 4 "const0_operand" "")
1949 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd over VF_128.  Operand 4 must satisfy
;; const0_operand.  Memory is allowed in operand 3 (first alternative) or
;; operand 2 (second alternative).
1953 (define_insn "*fma4i_vmfnmadd_<mode>"
1954 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1958 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1959 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1960 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1961 (match_operand:VF_128 4 "const0_operand" "")
1964 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1965 [(set_attr "type" "ssemuladd")
1966 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub over VF_128.  Operand 4 must satisfy
;; const0_operand.  Memory is allowed in operand 3 (first alternative) or
;; operand 2 (second alternative).
1968 (define_insn "*fma4i_vmfnmsub_<mode>"
1969 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1973 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1974 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1976 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1977 (match_operand:VF_128 4 "const0_operand" "")
1980 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1981 [(set_attr "type" "ssemuladd")
1982 (set_attr "mode" "<MODE>")])
1984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1986 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1990 ;; It would be possible to represent these without the UNSPEC as
1993 ;; (fma op1 op2 op3)
1994 ;; (fma op1 op2 (neg op3))
1997 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation over the VF modes.  All
;; three sources may be registers or memory.  Enabled when
;; TARGET_FMA || TARGET_FMA4.
1999 (define_expand "fmaddsub_<mode>"
2000 [(set (match_operand:VF 0 "register_operand")
2002 [(match_operand:VF 1 "nonimmediate_operand")
2003 (match_operand:VF 2 "nonimmediate_operand")
2004 (match_operand:VF 3 "nonimmediate_operand")]
2006 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub over the VF modes.  Memory is allowed in
;; operand 3 (first alternative) or operand 2 (second alternative).
2008 (define_insn "*fma4_fmaddsub_<mode>"
2009 [(set (match_operand:VF 0 "register_operand" "=x,x")
2011 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2012 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2013 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
2016 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd over the VF modes.  Memory is allowed in
;; operand 3 (first alternative) or operand 2 (second alternative).
2020 (define_insn "*fma4_fmsubadd_<mode>"
2021 [(set (match_operand:VF 0 "register_operand" "=x,x")
2023 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2024 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2026 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
2029 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2030 [(set_attr "type" "ssemuladd")
2031 (set_attr "mode" "<MODE>")])
2033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2035 ;; FMA3 floating point multiply/accumulate instructions.
2037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
2039 (define_insn "*fma_fmadd_<mode>"
2040 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2042 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2043 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2044 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2047 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2048 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2049 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "type" "ssemuladd")
2051 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
2053 (define_insn "*fma_fmsub_<mode>"
2054 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2056 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2057 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2059 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2062 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2063 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2064 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2065 [(set_attr "type" "ssemuladd")
2066 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
2068 (define_insn "*fma_fnmadd_<mode>"
2069 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2072 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2073 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2074 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2077 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2078 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2079 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2080 [(set_attr "type" "ssemuladd")
2081 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE with three alternatives, one per
;; encoding form (132, 213, 231).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the third.
2083 (define_insn "*fma_fnmsub_<mode>"
2084 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2087 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2088 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2090 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2093 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2094 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2095 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2096 [(set_attr "type" "ssemuladd")
2097 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over the VF modes with three alternatives, one
;; per encoding form (132, 213, 231).  The destination is tied to operand 1
;; in the first two alternatives and to operand 3 in the third.
2099 (define_insn "*fma_fmaddsub_<mode>"
2100 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2102 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2103 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2104 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2108 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2109 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2110 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2111 [(set_attr "type" "ssemuladd")
2112 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over the VF modes with three alternatives, one
;; per encoding form (132, 213, 231).  The destination is tied to operand 1
;; in the first two alternatives and to operand 3 in the third.
2114 (define_insn "*fma_fmsubadd_<mode>"
2115 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2117 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2118 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2120 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2124 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2125 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2126 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2127 [(set_attr "type" "ssemuladd")
2128 (set_attr "mode" "<MODE>")])
2130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2132 ;; Parallel single-precision floating point conversion operations
2134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("y" register or memory) to V2SF
;; with `float'.  Operand 1 is a V4SF register tied to the destination
;; through the "0" constraint.
2136 (define_insn "sse_cvtpi2ps"
2137 [(set (match_operand:V4SF 0 "register_operand" "=x")
2140 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2141 (match_operand:V4SF 1 "register_operand" "0")
2144 "cvtpi2ps\t{%2, %0|%0, %2}"
2145 [(set_attr "type" "ssecvt")
2146 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source goes through an unspec producing V4SI, of
;; which elements 0 and 1 form the V2SI result in a "y" register.  The
;; insn is attributed to the "mmx" unit.
2148 (define_insn "sse_cvtps2pi"
2149 [(set (match_operand:V2SI 0 "register_operand" "=y")
2151 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2153 (parallel [(const_int 0) (const_int 1)])))]
2155 "cvtps2pi\t{%1, %0|%0, %1}"
2156 [(set_attr "type" "ssecvt")
2157 (set_attr "unit" "mmx")
2158 (set_attr "mode" "DI")])
;; cvttps2pi: the V4SF source is converted with `fix' to V4SI, of which
;; elements 0 and 1 form the V2SI result in a "y" register.  The insn is
;; attributed to the "mmx" unit.
2160 (define_insn "sse_cvttps2pi"
2161 [(set (match_operand:V2SI 0 "register_operand" "=y")
2163 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2164 (parallel [(const_int 0) (const_int 1)])))]
2166 "cvttps2pi\t{%1, %0|%0, %1}"
2167 [(set_attr "type" "ssecvt")
2168 (set_attr "unit" "mmx")
2169 (set_attr "prefix_rep" "0")
2170 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF with `float'.  Three
;; alternatives: register source and memory source for the two-operand
;; form (destination tied to operand 1), plus the VEX three-operand form.
;; The first two alternatives differ only in their per-CPU decode
;; attributes.
2172 (define_insn "sse_cvtsi2ss"
2173 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2176 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2177 (match_operand:V4SF 1 "register_operand" "0,0,x")
2181 cvtsi2ss\t{%2, %0|%0, %2}
2182 cvtsi2ss\t{%2, %0|%0, %2}
2183 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2184 [(set_attr "isa" "noavx,noavx,avx")
2185 (set_attr "type" "sseicvt")
2186 (set_attr "athlon_decode" "vector,double,*")
2187 (set_attr "amdfam10_decode" "vector,double,*")
2188 (set_attr "bdver1_decode" "double,direct,*")
2189 (set_attr "prefix" "orig,orig,vex")
2190 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source counterpart of the SImode conversion, enabled
;; only when TARGET_SSE && TARGET_64BIT.  Same three alternatives; the two
;; non-AVX alternatives set "prefix_rex" to 1 and the AVX alternative sets
;; "length_vex" to 4.
2192 (define_insn "sse_cvtsi2ssq"
2193 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2196 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2197 (match_operand:V4SF 1 "register_operand" "0,0,x")
2199 "TARGET_SSE && TARGET_64BIT"
2201 cvtsi2ssq\t{%2, %0|%0, %2}
2202 cvtsi2ssq\t{%2, %0|%0, %2}
2203 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2204 [(set_attr "isa" "noavx,noavx,avx")
2205 (set_attr "type" "sseicvt")
2206 (set_attr "athlon_decode" "vector,double,*")
2207 (set_attr "amdfam10_decode" "vector,double,*")
2208 (set_attr "bdver1_decode" "double,direct,*")
2209 (set_attr "length_vex" "*,*,4")
2210 (set_attr "prefix_rex" "1,1,*")
2211 (set_attr "prefix" "orig,orig,vex")
2212 (set_attr "mode" "SF")])
;; cvtss2si, vector-source form: UNSPEC_FIX_NOTRUNC applied to element 0 of
;; the V4SF operand 1, producing an SImode general register.  The source
;; may be a register or memory (separate alternatives with different decode
;; attributes).
2214 (define_insn "sse_cvtss2si"
2215 [(set (match_operand:SI 0 "register_operand" "=r,r")
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))]
2220 UNSPEC_FIX_NOTRUNC))]
2222 "%vcvtss2si\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "sseicvt")
2224 (set_attr "athlon_decode" "double,vector")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "SI")])
;; cvtss2si, scalar-source form: same instruction as sse_cvtss2si, but
;; operand 1 is a plain SFmode value (register or memory) instead of an
;; element selected from a V4SF vector.  Uses UNSPEC_FIX_NOTRUNC.
2230 (define_insn "sse_cvtss2si_2"
2231 [(set (match_operand:SI 0 "register_operand" "=r,r")
2232 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2233 UNSPEC_FIX_NOTRUNC))]
2235 "%vcvtss2si\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "sseicvt")
2237 (set_attr "athlon_decode" "double,vector")
2238 (set_attr "amdfam10_decode" "double,double")
2239 (set_attr "bdver1_decode" "double,double")
2240 (set_attr "prefix_rep" "1")
2241 (set_attr "prefix" "maybe_vex")
2242 (set_attr "mode" "SI")])
;; cvtss2si{q}: DImode result from element 0 of the V4SF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Only available when both TARGET_SSE
;; and TARGET_64BIT hold.
2244 (define_insn "sse_cvtss2siq"
2245 [(set (match_operand:DI 0 "register_operand" "=r,r")
2248 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2249 (parallel [(const_int 0)]))]
2250 UNSPEC_FIX_NOTRUNC))]
2251 "TARGET_SSE && TARGET_64BIT"
2252 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2253 [(set_attr "type" "sseicvt")
2254 (set_attr "athlon_decode" "double,vector")
2255 (set_attr "bdver1_decode" "double,double")
2256 (set_attr "prefix_rep" "1")
2257 (set_attr "prefix" "maybe_vex")
2258 (set_attr "mode" "DI")])
;; cvtss2si{q}, scalar-source form: DImode result from a plain SFmode
;; operand 1 (register or memory) via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE and TARGET_64BIT.
2260 (define_insn "sse_cvtss2siq_2"
2261 [(set (match_operand:DI 0 "register_operand" "=r,r")
2262 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2263 UNSPEC_FIX_NOTRUNC))]
2264 "TARGET_SSE && TARGET_64BIT"
2265 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2266 [(set_attr "type" "sseicvt")
2267 (set_attr "athlon_decode" "double,vector")
2268 (set_attr "amdfam10_decode" "double,double")
2269 (set_attr "bdver1_decode" "double,double")
2270 (set_attr "prefix_rep" "1")
2271 (set_attr "prefix" "maybe_vex")
2272 (set_attr "mode" "DI")])
;; cvttss2si: SImode result from element 0 of the V4SF operand 1
;; (register or memory alternative).  Emitted with an optional VEX
;; prefix ("%v" / prefix "maybe_vex") and a REP prefix (prefix_rep "1").
2274 (define_insn "sse_cvttss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))))]
2281 "%vcvttss2si\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "sseicvt")
2283 (set_attr "athlon_decode" "double,vector")
2284 (set_attr "amdfam10_decode" "double,double")
2285 (set_attr "bdver1_decode" "double,double")
2286 (set_attr "prefix_rep" "1")
2287 (set_attr "prefix" "maybe_vex")
2288 (set_attr "mode" "SI")])
;; cvttss2si{q}: DImode counterpart of sse_cvttss2si -- result taken from
;; element 0 of the V4SF operand 1.  Requires TARGET_SSE and
;; TARGET_64BIT.
2290 (define_insn "sse_cvttss2siq"
2291 [(set (match_operand:DI 0 "register_operand" "=r,r")
2294 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2295 (parallel [(const_int 0)]))))]
2296 "TARGET_SSE && TARGET_64BIT"
2297 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "sseicvt")
2299 (set_attr "athlon_decode" "double,vector")
2300 (set_attr "amdfam10_decode" "double,double")
2301 (set_attr "bdver1_decode" "double,double")
2302 (set_attr "prefix_rep" "1")
2303 (set_attr "prefix" "maybe_vex")
2304 (set_attr "mode" "DI")])
;; cvtdq2ps: vector integer -> single-precision float conversion,
;; instantiated for every mode in the VF1 iterator (V8SF under
;; TARGET_AVX, and V4SF).  Operand 1 has the matching integer vector
;; mode <sseintvecmode> and may be a register or memory.
2306 (define_insn "float<sseintvecmodelower><mode>2"
2307 [(set (match_operand:VF1 0 "register_operand" "=x")
2309 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2311 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "ssecvt")
2313 (set_attr "prefix" "maybe_vex")
2314 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned vector integer -> single-precision float conversion.  There
;; is no RTL template here: the whole expansion is delegated to
;; ix86_expand_vector_convert_uns_vsivsf.  Enabled for V4SF with SSE2,
;; and for the other VF1 mode only when TARGET_AVX2 is set.
2316 (define_expand "floatuns<sseintvecmodelower><mode>2"
2317 [(match_operand:VF1 0 "register_operand" "")
2318 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2319 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2321 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; vcvtps2dq (256-bit): V8SF -> V8SI conversion expressed with
;; UNSPEC_FIX_NOTRUNC.  VEX-encoded only (prefix "vex").
2325 (define_insn "avx_cvtps2dq256"
2326 [(set (match_operand:V8SI 0 "register_operand" "=x")
2327 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2328 UNSPEC_FIX_NOTRUNC))]
2330 "vcvtps2dq\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "prefix" "vex")
2333 (set_attr "mode" "OI")])
;; cvtps2dq (128-bit): V4SF -> V4SI conversion expressed with
;; UNSPEC_FIX_NOTRUNC.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test instead of being a constant.
2335 (define_insn "sse2_cvtps2dq"
2336 [(set (match_operand:V4SI 0 "register_operand" "=x")
2337 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2338 UNSPEC_FIX_NOTRUNC))]
2340 "%vcvtps2dq\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set (attr "prefix_data16")
2344 (match_test "TARGET_AVX")
2346 (const_string "1")))
2347 (set_attr "prefix" "maybe_vex")
2348 (set_attr "mode" "TI")])
;; vcvttps2dq (256-bit): V8SF -> V8SI conversion written as a plain
;; fix: rtx, under the standard pattern name fix_truncv8sfv8si2.
;; VEX-encoded only.
2350 (define_insn "fix_truncv8sfv8si2"
2351 [(set (match_operand:V8SI 0 "register_operand" "=x")
2352 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2354 "vcvttps2dq\t{%1, %0|%0, %1}"
2355 [(set_attr "type" "ssecvt")
2356 (set_attr "prefix" "vex")
2357 (set_attr "mode" "OI")])
;; cvttps2dq (128-bit): V4SF -> V4SI conversion written as a plain
;; fix: rtx.  prefix_rep and prefix_data16 are both computed from a
;; TARGET_AVX test.
;; NOTE(review): "prefix_data16" is set twice below -- once through the
;; TARGET_AVX conditional and once unconditionally to "0".  The second
;; setting looks redundant; confirm which one the attribute generator
;; honours before removing either.
2359 (define_insn "fix_truncv4sfv4si2"
2360 [(set (match_operand:V4SI 0 "register_operand" "=x")
2361 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2363 "%vcvttps2dq\t{%1, %0|%0, %1}"
2364 [(set_attr "type" "ssecvt")
2365 (set (attr "prefix_rep")
2367 (match_test "TARGET_AVX")
2369 (const_string "1")))
2370 (set (attr "prefix_data16")
2372 (match_test "TARGET_AVX")
2374 (const_string "0")))
2375 (set_attr "prefix_data16" "0")
2376 (set_attr "prefix" "maybe_vex")
2377 (set_attr "mode" "TI")])
;; Unsigned float -> integer vector conversion for the VF1 modes, built
;; on top of the signed conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts the input (tmp[0]) and
;;      hands back a second value through its out-parameter (tmp[2]);
;;   2. the signed fix_trunc pattern converts tmp[0] into tmp[1];
;;   3. tmp[1] is XORed with tmp[2] to form the final result.
2379 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2380 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2381 (match_operand:VF1 1 "register_operand" "")]
2385 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2386 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2387 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2388 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2394 ;; Parallel double-precision floating point conversion operations
2396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF conversion (float:V2DF).  Alternative 0 takes
;; the source from a "y"-class register and is tagged unit "mmx" with a
;; data16 prefix; alternative 1 takes it from memory and leaves both
;; attributes at their defaults ("*").
2398 (define_insn "sse2_cvtpi2pd"
2399 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2400 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2402 "cvtpi2pd\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "unit" "mmx,*")
2405 (set_attr "prefix_data16" "1,*")
2406 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion expressed with UNSPEC_FIX_NOTRUNC.
;; The destination uses the "y" constraint and the insn is tagged unit
;; "mmx".
2408 (define_insn "sse2_cvtpd2pi"
2409 [(set (match_operand:V2SI 0 "register_operand" "=y")
2410 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2411 UNSPEC_FIX_NOTRUNC))]
2413 "cvtpd2pi\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "unit" "mmx")
2416 (set_attr "bdver1_decode" "double")
2417 (set_attr "prefix_data16" "1")
2418 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI conversion written as a plain fix: rtx.  The
;; destination uses the "y" constraint and the insn is tagged unit
;; "mmx".
2420 (define_insn "sse2_cvttpd2pi"
2421 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2424 "cvttpd2pi\t{%1, %0|%0, %1}"
2425 [(set_attr "type" "ssecvt")
2426 (set_attr "unit" "mmx")
2427 (set_attr "bdver1_decode" "double")
2428 (set_attr "prefix_data16" "1")
2429 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SImode operand 2 to DFmode (float:DF); operand 1
;; is the V2DF register input.  Three alternatives:
;;   0/1  legacy SSE2, source in a GPR / in memory, destination tied to
;;        operand 1 ("0" constraint);
;;   2    AVX three-operand form (vcvtsi2sd).
2431 (define_insn "sse2_cvtsi2sd"
2432 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2435 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2436 (match_operand:V2DF 1 "register_operand" "0,0,x")
2440 cvtsi2sd\t{%2, %0|%0, %2}
2441 cvtsi2sd\t{%2, %0|%0, %2}
2442 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2443 [(set_attr "isa" "noavx,noavx,avx")
2444 (set_attr "type" "sseicvt")
2445 (set_attr "athlon_decode" "double,direct,*")
2446 (set_attr "amdfam10_decode" "vector,double,*")
2447 (set_attr "bdver1_decode" "double,direct,*")
2448 (set_attr "prefix" "orig,orig,vex")
2449 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit variant of sse2_cvtsi2sd -- the integer source
;; (operand 2) is DImode, so the pattern requires TARGET_64BIT.
;; Alternatives 0/1 are the legacy forms (REX prefix required:
;; prefix_rex "1"); alternative 2 is the AVX form with length_vex "4".
2451 (define_insn "sse2_cvtsi2sdq"
2452 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2455 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2456 (match_operand:V2DF 1 "register_operand" "0,0,x")
2458 "TARGET_SSE2 && TARGET_64BIT"
2460 cvtsi2sdq\t{%2, %0|%0, %2}
2461 cvtsi2sdq\t{%2, %0|%0, %2}
2462 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "isa" "noavx,noavx,avx")
2464 (set_attr "type" "sseicvt")
2465 (set_attr "athlon_decode" "double,direct,*")
2466 (set_attr "amdfam10_decode" "vector,double,*")
2467 (set_attr "bdver1_decode" "double,direct,*")
2468 (set_attr "length_vex" "*,*,4")
2469 (set_attr "prefix_rex" "1,1,*")
2470 (set_attr "prefix" "orig,orig,vex")
2471 (set_attr "mode" "DF")])
;; cvtsd2si: SImode result computed from element 0 of the V2DF operand 1
;; (register or memory alternative), wrapped in UNSPEC_FIX_NOTRUNC.
2473 (define_insn "sse2_cvtsd2si"
2474 [(set (match_operand:SI 0 "register_operand" "=r,r")
2477 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2478 (parallel [(const_int 0)]))]
2479 UNSPEC_FIX_NOTRUNC))]
2481 "%vcvtsd2si\t{%1, %0|%0, %1}"
2482 [(set_attr "type" "sseicvt")
2483 (set_attr "athlon_decode" "double,vector")
2484 (set_attr "bdver1_decode" "double,double")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix" "maybe_vex")
2487 (set_attr "mode" "SI")])
;; cvtsd2si, scalar-source form: same instruction as sse2_cvtsd2si, but
;; operand 1 is a plain DFmode value (register or memory) rather than an
;; element of a V2DF vector.  Uses UNSPEC_FIX_NOTRUNC.
2489 (define_insn "sse2_cvtsd2si_2"
2490 [(set (match_operand:SI 0 "register_operand" "=r,r")
2491 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2492 UNSPEC_FIX_NOTRUNC))]
2494 "%vcvtsd2si\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode result from element 0 of the V2DF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
2503 (define_insn "sse2_cvtsd2siq"
2504 [(set (match_operand:DI 0 "register_operand" "=r,r")
2507 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2508 (parallel [(const_int 0)]))]
2509 UNSPEC_FIX_NOTRUNC))]
2510 "TARGET_SSE2 && TARGET_64BIT"
2511 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2512 [(set_attr "type" "sseicvt")
2513 (set_attr "athlon_decode" "double,vector")
2514 (set_attr "bdver1_decode" "double,double")
2515 (set_attr "prefix_rep" "1")
2516 (set_attr "prefix" "maybe_vex")
2517 (set_attr "mode" "DI")])
;; cvtsd2si{q}, scalar-source form: DImode result from a plain DFmode
;; operand 1 (register or memory) via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
2519 (define_insn "sse2_cvtsd2siq_2"
2520 [(set (match_operand:DI 0 "register_operand" "=r,r")
2521 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2522 UNSPEC_FIX_NOTRUNC))]
2523 "TARGET_SSE2 && TARGET_64BIT"
2524 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2525 [(set_attr "type" "sseicvt")
2526 (set_attr "athlon_decode" "double,vector")
2527 (set_attr "amdfam10_decode" "double,double")
2528 (set_attr "bdver1_decode" "double,double")
2529 (set_attr "prefix_rep" "1")
2530 (set_attr "prefix" "maybe_vex")
2531 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result from element 0 of the V2DF operand 1
;; (register or memory alternative).  Emitted with an optional VEX
;; prefix and a REP prefix (prefix_rep "1").
2533 (define_insn "sse2_cvttsd2si"
2534 [(set (match_operand:SI 0 "register_operand" "=r,r")
2537 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2538 (parallel [(const_int 0)]))))]
2540 "%vcvttsd2si\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "sseicvt")
2542 (set_attr "athlon_decode" "double,vector")
2543 (set_attr "amdfam10_decode" "double,double")
2544 (set_attr "bdver1_decode" "double,double")
2545 (set_attr "prefix_rep" "1")
2546 (set_attr "prefix" "maybe_vex")
2547 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode counterpart of sse2_cvttsd2si -- result taken
;; from element 0 of the V2DF operand 1.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
2549 (define_insn "sse2_cvttsd2siq"
2550 [(set (match_operand:DI 0 "register_operand" "=r,r")
2553 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2554 (parallel [(const_int 0)]))))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "bdver1_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): V4SI -> V4DF conversion (float:V4DF)
;; under the standard pattern name floatv4siv4df2.  VEX-encoded only.
2565 (define_insn "floatv4siv4df2"
2566 [(set (match_operand:V4DF 0 "register_operand" "=x")
2567 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2569 "vcvtdq2pd\t{%1, %0|%0, %1}"
2570 [(set_attr "type" "ssecvt")
2571 (set_attr "prefix" "vex")
2572 (set_attr "mode" "V4DF")])
;; vcvtdq2pd taking its input from a V8SI operand: the parallel picks
;; elements 0..3 of operand 1 to produce the V4DF result.  The template
;; prints operand 1 with the %x modifier (presumably its 128-bit
;; register name -- confirm against the operand-printing code).
2574 (define_insn "avx_cvtdq2pd256_2"
2575 [(set (match_operand:V4DF 0 "register_operand" "=x")
2578 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2579 (parallel [(const_int 0) (const_int 1)
2580 (const_int 2) (const_int 3)]))))]
2582 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2583 [(set_attr "type" "ssecvt")
2584 (set_attr "prefix" "vex")
2585 (set_attr "mode" "V4DF")])
;; cvtdq2pd (128-bit): V2DF result from elements 0 and 1 of the V4SI
;; operand 1 (register or memory).  In the Intel-syntax half of the
;; template operand 1 is printed with the %q modifier.
2587 (define_insn "sse2_cvtdq2pd"
2588 [(set (match_operand:V2DF 0 "register_operand" "=x")
2591 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2592 (parallel [(const_int 0) (const_int 1)]))))]
2594 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2595 [(set_attr "type" "ssecvt")
2596 (set_attr "prefix" "maybe_vex")
2597 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF -> V4SI conversion expressed with
;; UNSPEC_FIX_NOTRUNC.  VEX-encoded only; the {y} suffix appears in the
;; AT&T mnemonic.
2599 (define_insn "avx_cvtpd2dq256"
2600 [(set (match_operand:V4SI 0 "register_operand" "=x")
2601 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2602 UNSPEC_FIX_NOTRUNC))]
2604 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "prefix" "vex")
2607 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from the V4SI conversion of the
;; V4DF operand 1.  The preparation statement supplies operands[2] as
;; the V4SImode zero constant.
2609 (define_expand "avx_cvtpd2dq256_2"
2610 [(set (match_operand:V8SI 0 "register_operand" "")
2612 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2616 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the avx_cvtpd2dq256_2 expander: V8SI destination,
;; V4SI conversion of the V4DF operand 1, and operand 2 restricted to a
;; V4SI zero (const0_operand).  The template writes operand 0 with the
;; %x modifier.
2618 (define_insn "*avx_cvtpd2dq256_2"
2619 [(set (match_operand:V8SI 0 "register_operand" "=x")
2621 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2623 (match_operand:V4SI 2 "const0_operand" "")))]
2625 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2626 [(set_attr "type" "ssecvt")
2627 (set_attr "prefix" "vex")
2628 (set_attr "mode" "OI")])
;; Expander producing a V4SI destination from the V2SI conversion of the
;; V2DF operand 1.  The preparation statement supplies operands[2] as
;; the V2SImode zero constant.
2630 (define_expand "sse2_cvtpd2dq"
2631 [(set (match_operand:V4SI 0 "register_operand" "")
2633 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2637 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the sse2_cvtpd2dq expander: V4SI destination, V2SI
;; conversion of the V2DF operand 1, and operand 2 restricted to a V2SI
;; zero.  The output code returns either the VEX template
;; (vcvtpd2dq{x}) or the legacy one (cvtpd2dq).
2639 (define_insn "*sse2_cvtpd2dq"
2640 [(set (match_operand:V4SI 0 "register_operand" "=x")
2642 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2644 (match_operand:V2SI 2 "const0_operand" "")))]
2648 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2650 return "cvtpd2dq\t{%1, %0|%0, %1}";
2652 [(set_attr "type" "ssecvt")
2653 (set_attr "prefix_rep" "1")
2654 (set_attr "prefix_data16" "0")
2655 (set_attr "prefix" "maybe_vex")
2656 (set_attr "mode" "TI")
2657 (set_attr "amdfam10_decode" "double")
2658 (set_attr "athlon_decode" "vector")
2659 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: V4DF -> V4SI conversion written as a plain fix: rtx,
;; under the standard pattern name fix_truncv4dfv4si2.  VEX-encoded
;; only.
2661 (define_insn "fix_truncv4dfv4si2"
2662 [(set (match_operand:V4SI 0 "register_operand" "=x")
2663 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2665 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2666 [(set_attr "type" "ssecvt")
2667 (set_attr "prefix" "vex")
2668 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from fix:V4SI of the V4DF
;; operand 1.  The preparation statement supplies operands[2] as the
;; V4SImode zero constant.
2670 (define_expand "avx_cvttpd2dq256_2"
2671 [(set (match_operand:V8SI 0 "register_operand" "")
2673 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2676 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the avx_cvttpd2dq256_2 expander: V8SI destination,
;; fix:V4SI of the V4DF operand 1, and operand 2 restricted to a V4SI
;; zero.  The template writes operand 0 with the %x modifier.
2678 (define_insn "*avx_cvttpd2dq256_2"
2679 [(set (match_operand:V8SI 0 "register_operand" "=x")
2681 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2682 (match_operand:V4SI 2 "const0_operand" "")))]
2684 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "vex")
2687 (set_attr "mode" "OI")])
;; Expander producing a V4SI destination from fix:V2SI of the V2DF
;; operand 1.  The preparation statement supplies operands[2] as the
;; V2SImode zero constant.
2689 (define_expand "sse2_cvttpd2dq"
2690 [(set (match_operand:V4SI 0 "register_operand" "")
2692 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2695 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the sse2_cvttpd2dq expander: V4SI destination,
;; fix:V2SI of the V2DF operand 1, and operand 2 restricted to a V2SI
;; zero.  The output code returns either the VEX template
;; (vcvttpd2dq{x}) or the legacy one (cvttpd2dq).
2697 (define_insn "*sse2_cvttpd2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2700 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2701 (match_operand:V2SI 2 "const0_operand" "")))]
2705 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2707 return "cvttpd2dq\t{%1, %0|%0, %1}";
2709 [(set_attr "type" "ssecvt")
2710 (set_attr "amdfam10_decode" "double")
2711 (set_attr "athlon_decode" "vector")
2712 (set_attr "bdver1_decode" "double")
2713 (set_attr "prefix" "maybe_vex")
2714 (set_attr "mode" "TI")])
;; cvtsd2ss: narrows the V2DF operand 2 with float_truncate:V2SF;
;; operand 1 is the V4SF register input.  Three alternatives:
;;   0/1  legacy SSE2, source in a register / in memory, destination
;;        tied to operand 1 ("0" constraint);
;;   2    AVX three-operand form (vcvtsd2ss).
2716 (define_insn "sse2_cvtsd2ss"
2717 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2720 (float_truncate:V2SF
2721 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2722 (match_operand:V4SF 1 "register_operand" "0,0,x")
2726 cvtsd2ss\t{%2, %0|%0, %2}
2727 cvtsd2ss\t{%2, %0|%0, %2}
2728 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2729 [(set_attr "isa" "noavx,noavx,avx")
2730 (set_attr "type" "ssecvt")
2731 (set_attr "athlon_decode" "vector,double,*")
2732 (set_attr "amdfam10_decode" "vector,double,*")
2733 (set_attr "bdver1_decode" "direct,direct,*")
2734 (set_attr "prefix" "orig,orig,vex")
2735 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2; operand 1 is
;; the V2DF register input.  Three alternatives:
;;   0/1  legacy SSE2, source in a register / in memory, destination
;;        tied to operand 1 ("0" constraint);
;;   2    AVX three-operand form (vcvtss2sd).
2737 (define_insn "sse2_cvtss2sd"
2738 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2742 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2743 (parallel [(const_int 0) (const_int 1)])))
2744 (match_operand:V2DF 1 "register_operand" "0,0,x")
2748 cvtss2sd\t{%2, %0|%0, %2}
2749 cvtss2sd\t{%2, %0|%0, %2}
2750 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2751 [(set_attr "isa" "noavx,noavx,avx")
2752 (set_attr "type" "ssecvt")
2753 (set_attr "amdfam10_decode" "vector,double,*")
2754 (set_attr "athlon_decode" "direct,direct,*")
2755 (set_attr "bdver1_decode" "direct,direct,*")
2756 (set_attr "prefix" "orig,orig,vex")
2757 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: V4DF -> V4SF narrowing (float_truncate:V4SF).
;; VEX-encoded only.
2759 (define_insn "avx_cvtpd2ps256"
2760 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V4SF
2762 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2764 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "ssecvt")
2766 (set_attr "prefix" "vex")
2767 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF destination from float_truncate:V2SF of the
;; V2DF operand 1.  The preparation statement supplies operands[2] as
;; the V2SFmode zero constant.
2769 (define_expand "sse2_cvtpd2ps"
2770 [(set (match_operand:V4SF 0 "register_operand" "")
2772 (float_truncate:V2SF
2773 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2776 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the sse2_cvtpd2ps expander: V4SF destination,
;; float_truncate:V2SF of the V2DF operand 1, and operand 2 restricted
;; to a V2SF zero.  The output code returns either the VEX template
;; (vcvtpd2ps{x}) or the legacy one (cvtpd2ps).
2778 (define_insn "*sse2_cvtpd2ps"
2779 [(set (match_operand:V4SF 0 "register_operand" "=x")
2781 (float_truncate:V2SF
2782 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2783 (match_operand:V2SF 2 "const0_operand" "")))]
2787 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2789 return "cvtpd2ps\t{%1, %0|%0, %1}";
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "amdfam10_decode" "double")
2793 (set_attr "athlon_decode" "vector")
2794 (set_attr "bdver1_decode" "double")
2795 (set_attr "prefix_data16" "1")
2796 (set_attr "prefix" "maybe_vex")
2797 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4DF destination computed from the V4SF
;; operand 1 (register or memory).  VEX-encoded only.
2799 (define_insn "avx_cvtps2pd256"
2800 [(set (match_operand:V4DF 0 "register_operand" "=x")
2802 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2804 "vcvtps2pd\t{%1, %0|%0, %1}"
2805 [(set_attr "type" "ssecvt")
2806 (set_attr "prefix" "vex")
2807 (set_attr "mode" "V4DF")])
;; vcvtps2pd taking its input from a V8SF operand: the parallel picks
;; elements 0..3 of operand 1 to produce the V4DF result.  The template
;; prints operand 1 with the %x modifier.
2809 (define_insn "*avx_cvtps2pd256_2"
2810 [(set (match_operand:V4DF 0 "register_operand" "=x")
2813 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2814 (parallel [(const_int 0) (const_int 1)
2815 (const_int 2) (const_int 3)]))))]
2817 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2818 [(set_attr "type" "ssecvt")
2819 (set_attr "prefix" "vex")
2820 (set_attr "mode" "V4DF")])
;; cvtps2pd (128-bit): V2DF result from elements 0 and 1 of the V4SF
;; operand 1 (register or memory).  In the Intel-syntax half of the
;; template operand 1 is printed with the %q modifier.
2822 (define_insn "sse2_cvtps2pd"
2823 [(set (match_operand:V2DF 0 "register_operand" "=x")
2826 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2827 (parallel [(const_int 0) (const_int 1)]))))]
2829 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2830 [(set_attr "type" "ssecvt")
2831 (set_attr "amdfam10_decode" "direct")
2832 (set_attr "athlon_decode" "double")
2833 (set_attr "bdver1_decode" "double")
2834 (set_attr "prefix_data16" "0")
2835 (set_attr "prefix" "maybe_vex")
2836 (set_attr "mode" "V2DF")])
;; Widen the high half of a V4SF vector to V2DF in two steps: a first
;; set whose parallel selects indices 6, 7, 2, 3 (operand 1 is the V4SF
;; input), followed by a set of the V2DF result that uses indices 0 and
;; 1.  operands[2] is a fresh V4SF pseudo created by the preparation
;; statement (presumably the intermediate of the first step).
2838 (define_expand "vec_unpacks_hi_v4sf"
2843 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2844 (parallel [(const_int 6) (const_int 7)
2845 (const_int 2) (const_int 3)])))
2846 (set (match_operand:V2DF 0 "register_operand" "")
2850 (parallel [(const_int 0) (const_int 1)]))))]
2852 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF vector to V4DF: elements 4..7 of
;; operand 1 are selected first, then the V4DF result is set.
;; operands[2] is a fresh V4SF pseudo created by the preparation
;; statement (presumably holding the selected half).
2854 (define_expand "vec_unpacks_hi_v8sf"
2857 (match_operand:V8SF 1 "nonimmediate_operand" "")
2858 (parallel [(const_int 4) (const_int 5)
2859 (const_int 6) (const_int 7)])))
2860 (set (match_operand:V4DF 0 "register_operand" "")
2864 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF vector to V2DF: the V2DF result is
;; formed from elements 0 and 1 of operand 1.
2866 (define_expand "vec_unpacks_lo_v4sf"
2867 [(set (match_operand:V2DF 0 "register_operand" "")
2870 (match_operand:V4SF 1 "nonimmediate_operand" "")
2871 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF vector to V4DF: the V4DF result is
;; formed from elements 0..3 of operand 1.
2874 (define_expand "vec_unpacks_lo_v8sf"
2875 [(set (match_operand:V4DF 0 "register_operand" "")
2878 (match_operand:V8SF 1 "nonimmediate_operand" "")
2879 (parallel [(const_int 0) (const_int 1)
2880 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode with half as
;; many elements: V8HI->V4SF, V4SI->V2DF, V16HI->V8SF, V8SI->V4DF.
;; Used as the result mode of the vec_unpack*_float_* expanders.
2883 (define_mode_attr sseunpackfltmode
2884 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Unpack the high half of a VI2_AVX2 vector and convert it to float:
;; gen_vec_unpacks_hi_<mode> widens operand 1 into a fresh
;; <sseunpackmode> pseudo, then operand 0 is set to FLOAT of that
;; pseudo in mode <sseunpackfltmode>.
2886 (define_expand "vec_unpacks_float_hi_<mode>"
2887 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2888 (match_operand:VI2_AVX2 1 "register_operand" "")]
2891 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2893 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2894 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2895 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unpack the low half of a VI2_AVX2 vector and convert it to float:
;; gen_vec_unpacks_lo_<mode> widens operand 1 into a fresh
;; <sseunpackmode> pseudo, then operand 0 is set to FLOAT of that
;; pseudo in mode <sseunpackfltmode>.
2899 (define_expand "vec_unpacks_float_lo_<mode>"
2900 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2901 (match_operand:VI2_AVX2 1 "register_operand" "")]
2904 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2906 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2907 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2908 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Same shape as vec_unpacks_float_hi_<mode>, but the widening step uses
;; gen_vec_unpacku_hi_<mode>; the widened pseudo is then converted with
;; FLOAT into mode <sseunpackfltmode>.
2912 (define_expand "vec_unpacku_float_hi_<mode>"
2913 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2914 (match_operand:VI2_AVX2 1 "register_operand" "")]
2917 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2919 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2920 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2921 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Same shape as vec_unpacks_float_lo_<mode>, but the widening step uses
;; gen_vec_unpacku_lo_<mode>; the widened pseudo is then converted with
;; FLOAT into mode <sseunpackfltmode>.
2925 (define_expand "vec_unpacku_float_lo_<mode>"
2926 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2927 (match_operand:VI2_AVX2 1 "register_operand" "")]
2930 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2932 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2933 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2934 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; High half of a V4SI vector converted to V2DF in two steps: a first
;; set selects indices 2, 3, 2, 3 of operand 1, then the V2DF result is
;; formed using indices 0 and 1.  operands[2] is a fresh V4SI pseudo
;; created by the preparation statement (presumably the intermediate
;; of the first step).
2938 (define_expand "vec_unpacks_float_hi_v4si"
2941 (match_operand:V4SI 1 "nonimmediate_operand" "")
2942 (parallel [(const_int 2) (const_int 3)
2943 (const_int 2) (const_int 3)])))
2944 (set (match_operand:V2DF 0 "register_operand" "")
2948 (parallel [(const_int 0) (const_int 1)]))))]
2950 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of a V4SI vector converted to V2DF: the result is formed
;; from elements 0 and 1 of operand 1.
2952 (define_expand "vec_unpacks_float_lo_v4si"
2953 [(set (match_operand:V2DF 0 "register_operand" "")
2956 (match_operand:V4SI 1 "nonimmediate_operand" "")
2957 (parallel [(const_int 0) (const_int 1)]))))]
;; High half of a V8SI vector converted to V4DF: elements 4..7 of
;; operand 1 are selected first, then the V4DF result is set.
;; operands[2] is a fresh V4SI pseudo created by the preparation
;; statement (presumably holding the selected half).
2960 (define_expand "vec_unpacks_float_hi_v8si"
2963 (match_operand:V8SI 1 "nonimmediate_operand" "")
2964 (parallel [(const_int 4) (const_int 5)
2965 (const_int 6) (const_int 7)])))
2966 (set (match_operand:V4DF 0 "register_operand" "")
2970 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of a V8SI vector converted to V4DF: the result is formed
;; from elements 0..3 of operand 1.
2972 (define_expand "vec_unpacks_float_lo_v8si"
2973 [(set (match_operand:V4DF 0 "register_operand" "")
2976 (match_operand:V8SI 1 "nonimmediate_operand" "")
2977 (parallel [(const_int 0) (const_int 1)
2978 (const_int 2) (const_int 3)]))))]
;; High half of a V4SI vector converted to V2DF, with a fix-up step
;; after the conversion.  Sequence visible in the template:
;;   - select indices 2, 3, 2, 3 of operand 1, then convert using
;;     elements 0 and 1;
;;   - lt:   compare the converted value (operand 6) against operand 3;
;;   - and:  mask operand 7 with operand 4;
;;   - plus: result = operand 6 + operand 8.
;; Preparation: operand 3 is a zero V2DF vector in a register, operand 4
;; a V2DF constant vector built from x (TWO32r, i.e. dconst1 scaled by
;; real_ldexp with exponent 32), operand 5 a V4SI pseudo and operands
;; 6..8 V2DF pseudos.  Presumably this adds 2^32 to lanes that came out
;; negative from a signed conversion -- confirm against the full
;; pattern.
2981 (define_expand "vec_unpacku_float_hi_v4si"
2984 (match_operand:V4SI 1 "nonimmediate_operand" "")
2985 (parallel [(const_int 2) (const_int 3)
2986 (const_int 2) (const_int 3)])))
2991 (parallel [(const_int 0) (const_int 1)]))))
2993 (lt:V2DF (match_dup 6) (match_dup 3)))
2995 (and:V2DF (match_dup 7) (match_dup 4)))
2996 (set (match_operand:V2DF 0 "register_operand" "")
2997 (plus:V2DF (match_dup 6) (match_dup 8)))]
3000 REAL_VALUE_TYPE TWO32r;
3004 real_ldexp (&TWO32r, &dconst1, 32);
3005 x = const_double_from_real_value (TWO32r, DFmode);
3007 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3008 operands[4] = force_reg (V2DFmode,
3009 ix86_build_const_vector (V2DFmode, 1, x));
3011 operands[5] = gen_reg_rtx (V4SImode);
3013 for (i = 6; i < 9; i++)
3014 operands[i] = gen_reg_rtx (V2DFmode);
;; Low half of a V4SI vector converted to V2DF, with a fix-up step after
;; the conversion.  Sequence visible in the template:
;;   - convert using elements 0 and 1 of operand 1;
;;   - lt:   compare the converted value (operand 5) against operand 3;
;;   - and:  mask operand 6 with operand 4;
;;   - plus: result = operand 5 + operand 7.
;; Preparation: operand 3 is a zero V2DF vector in a register, operand 4
;; a V2DF constant vector built from x (TWO32r, i.e. dconst1 scaled by
;; real_ldexp with exponent 32), and operands 5..7 are V2DF pseudos.
;; Presumably this adds 2^32 to lanes that came out negative from a
;; signed conversion -- confirm against the full pattern.
3017 (define_expand "vec_unpacku_float_lo_v4si"
3021 (match_operand:V4SI 1 "nonimmediate_operand" "")
3022 (parallel [(const_int 0) (const_int 1)]))))
3024 (lt:V2DF (match_dup 5) (match_dup 3)))
3026 (and:V2DF (match_dup 6) (match_dup 4)))
3027 (set (match_operand:V2DF 0 "register_operand" "")
3028 (plus:V2DF (match_dup 5) (match_dup 7)))]
3031 REAL_VALUE_TYPE TWO32r;
3035 real_ldexp (&TWO32r, &dconst1, 32);
3036 x = const_double_from_real_value (TWO32r, DFmode);
3038 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3039 operands[4] = force_reg (V2DFmode,
3040 ix86_build_const_vector (V2DFmode, 1, x));
3042 for (i = 5; i < 8; i++)
3043 operands[i] = gen_reg_rtx (V2DFmode);
;; High half of a V8SI vector converted to V4DF, emitted entirely from C:
;;   tmp[0] = zero V4DF vector, tmp[1] = V4DF constant built from x
;;   (TWO32r: dconst1 scaled by real_ldexp with exponent 32);
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si);
;;   tmp[2] = signed conversion of tmp[5] (gen_floatv4siv4df2);
;;   tmp[3] = (tmp[2] < tmp[0]);  tmp[4] = tmp[3] & tmp[1];
;;   result = tmp[2] + tmp[4].
;; I.e. the constant is added back only in lanes where the signed
;; conversion compared below zero.
3046 (define_expand "vec_unpacku_float_hi_v8si"
3047 [(match_operand:V4DF 0 "register_operand" "")
3048 (match_operand:V8SI 1 "register_operand" "")]
3051 REAL_VALUE_TYPE TWO32r;
3055 real_ldexp (&TWO32r, &dconst1, 32);
3056 x = const_double_from_real_value (TWO32r, DFmode);
3058 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3059 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3060 tmp[5] = gen_reg_rtx (V4SImode);
3062 for (i = 2; i < 5; i++)
3063 tmp[i] = gen_reg_rtx (V4DFmode);
3064 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3065 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3066 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3067 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3068 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3069 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Low half of a V8SI vector converted to V4DF, emitted entirely from C:
;;   tmp[0] = zero V4DF vector, tmp[1] = V4DF constant built from x
;;   (TWO32r: dconst1 scaled by real_ldexp with exponent 32);
;;   tmp[2] = conversion of operand 1 via gen_avx_cvtdq2pd256_2;
;;   tmp[3] = (tmp[2] < tmp[0]);  tmp[4] = tmp[3] & tmp[1];
;;   result = tmp[2] + tmp[4].
;; I.e. the constant is added back only in lanes where the conversion
;; compared below zero.
3073 (define_expand "vec_unpacku_float_lo_v8si"
3074 [(match_operand:V4DF 0 "register_operand" "")
3075 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3078 REAL_VALUE_TYPE TWO32r;
3082 real_ldexp (&TWO32r, &dconst1, 32);
3083 x = const_double_from_real_value (TWO32r, DFmode);
3085 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3086 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3088 for (i = 2; i < 5; i++)
3089 tmp[i] = gen_reg_rtx (V4DFmode);
3090 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3091 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3092 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3093 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3094 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Pack two V4DF vectors into one V8SF: operands 1 and 2 are each
;; narrowed with float_truncate:V4SF, and the V8SF operand 0 is then
;; set.  operands[3] and operands[4] are fresh V4SF pseudos created by
;; the preparation code (presumably the two narrowed halves).
3098 (define_expand "vec_pack_trunc_v4df"
3100 (float_truncate:V4SF
3101 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3103 (float_truncate:V4SF
3104 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3105 (set (match_operand:V8SF 0 "register_operand" "")
3111 operands[3] = gen_reg_rtx (V4SFmode);
3112 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF vectors into one V4SF.  Two strategies:
;;   - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into
;;     a V4DF pseudo (operand 1 is forced into a register first) and
;;     narrow it with a single gen_avx_cvtpd2ps256;
;;   - otherwise: narrow each input separately with gen_sse2_cvtpd2ps
;;     and combine the two results with gen_sse_movlhps.
3115 (define_expand "vec_pack_trunc_v2df"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V2DF 1 "nonimmediate_operand" "")
3118 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3123 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3125 tmp0 = gen_reg_rtx (V4DFmode);
3126 tmp1 = force_reg (V2DFmode, operands[1]);
3128 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3129 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3133 tmp0 = gen_reg_rtx (V4SFmode);
3134 tmp1 = gen_reg_rtx (V4SFmode);
3136 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3137 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3138 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V4DF vectors into one V8SI using the fix_trunc conversion:
;; each input is converted to a V4SI pseudo with
;; gen_fix_truncv4dfv4si2, and the two halves are joined with
;; gen_avx_vec_concatv8si.
3143 (define_expand "vec_pack_sfix_trunc_v4df"
3144 [(match_operand:V8SI 0 "register_operand" "")
3145 (match_operand:V4DF 1 "nonimmediate_operand" "")
3146 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3151 r1 = gen_reg_rtx (V4SImode);
3152 r2 = gen_reg_rtx (V4SImode);
3154 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3155 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3156 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI using the fix_trunc conversion.
;; Two strategies:
;;   - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into
;;     a V4DF pseudo and convert once with gen_fix_truncv4dfv4si2;
;;   - otherwise: convert each input with gen_sse2_cvttpd2dq and
;;     interleave the low V2DI parts of the two results
;;     (gen_vec_interleave_lowv2di on V2DImode lowparts).
3160 (define_expand "vec_pack_sfix_trunc_v2df"
3161 [(match_operand:V4SI 0 "register_operand" "")
3162 (match_operand:V2DF 1 "nonimmediate_operand" "")
3163 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3168 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3170 tmp0 = gen_reg_rtx (V4DFmode);
3171 tmp1 = force_reg (V2DFmode, operands[1]);
3173 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3174 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3178 tmp0 = gen_reg_rtx (V4SImode);
3179 tmp1 = gen_reg_rtx (V4SImode);
3181 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3182 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3184 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3185 gen_lowpart (V2DImode, tmp0),
3186 gen_lowpart (V2DImode, tmp1)));
;; Maps a double-precision vector mode to the integer vector mode that
;; holds two such vectors packed together: V4DF->V8SI, V2DF->V4SI.
3191 (define_mode_attr ssepackfltmode
3192 [(V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned pack-with-conversion for the VF2 modes, built on the signed
;; version:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts each input (tmp[0],
;;      tmp[1]) and returns a companion value through its out-parameter
;;      (tmp[2], tmp[3]);
;;   2. gen_vec_pack_sfix_trunc_<mode> packs the adjusted inputs into
;;      tmp[4];
;;   3. ix86_expand_vec_extract_even_odd (last argument 0) combines
;;      tmp[2] and tmp[3] into tmp[5] -- directly when the packed mode is
;;      V4SI or TARGET_AVX2 is set, otherwise through V8SF lowparts with
;;      the result viewed back as V8SI;
;;   4. tmp[4] XOR tmp[5] is computed, targeting operands[0]; a move is
;;      emitted if expand_simple_binop returned a different rtx.
3194 (define_expand "vec_pack_ufix_trunc_<mode>"
3195 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3196 (match_operand:VF2 1 "register_operand" "")
3197 (match_operand:VF2 2 "register_operand" "")]
3201 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3202 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3203 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3204 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3205 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3207 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3208 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3212 tmp[5] = gen_reg_rtx (V8SFmode);
3213 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3214 gen_lowpart (V8SFmode, tmp[3]), 0);
3215 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3217 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3218 operands[0], 0, OPTAB_DIRECT);
3219 if (tmp[6] != operands[0])
3220 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF vectors into one V8SI using the non-truncating
;; conversion: each input is converted to a V4SI pseudo with
;; gen_avx_cvtpd2dq256, and the two halves are joined with
;; gen_avx_vec_concatv8si.
3224 (define_expand "vec_pack_sfix_v4df"
3225 [(match_operand:V8SI 0 "register_operand" "")
3226 (match_operand:V4DF 1 "nonimmediate_operand" "")
3227 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3232 r1 = gen_reg_rtx (V4SImode);
3233 r2 = gen_reg_rtx (V4SImode);
3235 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3236 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3237 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI using the non-truncating
;; conversion.  Two strategies:
;;   - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into
;;     a V4DF pseudo and convert once with gen_avx_cvtpd2dq256;
;;   - otherwise: convert each input with gen_sse2_cvtpd2dq and
;;     interleave the low V2DI parts of the two results
;;     (gen_vec_interleave_lowv2di on V2DImode lowparts).
3241 (define_expand "vec_pack_sfix_v2df"
3242 [(match_operand:V4SI 0 "register_operand" "")
3243 (match_operand:V2DF 1 "nonimmediate_operand" "")
3244 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3249 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3251 tmp0 = gen_reg_rtx (V4DFmode);
3252 tmp1 = force_reg (V2DFmode, operands[1]);
3254 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3255 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3259 tmp0 = gen_reg_rtx (V4SImode);
3260 tmp1 = gen_reg_rtx (V4SImode);
3262 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3263 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3265 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3266 gen_lowpart (V2DImode, tmp0),
3267 gen_lowpart (V2DImode, tmp1)));
3272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3274 ;; Parallel single-precision floating point element swizzling
3276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps that accepts arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands returns the
;; destination to use (dst); sse_movhlps is emitted into dst, and a
;; final move copies dst to operand 0 when the two differ.
3278 (define_expand "sse_movhlps_exp"
3279 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3282 (match_operand:V4SF 1 "nonimmediate_operand" "")
3283 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3284 (parallel [(const_int 6)
3290 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3292 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3294 /* Fix up the destination if needed. */
3295 if (dst != operands[0])
3296 emit_move_insn (operands[0], dst);
;; movhlps and its memory-operand equivalents.  Rejected when operands 1
;; and 2 are both memory.  Five alternatives:
;;   0  movhlps   reg,reg  (non-AVX, destination tied to operand 1)
;;   1  vmovhlps  three-operand AVX form
;;   2  movlps    from %H2, operand 2 in offsettable memory (non-AVX)
;;   3  vmovlps   from %H2, operand 2 in offsettable memory (AVX)
;;   4  (v)movhps store to a memory destination, operand 1 tied to it
;; The "mode" attribute is V4SF for the register forms and V2SF for the
;; memory forms.
3301 (define_insn "sse_movhlps"
3302 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3305 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3306 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3307 (parallel [(const_int 6)
3311 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3313 movhlps\t{%2, %0|%0, %2}
3314 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3315 movlps\t{%H2, %0|%0, %H2}
3316 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3317 %vmovhps\t{%2, %0|%0, %2}"
3318 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3319 (set_attr "type" "ssemov")
3320 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3321 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps.  The C body passes the operands
;; through ix86_fixup_binary_operands, emits sse_movlhps into the
;; destination that call returns, and then copies the result into
;; operand 0 when a different destination was used.
3323 (define_expand "sse_movlhps_exp"
3324 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3327 (match_operand:V4SF 1 "nonimmediate_operand" "")
3328 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3329 (parallel [(const_int 0)
3335 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3337 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3339 /* Fix up the destination if needed. */
3340 if (dst != operands[0])
3341 emit_move_insn (operands[0], dst);
;; movlhps on V4SF.  Alternatives 0/1 are the register forms (legacy and
;; VEX).  Alternatives 2/3 are the memory-source forms movhps/vmovhps,
;; so operand 2 must be constrained to "m" in both of them: vmovhps has
;; no register-source encoding, and an "x" constraint there would only
;; duplicate alternative 1.  Alternative 4 writes to an offsettable
;; memory destination with movlps, addressed through %H0.
3346 (define_insn "sse_movlhps"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3350 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3351 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3352 (parallel [(const_int 0)
3356 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3358 movlhps\t{%2, %0|%0, %2}
3359 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3360 movhps\t{%2, %0|%0, %2}
3361 vmovhps\t{%2, %1, %0|%0, %1, %2}
3362 %vmovlps\t{%2, %H0|%H0, %2}"
3363 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3364 (set_attr "type" "ssemov")
3365 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3366 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3368 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: the selection indices over operands 1 and 2 are
;; 2,10,3,11 followed by 6,14,7,15.
3369 (define_insn "avx_unpckhps256"
3370 [(set (match_operand:V8SF 0 "register_operand" "=x")
3373 (match_operand:V8SF 1 "register_operand" "x")
3374 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3375 (parallel [(const_int 2) (const_int 10)
3376 (const_int 3) (const_int 11)
3377 (const_int 6) (const_int 14)
3378 (const_int 7) (const_int 15)])))]
3380 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3381 [(set_attr "type" "sselog")
3382 (set_attr "prefix" "vex")
3383 (set_attr "mode" "V8SF")])
;; High-half interleave for V8SF, expanded as three sets.  Two fresh
;; V8SF temporaries (operands 3 and 4, created in the preparation code)
;; receive the 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15 selections of
;; operands 1 and 2; operand 0 is then formed with the selection
;; 4-7,12-15 (presumably over those two temporaries).
3385 (define_expand "vec_interleave_highv8sf"
3389 (match_operand:V8SF 1 "register_operand" "x")
3390 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3391 (parallel [(const_int 0) (const_int 8)
3392 (const_int 1) (const_int 9)
3393 (const_int 4) (const_int 12)
3394 (const_int 5) (const_int 13)])))
3400 (parallel [(const_int 2) (const_int 10)
3401 (const_int 3) (const_int 11)
3402 (const_int 6) (const_int 14)
3403 (const_int 7) (const_int 15)])))
3404 (set (match_operand:V8SF 0 "register_operand" "")
3409 (parallel [(const_int 4) (const_int 5)
3410 (const_int 6) (const_int 7)
3411 (const_int 12) (const_int 13)
3412 (const_int 14) (const_int 15)])))]
3415 operands[3] = gen_reg_rtx (V8SFmode);
3416 operands[4] = gen_reg_rtx (V8SFmode);
;; High-half interleave for V4SF (selection 2,6,3,7).  Alternative 0 is
;; the two-operand unpckhps with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
3419 (define_insn "vec_interleave_highv4sf"
3420 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3423 (match_operand:V4SF 1 "register_operand" "0,x")
3424 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3425 (parallel [(const_int 2) (const_int 6)
3426 (const_int 3) (const_int 7)])))]
3429 unpckhps\t{%2, %0|%0, %2}
3430 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3431 [(set_attr "isa" "noavx,avx")
3432 (set_attr "type" "sselog")
3433 (set_attr "prefix" "orig,vex")
3434 (set_attr "mode" "V4SF")])
3436 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: the selection indices over operands 1 and 2 are
;; 0,8,1,9 followed by 4,12,5,13.
3437 (define_insn "avx_unpcklps256"
3438 [(set (match_operand:V8SF 0 "register_operand" "=x")
3441 (match_operand:V8SF 1 "register_operand" "x")
3442 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3443 (parallel [(const_int 0) (const_int 8)
3444 (const_int 1) (const_int 9)
3445 (const_int 4) (const_int 12)
3446 (const_int 5) (const_int 13)])))]
3448 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "type" "sselog")
3450 (set_attr "prefix" "vex")
3451 (set_attr "mode" "V8SF")])
;; Low-half interleave for V8SF, expanded as three sets.  Two fresh
;; V8SF temporaries (operands 3 and 4, created in the preparation code)
;; receive the 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15 selections of
;; operands 1 and 2; operand 0 is then formed with the selection
;; 0-3,8-11 (presumably over those two temporaries).
3453 (define_expand "vec_interleave_lowv8sf"
3457 (match_operand:V8SF 1 "register_operand" "x")
3458 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3459 (parallel [(const_int 0) (const_int 8)
3460 (const_int 1) (const_int 9)
3461 (const_int 4) (const_int 12)
3462 (const_int 5) (const_int 13)])))
3468 (parallel [(const_int 2) (const_int 10)
3469 (const_int 3) (const_int 11)
3470 (const_int 6) (const_int 14)
3471 (const_int 7) (const_int 15)])))
3472 (set (match_operand:V8SF 0 "register_operand" "")
3477 (parallel [(const_int 0) (const_int 1)
3478 (const_int 2) (const_int 3)
3479 (const_int 8) (const_int 9)
3480 (const_int 10) (const_int 11)])))]
3483 operands[3] = gen_reg_rtx (V8SFmode);
3484 operands[4] = gen_reg_rtx (V8SFmode);
;; Low-half interleave for V4SF (selection 0,4,1,5).  Alternative 0 is
;; the two-operand unpcklps with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
3487 (define_insn "vec_interleave_lowv4sf"
3488 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3491 (match_operand:V4SF 1 "register_operand" "0,x")
3492 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3493 (parallel [(const_int 0) (const_int 4)
3494 (const_int 1) (const_int 5)])))]
3497 unpcklps\t{%2, %0|%0, %2}
3498 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3499 [(set_attr "isa" "noavx,avx")
3500 (set_attr "type" "sselog")
3501 (set_attr "prefix" "orig,vex")
3502 (set_attr "mode" "V4SF")])
3504 ;; These are modeled with the same vec_concat as the others so that we
3505 ;; capture users of shufps that can use the new instructions.
;; vmovshdup on V8SF: every odd-indexed element of operand 1 is
;; selected twice (1,1,3,3,5,5,7,7).
3506 (define_insn "avx_movshdup256"
3507 [(set (match_operand:V8SF 0 "register_operand" "=x")
3510 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3512 (parallel [(const_int 1) (const_int 1)
3513 (const_int 3) (const_int 3)
3514 (const_int 5) (const_int 5)
3515 (const_int 7) (const_int 7)])))]
3517 "vmovshdup\t{%1, %0|%0, %1}"
3518 [(set_attr "type" "sse")
3519 (set_attr "prefix" "vex")
3520 (set_attr "mode" "V8SF")])
;; 128-bit (V4SF) movshdup.  A single alternative serves both encodings:
;; the template starts with %v and the prefix attribute is maybe_vex.
3522 (define_insn "sse3_movshdup"
3523 [(set (match_operand:V4SF 0 "register_operand" "=x")
3526 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3528 (parallel [(const_int 1)
3533 "%vmovshdup\t{%1, %0|%0, %1}"
3534 [(set_attr "type" "sse")
3535 (set_attr "prefix_rep" "1")
3536 (set_attr "prefix" "maybe_vex")
3537 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: every even-indexed element of operand 1 is
;; selected twice (0,0,2,2,4,4,6,6).
3539 (define_insn "avx_movsldup256"
3540 [(set (match_operand:V8SF 0 "register_operand" "=x")
3543 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3545 (parallel [(const_int 0) (const_int 0)
3546 (const_int 2) (const_int 2)
3547 (const_int 4) (const_int 4)
3548 (const_int 6) (const_int 6)])))]
3550 "vmovsldup\t{%1, %0|%0, %1}"
3551 [(set_attr "type" "sse")
3552 (set_attr "prefix" "vex")
3553 (set_attr "mode" "V8SF")])
;; 128-bit (V4SF) movsldup.  A single alternative serves both encodings:
;; the template starts with %v and the prefix attribute is maybe_vex.
3555 (define_insn "sse3_movsldup"
3556 [(set (match_operand:V4SF 0 "register_operand" "=x")
3559 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3561 (parallel [(const_int 0)
3566 "%vmovsldup\t{%1, %0|%0, %1}"
3567 [(set_attr "type" "sse")
3568 (set_attr "prefix_rep" "1")
3569 (set_attr "prefix" "maybe_vex")
3570 (set_attr "mode" "V4SF")])
;; Expander that takes the shufps immediate as operand 3 and decodes it
;; into the eight explicit element indices avx_shufps256_1 expects.
;; Each 2-bit field of the mask yields two indices: the field value
;; itself and the same value plus 4.  The two upper fields are
;; additionally offset by 8 (8-11 and 12-15).
3572 (define_expand "avx_shufps256"
3573 [(match_operand:V8SF 0 "register_operand" "")
3574 (match_operand:V8SF 1 "register_operand" "")
3575 (match_operand:V8SF 2 "nonimmediate_operand" "")
3576 (match_operand:SI 3 "const_int_operand" "")]
3579 int mask = INTVAL (operands[3]);
3580 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3581 GEN_INT ((mask >> 0) & 3),
3582 GEN_INT ((mask >> 2) & 3),
3583 GEN_INT (((mask >> 4) & 3) + 8),
3584 GEN_INT (((mask >> 6) & 3) + 8),
3585 GEN_INT (((mask >> 0) & 3) + 4),
3586 GEN_INT (((mask >> 2) & 3) + 4),
3587 GEN_INT (((mask >> 4) & 3) + 12),
3588 GEN_INT (((mask >> 6) & 3) + 12)));
3592 ;; Each 2-bit field in mask selects 2 elements (index n and index n + 4).
;; The insn condition therefore requires operands 7-10 to equal operands
;; 3-6 plus 4.  The output code packs operands 3-6 (the latter two
;; rebased by 8) back into one immediate, stored in operand 3 for the
;; vshufps template.
3593 (define_insn "avx_shufps256_1"
3594 [(set (match_operand:V8SF 0 "register_operand" "=x")
3597 (match_operand:V8SF 1 "register_operand" "x")
3598 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3599 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3600 (match_operand 4 "const_0_to_3_operand" "")
3601 (match_operand 5 "const_8_to_11_operand" "")
3602 (match_operand 6 "const_8_to_11_operand" "")
3603 (match_operand 7 "const_4_to_7_operand" "")
3604 (match_operand 8 "const_4_to_7_operand" "")
3605 (match_operand 9 "const_12_to_15_operand" "")
3606 (match_operand 10 "const_12_to_15_operand" "")])))]
3608 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3609 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3610 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3611 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3614 mask = INTVAL (operands[3]);
3615 mask |= INTVAL (operands[4]) << 2;
3616 mask |= (INTVAL (operands[5]) - 8) << 4;
3617 mask |= (INTVAL (operands[6]) - 8) << 6;
3618 operands[3] = GEN_INT (mask);
3620 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3622 [(set_attr "type" "sselog")
3623 (set_attr "length_immediate" "1")
3624 (set_attr "prefix" "vex")
3625 (set_attr "mode" "V8SF")])
;; Expander that takes the shufps immediate as operand 3 and decodes its
;; four 2-bit fields into the explicit element indices passed to
;; sse_shufps_v4sf.  The two upper fields are offset by 4.
3627 (define_expand "sse_shufps"
3628 [(match_operand:V4SF 0 "register_operand" "")
3629 (match_operand:V4SF 1 "register_operand" "")
3630 (match_operand:V4SF 2 "nonimmediate_operand" "")
3631 (match_operand:SI 3 "const_int_operand" "")]
3634 int mask = INTVAL (operands[3]);
3635 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3636 GEN_INT ((mask >> 0) & 3),
3637 GEN_INT ((mask >> 2) & 3),
3638 GEN_INT (((mask >> 4) & 3) + 4),
3639 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps for the VI4F_128 modes with explicit element indices.
;; Operands 3/4 index operand 1 (0-3) and operands 5/6 index operand 2
;; (4-7).  The output code packs them (the latter two rebased by 4) back
;; into one immediate in operand 3, then returns the legacy or the VEX
;; template depending on which_alternative.
3643 (define_insn "sse_shufps_<mode>"
3644 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3645 (vec_select:VI4F_128
3646 (vec_concat:<ssedoublevecmode>
3647 (match_operand:VI4F_128 1 "register_operand" "0,x")
3648 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3649 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3650 (match_operand 4 "const_0_to_3_operand" "")
3651 (match_operand 5 "const_4_to_7_operand" "")
3652 (match_operand 6 "const_4_to_7_operand" "")])))]
3656 mask |= INTVAL (operands[3]) << 0;
3657 mask |= INTVAL (operands[4]) << 2;
3658 mask |= (INTVAL (operands[5]) - 4) << 4;
3659 mask |= (INTVAL (operands[6]) - 4) << 6;
3660 operands[3] = GEN_INT (mask);
3662 switch (which_alternative)
3665 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3667 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3672 [(set_attr "isa" "noavx,avx")
3673 (set_attr "type" "sselog")
3674 (set_attr "length_immediate" "1")
3675 (set_attr "prefix" "orig,vex")
3676 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternative 0 stores to memory with movhps, alternative 1 is the
;; register-to-register movhlps, and alternative 2 loads from an
;; offsettable memory source printed through %H1.
3678 (define_insn "sse_storehps"
3679 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3681 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3682 (parallel [(const_int 2) (const_int 3)])))]
3685 %vmovhps\t{%1, %0|%0, %1}
3686 %vmovhlps\t{%1, %d0|%d0, %1}
3687 %vmovlps\t{%H1, %d0|%d0, %H1}"
3688 [(set_attr "type" "ssemov")
3689 (set_attr "prefix" "maybe_vex")
3690 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps.  The C body passes the operands
;; through ix86_fixup_binary_operands, emits sse_loadhps into the
;; destination that call returns, and then copies the result into
;; operand 0 when a different destination was used.
3692 (define_expand "sse_loadhps_exp"
3693 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3696 (match_operand:V4SF 1 "nonimmediate_operand" "")
3697 (parallel [(const_int 0) (const_int 1)]))
3698 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3701 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3703 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3705 /* Fix up the destination if needed. */
3706 if (dst != operands[0])
3707 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps/vmovhps),
;; alternatives 2/3 take it from a register (movlhps/vmovlhps), and
;; alternative 4 writes register operand 2 into an offsettable memory
;; destination with movlps, addressed through %H0.
3712 (define_insn "sse_loadhps"
3713 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3716 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3717 (parallel [(const_int 0) (const_int 1)]))
3718 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3721 movhps\t{%2, %0|%0, %2}
3722 vmovhps\t{%2, %1, %0|%0, %1, %2}
3723 movlhps\t{%2, %0|%0, %2}
3724 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3725 %vmovlps\t{%2, %H0|%H0, %2}"
3726 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3727 (set_attr "type" "ssemov")
3728 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3729 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternative 0 stores to memory with movlps, alternative 1 copies the
;; whole register with movaps, and alternative 2 loads from memory with
;; movlps.
3731 (define_insn "sse_storelps"
3732 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3734 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3735 (parallel [(const_int 0) (const_int 1)])))]
3738 %vmovlps\t{%1, %0|%0, %1}
3739 %vmovaps\t{%1, %0|%0, %1}
3740 %vmovlps\t{%1, %d0|%d0, %1}"
3741 [(set_attr "type" "ssemov")
3742 (set_attr "prefix" "maybe_vex")
3743 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps.  The C body passes the operands
;; through ix86_fixup_binary_operands, emits sse_loadlps into the
;; destination that call returns, and then copies the result into
;; operand 0 when a different destination was used.
3745 (define_expand "sse_loadlps_exp"
3746 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3748 (match_operand:V2SF 2 "nonimmediate_operand" "")
3750 (match_operand:V4SF 1 "nonimmediate_operand" "")
3751 (parallel [(const_int 2) (const_int 3)]))))]
3754 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3756 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3758 /* Fix up the destination if needed. */
3759 if (dst != operands[0])
3760 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 are the register forms, done with shufps $0xe4.
;; Alternatives 2/3 are the memory-source forms movlps/vmovlps, so
;; operand 2 must be constrained to "m" in both of them: vmovlps has no
;; register-source encoding, and an "x" constraint there would only
;; duplicate alternative 1.  Alternative 4 stores register operand 2
;; into a memory destination.
3765 (define_insn "sse_loadlps"
3766 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3768 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3770 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3771 (parallel [(const_int 2) (const_int 3)]))))]
3774 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3775 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3776 movlps\t{%2, %0|%0, %2}
3777 vmovlps\t{%2, %1, %0|%0, %1, %2}
3778 %vmovlps\t{%2, %0|%0, %2}"
3779 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3780 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3781 (set_attr "length_immediate" "1,1,*,*,*")
3782 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3783 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss merge of V4SF operands 2 and 1.  Alternative 0 is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
3785 (define_insn "sse_movss"
3786 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3788 (match_operand:V4SF 2 "register_operand" " x,x")
3789 (match_operand:V4SF 1 "register_operand" " 0,x")
3793 movss\t{%2, %0|%0, %2}
3794 vmovss\t{%2, %1, %0|%0, %1, %2}"
3795 [(set_attr "isa" "noavx,avx")
3796 (set_attr "type" "ssemov")
3797 (set_attr "prefix" "orig,vex")
3798 (set_attr "mode" "SF")])
;; vbroadcastss with a register source: element 0 of V4SF operand 1 is
;; the value placed in the VF1-mode destination.
3800 (define_insn "avx2_vec_dup<mode>"
3801 [(set (match_operand:VF1 0 "register_operand" "=x")
3804 (match_operand:V4SF 1 "register_operand" "x")
3805 (parallel [(const_int 0)]))))]
3807 "vbroadcastss\t{%1, %0|%0, %1}"
3808 [(set_attr "type" "sselog1")
3809 (set_attr "prefix" "vex")
3810 (set_attr "mode" "<MODE>")])
;; Builds a V4SF from SF operand 1.  Alternative 0 (AVX, register
;; source) uses vshufps $0, alternative 1 (AVX, memory source) uses
;; vbroadcastss, and alternative 2 (non-AVX) shuffles the destination
;; register in place with operand 1 tied to it.
3812 (define_insn "vec_dupv4sf"
3813 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3815 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3818 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3819 vbroadcastss\t{%1, %0|%0, %1}
3820 shufps\t{$0, %0, %0|%0, %0, 0}"
3821 [(set_attr "isa" "avx,avx,noavx")
3822 (set_attr "type" "sselog1,ssemov,sselog1")
3823 (set_attr "length_immediate" "1,0,1")
3824 (set_attr "prefix_extra" "0,1,*")
3825 (set_attr "prefix" "vex,vex,orig")
3826 (set_attr "mode" "V4SF")])
3828 ;; Although insertps takes register source, we prefer
3829 ;; unpcklps with register source since it is shorter.
;; Accordingly alternatives 0/1 (register operand 2) use unpcklps and
;; alternatives 2/3 (memory operand 2) use insertps $0x10.  Alternative
;; 4 loads operand 1 from memory with movss; alternatives 5/6 are the
;; MMX-register ("*y") forms using punpckldq and movd.
3830 (define_insn "*vec_concatv2sf_sse4_1"
3831 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3833 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3834 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3837 unpcklps\t{%2, %0|%0, %2}
3838 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3839 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3840 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3841 %vmovss\t{%1, %0|%0, %1}
3842 punpckldq\t{%2, %0|%0, %2}
3843 movd\t{%1, %0|%0, %1}"
3844 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3845 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3846 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3847 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3848 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3849 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3850 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3852 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3853 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3854 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Alternatives 0/1 use SSE registers (unpcklps, or a movss load of
;; operand 1); alternatives 2/3 are the MMX-register ("*y") forms using
;; punpckldq and movd.
3855 (define_insn "*vec_concatv2sf_sse"
3856 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3858 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3859 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3862 unpcklps\t{%2, %0|%0, %2}
3863 movss\t{%1, %0|%0, %1}
3864 punpckldq\t{%2, %0|%0, %2}
3865 movd\t{%1, %0|%0, %1}"
3866 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3867 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF halves.  Register operand 2 uses
;; movlhps/vmovlhps (alternatives 0/1); memory operand 2 uses
;; movhps/vmovhps (alternatives 2/3).
3869 (define_insn "*vec_concatv4sf"
3870 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3872 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3873 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3876 movlhps\t{%2, %0|%0, %2}
3877 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3878 movhps\t{%2, %0|%0, %2}
3879 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3880 [(set_attr "isa" "noavx,avx,noavx,avx")
3881 (set_attr "type" "ssemov")
3882 (set_attr "prefix" "orig,vex,orig,vex")
3883 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for the V_128 modes.  All the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3885 (define_expand "vec_init<mode>"
3886 [(match_operand:V_128 0 "register_operand" "")
3887 (match_operand 1 "" "")]
3890 ix86_expand_vector_init (false, operands[0], operands[1]);
3894 ;; Avoid combining registers from different units in a single alternative,
3895 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Scalar operand 2 is duplicated (vec_duplicate) and combined with
;; vector operand 1 in the VI4F_128 modes.  There are eleven
;; alternatives: 0-7 target an SSE register, 8-10 target memory and
;; leave isa, prefix and mode unrestricted ("*").  The type attribute is
;; sselog for alternatives 0, 6 and 7, fmov for 9, imov for 10 and
;; ssemov otherwise.
3896 (define_insn "vec_set<mode>_0"
3897 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3898 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3900 (vec_duplicate:VI4F_128
3901 (match_operand:<ssescalarmode> 2 "general_operand"
3902 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3903 (match_operand:VI4F_128 1 "vector_move_operand"
3904 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3908 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3909 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3910 %vmovd\t{%2, %0|%0, %2}
3911 movss\t{%2, %0|%0, %2}
3912 movss\t{%2, %0|%0, %2}
3913 vmovss\t{%2, %1, %0|%0, %1, %2}
3914 pinsrd\t{$0, %2, %0|%0, %2, 0}
3915 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3919 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3921 (cond [(eq_attr "alternative" "0,6,7")
3922 (const_string "sselog")
3923 (eq_attr "alternative" "9")
3924 (const_string "fmov")
3925 (eq_attr "alternative" "10")
3926 (const_string "imov")
3928 (const_string "ssemov")))
3929 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3930 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3931 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3932 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3934 ;; A subset is vec_setv4sf.
;; Operand 3 must be a power of two whose exact_log2 is below the number
;; of V4SF elements.  The output code turns it into the insertps
;; immediate by shifting that log2 left by 4 before printing the legacy
;; or the VEX template.
3935 (define_insn "*vec_setv4sf_sse4_1"
3936 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3939 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3940 (match_operand:V4SF 1 "register_operand" "0,x")
3941 (match_operand:SI 3 "const_int_operand" "")))]
3943 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3944 < GET_MODE_NUNITS (V4SFmode))"
3946 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3947 switch (which_alternative)
3950 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3952 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3957 [(set_attr "isa" "noavx,avx")
3958 (set_attr "type" "sselog")
3959 (set_attr "prefix_data16" "1,*")
3960 (set_attr "prefix_extra" "1")
3961 (set_attr "length_immediate" "1")
3962 (set_attr "prefix" "orig,vex")
3963 (set_attr "mode" "V4SF")])
;; insertps with an explicit 8-bit immediate (operand 3).  When operand
;; 2 is in memory, the output code takes bits 7:6 of the immediate as an
;; element count, folds it into the address as an SFmode reference at
;; count * 4 bytes, and keeps only the low six bits in the immediate.
3965 (define_insn "sse4_1_insertps"
3966 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3967 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3968 (match_operand:V4SF 1 "register_operand" "0,x")
3969 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3973 if (MEM_P (operands[2]))
3975 unsigned count_s = INTVAL (operands[3]) >> 6;
3977 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3978 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3980 switch (which_alternative)
3983 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3985 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3990 [(set_attr "isa" "noavx,avx")
3991 (set_attr "type" "sselog")
3992 (set_attr "prefix_data16" "1,*")
3993 (set_attr "prefix_extra" "1")
3994 (set_attr "length_immediate" "1")
3995 (set_attr "prefix" "orig,vex")
3996 (set_attr "mode" "V4SF")])
;; After reload, a vec_duplicate-based set whose destination is memory
;; (VI4F_128 modes) is rewritten as a plain scalar move to offset 0 of
;; that memory operand.
3999 [(set (match_operand:VI4F_128 0 "memory_operand" "")
4001 (vec_duplicate:VI4F_128
4002 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4005 "TARGET_SSE && reload_completed"
4008 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set expander for every mode in V.  Operand 2 is the constant
;; element index; the work is delegated to ix86_expand_vector_set,
;; called with false as its first argument.
4013 (define_expand "vec_set<mode>"
4014 [(match_operand:V 0 "register_operand" "")
4015 (match_operand:<ssescalarmode> 1 "register_operand" "")
4016 (match_operand 2 "const_int_operand" "")]
4019 ix86_expand_vector_set (false, operands[0], operands[1],
4020 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  After reload this is split into
;; an ordinary SFmode move: operand 1 is re-expressed in SFmode (either
;; by gen_rtx_REG on the same register number or by gen_lowpart) and
;; moved into operand 0.  Memory-to-memory is rejected by the condition.
4024 (define_insn_and_split "*vec_extractv4sf_0"
4025 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4027 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4028 (parallel [(const_int 0)])))]
4029 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4031 "&& reload_completed"
4034 rtx op1 = operands[1];
4036 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4038 op1 = gen_lowpart (SFmode, op1);
4039 emit_move_insn (operands[0], op1);
;; Extraction of element operand 2 (0-3) from a V4SF register.
;; Alternative 0 (general register or memory destination) emits
;; extractps directly.  When the destination is an SSE register, the
;; insn is split after reload: the destination is viewed as V4SF and
;; filled with either a shufps that replicates the requested element or
;; vec_interleave_highv4sf of operand 1 with itself, depending on the
;; index.
4043 (define_insn_and_split "*sse4_1_extractps"
4044 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4046 (match_operand:V4SF 1 "register_operand" "x,0,x")
4047 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4050 %vextractps\t{%2, %1, %0|%0, %1, %2}
4053 "&& reload_completed && SSE_REG_P (operands[0])"
4056 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4057 switch (INTVAL (operands[2]))
4061 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4062 operands[2], operands[2],
4063 GEN_INT (INTVAL (operands[2]) + 4),
4064 GEN_INT (INTVAL (operands[2]) + 4)));
4067 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4070 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4075 [(set_attr "isa" "*,noavx,avx")
4076 (set_attr "type" "sselog,*,*")
4077 (set_attr "prefix_data16" "1,*,*")
4078 (set_attr "prefix_extra" "1,*,*")
4079 (set_attr "length_immediate" "1,*,*")
4080 (set_attr "prefix" "maybe_vex,*,*")
4081 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element operand 2 from a V4SF held in offsettable
;; memory.  After reload it is split into a scalar SFmode load from
;; byte offset index * 4 of the memory operand.
4083 (define_insn_and_split "*vec_extract_v4sf_mem"
4084 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4086 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4087 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4090 "&& reload_completed"
4093 int i = INTVAL (operands[2]);
4095 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting a 128-bit half of a V_256 register.  The
;; constant operand 2 (0 or 1) chooses between the
;; gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode> generators,
;; which is then called on operands 0 and 1.
4099 (define_expand "avx_vextractf128<mode>"
4100 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4101 (match_operand:V_256 1 "register_operand" "")
4102 (match_operand:SI 2 "const_0_to_1_operand" "")]
4105 rtx (*insn)(rtx, rtx);
4107 switch (INTVAL (operands[2]))
4110 insn = gen_vec_extract_lo_<mode>;
4113 insn = gen_vec_extract_hi_<mode>;
4119 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a VI8F_256 vector.  After reload this
;; is split into a plain half-width move: operand 1 is re-expressed in
;; the half mode (either by gen_rtx_REG on the same register number or
;; by gen_lowpart) and moved into operand 0.
4123 (define_insn_and_split "vec_extract_lo_<mode>"
4124 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4125 (vec_select:<ssehalfvecmode>
4126 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4127 (parallel [(const_int 0) (const_int 1)])))]
4128 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4130 "&& reload_completed"
4133 rtx op1 = operands[1];
4135 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4137 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4138 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of a VI8F_256 register, written to a
;; register or to memory with vextract<i128> and immediate 1.
4142 (define_insn "vec_extract_hi_<mode>"
4143 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4144 (vec_select:<ssehalfvecmode>
4145 (match_operand:VI8F_256 1 "register_operand" "x,x")
4146 (parallel [(const_int 2) (const_int 3)])))]
4148 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4149 [(set_attr "type" "sselog")
4150 (set_attr "prefix_extra" "1")
4151 (set_attr "length_immediate" "1")
4152 (set_attr "memory" "none,store")
4153 (set_attr "prefix" "vex")
4154 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-3) of a VI4F_256 vector.  After reload this is
;; split into a plain half-width move: operand 1 is re-expressed in the
;; half mode (either by gen_rtx_REG on the same register number or by
;; gen_lowpart) and moved into operand 0.
4156 (define_insn_and_split "vec_extract_lo_<mode>"
4157 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4158 (vec_select:<ssehalfvecmode>
4159 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4160 (parallel [(const_int 0) (const_int 1)
4161 (const_int 2) (const_int 3)])))]
4162 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4164 "&& reload_completed"
4167 rtx op1 = operands[1];
4169 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4171 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4172 emit_move_insn (operands[0], op1);
;; High half (elements 4-7) of a VI4F_256 register, written to a
;; register or to memory with vextract<i128> and immediate 1.
4176 (define_insn "vec_extract_hi_<mode>"
4177 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4178 (vec_select:<ssehalfvecmode>
4179 (match_operand:VI4F_256 1 "register_operand" "x,x")
4180 (parallel [(const_int 4) (const_int 5)
4181 (const_int 6) (const_int 7)])))]
4183 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_extra" "1")
4186 (set_attr "length_immediate" "1")
4187 (set_attr "memory" "none,store")
4188 (set_attr "prefix" "vex")
4189 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-7) of a V16HI vector as V8HI.  After reload this
;; is split into a plain V8HI move: operand 1 is re-expressed in
;; V8HImode (either by gen_rtx_REG on the same register number or by
;; gen_lowpart) and moved into operand 0.
4191 (define_insn_and_split "vec_extract_lo_v16hi"
4192 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4194 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4195 (parallel [(const_int 0) (const_int 1)
4196 (const_int 2) (const_int 3)
4197 (const_int 4) (const_int 5)
4198 (const_int 6) (const_int 7)])))]
4199 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4201 "&& reload_completed"
4204 rtx op1 = operands[1];
4206 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4208 op1 = gen_lowpart (V8HImode, op1);
4209 emit_move_insn (operands[0], op1);
;; High half (elements 8-15) of a V16HI register as V8HI, written to a
;; register or to memory with vextract%~128 and immediate 1.
4213 (define_insn "vec_extract_hi_v16hi"
4214 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4216 (match_operand:V16HI 1 "register_operand" "x,x")
4217 (parallel [(const_int 8) (const_int 9)
4218 (const_int 10) (const_int 11)
4219 (const_int 12) (const_int 13)
4220 (const_int 14) (const_int 15)])))]
4222 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_extra" "1")
4225 (set_attr "length_immediate" "1")
4226 (set_attr "memory" "none,store")
4227 (set_attr "prefix" "vex")
4228 (set_attr "mode" "OI")])
;; Low half (elements 0-15) of a V32QI vector as V16QI.  After reload
;; this is split into a plain V16QI move: operand 1 is re-expressed in
;; V16QImode (either by gen_rtx_REG on the same register number or by
;; gen_lowpart) and moved into operand 0.
4230 (define_insn_and_split "vec_extract_lo_v32qi"
4231 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4233 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4234 (parallel [(const_int 0) (const_int 1)
4235 (const_int 2) (const_int 3)
4236 (const_int 4) (const_int 5)
4237 (const_int 6) (const_int 7)
4238 (const_int 8) (const_int 9)
4239 (const_int 10) (const_int 11)
4240 (const_int 12) (const_int 13)
4241 (const_int 14) (const_int 15)])))]
4242 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4244 "&& reload_completed"
4247 rtx op1 = operands[1];
4249 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4251 op1 = gen_lowpart (V16QImode, op1);
4252 emit_move_insn (operands[0], op1);
;; High half (elements 16-31) of a V32QI register as V16QI, written to a
;; register or to memory with vextract%~128 and immediate 1.
4256 (define_insn "vec_extract_hi_v32qi"
4257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4259 (match_operand:V32QI 1 "register_operand" "x,x")
4260 (parallel [(const_int 16) (const_int 17)
4261 (const_int 18) (const_int 19)
4262 (const_int 20) (const_int 21)
4263 (const_int 22) (const_int 23)
4264 (const_int 24) (const_int 25)
4265 (const_int 26) (const_int 27)
4266 (const_int 28) (const_int 29)
4267 (const_int 30) (const_int 31)])))]
4269 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4270 [(set_attr "type" "sselog")
4271 (set_attr "prefix_extra" "1")
4272 (set_attr "length_immediate" "1")
4273 (set_attr "memory" "none,store")
4274 (set_attr "prefix" "vex")
4275 (set_attr "mode" "OI")])
4277 ;; Modes handled by vec_extract patterns.
;; The 256-bit modes are enabled only under TARGET_AVX; the 128-bit
;; modes are listed unconditionally.
4278 (define_mode_iterator VEC_EXTRACT_MODE
4279 [(V32QI "TARGET_AVX") V16QI
4280 (V16HI "TARGET_AVX") V8HI
4281 (V8SI "TARGET_AVX") V4SI
4282 (V4DI "TARGET_AVX") V2DI
4283 (V8SF "TARGET_AVX") V4SF
4284 (V4DF "TARGET_AVX") V2DF])
;; vec_extract expander for every mode in VEC_EXTRACT_MODE.  Operand 2
;; is the constant element index; the work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
4286 (define_expand "vec_extract<mode>"
4287 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4288 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4289 (match_operand 2 "const_int_operand" "")]
4292 ix86_expand_vector_extract (false, operands[0], operands[1],
4293 INTVAL (operands[2]));
4297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4299 ;; Parallel double-precision floating point element swizzling
4301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4303 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF: the selection indices over operands 1 and 2 are
;; 1,5 followed by 3,7.
4304 (define_insn "avx_unpckhpd256"
4305 [(set (match_operand:V4DF 0 "register_operand" "=x")
4308 (match_operand:V4DF 1 "register_operand" "x")
4309 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4310 (parallel [(const_int 1) (const_int 5)
4311 (const_int 3) (const_int 7)])))]
4313 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4314 [(set_attr "type" "sselog")
4315 (set_attr "prefix" "vex")
4316 (set_attr "mode" "V4DF")])
;; High-half interleave for V4DF, expanded as three sets.  Two fresh
;; V4DF temporaries (operands 3 and 4, created in the preparation code)
;; receive the 0,4,2,6 and 1,5,3,7 selections of operands 1 and 2;
;; operand 0 is then formed with the selection 2,3,6,7 (presumably over
;; those two temporaries).
4318 (define_expand "vec_interleave_highv4df"
4322 (match_operand:V4DF 1 "register_operand" "x")
4323 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4324 (parallel [(const_int 0) (const_int 4)
4325 (const_int 2) (const_int 6)])))
4331 (parallel [(const_int 1) (const_int 5)
4332 (const_int 3) (const_int 7)])))
4333 (set (match_operand:V4DF 0 "register_operand" "")
4338 (parallel [(const_int 2) (const_int 3)
4339 (const_int 6) (const_int 7)])))]
4342 operands[3] = gen_reg_rtx (V4DFmode);
4343 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If the operand combination
;; is rejected by ix86_vec_interleave_v2df_operator_ok (operands, 1),
;; operand 2 is forced into a register before the insn is generated.
4347 (define_expand "vec_interleave_highv2df"
4348 [(set (match_operand:V2DF 0 "register_operand" "")
4351 (match_operand:V2DF 1 "nonimmediate_operand" "")
4352 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4353 (parallel [(const_int 1)
4357 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4358 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave.  Alternatives 0/1 are the register forms
;; (unpckhpd/vunpckhpd).  Alternative 2 (SSE3) handles operand 2
;; matching memory operand 1 with movddup on %H1.  Alternatives 3/4
;; take operand 1 from offsettable memory with movlpd/vmovlpd on %H1.
;; Alternative 5 stores to a memory destination with movhpd.
4361 (define_insn "*vec_interleave_highv2df"
4362 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4365 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4366 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4367 (parallel [(const_int 1)
4369 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4371 unpckhpd\t{%2, %0|%0, %2}
4372 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4373 %vmovddup\t{%H1, %0|%0, %H1}
4374 movlpd\t{%H1, %0|%0, %H1}
4375 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4376 %vmovhpd\t{%1, %0|%0, %1}"
4377 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4378 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4379 (set_attr "prefix_data16" "*,*,*,1,*,1")
4380 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4381 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4383 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a single V4DF source operand with
;; the selection 0,4,2,6.
4384 (define_expand "avx_movddup256"
4385 [(set (match_operand:V4DF 0 "register_operand" "")
4388 (match_operand:V4DF 1 "nonimmediate_operand" "")
4390 (parallel [(const_int 0) (const_int 4)
4391 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit unpcklpd: two V4DF source operands with the
;; selection 0,4,2,6.
4394 (define_expand "avx_unpcklpd256"
4395 [(set (match_operand:V4DF 0 "register_operand" "")
4398 (match_operand:V4DF 1 "register_operand" "")
4399 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4400 (parallel [(const_int 0) (const_int 4)
4401 (const_int 2) (const_int 6)])))]
;; Insn for the 0,4,2,6 selection on V4DF.  Alternative 0 is the general
;; vunpcklpd.  Alternative 1 applies when operand 1 is in memory and
;; operand 2 is the same operand (constraint "1"), and emits vmovddup
;; from operand 1 instead.
4404 (define_insn "*avx_unpcklpd256"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4408 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4409 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4410 (parallel [(const_int 0) (const_int 4)
4411 (const_int 2) (const_int 6)])))]
4414 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4415 vmovddup\t{%1, %0|%0, %1}"
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix" "vex")
4418 (set_attr "mode" "V4DF")])
;; Low-half interleave for V4DF, expanded as three sets.  Two fresh V4DF
;; temporaries (operands 3 and 4, created in the preparation code)
;; receive the 0,4,2,6 and 1,5,3,7 selections of operands 1 and 2;
;; operand 0 is then formed with the selection 0,1,4,5 (presumably over
;; those two temporaries).
4420 (define_expand "vec_interleave_lowv4df"
4424 (match_operand:V4DF 1 "register_operand" "x")
4425 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4426 (parallel [(const_int 0) (const_int 4)
4427 (const_int 2) (const_int 6)])))
4433 (parallel [(const_int 1) (const_int 5)
4434 (const_int 3) (const_int 7)])))
4435 (set (match_operand:V4DF 0 "register_operand" "")
4440 (parallel [(const_int 0) (const_int 1)
4441 (const_int 4) (const_int 5)])))]
4444 operands[3] = gen_reg_rtx (V4DFmode);
4445 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander "vec_interleave_lowv2df".  Both V2DF inputs may be register or
;; memory; when ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects
;; the operand combination, operand 1 is forced into a V2DF register.
4448 (define_expand "vec_interleave_lowv2df"
4449 [(set (match_operand:V2DF 0 "register_operand" "")
4452 (match_operand:V2DF 1 "nonimmediate_operand" "")
4453 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4454 (parallel [(const_int 0)
4458 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4459 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the low V2DF interleave; requires TARGET_SSE2 and an operand
;; combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives, in order (see the "isa" attribute for availability):
;;   0 unpcklpd   (noavx, op1 tied to dest)   1 vunpcklpd (avx)
;;   2 %vmovddup  (sse3, op1 in memory, op2 tied to op1)
;;   3 movhpd     (noavx, op2 in memory)      4 vmovhpd   (avx, op2 in memory)
;;   5 %vmovlpd   storing op2 into the high half (%H0) of a memory dest
4462 (define_insn "*vec_interleave_lowv2df"
4463 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4466 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4467 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4468 (parallel [(const_int 0)
4470 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4472 unpcklpd\t{%2, %0|%0, %2}
4473 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4474 %vmovddup\t{%1, %0|%0, %1}
4475 movhpd\t{%2, %0|%0, %2}
4476 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4477 %vmovlpd\t{%2, %H0|%H0, %2}"
4478 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4479 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4480 (set_attr "prefix_data16" "*,*,*,1,*,1")
4481 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4482 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Split (TARGET_SSE3, after reload): V2DF memory destination, V2DF register
;; source.  The replacement stores the DFmode view of operand 1's hard
;; register twice, at byte offsets 0 and 8 of the destination.
4485 [(set (match_operand:V2DF 0 "memory_operand" "")
4488 (match_operand:V2DF 1 "register_operand" "")
4490 (parallel [(const_int 0)
4492 "TARGET_SSE3 && reload_completed"
4495 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4496 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4497 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3): V2DF register destination, V2DF memory source, with
;; a two-entry selector (operand 2 in 0..1, operand 3 == operand 2 + 2).
;; It becomes a vec_duplicate of the DFmode memory word at byte offset
;; operand 2 * 8.
4502 [(set (match_operand:V2DF 0 "register_operand" "")
4505 (match_operand:V2DF 1 "memory_operand" "")
4507 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4508 (match_operand:SI 3 "const_int_operand" "")])))]
4509 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4510 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4512 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander "avx_shufpd256": takes the immediate in operand 3 and emits
;; avx_shufpd256_1 with one selector index per mask bit.  As visible here:
;; bit 1 picks 5 or 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6 (the argument
;; derived from bit 0 is on a line not visible in this chunk).
4515 (define_expand "avx_shufpd256"
4516 [(match_operand:V4DF 0 "register_operand" "")
4517 (match_operand:V4DF 1 "register_operand" "")
4518 (match_operand:V4DF 2 "nonimmediate_operand" "")
4519 (match_operand:SI 3 "const_int_operand" "")]
4522 int mask = INTVAL (operands[3]);
4523 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4525 GEN_INT (mask & 2 ? 5 : 4),
4526 GEN_INT (mask & 4 ? 3 : 2),
4527 GEN_INT (mask & 8 ? 7 : 6)));
;; Insn "avx_shufpd256_1": operands 3..6 are selector indices restricted to
;; 0..1, 4..5, 2..3 and 6..7.  The output code folds them back into a 4-bit
;; immediate (one bit per index, after subtracting each range's base),
;; stores it in operands[3] and prints vshufpd.
4531 (define_insn "avx_shufpd256_1"
4532 [(set (match_operand:V4DF 0 "register_operand" "=x")
4535 (match_operand:V4DF 1 "register_operand" "x")
4536 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4537 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4538 (match_operand 4 "const_4_to_5_operand" "")
4539 (match_operand 5 "const_2_to_3_operand" "")
4540 (match_operand 6 "const_6_to_7_operand" "")])))]
4544 mask = INTVAL (operands[3]);
4545 mask |= (INTVAL (operands[4]) - 4) << 1;
4546 mask |= (INTVAL (operands[5]) - 2) << 2;
4547 mask |= (INTVAL (operands[6]) - 6) << 3;
4548 operands[3] = GEN_INT (mask);
4550 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4552 [(set_attr "type" "sselog")
4553 (set_attr "length_immediate" "1")
4554 (set_attr "prefix" "vex")
4555 (set_attr "mode" "V4DF")])
;; Expander "sse2_shufpd": takes the immediate in operand 3 and emits
;; sse2_shufpd_v2df with explicit selector indices; bit 1 of the mask picks
;; index 3 or 2 (the argument derived from bit 0 is on a line not visible
;; in this chunk).
4557 (define_expand "sse2_shufpd"
4558 [(match_operand:V2DF 0 "register_operand" "")
4559 (match_operand:V2DF 1 "register_operand" "")
4560 (match_operand:V2DF 2 "nonimmediate_operand" "")
4561 (match_operand:SI 3 "const_int_operand" "")]
4564 int mask = INTVAL (operands[3]);
4565 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4567 GEN_INT (mask & 2 ? 3 : 2)));
4571 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Insn "avx2_interleave_highv4di": V4DI register destination, register
;; operand 1 and register-or-memory operand 2; emitted as vpunpckhqdq
;; (type sselog, VEX prefix, mode OI).
4572 (define_insn "avx2_interleave_highv4di"
4573 [(set (match_operand:V4DI 0 "register_operand" "=x")
4576 (match_operand:V4DI 1 "register_operand" "x")
4577 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4578 (parallel [(const_int 1)
4583 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4584 [(set_attr "type" "sselog")
4585 (set_attr "prefix" "vex")
4586 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv2di".  Alternative 0 (noavx) ties operand 1 to
;; the destination and prints the two-operand punpckhqdq; alternative 1
;; (avx) prints the three-operand vpunpckhqdq.
4588 (define_insn "vec_interleave_highv2di"
4589 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4592 (match_operand:V2DI 1 "register_operand" "0,x")
4593 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4594 (parallel [(const_int 1)
4598 punpckhqdq\t{%2, %0|%0, %2}
4599 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4600 [(set_attr "isa" "noavx,avx")
4601 (set_attr "type" "sselog")
4602 (set_attr "prefix_data16" "1,*")
4603 (set_attr "prefix" "orig,vex")
4604 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv4di": V4DI register destination, register
;; operand 1 and register-or-memory operand 2; emitted as vpunpcklqdq
;; (type sselog, VEX prefix, mode OI).
4606 (define_insn "avx2_interleave_lowv4di"
4607 [(set (match_operand:V4DI 0 "register_operand" "=x")
4610 (match_operand:V4DI 1 "register_operand" "x")
4611 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4612 (parallel [(const_int 0)
4617 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4618 [(set_attr "type" "sselog")
4619 (set_attr "prefix" "vex")
4620 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv2di".  Alternative 0 (noavx) ties operand 1 to
;; the destination and prints the two-operand punpcklqdq; alternative 1
;; (avx) prints the three-operand vpunpcklqdq.
4622 (define_insn "vec_interleave_lowv2di"
4623 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4626 (match_operand:V2DI 1 "register_operand" "0,x")
4627 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4628 (parallel [(const_int 0)
4632 punpcklqdq\t{%2, %0|%0, %2}
4633 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "isa" "noavx,avx")
4635 (set_attr "type" "sselog")
4636 (set_attr "prefix_data16" "1,*")
4637 (set_attr "prefix" "orig,vex")
4638 (set_attr "mode" "TI")])
;; Insn "sse2_shufpd_<mode>", iterated over VI8F_128.  Operands 3 and 4 are
;; selector indices restricted to 0..1 and 2..3; the output code packs them
;; into a 2-bit immediate stored in operands[3], then returns the shufpd or
;; vshufpd template depending on which_alternative.
4640 (define_insn "sse2_shufpd_<mode>"
4641 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4642 (vec_select:VI8F_128
4643 (vec_concat:<ssedoublevecmode>
4644 (match_operand:VI8F_128 1 "register_operand" "0,x")
4645 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4646 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4647 (match_operand 4 "const_2_to_3_operand" "")])))]
4651 mask = INTVAL (operands[3]);
4652 mask |= (INTVAL (operands[4]) - 2) << 1;
4653 operands[3] = GEN_INT (mask);
4655 switch (which_alternative)
4658 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4660 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4665 [(set_attr "isa" "noavx,avx")
4666 (set_attr "type" "sselog")
4667 (set_attr "length_immediate" "1")
4668 (set_attr "prefix" "orig,vex")
4669 (set_attr "mode" "V2DF")])
4671 ;; Avoid combining registers from different units in a single alternative,
4672 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn "sse2_storehpd": extracts element 1 of a V2DF (operand 1) into a DF
;; destination.  Six alternatives (memory, SSE register x3, x87 "*f",
;; integer "r"); the condition forbids memory-to-memory.  prefix_data16 is
;; computed per insn for alternative 0 when not TARGET_AVX.
4673 (define_insn "sse2_storehpd"
4674 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4676 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4677 (parallel [(const_int 1)])))]
4678 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4680 %vmovhpd\t{%1, %0|%0, %1}
4682 vunpckhpd\t{%d1, %0|%0, %d1}
4686 [(set_attr "isa" "*,noavx,avx,*,*,*")
4687 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4688 (set (attr "prefix_data16")
4690 (and (eq_attr "alternative" "0")
4691 (not (match_test "TARGET_AVX")))
4693 (const_string "*")))
4694 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4695 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): extracting element 1 of a V2DF in
;; memory into a DF register becomes a plain DF load from byte offset 8 of
;; that memory operand.
4698 [(set (match_operand:DF 0 "register_operand" "")
4700 (match_operand:V2DF 1 "memory_operand" "")
4701 (parallel [(const_int 1)])))]
4702 "TARGET_SSE2 && reload_completed"
4703 [(set (match_dup 0) (match_dup 1))]
4704 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE); memory-to-memory is excluded.
;;   0: register -> memory    movhps
;;   1: register -> register  movhlps
;;   2: memory   -> register  movlps from the high half (%H1)
4706 (define_insn "*vec_extractv2df_1_sse"
4707 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4709 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4710 (parallel [(const_int 1)])))]
4711 "!TARGET_SSE2 && TARGET_SSE
4712 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4714 movhps\t{%1, %0|%0, %1}
4715 movhlps\t{%1, %0|%0, %1}
4716 movlps\t{%H1, %0|%0, %H1}"
4717 [(set_attr "type" "ssemov")
4718 (set_attr "mode" "V2SF,V4SF,V2SF")])
4720 ;; Avoid combining registers from different units in a single alternative,
4721 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn "sse2_storelpd": extracts element 0 of a V2DF (operand 1) into a DF
;; destination.  Five alternatives (memory, SSE register x2, x87 "*f",
;; integer "r"); the condition forbids memory-to-memory.  Only the template
;; for alternative 0 (%vmovlpd) is visible in this chunk.
4722 (define_insn "sse2_storelpd"
4723 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4725 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4726 (parallel [(const_int 0)])))]
4727 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4729 %vmovlpd\t{%1, %0|%0, %1}
4734 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4735 (set_attr "prefix_data16" "1,*,*,*,*")
4736 (set_attr "prefix" "maybe_vex")
4737 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): extracting element 0 of a V2DF into a
;; DF register.  Operand 1 is re-expressed in DFmode, either as the same
;; hard register (gen_rtx_REG) or via gen_lowpart -- the test choosing
;; between the two is on lines not visible here -- and a plain move is
;; emitted.
4740 [(set (match_operand:DF 0 "register_operand" "")
4742 (match_operand:V2DF 1 "nonimmediate_operand" "")
4743 (parallel [(const_int 0)])))]
4744 "TARGET_SSE2 && reload_completed"
4747 rtx op1 = operands[1];
4749 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4751 op1 = gen_lowpart (DFmode, op1);
4752 emit_move_insn (operands[0], op1);
;; Element-0 extraction from V2DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE); memory-to-memory is excluded.
;;   0: register -> memory    movlps
;;   1: register -> register  movaps
;;   2: memory   -> register  movlps
4756 (define_insn "*vec_extractv2df_0_sse"
4757 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4759 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4760 (parallel [(const_int 0)])))]
4761 "!TARGET_SSE2 && TARGET_SSE
4762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4764 movlps\t{%1, %0|%0, %1}
4765 movaps\t{%1, %0|%0, %1}
4766 movlps\t{%1, %0|%0, %1}"
4767 [(set_attr "type" "ssemov")
4768 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander "sse2_loadhpd_exp": keeps element 0 of operand 1 and pairs it
;; with the DF operand 2.  The operands are first passed through
;; ix86_fixup_binary_operands, sse2_loadhpd is emitted into the destination
;; that call returns, and the result is copied to operand 0 when the
;; returned destination differs from it.
4770 (define_expand "sse2_loadhpd_exp"
4771 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4774 (match_operand:V2DF 1 "nonimmediate_operand" "")
4775 (parallel [(const_int 0)]))
4776 (match_operand:DF 2 "nonimmediate_operand" "")))]
4779 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4781 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4783 /* Fix up the destination if needed. */
4784 if (dst != operands[0])
4785 emit_move_insn (operands[0], dst);
4790 ;; Avoid combining registers from different units in a single alternative,
4791 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn "sse2_loadhpd": element 0 of operand 1 combined with the DF operand
;; 2.  Seven alternatives (see the "isa"/"type" attributes); the constraint
;; strings for operands 0 and 1 and the templates of the last three
;; alternatives are on lines not visible in this chunk.  Operands 1 and 2
;; may not both be memory.
4792 (define_insn "sse2_loadhpd"
4793 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4797 (match_operand:V2DF 1 "nonimmediate_operand"
4799 (parallel [(const_int 0)]))
4800 (match_operand:DF 2 "nonimmediate_operand"
4801 " m,m,x,x,x,*f,r")))]
4802 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4804 movhpd\t{%2, %0|%0, %2}
4805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4806 unpcklpd\t{%2, %0|%0, %2}
4807 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4811 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4812 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4813 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4814 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4815 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination that keeps
;; its own element 0 and takes the DF register operand 1 as the other
;; element becomes a plain DF store at byte offset 8 of the destination.
4818 [(set (match_operand:V2DF 0 "memory_operand" "")
4820 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4821 (match_operand:DF 1 "register_operand" "")))]
4822 "TARGET_SSE2 && reload_completed"
4823 [(set (match_dup 0) (match_dup 1))]
4824 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander "sse2_loadlpd_exp": pairs the DF operand 2 with element 1 of
;; operand 1.  The operands are first passed through
;; ix86_fixup_binary_operands, sse2_loadlpd is emitted into the destination
;; that call returns, and the result is copied to operand 0 when the
;; returned destination differs from it.
4826 (define_expand "sse2_loadlpd_exp"
4827 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4829 (match_operand:DF 2 "nonimmediate_operand" "")
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 1)]))))]
4835 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4837 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4839 /* Fix up the destination if needed. */
4840 if (dst != operands[0])
4841 emit_move_insn (operands[0], dst);
4846 ;; Avoid combining registers from different units in a single alternative,
4847 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn "sse2_loadlpd": the DF operand 2 combined with element 1 of
;; operand 1.  Eleven alternatives: 0-7 write an SSE register, 8-10 write
;; memory with operand 1 tied to the destination.  Alternative 5 is the
;; only one typed sselog (shufpd with immediate 2); 9 and 10 are typed
;; fmov and imov.  Templates for alternatives 8-10 are on lines not
;; visible in this chunk.  Operands 1 and 2 may not both be memory.
4848 (define_insn "sse2_loadlpd"
4849 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4850 "=x,x,x,x,x,x,x,x,m,m ,m")
4852 (match_operand:DF 2 "nonimmediate_operand"
4853 " m,m,m,x,x,0,0,x,x,*f,r")
4855 (match_operand:V2DF 1 "vector_move_operand"
4856 " C,0,x,0,x,x,o,o,0,0 ,0")
4857 (parallel [(const_int 1)]))))]
4858 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4860 %vmovsd\t{%2, %0|%0, %2}
4861 movlpd\t{%2, %0|%0, %2}
4862 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4863 movsd\t{%2, %0|%0, %2}
4864 vmovsd\t{%2, %1, %0|%0, %1, %2}
4865 shufpd\t{$2, %1, %0|%0, %1, 2}
4866 movhpd\t{%H1, %0|%0, %H1}
4867 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4871 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4873 (cond [(eq_attr "alternative" "5")
4874 (const_string "sselog")
4875 (eq_attr "alternative" "9")
4876 (const_string "fmov")
4877 (eq_attr "alternative" "10")
4878 (const_string "imov")
4880 (const_string "ssemov")))
4881 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4882 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4883 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4884 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination that takes
;; the DF register operand 1 as its first element and keeps its own
;; element 1 becomes a plain DF store.  The register replaces the LOW
;; element, so the store must go to byte offset 0; offset 8 would overwrite
;; the element the pattern says is preserved and leave the low element
;; stale.
4887 [(set (match_operand:V2DF 0 "memory_operand" "")
4889 (match_operand:DF 1 "register_operand" "")
4890 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4891 "TARGET_SSE2 && reload_completed"
4892 [(set (match_dup 0) (match_dup 1))]
4893 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Insn "sse2_movsd": nine alternatives over V2DF operands (note that
;; operand 2 is listed before operand 1 in the pattern).  Alternatives 0-3
;; and 5-7 write an SSE register, 4 writes memory "m" and 8 writes
;; offsettable memory "o", storing to its high half (%H0).  Alternative 5
;; is the only one typed sselog (shufpd with immediate 2).  prefix_data16
;; is computed per insn for alternatives 2 and 4 when not TARGET_AVX.
4895 (define_insn "sse2_movsd"
4896 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4898 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4899 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4903 movsd\t{%2, %0|%0, %2}
4904 vmovsd\t{%2, %1, %0|%0, %1, %2}
4905 movlpd\t{%2, %0|%0, %2}
4906 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4907 %vmovlpd\t{%2, %0|%0, %2}
4908 shufpd\t{$2, %1, %0|%0, %1, 2}
4909 movhps\t{%H1, %0|%0, %H1}
4910 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4911 %vmovhps\t{%1, %H0|%H0, %1}"
4912 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4915 (eq_attr "alternative" "5")
4916 (const_string "sselog")
4917 (const_string "ssemov")))
4918 (set (attr "prefix_data16")
4920 (and (eq_attr "alternative" "2,4")
4921 (not (match_test "TARGET_AVX")))
4923 (const_string "*")))
4924 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4925 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4926 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Insn "vec_dupv2df": V2DF register destination built from a DF operand.
;; Alternative 0 (noavx) has operand 1 tied to the destination; alternative
;; 1 (sse3) takes a register or memory source and prints %vmovddup.  The
;; template for alternative 0 is on a line not visible in this chunk.
4928 (define_insn "vec_dupv2df"
4929 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4931 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4935 %vmovddup\t{%1, %0|%0, %1}"
4936 [(set_attr "isa" "noavx,sse3")
4937 (set_attr "type" "sselog1")
4938 (set_attr "prefix" "orig,maybe_vex")
4939 (set_attr "mode" "V2DF")])
;; Matcher building a V2DF register from two DF operands.  Eight
;; alternatives, in order:
;;   0 unpcklpd / 1 vunpcklpd  -- both inputs in registers
;;   2 %vmovddup               -- operand 1 in memory, operand 2 tied to it
;;   3 movhpd / 4 vmovhpd      -- operand 2 in memory
;;   5 %vmovsd                 -- operand 1 in memory, operand 2 matching "C"
;;   6 movlhps / 7 movhps      -- noavx forms with V4SF / V2SF insn modes
;; Availability of each alternative is given by the "isa" attribute.
4941 (define_insn "*vec_concatv2df"
4942 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4944 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4945 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4948 unpcklpd\t{%2, %0|%0, %2}
4949 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4950 %vmovddup\t{%1, %0|%0, %1}
4951 movhpd\t{%2, %0|%0, %2}
4952 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4953 %vmovsd\t{%1, %0|%0, %1}
4954 movlhps\t{%2, %0|%0, %2}
4955 movhps\t{%2, %0|%0, %2}"
4956 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4959 (eq_attr "alternative" "0,1,2")
4960 (const_string "sselog")
4961 (const_string "ssemov")))
4962 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4963 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4964 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4968 ;; Parallel integral arithmetic
4970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "neg<mode>2" over VI_AVX2.  The preparation statement sets
;; operands[2] to the all-zero constant of <MODE>mode forced into a
;; register; how operand 2 is used is on template lines not visible here.
4972 (define_expand "neg<mode>2"
4973 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4976 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4978 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander "<plusminus_insn><mode>3" over VI_AVX2: both inputs may be
;; register or memory; the operands are normalised with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
4980 (define_expand "<plusminus_insn><mode>3"
4981 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4983 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4984 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4986 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for integer vector add/sub over VI_AVX2 (TARGET_SSE2, operands
;; accepted by ix86_binary_operator_ok).  Alternative 0 (noavx) ties
;; operand 1 to the destination and prints the two-operand legacy form;
;; alternative 1 (avx) prints the three-operand VEX form.  The <comm>
;; constraint prefix on operand 1 is supplied by the code iterator.
4988 (define_insn "*<plusminus_insn><mode>3"
4989 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4991 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4992 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4993 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4995 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4996 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4997 [(set_attr "isa" "noavx,avx")
4998 (set_attr "type" "sseiadd")
4999 (set_attr "prefix_data16" "1,*")
5000 (set_attr "prefix" "orig,vex")
5001 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the saturating add/sub codes (sat_plusminus) over
;; VI12_AVX2: both inputs may be register or memory; the operands are
;; normalised with ix86_fixup_binary_operands_no_copy.
5003 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5004 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
5005 (sat_plusminus:VI12_AVX2
5006 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
5007 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
5009 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for saturating add/sub over VI12_AVX2 (TARGET_SSE2, operands
;; accepted by ix86_binary_operator_ok).  Alternative 0 (noavx) is the
;; two-operand legacy form, alternative 1 (avx) the three-operand VEX form.
;; NOTE(review): "mode" is the fixed string "TI" here, whereas the
;; non-saturating *<plusminus_insn><mode>3 uses <sseinsnmode> -- confirm
;; this is intended for any 256-bit modes in VI12_AVX2.
5011 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5012 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5013 (sat_plusminus:VI12_AVX2
5014 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5015 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5016 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5018 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5019 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5020 [(set_attr "isa" "noavx,avx")
5021 (set_attr "type" "sseiadd")
5022 (set_attr "prefix_data16" "1,*")
5023 (set_attr "prefix" "orig,vex")
5024 (set_attr "mode" "TI")])
;; Byte-element vector multiply over VI1_AVX2, split while pseudos can
;; still be created.  Steps visible in the split code:
;;   * six temporaries t[0..5] are allocated in <MODE>mode;
;;   * the high and low halves of each input are interleaved into
;;     t[0]/t[1] (high) and t[2]/t[3] (low);
;;   * the pairs are multiplied in the wider <sseunpackmode> into t[4]
;;     (high) and t[5] (low);
;;   * ix86_expand_vec_extract_even_odd merges the even bytes of t[5] and
;;     t[4] into operand 0;
;;   * a REG_EQUAL note recording the plain MULT is attached to the last
;;     emitted insn.
5026 (define_insn_and_split "mul<mode>3"
5027 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
5028 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
5029 (match_operand:VI1_AVX2 2 "register_operand" "")))]
5031 && can_create_pseudo_p ()"
5038 enum machine_mode mulmode = <sseunpackmode>mode;
5040 for (i = 0; i < 6; ++i)
5041 t[i] = gen_reg_rtx (<MODE>mode);
5043 /* Unpack data such that we've got a source byte in each low byte of
5044 each word. We don't care what goes into the high byte of each word.
5045 Rather than trying to get zero in there, most convenient is to let
5046 it be a copy of the low byte. */
5047 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
5049 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
5051 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
5053 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
5056 /* Multiply words. The end-of-line annotations here give a picture of what
5057 the output of that instruction looks like. Dot means don't care; the
5058 letters are the bytes of the result with A being the most significant. */
5059 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
5060 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
5061 gen_lowpart (mulmode, t[0]),
5062 gen_lowpart (mulmode, t[1]))));
5063 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
5064 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
5065 gen_lowpart (mulmode, t[2]),
5066 gen_lowpart (mulmode, t[3]))));
5068 /* Extract the even bytes and merge them back together. */
5069 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5071 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5072 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Expander "mul<mode>3" over VI2_AVX2: both inputs may be register or
;; memory; the operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
5076 (define_expand "mul<mode>3"
5077 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5078 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5079 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5081 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matcher for the VI2_AVX2 multiply (TARGET_SSE2, operands accepted by
;; ix86_binary_operator_ok).  Operand 1 carries the "%" commutative marker.
;; Alternative 0 (noavx) prints pmullw, alternative 1 (avx) vpmullw.
5083 (define_insn "*mul<mode>3"
5084 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5085 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5086 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5087 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5089 pmullw\t{%2, %0|%0, %2}
5090 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5091 [(set_attr "isa" "noavx,avx")
5092 (set_attr "type" "sseimul")
5093 (set_attr "prefix_data16" "1,*")
5094 (set_attr "prefix" "orig,vex")
5095 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<s>mul<mode>3_highpart" over VI2_AVX2: both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied there, and the
;; product is shifted right logically (shift count and outer wrapper are on
;; lines not visible here).  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
5097 (define_expand "<s>mul<mode>3_highpart"
5098 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5100 (lshiftrt:<ssedoublemode>
5101 (mult:<ssedoublemode>
5102 (any_extend:<ssedoublemode>
5103 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5104 (any_extend:<ssedoublemode>
5105 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5108 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matcher for the VI2_AVX2 high-part multiply (TARGET_SSE2, operands
;; accepted by ix86_binary_operator_ok).  Operand 1 carries the "%"
;; commutative marker.  Alternative 0 (noavx) prints pmulh<u>w,
;; alternative 1 (avx) vpmulh<u>w; <u> comes from the extension code.
5110 (define_insn "*<s>mul<mode>3_highpart"
5111 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5113 (lshiftrt:<ssedoublemode>
5114 (mult:<ssedoublemode>
5115 (any_extend:<ssedoublemode>
5116 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5117 (any_extend:<ssedoublemode>
5118 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5120 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5122 pmulh<u>w\t{%2, %0|%0, %2}
5123 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5124 [(set_attr "isa" "noavx,avx")
5125 (set_attr "type" "sseimul")
5126 (set_attr "prefix_data16" "1,*")
5127 (set_attr "prefix" "orig,vex")
5128 (set_attr "mode" "<sseinsnmode>")])
;; Expander "avx2_umulv4siv4di3": V4DI result from the even-indexed
;; elements (0 2 4 6) of two V8SI inputs.  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V8SImode, ...).
5130 (define_expand "avx2_umulv4siv4di3"
5131 [(set (match_operand:V4DI 0 "register_operand" "")
5135 (match_operand:V8SI 1 "nonimmediate_operand" "")
5136 (parallel [(const_int 0) (const_int 2)
5137 (const_int 4) (const_int 6)])))
5140 (match_operand:V8SI 2 "nonimmediate_operand" "")
5141 (parallel [(const_int 0) (const_int 2)
5142 (const_int 4) (const_int 6)])))))]
5144 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matcher printing vpmuludq for the even-element (0 2 4 6) V8SI -> V4DI
;; multiply; requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 carries the "%" commutative marker.
;; NOTE(review): the name says "avx" although the condition is TARGET_AVX2
;; and the signed sibling is named *avx2_mulv4siv4di3; consider renaming
;; for consistency.
5146 (define_insn "*avx_umulv4siv4di3"
5147 [(set (match_operand:V4DI 0 "register_operand" "=x")
5151 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5152 (parallel [(const_int 0) (const_int 2)
5153 (const_int 4) (const_int 6)])))
5156 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5157 (parallel [(const_int 0) (const_int 2)
5158 (const_int 4) (const_int 6)])))))]
5159 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5160 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5161 [(set_attr "type" "sseimul")
5162 (set_attr "prefix" "vex")
5163 (set_attr "mode" "OI")])
;; Expander "sse2_umulv2siv2di3": V2DI result from elements 0 and 2 of two
;; V4SI inputs.  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, ...).
5165 (define_expand "sse2_umulv2siv2di3"
5166 [(set (match_operand:V2DI 0 "register_operand" "")
5170 (match_operand:V4SI 1 "nonimmediate_operand" "")
5171 (parallel [(const_int 0) (const_int 2)])))
5174 (match_operand:V4SI 2 "nonimmediate_operand" "")
5175 (parallel [(const_int 0) (const_int 2)])))))]
5177 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matcher for the elements-0-and-2 V4SI -> V2DI multiply (TARGET_SSE2,
;; operands accepted by ix86_binary_operator_ok).  Operand 1 carries the
;; "%" commutative marker.  Alternative 0 (noavx) prints pmuludq,
;; alternative 1 (avx) vpmuludq.
5179 (define_insn "*sse2_umulv2siv2di3"
5180 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5184 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5185 (parallel [(const_int 0) (const_int 2)])))
5188 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5189 (parallel [(const_int 0) (const_int 2)])))))]
5190 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5192 pmuludq\t{%2, %0|%0, %2}
5193 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5194 [(set_attr "isa" "noavx,avx")
5195 (set_attr "type" "sseimul")
5196 (set_attr "prefix_data16" "1,*")
5197 (set_attr "prefix" "orig,vex")
5198 (set_attr "mode" "TI")])
;; Expander "avx2_mulv4siv4di3": V4DI result from the even-indexed
;; elements (0 2 4 6) of two V8SI inputs.  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V8SImode, ...).
5200 (define_expand "avx2_mulv4siv4di3"
5201 [(set (match_operand:V4DI 0 "register_operand" "")
5205 (match_operand:V8SI 1 "nonimmediate_operand" "")
5206 (parallel [(const_int 0) (const_int 2)
5207 (const_int 4) (const_int 6)])))
5210 (match_operand:V8SI 2 "nonimmediate_operand" "")
5211 (parallel [(const_int 0) (const_int 2)
5212 (const_int 4) (const_int 6)])))))]
5214 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matcher printing vpmuldq for the even-element (0 2 4 6) V8SI -> V4DI
;; multiply; requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok (MULT, ...).  Operand 1 is marked commutative
;; with "%", like the other widening-multiply matchers in this file, so
;; that a memory operand 1 can be swapped into the "xm" slot of operand 2
;; instead of being reloaded into a register.
5216 (define_insn "*avx2_mulv4siv4di3"
5217 [(set (match_operand:V4DI 0 "register_operand" "=x")
5221 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5222 (parallel [(const_int 0) (const_int 2)
5223 (const_int 4) (const_int 6)])))
5226 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5227 (parallel [(const_int 0) (const_int 2)
5228 (const_int 4) (const_int 6)])))))]
5229 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5230 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5231 [(set_attr "isa" "avx")
5232 (set_attr "type" "sseimul")
5233 (set_attr "prefix_extra" "1")
5234 (set_attr "prefix" "vex")
5235 (set_attr "mode" "OI")])
;; Expander "sse4_1_mulv2siv2di3": V2DI result from elements 0 and 2 of two
;; V4SI inputs.  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, ...).
5237 (define_expand "sse4_1_mulv2siv2di3"
5238 [(set (match_operand:V2DI 0 "register_operand" "")
5242 (match_operand:V4SI 1 "nonimmediate_operand" "")
5243 (parallel [(const_int 0) (const_int 2)])))
5246 (match_operand:V4SI 2 "nonimmediate_operand" "")
5247 (parallel [(const_int 0) (const_int 2)])))))]
5249 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matcher for the elements-0-and-2 V4SI -> V2DI multiply (TARGET_SSE4_1,
;; operands accepted by ix86_binary_operator_ok).  Operand 1 carries the
;; "%" commutative marker.  Alternative 0 (noavx) prints pmuldq,
;; alternative 1 (avx) vpmuldq.
5251 (define_insn "*sse4_1_mulv2siv2di3"
5252 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5256 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5257 (parallel [(const_int 0) (const_int 2)])))
5260 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5261 (parallel [(const_int 0) (const_int 2)])))))]
5262 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5264 pmuldq\t{%2, %0|%0, %2}
5265 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5266 [(set_attr "isa" "noavx,avx")
5267 (set_attr "type" "sseimul")
5268 (set_attr "prefix_data16" "1,*")
5269 (set_attr "prefix_extra" "1")
5270 (set_attr "prefix" "orig,vex")
5271 (set_attr "mode" "TI")])
;; Expander "avx2_pmaddwd": V8SI result from two V16HI inputs.  The visible
;; template selects from each input twice -- one index list starting at 0
;; and one starting at 1 and ending at 15 (intermediate indices are on
;; lines not visible here).  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V16HImode, ...).
5273 (define_expand "avx2_pmaddwd"
5274 [(set (match_operand:V8SI 0 "register_operand" "")
5279 (match_operand:V16HI 1 "nonimmediate_operand" "")
5280 (parallel [(const_int 0)
5290 (match_operand:V16HI 2 "nonimmediate_operand" "")
5291 (parallel [(const_int 0)
5301 (vec_select:V8HI (match_dup 1)
5302 (parallel [(const_int 1)
5311 (vec_select:V8HI (match_dup 2)
5312 (parallel [(const_int 1)
5319 (const_int 15)]))))))]
5321 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander "sse2_pmaddwd": V4SI result from two V8HI inputs.  The visible
;; template selects from each input twice -- one index list starting at 0
;; and one starting at 1 and ending at 7 (intermediate indices are on
;; lines not visible here).  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
5323 (define_expand "sse2_pmaddwd"
5324 [(set (match_operand:V4SI 0 "register_operand" "")
5329 (match_operand:V8HI 1 "nonimmediate_operand" "")
5330 (parallel [(const_int 0)
5336 (match_operand:V8HI 2 "nonimmediate_operand" "")
5337 (parallel [(const_int 0)
5343 (vec_select:V4HI (match_dup 1)
5344 (parallel [(const_int 1)
5349 (vec_select:V4HI (match_dup 2)
5350 (parallel [(const_int 1)
5353 (const_int 7)]))))))]
5355 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher printing vpmaddwd for the V16HI -> V8SI multiply-add pattern;
;; requires TARGET_AVX2 and operands accepted by ix86_binary_operator_ok.
;; Operand 1 carries the "%" commutative marker.  Typed sseiadd, VEX
;; prefix, mode OI.
5357 (define_insn "*avx2_pmaddwd"
5358 [(set (match_operand:V8SI 0 "register_operand" "=x")
5363 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5364 (parallel [(const_int 0)
5374 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5375 (parallel [(const_int 0)
5385 (vec_select:V8HI (match_dup 1)
5386 (parallel [(const_int 1)
5395 (vec_select:V8HI (match_dup 2)
5396 (parallel [(const_int 1)
5403 (const_int 15)]))))))]
5404 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5405 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5406 [(set_attr "type" "sseiadd")
5407 (set_attr "prefix" "vex")
5408 (set_attr "mode" "OI")])
;; Matcher for the V8HI -> V4SI multiply-add pattern (TARGET_SSE2, operands
;; accepted by ix86_binary_operator_ok).  Operand 1 carries the "%"
;; commutative marker.  Alternative 0 (noavx) prints pmaddwd, alternative 1
;; (avx) vpmaddwd.  The insn is tagged atom_unit "simul".
5410 (define_insn "*sse2_pmaddwd"
5411 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5416 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5417 (parallel [(const_int 0)
5423 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5424 (parallel [(const_int 0)
5430 (vec_select:V4HI (match_dup 1)
5431 (parallel [(const_int 1)
5436 (vec_select:V4HI (match_dup 2)
5437 (parallel [(const_int 1)
5440 (const_int 7)]))))))]
5441 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5443 pmaddwd\t{%2, %0|%0, %2}
5444 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5445 [(set_attr "isa" "noavx,avx")
5446 (set_attr "type" "sseiadd")
5447 (set_attr "atom_unit" "simul")
5448 (set_attr "prefix_data16" "1,*")
5449 (set_attr "prefix" "orig,vex")
5450 (set_attr "mode" "TI")])
;; Expander "mul<mode>3" over VI4_AVX2 with register-only operands.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_AVX holds; otherwise they are left unchanged.
5452 (define_expand "mul<mode>3"
5453 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5454 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5455 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5458 if (TARGET_SSE4_1 || TARGET_AVX)
5459 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for the VI4_AVX2 multiply (TARGET_SSE4_1, operands accepted by
;; ix86_binary_operator_ok).  Operand 1 carries the "%" commutative marker.
;; Alternative 0 (noavx) prints pmulld, alternative 1 (avx) vpmulld.
5462 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5463 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5464 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5465 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5466 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5468 pmulld\t{%2, %0|%0, %2}
5469 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5470 [(set_attr "isa" "noavx,avx")
5471 (set_attr "type" "sseimul")
5472 (set_attr "prefix_extra" "1")
5473 (set_attr "prefix" "orig,vex")
5474 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX, split
;; while pseudos can still be created.  Steps visible in the split code:
;;   * six V4SI temporaries t1..t6 are allocated;
;;   * sse2_umulv2siv2di3 multiplies elements 2 and 0 into t1;
;;   * both inputs are shifted with sse2_lshrv1ti3 into t2/t3 so that
;;     elements 3 and 1 occupy slots 2 and 0 (the shift count is on lines
;;     not visible here; thirtytwo is set to GEN_INT (32));
;;   * sse2_umulv2siv2di3 multiplies elements 3 and 1 into t4;
;;   * sse2_pshufd_1 with selector (0 2 0 0) moves element 2 of t1 and t4
;;     down to element 1, giving t5 and t6;
;;   * vec_interleave_lowv4si merges t5 and t6 into the destination;
;;   * a REG_EQUAL note recording the plain MULT is attached to the last
;;     emitted insn.
5476 (define_insn_and_split "*sse2_mulv4si3"
5477 [(set (match_operand:V4SI 0 "register_operand" "")
5478 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5479 (match_operand:V4SI 2 "register_operand" "")))]
5480 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5481 && can_create_pseudo_p ()"
5486 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5492 t1 = gen_reg_rtx (V4SImode);
5493 t2 = gen_reg_rtx (V4SImode);
5494 t3 = gen_reg_rtx (V4SImode);
5495 t4 = gen_reg_rtx (V4SImode);
5496 t5 = gen_reg_rtx (V4SImode);
5497 t6 = gen_reg_rtx (V4SImode);
5498 thirtytwo = GEN_INT (32);
5500 /* Multiply elements 2 and 0. */
5501 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5504 /* Shift both input vectors down one element, so that elements 3
5505 and 1 are now in the slots for elements 2 and 0. For K8, at
5506 least, this is faster than using a shuffle. */
5507 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5508 gen_lowpart (V1TImode, op1),
5510 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5511 gen_lowpart (V1TImode, op2),
5513 /* Multiply elements 3 and 1. */
5514 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5517 /* Move the results in element 2 down to element 1; we don't care
5518 what goes in elements 2 and 3. */
5519 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5520 const0_rtx, const0_rtx));
5521 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5522 const0_rtx, const0_rtx));
5524 /* Merge the parts back together. */
5525 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5527 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5528 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; 64-bit-element vector multiply over VI8_AVX2, split while pseudos can
;; still be created.  Two code paths are visible:
;;
;;  * TARGET_XOP with V2DImode: the inputs are viewed as V4SI; op1 is
;;    shuffled with sse2_pshufd_1 into t1, multiplied element-wise with op2
;;    (mulv4si3) into t2, adjacent products are summed with xop_phadddq
;;    into t3, shifted left by 32 into t4, and xop_pmacsdql combines op1,
;;    op2 and t4 into the destination.
;;    NOTE(review): the inline comments labelled "t4:" and "t5:" in this
;;    path sit above insns whose destinations are t3 and t4 respectively;
;;    the labels appear to be off by one.
;;
;;  * Otherwise: low parts are multiplied into t1; both inputs are shifted
;;    right by 32 into t2/t3; the cross products op1 * t3 and op2 * t2 go
;;    into t4/t5 and are shifted left by 32; finally t1 + t4 + t5 is
;;    accumulated through t6 into the destination.
;;
;; In both cases a REG_EQUAL note recording the plain MULT is attached to
;; the last emitted insn.
5532 (define_insn_and_split "mul<mode>3"
5533 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5534 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5535 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5537 && can_create_pseudo_p ()"
5542 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5549 if (TARGET_XOP && <MODE>mode == V2DImode)
5551 /* op1: A,B,C,D, op2: E,F,G,H */
5552 op1 = gen_lowpart (V4SImode, op1);
5553 op2 = gen_lowpart (V4SImode, op2);
5555 t1 = gen_reg_rtx (V4SImode);
5556 t2 = gen_reg_rtx (V4SImode);
5557 t3 = gen_reg_rtx (V2DImode);
5558 t4 = gen_reg_rtx (V2DImode);
5561 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5567 /* t2: (B*E),(A*F),(D*G),(C*H) */
5568 emit_insn (gen_mulv4si3 (t2, t1, op2));
5570 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5571 emit_insn (gen_xop_phadddq (t3, t2));
5573 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5574 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5576 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5577 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5581 t1 = gen_reg_rtx (<MODE>mode);
5582 t2 = gen_reg_rtx (<MODE>mode);
5583 t3 = gen_reg_rtx (<MODE>mode);
5584 t4 = gen_reg_rtx (<MODE>mode);
5585 t5 = gen_reg_rtx (<MODE>mode);
5586 t6 = gen_reg_rtx (<MODE>mode);
5587 thirtytwo = GEN_INT (32);
5589 /* Multiply low parts. */
5590 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5591 (t1, gen_lowpart (<ssepackmode>mode, op1),
5592 gen_lowpart (<ssepackmode>mode, op2)));
5594 /* Shift input vectors right 32 bits so we can multiply high parts. */
5595 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5596 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5598 /* Multiply high parts by low parts. */
5599 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5600 (t4, gen_lowpart (<ssepackmode>mode, op1),
5601 gen_lowpart (<ssepackmode>mode, t3)));
5602 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5603 (t5, gen_lowpart (<ssepackmode>mode, op2),
5604 gen_lowpart (<ssepackmode>mode, t2)));
5606 /* Shift them back. */
5607 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5608 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5610 /* Add the three parts together. */
5611 emit_insn (gen_add<mode>3 (t6, t1, t4));
5612 emit_insn (gen_add<mode>3 (op0, t6, t5));
5615 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5616 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for VI2_AVX2 inputs.  The product is
;; computed as two same-width vectors (mul<mode>3 for the low part of each
;; product, <s>mul<mode>3_highpart for the high part) which are then
;; combined with vec_interleave_high into the wider destination, viewed
;; in the input mode through gen_lowpart.
5620 (define_expand "vec_widen_<s>mult_hi_<mode>"
5621 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5622 (any_extend:<sseunpackmode>
5623 (match_operand:VI2_AVX2 1 "register_operand" ""))
5624 (match_operand:VI2_AVX2 2 "register_operand" "")]
5627 rtx op1, op2, t1, t2, dest;
5631 t1 = gen_reg_rtx (<MODE>mode);
5632 t2 = gen_reg_rtx (<MODE>mode);
5633 dest = gen_lowpart (<MODE>mode, operands[0]);
5635 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5636 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5637 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for VI2_AVX2 inputs.  Same scheme as the
;; _hi_ variant: mul<mode>3 gives the low part of each product,
;; <s>mul<mode>3_highpart the high part, and vec_interleave_low merges
;; them into the wider destination (accessed in the input mode).
5641 (define_expand "vec_widen_<s>mult_lo_<mode>"
5642 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5643 (any_extend:<sseunpackmode>
5644 (match_operand:VI2_AVX2 1 "register_operand" ""))
5645 (match_operand:VI2_AVX2 2 "register_operand" "")]
5648 rtx op1, op2, t1, t2, dest;
5652 t1 = gen_reg_rtx (<MODE>mode);
5653 t2 = gen_reg_rtx (<MODE>mode);
5654 dest = gen_lowpart (<MODE>mode, operands[0]);
5656 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5657 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5658 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; V8SI -> V4DI widening multiply, high half.  Each input is first
;; permuted as V4DI with selectors (0, 2, 1, 3), then shuffled with a
;; pshufd whose immediate encodes elements 2, 2, 3, 3, and finally the two
;; shuffled vectors are fed to the avx2 <u>mulv4siv4di3 widening multiply.
5662 (define_expand "vec_widen_<s>mult_hi_v8si"
5663 [(match_operand:V4DI 0 "register_operand" "")
5664 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5665 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5670 t1 = gen_reg_rtx (V4DImode);
5671 t2 = gen_reg_rtx (V4DImode);
5672 t3 = gen_reg_rtx (V8SImode);
5673 t4 = gen_reg_rtx (V8SImode);
5674 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5675 const0_rtx, const2_rtx,
5676 const1_rtx, GEN_INT (3)));
5677 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5678 const0_rtx, const2_rtx,
5679 const1_rtx, GEN_INT (3)));
5680 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5681 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5682 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5683 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5684 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; V8SI -> V4DI widening multiply, low half.  Identical to the _hi_
;; variant except that the pshufd immediate encodes elements 0, 0, 1, 1
;; instead of 2, 2, 3, 3.
5688 (define_expand "vec_widen_<s>mult_lo_v8si"
5689 [(match_operand:V4DI 0 "register_operand" "")
5690 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5691 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5696 t1 = gen_reg_rtx (V4DImode);
5697 t2 = gen_reg_rtx (V4DImode);
5698 t3 = gen_reg_rtx (V8SImode);
5699 t4 = gen_reg_rtx (V8SImode);
5700 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5701 const0_rtx, const2_rtx,
5702 const1_rtx, GEN_INT (3)));
5703 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5704 const0_rtx, const2_rtx,
5705 const1_rtx, GEN_INT (3)));
5706 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5707 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5708 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5709 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5710 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed V4SI -> V2DI widening multiply, high half.  Two code paths are
;; visible:
;;  - pshufd with selectors (0, 2, 1, 3) on both inputs followed by
;;    xop_mulv2div2di3_high;
;;  - vec_interleave_highv4si of each input with itself followed by
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test that selects between them is not shown in this
;; excerpt; presumably the first path is the XOP one -- confirm.
5714 (define_expand "vec_widen_smult_hi_v4si"
5715 [(match_operand:V2DI 0 "register_operand" "")
5716 (match_operand:V4SI 1 "register_operand" "")
5717 (match_operand:V4SI 2 "register_operand" "")]
5720 rtx op1, op2, t1, t2;
5724 t1 = gen_reg_rtx (V4SImode);
5725 t2 = gen_reg_rtx (V4SImode);
5729 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5730 GEN_INT (1), GEN_INT (3)));
5731 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5732 GEN_INT (1), GEN_INT (3)));
5733 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5737 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5738 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5739 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed V4SI -> V2DI widening multiply, low half.  Two code paths are
;; visible:
;;  - pshufd with selectors (0, 2, 1, 3) on both inputs followed by
;;    xop_mulv2div2di3_low;
;;  - vec_interleave_lowv4si of each input with itself followed by
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test that selects between them is not shown in this
;; excerpt; presumably the first path is the XOP one -- confirm.
5743 (define_expand "vec_widen_smult_lo_v4si"
5744 [(match_operand:V2DI 0 "register_operand" "")
5745 (match_operand:V4SI 1 "register_operand" "")
5746 (match_operand:V4SI 2 "register_operand" "")]
5749 rtx op1, op2, t1, t2;
5753 t1 = gen_reg_rtx (V4SImode);
5754 t2 = gen_reg_rtx (V4SImode);
5758 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5759 GEN_INT (1), GEN_INT (3)));
5760 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5761 GEN_INT (1), GEN_INT (3)));
5762 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5766 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5767 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5768 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned V4SI -> V2DI widening multiply, high half: each input is
;; interleaved with itself by vec_interleave_highv4si and the results are
;; multiplied with sse2_umulv2siv2di3.
5772 (define_expand "vec_widen_umult_hi_v4si"
5773 [(match_operand:V2DI 0 "register_operand" "")
5774 (match_operand:V4SI 1 "register_operand" "")
5775 (match_operand:V4SI 2 "register_operand" "")]
5778 rtx op1, op2, t1, t2;
5782 t1 = gen_reg_rtx (V4SImode);
5783 t2 = gen_reg_rtx (V4SImode);
5785 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5786 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5787 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned V4SI -> V2DI widening multiply, low half: each input is
;; interleaved with itself by vec_interleave_lowv4si and the results are
;; multiplied with sse2_umulv2siv2di3.
5791 (define_expand "vec_widen_umult_lo_v4si"
5792 [(match_operand:V2DI 0 "register_operand" "")
5793 (match_operand:V4SI 1 "register_operand" "")
5794 (match_operand:V4SI 2 "register_operand" "")]
5797 rtx op1, op2, t1, t2;
5801 t1 = gen_reg_rtx (V4SImode);
5802 t2 = gen_reg_rtx (V4SImode);
5804 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5805 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5806 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for VI2_AVX2 inputs with a wider accumulator
;; (operand 3).  pmaddwd of operands 1 and 2 is placed in a fresh
;; temporary, then operand 0 is set to a PLUS in the wide mode.
;; NOTE(review): the PLUS operands are on lines not shown in this excerpt;
;; presumably operand 3 and the temporary -- confirm.
5810 (define_expand "sdot_prod<mode>"
5811 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5812 (match_operand:VI2_AVX2 1 "register_operand" "")
5813 (match_operand:VI2_AVX2 2 "register_operand" "")
5814 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5817 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5818 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5819 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5820 gen_rtx_PLUS (<sseunpackmode>mode,
;; Pattern-name prefix per extension code: "sse2" for zero_extend,
;; "sse4_1" for sign_extend.  Used to pick the V2SI -> V2DI widening
;; multiply generator in <s>dot_prodv4si.
5825 (define_code_attr sse2_sse4_1
5826 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; V4SI dot product accumulating into V2DI.  The unsigned form needs
;; SSE2, the signed form SSE4.1 (see the condition).
;; Steps visible here:
;;   t1 = widening multiply of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V1TI values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the shift count is on lines not shown in this excerpt;
;; presumably 32 bits, to bring the odd elements into place -- confirm.
5828 (define_expand "<s>dot_prodv4si"
5829 [(match_operand:V2DI 0 "register_operand" "")
5830 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5831 (match_operand:V4SI 2 "register_operand" "")
5832 (match_operand:V2DI 3 "register_operand" "")]
5833 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5837 t1 = gen_reg_rtx (V2DImode);
5838 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5839 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5841 t2 = gen_reg_rtx (V4SImode);
5842 t3 = gen_reg_rtx (V4SImode);
5843 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5844 gen_lowpart (V1TImode, operands[1]),
5846 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5847 gen_lowpart (V1TImode, operands[2]),
5850 t4 = gen_reg_rtx (V2DImode);
5851 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5853 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; V8SI dot product accumulating into V4DI, the 256-bit counterpart of
;; <s>dot_prodv4si.  Steps visible here:
;;   t1 = avx2 widening multiply of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V2TI values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the shift count and the enabling condition are on lines
;; not shown in this excerpt -- confirm against the full file.
5857 (define_expand "<s>dot_prodv8si"
5858 [(match_operand:V4DI 0 "register_operand" "")
5859 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5860 (match_operand:V8SI 2 "register_operand" "")
5861 (match_operand:V4DI 3 "register_operand" "")]
5866 t1 = gen_reg_rtx (V4DImode);
5867 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5868 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5870 t2 = gen_reg_rtx (V8SImode);
5871 t3 = gen_reg_rtx (V8SImode);
5872 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5873 gen_lowpart (V2TImode, operands[1]),
5875 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5876 gen_lowpart (V2TImode, operands[2]),
5879 t4 = gen_reg_rtx (V4DImode);
5880 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5882 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Per-element right shift for VI24_AVX2, emitted as psra<suffix>
;; (two-operand form, destination tied to operand 1) or vpsra<suffix>
;; (three-operand AVX form).  The SImode count uses constraint "xN" in
;; both alternatives; length_immediate depends on whether it is a
;; const_int.
5886 (define_insn "ashr<mode>3"
5887 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5889 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5890 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5893 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5894 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5895 [(set_attr "isa" "noavx,avx")
5896 (set_attr "type" "sseishft")
5897 (set (attr "length_immediate")
5898 (if_then_else (match_operand 2 "const_int_operand" "")
5900 (const_string "0")))
5901 (set_attr "prefix_data16" "1,*")
5902 (set_attr "prefix" "orig,vex")
5903 (set_attr "mode" "<sseinsnmode>")])
;; Per-element shifts covered by the any_lshift code iterator, for
;; VI248_AVX2.  Emits p<vshift><suffix> (two-operand form, destination
;; tied to operand 1) or vp<vshift><suffix> (three-operand AVX form).
;; The SImode count uses constraint "xN" in both alternatives.
5905 (define_insn "<shift_insn><mode>3"
5906 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5907 (any_lshift:VI248_AVX2
5908 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5909 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5912 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5913 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5914 [(set_attr "isa" "noavx,avx")
5915 (set_attr "type" "sseishft")
5916 (set (attr "length_immediate")
5917 (if_then_else (match_operand 2 "const_int_operand" "")
5919 (const_string "0")))
5920 (set_attr "prefix_data16" "1,*")
5921 (set_attr "prefix" "orig,vex")
5922 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector left shift for VI_128.  Operands 0 and 1 are re-expressed
;; as V1TImode via gen_lowpart, so the shift acts on the full 128-bit
;; value; the count must satisfy const_0_to_255_mul_8_operand.
5924 (define_expand "vec_shl_<mode>"
5925 [(set (match_operand:VI_128 0 "register_operand" "")
5927 (match_operand:VI_128 1 "register_operand" "")
5928 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5931 operands[0] = gen_lowpart (V1TImode, operands[0]);
5932 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register left shift for VIMAX_AVX2.  The RTL count (operand 2)
;; is divided by 8 before printing, so the emitted pslldq / vpslldq
;; immediate is the RTL count / 8.
5935 (define_insn "<sse2_avx2>_ashl<mode>3"
5936 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5938 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5939 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5942 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5944 switch (which_alternative)
5947 return "pslldq\t{%2, %0|%0, %2}";
5949 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5954 [(set_attr "isa" "noavx,avx")
5955 (set_attr "type" "sseishft")
5956 (set_attr "length_immediate" "1")
5957 (set_attr "prefix_data16" "1,*")
5958 (set_attr "prefix" "orig,vex")
5959 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector right shift for VI_128.  Operands 0 and 1 are re-expressed
;; as V1TImode via gen_lowpart, so the shift acts on the full 128-bit
;; value; the count must satisfy const_0_to_255_mul_8_operand.
5961 (define_expand "vec_shr_<mode>"
5962 [(set (match_operand:VI_128 0 "register_operand" "")
5964 (match_operand:VI_128 1 "register_operand" "")
5965 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5968 operands[0] = gen_lowpart (V1TImode, operands[0]);
5969 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register logical right shift for VIMAX_AVX2.  The RTL count
;; (operand 2) is divided by 8 before printing, so the emitted psrldq /
;; vpsrldq immediate is the RTL count / 8.
5972 (define_insn "<sse2_avx2>_lshr<mode>3"
5973 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5974 (lshiftrt:VIMAX_AVX2
5975 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5976 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5979 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5981 switch (which_alternative)
5984 return "psrldq\t{%2, %0|%0, %2}";
5986 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5991 [(set_attr "isa" "noavx,avx")
5992 (set_attr "type" "sseishft")
5993 (set_attr "length_immediate" "1")
5994 (set_attr "atom_unit" "sishuf")
5995 (set_attr "prefix_data16" "1,*")
5996 (set_attr "prefix" "orig,vex")
5997 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a binary operation on VI124_256 that only canonicalizes
;; its operands with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code iterator and the condition are on lines not
;; shown in this excerpt; presumably this is the integer min/max expander
;; paired with *avx2_<code><mode>3 -- confirm.
6000 (define_expand "<code><mode>3"
6001 [(set (match_operand:VI124_256 0 "register_operand" "")
6003 (match_operand:VI124_256 1 "nonimmediate_operand" "")
6004 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
6006 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer min/max on VI124_256, emitted as vp<maxmin_int><suffix>.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
6008 (define_insn "*avx2_<code><mode>3"
6009 [(set (match_operand:VI124_256 0 "register_operand" "=x")
6011 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
6012 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
6013 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6014 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6015 [(set_attr "type" "sseiadd")
6016 (set_attr "prefix_extra" "1")
6017 (set_attr "prefix" "vex")
6018 (set_attr "mode" "OI")])
;; Min/max on VI8_AVX2, expanded as a vector conditional through
;; ix86_expand_int_vcond.  The selected values are (op1, op2) for
;; SMAX/UMAX and swapped (op2, op1) otherwise; the comparison is op1 > op2
;; using GTU for the unsigned codes and GT for the signed ones.
6020 (define_expand "<code><mode>3"
6021 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
6023 (match_operand:VI8_AVX2 1 "register_operand" "")
6024 (match_operand:VI8_AVX2 2 "register_operand" "")))]
6031 xops[0] = operands[0];
6033 if (<CODE> == SMAX || <CODE> == UMAX)
6035 xops[1] = operands[1];
6036 xops[2] = operands[2];
6040 xops[1] = operands[2];
6041 xops[2] = operands[1];
6044 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
6046 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6047 xops[4] = operands[1];
6048 xops[5] = operands[2];
6050 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for VI124_128 that compares with GT.  With SSE4.1, or
;; for V8HImode, the operands are only canonicalized so an insn pattern
;; can match.  Otherwise both inputs are forced into registers and the
;; operation is expanded through ix86_expand_int_vcond on "op1 > op2".
;; NOTE(review): the test choosing between the (op1, op2) and (op2, op1)
;; value order is not shown in this excerpt -- confirm.
6055 (define_expand "<code><mode>3"
6056 [(set (match_operand:VI124_128 0 "register_operand" "")
6058 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6059 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6062 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6063 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6069 xops[0] = operands[0];
6070 operands[1] = force_reg (<MODE>mode, operands[1]);
6071 operands[2] = force_reg (<MODE>mode, operands[2]);
6075 xops[1] = operands[1];
6076 xops[2] = operands[2];
6080 xops[1] = operands[2];
6081 xops[2] = operands[1];
6084 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6085 xops[4] = operands[1];
6086 xops[5] = operands[2];
6088 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer min/max on VI14_128, emitted as p<maxmin_int><suffix>
;; or the three-operand vp<maxmin_int><suffix>.  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.
6094 (define_insn "*sse4_1_<code><mode>3"
6095 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6097 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6098 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6099 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6101 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6102 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6103 [(set_attr "isa" "noavx,avx")
6104 (set_attr "type" "sseiadd")
6105 (set_attr "prefix_extra" "1,*")
6106 (set_attr "prefix" "orig,vex")
6107 (set_attr "mode" "TI")])
;; SSE2 min/max on V8HI, emitted as p<maxmin_int>w or vp<maxmin_int>w.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
6109 (define_insn "*<code>v8hi3"
6110 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6112 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6113 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6114 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6116 p<maxmin_int>w\t{%2, %0|%0, %2}
6117 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6118 [(set_attr "isa" "noavx,avx")
6119 (set_attr "type" "sseiadd")
6120 (set_attr "prefix_data16" "1,*")
6121 (set_attr "prefix_extra" "*,1")
6122 (set_attr "prefix" "orig,vex")
6123 (set_attr "mode" "TI")])
;; Min/max expander for VI124_128 that compares with GTU.  Three cases:
;;  - SSE4.1, or V16QImode: operands are only canonicalized so an insn
;;    pattern can match.
;;  - UMAX on V8HImode: computed as ussub (op1, op2) + op2, using a fresh
;;    temporary when the destination is the same rtx as operand 2.
;;  - otherwise: both inputs are forced into registers and the operation
;;    is expanded through ix86_expand_int_vcond on "op1 >u op2".
;; NOTE(review): the test choosing between the (op1, op2) and (op2, op1)
;; value order in the last case is not shown in this excerpt -- confirm.
6125 (define_expand "<code><mode>3"
6126 [(set (match_operand:VI124_128 0 "register_operand" "")
6128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6129 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6132 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6133 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6134 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6136 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6137 operands[1] = force_reg (<MODE>mode, operands[1]);
6138 if (rtx_equal_p (op3, op2))
6139 op3 = gen_reg_rtx (V8HImode);
6140 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6141 emit_insn (gen_addv8hi3 (op0, op3, op2));
6149 operands[1] = force_reg (<MODE>mode, operands[1]);
6150 operands[2] = force_reg (<MODE>mode, operands[2]);
6152 xops[0] = operands[0];
6156 xops[1] = operands[1];
6157 xops[2] = operands[2];
6161 xops[1] = operands[2];
6162 xops[2] = operands[1];
6165 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6166 xops[4] = operands[1];
6167 xops[5] = operands[2];
6169 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer min/max on VI24_128, emitted as p<maxmin_int><suffix>
;; or the three-operand vp<maxmin_int><suffix>.  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.
6175 (define_insn "*sse4_1_<code><mode>3"
6176 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6178 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6179 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6180 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6182 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6183 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6184 [(set_attr "isa" "noavx,avx")
6185 (set_attr "type" "sseiadd")
6186 (set_attr "prefix_extra" "1,*")
6187 (set_attr "prefix" "orig,vex")
6188 (set_attr "mode" "TI")])
;; SSE2 min/max on V16QI, emitted as p<maxmin_int>b or vp<maxmin_int>b.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
6190 (define_insn "*<code>v16qi3"
6191 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6193 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6194 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6195 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6197 p<maxmin_int>b\t{%2, %0|%0, %2}
6198 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6199 [(set_attr "isa" "noavx,avx")
6200 (set_attr "type" "sseiadd")
6201 (set_attr "prefix_data16" "1,*")
6202 (set_attr "prefix_extra" "*,1")
6203 (set_attr "prefix" "orig,vex")
6204 (set_attr "mode" "TI")])
6206 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6208 ;; Parallel integral comparisons
6210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for 256-bit integer equality compare (VI_256); it only
;; canonicalizes the operands for EQ with
;; ix86_fixup_binary_operands_no_copy.
6212 (define_expand "avx2_eq<mode>3"
6213 [(set (match_operand:VI_256 0 "register_operand" "")
6215 (match_operand:VI_256 1 "nonimmediate_operand" "")
6216 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6218 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 element-wise equality compare on VI_256, emitted as
;; vpcmpeq<suffix>.  Operands 1 and 2 are commutative ("%").
6220 (define_insn "*avx2_eq<mode>3"
6221 [(set (match_operand:VI_256 0 "register_operand" "=x")
6223 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6224 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6225 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6226 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6227 [(set_attr "type" "ssecmp")
6228 (set_attr "prefix_extra" "1")
6229 (set_attr "prefix" "vex")
6230 (set_attr "mode" "OI")])
;; SSE4.1 equality compare on V2DI, emitted as pcmpeqq or vpcmpeqq.
;; Operands 1 and 2 are commutative ("%").
6232 (define_insn "*sse4_1_eqv2di3"
6233 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6235 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6236 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6237 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6239 pcmpeqq\t{%2, %0|%0, %2}
6240 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6241 [(set_attr "isa" "noavx,avx")
6242 (set_attr "type" "ssecmp")
6243 (set_attr "prefix_extra" "1")
6244 (set_attr "prefix" "orig,vex")
6245 (set_attr "mode" "TI")])
;; SSE2 equality compare on VI124_128, emitted as pcmpeq<suffix> or
;; vpcmpeq<suffix>.  Disabled when TARGET_XOP is set (see the condition).
6247 (define_insn "*sse2_eq<mode>3"
6248 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6250 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6251 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6252 "TARGET_SSE2 && !TARGET_XOP
6253 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6255 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6256 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6257 [(set_attr "isa" "noavx,avx")
6258 (set_attr "type" "ssecmp")
6259 (set_attr "prefix_data16" "1,*")
6260 (set_attr "prefix" "orig,vex")
6261 (set_attr "mode" "TI")])
;; Named expander for the SSE2 equality compare on VI124_128 (not
;; available with XOP); it only canonicalizes the operands for EQ.
6263 (define_expand "sse2_eq<mode>3"
6264 [(set (match_operand:VI124_128 0 "register_operand" "")
6266 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6267 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6268 "TARGET_SSE2 && !TARGET_XOP "
6269 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare; it only canonicalizes
;; the operands for EQ with ix86_fixup_binary_operands_no_copy.
6271 (define_expand "sse4_1_eqv2di3"
6272 [(set (match_operand:V2DI 0 "register_operand" "")
6274 (match_operand:V2DI 1 "nonimmediate_operand" "")
6275 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6277 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than compare on V2DI, emitted as pcmpgtq or vpcmpgtq.
;; Operand 1 must be a register (no "%" commutativity); operand 2 may be
;; memory.
6279 (define_insn "sse4_2_gtv2di3"
6280 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6282 (match_operand:V2DI 1 "register_operand" "0,x")
6283 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6286 pcmpgtq\t{%2, %0|%0, %2}
6287 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6288 [(set_attr "isa" "noavx,avx")
6289 (set_attr "type" "ssecmp")
6290 (set_attr "prefix_extra" "1")
6291 (set_attr "prefix" "orig,vex")
6292 (set_attr "mode" "TI")])
;; Greater-than compare on VI_256, emitted as vpcmpgt<suffix>.
;; Operand 1 must be a register; operand 2 may be memory.
6294 (define_insn "avx2_gt<mode>3"
6295 [(set (match_operand:VI_256 0 "register_operand" "=x")
6297 (match_operand:VI_256 1 "register_operand" "x")
6298 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6300 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6301 [(set_attr "type" "ssecmp")
6302 (set_attr "prefix_extra" "1")
6303 (set_attr "prefix" "vex")
6304 (set_attr "mode" "OI")])
;; SSE2 greater-than compare on VI124_128, emitted as pcmpgt<suffix> or
;; vpcmpgt<suffix>.  Disabled when TARGET_XOP is set (see the condition).
6306 (define_insn "sse2_gt<mode>3"
6307 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6309 (match_operand:VI124_128 1 "register_operand" "0,x")
6310 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6311 "TARGET_SSE2 && !TARGET_XOP"
6313 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6314 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6315 [(set_attr "isa" "noavx,avx")
6316 (set_attr "type" "ssecmp")
6317 (set_attr "prefix_data16" "1,*")
6318 (set_attr "prefix" "orig,vex")
6319 (set_attr "mode" "TI")])
;; Vector conditional for 256-bit modes: operand 3 compares the integer
;; vectors in operands 4 and 5 and selects between operands 1 and 2.
;; The data mode and the comparison mode must have the same number of
;; elements.  All the work is done by ix86_expand_int_vcond.
6321 (define_expand "vcond<V_256:mode><VI_256:mode>"
6322 [(set (match_operand:V_256 0 "register_operand" "")
6324 (match_operator 3 ""
6325 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6326 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6327 (match_operand:V_256 1 "general_operand" "")
6328 (match_operand:V_256 2 "general_operand" "")))]
6330 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6331 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6333 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional for 128-bit modes with a VI124_128 comparison:
;; operand 3 compares operands 4 and 5 and selects between operands 1
;; and 2.  The data mode and the comparison mode must have the same number
;; of elements.  All the work is done by ix86_expand_int_vcond.
6338 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6339 [(set (match_operand:V_128 0 "register_operand" "")
6341 (match_operator 3 ""
6342 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6343 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6344 (match_operand:V_128 1 "general_operand" "")
6345 (match_operand:V_128 2 "general_operand" "")))]
6347 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6348 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6350 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional for VI8F_128 data with a V2DI comparison: operand 3
;; compares operands 4 and 5 and selects between operands 1 and 2.
;; All the work is done by ix86_expand_int_vcond.
6355 (define_expand "vcond<VI8F_128:mode>v2di"
6356 [(set (match_operand:VI8F_128 0 "register_operand" "")
6357 (if_then_else:VI8F_128
6358 (match_operator 3 ""
6359 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6360 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6361 (match_operand:VI8F_128 1 "general_operand" "")
6362 (match_operand:VI8F_128 2 "general_operand" "")))]
6365 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant of the 256-bit vector conditional.  Same
;; operand layout and element-count requirement as the vcond form; the
;; expansion is likewise delegated to ix86_expand_int_vcond.
6370 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6371 [(set (match_operand:V_256 0 "register_operand" "")
6373 (match_operator 3 ""
6374 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6375 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6376 (match_operand:V_256 1 "general_operand" "")
6377 (match_operand:V_256 2 "general_operand" "")))]
6379 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6380 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6382 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant of the 128-bit vector conditional with a
;; VI124_128 comparison.  Same operand layout and element-count
;; requirement as the vcond form; the expansion is likewise delegated to
;; ix86_expand_int_vcond.
6387 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6388 [(set (match_operand:V_128 0 "register_operand" "")
6390 (match_operator 3 ""
6391 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6392 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6393 (match_operand:V_128 1 "general_operand" "")
6394 (match_operand:V_128 2 "general_operand" "")))]
6396 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6397 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6399 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned-comparison variant of the VI8F_128 vector conditional with a
;; V2DI comparison; the expansion is delegated to ix86_expand_int_vcond.
6404 (define_expand "vcondu<VI8F_128:mode>v2di"
6405 [(set (match_operand:VI8F_128 0 "register_operand" "")
6406 (if_then_else:VI8F_128
6407 (match_operator 3 ""
6408 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6409 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6410 (match_operand:VI8F_128 1 "general_operand" "")
6411 (match_operand:VI8F_128 2 "general_operand" "")))]
6414 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable-selector vec_perm<mode> expander.  The
;; 128-bit modes are unconditional here; every 256-bit mode additionally
;; requires TARGET_AVX2.
6419 (define_mode_iterator VEC_PERM_AVX2
6420 [V16QI V8HI V4SI V2DI V4SF V2DF
6421 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6422 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6423 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Two-input permutation with a selector held in a register (operand 3,
;; in the integer vector mode matching <mode>).  Available with SSSE3, AVX
;; or XOP; the expansion is done by ix86_expand_vec_perm.
6425 (define_expand "vec_perm<mode>"
6426 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6427 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6428 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6429 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6430 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6432 ix86_expand_vec_perm (operands);
;; Modes accepted by the constant-selector vec_perm_const<mode> expander,
;; each gated on the ISA level listed next to it.
6436 (define_mode_iterator VEC_PERM_CONST
6437 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6438 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6439 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6440 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6441 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6442 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Two-input permutation with a constant selector (operand 3, which has
;; no predicate).  The expansion is attempted with
;; ix86_expand_vec_perm_const.
;; NOTE(review): the handling of its result is on lines not shown in this
;; excerpt -- presumably DONE on success and FAIL otherwise; confirm.
6444 (define_expand "vec_perm_const<mode>"
6445 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6446 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6447 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6448 (match_operand:<sseintvecmode> 3 "" "")]
6451 if (ix86_expand_vec_perm_const (operands))
6457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6459 ;; Parallel bitwise logical operations
6461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of a VI vector, expressed as an XOR.  Operand 2 is
;; created by the expander: a CONST_VECTOR with every element set to
;; constm1_rtx, forced into a register.
6463 (define_expand "one_cmpl<mode>2"
6464 [(set (match_operand:VI 0 "register_operand" "")
6465 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6469 int i, n = GET_MODE_NUNITS (<MODE>mode);
6470 rtvec v = rtvec_alloc (n);
6472 for (i = 0; i < n; ++i)
6473 RTVEC_ELT (v, i) = constm1_rtx;
6475 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for and-not on VI_AVX2: operand 1 (a register) is
;; complemented and combined with operand 2, which may be memory.  No
;; preparation code is visible in this excerpt.
6478 (define_expand "<sse2_avx2>_andnot<mode>3"
6479 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6481 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6482 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for all VI modes.  The output code picks a mnemonic based
;; on the insn's "mode" attribute (asserting the matching ISA), then
;; formats it into a static buffer with snprintf: the plain two-operand
;; template for alternative 0, a "v"-prefixed three-operand template for
;; alternative 1.  The "mode" attribute falls back to V8SF for 256-bit
;; modes without AVX2 and to V4SF without SSE2.
;; NOTE(review): the mnemonic strings themselves are on lines not shown
;; in this excerpt.
6485 (define_insn "*andnot<mode>3"
6486 [(set (match_operand:VI 0 "register_operand" "=x,x")
6488 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6489 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6492 static char buf[32];
6496 switch (get_attr_mode (insn))
6499 gcc_assert (TARGET_AVX2);
6501 gcc_assert (TARGET_SSE2);
6507 gcc_assert (TARGET_AVX);
6509 gcc_assert (TARGET_SSE);
6518 switch (which_alternative)
6521 ops = "%s\t{%%2, %%0|%%0, %%2}";
6524 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6530 snprintf (buf, sizeof (buf), ops, tmp);
6533 [(set_attr "isa" "noavx,avx")
6534 (set_attr "type" "sselog")
6535 (set (attr "prefix_data16")
6537 (and (eq_attr "alternative" "0")
6538 (eq_attr "mode" "TI"))
6540 (const_string "*")))
6541 (set_attr "prefix" "orig,vex")
6543 (cond [(and (not (match_test "TARGET_AVX2"))
6544 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6545 (const_string "V8SF")
6546 (not (match_test "TARGET_SSE2"))
6547 (const_string "V4SF")
6549 (const_string "<sseinsnmode>")))])
;; Expander for a binary operation on VI vectors that only canonicalizes
;; its operands with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code iterator is on a line not shown in this
;; excerpt; given the surrounding section this is presumably the bitwise
;; logic (and/ior/xor) expander -- confirm.
6551 (define_expand "<code><mode>3"
6552 [(set (match_operand:VI 0 "register_operand" "")
6554 (match_operand:VI 1 "nonimmediate_operand" "")
6555 (match_operand:VI 2 "nonimmediate_operand" "")))]
6557 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary insn for all VI modes with commutative operands ("%").  The
;; output code picks a mnemonic based on the insn's "mode" attribute
;; (asserting the matching ISA), then formats it into a static buffer
;; with snprintf: the plain two-operand template for alternative 0, a
;; "v"-prefixed three-operand template for alternative 1.  The "mode"
;; attribute falls back to V8SF for 256-bit modes without AVX2 and to
;; V4SF without SSE2.
;; NOTE(review): the RTL code iterator and the mnemonic strings are on
;; lines not shown in this excerpt.
6559 (define_insn "*<code><mode>3"
6560 [(set (match_operand:VI 0 "register_operand" "=x,x")
6562 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6563 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6565 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6567 static char buf[32];
6571 switch (get_attr_mode (insn))
6574 gcc_assert (TARGET_AVX2);
6576 gcc_assert (TARGET_SSE2);
6582 gcc_assert (TARGET_AVX);
6584 gcc_assert (TARGET_SSE);
6593 switch (which_alternative)
6596 ops = "%s\t{%%2, %%0|%%0, %%2}";
6599 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6605 snprintf (buf, sizeof (buf), ops, tmp);
6608 [(set_attr "isa" "noavx,avx")
6609 (set_attr "type" "sselog")
6610 (set (attr "prefix_data16")
6612 (and (eq_attr "alternative" "0")
6613 (eq_attr "mode" "TI"))
6615 (const_string "*")))
6616 (set_attr "prefix" "orig,vex")
6618 (cond [(and (not (match_test "TARGET_AVX2"))
6619 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6620 (const_string "V8SF")
6621 (not (match_test "TARGET_SSE2"))
6622 (const_string "V4SF")
6624 (const_string "<sseinsnmode>")))])
;; And-not on a TFmode value held in an SSE register, emitted with the
;; integer instruction pandn / vpandn.
6626 (define_insn "*andnottf3"
6627 [(set (match_operand:TF 0 "register_operand" "=x,x")
6629 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6630 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6633 pandn\t{%2, %0|%0, %2}
6634 vpandn\t{%2, %1, %0|%0, %1, %2}"
6635 [(set_attr "isa" "noavx,avx")
6636 (set_attr "type" "sselog")
6637 (set_attr "prefix_data16" "1,*")
6638 (set_attr "prefix" "orig,vex")
6639 (set_attr "mode" "TI")])
;; Expander for a binary operation on TFmode values that only
;; canonicalizes its operands with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code iterator is on a line not shown in this
;; excerpt; presumably the bitwise logic codes matched by *<code>tf3.
6641 (define_expand "<code>tf3"
6642 [(set (match_operand:TF 0 "register_operand" "")
6644 (match_operand:TF 1 "nonimmediate_operand" "")
6645 (match_operand:TF 2 "nonimmediate_operand" "")))]
6647 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logic on a TFmode value held in an SSE register, emitted as
;; p<logic> or vp<logic>.  Operands 1 and 2 are commutative ("%").
6649 (define_insn "*<code>tf3"
6650 [(set (match_operand:TF 0 "register_operand" "=x,x")
6652 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6653 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6655 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6657 p<logic>\t{%2, %0|%0, %2}
6658 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6659 [(set_attr "isa" "noavx,avx")
6660 (set_attr "type" "sselog")
6661 (set_attr "prefix_data16" "1,*")
6662 (set_attr "prefix" "orig,vex")
6663 (set_attr "mode" "TI")])
6665 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6667 ;; Parallel integral element swizzling
6669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2 vectors into one <ssepackmode>
;; vector.  Both inputs are viewed in the narrower-element mode via
;; gen_lowpart and passed to ix86_expand_vec_extract_even_odd with a last
;; argument of 0.
6671 (define_expand "vec_pack_trunc_<mode>"
6672 [(match_operand:<ssepackmode> 0 "register_operand" "")
6673 (match_operand:VI248_AVX2 1 "register_operand" "")
6674 (match_operand:VI248_AVX2 2 "register_operand" "")]
6677 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6678 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6679 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation into VI1_AVX2: the result is the
;; concatenation of the ss_truncate of operand 1 and of operand 2.
;; Emitted as packsswb / vpacksswb.
6683 (define_insn "<sse2_avx2>_packsswb"
6684 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6685 (vec_concat:VI1_AVX2
6686 (ss_truncate:<ssehalfvecmode>
6687 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6688 (ss_truncate:<ssehalfvecmode>
6689 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6692 packsswb\t{%2, %0|%0, %2}
6693 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6694 [(set_attr "isa" "noavx,avx")
6695 (set_attr "type" "sselog")
6696 (set_attr "prefix_data16" "1,*")
6697 (set_attr "prefix" "orig,vex")
6698 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation into VI2_AVX2: the result is the
;; concatenation of the ss_truncate of operand 1 and of operand 2.
;; Emitted as packssdw / vpackssdw.
6700 (define_insn "<sse2_avx2>_packssdw"
6701 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6702 (vec_concat:VI2_AVX2
6703 (ss_truncate:<ssehalfvecmode>
6704 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6705 (ss_truncate:<ssehalfvecmode>
6706 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6709 packssdw\t{%2, %0|%0, %2}
6710 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6711 [(set_attr "isa" "noavx,avx")
6712 (set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1,*")
6714 (set_attr "prefix" "orig,vex")
6715 (set_attr "mode" "<sseinsnmode>")])
;; Same shape as the packsswb pattern, but both halves use us_truncate
;; (unsigned-saturating truncation).  Alternative 0 (isa noavx) emits
;; packuswb with operand 1 tied to the destination; alternative 1 (isa avx)
;; emits vpackuswb.
6717 (define_insn "<sse2_avx2>_packuswb"
6718 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6719 (vec_concat:VI1_AVX2
6720 (us_truncate:<ssehalfvecmode>
6721 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6722 (us_truncate:<ssehalfvecmode>
6723 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6726 packuswb\t{%2, %0|%0, %2}
6727 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6728 [(set_attr "isa" "noavx,avx")
6729 (set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1,*")
6731 (set_attr "prefix" "orig,vex")
6732 (set_attr "mode" "<sseinsnmode>")])
;; V32QI high interleave, emitted as vpunpckhbw.  The selection list pairs
;; index i with i+32 for i = 8..15 and then for i = 24..31.
;; NOTE(review): indices 32..63 presumably address operand 2 within a
;; concatenation of operands 1 and 2 -- the wrapping RTL is not visible in
;; this excerpt; confirm.
6734 (define_insn "avx2_interleave_highv32qi"
6735 [(set (match_operand:V32QI 0 "register_operand" "=x")
6738 (match_operand:V32QI 1 "register_operand" "x")
6739 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6740 (parallel [(const_int 8) (const_int 40)
6741 (const_int 9) (const_int 41)
6742 (const_int 10) (const_int 42)
6743 (const_int 11) (const_int 43)
6744 (const_int 12) (const_int 44)
6745 (const_int 13) (const_int 45)
6746 (const_int 14) (const_int 46)
6747 (const_int 15) (const_int 47)
6748 (const_int 24) (const_int 56)
6749 (const_int 25) (const_int 57)
6750 (const_int 26) (const_int 58)
6751 (const_int 27) (const_int 59)
6752 (const_int 28) (const_int 60)
6753 (const_int 29) (const_int 61)
6754 (const_int 30) (const_int 62)
6755 (const_int 31) (const_int 63)])))]
6757 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6758 [(set_attr "type" "sselog")
6759 (set_attr "prefix" "vex")
6760 (set_attr "mode" "OI")])
;; V16QI high interleave: the selection list pairs index i with i+16 for
;; i = 8..15.  Alternative 0 (isa noavx) emits punpckhbw with operand 1 tied
;; to the destination; alternative 1 (isa avx) emits vpunpckhbw.
6762 (define_insn "vec_interleave_highv16qi"
6763 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6766 (match_operand:V16QI 1 "register_operand" "0,x")
6767 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6768 (parallel [(const_int 8) (const_int 24)
6769 (const_int 9) (const_int 25)
6770 (const_int 10) (const_int 26)
6771 (const_int 11) (const_int 27)
6772 (const_int 12) (const_int 28)
6773 (const_int 13) (const_int 29)
6774 (const_int 14) (const_int 30)
6775 (const_int 15) (const_int 31)])))]
6778 punpckhbw\t{%2, %0|%0, %2}
6779 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6780 [(set_attr "isa" "noavx,avx")
6781 (set_attr "type" "sselog")
6782 (set_attr "prefix_data16" "1,*")
6783 (set_attr "prefix" "orig,vex")
6784 (set_attr "mode" "TI")])
;; V32QI low interleave, emitted as vpunpcklbw.  The selection list pairs
;; index i with i+32 for i = 0..7 and then for i = 16..23.
6786 (define_insn "avx2_interleave_lowv32qi"
6787 [(set (match_operand:V32QI 0 "register_operand" "=x")
6790 (match_operand:V32QI 1 "register_operand" "x")
6791 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6792 (parallel [(const_int 0) (const_int 32)
6793 (const_int 1) (const_int 33)
6794 (const_int 2) (const_int 34)
6795 (const_int 3) (const_int 35)
6796 (const_int 4) (const_int 36)
6797 (const_int 5) (const_int 37)
6798 (const_int 6) (const_int 38)
6799 (const_int 7) (const_int 39)
6800 (const_int 16) (const_int 48)
6801 (const_int 17) (const_int 49)
6802 (const_int 18) (const_int 50)
6803 (const_int 19) (const_int 51)
6804 (const_int 20) (const_int 52)
6805 (const_int 21) (const_int 53)
6806 (const_int 22) (const_int 54)
6807 (const_int 23) (const_int 55)])))]
6809 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6810 [(set_attr "type" "sselog")
6811 (set_attr "prefix" "vex")
6812 (set_attr "mode" "OI")])
;; V16QI low interleave: the selection list pairs index i with i+16 for
;; i = 0..7.  Alternative 0 (isa noavx) emits punpcklbw with operand 1 tied
;; to the destination; alternative 1 (isa avx) emits vpunpcklbw.
6814 (define_insn "vec_interleave_lowv16qi"
6815 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6818 (match_operand:V16QI 1 "register_operand" "0,x")
6819 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6820 (parallel [(const_int 0) (const_int 16)
6821 (const_int 1) (const_int 17)
6822 (const_int 2) (const_int 18)
6823 (const_int 3) (const_int 19)
6824 (const_int 4) (const_int 20)
6825 (const_int 5) (const_int 21)
6826 (const_int 6) (const_int 22)
6827 (const_int 7) (const_int 23)])))]
6830 punpcklbw\t{%2, %0|%0, %2}
6831 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6832 [(set_attr "isa" "noavx,avx")
6833 (set_attr "type" "sselog")
6834 (set_attr "prefix_data16" "1,*")
6835 (set_attr "prefix" "orig,vex")
6836 (set_attr "mode" "TI")])
;; V16HI high interleave, emitted as vpunpckhwd.  The selection list pairs
;; index i with i+16 for i = 4..7 and then for i = 12..15.
6838 (define_insn "avx2_interleave_highv16hi"
6839 [(set (match_operand:V16HI 0 "register_operand" "=x")
6842 (match_operand:V16HI 1 "register_operand" "x")
6843 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6844 (parallel [(const_int 4) (const_int 20)
6845 (const_int 5) (const_int 21)
6846 (const_int 6) (const_int 22)
6847 (const_int 7) (const_int 23)
6848 (const_int 12) (const_int 28)
6849 (const_int 13) (const_int 29)
6850 (const_int 14) (const_int 30)
6851 (const_int 15) (const_int 31)])))]
6853 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6854 [(set_attr "type" "sselog")
6855 (set_attr "prefix" "vex")
6856 (set_attr "mode" "OI")])
;; V8HI high interleave: the selection list pairs index i with i+8 for
;; i = 4..7.  Alternative 0 (isa noavx) emits punpckhwd with operand 1 tied
;; to the destination; alternative 1 (isa avx) emits vpunpckhwd.
6858 (define_insn "vec_interleave_highv8hi"
6859 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6862 (match_operand:V8HI 1 "register_operand" "0,x")
6863 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6864 (parallel [(const_int 4) (const_int 12)
6865 (const_int 5) (const_int 13)
6866 (const_int 6) (const_int 14)
6867 (const_int 7) (const_int 15)])))]
6870 punpckhwd\t{%2, %0|%0, %2}
6871 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6872 [(set_attr "isa" "noavx,avx")
6873 (set_attr "type" "sselog")
6874 (set_attr "prefix_data16" "1,*")
6875 (set_attr "prefix" "orig,vex")
6876 (set_attr "mode" "TI")])
;; V16HI low interleave, emitted as vpunpcklwd.  The selection list pairs
;; index i with i+16 for i = 0..3 and then for i = 8..11.
6878 (define_insn "avx2_interleave_lowv16hi"
6879 [(set (match_operand:V16HI 0 "register_operand" "=x")
6882 (match_operand:V16HI 1 "register_operand" "x")
6883 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6884 (parallel [(const_int 0) (const_int 16)
6885 (const_int 1) (const_int 17)
6886 (const_int 2) (const_int 18)
6887 (const_int 3) (const_int 19)
6888 (const_int 8) (const_int 24)
6889 (const_int 9) (const_int 25)
6890 (const_int 10) (const_int 26)
6891 (const_int 11) (const_int 27)])))]
6893 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6894 [(set_attr "type" "sselog")
6895 (set_attr "prefix" "vex")
6896 (set_attr "mode" "OI")])
;; V8HI low interleave: the selection list pairs index i with i+8 for
;; i = 0..3.  Alternative 0 (isa noavx) emits punpcklwd with operand 1 tied
;; to the destination; alternative 1 (isa avx) emits vpunpcklwd.
6898 (define_insn "vec_interleave_lowv8hi"
6899 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6902 (match_operand:V8HI 1 "register_operand" "0,x")
6903 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6904 (parallel [(const_int 0) (const_int 8)
6905 (const_int 1) (const_int 9)
6906 (const_int 2) (const_int 10)
6907 (const_int 3) (const_int 11)])))]
6910 punpcklwd\t{%2, %0|%0, %2}
6911 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6912 [(set_attr "isa" "noavx,avx")
6913 (set_attr "type" "sselog")
6914 (set_attr "prefix_data16" "1,*")
6915 (set_attr "prefix" "orig,vex")
6916 (set_attr "mode" "TI")])
;; V8SI high interleave, emitted as vpunpckhdq.  The selection list pairs
;; index i with i+8 for i = 2, 3 and then for i = 6, 7.
6918 (define_insn "avx2_interleave_highv8si"
6919 [(set (match_operand:V8SI 0 "register_operand" "=x")
6922 (match_operand:V8SI 1 "register_operand" "x")
6923 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6924 (parallel [(const_int 2) (const_int 10)
6925 (const_int 3) (const_int 11)
6926 (const_int 6) (const_int 14)
6927 (const_int 7) (const_int 15)])))]
6929 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6930 [(set_attr "type" "sselog")
6931 (set_attr "prefix" "vex")
6932 (set_attr "mode" "OI")])
;; V4SI high interleave: the selection list is (2, 6, 3, 7).  Alternative 0
;; (isa noavx) emits punpckhdq with operand 1 tied to the destination;
;; alternative 1 (isa avx) emits vpunpckhdq.
6934 (define_insn "vec_interleave_highv4si"
6935 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6938 (match_operand:V4SI 1 "register_operand" "0,x")
6939 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6940 (parallel [(const_int 2) (const_int 6)
6941 (const_int 3) (const_int 7)])))]
6944 punpckhdq\t{%2, %0|%0, %2}
6945 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6946 [(set_attr "isa" "noavx,avx")
6947 (set_attr "type" "sselog")
6948 (set_attr "prefix_data16" "1,*")
6949 (set_attr "prefix" "orig,vex")
6950 (set_attr "mode" "TI")])
;; V8SI low interleave, emitted as vpunpckldq.  The selection list pairs
;; index i with i+8 for i = 0, 1 and then for i = 4, 5.
6952 (define_insn "avx2_interleave_lowv8si"
6953 [(set (match_operand:V8SI 0 "register_operand" "=x")
6956 (match_operand:V8SI 1 "register_operand" "x")
6957 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6958 (parallel [(const_int 0) (const_int 8)
6959 (const_int 1) (const_int 9)
6960 (const_int 4) (const_int 12)
6961 (const_int 5) (const_int 13)])))]
6963 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6964 [(set_attr "type" "sselog")
6965 (set_attr "prefix" "vex")
6966 (set_attr "mode" "OI")])
;; V4SI low interleave: the selection list is (0, 4, 1, 5).  Alternative 0
;; (isa noavx) emits punpckldq with operand 1 tied to the destination;
;; alternative 1 (isa avx) emits vpunpckldq.
6968 (define_insn "vec_interleave_lowv4si"
6969 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6972 (match_operand:V4SI 1 "register_operand" "0,x")
6973 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6974 (parallel [(const_int 0) (const_int 4)
6975 (const_int 1) (const_int 5)])))]
6978 punpckldq\t{%2, %0|%0, %2}
6979 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6980 [(set_attr "isa" "noavx,avx")
6981 (set_attr "type" "sselog")
6982 (set_attr "prefix_data16" "1,*")
6983 (set_attr "prefix" "orig,vex")
6984 (set_attr "mode" "TI")])
;; Expander for the VI_256 modes.  It emits both avx2_interleave_low<mode>
;; (into t1) and avx2_interleave_high<mode> (into t2), then combines the two
;; temporaries with avx2_permv2ti, viewed as V4DImode, using the immediate
;; 1 + (3 << 4).
;; NOTE(review): that immediate presumably selects the upper 128-bit half of
;; t1 followed by the upper half of t2 -- confirm against avx2_permv2ti.
6986 (define_expand "vec_interleave_high<mode>"
6987 [(match_operand:VI_256 0 "register_operand" "=x")
6988 (match_operand:VI_256 1 "register_operand" "x")
6989 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6992 rtx t1 = gen_reg_rtx (<MODE>mode);
6993 rtx t2 = gen_reg_rtx (<MODE>mode);
6994 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6995 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6996 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6997 gen_lowpart (V4DImode, t1),
6998 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the VI_256 modes.  It emits the same two interleave insns as
;; vec_interleave_high<mode> (low into t1, high into t2) and differs only in
;; the avx2_permv2ti immediate, which here is 0 + (2 << 4).
;; NOTE(review): that immediate presumably selects the lower 128-bit half of
;; t1 followed by the lower half of t2 -- confirm against avx2_permv2ti.
7002 (define_expand "vec_interleave_low<mode>"
7003 [(match_operand:VI_256 0 "register_operand" "=x")
7004 (match_operand:VI_256 1 "register_operand" "x")
7005 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7008 rtx t1 = gen_reg_rtx (<MODE>mode);
7009 rtx t2 = gen_reg_rtx (<MODE>mode);
7010 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7011 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7012 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
7013 gen_lowpart (V4DImode, t1),
7014 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
;; V8HI carries no extra condition here; V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
7018 ;; Modes handled by pinsr patterns.
7019 (define_mode_iterator PINSR_MODE
7020 [(V16QI "TARGET_SSE4_1") V8HI
7021 (V4SI "TARGET_SSE4_1")
7022 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern below: "sse2" for V8HI and "sse4_1" for
;; V16QI, V4SI and V2DI.
7024 (define_mode_attr sse2p4_1
7025 [(V16QI "sse4_1") (V8HI "sse2")
7026 (V4SI "sse4_1") (V2DI "sse4_1")])
;; Element insert: vec_merge of a vec_duplicate of scalar operand 2 into
;; vector operand 1 under the constant mask in operand 3.  The insn
;; condition requires exact_log2 of that mask to be a valid element index
;; (below GET_MODE_NUNITS), and the output code replaces operands[3] with
;; that log2 so the printed immediate is the index.  When the scalar mode is
;; narrower than SImode, operand 2 is printed with the %k modifier.
;; Alternatives 0-1 are isa noavx (pinsr); alternatives 2-3 are isa avx
;; (vpinsr).
7028 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
7029 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
7030 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
7031 (vec_merge:PINSR_MODE
7032 (vec_duplicate:PINSR_MODE
7033 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
7034 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
7035 (match_operand:SI 3 "const_int_operand" "")))]
7037 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7038 < GET_MODE_NUNITS (<MODE>mode))"
7040 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7042 switch (which_alternative)
7045 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7046 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7049 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7051 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7052 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7055 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7060 [(set_attr "isa" "noavx,noavx,avx,avx")
7061 (set_attr "type" "sselog")
7062 (set (attr "prefix_rex")
7064 (and (not (match_test "TARGET_AVX"))
7065 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7067 (const_string "*")))
7068 (set (attr "prefix_data16")
7070 (and (not (match_test "TARGET_AVX"))
7071 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7073 (const_string "*")))
7074 (set (attr "prefix_extra")
7076 (and (not (match_test "TARGET_AVX"))
7077 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7079 (const_string "1")))
7080 (set_attr "length_immediate" "1")
7081 (set_attr "prefix" "orig,orig,vex,vex")
7082 (set_attr "mode" "TI")])
;; Byte extract from V16QI operand 1 at constant index operand 2 (0..15)
;; into an SWI48 general register.  Emits %vpextrb with the destination
;; printed via %k0.
7084 (define_insn "*sse4_1_pextrb_<mode>"
7085 [(set (match_operand:SWI48 0 "register_operand" "=r")
7088 (match_operand:V16QI 1 "register_operand" "x")
7089 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7091 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7092 [(set_attr "type" "sselog")
7093 (set_attr "prefix_extra" "1")
7094 (set_attr "length_immediate" "1")
7095 (set_attr "prefix" "maybe_vex")
7096 (set_attr "mode" "TI")])
;; Byte extract from V16QI operand 1 at constant index operand 2 (0..15),
;; stored directly to a QImode memory destination with %vpextrb.
7098 (define_insn "*sse4_1_pextrb_memory"
7099 [(set (match_operand:QI 0 "memory_operand" "=m")
7101 (match_operand:V16QI 1 "register_operand" "x")
7102 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7104 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7105 [(set_attr "type" "sselog")
7106 (set_attr "prefix_extra" "1")
7107 (set_attr "length_immediate" "1")
7108 (set_attr "prefix" "maybe_vex")
7109 (set_attr "mode" "TI")])
;; Word extract from V8HI operand 1 at constant index operand 2 (0..7) into
;; an SWI48 general register.  Emits %vpextrw with the destination printed
;; via %k0.  Unlike the memory form below, this pattern sets prefix_data16
;; rather than prefix_extra.
7111 (define_insn "*sse2_pextrw_<mode>"
7112 [(set (match_operand:SWI48 0 "register_operand" "=r")
7115 (match_operand:V8HI 1 "register_operand" "x")
7116 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7118 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7119 [(set_attr "type" "sselog")
7120 (set_attr "prefix_data16" "1")
7121 (set_attr "length_immediate" "1")
7122 (set_attr "prefix" "maybe_vex")
7123 (set_attr "mode" "TI")])
;; Word extract from V8HI operand 1 at constant index operand 2 (0..7),
;; stored directly to an HImode memory destination with %vpextrw.
7125 (define_insn "*sse4_1_pextrw_memory"
7126 [(set (match_operand:HI 0 "memory_operand" "=m")
7128 (match_operand:V8HI 1 "register_operand" "x")
7129 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7131 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7132 [(set_attr "type" "sselog")
7133 (set_attr "prefix_extra" "1")
7134 (set_attr "length_immediate" "1")
7135 (set_attr "prefix" "maybe_vex")
7136 (set_attr "mode" "TI")])
;; Doubleword extract from V4SI operand 1 at constant index operand 2 (0..3)
;; into an SImode general register or memory ("=rm"), using %vpextrd.
7138 (define_insn "*sse4_1_pextrd"
7139 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7141 (match_operand:V4SI 1 "register_operand" "x")
7142 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7144 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7145 [(set_attr "type" "sselog")
7146 (set_attr "prefix_extra" "1")
7147 (set_attr "length_immediate" "1")
7148 (set_attr "prefix" "maybe_vex")
7149 (set_attr "mode" "TI")])
;; 64-bit-only variant of the pextrd pattern with a DImode register
;; destination; the instruction is emitted on the 32-bit view of the
;; destination (%k0).
;; NOTE(review): the pattern name suggests the result is zero-extended to
;; DImode; the wrapping RTL is not visible in this excerpt -- confirm.
7151 (define_insn "*sse4_1_pextrd_zext"
7152 [(set (match_operand:DI 0 "register_operand" "=r")
7155 (match_operand:V4SI 1 "register_operand" "x")
7156 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7157 "TARGET_64BIT && TARGET_SSE4_1"
7158 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7159 [(set_attr "type" "sselog")
7160 (set_attr "prefix_extra" "1")
7161 (set_attr "length_immediate" "1")
7162 (set_attr "prefix" "maybe_vex")
7163 (set_attr "mode" "TI")])
;; Quadword extract from V2DI operand 1 at constant index operand 2 (0..1)
;; into a DImode general register or memory, using %vpextrq.  Enabled only
;; for TARGET_SSE4_1 && TARGET_64BIT.
7165 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
7166 (define_insn "*sse4_1_pextrq"
7167 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7169 (match_operand:V2DI 1 "register_operand" "x")
7170 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7171 "TARGET_SSE4_1 && TARGET_64BIT"
7172 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7173 [(set_attr "type" "sselog")
7174 (set_attr "prefix_rex" "1")
7175 (set_attr "prefix_extra" "1")
7176 (set_attr "length_immediate" "1")
7177 (set_attr "prefix" "maybe_vex")
7178 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2).  It splits the
;; immediate into four 2-bit selectors and passes them to avx2_pshufd_1
;; twice: once as-is (values 0..3) and once with 4 added (values 4..7).
7180 (define_expand "avx2_pshufdv3"
7181 [(match_operand:V8SI 0 "register_operand" "")
7182 (match_operand:V8SI 1 "nonimmediate_operand" "")
7183 (match_operand:SI 2 "const_0_to_255_operand" "")]
7186 int mask = INTVAL (operands[2]);
7187 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7188 GEN_INT ((mask >> 0) & 3),
7189 GEN_INT ((mask >> 2) & 3),
7190 GEN_INT ((mask >> 4) & 3),
7191 GEN_INT ((mask >> 6) & 3),
7192 GEN_INT (((mask >> 0) & 3) + 4),
7193 GEN_INT (((mask >> 2) & 3) + 4),
7194 GEN_INT (((mask >> 4) & 3) + 4),
7195 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle with eight explicit selectors.  The insn condition requires
;; each of operands 6-9 to equal the matching operand 2-5 plus 4, i.e. both
;; groups of four use the same selectors.  The output code packs operands
;; 2-5 (two bits each) into one immediate, stores it in operands[2], and
;; emits vpshufd.
7199 (define_insn "avx2_pshufd_1"
7200 [(set (match_operand:V8SI 0 "register_operand" "=x")
7202 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7203 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7204 (match_operand 3 "const_0_to_3_operand" "")
7205 (match_operand 4 "const_0_to_3_operand" "")
7206 (match_operand 5 "const_0_to_3_operand" "")
7207 (match_operand 6 "const_4_to_7_operand" "")
7208 (match_operand 7 "const_4_to_7_operand" "")
7209 (match_operand 8 "const_4_to_7_operand" "")
7210 (match_operand 9 "const_4_to_7_operand" "")])))]
7212 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7213 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7214 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7215 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7218 mask |= INTVAL (operands[2]) << 0;
7219 mask |= INTVAL (operands[3]) << 2;
7220 mask |= INTVAL (operands[4]) << 4;
7221 mask |= INTVAL (operands[5]) << 6;
7222 operands[2] = GEN_INT (mask);
7224 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7226 [(set_attr "type" "sselog1")
7227 (set_attr "prefix" "vex")
7228 (set_attr "length_immediate" "1")
7229 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low eight bits into four 2-bit selectors and passes them to
;; sse2_pshufd_1.
7231 (define_expand "sse2_pshufd"
7232 [(match_operand:V4SI 0 "register_operand" "")
7233 (match_operand:V4SI 1 "nonimmediate_operand" "")
7234 (match_operand:SI 2 "const_int_operand" "")]
7237 int mask = INTVAL (operands[2]);
7238 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7239 GEN_INT ((mask >> 0) & 3),
7240 GEN_INT ((mask >> 2) & 3),
7241 GEN_INT ((mask >> 4) & 3),
7242 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit selectors (operands 2-5, each 0..3).
;; The output code packs them, two bits each, into one immediate stored in
;; operands[2] and emits %vpshufd.
7246 (define_insn "sse2_pshufd_1"
7247 [(set (match_operand:V4SI 0 "register_operand" "=x")
7249 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7250 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7251 (match_operand 3 "const_0_to_3_operand" "")
7252 (match_operand 4 "const_0_to_3_operand" "")
7253 (match_operand 5 "const_0_to_3_operand" "")])))]
7257 mask |= INTVAL (operands[2]) << 0;
7258 mask |= INTVAL (operands[3]) << 2;
7259 mask |= INTVAL (operands[4]) << 4;
7260 mask |= INTVAL (operands[5]) << 6;
7261 operands[2] = GEN_INT (mask);
7263 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7265 [(set_attr "type" "sselog1")
7266 (set_attr "prefix_data16" "1")
7267 (set_attr "prefix" "maybe_vex")
7268 (set_attr "length_immediate" "1")
7269 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2).  It splits the
;; immediate into four 2-bit selectors and passes them to avx2_pshuflw_1
;; twice: once as-is (values 0..3) and once with 8 added (values 8..11).
7271 (define_expand "avx2_pshuflwv3"
7272 [(match_operand:V16HI 0 "register_operand" "")
7273 (match_operand:V16HI 1 "nonimmediate_operand" "")
7274 (match_operand:SI 2 "const_0_to_255_operand" "")]
7277 int mask = INTVAL (operands[2]);
7278 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7279 GEN_INT ((mask >> 0) & 3),
7280 GEN_INT ((mask >> 2) & 3),
7281 GEN_INT ((mask >> 4) & 3),
7282 GEN_INT ((mask >> 6) & 3),
7283 GEN_INT (((mask >> 0) & 3) + 8),
7284 GEN_INT (((mask >> 2) & 3) + 8),
7285 GEN_INT (((mask >> 4) & 3) + 8),
7286 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI shuffle with variable selectors in operands 2-5 (0..3) and
;; operands 6-9 (8..11).  The insn condition requires each of operands 6-9
;; to equal the matching operand 2-5 plus 8.  The output code packs operands
;; 2-5 (two bits each) into one immediate stored in operands[2] and emits
;; vpshuflw.
7290 (define_insn "avx2_pshuflw_1"
7291 [(set (match_operand:V16HI 0 "register_operand" "=x")
7293 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7294 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7295 (match_operand 3 "const_0_to_3_operand" "")
7296 (match_operand 4 "const_0_to_3_operand" "")
7297 (match_operand 5 "const_0_to_3_operand" "")
7302 (match_operand 6 "const_8_to_11_operand" "")
7303 (match_operand 7 "const_8_to_11_operand" "")
7304 (match_operand 8 "const_8_to_11_operand" "")
7305 (match_operand 9 "const_8_to_11_operand" "")
7311 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7312 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7313 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7314 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7317 mask |= INTVAL (operands[2]) << 0;
7318 mask |= INTVAL (operands[3]) << 2;
7319 mask |= INTVAL (operands[4]) << 4;
7320 mask |= INTVAL (operands[5]) << 6;
7321 operands[2] = GEN_INT (mask);
7323 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7325 [(set_attr "type" "sselog")
7326 (set_attr "prefix" "vex")
7327 (set_attr "length_immediate" "1")
7328 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low eight bits into four 2-bit selectors and passes them to
;; sse2_pshuflw_1.
7330 (define_expand "sse2_pshuflw"
7331 [(match_operand:V8HI 0 "register_operand" "")
7332 (match_operand:V8HI 1 "nonimmediate_operand" "")
7333 (match_operand:SI 2 "const_int_operand" "")]
7336 int mask = INTVAL (operands[2]);
7337 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7338 GEN_INT ((mask >> 0) & 3),
7339 GEN_INT ((mask >> 2) & 3),
7340 GEN_INT ((mask >> 4) & 3),
7341 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle with four variable selectors (operands 2-5, each 0..3).
;; The output code packs them, two bits each, into one immediate stored in
;; operands[2] and emits %vpshuflw.  Attributes: prefix_data16 "0",
;; prefix_rep "1".
7345 (define_insn "sse2_pshuflw_1"
7346 [(set (match_operand:V8HI 0 "register_operand" "=x")
7348 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7349 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7350 (match_operand 3 "const_0_to_3_operand" "")
7351 (match_operand 4 "const_0_to_3_operand" "")
7352 (match_operand 5 "const_0_to_3_operand" "")
7360 mask |= INTVAL (operands[2]) << 0;
7361 mask |= INTVAL (operands[3]) << 2;
7362 mask |= INTVAL (operands[4]) << 4;
7363 mask |= INTVAL (operands[5]) << 6;
7364 operands[2] = GEN_INT (mask);
7366 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7368 [(set_attr "type" "sselog")
7369 (set_attr "prefix_data16" "0")
7370 (set_attr "prefix_rep" "1")
7371 (set_attr "prefix" "maybe_vex")
7372 (set_attr "length_immediate" "1")
7373 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2).  It splits the
;; immediate into four 2-bit selectors and passes them to avx2_pshufhw_1
;; twice: once with 4 added (values 4..7) and once with 12 added (values
;; 12..15).
7375 (define_expand "avx2_pshufhwv3"
7376 [(match_operand:V16HI 0 "register_operand" "")
7377 (match_operand:V16HI 1 "nonimmediate_operand" "")
7378 (match_operand:SI 2 "const_0_to_255_operand" "")]
7381 int mask = INTVAL (operands[2]);
7382 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7383 GEN_INT (((mask >> 0) & 3) + 4),
7384 GEN_INT (((mask >> 2) & 3) + 4),
7385 GEN_INT (((mask >> 4) & 3) + 4),
7386 GEN_INT (((mask >> 6) & 3) + 4),
7387 GEN_INT (((mask >> 0) & 3) + 12),
7388 GEN_INT (((mask >> 2) & 3) + 12),
7389 GEN_INT (((mask >> 4) & 3) + 12),
7390 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI shuffle with variable selectors in operands 2-5 (4..7) and
;; operands 6-9 (12..15).  The insn condition requires each of operands 6-9
;; to equal the matching operand 2-5 plus 8.  The output code subtracts 4
;; from operands 2-5, packs the results (two bits each) into one immediate
;; stored in operands[2], and emits vpshufhw.
7394 (define_insn "avx2_pshufhw_1"
7395 [(set (match_operand:V16HI 0 "register_operand" "=x")
7397 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7398 (parallel [(const_int 0)
7402 (match_operand 2 "const_4_to_7_operand" "")
7403 (match_operand 3 "const_4_to_7_operand" "")
7404 (match_operand 4 "const_4_to_7_operand" "")
7405 (match_operand 5 "const_4_to_7_operand" "")
7410 (match_operand 6 "const_12_to_15_operand" "")
7411 (match_operand 7 "const_12_to_15_operand" "")
7412 (match_operand 8 "const_12_to_15_operand" "")
7413 (match_operand 9 "const_12_to_15_operand" "")])))]
7415 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7416 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7417 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7418 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7421 mask |= (INTVAL (operands[2]) - 4) << 0;
7422 mask |= (INTVAL (operands[3]) - 4) << 2;
7423 mask |= (INTVAL (operands[4]) - 4) << 4;
7424 mask |= (INTVAL (operands[5]) - 4) << 6;
7425 operands[2] = GEN_INT (mask);
7427 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7429 [(set_attr "type" "sselog")
7430 (set_attr "prefix" "vex")
7431 (set_attr "length_immediate" "1")
7432 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It
;; splits the low eight bits into four 2-bit selectors, adds 4 to each
;; (values 4..7), and passes them to sse2_pshufhw_1.
7434 (define_expand "sse2_pshufhw"
7435 [(match_operand:V8HI 0 "register_operand" "")
7436 (match_operand:V8HI 1 "nonimmediate_operand" "")
7437 (match_operand:SI 2 "const_int_operand" "")]
7440 int mask = INTVAL (operands[2]);
7441 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7442 GEN_INT (((mask >> 0) & 3) + 4),
7443 GEN_INT (((mask >> 2) & 3) + 4),
7444 GEN_INT (((mask >> 4) & 3) + 4),
7445 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle with four variable selectors (operands 2-5, each 4..7).
;; The output code subtracts 4 from each, packs the results (two bits each)
;; into one immediate stored in operands[2], and emits %vpshufhw.
;; Attributes: prefix_rep "1", prefix_data16 "0".
7449 (define_insn "sse2_pshufhw_1"
7450 [(set (match_operand:V8HI 0 "register_operand" "=x")
7452 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7453 (parallel [(const_int 0)
7457 (match_operand 2 "const_4_to_7_operand" "")
7458 (match_operand 3 "const_4_to_7_operand" "")
7459 (match_operand 4 "const_4_to_7_operand" "")
7460 (match_operand 5 "const_4_to_7_operand" "")])))]
7464 mask |= (INTVAL (operands[2]) - 4) << 0;
7465 mask |= (INTVAL (operands[3]) - 4) << 2;
7466 mask |= (INTVAL (operands[4]) - 4) << 4;
7467 mask |= (INTVAL (operands[5]) - 4) << 6;
7468 operands[2] = GEN_INT (mask);
7470 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7472 [(set_attr "type" "sselog")
7473 (set_attr "prefix_rep" "1")
7474 (set_attr "prefix_data16" "0")
7475 (set_attr "prefix" "maybe_vex")
7476 (set_attr "length_immediate" "1")
7477 (set_attr "mode" "TI")])
;; Expander with a V4SI destination and an SImode source (operand 1).  The
;; preparation statement sets operands[2] to the V4SI zero constant.
;; NOTE(review): presumably the zero vector supplies the remaining elements
;; of the result -- the merging RTL is not visible in this excerpt; confirm.
7479 (define_expand "sse2_loadd"
7480 [(set (match_operand:V4SI 0 "register_operand" "")
7483 (match_operand:SI 1 "nonimmediate_operand" ""))
7487 "operands[2] = CONST0_RTX (V4SImode);")
;; Inserts SImode operand 2 into a V4SI result whose other input, operand 1,
;; is either zero ("C") or a register.  Five alternatives:
;;   0-1: operand 1 is zero, source in memory or a general register: %vmovd
;;   2:   operand 1 is zero, source in memory: movss (isa noavx)
;;   3:   operand 1 tied to the destination, source in an SSE register:
;;        movss (isa noavx)
;;   4:   three-operand vmovss (isa avx)
7489 (define_insn "sse2_loadld"
7490 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7493 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7494 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7498 %vmovd\t{%2, %0|%0, %2}
7499 %vmovd\t{%2, %0|%0, %2}
7500 movss\t{%2, %0|%0, %2}
7501 movss\t{%2, %0|%0, %2}
7502 vmovss\t{%2, %1, %0|%0, %1, %2}"
7503 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7504 (set_attr "type" "ssemov")
7505 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7506 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts element 0 of V4SI operand 1 into an SImode destination.  After
;; reload the pattern is split into a plain SImode move, with operand 1
;; rewritten as an SImode register of the same REGNO.  The split applies
;; when TARGET_INTER_UNIT_MOVES holds, or operand 0 is memory, or operand 0
;; is not a general register.
7508 (define_insn_and_split "sse2_stored"
7509 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7511 (match_operand:V4SI 1 "register_operand" "x,Yi")
7512 (parallel [(const_int 0)])))]
7515 "&& reload_completed
7516 && (TARGET_INTER_UNIT_MOVES
7517 || MEM_P (operands [0])
7518 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7519 [(set (match_dup 0) (match_dup 1))]
7520 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extracts element operand 2 (0..3) from a V4SI value in offsettable memory
;; ("o").  The split code emits a plain SImode load from the same address
;; adjusted by i*4 bytes, where i is the element index.
7522 (define_insn_and_split "*vec_ext_v4si_mem"
7523 [(set (match_operand:SI 0 "register_operand" "=r")
7525 (match_operand:V4SI 1 "memory_operand" "o")
7526 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7532 int i = INTVAL (operands[2]);
7534 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: DImode destination (operand 0) set from element 0 of V2DI
;; operand 1.
7538 (define_expand "sse_storeq"
7539 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7541 (match_operand:V2DI 1 "register_operand" "")
7542 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 DImode extract.  Three alternatives; the
;; condition rejects memory-to-memory.  Only the third alternative (general
;; register destination, offsettable memory source) has its template visible
;; here: a plain mov{q} of type imov.
7545 (define_insn "*sse2_storeq_rex64"
7546 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7548 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7549 (parallel [(const_int 0)])))]
7550 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7554 mov{q}\t{%1, %0|%0, %1}"
7555 [(set_attr "type" "*,*,imov")
7556 (set_attr "mode" "*,*,DI")])
;; *sse2_storeq: element-0 DImode extract from V2DI register operand 1 into
;; an SSE register or memory ("=xm").  A second pattern on the same RTL
;; follows; its replacement is a plain DImode move with operand 1 rewritten
;; as a DImode register of the same REGNO, guarded by
;; TARGET_INTER_UNIT_MOVES, or operand 0 being memory, or operand 0 not
;; being a general register.
;; NOTE(review): the header of the second pattern is not visible in this
;; excerpt; it is presumably a define_split -- confirm.
7558 (define_insn "*sse2_storeq"
7559 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7561 (match_operand:V2DI 1 "register_operand" "x")
7562 (parallel [(const_int 0)])))]
7567 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7569 (match_operand:V2DI 1 "register_operand" "")
7570 (parallel [(const_int 0)])))]
7573 && (TARGET_INTER_UNIT_MOVES
7574 || MEM_P (operands [0])
7575 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7576 [(set (match_dup 0) (match_dup 1))]
7577 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extract of element 1 of a V2DI value; memory-to-memory is
;; rejected by the condition.  Alternatives:
;;   0: register source, memory destination: %vmovhps
;;   1: source tied to the destination: psrldq by 8 (isa noavx)
;;   2: vpsrldq by 8 (isa avx)
;;   3: offsettable memory source, SSE destination: %vmovq from %H1
;;   4: offsettable memory source, general-register destination: mov{q}
;;      from %H1
7579 (define_insn "*vec_extractv2di_1_rex64"
7580 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7582 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7583 (parallel [(const_int 1)])))]
7584 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7586 %vmovhps\t{%1, %0|%0, %1}
7587 psrldq\t{$8, %0|%0, 8}
7588 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7589 %vmovq\t{%H1, %0|%0, %H1}
7590 mov{q}\t{%H1, %0|%0, %H1}"
7591 [(set_attr "isa" "*,noavx,avx,*,*")
7592 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7593 (set_attr "length_immediate" "*,1,1,*,*")
7594 (set_attr "memory" "*,none,none,*,*")
7595 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7596 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) extract of element 1 of a V2DI
;; value; memory-to-memory is rejected by the condition.  Alternatives:
;;   0: register source, memory destination: %vmovhps
;;   1: source tied to the destination: psrldq by 8 (isa sse2_noavx)
;;   2: vpsrldq by 8 (isa avx)
;;   3: offsettable memory source: %vmovq from %H1 (isa sse2)
;;   4: register source: movhlps (isa noavx)
;;   5: offsettable memory source: movlps from %H1 (isa noavx)
7598 (define_insn "*vec_extractv2di_1"
7599 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7601 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7602 (parallel [(const_int 1)])))]
7603 "!TARGET_64BIT && TARGET_SSE
7604 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7606 %vmovhps\t{%1, %0|%0, %1}
7607 psrldq\t{$8, %0|%0, 8}
7608 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7609 %vmovq\t{%H1, %0|%0, %H1}
7610 movhlps\t{%1, %0|%0, %1}
7611 movlps\t{%H1, %0|%0, %H1}"
7612 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7613 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7614 (set_attr "length_immediate" "*,1,1,*,*,*")
7615 (set_attr "memory" "*,none,none,*,*,*")
7616 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7617 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_dupv4si: V4SI destination (operand 0), SImode scalar source
;; (operand 1).
;; Operand 1 is declared match_operand:SI, so the register it is forced into
;; must be SImode too.  Asking force_reg for V4SImode would make it copy an
;; SImode value into a vector-mode pseudo whenever the operand is not
;; already a register.
7619 (define_expand "vec_dupv4si"
7620 [(set (match_operand:V4SI 0 "register_operand" "")
7622 (match_operand:SI 1 "nonimmediate_operand" "")))]
7626 operands[1] = force_reg (SImode, operands[1]);
;; V4SI result built from SImode operand 1.  Alternatives:
;;   0: SSE register source: %vpshufd with immediate 0 (isa sse2)
;;   1: memory source: vbroadcastss (isa avx)
;;   2: source tied to the destination: shufps with immediate 0 (isa noavx)
7629 (define_insn "*vec_dupv4si"
7630 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7632 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7635 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7636 vbroadcastss\t{%1, %0|%0, %1}
7637 shufps\t{$0, %0, %0|%0, %0, 0}"
7638 [(set_attr "isa" "sse2,avx,noavx")
7639 (set_attr "type" "sselog1,ssemov,sselog1")
7640 (set_attr "length_immediate" "1,0,1")
7641 (set_attr "prefix_extra" "0,1,*")
7642 (set_attr "prefix" "maybe_vex,vex,orig")
7643 (set_attr "mode" "TI,V4SF,V4SF")])
;; Expander vec_dupv2di: V2DI destination (operand 0), DImode scalar source
;; (operand 1).
;; Operand 1 is declared match_operand:DI, so the register it is forced into
;; must be DImode too.  Asking force_reg for V2DImode would make it copy a
;; DImode value into a vector-mode pseudo whenever the operand is not
;; already a register.
7645 (define_expand "vec_dupv2di"
7646 [(set (match_operand:V2DI 0 "register_operand" "")
7648 (match_operand:DI 1 "nonimmediate_operand" "")))]
7652 operands[1] = force_reg (DImode, operands[1]);
;; V2DI result built from DImode operand 1.  Four alternatives with isa
;; "sse2_noavx,avx,sse3,noavx".  Only two templates are visible in this
;; excerpt: vpunpcklqdq using %d1 for the avx alternative, and %vmovddup for
;; the sse3 alternative with a memory source.
7655 (define_insn "*vec_dupv2di"
7656 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7658 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7662 vpunpcklqdq\t{%d1, %0|%0, %d1}
7663 %vmovddup\t{%1, %0|%0, %1}
7665 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7666 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7667 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7668 (set_attr "mode" "TI,TI,DF,V4SF")])
;; V2SI result from two SImode inputs.  Seven alternatives:
;;   0-1: operand 2 in a general register or memory: pinsrd / vpinsrd with
;;        index 1
;;   2-3: operand 2 in an SSE register: punpckldq / vpunpckldq
;;   4:   operand 2 is zero ("C"): %vmovd of operand 1
;;   5-6: MMX-register destinations ("*y"): punpckldq, or movd when operand
;;        2 is zero
7670 (define_insn "*vec_concatv2si_sse4_1"
7671 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7673 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7674 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7677 pinsrd\t{$1, %2, %0|%0, %2, 1}
7678 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7679 punpckldq\t{%2, %0|%0, %2}
7680 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7681 %vmovd\t{%1, %0|%0, %1}
7682 punpckldq\t{%2, %0|%0, %2}
7683 movd\t{%1, %0|%0, %1}"
7684 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7685 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7686 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7687 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7688 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7689 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
;; V2SI result from two SImode inputs; operand 2 is a register or zero.
;; Alternatives 0-1 use SSE registers (punpckldq, or movd when operand 2 is
;; zero); alternatives 2-3 use MMX registers with the same two templates.
7691 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7692 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7693 ;; alternatives pretty much forces the MMX alternative to be chosen.
7694 (define_insn "*vec_concatv2si_sse2"
7695 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7697 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7698 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7701 punpckldq\t{%2, %0|%0, %2}
7702 movd\t{%1, %0|%0, %1}
7703 punpckldq\t{%2, %0|%0, %2}
7704 movd\t{%1, %0|%0, %1}"
7705 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7706 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI result from two SImode inputs; operand 2 is a register or zero.
;; The SSE alternatives use float-domain instructions: unpcklps, or movss
;; from memory when operand 2 is zero.  The MMX alternatives use punpckldq,
;; or movd when operand 2 is zero.
7708 (define_insn "*vec_concatv2si_sse"
7709 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7711 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7712 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7715 unpcklps\t{%2, %0|%0, %2}
7716 movss\t{%1, %0|%0, %1}
7717 punpckldq\t{%2, %0|%0, %2}
7718 movd\t{%1, %0|%0, %1}"
7719 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7720 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; V4SI result from two V2SI inputs.  Alternatives:
;;   0: punpcklqdq (isa sse2_noavx)    1: vpunpcklqdq (isa avx)
;;   2: movlhps, register operand 2 (isa noavx)
;;   3: movhps, memory operand 2 (isa noavx)
;;   4: vmovhps, memory operand 2 (isa avx)
7722 (define_insn "*vec_concatv4si"
7723 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7725 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7726 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7729 punpcklqdq\t{%2, %0|%0, %2}
7730 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7731 movlhps\t{%2, %0|%0, %2}
7732 movhps\t{%2, %0|%0, %2}
7733 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7734 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7735 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7736 (set_attr "prefix" "orig,vex,orig,orig,vex")
7737 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7739 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI register (operand 0) from two DI values.  Nine
;; alternatives, in order:
;;   0: pinsrq       (sse4_noavx; operand 2 from a general reg or memory)
;;   1: vpinsrq      (avx)
;;   2: %vmovq       (operand 2 is the zero constant "C")
;;   3: %vmovd       (operand 1 in a general register, operand 2 zero)
;;   4: movq2dq      (operand 1 in an MMX register, operand 2 zero)
;;   5: punpcklqdq   (noavx)
;;   6: vpunpcklqdq  (avx)
;;   7: movhps       (noavx, memory operand 2)
;;   8: vmovhps      (avx, memory operand 2)
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise.  "prefix_rex" is overridden only for alternatives 0
;; and 3 when TARGET_AVX is off.
;; NOTE(review): the "_rex64" suffix suggests a 64-bit-only pattern --
;; confirm against the insn condition.
7740 (define_insn "*vec_concatv2di_rex64"
7741 [(set (match_operand:V2DI 0 "register_operand"
7742 "=x,x ,x ,Yi,!x,x,x,x,x")
7744 (match_operand:DI 1 "nonimmediate_operand"
7745 " 0,x ,xm,r ,*y,0,x,0,x")
7746 (match_operand:DI 2 "vector_move_operand"
7747 "rm,rm,C ,C ,C ,x,x,m,m")))]
7750 pinsrq\t{$1, %2, %0|%0, %2, 1}
7751 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7752 %vmovq\t{%1, %0|%0, %1}
7753 %vmovd\t{%1, %0|%0, %1}
7754 movq2dq\t{%1, %0|%0, %1}
7755 punpcklqdq\t{%2, %0|%0, %2}
7756 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7757 movhps\t{%2, %0|%0, %2}
7758 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7759 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7762 (eq_attr "alternative" "0,1,5,6")
7763 (const_string "sselog")
7764 (const_string "ssemov")))
7765 (set (attr "prefix_rex")
7767 (and (eq_attr "alternative" "0,3")
7768 (not (match_test "TARGET_AVX")))
7770 (const_string "*")))
7771 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7772 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7773 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7774 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Build a V2DI register (operand 0) from two DI values on non-64-bit
;; targets with SSE (condition: !TARGET_64BIT && TARGET_SSE).
;; Alternatives, in order (see the "isa" attribute):
;;   0: %vmovq       (sse2; operand 2 is the zero constant "C")
;;   1: movq2dq      (sse2; operand 1 in an MMX register, operand 2 zero)
;;   2: punpcklqdq   (sse2_noavx)
;;   3: vpunpcklqdq  (avx)
;;   4: movlhps      (noavx, register operand 2)
;;   5: movhps       (noavx, memory operand 2)
;;   6: vmovhps      (avx, memory operand 2)
7776 (define_insn "vec_concatv2di"
7777 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7779 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7780 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7781 "!TARGET_64BIT && TARGET_SSE"
7783 %vmovq\t{%1, %0|%0, %1}
7784 movq2dq\t{%1, %0|%0, %1}
7785 punpcklqdq\t{%2, %0|%0, %2}
7786 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7787 movlhps\t{%2, %0|%0, %2}
7788 movhps\t{%2, %0|%0, %2}
7789 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7790 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7791 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7792 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7793 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the "vec_unpacks_lo" operation on each VI124_AVX2 mode.
;; Operand 0 is the <sseunpackmode> result register, operand 1 the source
;; vector register.  All work is delegated to ix86_expand_sse_unpack with
;; the flags (false, false).
;; NOTE(review): presumably the flags mean (unsigned_p, high_p) --
;; confirm against the helper's definition.
7795 (define_expand "vec_unpacks_lo_<mode>"
7796 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7797 (match_operand:VI124_AVX2 1 "register_operand" "")]
7799 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for the "vec_unpacks_hi" operation on each VI124_AVX2 mode.
;; Operand 0 is the <sseunpackmode> result register, operand 1 the source
;; vector register.  All work is delegated to ix86_expand_sse_unpack with
;; the flags (false, true).
;; NOTE(review): presumably the flags mean (unsigned_p, high_p) --
;; confirm against the helper's definition.
7801 (define_expand "vec_unpacks_hi_<mode>"
7802 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7803 (match_operand:VI124_AVX2 1 "register_operand" "")]
7805 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for the "vec_unpacku_lo" operation on each VI124_AVX2 mode.
;; Operand 0 is the <sseunpackmode> result register, operand 1 the source
;; vector register.  All work is delegated to ix86_expand_sse_unpack with
;; the flags (true, false).
;; NOTE(review): presumably the flags mean (unsigned_p, high_p) --
;; confirm against the helper's definition.
7807 (define_expand "vec_unpacku_lo_<mode>"
7808 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7809 (match_operand:VI124_AVX2 1 "register_operand" "")]
7811 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for the "vec_unpacku_hi" operation on each VI124_AVX2 mode.
;; Operand 0 is the <sseunpackmode> result register, operand 1 the source
;; vector register.  All work is delegated to ix86_expand_sse_unpack with
;; the flags (true, true).
;; NOTE(review): presumably the flags mean (unsigned_p, high_p) --
;; confirm against the helper's definition.
7813 (define_expand "vec_unpacku_hi_<mode>"
7814 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7815 (match_operand:VI124_AVX2 1 "register_operand" "")]
7817 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI "uavg" operation.  Operand 0 is the result
;; register; operands 1 and 2 are the inputs (register or memory).  The
;; RTL includes a V32QI constant vector of thirty-two ones.
;; NOTE(review): presumably that vector is the +1 rounding term of an
;; unsigned rounding average -- confirm against the full RTL template.
;; The preparation statement lets ix86_fixup_binary_operands_no_copy
;; adjust the operands for a PLUS in V32QImode before the insn is emitted.
7825 (define_expand "avx2_uavgv32qi3"
7826 [(set (match_operand:V32QI 0 "register_operand" "")
7832 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7834 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7835 (const_vector:V32QI [(const_int 1) (const_int 1)
7836 (const_int 1) (const_int 1)
7837 (const_int 1) (const_int 1)
7838 (const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)
7847 (const_int 1) (const_int 1)
7848 (const_int 1) (const_int 1)
7849 (const_int 1) (const_int 1)
7850 (const_int 1) (const_int 1)]))
7853 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI "uavg" operation.  Operand 0 is the result
;; register; operands 1 and 2 are the inputs (register or memory).  The
;; RTL includes a V16QI constant vector of sixteen ones.
;; NOTE(review): presumably that vector is the +1 rounding term of an
;; unsigned rounding average -- confirm against the full RTL template.
;; The preparation statement lets ix86_fixup_binary_operands_no_copy
;; adjust the operands for a PLUS in V16QImode before the insn is emitted.
7855 (define_expand "sse2_uavgv16qi3"
7856 [(set (match_operand:V16QI 0 "register_operand" "")
7862 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7864 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7865 (const_vector:V16QI [(const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)
7870 (const_int 1) (const_int 1)
7871 (const_int 1) (const_int 1)
7872 (const_int 1) (const_int 1)]))
7875 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matcher for the V32QI "uavg" RTL; emits the three-operand vpavgb.
;; Operand 1 is marked commutative ("%") and must be a register at
;; output time; operand 2 may be a register or memory.  Enabled for
;; TARGET_AVX2 when ix86_binary_operator_ok accepts the operands for a
;; PLUS in V32QImode.
7877 (define_insn "*avx2_uavgv32qi3"
7878 [(set (match_operand:V32QI 0 "register_operand" "=x")
7884 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7886 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7887 (const_vector:V32QI [(const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)
7898 (const_int 1) (const_int 1)
7899 (const_int 1) (const_int 1)
7900 (const_int 1) (const_int 1)
7901 (const_int 1) (const_int 1)
7902 (const_int 1) (const_int 1)]))
7904 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7905 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7906 [(set_attr "type" "sseiadd")
7907 (set_attr "prefix" "vex")
7908 (set_attr "mode" "OI")])
;; Matcher for the V16QI "uavg" RTL.  Two alternatives: the two-operand
;; pavgb (noavx, operand 1 tied to the destination) and the
;; three-operand vpavgb (avx).  Operand 1 is marked commutative ("%").
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for a PLUS in V16QImode.
7910 (define_insn "*sse2_uavgv16qi3"
7911 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7917 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7919 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7920 (const_vector:V16QI [(const_int 1) (const_int 1)
7921 (const_int 1) (const_int 1)
7922 (const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)
7925 (const_int 1) (const_int 1)
7926 (const_int 1) (const_int 1)
7927 (const_int 1) (const_int 1)]))
7929 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7931 pavgb\t{%2, %0|%0, %2}
7932 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7933 [(set_attr "isa" "noavx,avx")
7934 (set_attr "type" "sseiadd")
7935 (set_attr "prefix_data16" "1,*")
7936 (set_attr "prefix" "orig,vex")
7937 (set_attr "mode" "TI")])
;; Expander for the V16HI "uavg" operation.  Operand 0 is the result
;; register; operands 1 and 2 are the inputs (register or memory).  The
;; RTL includes a V16HI constant vector of sixteen ones.
;; NOTE(review): presumably that vector is the +1 rounding term of an
;; unsigned rounding average -- confirm against the full RTL template.
;; The preparation statement lets ix86_fixup_binary_operands_no_copy
;; adjust the operands for a PLUS in V16HImode before the insn is emitted.
7939 (define_expand "avx2_uavgv16hi3"
7940 [(set (match_operand:V16HI 0 "register_operand" "")
7946 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7948 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7949 (const_vector:V16HI [(const_int 1) (const_int 1)
7950 (const_int 1) (const_int 1)
7951 (const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)
7954 (const_int 1) (const_int 1)
7955 (const_int 1) (const_int 1)
7956 (const_int 1) (const_int 1)]))
7959 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI "uavg" operation.  Operand 0 is the result
;; register; operands 1 and 2 are the inputs (register or memory).  The
;; RTL includes a V8HI constant vector of eight ones.
;; NOTE(review): presumably that vector is the +1 rounding term of an
;; unsigned rounding average -- confirm against the full RTL template.
;; The preparation statement lets ix86_fixup_binary_operands_no_copy
;; adjust the operands for a PLUS in V8HImode before the insn is emitted.
7961 (define_expand "sse2_uavgv8hi3"
7962 [(set (match_operand:V8HI 0 "register_operand" "")
7968 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7970 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7971 (const_vector:V8HI [(const_int 1) (const_int 1)
7972 (const_int 1) (const_int 1)
7973 (const_int 1) (const_int 1)
7974 (const_int 1) (const_int 1)]))
7977 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matcher for the V16HI "uavg" RTL; emits the three-operand vpavgw.
;; Operand 1 is marked commutative ("%") and must be a register at
;; output time; operand 2 may be a register or memory.  Enabled for
;; TARGET_AVX2 when ix86_binary_operator_ok accepts the operands for a
;; PLUS in V16HImode.
7979 (define_insn "*avx2_uavgv16hi3"
7980 [(set (match_operand:V16HI 0 "register_operand" "=x")
7986 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7988 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7989 (const_vector:V16HI [(const_int 1) (const_int 1)
7990 (const_int 1) (const_int 1)
7991 (const_int 1) (const_int 1)
7992 (const_int 1) (const_int 1)
7993 (const_int 1) (const_int 1)
7994 (const_int 1) (const_int 1)
7995 (const_int 1) (const_int 1)
7996 (const_int 1) (const_int 1)]))
7998 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7999 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8000 [(set_attr "type" "sseiadd")
8001 (set_attr "prefix" "vex")
8002 (set_attr "mode" "OI")])
;; Matcher for the V8HI "uavg" RTL.  Two alternatives: the two-operand
;; pavgw (noavx, operand 1 tied to the destination) and the
;; three-operand vpavgw (avx).  Operand 1 is marked commutative ("%").
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for a PLUS in V8HImode.
8004 (define_insn "*sse2_uavgv8hi3"
8005 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8011 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
8013 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
8014 (const_vector:V8HI [(const_int 1) (const_int 1)
8015 (const_int 1) (const_int 1)
8016 (const_int 1) (const_int 1)
8017 (const_int 1) (const_int 1)]))
8019 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8021 pavgw\t{%2, %0|%0, %2}
8022 vpavgw\t{%2, %1, %0|%0, %1, %2}"
8023 [(set_attr "isa" "noavx,avx")
8024 (set_attr "type" "sseiadd")
8025 (set_attr "prefix_data16" "1,*")
8026 (set_attr "prefix" "orig,vex")
8027 (set_attr "mode" "TI")])
8029 ;; The correct representation for this is absolutely enormous, and
8030 ;; surely not generally useful.
;; psadbw / vpsadbw, modelled as an unspec rather than as explicit RTL.
;; The result (operand 0) has a VI8_AVX2 mode while both inputs are in
;; the corresponding <ssebytemode> byte-vector mode.  Alternative 0 is
;; the two-operand form (noavx, operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form (avx).
8031 (define_insn "<sse2_avx2>_psadbw"
8032 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
8033 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
8034 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
8038 psadbw\t{%2, %0|%0, %2}
8039 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8040 [(set_attr "isa" "noavx,avx")
8041 (set_attr "type" "sseiadd")
8042 (set_attr "atom_unit" "simul")
8043 (set_attr "prefix_data16" "1,*")
8044 (set_attr "prefix" "orig,vex")
8045 (set_attr "mode" "<sseinsnmode>")])
;; movmskps / movmskpd family, instantiated for every mode in the VF
;; iterator.  Reads a vector float register (operand 1) and writes an SI
;; general register (operand 0).  The "%v" template prefix together with
;; prefix "maybe_vex" selects the VEX-encoded mnemonic when applicable.
8047 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
8048 [(set (match_operand:SI 0 "register_operand" "=r")
8050 [(match_operand:VF 1 "register_operand" "x")]
8053 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8054 [(set_attr "type" "ssemov")
8055 (set_attr "prefix" "maybe_vex")
8056 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a 256-bit byte vector: reads the V32QI register operand 1
;; and writes the SI general register operand 0.  Always VEX-encoded.
;; NOTE(review): the "mode" attribute is "DI" here, whereas the sibling
;; "sse2_pmovmskb" pattern uses "SI"; both set an SI destination --
;; confirm whether "DI" is intentional.
8058 (define_insn "avx2_pmovmskb"
8059 [(set (match_operand:SI 0 "register_operand" "=r")
8060 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
8063 "vpmovmskb\t{%1, %0|%0, %1}"
8064 [(set_attr "type" "ssemov")
8065 (set_attr "prefix" "vex")
8066 (set_attr "mode" "DI")])
;; pmovmskb on a 128-bit byte vector: reads the V16QI register operand 1
;; and writes the SI general register operand 0.  The "%v" template
;; prefix together with prefix "maybe_vex" selects vpmovmskb when the
;; VEX encoding applies.
8068 (define_insn "sse2_pmovmskb"
8069 [(set (match_operand:SI 0 "register_operand" "=r")
8070 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8073 "%vpmovmskb\t{%1, %0|%0, %1}"
8074 [(set_attr "type" "ssemov")
8075 (set_attr "prefix_data16" "1")
8076 (set_attr "prefix" "maybe_vex")
8077 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: operand 0 is the V16QI memory destination,
;; operands 1 and 2 are V16QI registers combined through an unspec.
;; NOTE(review): presumably operand 1 is the data and operand 2 the byte
;; mask -- confirm against the instruction reference.
8079 (define_expand "sse2_maskmovdqu"
8080 [(set (match_operand:V16QI 0 "memory_operand" "")
8081 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8082 (match_operand:V16QI 2 "register_operand" "")
;; Matcher for maskmovdqu.  The destination is the V16QI memory addressed
;; by operand 0, which is pinned by the "D" constraint and never appears
;; in the output template (it is implicit in the instruction).  The
;; unspec also lists the previous memory contents (mem (match_dup 0)) as
;; an input alongside register operands 1 and 2.  Because of the implicit
;; address operand, "length_vex" is computed by hand: 3, plus 1 when
;; operand 2 satisfies REX_SSE_REGNO_P.
8087 (define_insn "*sse2_maskmovdqu"
8088 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8089 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8090 (match_operand:V16QI 2 "register_operand" "x")
8091 (mem:V16QI (match_dup 0))]
8094 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8095 [(set_attr "type" "ssemov")
8096 (set_attr "prefix_data16" "1")
8097 ;; The implicit %rdi operand confuses default length_vex computation.
8098 (set (attr "length_vex")
8099 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8100 (set_attr "prefix" "maybe_vex")
8101 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile whose only operand is an SI memory
;; location.  The "memory" attribute is "load" and "atom_sse_attr" marks
;; it as an MXCSR access.
8103 (define_insn "sse_ldmxcsr"
8104 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8108 [(set_attr "type" "sse")
8109 (set_attr "atom_sse_attr" "mxcsr")
8110 (set_attr "prefix" "maybe_vex")
8111 (set_attr "memory" "load")])
;; stmxcsr: sets an SI memory location (operand 0) from an
;; unspec_volatile tagged UNSPECV_STMXCSR.  The "memory" attribute is
;; "store" and "atom_sse_attr" marks it as an MXCSR access.
8113 (define_insn "sse_stmxcsr"
8114 [(set (match_operand:SI 0 "memory_operand" "=m")
8115 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8118 [(set_attr "type" "sse")
8119 (set_attr "atom_sse_attr" "mxcsr")
8120 (set_attr "prefix" "maybe_vex")
8121 (set_attr "memory" "store")])
;; clflush: an unspec_volatile taking a single address operand
;; (constraint "p").  The memory effect is declared as "unknown" and
;; "atom_sse_attr" classifies it as a fence.
8123 (define_insn "sse2_clflush"
8124 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8128 [(set_attr "type" "sse")
8129 (set_attr "atom_sse_attr" "fence")
8130 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile with two SI register operands pinned to
;; fixed registers by the "a" and "c" constraints.  The encoded length is
;; fixed at 3 bytes.
8133 (define_insn "sse3_mwait"
8134 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8135 (match_operand:SI 1 "register_operand" "c")]
8138 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8139 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8140 ;; we only need to set up 32bit registers.
8142 [(set_attr "length" "3")])
;; monitor for non-64-bit targets (TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile with three SI register operands pinned by the "a",
;; "c" and "d" constraints.  The encoded length is fixed at 3 bytes.
8144 (define_insn "sse3_monitor"
8145 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8146 (match_operand:SI 1 "register_operand" "c")
8147 (match_operand:SI 2 "register_operand" "d")]
8149 "TARGET_SSE3 && !TARGET_64BIT"
8150 "monitor\t%0, %1, %2"
8151 [(set_attr "length" "3")])
;; monitor for 64-bit targets (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; is a DI register ("a"); operands 1 and 2 stay SI registers ("c" and
;; "d").  The encoded length is fixed at 3 bytes.
8153 (define_insn "sse3_monitor64"
8154 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8155 (match_operand:SI 1 "register_operand" "c")
8156 (match_operand:SI 2 "register_operand" "d")]
8158 "TARGET_SSE3 && TARGET_64BIT"
8159 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8160 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8161 ;; zero extended to 64bit, we only need to set up 32bit registers.
8163 [(set_attr "length" "3")])
8165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8167 ;; SSSE3 instructions
8169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vphaddw on 256-bit vectors.  The RTL selects adjacent HI element pairs
;; (0,1), (2,3), ... (14,15), first from register operand 1 and then from
;; operand 2 (register or memory), and the result is written to the V16HI
;; register operand 0.  Always VEX-encoded, three-operand form.
8171 (define_insn "avx2_phaddwv16hi3"
8172 [(set (match_operand:V16HI 0 "register_operand" "=x")
8179 (match_operand:V16HI 1 "register_operand" "x")
8180 (parallel [(const_int 0)]))
8181 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8183 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8187 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8190 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8199 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8206 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8212 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8213 (parallel [(const_int 0)]))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8216 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8220 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8224 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8228 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8231 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8232 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8235 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8238 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8239 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8241 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8242 [(set_attr "type" "sseiadd")
8243 (set_attr "prefix_extra" "1")
8244 (set_attr "prefix" "vex")
8245 (set_attr "mode" "OI")])
;; phaddw / vphaddw on 128-bit vectors.  The RTL selects adjacent HI
;; element pairs (0,1) ... (6,7), first from register operand 1 and then
;; from operand 2 (register or memory); the result goes to the V8HI
;; register operand 0.  Alternative 0 is the two-operand form (noavx,
;; operand 1 tied to the destination); alternative 1 is the VEX form.
8247 (define_insn "ssse3_phaddwv8hi3"
8248 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8254 (match_operand:V8HI 1 "register_operand" "0,x")
8255 (parallel [(const_int 0)]))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8258 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8259 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8262 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8265 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8266 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8271 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8272 (parallel [(const_int 0)]))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8275 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8276 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8283 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8286 phaddw\t{%2, %0|%0, %2}
8287 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8288 [(set_attr "isa" "noavx,avx")
8289 (set_attr "type" "sseiadd")
8290 (set_attr "atom_unit" "complex")
8291 (set_attr "prefix_data16" "1,*")
8292 (set_attr "prefix_extra" "1")
8293 (set_attr "prefix" "orig,vex")
8294 (set_attr "mode" "TI")])
;; phaddw on 64-bit MMX vectors ("y" constraints).  The RTL selects
;; adjacent HI element pairs (0,1) and (2,3), first from operand 1 (tied
;; to the destination) and then from operand 2 (MMX register or memory).
;; "prefix_rex" is computed per insn via x86_extended_reg_mentioned_p.
8296 (define_insn "ssse3_phaddwv4hi3"
8297 [(set (match_operand:V4HI 0 "register_operand" "=y")
8302 (match_operand:V4HI 1 "register_operand" "0")
8303 (parallel [(const_int 0)]))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8311 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8312 (parallel [(const_int 0)]))
8313 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8315 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8318 "phaddw\t{%2, %0|%0, %2}"
8319 [(set_attr "type" "sseiadd")
8320 (set_attr "atom_unit" "complex")
8321 (set_attr "prefix_extra" "1")
8322 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8323 (set_attr "mode" "DI")])
;; vphaddd on 256-bit vectors.  The RTL selects adjacent SI element pairs
;; (0,1) ... (6,7), first from register operand 1 and then from operand 2
;; (register or memory); the result goes to the V8SI register operand 0.
;; Always VEX-encoded, three-operand form.
8325 (define_insn "avx2_phadddv8si3"
8326 [(set (match_operand:V8SI 0 "register_operand" "=x")
8332 (match_operand:V8SI 1 "register_operand" "x")
8333 (parallel [(const_int 0)]))
8334 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8336 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8337 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8340 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8341 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8343 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8344 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8349 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8350 (parallel [(const_int 0)]))
8351 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8353 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8354 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8357 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8358 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8360 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8361 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8363 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8364 [(set_attr "type" "sseiadd")
8365 (set_attr "prefix_extra" "1")
8366 (set_attr "prefix" "vex")
8367 (set_attr "mode" "OI")])
;; phaddd / vphaddd on 128-bit vectors.  The RTL selects adjacent SI
;; element pairs (0,1) and (2,3), first from register operand 1 and then
;; from operand 2 (register or memory); the result goes to the V4SI
;; register operand 0.  Alternative 0 is the two-operand form (noavx,
;; operand 1 tied to the destination); alternative 1 is the VEX form.
8369 (define_insn "ssse3_phadddv4si3"
8370 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8375 (match_operand:V4SI 1 "register_operand" "0,x")
8376 (parallel [(const_int 0)]))
8377 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8379 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8380 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8384 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8385 (parallel [(const_int 0)]))
8386 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8388 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8389 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8392 phaddd\t{%2, %0|%0, %2}
8393 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8394 [(set_attr "isa" "noavx,avx")
8395 (set_attr "type" "sseiadd")
8396 (set_attr "atom_unit" "complex")
8397 (set_attr "prefix_data16" "1,*")
8398 (set_attr "prefix_extra" "1")
8399 (set_attr "prefix" "orig,vex")
8400 (set_attr "mode" "TI")])
;; phaddd on 64-bit MMX vectors ("y" constraints).  The RTL selects the
;; SI element pair (0,1) from operand 1 (tied to the destination) and
;; then from operand 2 (MMX register or memory).  "prefix_rex" is
;; computed per insn via x86_extended_reg_mentioned_p.
8402 (define_insn "ssse3_phadddv2si3"
8403 [(set (match_operand:V2SI 0 "register_operand" "=y")
8407 (match_operand:V2SI 1 "register_operand" "0")
8408 (parallel [(const_int 0)]))
8409 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8412 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8413 (parallel [(const_int 0)]))
8414 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8416 "phaddd\t{%2, %0|%0, %2}"
8417 [(set_attr "type" "sseiadd")
8418 (set_attr "atom_unit" "complex")
8419 (set_attr "prefix_extra" "1")
8420 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8421 (set_attr "mode" "DI")])
;; vphaddsw on 256-bit vectors.  The RTL selects adjacent HI element
;; pairs (0,1), (2,3), ... (14,15), first from register operand 1 and
;; then from operand 2 (register or memory); the result goes to the V16HI
;; register operand 0.  Always VEX-encoded, three-operand form.
8423 (define_insn "avx2_phaddswv16hi3"
8424 [(set (match_operand:V16HI 0 "register_operand" "=x")
8431 (match_operand:V16HI 1 "register_operand" "x")
8432 (parallel [(const_int 0)]))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8454 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8455 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8458 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8464 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8465 (parallel [(const_int 0)]))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8468 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8475 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8493 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8494 [(set_attr "type" "sseiadd")
8495 (set_attr "prefix_extra" "1")
8496 (set_attr "prefix" "vex")
8497 (set_attr "mode" "OI")])
;; phaddsw / vphaddsw on 128-bit vectors.  The RTL selects adjacent HI
;; element pairs (0,1) ... (6,7), first from register operand 1 and then
;; from operand 2 (register or memory); the result goes to the V8HI
;; register operand 0.  Alternative 0 is the two-operand form (noavx,
;; operand 1 tied to the destination); alternative 1 is the VEX form.
8499 (define_insn "ssse3_phaddswv8hi3"
8500 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8506 (match_operand:V8HI 1 "register_operand" "0,x")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8523 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8524 (parallel [(const_int 0)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8534 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8535 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8538 phaddsw\t{%2, %0|%0, %2}
8539 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8540 [(set_attr "isa" "noavx,avx")
8541 (set_attr "type" "sseiadd")
8542 (set_attr "atom_unit" "complex")
8543 (set_attr "prefix_data16" "1,*")
8544 (set_attr "prefix_extra" "1")
8545 (set_attr "prefix" "orig,vex")
8546 (set_attr "mode" "TI")])
;; phaddsw on 64-bit MMX vectors ("y" constraints).  The RTL selects
;; adjacent HI element pairs (0,1) and (2,3), first from operand 1 (tied
;; to the destination) and then from operand 2 (MMX register or memory).
;; "prefix_rex" is computed per insn via x86_extended_reg_mentioned_p.
8548 (define_insn "ssse3_phaddswv4hi3"
8549 [(set (match_operand:V4HI 0 "register_operand" "=y")
8554 (match_operand:V4HI 1 "register_operand" "0")
8555 (parallel [(const_int 0)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8563 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8564 (parallel [(const_int 0)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8570 "phaddsw\t{%2, %0|%0, %2}"
8571 [(set_attr "type" "sseiadd")
8572 (set_attr "atom_unit" "complex")
8573 (set_attr "prefix_extra" "1")
8574 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8575 (set_attr "mode" "DI")])
;; vphsubw on 256-bit vectors.  The RTL selects adjacent HI element pairs
;; (0,1), (2,3), ... (14,15), first from register operand 1 and then from
;; operand 2 (register or memory); the result goes to the V16HI register
;; operand 0.  Always VEX-encoded, three-operand form.
8577 (define_insn "avx2_phsubwv16hi3"
8578 [(set (match_operand:V16HI 0 "register_operand" "=x")
8585 (match_operand:V16HI 1 "register_operand" "x")
8586 (parallel [(const_int 0)]))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8589 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8593 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8594 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8596 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8604 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8605 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8608 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8609 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8611 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8618 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8619 (parallel [(const_int 0)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8622 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8627 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8629 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8630 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8634 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8637 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8641 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8642 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8644 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8645 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8647 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8648 [(set_attr "type" "sseiadd")
8649 (set_attr "prefix_extra" "1")
8650 (set_attr "prefix" "vex")
8651 (set_attr "mode" "OI")])
;; phsubw / vphsubw on 128-bit vectors.  The RTL selects adjacent HI
;; element pairs (0,1) ... (6,7), first from register operand 1 and then
;; from operand 2 (register or memory); the result goes to the V8HI
;; register operand 0.  Alternative 0 is the two-operand form (noavx,
;; operand 1 tied to the destination); alternative 1 is the VEX form.
8653 (define_insn "ssse3_phsubwv8hi3"
8654 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8660 (match_operand:V8HI 1 "register_operand" "0,x")
8661 (parallel [(const_int 0)]))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8669 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8671 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8672 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8677 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8678 (parallel [(const_int 0)]))
8679 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8682 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8686 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8689 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8692 phsubw\t{%2, %0|%0, %2}
8693 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8694 [(set_attr "isa" "noavx,avx")
8695 (set_attr "type" "sseiadd")
8696 (set_attr "atom_unit" "complex")
8697 (set_attr "prefix_data16" "1,*")
8698 (set_attr "prefix_extra" "1")
8699 (set_attr "prefix" "orig,vex")
8700 (set_attr "mode" "TI")])
;; phsubw on 64-bit MMX vectors ("y" constraints).  The RTL selects
;; adjacent HI element pairs (0,1) and (2,3), first from operand 1 (tied
;; to the destination) and then from operand 2 (MMX register or memory).
;; "prefix_rex" is computed per insn via x86_extended_reg_mentioned_p.
8702 (define_insn "ssse3_phsubwv4hi3"
8703 [(set (match_operand:V4HI 0 "register_operand" "=y")
8708 (match_operand:V4HI 1 "register_operand" "0")
8709 (parallel [(const_int 0)]))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8712 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8713 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8717 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8718 (parallel [(const_int 0)]))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8724 "phsubw\t{%2, %0|%0, %2}"
8725 [(set_attr "type" "sseiadd")
8726 (set_attr "atom_unit" "complex")
8727 (set_attr "prefix_extra" "1")
8728 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8729 (set_attr "mode" "DI")])
;; vphsubd on 256-bit vectors.  The RTL selects adjacent SI element pairs
;; (0,1) ... (6,7), first from register operand 1 and then from operand 2
;; (register or memory); the result goes to the V8SI register operand 0.
;; Always VEX-encoded, three-operand form.
8731 (define_insn "avx2_phsubdv8si3"
8732 [(set (match_operand:V8SI 0 "register_operand" "=x")
8738 (match_operand:V8SI 1 "register_operand" "x")
8739 (parallel [(const_int 0)]))
8740 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8742 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8743 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8746 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8747 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8749 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8750 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8755 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8756 (parallel [(const_int 0)]))
8757 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8759 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8760 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8763 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8764 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8766 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8767 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8769 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8770 [(set_attr "type" "sseiadd")
8771 (set_attr "prefix_extra" "1")
8772 (set_attr "prefix" "vex")
8773 (set_attr "mode" "OI")])
;; phsubd / vphsubd on 128-bit vectors.  The RTL selects adjacent SI
;; element pairs (0,1) and (2,3), first from register operand 1 and then
;; from operand 2 (register or memory); the result goes to the V4SI
;; register operand 0.  Alternative 0 is the two-operand form (noavx,
;; operand 1 tied to the destination); alternative 1 is the VEX form.
8775 (define_insn "ssse3_phsubdv4si3"
8776 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8781 (match_operand:V4SI 1 "register_operand" "0,x")
8782 (parallel [(const_int 0)]))
8783 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8785 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8786 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8790 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8791 (parallel [(const_int 0)]))
8792 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8794 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8795 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8798 phsubd\t{%2, %0|%0, %2}
8799 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8801 [(set_attr "isa" "noavx,avx")
8802 (set_attr "type" "sseiadd")
8803 (set_attr "atom_unit" "complex")
8804 (set_attr "prefix_data16" "1,*")
8805 (set_attr "prefix_extra" "1")
8806 (set_attr "prefix" "orig,vex")
8807 (set_attr "mode" "TI")])
;; phsubd on V2SI using "y" register constraints (mode attribute DI).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Each source contributes its element pair (0,1).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8809 (define_insn "ssse3_phsubdv2si3"
8810 [(set (match_operand:V2SI 0 "register_operand" "=y")
8814 (match_operand:V2SI 1 "register_operand" "0")
8815 (parallel [(const_int 0)]))
8816 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8819 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8820 (parallel [(const_int 0)]))
8821 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8823 "phsubd\t{%2, %0|%0, %2}"
8824 [(set_attr "type" "sseiadd")
8825 (set_attr "atom_unit" "complex")
8826 (set_attr "prefix_extra" "1")
8827 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8828 (set_attr "mode" "DI")])
;; vphsubsw on V16HI.  The RTL walks operand 1 and then operand 2 in
;; consecutive HImode element pairs (0,1), (2,3), ... (14,15) via vec_select.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; Only the three-operand VEX form is emitted (prefix "vex", mode OI).
8830 (define_insn "avx2_phsubswv16hi3"
8831 [(set (match_operand:V16HI 0 "register_operand" "=x")
8838 (match_operand:V16HI 1 "register_operand" "x")
8839 (parallel [(const_int 0)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8842 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8849 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8850 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8854 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8857 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8858 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8861 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8862 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8864 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8865 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8871 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8872 (parallel [(const_int 0)]))
8873 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8875 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8876 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8879 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8880 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8882 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8883 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8887 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8888 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8890 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8891 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8894 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8895 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8897 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8898 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8900 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8901 [(set_attr "type" "sseiadd")
8902 (set_attr "prefix_extra" "1")
8903 (set_attr "prefix" "vex")
8904 (set_attr "mode" "OI")])
;; phsubsw / vphsubsw on V8HI, pairing elements (0,1) ... (6,7) of each
;; source.  Alternative 0 (isa "noavx") ties operand 1 to the destination
;; and prints the two-operand form; alternative 1 (isa "avx") prints the
;; three-operand VEX form.
8906 (define_insn "ssse3_phsubswv8hi3"
8907 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8913 (match_operand:V8HI 1 "register_operand" "0,x")
8914 (parallel [(const_int 0)]))
8915 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8917 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8918 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8921 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8922 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8924 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8925 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8930 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8931 (parallel [(const_int 0)]))
8932 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8934 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8935 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8938 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8939 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8941 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8942 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8945 phsubsw\t{%2, %0|%0, %2}
8946 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8947 [(set_attr "isa" "noavx,avx")
8948 (set_attr "type" "sseiadd")
8949 (set_attr "atom_unit" "complex")
8950 (set_attr "prefix_data16" "1,*")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "prefix" "orig,vex")
8953 (set_attr "mode" "TI")])
;; phsubsw on V4HI using "y" register constraints (mode attribute DI).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Each source is read as the element pairs (0,1) and (2,3).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8955 (define_insn "ssse3_phsubswv4hi3"
8956 [(set (match_operand:V4HI 0 "register_operand" "=y")
8961 (match_operand:V4HI 1 "register_operand" "0")
8962 (parallel [(const_int 0)]))
8963 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8965 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8966 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8970 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8971 (parallel [(const_int 0)]))
8972 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8974 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8975 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8977 "phsubsw\t{%2, %0|%0, %2}"
8978 [(set_attr "type" "sseiadd")
8979 (set_attr "atom_unit" "complex")
8980 (set_attr "prefix_extra" "1")
8981 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8982 (set_attr "mode" "DI")])
;; vpmaddubsw: two V32QI sources (operand 1 register, operand 2 register or
;; memory) producing a V16HI result.  The RTL takes V16QI selections from
;; each source, one index list starting at element 0 and one at element 1.
;; Only the three-operand VEX form is emitted.
8984 (define_insn "avx2_pmaddubsw256"
8985 [(set (match_operand:V16HI 0 "register_operand" "=x")
8990 (match_operand:V32QI 1 "register_operand" "x")
8991 (parallel [(const_int 0)
9009 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
9010 (parallel [(const_int 0)
9028 (vec_select:V16QI (match_dup 1)
9029 (parallel [(const_int 1)
9046 (vec_select:V16QI (match_dup 2)
9047 (parallel [(const_int 1)
9062 (const_int 31)]))))))]
9064 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9065 [(set_attr "type" "sseiadd")
9066 (set_attr "prefix_extra" "1")
9067 (set_attr "prefix" "vex")
9068 (set_attr "mode" "OI")])
;; pmaddubsw / vpmaddubsw: two V16QI sources producing a V8HI result.
;; The RTL takes V8QI selections from each source, one index list starting
;; at element 0 and one at element 1.  Alternative 0 (isa "noavx") ties
;; operand 1 to the destination; alternative 1 (isa "avx") is three-operand.
9070 (define_insn "ssse3_pmaddubsw128"
9071 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9076 (match_operand:V16QI 1 "register_operand" "0,x")
9077 (parallel [(const_int 0)
9087 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9088 (parallel [(const_int 0)
9098 (vec_select:V8QI (match_dup 1)
9099 (parallel [(const_int 1)
9108 (vec_select:V8QI (match_dup 2)
9109 (parallel [(const_int 1)
9116 (const_int 15)]))))))]
9119 pmaddubsw\t{%2, %0|%0, %2}
9120 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9121 [(set_attr "isa" "noavx,avx")
9122 (set_attr "type" "sseiadd")
9123 (set_attr "atom_unit" "simul")
9124 (set_attr "prefix_data16" "1,*")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "prefix" "orig,vex")
9127 (set_attr "mode" "TI")])
;; pmaddubsw with "y" register constraints: two V8QI sources producing a
;; V4HI result (mode attribute DI).  Operand 1 is tied to the destination.
;; The RTL takes V4QI selections from each source, one index list starting
;; at element 0 and one at element 1.
9129 (define_insn "ssse3_pmaddubsw"
9130 [(set (match_operand:V4HI 0 "register_operand" "=y")
9135 (match_operand:V8QI 1 "register_operand" "0")
9136 (parallel [(const_int 0)
9142 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9143 (parallel [(const_int 0)
9149 (vec_select:V4QI (match_dup 1)
9150 (parallel [(const_int 1)
9155 (vec_select:V4QI (match_dup 2)
9156 (parallel [(const_int 1)
9159 (const_int 7)]))))))]
9161 "pmaddubsw\t{%2, %0|%0, %2}"
9162 [(set_attr "type" "sseiadd")
9163 (set_attr "atom_unit" "simul")
9164 (set_attr "prefix_extra" "1")
9165 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9166 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply pattern matched by *avx2_umulhrswv16hi3.
;; Both sources may be registers or memory here; the preparation statement
;; only calls ix86_fixup_binary_operands_no_copy for MULT on the operands.
;; The RTL includes a V16HI const_vector whose sixteen elements are all 1.
9168 (define_expand "avx2_umulhrswv16hi3"
9169 [(set (match_operand:V16HI 0 "register_operand" "")
9176 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9178 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9180 (const_vector:V16HI [(const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)
9185 (const_int 1) (const_int 1)
9186 (const_int 1) (const_int 1)
9187 (const_int 1) (const_int 1)]))
9190 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmulhrsw on V16HI.  Matches only when TARGET_AVX2 holds and
;; ix86_binary_operator_ok accepts the MULT operands.  Operand 1 carries the
;; "%" (commutative) constraint modifier; operand 2 may be register or memory.
;; The RTL includes a V16HI const_vector whose sixteen elements are all 1.
9192 (define_insn "*avx2_umulhrswv16hi3"
9193 [(set (match_operand:V16HI 0 "register_operand" "=x")
9200 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9202 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9204 (const_vector:V16HI [(const_int 1) (const_int 1)
9205 (const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)
9209 (const_int 1) (const_int 1)
9210 (const_int 1) (const_int 1)
9211 (const_int 1) (const_int 1)]))
9213 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9214 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9215 [(set_attr "type" "sseimul")
9216 (set_attr "prefix_extra" "1")
9217 (set_attr "prefix" "vex")
9218 (set_attr "mode" "OI")])
;; Expander for the V8HI multiply pattern matched by *ssse3_pmulhrswv8hi3.
;; Both sources may be registers or memory here; the preparation statement
;; only calls ix86_fixup_binary_operands_no_copy for MULT on the operands.
;; The RTL includes a V8HI const_vector whose eight elements are all 1.
9220 (define_expand "ssse3_pmulhrswv8hi3"
9221 [(set (match_operand:V8HI 0 "register_operand" "")
9228 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9230 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9232 (const_vector:V8HI [(const_int 1) (const_int 1)
9233 (const_int 1) (const_int 1)
9234 (const_int 1) (const_int 1)
9235 (const_int 1) (const_int 1)]))
9238 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhrsw / vpmulhrsw on V8HI.  Matches only when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the MULT operands.  Operand 1 carries the
;; "%" (commutative) modifier.  Alternative 0 (isa "noavx") ties operand 1 to
;; the destination; alternative 1 (isa "avx") prints the three-operand form.
9240 (define_insn "*ssse3_pmulhrswv8hi3"
9241 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9248 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9250 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9252 (const_vector:V8HI [(const_int 1) (const_int 1)
9253 (const_int 1) (const_int 1)
9254 (const_int 1) (const_int 1)
9255 (const_int 1) (const_int 1)]))
9257 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9259 pmulhrsw\t{%2, %0|%0, %2}
9260 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9261 [(set_attr "isa" "noavx,avx")
9262 (set_attr "type" "sseimul")
9263 (set_attr "prefix_data16" "1,*")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "prefix" "orig,vex")
9266 (set_attr "mode" "TI")])
;; Expander for the V4HI multiply pattern matched by *ssse3_pmulhrswv4hi3.
;; Both sources may be registers or memory here; the preparation statement
;; only calls ix86_fixup_binary_operands_no_copy for MULT on the operands.
;; The RTL includes a V4HI const_vector whose four elements are all 1.
9268 (define_expand "ssse3_pmulhrswv4hi3"
9269 [(set (match_operand:V4HI 0 "register_operand" "")
9276 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9278 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9280 (const_vector:V4HI [(const_int 1) (const_int 1)
9281 (const_int 1) (const_int 1)]))
9284 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on V4HI with "y" register constraints (mode attribute DI).
;; Matches only when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the MULT operands.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
9286 (define_insn "*ssse3_pmulhrswv4hi3"
9287 [(set (match_operand:V4HI 0 "register_operand" "=y")
9294 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9296 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9298 (const_vector:V4HI [(const_int 1) (const_int 1)
9299 (const_int 1) (const_int 1)]))
9301 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9302 "pmulhrsw\t{%2, %0|%0, %2}"
9303 [(set_attr "type" "sseimul")
9304 (set_attr "prefix_extra" "1")
9305 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9306 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX2 modes, expressed as an unspec of
;; operands 1 and 2.  Alternative 0 (isa "noavx") ties operand 1 to the
;; destination; alternative 1 (isa "avx") prints the three-operand form.
;; The insn mode attribute follows the iterator via <sseinsnmode>.
9308 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9309 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9310 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9311 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9315 pshufb\t{%2, %0|%0, %2}
9316 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9317 [(set_attr "isa" "noavx,avx")
9318 (set_attr "type" "sselog1")
9319 (set_attr "prefix_data16" "1,*")
9320 (set_attr "prefix_extra" "1")
9321 (set_attr "prefix" "orig,vex")
9322 (set_attr "mode" "<sseinsnmode>")])
;; pshufb on V8QI using "y" register constraints (mode attribute DI),
;; expressed as an unspec of operands 1 and 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9324 (define_insn "ssse3_pshufbv8qi3"
9325 [(set (match_operand:V8QI 0 "register_operand" "=y")
9326 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9327 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9330 "pshufb\t{%2, %0|%0, %2}"
9331 [(set_attr "type" "sselog1")
9332 (set_attr "prefix_extra" "1")
9333 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9334 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> / vpsign<ssemodesuffix> over the VI124_AVX2 modes.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and prints
;; the two-operand form; alternative 1 (isa "avx") prints the three-operand
;; form.  Operand 2 may be a register or memory.
9336 (define_insn "<ssse3_avx2>_psign<mode>3"
9337 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9339 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9340 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9344 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9345 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9346 [(set_attr "isa" "noavx,avx")
9347 (set_attr "type" "sselog1")
9348 (set_attr "prefix_data16" "1,*")
9349 (set_attr "prefix_extra" "1")
9350 (set_attr "prefix" "orig,vex")
9351 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> over the MMXMODEI modes using "y" register constraints
;; (mode attribute DI).  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9353 (define_insn "ssse3_psign<mode>3"
9354 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9356 [(match_operand:MMXMODEI 1 "register_operand" "0")
9357 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9360 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9361 [(set_attr "type" "sselog1")
9362 (set_attr "prefix_extra" "1")
9363 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9364 (set_attr "mode" "DI")])
;; palignr / vpalignr over the SSESCALARMODE modes, expressed as an unspec
;; of operands 1, 2 and the constant operand 3.  The output code divides
;; operand 3 by 8 before printing, then selects the two-operand or the
;; three-operand template from which_alternative.
;; NOTE(review): operand 3 is presumably a bit count (multiple of 8) that is
;; converted to the byte count the instruction encodes -- confirm against
;; the const_0_to_255_mul_8_operand predicate.
9366 (define_insn "<ssse3_avx2>_palignr<mode>"
9367 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9368 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9369 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9370 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9374 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9376 switch (which_alternative)
9379 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9381 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9386 [(set_attr "isa" "noavx,avx")
9387 (set_attr "type" "sseishft")
9388 (set_attr "atom_unit" "sishuf")
9389 (set_attr "prefix_data16" "1,*")
9390 (set_attr "prefix_extra" "1")
9391 (set_attr "length_immediate" "1")
9392 (set_attr "prefix" "orig,vex")
9393 (set_attr "mode" "<sseinsnmode>")])
;; palignr on DImode using "y" register constraints, expressed as an unspec
;; of operands 1, 2 and the constant operand 3.  Operand 1 is tied to the
;; destination.  The output code divides operand 3 by 8 before printing.
;; NOTE(review): operand 3 is presumably a bit count (multiple of 8)
;; converted to a byte count -- confirm against the predicate definition.
9395 (define_insn "ssse3_palignrdi"
9396 [(set (match_operand:DI 0 "register_operand" "=y")
9397 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9398 (match_operand:DI 2 "nonimmediate_operand" "ym")
9399 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9403 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9404 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9406 [(set_attr "type" "sseishft")
9407 (set_attr "atom_unit" "sishuf")
9408 (set_attr "prefix_extra" "1")
9409 (set_attr "length_immediate" "1")
9410 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9411 (set_attr "mode" "DI")])
;; pabs<ssemodesuffix> over the VI124_AVX2 modes: one source (register or
;; memory) and a register destination.  The template uses the %v prefix and
;; the prefix attribute is "maybe_vex".
;; NOTE(review): %v presumably prints the "v" mnemonic prefix when AVX is
;; enabled -- confirm in the i386 operand-printing code.
9413 (define_insn "abs<mode>2"
9414 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9416 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9418 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9419 [(set_attr "type" "sselog1")
9420 (set_attr "prefix_data16" "1")
9421 (set_attr "prefix_extra" "1")
9422 (set_attr "prefix" "maybe_vex")
9423 (set_attr "mode" "<sseinsnmode>")])
;; pabs<mmxvecsize> over the MMXMODEI modes using "y" register constraints
;; (mode attribute DI): one source (register or memory) and a register
;; destination.  prefix_rep is forced to 0 and prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
9425 (define_insn "abs<mode>2"
9426 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9428 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9430 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9431 [(set_attr "type" "sselog1")
9432 (set_attr "prefix_rep" "0")
9433 (set_attr "prefix_extra" "1")
9434 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9435 (set_attr "mode" "DI")])
9437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9439 ;; AMD SSE4A instructions
9441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix>: stores register operand 1 (a MODEF scalar) to
;; memory operand 0.  The source is wrapped in an unspec.
9443 (define_insn "sse4a_movnt<mode>"
9444 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9446 [(match_operand:MODEF 1 "register_operand" "x")]
9449 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9450 [(set_attr "type" "ssemov")
9451 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix>: stores element 0 of the VF_128 register
;; operand 1 to a scalar-mode memory operand 0.  The vec_select of element
;; 0 is wrapped in an unspec.
9453 (define_insn "sse4a_vmmovnt<mode>"
9454 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9455 (unspec:<ssescalarmode>
9456 [(vec_select:<ssescalarmode>
9457 (match_operand:VF_128 1 "register_operand" "x")
9458 (parallel [(const_int 0)]))]
9461 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9462 [(set_attr "type" "ssemov")
9463 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: V2DI operand 1 is tied to the destination and
;; operands 2 and 3 are constants (predicate const_0_to_255_operand).
;; length_immediate is 2 because two immediates are printed.
9465 (define_insn "sse4a_extrqi"
9466 [(set (match_operand:V2DI 0 "register_operand" "=x")
9467 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9468 (match_operand 2 "const_0_to_255_operand" "")
9469 (match_operand 3 "const_0_to_255_operand" "")]
9472 "extrq\t{%3, %2, %0|%0, %2, %3}"
9473 [(set_attr "type" "sse")
9474 (set_attr "prefix_data16" "1")
9475 (set_attr "length_immediate" "2")
9476 (set_attr "mode" "TI")])
;; extrq, register form: V2DI operand 1 is tied to the destination and the
;; second source is a V16QI register (operand 2).  Expressed as an unspec.
9478 (define_insn "sse4a_extrq"
9479 [(set (match_operand:V2DI 0 "register_operand" "=x")
9480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9481 (match_operand:V16QI 2 "register_operand" "x")]
9484 "extrq\t{%2, %0|%0, %2}"
9485 [(set_attr "type" "sse")
9486 (set_attr "prefix_data16" "1")
9487 (set_attr "mode" "TI")])
;; insertq, immediate form: V2DI operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are constants
;; (predicate const_0_to_255_operand).  length_immediate is 2 because two
;; immediates are printed; prefix_data16 is 0 and prefix_rep is 1.
9489 (define_insn "sse4a_insertqi"
9490 [(set (match_operand:V2DI 0 "register_operand" "=x")
9491 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9492 (match_operand:V2DI 2 "register_operand" "x")
9493 (match_operand 3 "const_0_to_255_operand" "")
9494 (match_operand 4 "const_0_to_255_operand" "")]
9497 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9498 [(set_attr "type" "sseins")
9499 (set_attr "prefix_data16" "0")
9500 (set_attr "prefix_rep" "1")
9501 (set_attr "length_immediate" "2")
9502 (set_attr "mode" "TI")])
;; insertq, register form: V2DI operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  Expressed as an unspec; prefix_data16 is
;; 0 and prefix_rep is 1.
9504 (define_insn "sse4a_insertq"
9505 [(set (match_operand:V2DI 0 "register_operand" "=x")
9506 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9507 (match_operand:V2DI 2 "register_operand" "x")]
9510 "insertq\t{%2, %0|%0, %2}"
9511 [(set_attr "type" "sseins")
9512 (set_attr "prefix_data16" "0")
9513 (set_attr "prefix_rep" "1")
9514 (set_attr "mode" "TI")])
9516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9518 ;; Intel SSE4.1 instructions
9520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blend<ssemodesuffix> / vblend<ssemodesuffix> over the VF modes.
;; Note the RTL operand order: operand 2 comes first, then operand 1, then
;; the constant selector operand 3.  Alternative 0 (isa "noavx") ties
;; operand 1 to the destination; alternative 1 (isa "avx") is three-operand.
9522 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9523 [(set (match_operand:VF 0 "register_operand" "=x,x")
9525 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9526 (match_operand:VF 1 "register_operand" "0,x")
9527 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9530 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9531 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9532 [(set_attr "isa" "noavx,avx")
9533 (set_attr "type" "ssemov")
9534 (set_attr "length_immediate" "1")
9535 (set_attr "prefix_data16" "1,*")
9536 (set_attr "prefix_extra" "1")
9537 (set_attr "prefix" "orig,vex")
9538 (set_attr "mode" "<MODE>")])
;; blendv<ssemodesuffix> / vblendv<ssemodesuffix> over the VF modes, with a
;; register mask in operand 3.  Alternative 0 (isa "noavx") ties operand 1
;; to the destination and constrains the mask to "Yz"; alternative 1
;; (isa "avx") accepts any "x" register for the mask.
;; length_immediate is "*,1": only the VEX form carries an extra byte for
;; the fourth register operand, while the non-AVX form takes the mask
;; implicitly.  This matches the pblendvb pattern's attribute.
9540 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9541 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9543 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9544 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9545 (match_operand:VF 3 "register_operand" "Yz,x")]
9549 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9550 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9551 [(set_attr "isa" "noavx,avx")
9552 (set_attr "type" "ssemov")
9553 (set_attr "length_immediate" "*,1")
9554 (set_attr "prefix_data16" "1,*")
9555 (set_attr "prefix_extra" "1")
9556 (set_attr "prefix" "orig,vex")
9557 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> over the VF modes with a constant
;; control operand 3 (predicate const_0_to_255_operand).  Operand 1 carries
;; the "%" (commutative) modifier.  Alternative 0 (isa "noavx") ties operand
;; 1 to the destination; alternative 1 (isa "avx") is three-operand.
9559 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9560 [(set (match_operand:VF 0 "register_operand" "=x,x")
9562 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9563 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9564 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9568 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9569 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9570 [(set_attr "isa" "noavx,avx")
9571 (set_attr "type" "ssemul")
9572 (set_attr "length_immediate" "1")
9573 (set_attr "prefix_data16" "1,*")
9574 (set_attr "prefix_extra" "1")
9575 (set_attr "prefix" "orig,vex")
9576 (set_attr "mode" "<MODE>")])
;; movntdqa over the VI8_AVX2 modes: loads memory operand 1 into register
;; operand 0.  The source must be memory and is wrapped in an unspec.
;; The template uses the %v prefix and the prefix attribute is "maybe_vex".
9578 (define_insn "<sse4_1_avx2>_movntdqa"
9579 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9580 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9583 "%vmovntdqa\t{%1, %0|%0, %1}"
9584 [(set_attr "type" "ssemov")
9585 (set_attr "prefix_extra" "1")
9586 (set_attr "prefix" "maybe_vex")
9587 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 modes, expressed as an unspec of
;; operands 1, 2 and the constant control operand 3.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination; alternative 1
;; (isa "avx") prints the three-operand form.
9589 (define_insn "<sse4_1_avx2>_mpsadbw"
9590 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9591 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9592 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9593 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9597 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9598 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9599 [(set_attr "isa" "noavx,avx")
9600 (set_attr "type" "sselog1")
9601 (set_attr "length_immediate" "1")
9602 (set_attr "prefix_extra" "1")
9603 (set_attr "prefix" "orig,vex")
9604 (set_attr "mode" "<sseinsnmode>")])
;; vpackusdw: two V8SI sources (operand 1 register, operand 2 register or
;; memory) producing a V16HI result.  Only the three-operand VEX form is
;; emitted.
9606 (define_insn "avx2_packusdw"
9607 [(set (match_operand:V16HI 0 "register_operand" "=x")
9610 (match_operand:V8SI 1 "register_operand" "x"))
9612 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9614 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9615 [(set_attr "type" "sselog")
9616 (set_attr "prefix_extra" "1")
9617 (set_attr "prefix" "vex")
9618 (set_attr "mode" "OI")])
;; packusdw / vpackusdw: two V4SI sources producing a V8HI result.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") prints the three-operand form.
9620 (define_insn "sse4_1_packusdw"
9621 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9624 (match_operand:V4SI 1 "register_operand" "0,x"))
9626 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9629 packusdw\t{%2, %0|%0, %2}
9630 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9631 [(set_attr "isa" "noavx,avx")
9632 (set_attr "type" "sselog")
9633 (set_attr "prefix_extra" "1")
9634 (set_attr "prefix" "orig,vex")
9635 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 modes, with a register mask in
;; operand 3.  Alternative 0 (isa "noavx") ties operand 1 to the destination
;; and constrains the mask to "Yz"; alternative 1 (isa "avx") accepts any
;; "x" register for the mask and carries one extra immediate byte.
;; The destination uses the same *_maybe_avx predicate as operand 1 (and as
;; the floating-point blendv pattern), so the restriction on the destination
;; register is applied consistently with the tied source.
9637 (define_insn "<sse4_1_avx2>_pblendvb"
9638 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9640 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9641 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9642 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9646 pblendvb\t{%3, %2, %0|%0, %2, %3}
9647 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9648 [(set_attr "isa" "noavx,avx")
9649 (set_attr "type" "ssemov")
9650 (set_attr "prefix_extra" "1")
9651 (set_attr "length_immediate" "*,1")
9652 (set_attr "prefix" "orig,vex")
9653 (set_attr "mode" "<sseinsnmode>")])
;; pblendw / vpblendw on V8HI with a constant selector operand 3.
;; Note the RTL operand order: operand 2 comes first, then operand 1.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") prints the three-operand form.
9655 (define_insn "sse4_1_pblendw"
9656 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9658 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9659 (match_operand:V8HI 1 "register_operand" "0,x")
9660 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9663 pblendw\t{%3, %2, %0|%0, %2, %3}
9664 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9665 [(set_attr "isa" "noavx,avx")
9666 (set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "length_immediate" "1")
9669 (set_attr "prefix" "orig,vex")
9670 (set_attr "mode" "TI")])
9672 ;; The builtin uses an 8-bit immediate.  Expand it to a 16-bit mask by duplicating the byte into the upper half.
;; Expander for the V16HI blend matched by *avx2_pblendw.  Operand 3 arrives
;; as a constant in 0..255; the preparation code masks it to 8 bits and
;; rewrites it as (val << 8 | val), i.e. the same byte in both halves of a
;; 16-bit value.
9673 (define_expand "avx2_pblendw"
9674 [(set (match_operand:V16HI 0 "register_operand" "")
9676 (match_operand:V16HI 2 "nonimmediate_operand" "")
9677 (match_operand:V16HI 1 "register_operand" "")
9678 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9681 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9682 operands[3] = GEN_INT (val << 8 | val);
;; vpblendw on V16HI.  Operand 3 must satisfy avx2_pblendw_operand; the
;; output code reduces it to its low 8 bits before printing, so the
;; instruction is emitted with a one-byte immediate.
;; Note the RTL operand order: operand 2 comes first, then operand 1.
9685 (define_insn "*avx2_pblendw"
9686 [(set (match_operand:V16HI 0 "register_operand" "=x")
9688 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9689 (match_operand:V16HI 1 "register_operand" "x")
9690 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9693 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9694 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9696 [(set_attr "type" "ssemov")
9697 (set_attr "prefix_extra" "1")
9698 (set_attr "length_immediate" "1")
9699 (set_attr "prefix" "vex")
9700 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 modes with a constant selector operand 3.
;; Note the RTL operand order: operand 2 comes first, then operand 1.
;; Only the three-operand VEX form is emitted.
9702 (define_insn "avx2_pblendd<mode>"
9703 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9705 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9706 (match_operand:VI4_AVX2 1 "register_operand" "x")
9707 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9709 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9710 [(set_attr "type" "ssemov")
9711 (set_attr "prefix_extra" "1")
9712 (set_attr "length_immediate" "1")
9713 (set_attr "prefix" "vex")
9714 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI: one source (register or memory), expressed as
;; UNSPEC_PHMINPOSUW.  The template uses the %v prefix and the prefix
;; attribute is "maybe_vex".
9716 (define_insn "sse4_1_phminposuw"
9717 [(set (match_operand:V8HI 0 "register_operand" "=x")
9718 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9719 UNSPEC_PHMINPOSUW))]
9721 "%vphminposuw\t{%1, %0|%0, %1}"
9722 [(set_attr "type" "sselog1")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "maybe_vex")
9725 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw from a whole V16QI source (register or memory) to a
;; V16HI destination.  VEX-only (prefix "vex", mode OI).
9727 (define_insn "avx2_<code>v16qiv16hi2"
9728 [(set (match_operand:V16HI 0 "register_operand" "=x")
9730 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9732 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9733 [(set_attr "type" "ssemov")
9734 (set_attr "prefix_extra" "1")
9735 (set_attr "prefix" "vex")
9736 (set_attr "mode" "OI")])
;; pmov<extsuffix>bw to V8HI from a selection of the V16QI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %q operand modifier.  Prefix attribute is "maybe_vex".
9738 (define_insn "sse4_1_<code>v8qiv8hi2"
9739 [(set (match_operand:V8HI 0 "register_operand" "=x")
9742 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9743 (parallel [(const_int 0)
9752 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9753 [(set_attr "type" "ssemov")
9754 (set_attr "prefix_extra" "1")
9755 (set_attr "prefix" "maybe_vex")
9756 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd to V8SI from a selection of the V16QI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %q operand modifier.  VEX-only (prefix "vex", mode OI).
9758 (define_insn "avx2_<code>v8qiv8si2"
9759 [(set (match_operand:V8SI 0 "register_operand" "=x")
9762 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9763 (parallel [(const_int 0)
9772 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9773 [(set_attr "type" "ssemov")
9774 (set_attr "prefix_extra" "1")
9775 (set_attr "prefix" "vex")
9776 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd to V4SI from a selection of the V16QI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %k operand modifier.  Prefix attribute is "maybe_vex".
9778 (define_insn "sse4_1_<code>v4qiv4si2"
9779 [(set (match_operand:V4SI 0 "register_operand" "=x")
9782 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9783 (parallel [(const_int 0)
9788 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix_extra" "1")
9791 (set_attr "prefix" "maybe_vex")
9792 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd from a whole V8HI source (register or memory) to a
;; V8SI destination.  VEX-only (prefix "vex", mode OI).
9794 (define_insn "avx2_<code>v8hiv8si2"
9795 [(set (match_operand:V8SI 0 "register_operand" "=x")
9797 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9799 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9800 [(set_attr "type" "ssemov")
9801 (set_attr "prefix_extra" "1")
9802 (set_attr "prefix" "vex")
9803 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd to V4SI from a selection of the V8HI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %q operand modifier.  Prefix attribute is "maybe_vex".
9805 (define_insn "sse4_1_<code>v4hiv4si2"
9806 [(set (match_operand:V4SI 0 "register_operand" "=x")
9809 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9810 (parallel [(const_int 0)
9815 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9816 [(set_attr "type" "ssemov")
9817 (set_attr "prefix_extra" "1")
9818 (set_attr "prefix" "maybe_vex")
9819 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq to V4DI from a selection of the V16QI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %k operand modifier.  VEX-only (prefix "vex", mode OI).
9821 (define_insn "avx2_<code>v4qiv4di2"
9822 [(set (match_operand:V4DI 0 "register_operand" "=x")
9825 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9826 (parallel [(const_int 0)
9831 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9832 [(set_attr "type" "ssemov")
9833 (set_attr "prefix_extra" "1")
9834 (set_attr "prefix" "vex")
9835 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq to V2DI from a selection of the V16QI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %w operand modifier.  Prefix attribute is "maybe_vex".
9837 (define_insn "sse4_1_<code>v2qiv2di2"
9838 [(set (match_operand:V2DI 0 "register_operand" "=x")
9841 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9842 (parallel [(const_int 0)
9845 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9846 [(set_attr "type" "ssemov")
9847 (set_attr "prefix_extra" "1")
9848 (set_attr "prefix" "maybe_vex")
9849 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq to V4DI from a selection of the V8HI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %q operand modifier.  VEX-only (prefix "vex", mode OI).
9851 (define_insn "avx2_<code>v4hiv4di2"
9852 [(set (match_operand:V4DI 0 "register_operand" "=x")
9855 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9856 (parallel [(const_int 0)
9861 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9862 [(set_attr "type" "ssemov")
9863 (set_attr "prefix_extra" "1")
9864 (set_attr "prefix" "vex")
9865 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq to V2DI from a selection of the V8HI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %k operand modifier.  Prefix attribute is "maybe_vex".
9867 (define_insn "sse4_1_<code>v2hiv2di2"
9868 [(set (match_operand:V2DI 0 "register_operand" "=x")
9871 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9872 (parallel [(const_int 0)
9875 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9876 [(set_attr "type" "ssemov")
9877 (set_attr "prefix_extra" "1")
9878 (set_attr "prefix" "maybe_vex")
9879 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq from a whole V4SI source (register or memory) to a
;; V4DI destination.  The instruction is only printed in its VEX form, so
;; the pattern carries prefix "vex" like the other avx2_<code>* extension
;; patterns; without it the prefix attribute would fall back to its default.
9881 (define_insn "avx2_<code>v4siv4di2"
9882 [(set (match_operand:V4DI 0 "register_operand" "=x")
9884 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9886 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9887 [(set_attr "type" "ssemov")
9888 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9889 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq to V2DI from a selection of the V4SI source whose
;; index list starts at element 0.  The Intel-syntax source is printed with
;; the %q operand modifier.  Prefix attribute is "maybe_vex".
9891 (define_insn "sse4_1_<code>v2siv2di2"
9892 [(set (match_operand:V2DI 0 "register_operand" "=x")
9895 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9896 (parallel [(const_int 0)
9899 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9900 [(set_attr "type" "ssemov")
9901 (set_attr "prefix_extra" "1")
9902 (set_attr "prefix" "maybe_vex")
9903 (set_attr "mode" "TI")])
9905 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9906 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtest<ssemodesuffix> over the VF modes.  The pattern's destination is
;; (reg:CC FLAGS_REG), set from an unspec of operands 0 and 1; both operands
;; are inputs.  Operand 1 may be a register or memory.
9907 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9908 [(set (reg:CC FLAGS_REG)
9909 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9910 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9913 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9914 [(set_attr "type" "ssecomi")
9915 (set_attr "prefix_extra" "1")
9916 (set_attr "prefix" "vex")
9917 (set_attr "mode" "<MODE>")])
9919 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9920 ;; But it is not really a compare instruction.
;; vptest on V4DI.  The pattern's destination is (reg:CC FLAGS_REG), set
;; from an unspec of operands 0 and 1; both operands are inputs.
;; VEX-only (prefix "vex", mode OI).
9921 (define_insn "avx_ptest256"
9922 [(set (reg:CC FLAGS_REG)
9923 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9924 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9927 "vptest\t{%1, %0|%0, %1}"
9928 [(set_attr "type" "ssecomi")
9929 (set_attr "prefix_extra" "1")
9930 (set_attr "prefix" "vex")
9931 (set_attr "mode" "OI")])
;; ptest on V2DI.  The pattern's destination is (reg:CC FLAGS_REG), set
;; from an unspec of operands 0 and 1; both operands are inputs.
;; The template uses the %v prefix and the prefix attribute is "maybe_vex".
9933 (define_insn "sse4_1_ptest"
9934 [(set (reg:CC FLAGS_REG)
9935 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9936 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9939 "%vptest\t{%1, %0|%0, %1}"
9940 [(set_attr "type" "ssecomi")
9941 (set_attr "prefix_extra" "1")
9942 (set_attr "prefix" "maybe_vex")
9943 (set_attr "mode" "TI")])
;; round<ssemodesuffix> over the VF modes: one source (register or memory)
;; plus a constant control operand 2 (predicate const_0_to_15_operand).
;; prefix_data16 is not a fixed string here: it is computed by an attribute
;; expression that tests TARGET_AVX.
9945 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9946 [(set (match_operand:VF 0 "register_operand" "=x")
9948 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9949 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9952 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9953 [(set_attr "type" "ssecvt")
9954 (set (attr "prefix_data16")
9956 (match_test "TARGET_AVX")
9958 (const_string "1")))
9959 (set_attr "prefix_extra" "1")
9960 (set_attr "length_immediate" "1")
9961 (set_attr "prefix" "maybe_vex")
9962 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for the VF1 modes.  Operand 1 is rounded
;; (control in operand 2) into a fresh temporary of the same vector mode,
;; and the temporary is then converted into the integer-vector operand 0
;; with the fix_trunc pattern.
9964 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9965 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9966 (match_operand:VF1 1 "nonimmediate_operand" "")
9967 (match_operand:SI 2 "const_0_to_15_operand" "")]
9970 rtx tmp = gen_reg_rtx (<MODE>mode);
9973 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9976 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 inputs and pack the converted results into operand 0.
;; When the mode is V2DF and TARGET_AVX && !TARGET_PREFER_AVX128 holds, the
;; two inputs are concatenated into one V4DF, rounded once with roundpd256
;; and converted with fix_truncv4dfv4si2.  The other path rounds operands 1
;; and 2 separately into temporaries and combines them with
;; vec_pack_sfix_trunc_<mode>.
9980 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9981 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9982 (match_operand:VF2 1 "nonimmediate_operand" "")
9983 (match_operand:VF2 2 "nonimmediate_operand" "")
9984 (match_operand:SI 3 "const_0_to_15_operand" "")]
9989 if (<MODE>mode == V2DFmode
9990 && TARGET_AVX && !TARGET_PREFER_AVX128)
9992 rtx tmp2 = gen_reg_rtx (V4DFmode);
9994 tmp0 = gen_reg_rtx (V4DFmode);
9995 tmp1 = force_reg (V2DFmode, operands[1]);
9997 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9998 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9999 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10003 tmp0 = gen_reg_rtx (<MODE>mode);
10004 tmp1 = gen_reg_rtx (<MODE>mode);
10007 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
10010 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
10013 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar ROUNDSS/ROUNDSD over VF_128.  Two alternatives:
;;   noavx: operand 1 is tied to the destination ("0"), two-operand form
;;   avx:   operand 1 is a separate register, three-operand vround form
;; Operand 2 is the value being rounded, operand 3 the immediate (0..15).
10018 (define_insn "sse4_1_round<ssescalarmodesuffix>"
10019 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
10022 [(match_operand:VF_128 2 "register_operand" "x,x")
10023 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
10025 (match_operand:VF_128 1 "register_operand" "0,x")
10029 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
10030 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10031 [(set_attr "isa" "noavx,avx")
10032 (set_attr "type" "ssecvt")
10033 (set_attr "length_immediate" "1")
10034 (set_attr "prefix_data16" "1,*")
10035 (set_attr "prefix_extra" "1")
10036 (set_attr "prefix" "orig,vex")
10037 (set_attr "mode" "<MODE>")])
;; Vector round<mode>2 expander, enabled only for
;; TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds the constant nextafter (0.5, 0.0) in the
;; element mode, broadcasts it into a vector, forces it into a register
;; and gives it the sign of operand 1 via copysign (result in operands[3]).
;; operands[4] is a fresh temporary and operands[5] is the ROUND_TRUNC
;; immediate; both are referenced by the pattern through match_dup.
10039 (define_expand "round<mode>2"
10040 [(set (match_dup 4)
10042 (match_operand:VF 1 "register_operand" "")
10044 (set (match_operand:VF 0 "register_operand" "")
10046 [(match_dup 4) (match_dup 5)]
10048 "TARGET_ROUND && !flag_trapping_math"
10050 enum machine_mode scalar_mode;
10051 const struct real_format *fmt;
10052 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
10053 rtx half, vec_half;
10055 scalar_mode = GET_MODE_INNER (<MODE>mode);
10057 /* load nextafter (0.5, 0.0) */
10058 fmt = REAL_MODE_FORMAT (scalar_mode);
10059 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
10060 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
10061 half = const_double_from_real_value (pred_half, scalar_mode);
10063 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
10064 vec_half = force_reg (<MODE>mode, vec_half);
10066 operands[3] = gen_reg_rtx (<MODE>mode);
10067 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
10069 operands[4] = gen_reg_rtx (<MODE>mode);
10070 operands[5] = GEN_INT (ROUND_TRUNC);
;; round<mode>2 followed by conversion to an integer vector, for SFmode
;; vectors (VF1).  Rounds operand 1 into a temporary with round<mode>2,
;; then converts it into operand 0 with fix_trunc.
10073 (define_expand "round<mode>2_sfix"
10074 [(match_operand:<sseintvecmode> 0 "register_operand" "")
10075 (match_operand:VF1 1 "register_operand" "")]
10076 "TARGET_ROUND && !flag_trapping_math"
10078 rtx tmp = gen_reg_rtx (<MODE>mode);
10080 emit_insn (gen_round<mode>2 (tmp, operands[1]));
10083 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; round<mode>2 on two DFmode vectors (VF2) with the results packed into
;; one integer vector (operand 0).
;; For V2DF with AVX and !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF, rounded once with roundv4df2 and converted with
;; fix_truncv4dfv4si2.  The remaining statements round each input
;; separately and combine them with vec_pack_sfix_trunc.
10087 (define_expand "round<mode>2_vec_pack_sfix"
10088 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
10089 (match_operand:VF2 1 "register_operand" "")
10090 (match_operand:VF2 2 "register_operand" "")]
10091 "TARGET_ROUND && !flag_trapping_math"
10095 if (<MODE>mode == V2DFmode
10096 && TARGET_AVX && !TARGET_PREFER_AVX128)
10098 rtx tmp2 = gen_reg_rtx (V4DFmode);
10100 tmp0 = gen_reg_rtx (V4DFmode);
10101 tmp1 = force_reg (V2DFmode, operands[1]);
10103 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10104 emit_insn (gen_roundv4df2 (tmp2, tmp0));
10105 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10109 tmp0 = gen_reg_rtx (<MODE>mode);
10110 tmp1 = gen_reg_rtx (<MODE>mode);
10112 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10113 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10116 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10121 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10123 ;; Intel SSE4.2 string/text processing instructions
10125 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern that sets an SI result (operand 0, "c"),
;; a V16QI result (operand 1, "Yz") and FLAGS_REG from the same inputs.
;; Inputs: operands 2/4 are the V16QI values (predicates exclude xmm0),
;; operands 3/5 are SI registers ("a" and "d"), operand 6 is an 8-bit
;; immediate.
;; The split code looks at REG_UNUSED notes on curr_insn to find out
;; which of the three results are live, and emits pcmpestri, pcmpestrm,
;; the flags-only pattern (when only the flags are live), or a deleted
;; note (when nothing is live).
10127 (define_insn_and_split "sse4_2_pcmpestr"
10128 [(set (match_operand:SI 0 "register_operand" "=c,c")
10130 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10131 (match_operand:SI 3 "register_operand" "a,a")
10132 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10133 (match_operand:SI 5 "register_operand" "d,d")
10134 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10136 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10144 (set (reg:CC FLAGS_REG)
10153 && can_create_pseudo_p ()"
10158 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10159 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10160 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10163 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10164 operands[3], operands[4],
10165 operands[5], operands[6]));
10167 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10168 operands[3], operands[4],
10169 operands[5], operands[6]));
10170 if (flags && !(ecx || xmm0))
10171 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10172 operands[2], operands[3],
10173 operands[4], operands[5],
10175 if (!(flags || ecx || xmm0))
10176 emit_note (NOTE_INSN_DELETED);
10180 [(set_attr "type" "sselog")
10181 (set_attr "prefix_data16" "1")
10182 (set_attr "prefix_extra" "1")
10183 (set_attr "length_immediate" "1")
10184 (set_attr "memory" "none,load")
10185 (set_attr "mode" "TI")])
;; PCMPESTRI: SI result in operand 0 ("c") plus FLAGS_REG.
;; Operands 1/3 are the V16QI inputs (3 may be memory), operands 2/4 are
;; SI registers ("a"/"d"), operand 5 is an 8-bit immediate.
10187 (define_insn "sse4_2_pcmpestri"
10188 [(set (match_operand:SI 0 "register_operand" "=c,c")
10190 [(match_operand:V16QI 1 "register_operand" "x,x")
10191 (match_operand:SI 2 "register_operand" "a,a")
10192 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10193 (match_operand:SI 4 "register_operand" "d,d")
10194 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10196 (set (reg:CC FLAGS_REG)
10205 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10206 [(set_attr "type" "sselog")
10207 (set_attr "prefix_data16" "1")
10208 (set_attr "prefix_extra" "1")
10209 (set_attr "prefix" "maybe_vex")
10210 (set_attr "length_immediate" "1")
10211 (set_attr "memory" "none,load")
10212 (set_attr "mode" "TI")])
;; PCMPESTRM: V16QI result in operand 0 ("Yz") plus FLAGS_REG.
;; Input operands are laid out exactly as in sse4_2_pcmpestri.
10214 (define_insn "sse4_2_pcmpestrm"
10215 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10217 [(match_operand:V16QI 1 "register_operand" "x,x")
10218 (match_operand:SI 2 "register_operand" "a,a")
10219 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10220 (match_operand:SI 4 "register_operand" "d,d")
10221 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10223 (set (reg:CC FLAGS_REG)
10232 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10233 [(set_attr "type" "sselog")
10234 (set_attr "prefix_data16" "1")
10235 (set_attr "prefix_extra" "1")
10236 (set_attr "length_immediate" "1")
10237 (set_attr "prefix" "maybe_vex")
10238 (set_attr "memory" "none,load")
10239 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: only FLAGS_REG is set; the V16QI and SI results
;; are clobbered scratches (operands 0 and 1).
;; Four alternatives: the first two clobber the "Yz" scratch and emit
;; pcmpestrm, the last two clobber the "c" scratch and emit pcmpestri;
;; within each pair operand 4 is a register or memory.
10241 (define_insn "sse4_2_pcmpestr_cconly"
10242 [(set (reg:CC FLAGS_REG)
10244 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10245 (match_operand:SI 3 "register_operand" "a,a,a,a")
10246 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10247 (match_operand:SI 5 "register_operand" "d,d,d,d")
10248 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10250 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10251 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10254 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10255 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10256 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10257 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10258 [(set_attr "type" "sselog")
10259 (set_attr "prefix_data16" "1")
10260 (set_attr "prefix_extra" "1")
10261 (set_attr "length_immediate" "1")
10262 (set_attr "memory" "none,load,none,load")
10263 (set_attr "prefix" "maybe_vex")
10264 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern: SI result (operand 0, "c"), V16QI result
;; (operand 1, "Yz") and FLAGS_REG from two V16QI inputs (operands 2/3,
;; predicates exclude xmm0) and an 8-bit immediate (operand 4).
;; The split code checks REG_UNUSED notes on curr_insn and emits
;; pcmpistri, pcmpistrm, the flags-only pattern (only flags live), or a
;; deleted note (nothing live).
10266 (define_insn_and_split "sse4_2_pcmpistr"
10267 [(set (match_operand:SI 0 "register_operand" "=c,c")
10269 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10270 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10271 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10273 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10279 (set (reg:CC FLAGS_REG)
10286 && can_create_pseudo_p ()"
10291 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10292 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10293 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10296 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10297 operands[3], operands[4]));
10299 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10300 operands[3], operands[4]));
10301 if (flags && !(ecx || xmm0))
10302 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10303 operands[2], operands[3],
10305 if (!(flags || ecx || xmm0))
10306 emit_note (NOTE_INSN_DELETED);
10310 [(set_attr "type" "sselog")
10311 (set_attr "prefix_data16" "1")
10312 (set_attr "prefix_extra" "1")
10313 (set_attr "length_immediate" "1")
10314 (set_attr "memory" "none,load")
10315 (set_attr "mode" "TI")])
;; PCMPISTRI: SI result in operand 0 ("c") plus FLAGS_REG, from two
;; V16QI inputs (operand 2 may be memory) and an 8-bit immediate.
10317 (define_insn "sse4_2_pcmpistri"
10318 [(set (match_operand:SI 0 "register_operand" "=c,c")
10320 [(match_operand:V16QI 1 "register_operand" "x,x")
10321 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10322 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10324 (set (reg:CC FLAGS_REG)
10331 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10332 [(set_attr "type" "sselog")
10333 (set_attr "prefix_data16" "1")
10334 (set_attr "prefix_extra" "1")
10335 (set_attr "length_immediate" "1")
10336 (set_attr "prefix" "maybe_vex")
10337 (set_attr "memory" "none,load")
10338 (set_attr "mode" "TI")])
;; PCMPISTRM: V16QI result in operand 0 ("Yz") plus FLAGS_REG, from two
;; V16QI inputs (operand 2 may be memory) and an 8-bit immediate.
10340 (define_insn "sse4_2_pcmpistrm"
10341 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10343 [(match_operand:V16QI 1 "register_operand" "x,x")
10344 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10345 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10347 (set (reg:CC FLAGS_REG)
10354 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10355 [(set_attr "type" "sselog")
10356 (set_attr "prefix_data16" "1")
10357 (set_attr "prefix_extra" "1")
10358 (set_attr "length_immediate" "1")
10359 (set_attr "prefix" "maybe_vex")
10360 (set_attr "memory" "none,load")
10361 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  The first two alternatives clobber the "Yz"
;; scratch and emit pcmpistrm, the last two clobber the "c" scratch and
;; emit pcmpistri; within each pair operand 3 is a register or memory.
10363 (define_insn "sse4_2_pcmpistr_cconly"
10364 [(set (reg:CC FLAGS_REG)
10366 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10367 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10368 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10370 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10371 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10374 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10375 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10376 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10377 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10378 [(set_attr "type" "sselog")
10379 (set_attr "prefix_data16" "1")
10380 (set_attr "prefix_extra" "1")
10381 (set_attr "length_immediate" "1")
10382 (set_attr "memory" "none,load,none,load")
10383 (set_attr "prefix" "maybe_vex")
10384 (set_attr "mode" "TI")])
10386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10388 ;; XOP instructions
10390 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10392 ;; XOP parallel integer multiply/add instructions.
10393 ;; Note the XOP multiply/add instructions
10394 ;; a[i] = b[i] * c[i] + d[i];
10395 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI: operands 1 and 2 are the factors (operand 1 is
;; marked commutative with "%", operand 2 may be memory), operand 3 is
;; the addend and is constrained to a register.
10396 (define_insn "xop_pmacsww"
10397 [(set (match_operand:V8HI 0 "register_operand" "=x")
10400 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10401 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10402 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10404 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10405 [(set_attr "type" "ssemuladd")
10406 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a V8HI mult of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be memory; operand 3 is constrained to a
;; register.
10408 (define_insn "xop_pmacssww"
10409 [(set (match_operand:V8HI 0 "register_operand" "=x")
10411 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10412 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10413 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10415 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10416 [(set_attr "type" "ssemuladd")
10417 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the factors (operand 2 may be
;; memory), operand 3 is the addend and is constrained to a register.
10419 (define_insn "xop_pmacsdd"
10420 [(set (match_operand:V4SI 0 "register_operand" "=x")
10423 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10424 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10425 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10427 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10428 [(set_attr "type" "ssemuladd")
10429 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a V4SI mult of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be memory; operand 3 is constrained to a
;; register.
10431 (define_insn "xop_pmacssdd"
10432 [(set (match_operand:V4SI 0 "register_operand" "=x")
10434 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10435 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10436 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10438 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10439 [(set_attr "type" "ssemuladd")
10440 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI factors (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both factors start at
;; index 1.
10442 (define_insn "xop_pmacssdql"
10443 [(set (match_operand:V2DI 0 "register_operand" "=x")
10448 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10449 (parallel [(const_int 1)
10452 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10453 (parallel [(const_int 1)
10455 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10457 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10458 [(set_attr "type" "ssemuladd")
10459 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI factors (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both factors start at
;; index 0.
10461 (define_insn "xop_pmacssdqh"
10462 [(set (match_operand:V2DI 0 "register_operand" "=x")
10467 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10468 (parallel [(const_int 0)
10472 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10473 (parallel [(const_int 0)
10475 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10477 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10478 [(set_attr "type" "ssemuladd")
10479 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI factors (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both factors start at
;; index 1.
10481 (define_insn "xop_pmacsdql"
10482 [(set (match_operand:V2DI 0 "register_operand" "=x")
10487 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10488 (parallel [(const_int 1)
10492 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10493 (parallel [(const_int 1)
10495 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10497 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10498 [(set_attr "type" "ssemuladd")
10499 (set_attr "mode" "TI")])
10501 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10502 ;; fake it with a multiply/add. In general, we expect the define_split to
10503 ;; occur before register allocation, so we have to handle the corner case where
10504 ;; the target is the same as operands 1/2
;; Widening multiply of V4SI elements 1 and 3 into V2DI.  The destination
;; is earlyclobber ("=&x").  After reload the insn is split into a
;; pattern that takes an extra operand 3, which the preparation code sets
;; to the V2DI zero vector.
10505 (define_insn_and_split "xop_mulv2div2di3_low"
10506 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10510 (match_operand:V4SI 1 "register_operand" "%x")
10511 (parallel [(const_int 1)
10515 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10516 (parallel [(const_int 1)
10517 (const_int 3)])))))]
10520 "&& reload_completed"
10521 [(set (match_dup 0)
10529 (parallel [(const_int 1)
10534 (parallel [(const_int 1)
10538 operands[3] = CONST0_RTX (V2DImode);
10540 [(set_attr "type" "ssemul")
10541 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI factors (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both factors start at
;; index 0.
10543 (define_insn "xop_pmacsdqh"
10544 [(set (match_operand:V2DI 0 "register_operand" "=x")
10549 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10550 (parallel [(const_int 0)
10554 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10555 (parallel [(const_int 0)
10557 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10559 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10560 [(set_attr "type" "ssemuladd")
10561 (set_attr "mode" "TI")])
10563 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10564 ;; fake it with a multiply/add. In general, we expect the define_split to
10565 ;; occur before register allocation, so we have to handle the corner case where
10566 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of V4SI elements 0 and 2 into V2DI.  The destination
;; is earlyclobber ("=&x").  After reload the insn is split into a
;; pattern that takes an extra operand 3, which the preparation code sets
;; to the V2DI zero vector.
10567 (define_insn_and_split "xop_mulv2div2di3_high"
10568 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10572 (match_operand:V4SI 1 "register_operand" "%x")
10573 (parallel [(const_int 0)
10577 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10578 (parallel [(const_int 0)
10579 (const_int 2)])))))]
10582 "&& reload_completed"
10583 [(set (match_dup 0)
10591 (parallel [(const_int 0)
10596 (parallel [(const_int 0)
10600 operands[3] = CONST0_RTX (V2DImode);
10602 [(set_attr "type" "ssemul")
10603 (set_attr "mode" "TI")])
10605 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The element selections on both factors start at
;; index 1.
10606 (define_insn "xop_pmacsswd"
10607 [(set (match_operand:V4SI 0 "register_operand" "=x")
10612 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10613 (parallel [(const_int 1)
10619 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10620 (parallel [(const_int 1)
10624 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10626 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10627 [(set_attr "type" "ssemuladd")
10628 (set_attr "mode" "TI")])
;; vpmacswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The element selections on both factors start at
;; index 1.
10630 (define_insn "xop_pmacswd"
10631 [(set (match_operand:V4SI 0 "register_operand" "=x")
10636 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10637 (parallel [(const_int 1)
10643 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10644 (parallel [(const_int 1)
10648 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10650 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10651 [(set_attr "type" "ssemuladd")
10652 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The pattern contains two groups of element
;; selections, one starting at index 0 and one starting at index 1.
10654 (define_insn "xop_pmadcsswd"
10655 [(set (match_operand:V4SI 0 "register_operand" "=x")
10661 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10662 (parallel [(const_int 0)
10668 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10669 (parallel [(const_int 0)
10677 (parallel [(const_int 1)
10684 (parallel [(const_int 1)
10687 (const_int 7)])))))
10688 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10690 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10691 [(set_attr "type" "ssemuladd")
10692 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The pattern contains two groups of element
;; selections, one starting at index 0 and one starting at index 1.
10694 (define_insn "xop_pmadcswd"
10695 [(set (match_operand:V4SI 0 "register_operand" "=x")
10701 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10702 (parallel [(const_int 0)
10708 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10709 (parallel [(const_int 0)
10717 (parallel [(const_int 1)
10724 (parallel [(const_int 1)
10727 (const_int 7)])))))
10728 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10730 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10731 [(set_attr "type" "ssemuladd")
10732 (set_attr "mode" "TI")])
10734 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode of the V iterator.  Operand 3 is listed first
;; in the pattern; operands 1 and 2 follow it.  Of operands 2 and 3 at
;; most one is memory in any given alternative ("x,m" vs. "xm,x").
10735 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10736 [(set (match_operand:V 0 "register_operand" "=x,x")
10738 (match_operand:V 3 "nonimmediate_operand" "x,m")
10739 (match_operand:V 1 "register_operand" "x,x")
10740 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10742 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10743 [(set_attr "type" "sse4arg")])
10745 ;; XOP horizontal add/subtract instructions
;; vphaddbw: one V16QI source (register or memory), V8HI result.  The
;; two element selections start at index 0 and index 1 respectively.
10746 (define_insn "xop_phaddbw"
10747 [(set (match_operand:V8HI 0 "register_operand" "=x")
10751 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10752 (parallel [(const_int 0)
10763 (parallel [(const_int 1)
10770 (const_int 15)])))))]
10772 "vphaddbw\t{%1, %0|%0, %1}"
10773 [(set_attr "type" "sseiadd1")])
;; vphaddbd: one V16QI source (register or memory), V4SI result.  Four
;; element selections, starting at indices 0, 1, 2 and 3.
10775 (define_insn "xop_phaddbd"
10776 [(set (match_operand:V4SI 0 "register_operand" "=x")
10781 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10782 (parallel [(const_int 0)
10789 (parallel [(const_int 1)
10792 (const_int 13)]))))
10797 (parallel [(const_int 2)
10804 (parallel [(const_int 3)
10807 (const_int 15)]))))))]
10809 "vphaddbd\t{%1, %0|%0, %1}"
10810 [(set_attr "type" "sseiadd1")])
;; vphaddbq: one V16QI source (register or memory), V2DI result.  Eight
;; element selections, starting at indices 0, 1, 2, 3, 8, 9, 10 and 11.
10812 (define_insn "xop_phaddbq"
10813 [(set (match_operand:V2DI 0 "register_operand" "=x")
10819 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10820 (parallel [(const_int 0)
10825 (parallel [(const_int 1)
10831 (parallel [(const_int 2)
10836 (parallel [(const_int 3)
10837 (const_int 7)])))))
10843 (parallel [(const_int 8)
10848 (parallel [(const_int 9)
10849 (const_int 13)]))))
10854 (parallel [(const_int 10)
10859 (parallel [(const_int 11)
10860 (const_int 15)])))))))]
10862 "vphaddbq\t{%1, %0|%0, %1}"
10863 [(set_attr "type" "sseiadd1")])
;; vphaddwd: one V8HI source (register or memory), V4SI result.  The two
;; element selections start at index 0 and index 1 respectively.
10865 (define_insn "xop_phaddwd"
10866 [(set (match_operand:V4SI 0 "register_operand" "=x")
10870 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10871 (parallel [(const_int 0)
10878 (parallel [(const_int 1)
10881 (const_int 7)])))))]
10883 "vphaddwd\t{%1, %0|%0, %1}"
10884 [(set_attr "type" "sseiadd1")])
;; vphaddwq: one V8HI source (register or memory), V2DI result.  Four
;; element selections, starting at indices 0, 1, 2 and 3.
10886 (define_insn "xop_phaddwq"
10887 [(set (match_operand:V2DI 0 "register_operand" "=x")
10892 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10893 (parallel [(const_int 0)
10898 (parallel [(const_int 1)
10904 (parallel [(const_int 2)
10909 (parallel [(const_int 3)
10910 (const_int 7)]))))))]
10912 "vphaddwq\t{%1, %0|%0, %1}"
10913 [(set_attr "type" "sseiadd1")])
;; vphadddq: one V4SI source (register or memory), V2DI result.  The two
;; element selections start at index 0 and index 1 respectively.
10915 (define_insn "xop_phadddq"
10916 [(set (match_operand:V2DI 0 "register_operand" "=x")
10920 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10921 (parallel [(const_int 0)
10926 (parallel [(const_int 1)
10927 (const_int 3)])))))]
10929 "vphadddq\t{%1, %0|%0, %1}"
10930 [(set_attr "type" "sseiadd1")])
;; vphaddubw: one V16QI source (register or memory), V8HI result.  Same
;; operand layout and selection indices as xop_phaddbw.
10932 (define_insn "xop_phaddubw"
10933 [(set (match_operand:V8HI 0 "register_operand" "=x")
10937 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10938 (parallel [(const_int 0)
10949 (parallel [(const_int 1)
10956 (const_int 15)])))))]
10958 "vphaddubw\t{%1, %0|%0, %1}"
10959 [(set_attr "type" "sseiadd1")])
;; vphaddubd: one V16QI source (register or memory), V4SI result.  Same
;; operand layout and selection indices as xop_phaddbd.
10961 (define_insn "xop_phaddubd"
10962 [(set (match_operand:V4SI 0 "register_operand" "=x")
10967 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10968 (parallel [(const_int 0)
10975 (parallel [(const_int 1)
10978 (const_int 13)]))))
10983 (parallel [(const_int 2)
10990 (parallel [(const_int 3)
10993 (const_int 15)]))))))]
10995 "vphaddubd\t{%1, %0|%0, %1}"
10996 [(set_attr "type" "sseiadd1")])
;; vphaddubq: one V16QI source (register or memory), V2DI result.  Same
;; operand layout and selection indices as xop_phaddbq.
10998 (define_insn "xop_phaddubq"
10999 [(set (match_operand:V2DI 0 "register_operand" "=x")
11005 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11006 (parallel [(const_int 0)
11011 (parallel [(const_int 1)
11017 (parallel [(const_int 2)
11022 (parallel [(const_int 3)
11023 (const_int 7)])))))
11029 (parallel [(const_int 8)
11034 (parallel [(const_int 9)
11035 (const_int 13)]))))
11040 (parallel [(const_int 10)
11045 (parallel [(const_int 11)
11046 (const_int 15)])))))))]
11048 "vphaddubq\t{%1, %0|%0, %1}"
11049 [(set_attr "type" "sseiadd1")])
;; vphadduwd: one V8HI source (register or memory), V4SI result.  Same
;; operand layout and selection indices as xop_phaddwd.
11051 (define_insn "xop_phadduwd"
11052 [(set (match_operand:V4SI 0 "register_operand" "=x")
11056 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11057 (parallel [(const_int 0)
11064 (parallel [(const_int 1)
11067 (const_int 7)])))))]
11069 "vphadduwd\t{%1, %0|%0, %1}"
11070 [(set_attr "type" "sseiadd1")])
;; vphadduwq: one V8HI source (register or memory), V2DI result.  Same
;; operand layout and selection indices as xop_phaddwq.
11072 (define_insn "xop_phadduwq"
11073 [(set (match_operand:V2DI 0 "register_operand" "=x")
11078 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11079 (parallel [(const_int 0)
11084 (parallel [(const_int 1)
11090 (parallel [(const_int 2)
11095 (parallel [(const_int 3)
11096 (const_int 7)]))))))]
11098 "vphadduwq\t{%1, %0|%0, %1}"
11099 [(set_attr "type" "sseiadd1")])
;; vphaddudq: one V4SI source (register or memory), V2DI result.  Same
;; operand layout and selection indices as xop_phadddq.
11101 (define_insn "xop_phaddudq"
11102 [(set (match_operand:V2DI 0 "register_operand" "=x")
11106 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11107 (parallel [(const_int 0)
11112 (parallel [(const_int 1)
11113 (const_int 3)])))))]
11115 "vphaddudq\t{%1, %0|%0, %1}"
11116 [(set_attr "type" "sseiadd1")])
;; vphsubbw: one V16QI source (register or memory), V8HI result.  The
;; two element selections start at index 0 and index 1 respectively.
11118 (define_insn "xop_phsubbw"
11119 [(set (match_operand:V8HI 0 "register_operand" "=x")
11123 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11124 (parallel [(const_int 0)
11135 (parallel [(const_int 1)
11142 (const_int 15)])))))]
11144 "vphsubbw\t{%1, %0|%0, %1}"
11145 [(set_attr "type" "sseiadd1")])
;; vphsubwd: one V8HI source (register or memory), V4SI result.  The two
;; element selections start at index 0 and index 1 respectively.
11147 (define_insn "xop_phsubwd"
11148 [(set (match_operand:V4SI 0 "register_operand" "=x")
11152 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11153 (parallel [(const_int 0)
11160 (parallel [(const_int 1)
11163 (const_int 7)])))))]
11165 "vphsubwd\t{%1, %0|%0, %1}"
11166 [(set_attr "type" "sseiadd1")])
;; vphsubdq: one V4SI source (register or memory), V2DI result.  The two
;; element selections start at index 0 and index 1 respectively.
11168 (define_insn "xop_phsubdq"
11169 [(set (match_operand:V2DI 0 "register_operand" "=x")
11173 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11174 (parallel [(const_int 0)
11179 (parallel [(const_int 1)
11180 (const_int 3)])))))]
11182 "vphsubdq\t{%1, %0|%0, %1}"
11183 [(set_attr "type" "sseiadd1")])
11185 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE over three V16QI inputs.  Operand 1
;; must be a register; operands 2 and 3 may each be memory, but the insn
;; condition rejects the case where both are.
11186 (define_insn "xop_pperm"
11187 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11189 [(match_operand:V16QI 1 "register_operand" "x,x")
11190 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11191 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11192 UNSPEC_XOP_PERMUTE))]
11193 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11194 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11195 [(set_attr "type" "sse4arg")
11196 (set_attr "mode" "TI")])
11198 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is a V16QI value attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11199 (define_insn "xop_pperm_pack_v2di_v4si"
11200 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11203 (match_operand:V2DI 1 "register_operand" "x,x"))
11205 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11206 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11207 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11208 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11209 [(set_attr "type" "sse4arg")
11210 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is a V16QI value attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11212 (define_insn "xop_pperm_pack_v4si_v8hi"
11213 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11216 (match_operand:V4SI 1 "register_operand" "x,x"))
11218 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11219 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11220 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11221 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11222 [(set_attr "type" "sse4arg")
11223 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is a V16QI value attached with (use ...).
;; Operands 2 and 3 may not both be memory.
11225 (define_insn "xop_pperm_pack_v8hi_v16qi"
11226 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11229 (match_operand:V8HI 1 "register_operand" "x,x"))
11231 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11232 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11233 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11234 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11235 [(set_attr "type" "sse4arg")
11236 (set_attr "mode" "TI")])
11238 ;; XOP packed rotate instructions
;; Vector rotate-left by a scalar count (operand 2, SImode).
;; If the count is not an in-range constant for this element size, it is
;; converted to the element mode when needed, replicated into every lane
;; through vec_init, and the per-element variable rotate xop_vrotl is
;; emitted instead.
11239 (define_expand "rotl<mode>3"
11240 [(set (match_operand:VI_128 0 "register_operand" "")
11242 (match_operand:VI_128 1 "nonimmediate_operand" "")
11243 (match_operand:SI 2 "general_operand")))]
11246 /* If we were given a scalar, convert it to parallel */
11247 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11249 rtvec vs = rtvec_alloc (<ssescalarnum>);
11250 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11251 rtx reg = gen_reg_rtx (<MODE>mode);
11252 rtx op2 = operands[2];
11255 if (GET_MODE (op2) != <ssescalarmode>mode)
11257 op2 = gen_reg_rtx (<ssescalarmode>mode);
11258 convert_move (op2, operands[2], false);
11261 for (i = 0; i < <ssescalarnum>; i++)
11262 RTVEC_ELT (vs, i) = op2;
11264 emit_insn (gen_vec_init<mode> (reg, par));
11265 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate-right by a scalar count (operand 2, SImode).
;; If the count is not an in-range constant for this element size, it is
;; converted to the element mode when needed, replicated into every lane
;; through vec_init, negated, and passed to the per-element variable
;; rotate xop_vrotl.
11270 (define_expand "rotr<mode>3"
11271 [(set (match_operand:VI_128 0 "register_operand" "")
11273 (match_operand:VI_128 1 "nonimmediate_operand" "")
11274 (match_operand:SI 2 "general_operand")))]
11277 /* If we were given a scalar, convert it to parallel */
11278 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11280 rtvec vs = rtvec_alloc (<ssescalarnum>);
11281 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11282 rtx neg = gen_reg_rtx (<MODE>mode);
11283 rtx reg = gen_reg_rtx (<MODE>mode);
11284 rtx op2 = operands[2];
11287 if (GET_MODE (op2) != <ssescalarmode>mode)
11289 op2 = gen_reg_rtx (<ssescalarmode>mode);
11290 convert_move (op2, operands[2], false);
11293 for (i = 0; i < <ssescalarnum>; i++)
11294 RTVEC_ELT (vs, i) = op2;
11296 emit_insn (gen_vec_init<mode> (reg, par));
11297 emit_insn (gen_neg<mode>2 (neg, reg));
11298 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; vprot with an immediate count: operand 1 (register or memory) rotated
;; by operand 2, a constant accepted by const_0_to_<sserotatemax>_operand.
;; The count is printed unchanged.
11303 (define_insn "xop_rotl<mode>3"
11304 [(set (match_operand:VI_128 0 "register_operand" "=x")
11306 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11307 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11309 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11310 [(set_attr "type" "sseishft")
11311 (set_attr "length_immediate" "1")
11312 (set_attr "mode" "TI")])
;; Rotate right by an immediate count, emitted as vprot with the
;; complementary count: operands[3] = <element width in bits> - operands[2].
;; The width must be that of one element (<ssescalarmode>), not the
;; element count times 8: the two only coincide for V4SI (4 * 8 == 32),
;; and e.g. V2DI would otherwise get 16 - n instead of 64 - n.
11314 (define_insn "xop_rotr<mode>3"
11315 [(set (match_operand:VI_128 0 "register_operand" "=x")
11317 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11318 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11321 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11322 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11324 [(set_attr "type" "sseishft")
11325 (set_attr "length_immediate" "1")
11326 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negates the count vector
;; (operand 2) into a temporary and emits xop_vrotl with it.
11328 (define_expand "vrotr<mode>3"
11329 [(match_operand:VI_128 0 "register_operand" "")
11330 (match_operand:VI_128 1 "register_operand" "")
11331 (match_operand:VI_128 2 "register_operand" "")]
11334 rtx reg = gen_reg_rtx (<MODE>mode);
11335 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11336 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards its three operands
;; unchanged to xop_vrotl.
11340 (define_expand "vrotl<mode>3"
11341 [(match_operand:VI_128 0 "register_operand" "")
11342 (match_operand:VI_128 1 "register_operand" "")
11343 (match_operand:VI_128 2 "register_operand" "")]
11346 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a vector count (operand 2).  The RTL is an if_then_else
;; whose visible arm uses the negated count.  Operands 1 and 2 may each
;; be memory, but the insn condition rejects the case where both are.
11350 (define_insn "xop_vrotl<mode>3"
11351 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11352 (if_then_else:VI_128
11354 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11357 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11361 (neg:VI_128 (match_dup 2)))))]
11362 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11363 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11364 [(set_attr "type" "sseishft")
11365 (set_attr "prefix_data16" "0")
11366 (set_attr "prefix_extra" "2")
11367 (set_attr "mode" "TI")])
11369 ;; XOP packed shift instructions.
;; Per-element variable logical shift right for the VI12_128 modes:
;; negates the count vector and emits xop_shl with the negated count.
11370 (define_expand "vlshr<mode>3"
11371 [(set (match_operand:VI12_128 0 "register_operand" "")
11373 (match_operand:VI12_128 1 "register_operand" "")
11374 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11377 rtx neg = gen_reg_rtx (<MODE>mode);
11378 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11379 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable logical shift right for the VI48_128 modes,
;; enabled for TARGET_AVX2 || TARGET_XOP.  The visible preparation code
;; negates the count vector and emits xop_shl with the negated count.
11383 (define_expand "vlshr<mode>3"
11384 [(set (match_operand:VI48_128 0 "register_operand" "")
11386 (match_operand:VI48_128 1 "register_operand" "")
11387 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11388 "TARGET_AVX2 || TARGET_XOP"
11392 rtx neg = gen_reg_rtx (<MODE>mode);
11393 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11394 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable logical shift right for the VI48_256 modes.
;; No preparation code is shown for this expander.
11399 (define_expand "vlshr<mode>3"
11400 [(set (match_operand:VI48_256 0 "register_operand" "")
11402 (match_operand:VI48_256 1 "register_operand" "")
11403 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Per-element variable arithmetic shift right for the VI128_128 modes:
;; negates the count vector and emits xop_sha with the negated count.
11406 (define_expand "vashr<mode>3"
11407 [(set (match_operand:VI128_128 0 "register_operand" "")
11408 (ashiftrt:VI128_128
11409 (match_operand:VI128_128 1 "register_operand" "")
11410 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11413 rtx neg = gen_reg_rtx (<MODE>mode);
11414 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11415 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right for V4SI, enabled for
;; TARGET_AVX2 || TARGET_XOP.  The visible preparation code negates the
;; count vector and emits xop_shav4si3 with the negated count.
11419 (define_expand "vashrv4si3"
11420 [(set (match_operand:V4SI 0 "register_operand" "")
11421 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11422 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11423 "TARGET_AVX2 || TARGET_XOP"
11427 rtx neg = gen_reg_rtx (V4SImode);
11428 emit_insn (gen_negv4si2 (neg, operands[2]));
11429 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; V8SI per-element arithmetic right shift expander (ashiftrt:V8SI).
;; NOTE(review): the enabling condition is not visible in this excerpt.
11434 (define_expand "vashrv8si3"
11435 [(set (match_operand:V8SI 0 "register_operand" "")
11436 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11437 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; vashl<mode>3 expander for VI12_128 modes.  Unlike the right-shift
;; expanders, the count vector is passed to xop_sha<mode>3 unchanged.
;; NOTE(review): RTL code and condition lines are not visible here.
11440 (define_expand "vashl<mode>3"
11441 [(set (match_operand:VI12_128 0 "register_operand" "")
11443 (match_operand:VI12_128 1 "register_operand" "")
11444 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11447 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 expander for VI48_128 modes, enabled for AVX2 or XOP.
;; The visible C code forces the count vector into a register and emits
;; xop_sha<mode>3.  NOTE(review): the guard around that code is not visible
;; here; presumably it is the non-AVX2 (XOP) path -- confirm.
11451 (define_expand "vashl<mode>3"
11452 [(set (match_operand:VI48_128 0 "register_operand" "")
11454 (match_operand:VI48_128 1 "register_operand" "")
11455 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11456 "TARGET_AVX2 || TARGET_XOP"
11460 operands[2] = force_reg (<MODE>mode, operands[2]);
11461 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 expander for the 256-bit VI48_256 modes.  Only the operand
;; list is visible here.  NOTE(review): RTL code and condition not visible.
11466 (define_expand "vashl<mode>3"
11467 [(set (match_operand:VI48_256 0 "register_operand" "")
11469 (match_operand:VI48_256 1 "register_operand" "")
11470 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; XOP vpsha: shift of operand 1 by the per-element counts in operand 2,
;; modelled as an if_then_else whose last arm uses (neg (match_dup 2)).
;; Either the value or the count may be in memory, but not both (see the
;; insn condition and the "xm,x" / "x,m" alternatives).
;; NOTE(review): the if_then_else test and shift codes are not visible here.
11473 (define_insn "xop_sha<mode>3"
11474 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11475 (if_then_else:VI_128
11477 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11480 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11484 (neg:VI_128 (match_dup 2)))))]
11485 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11486 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11487 [(set_attr "type" "sseishft")
11488 (set_attr "prefix_data16" "0")
11489 (set_attr "prefix_extra" "2")
11490 (set_attr "mode" "TI")])
;; XOP vpshl: same operand layout and memory restriction as xop_sha<mode>3,
;; but emits vpshl.  The last if_then_else arm again uses (neg (match_dup 2)).
;; NOTE(review): the if_then_else test and shift codes are not visible here.
11492 (define_insn "xop_shl<mode>3"
11493 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11494 (if_then_else:VI_128
11496 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11499 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11503 (neg:VI_128 (match_dup 2)))))]
11504 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11505 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11506 [(set_attr "type" "sseishft")
11507 (set_attr "prefix_data16" "0")
11508 (set_attr "prefix_extra" "2")
11509 (set_attr "mode" "TI")])
11511 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI left shift by a scalar SImode count.  The count (operand 2) is
;; replicated into all 16 elements of a PARALLEL, loaded into a new V16QI
;; register with vec_initv16qi, and that vector is used as the per-element
;; count for xop_shav16qi3.
11512 (define_expand "ashlv16qi3"
11513 [(set (match_operand:V16QI 0 "register_operand" "")
11515 (match_operand:V16QI 1 "register_operand" "")
11516 (match_operand:SI 2 "nonmemory_operand" "")))]
11519 rtx reg = gen_reg_rtx (V16QImode);
11523 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11524 for (i = 0; i < 16; i++)
11525 XVECEXP (par, 0, i) = operands[2];
11527 emit_insn (gen_vec_initv16qi (reg, par));
11528 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar count, iterated over <shift_insn>/<CODE>.
;; A CONST_INT count is negated at expand time; the count is then broadcast
;; to 16 elements via vec_initv16qi.  gen_negv16qi2 negates the broadcast
;; vector in place.  LSHIFTRT selects xop_shlv16qi3, anything else
;; xop_shav16qi3.
;; NOTE(review): the lines that set and test `negate' are not visible here;
;; presumably the runtime negation is only for non-constant counts -- confirm.
11532 (define_expand "<shift_insn>v16qi3"
11533 [(set (match_operand:V16QI 0 "register_operand" "")
11535 (match_operand:V16QI 1 "register_operand" "")
11536 (match_operand:SI 2 "nonmemory_operand" "")))]
11539 rtx reg = gen_reg_rtx (V16QImode);
11541 bool negate = false;
11542 rtx (*shift_insn)(rtx, rtx, rtx);
11545 if (CONST_INT_P (operands[2]))
11546 operands[2] = GEN_INT (-INTVAL (operands[2]));
11550 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11551 for (i = 0; i < 16; i++)
11552 XVECEXP (par, 0, i) = operands[2];
11554 emit_insn (gen_vec_initv16qi (reg, par));
11557 emit_insn (gen_negv16qi2 (reg, reg));
11559 if (<CODE> == LSHIFTRT)
11560 shift_insn = gen_xop_shlv16qi3;
11562 shift_insn = gen_xop_shav16qi3;
11564 emit_insn (shift_insn (operands[0], operands[1], reg));
;; V2DI right shift by a scalar DImode count, implemented with xop_shav2di3.
;; A CONST_INT count is negated at expand time; the count is broadcast to
;; both elements via vec_initv2di, and gen_negv2di2 negates the vector.
;; NOTE(review): the lines that set and test `negate' are not visible here;
;; presumably the runtime negation is only for non-constant counts -- confirm.
11568 (define_expand "ashrv2di3"
11569 [(set (match_operand:V2DI 0 "register_operand" "")
11571 (match_operand:V2DI 1 "register_operand" "")
11572 (match_operand:DI 2 "nonmemory_operand" "")))]
11575 rtx reg = gen_reg_rtx (V2DImode);
11577 bool negate = false;
11580 if (CONST_INT_P (operands[2]))
11581 operands[2] = GEN_INT (-INTVAL (operands[2]));
11585 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11586 for (i = 0; i < 2; i++)
11587 XVECEXP (par, 0, i) = operands[2];
11589 emit_insn (gen_vec_initv2di (reg, par));
11592 emit_insn (gen_negv2di2 (reg, reg));
11594 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11598 ;; XOP FRCZ support
;; vfrcz on a full FMAMODE operand: register destination, register or
;; memory source.  NOTE(review): unspec name and condition not visible here.
11599 (define_insn "xop_frcz<mode>2"
11600 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11602 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11605 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11606 [(set_attr "type" "ssecvt1")
11607 (set_attr "mode" "<MODE>")])
;; Expander for the scalar ("vm") form of vfrcz on VF_128 modes.  The
;; preparation code supplies an all-zero vector as operands[3].
;; NOTE(review): how operand 3 is used in the RTL template is not visible
;; here; presumably it matches the const0 operand of *xop_vmfrcz_<mode>.
11610 (define_expand "xop_vmfrcz<mode>2"
11611 [(set (match_operand:VF_128 0 "register_operand")
11614 [(match_operand:VF_128 1 "nonimmediate_operand")]
11620 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the scalar vfrcz form (uses <ssescalarmodesuffix> in the
;; mnemonic).  Operand 2 must be a constant-zero vector and does not appear
;; in the assembler output.
11623 (define_insn "*xop_vmfrcz_<mode>"
11624 [(set (match_operand:VF_128 0 "register_operand" "=x")
11627 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11629 (match_operand:VF_128 2 "const0_operand")
11632 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11633 [(set_attr "type" "ssecvt1")
11634 (set_attr "mode" "<MODE>")])
;; XOP vpcom integer compare.  Operand 1 is the comparison operator
;; (ix86_comparison_int_operator); %Y1 prints its condition suffix into the
;; mnemonic.  Operand 2 must be a register, operand 3 may be memory.
11636 (define_insn "xop_maskcmp<mode>3"
11637 [(set (match_operand:VI_128 0 "register_operand" "=x")
11638 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11639 [(match_operand:VI_128 2 "register_operand" "x")
11640 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11642 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11643 [(set_attr "type" "sse4arg")
11644 (set_attr "prefix_data16" "0")
11645 (set_attr "prefix_rep" "0")
11646 (set_attr "prefix_extra" "2")
11647 (set_attr "length_immediate" "1")
11648 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the operator must satisfy
;; ix86_comparison_uns_operator and the mnemonic gets a "u" after the
;; condition suffix.
11650 (define_insn "xop_maskcmp_uns<mode>3"
11651 [(set (match_operand:VI_128 0 "register_operand" "=x")
11652 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11653 [(match_operand:VI_128 2 "register_operand" "x")
11654 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11656 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11657 [(set_attr "type" "ssecmp")
11658 (set_attr "prefix_data16" "0")
11659 (set_attr "prefix_rep" "0")
11660 (set_attr "prefix_extra" "2")
11661 (set_attr "length_immediate" "1")
11662 (set_attr "mode" "TI")])
11664 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11665 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11666 ;; the exact instruction generated for the intrinsic.
;; Same assembler output as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so it is not a plain comparison rtx.
11667 (define_insn "xop_maskcmp_uns2<mode>3"
11668 [(set (match_operand:VI_128 0 "register_operand" "=x")
11670 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11671 [(match_operand:VI_128 2 "register_operand" "x")
11672 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11673 UNSPEC_XOP_UNSIGNED_CMP))]
11675 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11676 [(set_attr "type" "ssecmp")
11677 (set_attr "prefix_data16" "0")
11678 (set_attr "prefix_extra" "2")
11679 (set_attr "length_immediate" "1")
11680 (set_attr "mode" "TI")])
11682 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11683 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selecting the mnemonic: nonzero
;; emits vpcomtrue, zero emits vpcomfalse.  It is not printed as an operand.
11684 (define_insn "xop_pcom_tf<mode>3"
11685 [(set (match_operand:VI_128 0 "register_operand" "=x")
11687 [(match_operand:VI_128 1 "register_operand" "x")
11688 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11689 (match_operand:SI 3 "const_int_operand" "n")]
11690 UNSPEC_XOP_TRUEFALSE))]
11693 return ((INTVAL (operands[3]) != 0)
11694 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11695 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11697 [(set_attr "type" "ssecmp")
11698 (set_attr "prefix_data16" "0")
11699 (set_attr "prefix_extra" "2")
11700 (set_attr "length_immediate" "1")
11701 (set_attr "mode" "TI")])
;; XOP vpermil2: two VF data sources (operands 1 and 2), an integer-vector
;; selector (operand 3, mode <sseintvecmode>) and a 0..3 immediate
;; (operand 4).
;; Operand 2 must not carry the "%" (commutative) modifier: "%" would let
;; the register allocator exchange operand 2 with operand 3, and those are
;; a data source and the selector respectively, so a swap changes the result.
;; NOTE(review): unspec name and condition lines are not visible here.
11703 (define_insn "xop_vpermil2<mode>3"
11704 [(set (match_operand:VF 0 "register_operand" "=x")
11706 [(match_operand:VF 1 "register_operand" "x")
11707 (match_operand:VF 2 "nonimmediate_operand" "x")
11708 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11709 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11712 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11713 [(set_attr "type" "sse4arg")
11714 (set_attr "length_immediate" "1")
11715 (set_attr "mode" "<MODE>")])
11717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc.  Alternative 0 (isa noavx) is the two-operand form with
;; operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  NOTE(review): unspec name/condition not visible.
11719 (define_insn "aesenc"
11720 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11721 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11722 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11726 aesenc\t{%2, %0|%0, %2}
11727 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11728 [(set_attr "isa" "noavx,avx")
11729 (set_attr "type" "sselog1")
11730 (set_attr "prefix_extra" "1")
11731 (set_attr "prefix" "orig,vex")
11732 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast (UNSPEC_AESENCLAST).  Alternative 0 is the
;; two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
11734 (define_insn "aesenclast"
11735 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11736 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11737 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11738 UNSPEC_AESENCLAST))]
11741 aesenclast\t{%2, %0|%0, %2}
11742 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11743 [(set_attr "isa" "noavx,avx")
11744 (set_attr "type" "sselog1")
11745 (set_attr "prefix_extra" "1")
11746 (set_attr "prefix" "orig,vex")
11747 (set_attr "mode" "TI")])
;; aesdec / vaesdec.  Alternative 0 is the two-operand non-AVX form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX form.  NOTE(review): unspec name/condition not visible here.
11749 (define_insn "aesdec"
11750 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11751 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11752 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11756 aesdec\t{%2, %0|%0, %2}
11757 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11758 [(set_attr "isa" "noavx,avx")
11759 (set_attr "type" "sselog1")
11760 (set_attr "prefix_extra" "1")
11761 (set_attr "prefix" "orig,vex")
11762 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast (UNSPEC_AESDECLAST).  Alternative 0 is the
;; two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
11764 (define_insn "aesdeclast"
11765 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11766 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11767 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11768 UNSPEC_AESDECLAST))]
11771 aesdeclast\t{%2, %0|%0, %2}
11772 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11773 [(set_attr "isa" "noavx,avx")
11774 (set_attr "type" "sselog1")
11775 (set_attr "prefix_extra" "1")
11776 (set_attr "prefix" "orig,vex")
11777 (set_attr "mode" "TI")])
;; aesimc: single-source insn; the %v template prefix together with
;; prefix "maybe_vex" lets one alternative cover both encodings.
;; NOTE(review): unspec name/condition not visible here.
11779 (define_insn "aesimc"
11780 [(set (match_operand:V2DI 0 "register_operand" "=x")
11781 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11784 "%vaesimc\t{%1, %0|%0, %1}"
11785 [(set_attr "type" "sselog1")
11786 (set_attr "prefix_extra" "1")
11787 (set_attr "prefix" "maybe_vex")
11788 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): register-or-memory source plus
;; an 8-bit immediate (operand 2, range 0..255).
11790 (define_insn "aeskeygenassist"
11791 [(set (match_operand:V2DI 0 "register_operand" "=x")
11792 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11793 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11794 UNSPEC_AESKEYGENASSIST))]
11796 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11797 [(set_attr "type" "sselog1")
11798 (set_attr "prefix_extra" "1")
11799 (set_attr "length_immediate" "1")
11800 (set_attr "prefix" "maybe_vex")
11801 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq with an 8-bit immediate (operand 3).  Alternative 0
;; is the two-operand non-AVX form (operand 1 tied to the destination);
;; alternative 1 is the VEX form.  NOTE(review): unspec name not visible.
11803 (define_insn "pclmulqdq"
11804 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11805 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11806 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11807 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11811 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11812 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11813 [(set_attr "isa" "noavx,avx")
11814 (set_attr "type" "sselog1")
11815 (set_attr "prefix_extra" "1")
11816 (set_attr "length_immediate" "1")
11817 (set_attr "prefix" "orig,vex")
11818 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall in operands[0]: element 0 is an
;; unspec_volatile marker, followed by one SET of a V8SImode SSE register to
;; zero per register (16 registers with TARGET_64BIT, otherwise 8).
11820 (define_expand "avx_vzeroall"
11821 [(match_par_dup 0 [(const_int 0)])]
11824 int nregs = TARGET_64BIT ? 16 : 8;
11827 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11829 XVECEXP (operands[0], 0, 0)
11830 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11833 for (regno = 0; regno < nregs; regno++)
11834 XVECEXP (operands[0], 0, regno + 1)
11835 = gen_rtx_SET (VOIDmode,
11836 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11837 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by vzeroall_operation whose first element is
;; the UNSPECV_VZEROALL marker.  The insn has no modrm byte and no memory
;; access.  NOTE(review): condition and template lines not visible here.
11840 (define_insn "*avx_vzeroall"
11841 [(match_parallel 0 "vzeroall_operation"
11842 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11845 [(set_attr "type" "sse")
11846 (set_attr "modrm" "0")
11847 (set_attr "memory" "none")
11848 (set_attr "prefix" "vex")
11849 (set_attr "mode" "OI")])
11851 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11852 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying one integer-constant operand.
;; NOTE(review): the meaning of operand 0, the condition and the template
;; are not visible in this excerpt.
11853 (define_insn "avx_vzeroupper"
11854 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11855 UNSPECV_VZEROUPPER)]
11858 [(set_attr "type" "sse")
11859 (set_attr "modrm" "0")
11860 (set_attr "memory" "none")
11861 (set_attr "prefix" "vex")
11862 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the 128-bit mode with the same element
;; type: 256-bit modes map to their half-width mode, 128-bit modes to
;; themselves.
11864 (define_mode_attr AVXTOSSEMODE
11865 [(V4DI "V2DI") (V2DI "V2DI")
11866 (V8SI "V4SI") (V4SI "V4SI")
11867 (V16HI "V8HI") (V8HI "V8HI")
11868 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast: selects element 0 of operand 1 (a 128-bit <AVXTOSSEMODE>
;; register or memory operand).  NOTE(review): the wrapping RTL code and the
;; condition are not visible here.
11870 (define_insn "avx2_pbroadcast<mode>"
11871 [(set (match_operand:VI 0 "register_operand" "=x")
11873 (vec_select:<ssescalarmode>
11874 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11875 (parallel [(const_int 0)]))))]
11877 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11878 [(set_attr "type" "ssemov")
11879 (set_attr "prefix_extra" "1")
11880 (set_attr "prefix" "vex")
11881 (set_attr "mode" "<sseinsnmode>")])
;; vpermd on V8SI: operand 1 must be a register, operand 2 may be memory.
;; NOTE(review): unspec name and condition are not visible here.
11883 (define_insn "avx2_permvarv8si"
11884 [(set (match_operand:V8SI 0 "register_operand" "=x")
11886 [(match_operand:V8SI 1 "register_operand" "x")
11887 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11890 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11891 [(set_attr "type" "sselog")
11892 (set_attr "prefix" "vex")
11893 (set_attr "mode" "OI")])
;; vpermpd on V4DF with an 8-bit immediate selector (operand 2).
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with the
;; "xm" constraint and a memory source can be matched directly, in line with
;; operand 1 of avx2_permv4di_1.
;; NOTE(review): unspec name and condition are not visible here.
11895 (define_insn "avx2_permv4df"
11896 [(set (match_operand:V4DF 0 "register_operand" "=x")
11898 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11899 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11902 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11903 [(set_attr "type" "sselog")
11904 (set_attr "prefix_extra" "1")
11905 (set_attr "prefix" "vex")
11906 (set_attr "mode" "OI")])
;; vpermps on V8SF: operand 1 must be a register, operand 2 may be memory.
;; NOTE(review): unspec name and condition are not visible here.
11908 (define_insn "avx2_permvarv8sf"
11909 [(set (match_operand:V8SF 0 "register_operand" "=x")
11911 [(match_operand:V8SF 1 "register_operand" "x")
11912 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11915 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11916 [(set_attr "type" "sselog")
11917 (set_attr "prefix" "vex")
11918 (set_attr "mode" "OI")])
;; Splits the 8-bit immediate (operand 2) into four 2-bit element selectors
;; (bits 1:0, 3:2, 5:4, 7:6) and emits avx2_permv4di_1 with them.
11920 (define_expand "avx2_permv4di"
11921 [(match_operand:V4DI 0 "register_operand" "")
11922 (match_operand:V4DI 1 "nonimmediate_operand" "")
11923 (match_operand:SI 2 "const_0_to_255_operand" "")]
11926 int mask = INTVAL (operands[2]);
11927 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11928 GEN_INT ((mask >> 0) & 3),
11929 GEN_INT ((mask >> 2) & 3),
11930 GEN_INT ((mask >> 4) & 3),
11931 GEN_INT ((mask >> 6) & 3)));
;; vpermq with the four element selectors as separate operands 2..5
;; (each 0..3).  The output code packs them back into one 8-bit immediate,
;; two bits per selector, and overwrites operands[2] with it for printing.
11935 (define_insn "avx2_permv4di_1"
11936 [(set (match_operand:V4DI 0 "register_operand" "=x")
11938 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11939 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11940 (match_operand 3 "const_0_to_3_operand" "")
11941 (match_operand 4 "const_0_to_3_operand" "")
11942 (match_operand 5 "const_0_to_3_operand" "")])))]
11946 mask |= INTVAL (operands[2]) << 0;
11947 mask |= INTVAL (operands[3]) << 2;
11948 mask |= INTVAL (operands[4]) << 4;
11949 mask |= INTVAL (operands[5]) << 6;
11950 operands[2] = GEN_INT (mask);
11951 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11953 [(set_attr "type" "sselog")
11954 (set_attr "prefix" "vex")
11955 (set_attr "mode" "OI")])
;; vperm2i128 on V4DI operands with an 8-bit immediate (operand 3).
;; NOTE(review): unspec name and condition are not visible here.
11957 (define_insn "avx2_permv2ti"
11958 [(set (match_operand:V4DI 0 "register_operand" "=x")
11960 [(match_operand:V4DI 1 "register_operand" "x")
11961 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11962 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11965 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11966 [(set_attr "type" "sselog")
11967 (set_attr "prefix" "vex")
11968 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: duplicates element 0 of the V2DF register
;; operand 1 into all four elements of the V4DF destination.
;; NOTE(review): the condition line is not visible here.
11970 (define_insn "avx2_vec_dupv4df"
11971 [(set (match_operand:V4DF 0 "register_operand" "=x")
11972 (vec_duplicate:V4DF
11974 (match_operand:V2DF 1 "register_operand" "x")
11975 (parallel [(const_int 0)]))))]
11977 "vbroadcastsd\t{%1, %0|%0, %1}"
11978 [(set_attr "type" "sselog1")
11979 (set_attr "prefix" "vex")
11980 (set_attr "mode" "V4DF")])
11982 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit modes with 32-bit or 64-bit elements, integer and
;; floating point.
11983 (define_mode_iterator AVX_VEC_DUP_MODE
11984 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into a 256-bit vector.  Alternative 0 takes the
;; scalar from memory and emits vbroadcast; alternative 1 takes it from a
;; register and is discouraged by the "?" modifier.
;; NOTE(review): the second alternative's template and the condition are not
;; visible here; presumably it is handled by the define_split on
;; AVX_VEC_DUP_MODE that follows avx2_vbroadcasti128_<mode> -- confirm.
11986 (define_insn "vec_dup<mode>"
11987 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11988 (vec_duplicate:AVX_VEC_DUP_MODE
11989 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11992 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11994 [(set_attr "type" "ssemov")
11995 (set_attr "prefix_extra" "1")
11996 (set_attr "prefix" "vex")
11997 (set_attr "mode" "V8SF")])
;; vbroadcasti128 for 256-bit integer modes.  The half-width source
;; (operand 1) must be in memory.  NOTE(review): the RTL code wrapping
;; operand 1 and the condition are not visible here.
11999 (define_insn "avx2_vbroadcasti128_<mode>"
12000 [(set (match_operand:VI_256 0 "register_operand" "=x")
12002 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
12005 "vbroadcasti128\t{%1, %0|%0, %1}"
12006 [(set_attr "type" "ssemov")
12007 (set_attr "prefix_extra" "1")
12008 (set_attr "prefix" "vex")
12009 (set_attr "mode" "OI")])
;; After reload, a vec_duplicate from a register is done in two steps:
;; duplicate the scalar into operands[2], the half-width view of the
;; destination register, then vec_concat operands[2] with itself.
;; NOTE(review): the opening line of this definition and the destination of
;; the second set are not visible in this excerpt.
12012 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
12013 (vec_duplicate:AVX_VEC_DUP_MODE
12014 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
12015 "TARGET_AVX && reload_completed"
12016 [(set (match_dup 2)
12017 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
12019 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
12020 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Three alternatives by where the half-width source (operand 1) lives:
;;   memory                  -> vbroadcast<i128>
;;   tied to the destination -> vinsert<i128> with immediate 1
;;   any other register      -> vperm2<i128> with immediate 0 (discouraged, "?")
;; NOTE(review): the RTL code and condition are not visible here.
12022 (define_insn "avx_vbroadcastf128_<mode>"
12023 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
12025 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
12029 vbroadcast<i128>\t{%1, %0|%0, %1}
12030 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
12031 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
12032 [(set_attr "type" "ssemov,sselog1,sselog1")
12033 (set_attr "prefix_extra" "1")
12034 (set_attr "length_immediate" "0,1,1")
12035 (set_attr "prefix" "vex")
12036 (set_attr "mode" "<sseinsnmode>")])
12038 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
12039 ;; If it so happens that the input is in memory, use vbroadcast.
12040 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element `elt' (operand 3).  One output path re-addresses
;; the memory operand to the selected SFmode element (offset elt * 4) and
;; emits vbroadcastss; another emits vpermilps with immediate elt * 0x55,
;; i.e. the 2-bit index repeated in all four selector fields.
;; NOTE(review): the case labels are not visible here; presumably the memory
;; alternatives take the vbroadcastss path -- confirm.
12041 (define_insn "*avx_vperm_broadcast_v4sf"
12042 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
12044 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
12045 (match_parallel 2 "avx_vbroadcast_operand"
12046 [(match_operand 3 "const_int_operand" "C,n,n")])))]
12049 int elt = INTVAL (operands[3]);
12050 switch (which_alternative)
12054 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
12055 return "vbroadcastss\t{%1, %0|%0, %1}";
12057 operands[2] = GEN_INT (elt * 0x55);
12058 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
12060 gcc_unreachable ();
12063 [(set_attr "type" "ssemov,ssemov,sselog1")
12064 (set_attr "prefix_extra" "1")
12065 (set_attr "length_immediate" "0,0,1")
12066 (set_attr "prefix" "vex")
12067 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast of one element, split after reload.  The visible
;; split code has two parts: an in-lane vpermil followed by vperm2f128 with
;; mask (lane index) * 0x11, and a re-addressing of the source to the
;; selected scalar element so the replacement vec_duplicate pattern applies.
;; NOTE(review): the branch deciding between them is not visible here;
;; presumably register vs. memory source -- confirm.
12069 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
12070 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
12072 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
12073 (match_parallel 2 "avx_vbroadcast_operand"
12074 [(match_operand 3 "const_int_operand" "C,n,n")])))]
12077 "&& reload_completed"
12078 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
12080 rtx op0 = operands[0], op1 = operands[1];
12081 int elt = INTVAL (operands[3]);
12087 /* Shuffle element we care about into all elements of the 128-bit lane.
12088 The other lane gets shuffled too, but we don't care. */
12089 if (<MODE>mode == V4DFmode)
12090 mask = (elt & 1 ? 15 : 0);
12092 mask = (elt & 3) * 0x55;
12093 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
12095 /* Shuffle the lane we care about into both lanes of the dest. */
12096 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
12097 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
12101 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
12102 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on double-precision modes.
;; Each immediate bit becomes an explicit element index: bits 0 and 1 select
;; within elements 0..1; for V4DF, bits 2 and 3 select within elements 2..3
;; (hence the "+ 2").  The indices are wrapped in a PARALLEL.
;; NOTE(review): the assignment target of that PARALLEL is not visible here.
12105 (define_expand "avx_vpermil<mode>"
12106 [(set (match_operand:VF2 0 "register_operand" "")
12108 (match_operand:VF2 1 "nonimmediate_operand" "")
12109 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12112 int mask = INTVAL (operands[2]);
12113 rtx perm[<ssescalarnum>];
12115 perm[0] = GEN_INT (mask & 1);
12116 perm[1] = GEN_INT ((mask >> 1) & 1);
12117 if (<MODE>mode == V4DFmode)
12119 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
12120 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
12124 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on single-precision modes.
;; The immediate holds four 2-bit selectors giving indices 0..3; for V8SF
;; the same four selectors are reused for elements 4..7 with 4 added.
;; The indices are wrapped in a PARALLEL.
;; NOTE(review): the assignment target of that PARALLEL is not visible here.
12127 (define_expand "avx_vpermil<mode>"
12128 [(set (match_operand:VF1 0 "register_operand" "")
12130 (match_operand:VF1 1 "nonimmediate_operand" "")
12131 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12134 int mask = INTVAL (operands[2]);
12135 rtx perm[<ssescalarnum>];
12137 perm[0] = GEN_INT (mask & 3);
12138 perm[1] = GEN_INT ((mask >> 2) & 3);
12139 perm[2] = GEN_INT ((mask >> 4) & 3);
12140 perm[3] = GEN_INT ((mask >> 6) & 3);
12141 if (<MODE>mode == V8SFmode)
12143 perm[4] = GEN_INT ((mask & 3) + 4);
12144 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
12145 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
12146 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
12150 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a selector PARALLEL that avx_vpermilp_parallel accepts.  The
;; output code calls the helper again and uses its return value minus 1 as
;; the immediate, replacing operands[2] before printing.
12153 (define_insn "*avx_vpermilp<mode>"
12154 [(set (match_operand:VF 0 "register_operand" "=x")
12156 (match_operand:VF 1 "nonimmediate_operand" "xm")
12157 (match_parallel 2 ""
12158 [(match_operand 3 "const_int_operand" "")])))]
12160 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12162 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12163 operands[2] = GEN_INT (mask);
12164 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12166 [(set_attr "type" "sselog")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "prefix" "vex")
12170 (set_attr "mode" "<MODE>")])
;; Variable form of vpermil: the selector is an integer vector (operand 2,
;; mode <sseintvecmode>) in a register or memory rather than an immediate.
;; NOTE(review): unspec name and condition are not visible here.
12172 (define_insn "avx_vpermilvar<mode>3"
12173 [(set (match_operand:VF 0 "register_operand" "=x")
12175 [(match_operand:VF 1 "register_operand" "x")
12176 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12179 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12180 [(set_attr "type" "sselog")
12181 (set_attr "prefix_extra" "1")
12182 (set_attr "prefix" "vex")
12183 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  When neither bit 3 nor bit 7 of the immediate
;; is set ((mask & 0x88) == 0), it builds a vec_select from the vec_concat
;; of operands 1 and 2: the low half takes nelt/2 consecutive elements
;; starting at (mask & 3) * nelt/2, the high half likewise from
;; ((mask >> 4) & 3) * nelt/2.  The default RTL is the UNSPEC_VPERMIL2F128
;; form.  NOTE(review): the lines after the SET is built are not visible.
12185 (define_expand "avx_vperm2f128<mode>3"
12186 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12187 (unspec:AVX256MODE2P
12188 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12189 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12190 (match_operand:SI 3 "const_0_to_255_operand" "")]
12191 UNSPEC_VPERMIL2F128))]
12194 int mask = INTVAL (operands[3]);
12195 if ((mask & 0x88) == 0)
12197 rtx perm[<ssescalarnum>], t1, t2;
12198 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12200 base = (mask & 3) * nelt2;
12201 for (i = 0; i < nelt2; ++i)
12202 perm[i] = GEN_INT (base + i);
12204 base = ((mask >> 4) & 3) * nelt2;
12205 for (i = 0; i < nelt2; ++i)
12206 perm[i + nelt2] = GEN_INT (base + i);
12208 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12209 operands[1], operands[2]);
12210 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12211 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12212 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12218 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12219 ;; means that in order to represent this properly in rtl we'd have to
12220 ;; nest *another* vec_concat with a zero operand and do the select from
12221 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: accepts any 8-bit immediate and prints it
;; unchanged as operand 3.
12222 (define_insn "*avx_vperm2f128<mode>_full"
12223 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12224 (unspec:AVX256MODE2P
12225 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12226 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12227 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12228 UNSPEC_VPERMIL2F128))]
12230 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12231 [(set_attr "type" "sselog")
12232 (set_attr "prefix_extra" "1")
12233 (set_attr "length_immediate" "1")
12234 (set_attr "prefix" "vex")
12235 (set_attr "mode" "<sseinsnmode>")])
;; vec_select-of-vec_concat form of the lane permute, valid when
;; avx_vperm2f128_parallel accepts the selector; the immediate is that
;; helper's return value minus 1.  Some masks are emitted as vinsert<i128>
;; with immediate 0 or 1; the general case prints vperm2<i128>.
;; NOTE(review): the mask tests choosing vinsert are not visible here.
12237 (define_insn "*avx_vperm2f128<mode>_nozero"
12238 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12239 (vec_select:AVX256MODE2P
12240 (vec_concat:<ssedoublevecmode>
12241 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12242 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12243 (match_parallel 3 ""
12244 [(match_operand 4 "const_int_operand" "")])))]
12246 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12248 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12250 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12252 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12253 operands[3] = GEN_INT (mask);
12254 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12256 [(set_attr "type" "sselog")
12257 (set_attr "prefix_extra" "1")
12258 (set_attr "length_immediate" "1")
12259 (set_attr "prefix" "vex")
12260 (set_attr "mode" "<sseinsnmode>")])
;; Inserts the half-width operand 2 into operand 1, giving operand 0.
;; Operand 3 (0 or 1) selects the generator: gen_vec_set_lo_<mode> or
;; gen_vec_set_hi_<mode>.  NOTE(review): the case labels are not visible
;; here; presumably 0 selects lo and 1 selects hi -- confirm.
12262 (define_expand "avx_vinsertf128<mode>"
12263 [(match_operand:V_256 0 "register_operand" "")
12264 (match_operand:V_256 1 "register_operand" "")
12265 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12266 (match_operand:SI 3 "const_0_to_1_operand" "")]
12269 rtx (*insn)(rtx, rtx, rtx);
12271 switch (INTVAL (operands[3]))
12274 insn = gen_vec_set_lo_<mode>;
12277 insn = gen_vec_set_hi_<mode>;
12280 gcc_unreachable ();
12283 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0: operand 2 (V2DI) is combined with elements
;; 2..3 selected from operand 1.  NOTE(review): the vec_concat/vec_select
;; lines and the condition are not visible here.
12287 (define_insn "avx2_vec_set_lo_v4di"
12288 [(set (match_operand:V4DI 0 "register_operand" "=x")
12290 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12292 (match_operand:V4DI 1 "register_operand" "x")
12293 (parallel [(const_int 2) (const_int 3)]))))]
12295 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12296 [(set_attr "type" "sselog")
12297 (set_attr "prefix_extra" "1")
12298 (set_attr "length_immediate" "1")
12299 (set_attr "prefix" "vex")
12300 (set_attr "mode" "OI")])
;; vinserti128 with immediate 1: elements 0..1 selected from operand 1 are
;; combined with operand 2 (V2DI).  NOTE(review): the vec_concat/vec_select
;; lines and the condition are not visible here.
12302 (define_insn "avx2_vec_set_hi_v4di"
12303 [(set (match_operand:V4DI 0 "register_operand" "=x")
12306 (match_operand:V4DI 1 "register_operand" "x")
12307 (parallel [(const_int 0) (const_int 1)]))
12308 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12310 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12311 [(set_attr "type" "sselog")
12312 (set_attr "prefix_extra" "1")
12313 (set_attr "length_immediate" "1")
12314 (set_attr "prefix" "vex")
12315 (set_attr "mode" "OI")])
;; Replace the low half of a 4-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2..3 of operand 1, emitted as
;; vinsert<i128> with immediate 0.
12317 (define_insn "vec_set_lo_<mode>"
12318 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12319 (vec_concat:VI8F_256
12320 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12321 (vec_select:<ssehalfvecmode>
12322 (match_operand:VI8F_256 1 "register_operand" "x")
12323 (parallel [(const_int 2) (const_int 3)]))))]
12325 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12326 [(set_attr "type" "sselog")
12327 (set_attr "prefix_extra" "1")
12328 (set_attr "length_immediate" "1")
12329 (set_attr "prefix" "vex")
12330 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a 4-element 256-bit vector: the result is
;; elements 0..1 of operand 1 concatenated with operand 2, emitted as
;; vinsert<i128> with immediate 1.
12332 (define_insn "vec_set_hi_<mode>"
12333 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12334 (vec_concat:VI8F_256
12335 (vec_select:<ssehalfvecmode>
12336 (match_operand:VI8F_256 1 "register_operand" "x")
12337 (parallel [(const_int 0) (const_int 1)]))
12338 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12340 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12341 [(set_attr "type" "sselog")
12342 (set_attr "prefix_extra" "1")
12343 (set_attr "length_immediate" "1")
12344 (set_attr "prefix" "vex")
12345 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an 8-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4..7 of operand 1, emitted as
;; vinsert<i128> with immediate 0.
12347 (define_insn "vec_set_lo_<mode>"
12348 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12349 (vec_concat:VI4F_256
12350 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12351 (vec_select:<ssehalfvecmode>
12352 (match_operand:VI4F_256 1 "register_operand" "x")
12353 (parallel [(const_int 4) (const_int 5)
12354 (const_int 6) (const_int 7)]))))]
12356 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12357 [(set_attr "type" "sselog")
12358 (set_attr "prefix_extra" "1")
12359 (set_attr "length_immediate" "1")
12360 (set_attr "prefix" "vex")
12361 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an 8-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2, emitted as
;; vinsert<i128> with immediate 1.
12363 (define_insn "vec_set_hi_<mode>"
12364 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12365 (vec_concat:VI4F_256
12366 (vec_select:<ssehalfvecmode>
12367 (match_operand:VI4F_256 1 "register_operand" "x")
12368 (parallel [(const_int 0) (const_int 1)
12369 (const_int 2) (const_int 3)]))
12370 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12372 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12373 [(set_attr "type" "sselog")
12374 (set_attr "prefix_extra" "1")
12375 (set_attr "length_immediate" "1")
12376 (set_attr "prefix" "vex")
12377 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half replacement: operand 2 (V8HI) is combined with elements
;; 8..15 selected from operand 1; emitted as vinsert%~128 with immediate 0.
;; NOTE(review): the vec_concat/vec_select lines and the condition are not
;; visible here.
12379 (define_insn "vec_set_lo_v16hi"
12380 [(set (match_operand:V16HI 0 "register_operand" "=x")
12382 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12384 (match_operand:V16HI 1 "register_operand" "x")
12385 (parallel [(const_int 8) (const_int 9)
12386 (const_int 10) (const_int 11)
12387 (const_int 12) (const_int 13)
12388 (const_int 14) (const_int 15)]))))]
12390 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12391 [(set_attr "type" "sselog")
12392 (set_attr "prefix_extra" "1")
12393 (set_attr "length_immediate" "1")
12394 (set_attr "prefix" "vex")
12395 (set_attr "mode" "OI")])
;; V16HI high-half replacement: elements 0..7 selected from operand 1 are
;; combined with operand 2 (V8HI); emitted as vinsert%~128 with immediate 1.
;; NOTE(review): the vec_concat/vec_select lines and the condition are not
;; visible here.
12397 (define_insn "vec_set_hi_v16hi"
12398 [(set (match_operand:V16HI 0 "register_operand" "=x")
12401 (match_operand:V16HI 1 "register_operand" "x")
12402 (parallel [(const_int 0) (const_int 1)
12403 (const_int 2) (const_int 3)
12404 (const_int 4) (const_int 5)
12405 (const_int 6) (const_int 7)]))
12406 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12408 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12409 [(set_attr "type" "sselog")
12410 (set_attr "prefix_extra" "1")
12411 (set_attr "length_immediate" "1")
12412 (set_attr "prefix" "vex")
12413 (set_attr "mode" "OI")])
;; V32QI low-half replacement: operand 2 (V16QI) is combined with elements
;; 16..31 selected from operand 1; emitted as vinsert%~128 with immediate 0.
;; NOTE(review): the vec_concat/vec_select lines and the condition are not
;; visible here.
12415 (define_insn "vec_set_lo_v32qi"
12416 [(set (match_operand:V32QI 0 "register_operand" "=x")
12418 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12420 (match_operand:V32QI 1 "register_operand" "x")
12421 (parallel [(const_int 16) (const_int 17)
12422 (const_int 18) (const_int 19)
12423 (const_int 20) (const_int 21)
12424 (const_int 22) (const_int 23)
12425 (const_int 24) (const_int 25)
12426 (const_int 26) (const_int 27)
12427 (const_int 28) (const_int 29)
12428 (const_int 30) (const_int 31)]))))]
12430 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12431 [(set_attr "type" "sselog")
12432 (set_attr "prefix_extra" "1")
12433 (set_attr "length_immediate" "1")
12434 (set_attr "prefix" "vex")
12435 (set_attr "mode" "OI")])
;; V32QI high-half replacement: elements 0..15 selected from operand 1 are
;; combined with operand 2 (V16QI); emitted as vinsert%~128 with immediate 1.
;; NOTE(review): the vec_concat/vec_select lines and the condition are not
;; visible here.
12437 (define_insn "vec_set_hi_v32qi"
12438 [(set (match_operand:V32QI 0 "register_operand" "=x")
12441 (match_operand:V32QI 1 "register_operand" "x")
12442 (parallel [(const_int 0) (const_int 1)
12443 (const_int 2) (const_int 3)
12444 (const_int 4) (const_int 5)
12445 (const_int 6) (const_int 7)
12446 (const_int 8) (const_int 9)
12447 (const_int 10) (const_int 11)
12448 (const_int 12) (const_int 13)
12449 (const_int 14) (const_int 15)]))
12450 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12452 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12453 [(set_attr "type" "sselog")
12454 (set_attr "prefix_extra" "1")
12455 (set_attr "length_immediate" "1")
12456 (set_attr "prefix" "vex")
12457 (set_attr "mode" "<sseinsnmode>")])
;; <avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>: for each mode of
;; the V48_AVX2 iterator, sets a register (operand 0) from a memory
;; operand of the same mode (operand 1) together with a register operand
;; in the matching <sseintvecmode> (operand 2).  The output template is
;; v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the RTL code wrapping the bracketed operand list and the
;; insn condition are not visible in this excerpt; operand 2 is presumably
;; the mask -- confirm against the complete pattern.
12459 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12460 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12462 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12463 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12466 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12467 [(set_attr "type" "sselog1")
12468 (set_attr "prefix_extra" "1")
12469 (set_attr "prefix" "vex")
12470 (set_attr "mode" "<sseinsnmode>")])
;; <avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>: for each mode of
;; the V48_AVX2 iterator, sets a memory destination (operand 0) from a
;; register in the matching <sseintvecmode> (operand 1) and a register of
;; the vector mode itself (operand 2).  The output template is
;; v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the RTL code wrapping the bracketed operand list, the end
;; of that list and the insn condition are not visible in this excerpt;
;; operand 1 is presumably the mask -- confirm against the complete
;; pattern.
12472 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12473 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12475 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12476 (match_operand:V48_AVX2 2 "register_operand" "x")
12480 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12481 [(set_attr "type" "sselog1")
12482 (set_attr "prefix_extra" "1")
12483 (set_attr "prefix" "vex")
12484 (set_attr "mode" "<sseinsnmode>")])
;; avx_<castmode><avxsizesuffix>_<castmode>: insn-and-split that sets an
;; AVX256MODE2P destination (register or memory; alternatives "=x" and
;; "m") from an unspec of a single <ssehalfvecmode> source (alternatives
;; "xm" and "x").  The split condition is "&& reload_completed".  The
;; split code re-creates op0 as a <ssehalfvecmode> REG, or op1 as a
;; <MODE> REG, reusing the same hard register number, and then emits an
;; ordinary move from op1 to op0.
;; NOTE(review): the unspec name, the insn condition, the output template
;; and the conditional that chooses between the two gen_rtx_REG
;; statements are not visible in this excerpt; confirm against the
;; complete pattern.
12486 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12487 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12488 (unspec:AVX256MODE2P
12489 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12493 "&& reload_completed"
12496 rtx op0 = operands[0];
12497 rtx op1 = operands[1];
12499 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12501 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12502 emit_move_insn (op0, op1);
;; vec_init<mode>: expander for every mode of the V_256 iterator.
;; Operand 0 is the destination register; operand 1 has no mode or
;; predicate.  Expansion is delegated to ix86_expand_vector_init, called
;; with false as its first argument followed by both operands.
;; NOTE(review): the expander condition and the end of the preparation
;; block are not visible in this excerpt.
12506 (define_expand "vec_init<mode>"
12507 [(match_operand:V_256 0 "register_operand" "")
12508 (match_operand 1 "" "")]
12511 ix86_expand_vector_init (false, operands[0], operands[1]);
;; avx2_extracti128: expander taking a V2DI destination (operand 0), a
;; V4DI source register (operand 1) and an SImode selector restricted by
;; const_0_to_1_operand (operand 2).  It switches on INTVAL (operands[2])
;; to pick gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di, hits
;; gcc_unreachable () otherwise, and emits the chosen generator on
;; operands 0 and 1.
;; NOTE(review): the case labels and the expander condition are not
;; visible in this excerpt; presumably 0 selects "lo" and 1 selects
;; "hi" -- confirm against the complete pattern.
12515 (define_expand "avx2_extracti128"
12516 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12517 (match_operand:V4DI 1 "register_operand" "")
12518 (match_operand:SI 2 "const_0_to_1_operand" "")]
12521 rtx (*insn)(rtx, rtx);
12523 switch (INTVAL (operands[2]))
12526 insn = gen_vec_extract_lo_v4di;
12529 insn = gen_vec_extract_hi_v4di;
12532 gcc_unreachable ();
12535 emit_insn (insn (operands[0], operands[1]));
;; avx2_inserti128: expander taking a V4DI destination register
;; (operand 0), a V4DI source register (operand 1), a V2DI register or
;; memory value (operand 2) and an SImode selector restricted by
;; const_0_to_1_operand (operand 3).  It switches on INTVAL (operands[3])
;; to pick gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, hits
;; gcc_unreachable () otherwise, and emits the chosen generator on
;; operands 0, 1 and 2.
;; NOTE(review): the case labels and the expander condition are not
;; visible in this excerpt; presumably 0 selects "lo" and 1 selects
;; "hi" -- confirm against the complete pattern.
12539 (define_expand "avx2_inserti128"
12540 [(match_operand:V4DI 0 "register_operand" "")
12541 (match_operand:V4DI 1 "register_operand" "")
12542 (match_operand:V2DI 2 "nonimmediate_operand" "")
12543 (match_operand:SI 3 "const_0_to_1_operand" "")]
12546 rtx (*insn)(rtx, rtx, rtx);
12548 switch (INTVAL (operands[3]))
12551 insn = gen_avx2_vec_set_lo_v4di;
12554 insn = gen_avx2_vec_set_hi_v4di;
12557 gcc_unreachable ();
12560 emit_insn (insn (operands[0], operands[1], operands[2]));
;; avx2_ashrv<mode>: for each mode of the VI4_AVX2 iterator, a
;; two-source pattern (operand 1 in a register, operand 2 in a register
;; or memory) whose output template is vpsravd.
;; NOTE(review): the RTL code combining the two sources and the insn
;; condition are not visible in this excerpt; the pattern name suggests a
;; per-element arithmetic right shift -- confirm against the complete
;; pattern.
12564 (define_insn "avx2_ashrv<mode>"
12565 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12567 (match_operand:VI4_AVX2 1 "register_operand" "x")
12568 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12570 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12571 [(set_attr "type" "sseishft")
12572 (set_attr "prefix" "vex")
12573 (set_attr "mode" "<sseinsnmode>")])
;; avx2_<shift_insn>v<mode>: one pattern per code of the any_lshift
;; iterator and per mode of the VI48_AVX2 iterator.  Operand 1 (register)
;; and operand 2 (register or memory) are both full vectors of the same
;; mode as the destination.  The output template is
;; vp<vshift>v<ssemodesuffix>.
;; NOTE(review): the insn condition is not visible in this excerpt.
12575 (define_insn "avx2_<shift_insn>v<mode>"
12576 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12577 (any_lshift:VI48_AVX2
12578 (match_operand:VI48_AVX2 1 "register_operand" "x")
12579 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12581 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12582 [(set_attr "type" "sseishft")
12583 (set_attr "prefix" "vex")
12584 (set_attr "mode" "<sseinsnmode>")])
;; avx_vec_concat<mode>: for each mode of the V_256 iterator, combines two
;; <ssehalfvecmode> operands into a register of the full mode.  There are
;; two alternatives: operand 2 as a register or memory ("xm"), or
;; operand 2 matching constraint "C".  The output code switches on
;; which_alternative: one path returns a vinsert<i128> template with
;; immediate 0x1 that prints operand 1 through the %t modifier; the other
;; switches on get_attr_mode (insn) and returns a vmovaps, vmovapd or
;; vmovdqa template that writes %x0 from operand 1.  The "type"
;; attribute is "sselog" for the first alternative and "ssemov" for the
;; second.
;; NOTE(review): the RTL code wrapping the operands, the insn condition
;; and all case labels are not visible in this excerpt; which template
;; belongs to which alternative/mode should be confirmed against the
;; complete pattern.
12586 (define_insn "avx_vec_concat<mode>"
12587 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12589 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12590 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12593 switch (which_alternative)
12596 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12598 switch (get_attr_mode (insn))
12601 return "vmovaps\t{%1, %x0|%x0, %1}";
12603 return "vmovapd\t{%1, %x0|%x0, %1}";
12605 return "vmovdqa\t{%1, %x0|%x0, %1}";
12608 gcc_unreachable ();
12611 [(set_attr "type" "sselog,ssemov")
12612 (set_attr "prefix_extra" "1,*")
12613 (set_attr "length_immediate" "1,*")
12614 (set_attr "prefix" "vex")
12615 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps: sets a V4SF register from a four-element selection out of a
;; V8SF unspec whose only input is a V8HI register (operand 1).  The
;; output template is vcvtph2ps.
;; The selection list must name the four low elements 0, 1, 2, 3, each
;; exactly once.  It previously read 0, 1, 1, 2, which duplicated element
;; 1 and dropped element 3 and so did not describe the low half of the
;; V8SF value.
;; NOTE(review): the RTL code wrapping the unspec, the unspec name and
;; the insn condition are not visible in this excerpt; confirm against
;; the complete pattern.
12617 (define_insn "vcvtph2ps"
12618 [(set (match_operand:V4SF 0 "register_operand" "=x")
12620 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12622 (parallel [(const_int 0) (const_int 1)
12623 (const_int 2) (const_int 3)])))]
12625 "vcvtph2ps\t{%1, %0|%0, %1}"
12626 [(set_attr "type" "ssecvt")
12627 (set_attr "prefix" "vex")
12628 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: sets a V4SF register from UNSPEC_VCVTPH2PS applied to
;; a V4HI memory operand (operand 1).  The output template is vcvtph2ps.
;; NOTE(review): the "mode" attribute is V8SF although the destination is
;; V4SF -- confirm this is intended.  The insn condition is not visible
;; in this excerpt.
12630 (define_insn "*vcvtph2ps_load"
12631 [(set (match_operand:V4SF 0 "register_operand" "=x")
12632 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12633 UNSPEC_VCVTPH2PS))]
12635 "vcvtph2ps\t{%1, %0|%0, %1}"
12636 [(set_attr "type" "ssecvt")
12637 (set_attr "prefix" "vex")
12638 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: sets a V8SF register from UNSPEC_VCVTPH2PS applied to a
;; V8HI operand that may be a register or memory (operand 1).  The output
;; template is vcvtph2ps.
;; NOTE(review): the insn condition is not visible in this excerpt.
12640 (define_insn "vcvtph2ps256"
12641 [(set (match_operand:V8SF 0 "register_operand" "=x")
12642 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12643 UNSPEC_VCVTPH2PS))]
12645 "vcvtph2ps\t{%1, %0|%0, %1}"
12646 [(set_attr "type" "ssecvt")
12647 (set_attr "prefix" "vex")
12648 (set_attr "mode" "V8SF")])
;; vcvtps2ph: expander with a V8HI register destination (operand 0) and a
;; V4HI unspec over a V4SF register (operand 1) and an SImode immediate
;; restricted by const_0_to_255_operand (operand 2).  The preparation
;; statement sets operands[3] to the V4HImode zero constant.
;; NOTE(review): the RTL code wrapping the unspec, the unspec name, the
;; place where operand 3 is used and the expander condition are not
;; visible in this excerpt; confirm against the complete pattern.
12650 (define_expand "vcvtps2ph"
12651 [(set (match_operand:V8HI 0 "register_operand" "")
12653 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12654 (match_operand:SI 2 "const_0_to_255_operand" "")]
12658 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: sets a V8HI register from a V4HI unspec over a V4SF
;; register (operand 1) and an SImode immediate (operand 2, constraint
;; "N"), paired with operand 3, which must satisfy const0_operand in
;; V4HImode.  The output template is vcvtps2ph.
;; NOTE(review): the RTL code wrapping the unspec and operand 3, the
;; unspec name and the insn condition are not visible in this excerpt;
;; confirm against the complete pattern.
12660 (define_insn "*vcvtps2ph"
12661 [(set (match_operand:V8HI 0 "register_operand" "=x")
12663 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12664 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12666 (match_operand:V4HI 3 "const0_operand" "")))]
12668 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12669 [(set_attr "type" "ssecvt")
12670 (set_attr "prefix" "vex")
12671 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: sets a V4HI memory destination from
;; UNSPEC_VCVTPS2PH applied to a V4SF register (operand 1) and an SImode
;; immediate restricted by const_0_to_255_operand (operand 2, constraint
;; "N").  The output template is vcvtps2ph.
;; NOTE(review): the insn condition is not visible in this excerpt.
12673 (define_insn "*vcvtps2ph_store"
12674 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12675 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12676 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12677 UNSPEC_VCVTPS2PH))]
12679 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12680 [(set_attr "type" "ssecvt")
12681 (set_attr "prefix" "vex")
12682 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: sets a V8HI destination, which may be a register or
;; memory, from UNSPEC_VCVTPS2PH applied to a V8SF register (operand 1)
;; and an SImode immediate restricted by const_0_to_255_operand
;; (operand 2, constraint "N").  The output template is vcvtps2ph.
;; NOTE(review): the insn condition is not visible in this excerpt.
12684 (define_insn "vcvtps2ph256"
12685 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12686 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12687 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12688 UNSPEC_VCVTPS2PH))]
12690 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12691 [(set_attr "type" "ssecvt")
12692 (set_attr "prefix" "vex")
12693 (set_attr "mode" "V8SF")])
12695 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the eight data vector modes the gather patterns
;; are instantiated for: 2- and 4-element DI/DF vectors and 4- and
;; 8-element SI/SF vectors.
12696 (define_mode_iterator VEC_GATHER_MODE
12697 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; VEC_GATHER_IDXSI: mode of the index vector operand in the
;; avx2_gathersi patterns.  Every data mode maps to an SImode-element
;; vector: V4SI, except the two 8-element data modes, which map to V8SI.
12698 (define_mode_attr VEC_GATHER_IDXSI
12699 [(V2DI "V4SI") (V2DF "V4SI")
12700 (V4DI "V4SI") (V4DF "V4SI")
12701 (V4SI "V4SI") (V4SF "V4SI")
12702 (V8SI "V8SI") (V8SF "V8SI")])
;; VEC_GATHER_IDXDI: mode of the index vector operand in the
;; avx2_gatherdi patterns.  Every data mode maps to a DImode-element
;; vector: V2DI for the V2DI/V2DF/V4SI/V4SF data modes and V4DI for the
;; V4DI/V4DF/V8SI/V8SF data modes.
12703 (define_mode_attr VEC_GATHER_IDXDI
12704 [(V2DI "V2DI") (V2DF "V2DI")
12705 (V4DI "V4DI") (V4DF "V4DI")
12706 (V4SI "V2DI") (V4SF "V2DI")
12707 (V8SI "V4DI") (V8SF "V4DI")])
;; VEC_GATHER_SRCDI: mode of the two non-index vector input operands in
;; the avx2_gatherdi patterns.  It equals the data mode itself, except
;; that V8SI maps to V4SI and V8SF maps to V4SF.
12708 (define_mode_attr VEC_GATHER_SRCDI
12709 [(V2DI "V2DI") (V2DF "V2DF")
12710 (V4DI "V4DI") (V4DF "V4DF")
12711 (V4SI "V4SI") (V4SF "V4SF")
12712 (V8SI "V4SI") (V8SF "V4SF")])
;; avx2_gathersi<mode>: expander for every mode of VEC_GATHER_MODE.  The
;; template is a parallel of a set and a scratch clobber.  The set's
;; unspec takes a register of the data mode (operand 1), a
;; <ssescalarmode> memory reference built from an address operand
;; (operand 2), a <VEC_GATHER_IDXSI> index register (operand 3) and an
;; SImode constant restricted by const1248_operand (operand 5), a
;; (mem:BLK (scratch)) reference, and a second register of the data mode
;; (operand 4).  The preparation code builds a Pmode UNSPEC_VSIBADDR from
;; operands 2, 3 and 5.
;; The predicate string for operand 5 previously carried a trailing space
;; ("const1248_operand "); it is now spelled exactly as in the matching
;; insn patterns.
;; NOTE(review): the unspec names inside the template, the expander
;; condition and the left-hand side of the preparation assignment are not
;; visible in this excerpt; confirm against the complete pattern.
12714 (define_expand "avx2_gathersi<mode>"
12715 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12716 (unspec:VEC_GATHER_MODE
12717 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12718 (mem:<ssescalarmode>
12720 [(match_operand 2 "vsib_address_operand" "")
12721 (match_operand:<VEC_GATHER_IDXSI>
12722 3 "register_operand" "")
12723 (match_operand:SI 5 "const1248_operand" "")]))
12724 (mem:BLK (scratch))
12725 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12727 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12731 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12732 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gathersi<mode>: insn for every mode of VEC_GATHER_MODE.  The
;; destination (operand 0) is an early-clobber register.  Operand 2 (data
;; mode) is tied to operand 0 by constraint "0"; operand 5 (data mode) is
;; tied to the early-clobber scratch operand 1 by constraint "1".
;; Operand 7 is a <ssescalarmode> memory operator (vsib_mem_operator)
;; over an address operand (operand 3), a <VEC_GATHER_IDXSI> index
;; register (operand 4) and an SImode constant restricted by
;; const1248_operand (operand 6).  The output template is
;; v<sseintprefix>gatherd<ssemodesuffix>, printing operands 1, 7 and 0.
;; NOTE(review): the unspec names and the insn condition are not visible
;; in this excerpt; operand 5 is presumably the gather mask -- confirm
;; against the complete pattern.
12735 (define_insn "*avx2_gathersi<mode>"
12736 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12737 (unspec:VEC_GATHER_MODE
12738 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12739 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12741 [(match_operand:P 3 "vsib_address_operand" "p")
12742 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12743 (match_operand:SI 6 "const1248_operand" "n")]
12745 (mem:BLK (scratch))
12746 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12748 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12750 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12751 [(set_attr "type" "ssemov")
12752 (set_attr "prefix" "vex")
12753 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gathersi<mode>_2: variant of the gathersi insn in which no input
;; operand is tied to the destination by a "0" constraint.  The
;; destination (operand 0) is an early-clobber register; operand 4 (data
;; mode) is tied to the early-clobber scratch operand 1 by constraint
;; "1".  Operand 6 is a <ssescalarmode> memory operator
;; (vsib_mem_operator) over an address operand (operand 2), a
;; <VEC_GATHER_IDXSI> index register (operand 3) and an SImode constant
;; restricted by const1248_operand (operand 5).  The output template is
;; v<sseintprefix>gatherd<ssemodesuffix>, printing operands 1, 6 and 0.
;; NOTE(review): the first element of the unspec vector, the unspec names
;; and the insn condition are not visible in this excerpt; confirm against
;; the complete pattern.
12755 (define_insn "*avx2_gathersi<mode>_2"
12756 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12757 (unspec:VEC_GATHER_MODE
12759 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12761 [(match_operand:P 2 "vsib_address_operand" "p")
12762 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12763 (match_operand:SI 5 "const1248_operand" "n")]
12765 (mem:BLK (scratch))
12766 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12768 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12770 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12771 [(set_attr "type" "ssemov")
12772 (set_attr "prefix" "vex")
12773 (set_attr "mode" "<sseinsnmode>")])
;; avx2_gatherdi<mode>: expander for every mode of VEC_GATHER_MODE.  The
;; template is a parallel of a set and a scratch clobber.  The set's
;; unspec takes a <VEC_GATHER_SRCDI> register (operand 1), a
;; <ssescalarmode> memory reference built from an address operand
;; (operand 2), a <VEC_GATHER_IDXDI> index register (operand 3) and an
;; SImode constant restricted by const1248_operand (operand 5), a
;; (mem:BLK (scratch)) reference, and a second <VEC_GATHER_SRCDI>
;; register (operand 4).  The preparation code builds a Pmode
;; UNSPEC_VSIBADDR from operands 2, 3 and 5.
;; The predicate string for operand 5 previously carried a trailing space
;; ("const1248_operand "); it is now spelled exactly as in the matching
;; insn patterns.
;; NOTE(review): the unspec names inside the template, the expander
;; condition and the left-hand side of the preparation assignment are not
;; visible in this excerpt; confirm against the complete pattern.
12775 (define_expand "avx2_gatherdi<mode>"
12776 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12777 (unspec:VEC_GATHER_MODE
12778 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12779 (mem:<ssescalarmode>
12781 [(match_operand 2 "vsib_address_operand" "")
12782 (match_operand:<VEC_GATHER_IDXDI>
12783 3 "register_operand" "")
12784 (match_operand:SI 5 "const1248_operand" "")]))
12785 (mem:BLK (scratch))
12786 (match_operand:<VEC_GATHER_SRCDI>
12787 4 "register_operand" "")]
12789 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12793 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12794 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gatherdi<mode>: insn for every mode of VEC_GATHER_MODE.  The
;; destination (operand 0) is an early-clobber register.  Operand 2
;; (<VEC_GATHER_SRCDI>) is tied to operand 0 by constraint "0";
;; operand 5 (<VEC_GATHER_SRCDI>) is tied to the early-clobber scratch
;; operand 1 by constraint "1".  Operand 7 is a <ssescalarmode> memory
;; operator (vsib_mem_operator) over an address operand (operand 3), a
;; <VEC_GATHER_IDXDI> index register (operand 4) and an SImode constant
;; restricted by const1248_operand (operand 6).
;; The output template, v<sseintprefix>gatherq<ssemodesuffix>, prints
;; operands 5, 7 and 2 rather than 1, 7 and 0: operands 5 and 2 share
;; registers with operands 1 and 0 through the matching constraints but
;; carry the <VEC_GATHER_SRCDI> mode.
;; NOTE(review): the unspec names and the insn condition are not visible
;; in this excerpt; confirm against the complete pattern.
12797 (define_insn "*avx2_gatherdi<mode>"
12798 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12799 (unspec:VEC_GATHER_MODE
12800 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12801 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12803 [(match_operand:P 3 "vsib_address_operand" "p")
12804 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12805 (match_operand:SI 6 "const1248_operand" "n")]
12807 (mem:BLK (scratch))
12808 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12810 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12812 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12813 [(set_attr "type" "ssemov")
12814 (set_attr "prefix" "vex")
12815 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_2: variant of the gatherdi insn in which no input
;; operand is tied to the destination by a "0" constraint.  The
;; destination (operand 0) is an early-clobber register; operand 4
;; (<VEC_GATHER_SRCDI>) is tied to the early-clobber scratch operand 1 by
;; constraint "1".  Operand 6 is a <ssescalarmode> memory operator
;; (vsib_mem_operator) over an address operand (operand 2), a
;; <VEC_GATHER_IDXDI> index register (operand 3) and an SImode constant
;; restricted by const1248_operand (operand 5).
;; The output code returns one of two v<sseintprefix>gatherq<ssemodesuffix>
;; templates: when <MODE>mode differs from <VEC_GATHER_SRCDI>mode the
;; destination is printed through the %x modifier (%x0), otherwise as
;; plain %0.  Both templates print operands 4 and 6 for the other two
;; positions.
;; NOTE(review): the first element of the unspec vector, the unspec names
;; and the insn condition are not visible in this excerpt; confirm against
;; the complete pattern.
12817 (define_insn "*avx2_gatherdi<mode>_2"
12818 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12819 (unspec:VEC_GATHER_MODE
12821 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12823 [(match_operand:P 2 "vsib_address_operand" "p")
12824 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12825 (match_operand:SI 5 "const1248_operand" "n")]
12827 (mem:BLK (scratch))
12828 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12830 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12833 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12834 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12835 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12837 [(set_attr "type" "ssemov")
12838 (set_attr "prefix" "vex")
12839 (set_attr "mode" "<sseinsnmode>")])