1 ;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
20 ;; Define an insn type attribute.  This is used in function unit delay computations.
22 ;; multi0 is a multiple insn rtl whose first insn is in pipe0
23 ;; multi1 is a multiple insn rtl whose first insn is in pipe1
24 (define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert"
28 (define_attr "length" ""
;; Tuning variant, read at run time from the spu_tune variable:
;; "cell" is the base SPU, "celledp" the enhanced-double-precision
;; part (see the FPD_CELLEDP reservation below).
31 (define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
32 ;; Processor type -- this attribute must exactly match the processor_type
33 ;; enumeration in spu.h.
;; Only one processor type ("spu") is currently defined; the value is
;; taken from the spu_cpu_attr variable.
35 (define_attr "cpu" "spu"
36 (const (symbol_ref "spu_cpu_attr")))
38 ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
39 ; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
;; Scheduling resources: the two issue pipes (pipe0, pipe1) plus the
;; floating-point (fp) and load/store (ls) units that the multi-cycle
;; reservations below claim in combination with a pipe.
41 (define_cpu_unit "pipe0,pipe1,fp,ls")
43 (define_insn_reservation "NOP" 1 (eq_attr "type" "nop")
46 (define_insn_reservation "FX2" 2 (eq_attr "type" "fx2")
49 (define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb")
;; 6-cycle, fully pipelined FP ops: issue on pipe0 together with the
;; fp unit, result available after 6 cycles.
52 (define_insn_reservation "FP6" 6 (eq_attr "type" "fp6")
53 "pipe0 + fp, nothing*5")
;; 7-cycle FP-pipe ops (the mulhi/mpy*/conversion patterns below are
;; tagged fp7): pipe0 issue, then the fp unit on the next cycle.
55 (define_insn_reservation "FP7" 7 (eq_attr "type" "fp7")
56 "pipe0, fp, nothing*5")
58 ;; The behavior of the double precision is that both pipes stall
59 ;; for 6 cycles and the rest of the operation pipelines for
60 ;; 7 cycles.  The simplest way to model this is to simply ignore the 6-cycle stall.
;; Base Cell tuning: an fpd insn reserves both issue pipes for its
;; first cycle, so nothing can dual-issue with it.
62 (define_insn_reservation "FPD" 7
63 (and (eq_attr "tune" "cell")
64 (eq_attr "type" "fpd"))
65 "pipe0 + pipe1, fp, nothing*5")
67 ;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
;; Only pipe0 + fp are reserved, leaving pipe1 free for dual issue.
68 (define_insn_reservation "FPD_CELLEDP" 9
69 (and (eq_attr "tune" "celledp")
70 (eq_attr "type" "fpd"))
71 "pipe0 + fp, nothing*8")
73 (define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
76 (define_insn_reservation "STORE" 1 (eq_attr "type" "store")
79 (define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch")
82 (define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr")
;; Loads issue on pipe1 and occupy the ls unit; 6-cycle latency.
85 (define_insn_reservation "LOAD" 6 (eq_attr "type" "load")
86 "pipe1 + ls, nothing*5")
88 (define_insn_reservation "HBR" 18 (eq_attr "type" "hbr")
;; multi0: a multiple-insn rtl whose first insn is in pipe0; it claims
;; both pipes on its first cycle, so nothing dual-issues with it.
91 (define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0")
92 "pipe0+pipe1, nothing*3")
94 (define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1")
97 (define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert")
100 ;; Force pipe0 to occur before pipe1 in a cycle.
101 (absence_set "pipe0" "pipe1")
110 (UNSPEC_EXTEND_CMP 5)
153 (UNSPEC_SPU_REALIGN_LOAD 49)
154 (UNSPEC_SPU_MASK_FOR_LOAD 50)
158 (include "predicates.md")
159 (include "constraints.md")
164 (define_mode_macro ALL [QI V16QI
172 ; Everything except DI and TI which are handled separately because
173 ; they need different constraints to correctly test VOIDmode constants
174 (define_mode_macro MOV [QI V16QI
;; 64- and 128-bit integer modes; DI and TI are kept out of MOV because
;; they need different constraints to handle VOIDmode constants.
181 (define_mode_macro DTI [DI TI])
183 (define_mode_macro VINT [QI V16QI
189 (define_mode_macro VQHSI [QI V16QI
193 (define_mode_macro VHSI [HI V8HI
196 (define_mode_macro VSDF [SF V4SF
;; Scalar/vector mode pairs: each macro names a scalar mode together
;; with the full-register vector mode of the same element type.
199 (define_mode_macro VSI [SI V4SI])
200 (define_mode_macro VDI [DI V2DI])
201 (define_mode_macro VSF [SF V4SF])
202 (define_mode_macro VDF [DF V2DF])
204 (define_mode_macro VCMP [V16QI
210 (define_mode_macro VCMPU [V16QI
214 (define_mode_attr bh [(QI "b") (V16QI "b")
;; <d> expands to "d" for double-precision modes, empty for single.
218 (define_mode_attr d [(SF "") (V4SF "")
219 (DF "d") (V2DF "d")])
;; <d6> selects the insn type suffix: "6" (fp6) for single precision,
;; "d" (fpd) for double -- used below as "fp<d6>".
220 (define_mode_attr d6 [(SF "6") (V4SF "6")
221 (DF "d") (V2DF "d")])
;; Map a float mode to the same-sized integer mode; <f2i> is the
;; lower-case spelling (for mode suffixes in pattern names), <F2I>
;; the upper-case spelling (for use as a mode).
223 (define_mode_attr f2i [(SF "si") (V4SF "v4si")
224 (DF "di") (V2DF "v2di")])
225 (define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
226 (DF "DI") (V2DF "V2DI")])
;; NOTE(review): (DF "SI") maps scalar DF to SI while the vector case
;; maps V2DF to V2DI -- confirm this asymmetry is intentional at the
;; use sites.
228 (define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
;; Per-width immediate-mask attributes; presumably constraint letters
;; (f/F for halfword modes, g/G for word modes) -- verify at use sites.
230 (define_mode_attr umask [(HI "f") (V8HI "f")
231 (SI "g") (V4SI "g")])
232 (define_mode_attr nmask [(HI "F") (V8HI "F")
233 (SI "G") (V4SI "G")])
235 ;; Used for carry and borrow instructions.
;; (The cg/cgx/addx and bg/bgx/sfx patterns below iterate over CBOP.)
236 (define_mode_macro CBOP [SI DI V4SI V2DI])
238 ;; Used in vec_set and vec_extract
;; All six full-register (128-bit) vector modes.
239 (define_mode_macro V [V2DI V4SI V8HI V16QI V2DF V4SF])
240 (define_mode_attr inner [(V16QI "QI")
246 (define_mode_attr vmult [(V16QI "1")
252 (define_mode_attr voff [(V16QI "13")
262 (define_expand "mov<mode>"
263 [(set (match_operand:ALL 0 "spu_nonimm_operand" "=r,r,r,m")
264 (match_operand:ALL 1 "general_operand" "r,i,m,r"))]
267 if (spu_expand_mov(operands, <MODE>mode))
272 [(set (match_operand 0 "spu_reg_operand")
273 (match_operand 1 "immediate_operand"))]
277 (high (match_dup 1)))
279 (lo_sum (match_dup 0)
282 if (spu_split_immediate (operands))
288 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
289 (match_operand:SI 1 "immediate_operand" "s"))
294 ;; Whenever a function generates the 'pic' pattern above we need to
295 ;; load the pic_offset_table register.
296 ;; GCC doesn't deal well with labels in the middle of a block so we
297 ;; hardcode the offsets in the asm here.
298 (define_insn "load_pic_offset"
299 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
300 (unspec:SI [(const_int 0)] 0))
301 (set (match_operand:SI 1 "spu_reg_operand" "=r")
302 (unspec:SI [(const_int 0)] 0))]
304 "ila\t%1,.+8\;brsl\t%0,4"
305 [(set_attr "length" "8")
306 (set_attr "type" "multi0")])
311 (define_insn "_mov<mode>"
312 [(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
313 (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
314 "spu_valid_move (operands)"
322 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
324 (define_insn "low_<mode>"
325 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
326 (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0")
327 (match_operand:VSI 2 "immediate_operand" "i")))]
331 (define_insn "_movdi"
332 [(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
333 (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
334 "spu_valid_move (operands)"
342 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
344 (define_insn "_movti"
345 [(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
346 (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
347 "spu_valid_move (operands)"
355 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
357 (define_insn_and_split "load"
358 [(set (match_operand 0 "spu_reg_operand" "=r")
359 (match_operand 1 "memory_operand" "m"))
360 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
361 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))]
362 "GET_MODE(operands[0]) == GET_MODE(operands[1])"
367 { spu_split_load(operands); DONE; })
369 (define_insn_and_split "store"
370 [(set (match_operand 0 "memory_operand" "=m")
371 (match_operand 1 "spu_reg_operand" "r"))
372 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
373 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
374 "GET_MODE(operands[0]) == GET_MODE(operands[1])"
379 { spu_split_store(operands); DONE; })
381 ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
383 (define_expand "cpat"
384 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
385 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
386 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
387 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
390 rtx x = gen_cpat_const (operands);
393 emit_move_insn (operands[0], x);
399 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
400 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
401 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
402 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
407 [(set_attr "type" "shuf")])
410 [(set (match_operand:TI 0 "spu_reg_operand")
411 (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand")
412 (match_operand:SI 2 "immediate_operand")
413 (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))]
415 [(set (match_dup:TI 0)
418 operands[4] = gen_cpat_const (operands);
425 (define_insn "extendqihi2"
426 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
427 (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
431 (define_insn "extendhisi2"
432 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
433 (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))]
437 (define_expand "extendsidi2"
438 [(set (match_dup:DI 2)
439 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "")))
440 (set (match_operand:DI 0 "spu_reg_operand" "")
441 (sign_extend:DI (vec_select:SI (match_dup:V2SI 3)
442 (parallel [(const_int 1)]))))]
445 operands[2] = gen_reg_rtx (DImode);
446 operands[3] = spu_gen_subreg (V2SImode, operands[2]);
450 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
453 (match_operand:V2SI 1 "spu_reg_operand" "r")
454 (parallel [(const_int 1) ]))))]
458 (define_expand "extendqiti2"
459 [(set (match_operand:TI 0 "register_operand" "")
460 (sign_extend:TI (match_operand:QI 1 "register_operand" "")))]
462 "spu_expand_sign_extend(operands);
465 (define_expand "extendhiti2"
466 [(set (match_operand:TI 0 "register_operand" "")
467 (sign_extend:TI (match_operand:HI 1 "register_operand" "")))]
469 "spu_expand_sign_extend(operands);
472 (define_expand "extendsiti2"
473 [(set (match_operand:TI 0 "register_operand" "")
474 (sign_extend:TI (match_operand:SI 1 "register_operand" "")))]
476 "spu_expand_sign_extend(operands);
479 (define_expand "extendditi2"
480 [(set (match_operand:TI 0 "register_operand" "")
481 (sign_extend:TI (match_operand:DI 1 "register_operand" "")))]
483 "spu_expand_sign_extend(operands);
489 (define_insn "zero_extendqihi2"
490 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
491 (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
493 "andi\t%0,%1,0x00ff")
495 (define_insn "zero_extendqisi2"
496 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
497 (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))]
499 "andi\t%0,%1,0x00ff")
501 (define_expand "zero_extendhisi2"
502 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
503 (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))
504 (clobber (match_scratch:SI 2 "=&r"))]
507 rtx mask = gen_reg_rtx (SImode);
508 rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0);
509 emit_move_insn (mask, GEN_INT (0xffff));
510 emit_insn (gen_andsi3(operands[0], op1, mask));
514 (define_insn "zero_extendsidi2"
515 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
516 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))]
519 [(set_attr "type" "shuf")])
521 (define_insn "zero_extendsiti2"
522 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
523 (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
525 "rotqmbyi\t%0,%1,-12"
526 [(set_attr "type" "shuf")])
528 (define_insn "zero_extendditi2"
529 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
530 (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))]
533 [(set_attr "type" "shuf")])
538 (define_insn "truncdiqi2"
539 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
540 (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))]
543 [(set_attr "type" "shuf")])
545 (define_insn "truncdihi2"
546 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
547 (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))]
550 [(set_attr "type" "shuf")])
552 (define_insn "truncdisi2"
553 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
554 (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))]
557 [(set_attr "type" "shuf")])
559 (define_insn "trunctiqi2"
560 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
561 (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))]
564 [(set_attr "type" "shuf")])
566 (define_insn "trunctihi2"
567 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
568 (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))]
571 [(set_attr "type" "shuf")])
573 (define_insn "trunctisi2"
574 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
575 (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))]
578 [(set_attr "type" "shuf")])
580 (define_insn "trunctidi2"
581 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
582 (truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))]
585 [(set_attr "type" "shuf")])
590 (define_insn "floatsisf2"
591 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
592 (float:SF (match_operand:SI 1 "spu_reg_operand" "r")))]
595 [(set_attr "type" "fp7")])
597 (define_insn "floatv4siv4sf2"
598 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
599 (float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))]
602 [(set_attr "type" "fp7")])
604 (define_insn "fix_truncsfsi2"
605 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
606 (fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))]
609 [(set_attr "type" "fp7")])
611 (define_insn "fix_truncv4sfv4si2"
612 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
613 (fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))]
616 [(set_attr "type" "fp7")])
618 (define_insn "floatunssisf2"
619 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
620 (unsigned_float:SF (match_operand:SI 1 "spu_reg_operand" "r")))]
623 [(set_attr "type" "fp7")])
625 (define_insn "floatunsv4siv4sf2"
626 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
627 (unsigned_float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))]
630 [(set_attr "type" "fp7")])
632 (define_insn "fixuns_truncsfsi2"
633 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
634 (unsigned_fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))]
637 [(set_attr "type" "fp7")])
639 (define_insn "fixuns_truncv4sfv4si2"
640 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
641 (unsigned_fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))]
644 [(set_attr "type" "fp7")])
646 (define_insn "extendsfdf2"
647 [(set (match_operand:DF 0 "spu_reg_operand" "=r")
648 (float_extend:DF (match_operand:SF 1 "spu_reg_operand" "r")))]
651 [(set_attr "type" "fpd")])
653 (define_insn "truncdfsf2"
654 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
655 (float_truncate:SF (match_operand:DF 1 "spu_reg_operand" "r")))]
658 [(set_attr "type" "fpd")])
660 ;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000
661 (define_expand "floatsidf2"
662 [(set (match_operand:DF 0 "register_operand" "")
663 (float:DF (match_operand:SI 1 "register_operand" "")))]
667 rtx c0 = gen_reg_rtx (SImode);
668 rtx c1 = gen_reg_rtx (DFmode);
669 rtx r0 = gen_reg_rtx (SImode);
670 rtx r1 = gen_reg_rtx (DFmode);
672 emit_move_insn (c0, GEN_INT (-0x80000000ll));
673 emit_move_insn (c1, spu_float_const ("2147483648", DFmode));
675 emit_insn (gen_xorsi3 (r0, operands[1], c0));
679 emit_library_call_value (ufloat_optab->handlers[DFmode][SImode].libfunc,
680 NULL_RTX, LCT_NORMAL, DFmode, 1, r0, SImode);
681 insns = get_insns ();
683 emit_libcall_block (insns, r1, value,
684 gen_rtx_UNSIGNED_FLOAT (DFmode, r0));
686 emit_insn (gen_subdf3 (operands[0], r1, c1));
690 (define_expand "floatdidf2"
691 [(set (match_operand:DF 0 "register_operand" "")
692 (float:DF (match_operand:DI 1 "register_operand" "")))]
696 rtx c0 = gen_reg_rtx (DImode);
697 rtx r0 = gen_reg_rtx (DImode);
698 rtx r1 = gen_reg_rtx (DFmode);
699 rtx r2 = gen_reg_rtx (DImode);
700 rtx setneg = gen_reg_rtx (DImode);
701 rtx isneg = gen_reg_rtx (SImode);
702 rtx neg = gen_reg_rtx (DImode);
703 rtx mask = gen_reg_rtx (DImode);
705 emit_move_insn (c0, GEN_INT (0x8000000000000000ull));
707 emit_insn (gen_negdi2 (neg, operands[1]));
708 emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
709 emit_insn (gen_extend_compare (mask, isneg));
710 emit_insn (gen_selb (r0, neg, operands[1], mask));
711 emit_insn (gen_andc_di (setneg, c0, mask));
716 emit_library_call_value (ufloat_optab->handlers[DFmode][DImode].libfunc,
717 NULL_RTX, LCT_NORMAL, DFmode, 1, r0, DImode);
718 insns = get_insns ();
720 emit_libcall_block (insns, r1, value,
721 gen_rtx_UNSIGNED_FLOAT (DFmode, r0));
723 emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg));
724 emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0));
730 (define_expand "addv16qi3"
731 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
732 (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
733 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
736 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
737 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
738 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
739 rtx rhs_and = gen_reg_rtx (V8HImode);
740 rtx hi_char = gen_reg_rtx (V8HImode);
741 rtx lo_char = gen_reg_rtx (V8HImode);
742 rtx mask = gen_reg_rtx (V8HImode);
744 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
745 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
746 emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and));
747 emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short));
748 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
752 (define_insn "add<mode>3"
753 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
754 (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
755 (match_operand:VHSI 2 "spu_arith_operand" "r,B")))]
761 (define_expand "add<mode>3"
762 [(set (match_dup:VDI 3)
763 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
764 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG))
765 (set (match_dup:VDI 5)
766 (unspec:VDI [(match_dup 3)
768 (match_dup:TI 4)] UNSPEC_SHUFB))
769 (set (match_operand:VDI 0 "spu_reg_operand" "")
770 (unspec:VDI [(match_dup 1)
772 (match_dup 5)] UNSPEC_ADDX))]
775 unsigned char pat[16] = {
776 0x04, 0x05, 0x06, 0x07,
777 0x80, 0x80, 0x80, 0x80,
778 0x0c, 0x0d, 0x0e, 0x0f,
779 0x80, 0x80, 0x80, 0x80
781 operands[3] = gen_reg_rtx (<MODE>mode);
782 operands[4] = gen_reg_rtx (TImode);
783 operands[5] = gen_reg_rtx (<MODE>mode);
784 emit_move_insn (operands[4], array_to_constant (TImode, pat));
787 (define_insn "cg_<mode>"
788 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
789 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
790 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))]
794 (define_insn "cgx_<mode>"
795 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
796 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
797 (match_operand 2 "spu_reg_operand" "r")
798 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))]
802 (define_insn "addx_<mode>"
803 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
804 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
805 (match_operand 2 "spu_reg_operand" "r")
806 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))]
811 ;; This is not the most efficient implementation of addti3.
812 ;; We include this here because 1) the compiler needs it to be
813 ;; defined as the word size is 128-bit and 2) sometimes gcc
814 ;; substitutes an add for a constant left-shift. 2) is unlikely
815 ;; because we also give addti3 a high cost. In case gcc does
816 ;; generate TImode add, here is the code to do it.
817 ;; operand 2 is a nonmemory because the compiler requires it.
818 (define_insn "addti3"
819 [(set (match_operand:TI 0 "spu_reg_operand" "=&r")
820 (plus:TI (match_operand:TI 1 "spu_reg_operand" "r")
821 (match_operand:TI 2 "spu_nonmem_operand" "r")))
822 (clobber (match_scratch:TI 3 "=&r"))]
831 [(set_attr "type" "multi0")
832 (set_attr "length" "28")])
834 (define_insn "add<mode>3"
835 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
836 (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
837 (match_operand:VSF 2 "spu_reg_operand" "r")))]
840 [(set_attr "type" "fp6")])
842 (define_insn "add<mode>3"
843 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
844 (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
845 (match_operand:VDF 2 "spu_reg_operand" "r")))]
848 [(set_attr "type" "fpd")])
853 (define_expand "subv16qi3"
854 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
855 (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
856 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
859 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
860 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
861 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
862 rtx rhs_and = gen_reg_rtx (V8HImode);
863 rtx hi_char = gen_reg_rtx (V8HImode);
864 rtx lo_char = gen_reg_rtx (V8HImode);
865 rtx mask = gen_reg_rtx (V8HImode);
867 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
868 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
869 emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and));
870 emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short));
871 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
875 (define_insn "sub<mode>3"
876 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
877 (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B")
878 (match_operand:VHSI 2 "spu_reg_operand" "r,r")))]
884 (define_expand "sub<mode>3"
885 [(set (match_dup:VDI 3)
886 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
887 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG))
888 (set (match_dup:VDI 5)
889 (unspec:VDI [(match_dup 3)
891 (match_dup:TI 4)] UNSPEC_SHUFB))
892 (set (match_operand:VDI 0 "spu_reg_operand" "")
893 (unspec:VDI [(match_dup 1)
895 (match_dup 5)] UNSPEC_SFX))]
898 unsigned char pat[16] = {
899 0x04, 0x05, 0x06, 0x07,
900 0xc0, 0xc0, 0xc0, 0xc0,
901 0x0c, 0x0d, 0x0e, 0x0f,
902 0xc0, 0xc0, 0xc0, 0xc0
904 operands[3] = gen_reg_rtx (<MODE>mode);
905 operands[4] = gen_reg_rtx (TImode);
906 operands[5] = gen_reg_rtx (<MODE>mode);
907 emit_move_insn (operands[4], array_to_constant (TImode, pat));
910 (define_insn "bg_<mode>"
911 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
912 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
913 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))]
917 (define_insn "bgx_<mode>"
918 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
919 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
920 (match_operand 2 "spu_reg_operand" "r")
921 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))]
925 (define_insn "sfx_<mode>"
926 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
927 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
928 (match_operand 2 "spu_reg_operand" "r")
929 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))]
933 (define_insn "subti3"
934 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
935 (minus:TI (match_operand:TI 1 "spu_reg_operand" "r")
936 (match_operand:TI 2 "spu_reg_operand" "r")))
937 (clobber (match_scratch:TI 3 "=&r"))
938 (clobber (match_scratch:TI 4 "=&r"))
939 (clobber (match_scratch:TI 5 "=&r"))]
950 [(set_attr "type" "multi0")
951 (set_attr "length" "36")])
953 (define_insn "sub<mode>3"
954 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
955 (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
956 (match_operand:VSF 2 "spu_reg_operand" "r")))]
959 [(set_attr "type" "fp6")])
961 (define_insn "sub<mode>3"
962 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
963 (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
964 (match_operand:VDF 2 "spu_reg_operand" "r")))]
967 [(set_attr "type" "fpd")])
972 (define_expand "negv16qi2"
973 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
974 (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))]
977 rtx zero = gen_reg_rtx (V16QImode);
978 emit_move_insn (zero, CONST0_RTX (V16QImode));
979 emit_insn (gen_subv16qi3 (operands[0], zero, operands[1]));
983 (define_insn "neg<mode>2"
984 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
985 (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))]
989 (define_expand "negdi2"
990 [(set (match_operand:DI 0 "spu_reg_operand" "")
991 (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))]
994 rtx zero = gen_reg_rtx(DImode);
995 emit_move_insn(zero, GEN_INT(0));
996 emit_insn(gen_subdi3(operands[0], zero, operands[1]));
1000 (define_expand "negti2"
1001 [(set (match_operand:TI 0 "spu_reg_operand" "")
1002 (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))]
1005 rtx zero = gen_reg_rtx(TImode);
1006 emit_move_insn(zero, GEN_INT(0));
1007 emit_insn(gen_subti3(operands[0], zero, operands[1]));
1011 (define_expand "neg<mode>2"
1013 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1014 (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1015 (use (match_dup 2))])]
1017 "operands[2] = gen_reg_rtx (<F2I>mode);
1018 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
1020 (define_expand "neg<mode>2"
1022 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1023 (neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1024 (use (match_dup 2))])]
1026 "operands[2] = gen_reg_rtx (<F2I>mode);
1027 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
1029 (define_insn_and_split "_neg<mode>2"
1030 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1031 (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1032 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1036 [(set (match_dup:<F2I> 3)
1037 (xor:<F2I> (match_dup:<F2I> 4)
1038 (match_dup:<F2I> 2)))]
1040 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1041 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1047 (define_expand "abs<mode>2"
1049 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1050 (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1051 (use (match_dup 2))])]
1053 "operands[2] = gen_reg_rtx (<F2I>mode);
1054 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
1056 (define_expand "abs<mode>2"
1058 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1059 (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1060 (use (match_dup 2))])]
1062 "operands[2] = gen_reg_rtx (<F2I>mode);
1063 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
1065 (define_insn_and_split "_abs<mode>2"
1066 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1067 (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1068 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1072 [(set (match_dup:<F2I> 3)
1073 (and:<F2I> (match_dup:<F2I> 4)
1074 (match_dup:<F2I> 2)))]
1076 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1077 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1083 (define_insn "mulhi3"
1084 [(set (match_operand:HI 0 "spu_reg_operand" "=r,r")
1085 (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r")
1086 (match_operand:HI 2 "spu_arith_operand" "r,B")))]
1091 [(set_attr "type" "fp7")])
1093 (define_expand "mulv8hi3"
1094 [(set (match_operand:V8HI 0 "spu_reg_operand" "")
1095 (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "")
1096 (match_operand:V8HI 2 "spu_reg_operand" "")))]
1099 rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1100 rtx low = gen_reg_rtx (V4SImode);
1101 rtx high = gen_reg_rtx (V4SImode);
1102 rtx shift = gen_reg_rtx (V4SImode);
1103 rtx mask = gen_reg_rtx (V4SImode);
1105 emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
1106 emit_insn (gen_spu_mpyhh (high, operands[1], operands[2]));
1107 emit_insn (gen_spu_mpy (low, operands[1], operands[2]));
1108 emit_insn (gen_ashlv4si3 (shift, high, spu_const(V4SImode, 16)));
1109 emit_insn (gen_selb (result, shift, low, mask));
1113 (define_expand "mul<mode>3"
1115 [(set (match_operand:VSI 0 "spu_reg_operand" "")
1116 (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "")
1117 (match_operand:VSI 2 "spu_reg_operand" "")))
1118 (clobber (match_dup:VSI 3))
1119 (clobber (match_dup:VSI 4))
1120 (clobber (match_dup:VSI 5))
1121 (clobber (match_dup:VSI 6))])]
1124 operands[3] = gen_reg_rtx(<MODE>mode);
1125 operands[4] = gen_reg_rtx(<MODE>mode);
1126 operands[5] = gen_reg_rtx(<MODE>mode);
1127 operands[6] = gen_reg_rtx(<MODE>mode);
1130 (define_insn_and_split "_mulsi3"
1131 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1132 (mult:SI (match_operand:SI 1 "spu_reg_operand" "r")
1133 (match_operand:SI 2 "spu_arith_operand" "rK")))
1134 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))
1135 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
1136 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))
1137 (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))]
1141 [(set (match_dup:SI 0)
1142 (mult:SI (match_dup:SI 1)
1145 HOST_WIDE_INT val = 0;
1146 rtx a = operands[3];
1147 rtx b = operands[4];
1148 rtx c = operands[5];
1149 rtx d = operands[6];
1150 if (GET_CODE(operands[2]) == CONST_INT)
1152 val = INTVAL(operands[2]);
1153 emit_move_insn(d, operands[2]);
1156 if (val && (val & 0xffff) == 0)
1158 emit_insn(gen_mpyh_si(operands[0], operands[2], operands[1]));
1160 else if (val > 0 && val < 0x10000)
1162 rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d;
1163 emit_insn(gen_mpyh_si(a, operands[1], operands[2]));
1164 emit_insn(gen_mpyu_si(c, operands[1], cst));
1165 emit_insn(gen_addsi3(operands[0], a, c));
1169 emit_insn(gen_mpyh_si(a, operands[1], operands[2]));
1170 emit_insn(gen_mpyh_si(b, operands[2], operands[1]));
1171 emit_insn(gen_mpyu_si(c, operands[1], operands[2]));
1172 emit_insn(gen_addsi3(d, a, b));
1173 emit_insn(gen_addsi3(operands[0], d, c));
1178 (define_insn_and_split "_mulv4si3"
1179 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
1180 (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r")
1181 (match_operand:V4SI 2 "spu_reg_operand" "r")))
1182 (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r"))
1183 (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r"))
1184 (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r"))
1185 (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))]
1189 [(set (match_dup:V4SI 0)
1190 (mult:V4SI (match_dup:V4SI 1)
1191 (match_dup:V4SI 2)))]
1193 rtx a = operands[3];
1194 rtx b = operands[4];
1195 rtx c = operands[5];
1196 rtx d = operands[6];
1197 rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0);
1198 rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
1199 emit_insn(gen_spu_mpyh(a, op1, op2));
1200 emit_insn(gen_spu_mpyh(b, op2, op1));
1201 emit_insn(gen_spu_mpyu(c, op1, op2));
1202 emit_insn(gen_addv4si3(d, a, b));
1203 emit_insn(gen_addv4si3(operands[0], d, c));
1207 (define_insn "mulhisi3"
1208 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1209 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1210 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1213 [(set_attr "type" "fp7")])
1215 (define_insn "mulhisi3_imm"
1216 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1217 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1218 (match_operand:SI 2 "imm_K_operand" "K")))]
1221 [(set_attr "type" "fp7")])
1223 (define_insn "umulhisi3"
1224 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1225 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1226 (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1229 [(set_attr "type" "fp7")])
1231 (define_insn "umulhisi3_imm"
1232 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1233 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1234 (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))]
1237 [(set_attr "type" "fp7")])
1239 (define_insn "mpyu_si"
1240 [(set (match_operand:SI 0 "spu_reg_operand" "=r,r")
1241 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r")
1243 (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K")
1244 (const_int 65535))))]
1249 [(set_attr "type" "fp7")])
1251 ;; This isn't always profitable to use. Consider r = a * b + c * d.
1252 ;; It's faster to do the multiplies in parallel then add them. If we
1253 ;; merge a multiply and add, it prevents the multiplies from happening in parallel.
1255 (define_insn "mpya_si"
1256 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1257 (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1258 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1259 (match_operand:SI 3 "spu_reg_operand" "r")))]
1262 [(set_attr "type" "fp7")])
1264 (define_insn "mpyh_si"
1265 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1266 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r")
1268 (and:SI (match_operand:SI 2 "spu_reg_operand" "r")
1269 (const_int 65535))))]
1272 [(set_attr "type" "fp7")])
1274 (define_insn "mpys_si"
1275 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1277 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1278 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1282 [(set_attr "type" "fp7")])
1284 (define_insn "mpyhh_si"
1285 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1286 (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1288 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1292 [(set_attr "type" "fp7")])
1294 (define_insn "mpyhhu_si"
1295 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1296 (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1298 (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1302 [(set_attr "type" "fp7")])
1304 (define_insn "mpyhha_si"
1305 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1306 (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1308 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1310 (match_operand:SI 3 "spu_reg_operand" "0")))]
1313 [(set_attr "type" "fp7")])
;; Floating-point multiply over the VSDF mode iterator (scalar and
;; vector single/double); insn type selects fp6 or fpd via <d6>.
1315 (define_insn "mul<mode>3"
1316 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1317 (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")
1318 (match_operand:VSDF 2 "spu_reg_operand" "r")))]
1321 [(set_attr "type" "fp<d6>")])

;; Single-precision fused ops (VSF): all three inputs may be distinct
;; registers.
;; fma: op1 * op2 + op3.
1323 (define_insn "fma_<mode>"
1324 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1325 (plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1326 (match_operand:VSF 2 "spu_reg_operand" "r"))
1327 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1330 [(set_attr "type" "fp6")])

;; fnms: op3 - op1 * op2.
1332 (define_insn "fnms_<mode>"
1333 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1334 (minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r")
1335 (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1336 (match_operand:VSF 2 "spu_reg_operand" "r"))))]
1339 [(set_attr "type" "fp6")])

;; fms: op1 * op2 - op3.
1341 (define_insn "fms_<mode>"
1342 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1343 (minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1344 (match_operand:VSF 2 "spu_reg_operand" "r"))
1345 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1348 [(set_attr "type" "fp6")])

;; Double-precision fused ops (VDF): note operand 3 uses constraint
;; "0", i.e. the addend must share the destination register.
;; fma: op1 * op2 + op3.
1350 (define_insn "fma_<mode>"
1351 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1352 (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1353 (match_operand:VDF 2 "spu_reg_operand" "r"))
1354 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1357 [(set_attr "type" "fpd")])

;; fnma: -(op1 * op2 + op3).
1359 (define_insn "fnma_<mode>"
1360 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1361 (neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1362 (match_operand:VDF 2 "spu_reg_operand" "r"))
1363 (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1366 [(set_attr "type" "fpd")])

;; fnms: op3 - op1 * op2.
1368 (define_insn "fnms_<mode>"
1369 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1370 (minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0")
1371 (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1372 (match_operand:VDF 2 "spu_reg_operand" "r"))))]
1375 [(set_attr "type" "fpd")])

;; fms: op1 * op2 - op3.
1377 (define_insn "fms_<mode>"
1378 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1379 (minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1380 (match_operand:VDF 2 "spu_reg_operand" "r"))
1381 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1384 [(set_attr "type" "fpd")])
1387 ;; mul highpart, used for divide by constant optimizations.

;; Signed 32x32 high-part multiply, synthesized from the SPU's 16-bit
;; multiplier: partial products via umulhisi3/mpyh/mpyhh/mpys, carries
;; collected with cg/addx, then a correction because mpys is
;; signed x signed while the algorithm needs signed x unsigned.
1389 (define_expand "smulsi3_highpart"
1390 [(set (match_operand:SI 0 "register_operand" "")
1393 (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
1394 (sign_extend:DI (match_operand:SI 2 "register_operand" "")))
1398 rtx t0 = gen_reg_rtx (SImode);
1399 rtx t1 = gen_reg_rtx (SImode);
1400 rtx t2 = gen_reg_rtx (SImode);
1401 rtx t3 = gen_reg_rtx (SImode);
1402 rtx t4 = gen_reg_rtx (SImode);
1403 rtx t5 = gen_reg_rtx (SImode);
1404 rtx t6 = gen_reg_rtx (SImode);
1405 rtx t7 = gen_reg_rtx (SImode);
1406 rtx t8 = gen_reg_rtx (SImode);
1407 rtx t9 = gen_reg_rtx (SImode);
1408 rtx t11 = gen_reg_rtx (SImode);
1409 rtx t12 = gen_reg_rtx (SImode);
1410 rtx t14 = gen_reg_rtx (SImode);
1411 rtx t15 = gen_reg_rtx (HImode);
1412 rtx t16 = gen_reg_rtx (HImode);
1413 rtx t17 = gen_reg_rtx (HImode);
1414 rtx t18 = gen_reg_rtx (HImode);
1415 rtx t19 = gen_reg_rtx (SImode);
1416 rtx t20 = gen_reg_rtx (SImode);
1417 rtx t21 = gen_reg_rtx (SImode);
1418 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1419 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1420 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1421 rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2);
1423 rtx insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16)));
1424 emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16)));
1425 emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi));
1426 emit_insn (gen_mpyh_si (t3, operands[1], operands[2]));
1427 emit_insn (gen_mpyh_si (t4, operands[2], operands[1]));
1428 emit_insn (gen_mpyhh_si (t5, operands[1], operands[2]));
1429 emit_insn (gen_mpys_si (t6, t0_hi, op2_hi));
1430 emit_insn (gen_mpys_si (t7, t1_hi, op1_hi));
1432 /* Gen carry bits (in t9 and t11). */
1433 emit_insn (gen_addsi3 (t8, t2, t3));
1434 emit_insn (gen_cg_si (t9, t2, t3));
1435 emit_insn (gen_cg_si (t11, t8, t4));
1437 /* Gen high 32 bits in operand[0]. Correct for mpys. */
1438 emit_insn (gen_addx_si (t12, t5, t6, t9));
1439 emit_insn (gen_addx_si (t14, t12, t7, t11));
1441 /* mpys treats both operands as signed when we really want it to treat
1442 the first operand as signed and the second operand as unsigned.
1443 The code below corrects for that difference. */
1444 emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1)));
1445 emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1)));
1446 emit_insn (gen_andc_hi (t17, t1_hi, t15));
1447 emit_insn (gen_andc_hi (t18, t0_hi, t16));
1448 emit_insn (gen_extendhisi2 (t19, t17));
1449 emit_insn (gen_extendhisi2 (t20, t18));
1450 emit_insn (gen_addsi3 (t21, t19, t20));
1451 emit_insn (gen_addsi3 (operands[0], t14, t21));
1452 unshare_all_rtl_in_chain (insn);
;; Unsigned 32x32 high-part multiply.  Same partial-product approach
;; as smulsi3_highpart but all-unsigned (umulhisi3 / mpyhhu), so no
;; sign correction is needed; carries propagate via cg/addx.
1456 (define_expand "umulsi3_highpart"
1457 [(set (match_operand:SI 0 "register_operand" "")
1460 (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
1461 (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
1466 rtx t0 = gen_reg_rtx (SImode);
1467 rtx t1 = gen_reg_rtx (SImode);
1468 rtx t2 = gen_reg_rtx (SImode);
1469 rtx t3 = gen_reg_rtx (SImode);
1470 rtx t4 = gen_reg_rtx (SImode);
1471 rtx t5 = gen_reg_rtx (SImode);
1472 rtx t6 = gen_reg_rtx (SImode);
1473 rtx t7 = gen_reg_rtx (SImode);
1474 rtx t8 = gen_reg_rtx (SImode);
1475 rtx t9 = gen_reg_rtx (SImode);
1476 rtx t10 = gen_reg_rtx (SImode);
1477 rtx t12 = gen_reg_rtx (SImode);
1478 rtx t13 = gen_reg_rtx (SImode);
1479 rtx t14 = gen_reg_rtx (SImode);
1480 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1481 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1482 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1484 rtx insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16)));
1485 emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi));
1486 emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi));
1487 emit_insn (gen_mpyhhu_si (t3, operands[1], t0));
1488 emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2]));
1489 emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16)));
1490 emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16)));
1491 emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16)));
1492 emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16)));
1494 /* Gen carry bits (in t10 and t12). */
1495 emit_insn (gen_addsi3 (t9, t1, t5));
1496 emit_insn (gen_cg_si (t10, t1, t5));
1497 emit_insn (gen_cg_si (t12, t9, t6));
1499 /* Gen high 32 bits in operand[0]. */
1500 emit_insn (gen_addx_si (t13, t4, t7, t10));
1501 emit_insn (gen_addx_si (t14, t13, t8, t12));
1502 emit_insn (gen_movsi (operands[0], t14));
1503 unshare_all_rtl_in_chain (insn);
1510 ;; Not necessarily the best implementation of divide but faster then
1511 ;; the default that gcc provides because this is inlined and it uses
;; Signed 32-bit divide + modulo as one multi-instruction sequence
;; (shift/subtract loop using selb and rotqmbii).  Outputs: quotient in
;; operand 0, remainder in operand 3; nine SI scratches plus a hard
;; register (reg:SI 130) are clobbered.  NOTE(review): most of the asm
;; template lines are elided in this chunk -- only fragments remain.
1513 (define_insn "divmodsi4"
1514 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1515 (div:SI (match_operand:SI 1 "spu_reg_operand" "r")
1516 (match_operand:SI 2 "spu_reg_operand" "r")))
1517 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1518 (mod:SI (match_dup 1)
1520 (clobber (match_scratch:SI 4 "=&r"))
1521 (clobber (match_scratch:SI 5 "=&r"))
1522 (clobber (match_scratch:SI 6 "=&r"))
1523 (clobber (match_scratch:SI 7 "=&r"))
1524 (clobber (match_scratch:SI 8 "=&r"))
1525 (clobber (match_scratch:SI 9 "=&r"))
1526 (clobber (match_scratch:SI 10 "=&r"))
1527 (clobber (match_scratch:SI 11 "=&r"))
1528 (clobber (match_scratch:SI 12 "=&r"))
1529 (clobber (reg:SI 130))]
1537 selb %8,%8,%1,%10\\n\\
1538 selb %9,%9,%2,%11\\n\\
1544 shlqbyi %3,%8,0\\n\\
1545 xor %11,%10,%11\\n\\
1549 1: or %12,%0,%5\\n\\
1550 rotqmbii %5,%5,-1\\n\\
1554 rotqmbii %4,%4,-1\\n\\
1555 selb %0,%12,%0,%6\\n\\
1557 selb %3,%7,%3,%6\\n\\
1561 selb %3,%8,%3,%10\\n\\
1563 [(set_attr "type" "multi0")
1564 (set_attr "length" "128")])

;; Unsigned divide + modulo; same loop structure as divmodsi4 but
;; without the sign fixups, so it needs fewer scratches and is shorter
;; (80 bytes vs 128).
1566 (define_insn "udivmodsi4"
1567 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1568 (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r")
1569 (match_operand:SI 2 "spu_reg_operand" "r")))
1570 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1571 (umod:SI (match_dup 1)
1573 (clobber (match_scratch:SI 4 "=&r"))
1574 (clobber (match_scratch:SI 5 "=&r"))
1575 (clobber (match_scratch:SI 6 "=&r"))
1576 (clobber (match_scratch:SI 7 "=&r"))
1577 (clobber (match_scratch:SI 8 "=&r"))
1578 (clobber (reg:SI 130))]
1591 rotqmbii %5,%5,-1\\n\\
1595 rotqmbii %4,%4,-1\\n\\
1596 selb %0,%8,%0,%6\\n\\
1598 selb %3,%7,%3,%6\\n\\
1601 [(set_attr "type" "multi0")
1602 (set_attr "length" "80")])
;; Single-precision (scalar/vector) divide via reciprocal estimate:
;; frest gives the estimate, fi refines it, then one Newton-Raphson
;; step with fnms/fma produces the quotient.
1604 (define_insn_and_split "div<mode>3"
1605 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1606 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1607 (match_operand:VSF 2 "spu_reg_operand" "r")))
1608 (clobber (match_scratch:VSF 3 "=&r"))
1609 (clobber (match_scratch:VSF 4 "=&r"))]
1613 [(set (match_dup:VSF 0)
1614 (div:VSF (match_dup:VSF 1)
1616 (clobber (match_dup:VSF 3))
1617 (clobber (match_dup:VSF 4))]
1619 emit_insn(gen_frest_<mode>(operands[3], operands[2]));
1620 emit_insn(gen_fi_<mode>(operands[3], operands[2], operands[3]));
1621 emit_insn(gen_mul<mode>3(operands[4], operands[1], operands[3]));
1622 emit_insn(gen_fnms_<mode>(operands[0], operands[4], operands[2], operands[1]));
1623 emit_insn(gen_fma_<mode>(operands[0], operands[0], operands[3], operands[4]));

;; Single-precision square root: frsqest + fi give 1/sqrt(x), then a
;; Newton-Raphson refinement using the constants 0.5 and
;; 1.00000011920928955078125 produces sqrt(x).
1630 (define_insn_and_split "sqrtsf2"
1631 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
1632 (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r")))
1633 (clobber (match_scratch:SF 2 "=&r"))
1634 (clobber (match_scratch:SF 3 "=&r"))
1635 (clobber (match_scratch:SF 4 "=&r"))
1636 (clobber (match_scratch:SF 5 "=&r"))]
1640 [(set (match_dup:SF 0)
1641 (sqrt:SF (match_dup:SF 1)))
1642 (clobber (match_dup:SF 2))
1643 (clobber (match_dup:SF 3))
1644 (clobber (match_dup:SF 4))
1645 (clobber (match_dup:SF 5))]
1647 emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode));
1648 emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode));
1649 emit_insn(gen_frsqest_sf(operands[2],operands[1]));
1650 emit_insn(gen_fi_sf(operands[2],operands[1],operands[2]));
1651 emit_insn(gen_mulsf3(operands[5],operands[2],operands[1]));
1652 emit_insn(gen_mulsf3(operands[3],operands[5],operands[3]));
1653 emit_insn(gen_fnms_sf(operands[4],operands[2],operands[5],operands[4]));
1654 emit_insn(gen_fma_sf(operands[0],operands[4],operands[3],operands[5]));
;; Reciprocal estimate (UNSPEC_FREST) -- building block for div<mode>3.
1658 (define_insn "frest_<mode>"
1659 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1660 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))]
1663 [(set_attr "type" "shuf")])

;; Reciprocal square-root estimate (UNSPEC_FRSQEST) -- used by sqrtsf2.
1665 (define_insn "frsqest_<mode>"
1666 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1667 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))]
1670 [(set_attr "type" "shuf")])

;; Interpolate (UNSPEC_FI): refines an frest/frsqest estimate.
1672 (define_insn "fi_<mode>"
1673 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1674 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")
1675 (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))]
1678 [(set_attr "type" "fp7")])
;; Bitwise logical operations.  Each family has a generic <mode>
;; pattern plus DI/TI variants whose immediate alternatives use
;; different constraint letters (C/c/Y etc.) and different immediate
;; print modifiers (%J/%K/%L).  NOTE(review): the first template
;; alternative of most of these insns is elided from this chunk.

;; AND, register or C-immediate alternative.
1683 (define_insn "and<mode>3"
1684 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
1685 (and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
1686 (match_operand:MOV 2 "spu_logical_operand" "r,C")))]
1690 and%j2i\t%0,%1,%J2")

1692 (define_insn "anddi3"
1693 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1694 (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1695 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
1699 and%k2i\t%0,%1,%K2")

1701 (define_insn "andti3"
1702 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
1703 (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
1704 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
1708 and%m2i\t%0,%1,%L2")

;; AND-with-complement: op1 & ~op2 (note operand 2 is the negated one).
1710 (define_insn "andc_<mode>"
1711 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1712 (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
1713 (match_operand:ALL 1 "spu_reg_operand" "r")))]

;; NAND: ~(op1 & op2).
1717 (define_insn "nand_<mode>"
1718 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1719 (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r")
1720 (match_operand:ALL 1 "spu_reg_operand" "r"))))]

;; OR family; the third alternative ties op1 to the destination ("0").
1727 (define_insn "ior<mode>3"
1728 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r")
1729 (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0")
1730 (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))]

1737 (define_insn "iordi3"
1738 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r")
1739 (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0")
1740 (match_operand:DI 2 "spu_ior_operand" "r,c,d")))]

1747 (define_insn "iorti3"
1748 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r")
1749 (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0")
1750 (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))]

;; OR-with-complement: op1 | ~op2.
1757 (define_insn "orc_<mode>"
1758 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1759 (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
1760 (match_operand:ALL 1 "spu_reg_operand" "r")))]

;; NOR: ~(op1 | op2).
1764 (define_insn "nor_<mode>"
1765 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1766 (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
1767 (match_operand:ALL 2 "spu_reg_operand" "r"))))]

;; XOR family, register or immediate (constraint B) alternative.
1773 (define_insn "xor<mode>3"
1774 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
1775 (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
1776 (match_operand:MOV 2 "spu_logical_operand" "r,B")))]
1780 xor%j2i\t%0,%1,%J2")

1782 (define_insn "xordi3"
1783 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1784 (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1785 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
1789 xor%k2i\t%0,%1,%K2")

1791 (define_insn "xorti3"
1792 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
1793 (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
1794 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
1798 xor%m2i\t%0,%1,%L2")

;; Equivalence: ~(op1 ^ op2).
1800 (define_insn "eqv_<mode>"
1801 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1802 (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
1803 (match_operand:ALL 2 "spu_reg_operand" "r"))))]

;; One's complement.
1809 (define_insn "one_cmpl<mode>2"
1810 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1811 (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))]
;; Bit select (selb): for each bit, choose op1 or op2 under the mask
;; op3.  The expander is modeless; it stamps the destination's mode
;; onto the generated unspec so one pattern serves every mode.
1818 (define_expand "selb"
1819 [(set (match_operand 0 "spu_reg_operand" "")
1820 (unspec [(match_operand 1 "spu_reg_operand" "")
1821 (match_operand 2 "spu_reg_operand" "")
1822 (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))]
1825 rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]);
1826 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));

1831 ;; This could be defined as a combination of logical operations, but at
1832 ;; one time it caused a crash due to recursive expansion of rtl during CSE.
;; The insn condition only requires operands 0..2 to share a mode;
;; the mask (operand 3) may have a different mode.
1833 (define_insn "_selb"
1834 [(set (match_operand 0 "spu_reg_operand" "=r")
1835 (unspec [(match_operand 1 "spu_reg_operand" "r")
1836 (match_operand 2 "spu_reg_operand" "r")
1837 (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))]
1838 "GET_MODE(operands[0]) == GET_MODE(operands[1])
1839 && GET_MODE(operands[1]) == GET_MODE(operands[2])"
1840 "selb\t%0,%1,%2,%3")
1843 ;; Misc. byte/bit operations
1844 ;; clz/ctz/ffs/popcount/parity

;; Count leading zeros: direct hardware support (clz).
1847 (define_insn "clz<mode>2"
1848 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
1849 (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))]

;; ctz synthesized from clz: isolate the lowest set bit via x & -x,
;; count its leading zeros, and subtract from 31.
1853 (define_expand "ctz<mode>2"
1855 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
1856 (set (match_dup 3) (and:VSI (match_dup 1)
1858 (set (match_dup 4) (clz:VSI (match_dup 3)))
1859 (set (match_operand:VSI 0 "spu_reg_operand" "")
1860 (minus:VSI (match_dup 5) (match_dup 4)))]
1863 operands[2] = gen_reg_rtx (<MODE>mode);
1864 operands[3] = gen_reg_rtx (<MODE>mode);
1865 operands[4] = gen_reg_rtx (<MODE>mode);
1866 operands[5] = spu_const(<MODE>mode, 31);

;; ffs: same construction as ctz but subtracting from 32, which
;; yields 0 for a zero input and 1-based bit positions otherwise.
1869 (define_expand "ffs<mode>2"
1871 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
1872 (set (match_dup 3) (and:VSI (match_dup 1)
1874 (set (match_dup 4) (clz:VSI (match_dup 3)))
1875 (set (match_operand:VSI 0 "spu_reg_operand" "")
1876 (minus:VSI (match_dup 5) (match_dup 4)))]
1879 operands[2] = gen_reg_rtx (<MODE>mode);
1880 operands[3] = gen_reg_rtx (<MODE>mode);
1881 operands[4] = gen_reg_rtx (<MODE>mode);
1882 operands[5] = spu_const(<MODE>mode, 32);

;; popcount: count bits per byte (cntb), sum the bytes (sumb), then
;; sign-extend the HI result to SI.
1885 (define_expand "popcountsi2"
1887 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")]
1890 (unspec:HI [(match_dup 2)] UNSPEC_SUMB))
1891 (set (match_operand:SI 0 "spu_reg_operand" "")
1892 (sign_extend:SI (match_dup 3)))]
1895 operands[2] = gen_reg_rtx (SImode);
1896 operands[3] = gen_reg_rtx (HImode);

;; parity = popcount & 1.
1899 (define_expand "paritysi2"
1900 [(set (match_operand:SI 0 "spu_reg_operand" "")
1901 (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))]
1904 operands[2] = gen_reg_rtx (SImode);
1905 emit_insn (gen_popcountsi2(operands[2], operands[1]));
1906 emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1)));

;; Count-bits-in-bytes, scalar SI view.
1910 (define_insn "cntb_si"
1911 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1912 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")]
1916 [(set_attr "type" "fxb")])

;; Count-bits-in-bytes, full V16QI vector view.
1918 (define_insn "cntb_v16qi"
1919 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1920 (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")]
1924 [(set_attr "type" "fxb")])

;; Sum the per-byte counts (UNSPEC_SUMB) into an HI result.
1926 (define_insn "sumb_si"
1927 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
1928 (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))]
1931 [(set_attr "type" "fxb")])
;; Left shifts.  Element shifts (HI/SI and their vectors) are single
;; instructions; DI goes through TI rotates; TI is split into a
;; byte-shift plus a bit-shift pair.

;; HI/SI (and vector) left shift; register or W-immediate count.
1936 (define_insn "ashl<mode>3"
1937 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1938 (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
1939 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
1943 shl<bh>i\t%0,%1,%<umask>2"
1944 [(set_attr "type" "fx3")])

;; DI left shift: view the register pair as TI, rotate the DI value
;; into the high half, then use the quadword byte/bit shifters.
1946 (define_insn_and_split "ashldi3"
1947 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1948 (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1949 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
1950 (clobber (match_scratch:SI 3 "=&r,X"))]
1954 [(set (match_dup:DI 0)
1955 (ashift:DI (match_dup:DI 1)
1958 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
1959 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1960 rtx op2 = operands[2];
1961 rtx op3 = operands[3];
1963 if (GET_CODE (operands[2]) == REG)
1965 emit_insn (gen_addsi3 (op3, op2, GEN_INT (64)));
1966 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
1967 emit_insn (gen_shlqbybi_ti (op0, op0, op3));
1968 emit_insn (gen_shlqbi_ti (op0, op0, op3));
1972 HOST_WIDE_INT val = INTVAL (operands[2]);
1973 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
1974 emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8)));
1976 emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8)));

;; TI left shift expander: constants go to ashlti3_imm, register
;; counts allocate a TI scratch and fall through to ashlti3_reg.
1981 (define_expand "ashlti3"
1982 [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "")
1983 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "")
1984 (match_operand:SI 2 "spu_nonmem_operand" "")))
1985 (clobber (match_dup:TI 3))])]
1987 "if (GET_CODE (operands[2]) == CONST_INT)
1989 emit_insn(gen_ashlti3_imm(operands[0], operands[1], operands[2]));
1992 operands[3] = gen_reg_rtx (TImode);")

;; TI shift by constant; counts that fit neither O nor P are split
;; into a byte part (val & -8) and a bit part (val & 7).
1994 (define_insn_and_split "ashlti3_imm"
1995 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
1996 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
1997 (match_operand:SI 2 "immediate_operand" "O,P")))]
2002 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2003 [(set (match_dup:TI 0)
2004 (ashift:TI (match_dup:TI 1)
2006 (set (match_dup:TI 0)
2007 (ashift:TI (match_dup:TI 0)
2010 HOST_WIDE_INT val = INTVAL(operands[2]);
2011 operands[3] = GEN_INT (val&7);
2012 operands[4] = GEN_INT (val&-8);
2014 [(set_attr "type" "shuf,shuf")])

;; TI shift by register: split into the two masked quadword shifts.
2016 (define_insn_and_split "ashlti3_reg"
2017 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2018 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r")
2019 (match_operand:SI 2 "spu_reg_operand" "r")))
2020 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
2024 [(set (match_dup:TI 3)
2025 (ashift:TI (match_dup:TI 1)
2026 (and:SI (match_dup:SI 2)
2028 (set (match_dup:TI 0)
2029 (ashift:TI (match_dup:TI 3)
2030 (and:SI (match_dup:SI 2)
2034 (define_insn "shlqbybi_ti"
2035 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2036 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2037 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2043 [(set_attr "type" "shuf,shuf")])

;; Quadword shift-left by bits (count taken modulo 8 via the AND).
2045 (define_insn "shlqbi_ti"
2046 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2047 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2048 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2054 [(set_attr "type" "shuf,shuf")])

;; Quadword shift-left by bytes (count scaled by the multiply).
2056 (define_insn "shlqby_ti"
2057 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2058 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2059 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2065 [(set_attr "type" "shuf,shuf")])
;; Logical right shifts.  The SPU only rotates/shifts left or by
;; negative counts, so right shifts are expressed as rotates by the
;; negated count (rotm / rotqmby / rotqmbi family).

;; HI/SI (and vector) logical right shift.  Immediate counts emit one
;; insn; register counts split after reload into negate + rotm.
2070 (define_insn_and_split "lshr<mode>3"
2071 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2072 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2073 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2074 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2078 rot<bh>mi\t%0,%1,-%<umask>2"
2079 "reload_completed && GET_CODE (operands[2]) == REG"
2080 [(set (match_dup:VHSI 3)
2081 (neg:VHSI (match_dup:VHSI 2)))
2082 (set (match_dup:VHSI 0)
2083 (lshiftrt:VHSI (match_dup:VHSI 1)
2084 (neg:VHSI (match_dup:VHSI 3))))]
2086 [(set_attr "type" "*,fx3")])

;; rotm: right shift expressed with an explicitly negated count.
2089 (define_insn "rotm_<mode>"
2090 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2091 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2092 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2096 rot<bh>mi\t%0,%1,-%<nmask>2"
2097 [(set_attr "type" "fx3")])

;; DI/TI logical right shift expander: constants go to the _imm
;; pattern, register counts allocate scratches for the _reg split.
2099 (define_expand "lshr<mode>3"
2100 [(parallel [(set (match_operand:DTI 0 "spu_reg_operand" "")
2101 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "")
2102 (match_operand:SI 2 "spu_nonmem_operand" "")))
2103 (clobber (match_dup:DTI 3))
2104 (clobber (match_dup:SI 4))
2105 (clobber (match_dup:SI 5))])]
2107 "if (GET_CODE (operands[2]) == CONST_INT)
2109 emit_insn(gen_lshr<mode>3_imm(operands[0], operands[1], operands[2]));
2112 operands[3] = gen_reg_rtx (<MODE>mode);
2113 operands[4] = gen_reg_rtx (SImode);
2114 operands[5] = gen_reg_rtx (SImode);")

;; Shift by constant; large counts split into byte (val & -8) and
;; bit (val & 7) parts, mirroring ashlti3_imm.
2116 (define_insn_and_split "lshr<mode>3_imm"
2117 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2118 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2119 (match_operand:SI 2 "immediate_operand" "O,P")))]
2122 rotqmbyi\t%0,%1,-%h2
2123 rotqmbii\t%0,%1,-%e2"
2124 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2125 [(set (match_dup:DTI 0)
2126 (lshiftrt:DTI (match_dup:DTI 1)
2128 (set (match_dup:DTI 0)
2129 (lshiftrt:DTI (match_dup:DTI 0)
2132 HOST_WIDE_INT val = INTVAL(operands[2]);
2133 operands[4] = GEN_INT (val&7);
2134 operands[5] = GEN_INT (val&-8);
2136 [(set_attr "type" "shuf,shuf")])

;; Shift by register: precompute 0-count (negated count) and 7-count,
;; then split into the two masked negative-count quadword shifts.
2138 (define_insn_and_split "lshr<mode>3_reg"
2139 [(set (match_operand:DTI 0 "spu_reg_operand" "=r")
2140 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r")
2141 (match_operand:SI 2 "spu_reg_operand" "r")))
2142 (clobber (match_operand:DTI 3 "spu_reg_operand" "=&r"))
2143 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
2144 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))]
2148 [(set (match_dup:DTI 3)
2149 (lshiftrt:DTI (match_dup:DTI 1)
2150 (and:SI (neg:SI (match_dup:SI 4))
2152 (set (match_dup:DTI 0)
2153 (lshiftrt:DTI (match_dup:DTI 3)
2154 (and:SI (neg:SI (and:SI (match_dup:SI 5)
2158 emit_insn(gen_subsi3(operands[4], GEN_INT(0), operands[2]));
2159 emit_insn(gen_subsi3(operands[5], GEN_INT(7), operands[2]));

;; Quadword right-shift-by-bytes with the count rounded via bits
;; (rotqmbybi); negative-count form.
2162 (define_insn "rotqmbybi_<mode>"
2163 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2164 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2165 (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2171 rotqmbyi\t%0,%1,-%H2"
2172 [(set_attr "type" "shuf")])

;; Quadword right-shift-by-bits (rotqmbi / rotqmbii).
2174 (define_insn "rotqmbi_<mode>"
2175 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2176 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2177 (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2182 rotqmbii\t%0,%1,-%E2"
2183 [(set_attr "type" "shuf")])

;; Quadword right-shift-by-bytes (rotqmby / rotqmbyi).
2185 (define_insn "rotqmby_<mode>"
2186 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2187 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2188 (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2193 rotqmbyi\t%0,%1,-%F2"
2194 [(set_attr "type" "shuf")])
;; Arithmetic right shifts.  Element shifts use rotma; DI and TI are
;; synthesized from logical shifts plus an explicit sign mask.

;; HI/SI (and vector) arithmetic right shift; register counts split
;; after reload into negate + rotma, like lshr<mode>3.
2199 (define_insn_and_split "ashr<mode>3"
2200 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2201 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2202 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2203 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2207 rotma<bh>i\t%0,%1,-%<umask>2"
2208 "reload_completed && GET_CODE (operands[2]) == REG"
2209 [(set (match_dup:VHSI 3)
2210 (neg:VHSI (match_dup:VHSI 2)))
2211 (set (match_dup:VHSI 0)
2212 (ashiftrt:VHSI (match_dup:VHSI 1)
2213 (neg:VHSI (match_dup:VHSI 3))))]
2215 [(set_attr "type" "*,fx3")])

;; rotma: arithmetic right shift with explicitly negated count.
2218 (define_insn "rotma_<mode>"
2219 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2220 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2221 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2225 rotma<bh>i\t%0,%1,-%<nmask>2"
2226 [(set_attr "type" "fx3")])

;; DI arithmetic right shift.  Special-cases counts >= 63 (pure sign
;; splat) and >= 32 (word shift + xswd), otherwise builds a sign mask
;; with fsm/selb and rotates right by the negated count.
2228 (define_insn_and_split "ashrdi3"
2229 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2230 (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2231 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2232 (clobber (match_scratch:TI 3 "=&r,&r"))
2233 (clobber (match_scratch:TI 4 "=&r,&r"))
2234 (clobber (match_scratch:SI 5 "=&r,&r"))]
2238 [(set (match_dup:DI 0)
2239 (ashiftrt:DI (match_dup:DI 1)
2242 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2243 rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0));
2244 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2245 rtx op1s = gen_rtx_REG (SImode, REGNO (op1));
2246 rtx op2 = operands[2];
2247 rtx op3 = operands[3];
2248 rtx op4 = operands[4];
2249 rtx op5 = operands[5];
2251 if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63)
2253 rtx op0s = gen_rtx_REG (SImode, REGNO (op0));
2254 emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32)));
2255 emit_insn (gen_spu_fsm (op0v, op0s));
2257 else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32)
2259 rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0));
2260 HOST_WIDE_INT val = INTVAL (op2);
2261 emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
2262 emit_insn (gen_spu_xswd (op0d, op0v));
2264 emit_insn (gen_ashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
2268 rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3));
2269 unsigned char arr[16] = {
2270 0xff, 0xff, 0xff, 0xff,
2271 0xff, 0xff, 0xff, 0xff,
2272 0x00, 0x00, 0x00, 0x00,
2273 0x00, 0x00, 0x00, 0x00
2276 emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31)));
2277 emit_move_insn (op4, array_to_constant (TImode, arr));
2278 emit_insn (gen_spu_fsm (op3v, op5));
2280 if (GET_CODE (operands[2]) == REG)
2282 emit_insn (gen_selb (op4, op3, op1, op4));
2283 emit_insn (gen_negsi2 (op5, op2));
2284 emit_insn (gen_rotqbybi_ti (op0, op4, op5));
2285 emit_insn (gen_rotqbi_ti (op0, op0, op5));
2289 HOST_WIDE_INT val = -INTVAL (op2);
2290 emit_insn (gen_selb (op0, op3, op1, op4));
2292 emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8)));
2294 emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8)));

;; TI arithmetic right shift: logical shift, plus a sign mask built
;; from the top word (ashrv4si by 31, fsm to fill, ashl by 128-count)
;; OR-ed into the vacated high bits.
2301 (define_expand "ashrti3"
2302 [(set (match_operand:TI 0 "spu_reg_operand" "")
2303 (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "")
2304 (match_operand:SI 2 "spu_nonmem_operand" "")))]
2307 rtx sign_shift = gen_reg_rtx (SImode);
2308 rtx sign_mask = gen_reg_rtx (TImode);
2309 rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0);
2310 rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
2311 rtx t = gen_reg_rtx (TImode);
2312 emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
2313 emit_insn (gen_ashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
2314 emit_insn (gen_fsm_ti (sign_mask, sign_mask));
2315 emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
2316 emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
2317 emit_insn (gen_iorti3 (operands[0], t, sign_mask));
2321 ;; fsm is used after rotam to replicate the sign across the whole register.
2322 (define_insn "fsm_ti"
2323 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2324 (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
2327 [(set_attr "type" "shuf")])

;; HI/SI (and vector) rotate left; register or W-immediate count.
2332 (define_insn "rotl<mode>3"
2333 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2334 (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2335 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2339 rot<bh>i\t%0,%1,%<umask>2"
2340 [(set_attr "type" "fx3")])

;; TI rotate left: four alternatives -- general register count uses a
;; two-insn rotqbybi+rotqbi pair (length 8), O/P immediates a single
;; insn, and general immediates a rotqbyi+rotqbii pair.
2342 (define_insn "rotlti3"
2343 [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r")
2344 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r")
2345 (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))]
2348 rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2
2351 rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2"
2352 [(set_attr "length" "8,4,4,8")
2353 (set_attr "type" "multi1,shuf,shuf,multi1")])

;; Quadword rotate-by-bytes with bit-granular count (rotqbybi).
2355 (define_insn "rotqbybi_ti"
2356 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2357 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2358 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2364 [(set_attr "type" "shuf,shuf")])

;; Quadword rotate-by-bytes (count scaled by the multiply).
2366 (define_insn "rotqby_ti"
2367 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2368 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2369 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2375 [(set_attr "type" "shuf,shuf")])

;; Quadword rotate-by-bits (count masked by the AND).
2377 (define_insn "rotqbi_ti"
2378 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2379 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2380 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2386 [(set_attr "type" "shuf,shuf")])
2389 ;; struct extract/insert
2390 ;; We have to handle mem's because GCC will generate invalid SUBREG's
2391 ;; if it handles them. We generate better code anyway.

;; Signed bit-field extract; all the work is in spu_expand_extv
;; (second argument 0 selects the signed variant).
2393 (define_expand "extv"
2394 [(set (match_operand 0 "register_operand" "")
2395 (sign_extract (match_operand 1 "register_operand" "")
2396 (match_operand:SI 2 "const_int_operand" "")
2397 (match_operand:SI 3 "const_int_operand" "")))]
2399 { spu_expand_extv(operands, 0); DONE; })

;; Unsigned bit-field extract (spu_expand_extv with 1).
2401 (define_expand "extzv"
2402 [(set (match_operand 0 "register_operand" "")
2403 (zero_extract (match_operand 1 "register_operand" "")
2404 (match_operand:SI 2 "const_int_operand" "")
2405 (match_operand:SI 3 "const_int_operand" "")))]
2407 { spu_expand_extv(operands, 1); DONE; })

;; Bit-field insert, delegated to spu_expand_insv.
2409 (define_expand "insv"
2410 [(set (zero_extract (match_operand 0 "register_operand" "")
2411 (match_operand:SI 1 "const_int_operand" "")
2412 (match_operand:SI 2 "const_int_operand" ""))
2413 (match_operand 3 "nonmemory_operand" ""))]
2415 { spu_expand_insv(operands); DONE; })
2418 ;; String/block move insn.
2419 ;; Argument 0 is the destination
2420 ;; Argument 1 is the source
2421 ;; Argument 2 is the length
2422 ;; Argument 3 is the alignment
;; NOTE(review): the FAIL branch taken when spu_expand_block_move returns
;; zero is elided from this extraction.
2424 (define_expand "movstrsi"
2425 [(parallel [(set (match_operand:BLK 0 "" "")
2426 (match_operand:BLK 1 "" ""))
2427 (use (match_operand:SI 2 "" ""))
2428 (use (match_operand:SI 3 "" ""))])]
2432 if (spu_expand_block_move (operands))
2441 (define_insn "indirect_jump"
2441 (define_insn "indirect_jump"
2442 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))]
2445 [(set_attr "type" "br")])
2449 (label_ref (match_operand 0 "" "")))]
2452 [(set_attr "type" "br")])
2457 ;; This will be used for leaf functions, that don't save any regs and
2458 ;; don't have locals on stack, maybe... that is for functions that
2459 ;; don't change $sp and don't need to save $lr.
;; NOTE(review): the bodies of "return" and "_return" (condition and
;; template lines) are elided here; kept byte-identical.
2460 (define_expand "return"
2465 ;; used in spu_expand_epilogue to generate return from a function and
2466 ;; explicitly set use of $lr.
2468 (define_insn "_return"
2472 [(set_attr "type" "br")])
;; ceq_<mode>: vector equality compare for VQHSI modes — register/register
;; (ceq) or register/immediate (ceq<bh>i) alternatives.
2478 (define_insn "ceq_<mode>"
2479 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2480 (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2481 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2485 ceq<bh>i\t%0,%1,%2")
;; ceq_di: DImode equality built from a V4SI word compare: ceq on the
;; word halves, gather bits (gb), then cgt against 11 so the result is
;; all-ones only when both 32-bit halves matched.
2487 (define_insn_and_split "ceq_di"
2488 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2489 (eq:SI (match_operand:DI 1 "spu_reg_operand" "r")
2490 (match_operand:DI 2 "spu_reg_operand" "r")))]
2494 [(set (match_dup:SI 0)
2495 (eq:SI (match_dup:DI 1)
2498 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2499 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2500 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2501 emit_insn (gen_ceq_v4si (op0, op1, op2));
2502 emit_insn (gen_spu_gb (op0, op0));
2503 emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11)));
2508 ;; We provide the TI compares for completeness and because some parts of
2509 ;; gcc/libgcc use them, even though user code might never see it.
;; ceq_ti: 128-bit equality — word compare, gather bits, then require all
;; four gathered bits set (ceqi with 15).
2510 (define_insn "ceq_ti"
2511 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2512 (eq:SI (match_operand:TI 1 "spu_reg_operand" "r")
2513 (match_operand:TI 2 "spu_reg_operand" "r")))]
2515 "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
2516 [(set_attr "type" "multi0")
2517 (set_attr "length" "12")])
;; Single-precision float equality (ceq_<mode>) and magnitude equality
;; (cmeq_<mode>, compares |a| == |b|) producing an integer mask of mode
;; <F2I>.  NOTE(review): output templates elided in this extraction.
2519 (define_insn "ceq_<mode>"
2520 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2521 (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
2522 (match_operand:VSF 2 "spu_reg_operand" "r")))]
2526 (define_insn "cmeq_<mode>"
2527 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2528 (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
2529 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
2533 ;; These implementations of ceq_df and cgt_df do not correctly handle
2534 ;; NAN or INF. We will also get incorrect results when the result
2535 ;; of the double subtract is too small.
;; ceq_df (Cell, unsafe-math only): compare a DF value against zero by
;; word-compares of its bit pattern: high word == 0 or == 0x80000000
;; (i.e. +0.0 or -0.0) combined with the rotated low word's zero test.
2536 (define_expand "ceq_df"
2537 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2538 (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
2539 (match_operand:DF 2 "const_zero_operand" "i")))]
2542 if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
2544 rtx s0_ti = gen_reg_rtx(TImode);
2545 rtx s1_v4 = gen_reg_rtx(V4SImode);
2546 rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
2547 rtx to_ti = gen_reg_rtx(TImode);
2548 rtx to_v4 = gen_reg_rtx(V4SImode);
2549 rtx l_v4 = gen_reg_rtx(V4SImode);
2550 emit_insn (gen_spu_convert (l_v4, operands[1]));
2551 emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
2552 emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
2553 emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
2554 emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
2555 emit_insn (gen_spu_convert (to_v4, to_ti));
2556 emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
2557 emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
2558 emit_insn (gen_spu_convert (operands[0], to_v4));
;; Double-precision equality compares for the enhanced-DP Cell variant
;; (PROCESSOR_CELLEDP): hardware dfceq / magnitude dfcmeq on the fpd pipe.
;; NOTE(review): output template lines elided in this extraction.
2563 (define_insn "ceq_<mode>_celledp"
2564 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2565 (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
2566 (match_operand:VDF 2 "spu_reg_operand" "r")))]
2567 "spu_arch == PROCESSOR_CELLEDP"
2569 [(set_attr "type" "fpd")])
2571 (define_insn "cmeq_<mode>_celledp"
2572 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2573 (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
2574 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
2575 "spu_arch == PROCESSOR_CELLEDP"
2577 [(set_attr "type" "fpd")])
;; ceq_v2df: software V2DF equality for plain Cell (no DP compare insn).
;; Strategy: bitwise equality of both words of each double, ORed with an
;; "both operands are +/-0" test, ANDed against NOT(a is NaN); the 32-bit
;; partial results are promoted to 64-bit lane masks with a shufb.
2579 (define_expand "ceq_v2df"
2580 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2581 (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
2582 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
2585 if (spu_arch == PROCESSOR_CELL)
2587 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2588 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2589 rtx temp = gen_reg_rtx (TImode);
2590 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2591 rtx temp2 = gen_reg_rtx (V4SImode);
2592 rtx biteq = gen_reg_rtx (V4SImode);
2593 rtx ahi_inf = gen_reg_rtx (V4SImode);
2594 rtx a_nan = gen_reg_rtx (V4SImode);
2595 rtx a_abs = gen_reg_rtx (V4SImode);
2596 rtx b_abs = gen_reg_rtx (V4SImode);
2597 rtx iszero = gen_reg_rtx (V4SImode);
2598 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2599 0x7FFFFFFF, 0xFFFFFFFF);
2600 rtx sign_mask = gen_reg_rtx (V4SImode);
2601 rtx nan_mask = gen_reg_rtx (V4SImode);
2602 rtx hihi_promote = gen_reg_rtx (TImode);
2604 emit_move_insn (sign_mask, pat);
2605 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2607 emit_move_insn (nan_mask, pat);
2608 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
2609 0x08090A0B, 0x18191A1B);
2610 emit_move_insn (hihi_promote, pat);
2612 emit_insn (gen_ceq_v4si (biteq, ra, rb));
2613 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
2615 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
2616 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2617 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2618 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2619 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
2620 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2622 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
2623 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2624 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
2625 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
2626 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
2628 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
2629 emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
2630 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2631 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
;; cmeq_v2df: software V2DF magnitude equality (|a| == |b|) for plain
;; Cell.  Same structure as ceq_v2df but compares sign-stripped bit
;; patterns directly, so no separate zero test is needed.
2636 (define_expand "cmeq_v2df"
2637 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2638 (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
2639 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
2642 if(spu_arch == PROCESSOR_CELL)
2644 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2645 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2646 rtx temp = gen_reg_rtx (TImode);
2647 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2648 rtx temp2 = gen_reg_rtx (V4SImode);
2649 rtx biteq = gen_reg_rtx (V4SImode);
2650 rtx ahi_inf = gen_reg_rtx (V4SImode);
2651 rtx a_nan = gen_reg_rtx (V4SImode);
2652 rtx a_abs = gen_reg_rtx (V4SImode);
2653 rtx b_abs = gen_reg_rtx (V4SImode);
2655 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2656 0x7FFFFFFF, 0xFFFFFFFF);
2657 rtx sign_mask = gen_reg_rtx (V4SImode);
2658 rtx nan_mask = gen_reg_rtx (V4SImode);
2659 rtx hihi_promote = gen_reg_rtx (TImode);
2661 emit_move_insn (sign_mask, pat);
2663 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2665 emit_move_insn (nan_mask, pat);
2666 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
2667 0x08090A0B, 0x18191A1B);
2668 emit_move_insn (hihi_promote, pat);
2670 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2671 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2672 emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
2673 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
2675 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
2676 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2677 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
2678 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2680 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
2681 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2682 emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
2683 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
;; cgt_<mode>: vector signed greater-than for VQHSI (register or
;; immediate).  cgt_di_m1 handles the DI > -1 special case.
;; NOTE(review): conditions/templates partly elided below.
2691 (define_insn "cgt_<mode>"
2692 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2693 (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2694 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2698 cgt<bh>i\t%0,%1,%2")
2700 (define_insn "cgt_di_m1"
2701 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2702 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
;; cgt_di: signed DImode greater-than split into word operations —
;; unsigned-gt of low words (clgt), equality of high words (ceq), signed
;; gt of high words (cgt); xswd extends the low-word result and selb
;; merges: use the low-word compare only where the high words are equal.
2707 (define_insn_and_split "cgt_di"
2708 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2709 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
2710 (match_operand:DI 2 "spu_reg_operand" "r")))
2711 (clobber (match_scratch:V4SI 3 "=&r"))
2712 (clobber (match_scratch:V4SI 4 "=&r"))
2713 (clobber (match_scratch:V4SI 5 "=&r"))]
2717 [(set (match_dup:SI 0)
2718 (gt:SI (match_dup:DI 1)
2721 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2722 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2723 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2724 rtx op3 = operands[3];
2725 rtx op4 = operands[4];
2726 rtx op5 = operands[5];
2727 rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3]));
2728 emit_insn (gen_clgt_v4si (op3, op1, op2));
2729 emit_insn (gen_ceq_v4si (op4, op1, op2));
2730 emit_insn (gen_cgt_v4si (op5, op1, op2));
2731 emit_insn (gen_spu_xswd (op3d, op3));
2732 emit_insn (gen_selb (op0, op5, op3, op4));
;; cgt_ti: signed 128-bit greater-than emitted as a fixed 9-insn sequence
;; (36 bytes, multi0).  NOTE(review): several template lines are elided
;; in this extraction.
2736 (define_insn "cgt_ti"
2737 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2738 (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
2739 (match_operand:TI 2 "spu_reg_operand" "r")))
2740 (clobber (match_scratch:V4SI 3 "=&r"))
2741 (clobber (match_scratch:V4SI 4 "=&r"))
2742 (clobber (match_scratch:V4SI 5 "=&r"))]
2748 selb\t%0,%4,%0,%3\;\
2750 selb\t%0,%4,%0,%3\;\
2753 [(set_attr "type" "multi0")
2754 (set_attr "length" "36")])
;; Single-precision greater-than (cgt_<mode>) and magnitude greater-than
;; (cmgt_<mode>, |a| > |b|) yielding an <F2I> integer mask.
;; NOTE(review): output templates elided in this extraction.
2756 (define_insn "cgt_<mode>"
2757 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2758 (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
2759 (match_operand:VSF 2 "spu_reg_operand" "r")))]
2763 (define_insn "cmgt_<mode>"
2764 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2765 (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
2766 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
;; cgt_df (Cell, unsafe-math only): DF > 0 via word tests on the bit
;; pattern: signed-gt of high word with a rotated low-word correction for
;; the "high word zero but low word nonzero" case.  See the NaN/INF
;; caveat comment above ceq_df.
2770 (define_expand "cgt_df"
2771 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2772 (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
2773 (match_operand:DF 2 "const_zero_operand" "i")))]
2776 if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
2778 rtx s0_ti = gen_reg_rtx(TImode);
2779 rtx s1_v4 = gen_reg_rtx(V4SImode);
2780 rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
2781 rtx to_ti = gen_reg_rtx(TImode);
2782 rtx to_v4 = gen_reg_rtx(V4SImode);
2783 rtx l_v4 = gen_reg_rtx(V4SImode);
2784 emit_insn (gen_spu_convert(l_v4, operands[1]));
2785 emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
2786 emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
2787 emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
2788 emit_insn (gen_spu_convert(to_v4, to_ti));
2789 emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
2790 emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
2791 emit_insn (gen_spu_convert(operands[0], to_v4));
;; Double-precision greater-than compares for PROCESSOR_CELLEDP hardware
;; (fpd pipe).  NOTE(review): output template lines elided.
2796 (define_insn "cgt_<mode>_celledp"
2797 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2798 (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
2799 (match_operand:VDF 2 "spu_reg_operand" "r")))]
2800 "spu_arch == PROCESSOR_CELLEDP"
2802 [(set_attr "type" "fpd")])
2804 (define_insn "cmgt_<mode>_celledp"
2805 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2806 (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
2807 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
2808 "spu_arch == PROCESSOR_CELLEDP"
2810 [(set_attr "type" "fpd")])
;; cgt_v2df: software V2DF signed greater-than for plain Cell.  Each
;; double's bit pattern is converted to a sign-magnitude-free ordering
;; key (negate magnitude for negative inputs via borrow-generate bg /
;; subtract-extended sfx, selected by the sign with selb), then compared
;; word-wise; NaN lanes force the result to zero via andc.
2812 (define_expand "cgt_v2df"
2813 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2814 (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
2815 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
2818 if(spu_arch == PROCESSOR_CELL)
2820 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2821 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2822 rtx zero = gen_reg_rtx (V4SImode);
2823 rtx temp = gen_reg_rtx (TImode);
2824 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2825 rtx temp2 = gen_reg_rtx (V4SImode);
2826 rtx hi_inf = gen_reg_rtx (V4SImode);
2827 rtx a_nan = gen_reg_rtx (V4SImode);
2828 rtx b_nan = gen_reg_rtx (V4SImode);
2829 rtx a_abs = gen_reg_rtx (V4SImode);
2830 rtx b_abs = gen_reg_rtx (V4SImode);
2831 rtx asel = gen_reg_rtx (V4SImode);
2832 rtx bsel = gen_reg_rtx (V4SImode);
2833 rtx abor = gen_reg_rtx (V4SImode);
2834 rtx bbor = gen_reg_rtx (V4SImode);
2835 rtx gt_hi = gen_reg_rtx (V4SImode);
2836 rtx gt_lo = gen_reg_rtx (V4SImode);
2837 rtx sign_mask = gen_reg_rtx (V4SImode);
2838 rtx nan_mask = gen_reg_rtx (V4SImode);
2839 rtx hi_promote = gen_reg_rtx (TImode);
2840 rtx borrow_shuffle = gen_reg_rtx (TImode);
2841 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2842 0x7FFFFFFF, 0xFFFFFFFF);
2843 emit_move_insn (sign_mask, pat);
2844 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2846 emit_move_insn (nan_mask, pat);
2847 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
2848 0x08090A0B, 0x08090A0B);
2849 emit_move_insn (hi_promote, pat);
2850 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
2851 0x0C0D0E0F, 0xC0C0C0C0);
2852 emit_move_insn (borrow_shuffle, pat);
2854 emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
2855 emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
2856 emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
2857 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2859 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2860 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2861 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
2862 emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
2863 emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
2864 emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
2865 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
2867 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2868 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
2869 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
2870 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
2871 emit_move_insn (zero, CONST0_RTX (V4SImode));
2872 emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
2873 emit_insn (gen_shufb (asel, asel, asel, hi_promote));
2874 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2875 emit_insn (gen_bg_v4si (abor, zero, a_abs));
2876 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
2877 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
2878 emit_insn (gen_selb (abor, a_abs, abor, asel));
2879 emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
2880 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
2881 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2882 emit_insn (gen_bg_v4si (bbor, zero, b_abs));
2883 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
2884 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
2885 emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
2886 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
2887 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
2888 emit_insn (gen_ceq_v4si (temp2, abor, bbor));
2889 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
2891 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
2892 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
2894 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
2895 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2896 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
;; cmgt_v2df: software V2DF magnitude greater-than (|a| > |b|) for plain
;; Cell.  Like cgt_v2df but compares the sign-stripped bit patterns
;; directly (magnitudes order like unsigned integers), with the same
;; NaN masking and 64-bit lane promotion via shufb.
2901 (define_expand "cmgt_v2df"
2902 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2903 (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
2904 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
2907 if(spu_arch == PROCESSOR_CELL)
2909 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2910 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2911 rtx temp = gen_reg_rtx (TImode);
2912 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2913 rtx temp2 = gen_reg_rtx (V4SImode);
2914 rtx hi_inf = gen_reg_rtx (V4SImode);
2915 rtx a_nan = gen_reg_rtx (V4SImode);
2916 rtx b_nan = gen_reg_rtx (V4SImode);
2917 rtx a_abs = gen_reg_rtx (V4SImode);
2918 rtx b_abs = gen_reg_rtx (V4SImode);
2919 rtx gt_hi = gen_reg_rtx (V4SImode);
2920 rtx gt_lo = gen_reg_rtx (V4SImode);
2921 rtx sign_mask = gen_reg_rtx (V4SImode);
2922 rtx nan_mask = gen_reg_rtx (V4SImode);
2923 rtx hi_promote = gen_reg_rtx (TImode);
2924 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2925 0x7FFFFFFF, 0xFFFFFFFF);
2926 emit_move_insn (sign_mask, pat);
2927 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2929 emit_move_insn (nan_mask, pat);
2930 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
2931 0x08090A0B, 0x08090A0B);
2932 emit_move_insn (hi_promote, pat);
2934 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2935 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
2936 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2937 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2939 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2940 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2941 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
2942 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2943 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
2944 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
2945 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
2947 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2948 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
2949 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
2950 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
2952 emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
2953 emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
2954 emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
2955 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
2957 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
2958 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
2959 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
2960 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2961 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
;; clgt_<mode>: vector unsigned greater-than for VQHSI modes
;; (register/register or register/immediate clgt<bh>i).
2969 (define_insn "clgt_<mode>"
2970 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2971 (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2972 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2976 clgt<bh>i\t%0,%1,%2")
;; clgt_di: unsigned DImode greater-than split into V4SI word ops —
;; unsigned-gt and equality of the word halves, xswd word-extend, then
;; selb merges high-word result with the low-word result where the high
;; words are equal.
2978 (define_insn_and_split "clgt_di"
2979 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2980 (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r")
2981 (match_operand:DI 2 "spu_reg_operand" "r")))
2982 (clobber (match_scratch:V4SI 3 "=&r"))
2983 (clobber (match_scratch:V4SI 4 "=&r"))
2984 (clobber (match_scratch:V4SI 5 "=&r"))]
2988 [(set (match_dup:SI 0)
2989 (gtu:SI (match_dup:DI 1)
2992 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2993 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2994 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2995 rtx op3 = operands[3];
2996 rtx op4 = operands[4];
2997 rtx op5 = operands[5];
2998 rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5]));
2999 emit_insn (gen_clgt_v4si (op3, op1, op2));
3000 emit_insn (gen_ceq_v4si (op4, op1, op2));
3001 emit_insn (gen_spu_xswd (op5d, op3));
3002 emit_insn (gen_selb (op0, op3, op5, op4));
;; clgt_ti: unsigned 128-bit greater-than as a fixed 8-insn sequence
;; (32 bytes, multi0).  NOTE(review): several template lines elided.
3006 (define_insn "clgt_ti"
3007 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3008 (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r")
3009 (match_operand:TI 2 "spu_reg_operand" "r")))
3010 (clobber (match_scratch:V4SI 3 "=&r"))
3011 (clobber (match_scratch:V4SI 4 "=&r"))]
3016 selb\t%0,%4,%0,%3\;\
3018 selb\t%0,%4,%0,%3\;\
3021 [(set_attr "type" "multi0")
3022 (set_attr "length" "32")])
;; dftsv_celledp: double-float test-special-value (UNSPEC_DFTSV) using
;; the hardware instruction available on PROCESSOR_CELLEDP; operand 2 is
;; the test-class bitmask.  NOTE(review): output template elided.
3026 (define_insn "dftsv_celledp"
3027 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3028 (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
3029 (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
3030 "spu_arch == PROCESSOR_CELLEDP"
3032 [(set_attr "type" "fpd")])
;; dftsv: software double-float test-special-value for plain Cell.
;; Operand 2 is a class bitmask: 0x40 NaN, 0x20/0x10 +/-inf, 0x8/0x4
;; +/-zero, 0x2/0x1 +/-denorm.  Each requested class is detected by
;; integer tests on the sign-stripped bit pattern and ORed into
;; `result`, which is promoted to 64-bit lane masks at the end.
3034 (define_expand "dftsv"
3035 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3036 (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
3037 (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
3040 if(spu_arch == PROCESSOR_CELL)
3042 rtx result = gen_reg_rtx (V4SImode);
3043 emit_move_insn (result, CONST0_RTX (V4SImode));
3045 if (INTVAL (operands[2]))
3047 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3048 rtx abs = gen_reg_rtx (V4SImode);
3049 rtx sign = gen_reg_rtx (V4SImode);
3050 rtx temp = gen_reg_rtx (TImode);
3051 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3052 rtx temp2 = gen_reg_rtx (V4SImode);
3053 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3054 0x7FFFFFFF, 0xFFFFFFFF);
3055 rtx sign_mask = gen_reg_rtx (V4SImode);
3056 rtx hi_promote = gen_reg_rtx (TImode);
3057 emit_move_insn (sign_mask, pat);
3058 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3059 0x08090A0B, 0x08090A0B);
3060 emit_move_insn (hi_promote, pat);
3062 emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
3063 emit_insn (gen_shufb (sign, sign, sign, hi_promote));
3064 emit_insn (gen_andv4si3 (abs, ra, sign_mask));
3066 /* NaN or +inf or -inf */
3067 if (INTVAL (operands[2]) & 0x70)
3069 rtx nan_mask = gen_reg_rtx (V4SImode);
3070 rtx isinf = gen_reg_rtx (V4SImode);
3071 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3073 emit_move_insn (nan_mask, pat);
3074 emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
3077 if (INTVAL (operands[2]) & 0x40)
3079 rtx isnan = gen_reg_rtx (V4SImode);
3080 emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
3081 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
3083 emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
3084 emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
3085 emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
3086 emit_insn (gen_iorv4si3 (result, result, isnan));
3089 if (INTVAL (operands[2]) & 0x30)
3091 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
3093 emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
3094 emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
3097 if (INTVAL (operands[2]) & 0x20)
3099 emit_insn (gen_andc_v4si (temp2, isinf, sign));
3100 emit_insn (gen_iorv4si3 (result, result, temp2));
3103 if (INTVAL (operands[2]) & 0x10)
3105 emit_insn (gen_andv4si3 (temp2, isinf, sign));
3106 emit_insn (gen_iorv4si3 (result, result, temp2));
3112 if (INTVAL (operands[2]) & 0xF)
3114 rtx iszero = gen_reg_rtx (V4SImode);
3115 emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
3116 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3118 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3121 if (INTVAL (operands[2]) & 0x3)
3123 rtx isdenorm = gen_reg_rtx (V4SImode);
3124 rtx denorm_mask = gen_reg_rtx (V4SImode);
3125 emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
3126 emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
3127 emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
3128 emit_insn (gen_shufb (isdenorm, isdenorm,
3129 isdenorm, hi_promote));
3131 if (INTVAL (operands[2]) & 0x2)
3133 emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
3134 emit_insn (gen_iorv4si3 (result, result, temp2));
3137 if (INTVAL (operands[2]) & 0x1)
3139 emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
3140 emit_insn (gen_iorv4si3 (result, result, temp2));
3145 if (INTVAL (operands[2]) & 0xC)
3147 emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
3149 if (INTVAL (operands[2]) & 0x8)
3151 emit_insn (gen_andc_v4si (temp2, iszero, sign));
3152 emit_insn (gen_iorv4si3 (result, result, temp2));
3155 if (INTVAL (operands[2]) & 0x4)
3157 emit_insn (gen_andv4si3 (temp2, iszero, sign));
3158 emit_insn (gen_iorv4si3 (result, result, temp2));
3163 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
;; Fragments of four conditional-branch insns (branch-on-true and
;; branch-on-false, each with label first or fall-through first).  The
;; define_insn headers and output templates are missing from this
;; extraction; the remaining pattern lines are kept byte-identical.
3173 (if_then_else (match_operator 1 "branch_comparison_operator"
3175 "spu_reg_operand" "r")
3177 (label_ref (match_operand 0 "" ""))
3181 [(set_attr "type" "br")])
3185 (if_then_else (match_operator 0 "branch_comparison_operator"
3187 "spu_reg_operand" "r")
3193 [(set_attr "type" "br")])
3197 (if_then_else (match_operator 1 "branch_comparison_operator"
3199 "spu_reg_operand" "r")
3202 (label_ref (match_operand 0 "" ""))))]
3205 [(set_attr "type" "br")])
3209 (if_then_else (match_operator 0 "branch_comparison_operator"
3211 "spu_reg_operand" "r")
3217 [(set_attr "type" "br")])
3220 ;; Compare insns are next. Note that the spu has two types of compares,
3221 ;; signed & unsigned, and one type of branch.
3223 ;; Start with the DEFINE_EXPANDs to generate the rtl for compares, scc
3224 ;; insns, and branches. We store the operands of compares until we see
;; All cmp expanders only record their operands in spu_compare_op0/op1;
;; the actual compare is emitted later by the branch/scc expanders.
3227 (define_expand "cmp<mode>"
3229 (compare (match_operand:VQHSI 0 "spu_reg_operand" "")
3230 (match_operand:VQHSI 1 "spu_nonmem_operand" "")))]
3233 spu_compare_op0 = operands[0];
3234 spu_compare_op1 = operands[1];
3238 (define_expand "cmp<mode>"
3240 (compare (match_operand:DTI 0 "spu_reg_operand" "")
3241 (match_operand:DTI 1 "spu_reg_operand" "")))]
3244 spu_compare_op0 = operands[0];
3245 spu_compare_op1 = operands[1];
3249 (define_expand "cmp<mode>"
3251 (compare (match_operand:VSF 0 "spu_reg_operand" "")
3252 (match_operand:VSF 1 "spu_reg_operand" "")))]
3255 spu_compare_op0 = operands[0];
3256 spu_compare_op1 = operands[1];
;; cmpdf is only available where DF compares exist: unsafe-math on Cell,
;; or always on the enhanced-DP variant.
3260 (define_expand "cmpdf"
3262 (compare (match_operand:DF 0 "register_operand" "")
3263 (match_operand:DF 1 "register_operand" "")))]
3264 "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
3265 || spu_arch == PROCESSOR_CELLEDP "
3267 spu_compare_op0 = operands[0];
3268 spu_compare_op1 = operands[1];
3272 ;; vector conditional compare patterns
;; vcond/vcondu: signed and unsigned vector select-on-compare; both
;; delegate to spu_emit_vector_cond_expr and (in elided code) FAIL when
;; it cannot handle the comparison.
3273 (define_expand "vcond<mode>"
3274 [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
3276 (match_operator 3 "comparison_operator"
3277 [(match_operand:VCMP 4 "spu_reg_operand" "r")
3278 (match_operand:VCMP 5 "spu_reg_operand" "r")])
3279 (match_operand:VCMP 1 "spu_reg_operand" "r")
3280 (match_operand:VCMP 2 "spu_reg_operand" "r")))]
3283 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3284 operands[3], operands[4], operands[5]))
3290 (define_expand "vcondu<mode>"
3291 [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
3293 (match_operator 3 "comparison_operator"
3294 [(match_operand:VCMPU 4 "spu_reg_operand" "r")
3295 (match_operand:VCMPU 5 "spu_reg_operand" "r")])
3296 (match_operand:VCMPU 1 "spu_reg_operand" "r")
3297 (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
3300 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3301 operands[3], operands[4], operands[5]))
3308 ;; branch on condition
;; One expander per RTL condition code; all route through
;; spu_emit_branch_or_set with first argument 0 (= emit a branch) using
;; the operands previously stored by the cmp expanders.
3310 (define_expand "beq"
3311 [(use (match_operand 0 "" ""))]
3313 { spu_emit_branch_or_set (0, EQ, operands); DONE; })
3315 (define_expand "bne"
3316 [(use (match_operand 0 "" ""))]
3318 { spu_emit_branch_or_set (0, NE, operands); DONE; })
3320 (define_expand "bge"
3321 [(use (match_operand 0 "" ""))]
3323 { spu_emit_branch_or_set (0, GE, operands); DONE; })
3325 (define_expand "bgt"
3326 [(use (match_operand 0 "" ""))]
3328 { spu_emit_branch_or_set (0, GT, operands); DONE; })
3330 (define_expand "ble"
3331 [(use (match_operand 0 "" ""))]
3333 { spu_emit_branch_or_set (0, LE, operands); DONE; })
3335 (define_expand "blt"
3336 [(use (match_operand 0 "" ""))]
3338 { spu_emit_branch_or_set (0, LT, operands); DONE; })
3340 (define_expand "bgeu"
3341 [(use (match_operand 0 "" ""))]
3343 { spu_emit_branch_or_set (0, GEU, operands); DONE; })
3345 (define_expand "bgtu"
3346 [(use (match_operand 0 "" ""))]
3348 { spu_emit_branch_or_set (0, GTU, operands); DONE; })
3350 (define_expand "bleu"
3351 [(use (match_operand 0 "" ""))]
3353 { spu_emit_branch_or_set (0, LEU, operands); DONE; })
3355 (define_expand "bltu"
3356 [(use (match_operand 0 "" ""))]
3358 { spu_emit_branch_or_set (0, LTU, operands); DONE; })
;; scc expanders: set a register to the result of a condition.  Same
;; helper as the branch expanders but with first argument 1 (= set).
3363 (define_expand "seq"
3364 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3366 { spu_emit_branch_or_set (1, EQ, operands); DONE; })
3368 (define_expand "sne"
3369 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3371 { spu_emit_branch_or_set (1, NE, operands); DONE; })
3373 (define_expand "sgt"
3374 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3376 { spu_emit_branch_or_set (1, GT, operands); DONE; })
3378 (define_expand "slt"
3379 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3381 { spu_emit_branch_or_set (1, LT, operands); DONE; })
3383 (define_expand "sge"
3384 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3386 { spu_emit_branch_or_set (1, GE, operands); DONE; })
3388 (define_expand "sle"
3389 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3391 { spu_emit_branch_or_set (1, LE, operands); DONE; })
3393 (define_expand "sgtu"
3394 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3396 { spu_emit_branch_or_set (1, GTU, operands); DONE; })
3398 (define_expand "sltu"
3399 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3401 { spu_emit_branch_or_set (1, LTU, operands); DONE; })
3403 (define_expand "sgeu"
3404 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3406 { spu_emit_branch_or_set (1, GEU, operands); DONE; })
3408 (define_expand "sleu"
3409 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3411 { spu_emit_branch_or_set (1, LEU, operands); DONE; })
3416 ;; Define this first one so HAVE_conditional_move is defined.
3417 (define_insn "movcc_dummy"
3418 [(set (match_operand 0 "" "")
3419 (if_then_else (match_operand 1 "" "")
3420 (match_operand 2 "" "")
3421 (match_operand 3 "" "")))]
;; mov<mode>cc: conditional move via spu_emit_branch_or_set with first
;; argument 2 (= conditional move), for every mode in ALL.
3425 (define_expand "mov<mode>cc"
3426 [(set (match_operand:ALL 0 "spu_reg_operand" "")
3427 (if_then_else:ALL (match_operand 1 "comparison_operator" "")
3428 (match_operand:ALL 2 "spu_reg_operand" "")
3429 (match_operand:ALL 3 "spu_reg_operand" "")))]
3432 spu_emit_branch_or_set(2, GET_CODE(operands[1]), operands);
3436 ;; This pattern is used when the result of a compare is not large
3437 ;; enough to use in a selb when expanding conditional moves.
;; The modeless expander re-emits the SET with the destination's mode so
;; the mode-specific insn below matches.
3438 (define_expand "extend_compare"
3439 [(set (match_operand 0 "spu_reg_operand" "=r")
3440 (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
3443 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3444 gen_rtx_UNSPEC (GET_MODE (operands[0]),
3445 gen_rtvec (1, operands[1]),
3446 UNSPEC_EXTEND_CMP)));
3450 (define_insn "extend_compare<mode>"
3451 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
3452 (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
3455 [(set_attr "type" "shuf")])
3460 ;; operand 0 is index
3461 ;; operand 1 is the minimum bound
3462 ;; operand 2 is the maximum bound - minimum bound + 1
3463 ;; operand 3 is CODE_LABEL for the table;
3464 ;; operand 4 is the CODE_LABEL to go to if index out of range.
;; Dispatch through a jump table:
;;   index  = op0 - op1                  (bias index to zero)
;;   sindex = index << 2                 (4-byte table entries)
;;   addr   = *(table + sindex) + table  (entries are table-relative)
;; then bounds-check: if index >u op2 jump to the default label op4,
;; otherwise tablejump to addr.
3465 (define_expand "casesi"
3466 [(match_operand:SI 0 "spu_reg_operand" "")
3467 (match_operand:SI 1 "immediate_operand" "")
3468 (match_operand:SI 2 "immediate_operand" "")
3469 (match_operand 3 "" "")
3470 (match_operand 4 "" "")]
3473 rtx table = gen_reg_rtx (SImode);
3474 rtx index = gen_reg_rtx (SImode);
3475 rtx sindex = gen_reg_rtx (SImode);
3476 rtx addr = gen_reg_rtx (Pmode);
;; Load the address of the table's CODE_LABEL into a register.
3478 emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3]));
3480 emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1])));
3481 emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2)));
3482 emit_move_insn (addr, gen_rtx_MEM (SImode,
3483 gen_rtx_PLUS (SImode, table, sindex)));
;; Table entries are offsets relative to the table base; add it back.
3485 emit_insn (gen_addsi3 (addr, addr, table));
;; Unsigned compare covers both below-minimum and above-maximum in one test.
3487 emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4])
3488 emit_jump_insn (gen_tablejump (addr, operands[3]));
;; Indirect jump through a register, with a (use label_ref) so the jump
;; table the register points into is kept alive by the optimizers.
3492 (define_insn "tablejump"
3493 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))
3494 (use (label_ref (match_operand 1 "" "")))]
3497 [(set_attr "type" "br")])
3502 ;; Note that operand 1 is total size of args, in bytes,
3503 ;; and what the call insn wants is the number of words.
;; Sibling (tail) call, no return value.  If the callee address does not
;; satisfy call_operand, force it into a register before matching _sibcall.
3504 (define_expand "sibcall"
3506 [(call (match_operand:QI 0 "call_operand" "")
3507 (match_operand:QI 1 "" ""))
3511 if (! call_operand (operands[0], QImode))
3512 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
;; Sibling-call insn proper; only matches when the RTL is flagged as a
;; sibling call (SIBLING_CALL_P).  Two alternatives ("R,S") for the two
;; forms of callee address.
3515 (define_insn "_sibcall"
3517 [(call (match_operand:QI 0 "call_operand" "R,S")
3518 (match_operand:QI 1 "" "i,i"))
3520 "SIBLING_CALL_P(insn)"
3524 [(set_attr "type" "br,br")])
;; Sibling (tail) call returning a value in operand 0; same address
;; legitimization as "sibcall", but the callee is operand 1 here.
3526 (define_expand "sibcall_value"
3528 [(set (match_operand 0 "" "")
3529 (call (match_operand:QI 1 "call_operand" "")
3530 (match_operand:QI 2 "" "")))
3534 if (! call_operand (operands[1], QImode))
3535 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
;; Sibling-call-with-value insn; matches only when SIBLING_CALL_P holds.
3538 (define_insn "_sibcall_value"
3540 [(set (match_operand 0 "" "")
3541 (call (match_operand:QI 1 "call_operand" "R,S")
3542 (match_operand:QI 2 "" "i,i")))
3544 "SIBLING_CALL_P(insn)"
3548 [(set_attr "type" "br,br")])
3550 ;; Note that operand 1 is total size of args, in bytes,
3551 ;; and what the call insn wants is the number of words.
;; Ordinary call, no return value.  Clobbers hard regs 0 and 130
;; (presumably the link register and a call-clobbered scratch per the SPU
;; ABI — TODO confirm against spu.h).  Forces an illegitimate callee
;; address into a register.
3552 (define_expand "call"
3554 [(call (match_operand:QI 0 "call_operand" "")
3555 (match_operand:QI 1 "" ""))
3556 (clobber (reg:SI 0))
3557 (clobber (reg:SI 130))])]
3560 if (! call_operand (operands[0], QImode))
3561 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
;; Call insn proper: three address alternatives ("R,S,T"), same hard-reg
;; clobbers as the "call" expander above.
3564 (define_insn "_call"
3566 [(call (match_operand:QI 0 "call_operand" "R,S,T")
3567 (match_operand:QI 1 "" "i,i,i"))
3568 (clobber (reg:SI 0))
3569 (clobber (reg:SI 130))])]
3575 [(set_attr "type" "br")])
;; Call returning a value in operand 0; callee is operand 1.  Same
;; clobbers and address legitimization as "call".
3577 (define_expand "call_value"
3579 [(set (match_operand 0 "" "")
3580 (call (match_operand:QI 1 "call_operand" "")
3581 (match_operand:QI 2 "" "")))
3582 (clobber (reg:SI 0))
3583 (clobber (reg:SI 130))])]
3586 if (! call_operand (operands[1], QImode))
3587 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
;; Call-with-value insn proper; mirrors _call with a result destination.
3590 (define_insn "_call_value"
3592 [(set (match_operand 0 "" "")
3593 (call (match_operand:QI 1 "call_operand" "R,S,T")
3594 (match_operand:QI 2 "" "i,i,i")))
3595 (clobber (reg:SI 0))
3596 (clobber (reg:SI 130))])]
3602 [(set_attr "type" "br")])
;; __builtin_apply support: call op0, then copy each (set dst src) in the
;; result block op2 out of the value registers, and finally emit a
;; blockage so the optimizers cannot move code across the copies.
3604 (define_expand "untyped_call"
3605 [(parallel [(call (match_operand 0 "" "")
3607 (match_operand 1 "" "")
3608 (match_operand 2 "" "")])]
;; TImode reg 3 — presumably the first return-value register per the SPU
;; ABI; TODO confirm.
3612 rtx reg = gen_rtx_REG (TImode, 3);
3614 /* We need to use call_value so the return value registers don't get
3616 emit_call_insn (gen_call_value (reg, operands[0], const0_rtx));
3618 for (i = 0; i < XVECLEN (operands[2], 0); i++)
3620 rtx set = XVECEXP (operands[2], 0, i);
3621 emit_move_insn (SET_DEST (set), SET_SRC (set));
3624 /* The optimizer does not know that the call sets the function value
3625 registers we stored in the result block. We avoid problems by
3626 claiming that all hard registers are used and clobbered at this
3628 emit_insn (gen_blockage ());
3634 ;; Patterns used for splitting and combining.
3637 ;; Function prologue and epilogue.
;; Function prologue: fully expanded in C by spu_expand_prologue.
3639 (define_expand "prologue"
3642 { spu_expand_prologue (); DONE; })
3644 ;; "blockage" is only emited in epilogue. This is what it took to
3645 ;; make "basic block reordering" work with the insns sequence
3646 ;; generated by the spu_expand_epilogue (taken from mips.md)
;; Zero-length scheduling barrier: an unspec_volatile that emits no code
;; (length 0) but stops insns from being moved across it.
3648 (define_insn "blockage"
3649 [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
3652 [(set_attr "type" "convert")
3653 (set_attr "length" "0")])
;; Function epilogue; false = not a sibling-call epilogue.
3655 (define_expand "epilogue"
3658 { spu_expand_epilogue (false); DONE; })
;; Epilogue emitted before a sibling call; true = sibcall variant.
3660 (define_expand "sibcall_epilogue"
3663 { spu_expand_epilogue (true); DONE; })
3666 ;; stack manipulations
3668 ;; An insn to allocate new stack space for dynamic use (e.g., alloca).
3669 ;; We move the back-chain and decrement the stack pointer.
;; Dynamic stack allocation (alloca): op0 receives the new address,
;; sp (reg 1) is decremented by op1; spu_allocate_stack also maintains
;; the back-chain (see comment above).
3670 (define_expand "allocate_stack"
3671 [(set (match_operand 0 "spu_reg_operand" "")
3672 (minus (reg 1) (match_operand 1 "spu_nonmem_operand" "")))
3674 (minus (reg 1) (match_dup 1)))]
3676 "spu_allocate_stack (operands[0], operands[1]); DONE;")
3678 ;; These patterns say how to save and restore the stack pointer. We need not
3679 ;; save the stack pointer at function level since we are careful to preserve
3683 ;; At block level the stack pointer is saved and restored, so that the
3684 ;; stack space allocated within a block is deallocated when leaving
3685 ;; block scope. By default, according to the SPU ABI, the stack
3686 ;; pointer and available stack size are saved in a register. Upon
3687 ;; restoration, the stack pointer is simply copied back, and the
3688 ;; current available stack size is calculated against the restored
3691 ;; For nonlocal gotos, we must save the stack pointer and its
3692 ;; backchain and restore both. Note that in the nonlocal case, the
3693 ;; save area is a memory location.
;; Function-level stack save: intentionally a no-op (the prologue/epilogue
;; already preserve sp — see the comment block above).
3695 (define_expand "save_stack_function"
3696 [(match_operand 0 "general_operand" "")
3697 (match_operand 1 "general_operand" "")]
;; Function-level stack restore: intentionally a no-op, matching
;; save_stack_function above.
3701 (define_expand "restore_stack_function"
3702 [(match_operand 0 "general_operand" "")
3703 (match_operand 1 "general_operand" "")]
;; Block-level stack restore; the real work (copying sp back and fixing
;; the available-stack-size word) is in spu_restore_stack_block.
3707 (define_expand "restore_stack_block"
3708 [(match_operand 0 "spu_reg_operand" "")
3709 (match_operand 1 "memory_operand" "")]
3713 spu_restore_stack_block (operands[0], operands[1]);
;; Nonlocal-goto stack save: store the back-chain word (loaded through sp)
;; at offset 0 of the save area and sp itself at offset 4.
3717 (define_expand "save_stack_nonlocal"
3718 [(match_operand 0 "memory_operand" "")
3719 (match_operand 1 "spu_reg_operand" "")]
3723 rtx temp = gen_reg_rtx (Pmode);
3725 /* Copy the backchain to the first word, sp to the second. We need to
3726 save the back chain because __builtin_apply appears to clobber it. */
3727 emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1]));
3728 emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp);
3729 emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]);
;; Nonlocal-goto stack restore; inverse of save_stack_nonlocal, done in C.
3733 (define_expand "restore_stack_nonlocal"
3734 [(match_operand 0 "spu_reg_operand" "")
3735 (match_operand 1 "memory_operand" "")]
3739 spu_restore_stack_nonlocal(operands[0], operands[1]);
3746 ;; Vector initialization
;; Initialize vector op0 from the element list in op1 (a PARALLEL);
;; expansion delegated to spu_expand_vector_init.
3747 (define_expand "vec_init<mode>"
3748 [(match_operand:V 0 "register_operand" "")
3749 (match_operand 1 "" "")]
3752 spu_expand_vector_init (operands[0], operands[1]);
;; Insert scalar op1 into vector op0 at constant element index op2.
;; First a cpat-style UNSPEC builds the insertion control mask in a TI
;; temporary (op3) from sp, a byte offset, and the element size; then a
;; shufb merges the scalar into the vector under that mask.
3756 (define_expand "vec_set<mode>"
3757 [(use (match_operand:SI 2 "spu_nonmem_operand" ""))
3758 (set (match_dup:TI 3)
3759 (unspec:TI [(match_dup:SI 4)
3761 (match_dup:SI 6)] UNSPEC_CPAT))
3762 (set (match_operand:V 0 "spu_reg_operand" "")
3763 (unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "")
3765 (match_dup:TI 3)] UNSPEC_SHUFB))]
;; Byte offset of the target element = index * element size.
3768 HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode);
3769 rtx offset = GEN_INT (INTVAL (operands[2]) * size);
3770 operands[3] = gen_reg_rtx (TImode);
3771 operands[4] = stack_pointer_rtx;
3772 operands[5] = offset;
3773 operands[6] = GEN_INT (size);
;; Extract element op2 from vector op1.  When the element's byte position
;; (index*<vmult>+<voff>) is already quadword-aligned, a zero-cost
;; spu_convert suffices; otherwise the _vec_extract<mode> rotate below is
;; used.
3776 (define_expand "vec_extract<mode>"
3777 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
3778 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
3779 (parallel [(match_operand 2 "const_int_operand" "i")])))]
3782 if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0)
3784 emit_insn (gen_spu_convert (operands[0], operands[1]));
;; Extract by rotating the element's bytes to the register's preferred
;; slot: rotqbyi by (index*<vmult>+<voff>) mod 16 bytes.
3789 (define_insn "_vec_extract<mode>"
3790 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
3791 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
3792 (parallel [(match_operand 2 "const_int_operand" "i")])))]
3794 "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16"
3795 [(set_attr "type" "shuf")])
;; Zero-extend element 0 of a V8HI into SImode with a single right
;; byte-rotate-and-mask (rotqmbyi -2), avoiding a separate extend.
3797 (define_insn "_vec_extractv8hi_ze"
3798 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3799 (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r")
3800 (parallel [(const_int 0)]))))]
3802 "rotqmbyi\t%0,%1,-2"
3803 [(set_attr "type" "shuf")])
;; Modeless shufb expander: generates the _shufb insn and then forces the
;; UNSPEC's mode to match operand 0, so one pattern serves every vector
;; mode.
3808 (define_expand "shufb"
3809 [(set (match_operand 0 "spu_reg_operand" "")
3810 (unspec [(match_operand 1 "spu_reg_operand" "")
3811 (match_operand 2 "spu_reg_operand" "")
3812 (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))]
3815 rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]);
3816 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
;; The shufb instruction itself: byte-permute op1/op2 under control
;; pattern op3 (TImode).
3821 (define_insn "_shufb"
3822 [(set (match_operand 0 "spu_reg_operand" "=r")
3823 (unspec [(match_operand 1 "spu_reg_operand" "r")
3824 (match_operand 2 "spu_reg_operand" "r")
3825 (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))]
3827 "shufb\t%0,%1,%2,%3"
3828 [(set_attr "type" "shuf")])
;; Pipe-0 no-op (type "nop").  NOTE(review): the opening
;; (define_insn ...) line is not visible in this extraction.
3831 [(unspec_volatile [(const_int 0)] UNSPEC_NOP)]
3834 [(set_attr "type" "nop")])
;; No-op taking an immediate operand (constraint "K").  NOTE(review): the
;; opening (define_insn ...) line is not visible in this extraction.
3837 [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPEC_NOP)]
3840 [(set_attr "type" "nop")])
;; Pipe-1 no-op (type "lnop").  NOTE(review): the opening
;; (define_insn ...) line is not visible in this extraction.
3843 [(unspec_volatile [(const_int 0)] UNSPEC_LNOP)]
3846 [(set_attr "type" "lnop")])
;; Instruction-prefetch marker (scheduled on the iprefetch unit).
3848 (define_insn "iprefetch"
3849 [(unspec [(const_int 0)] UNSPEC_IPREFETCH)]
3852 [(set_attr "type" "iprefetch")])
;; Hint-for-branch (hbr) pattern: op0 is an immediate, op1 the branch
;; target (reg/sym/imm alternatives).  NOTE(review): the opening
;; (define_insn ...) and set lines are not visible in this extraction.
3856 (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i")
3857 (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR))
3858 (unspec [(const_int 0)] UNSPEC_HBR)]
3864 [(set_attr "type" "hbr")])
;; sync barrier (UNSPEC_SYNC variant 0); clobbers all of memory so no
;; memory access is moved across it.  NOTE(review): the opening
;; (define_insn ...) line is not visible in this extraction.
3867 [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)
3868 (clobber (mem:BLK (scratch)))]
3871 [(set_attr "type" "br")])
;; syncc barrier (UNSPEC_SYNC variant 1); memory clobber as for sync.
3873 (define_insn "syncc"
3874 [(unspec_volatile [(const_int 1)] UNSPEC_SYNC)
3875 (clobber (mem:BLK (scratch)))]
3878 [(set_attr "type" "br")])
;; dsync barrier (UNSPEC_SYNC variant 2); memory clobber as for sync.
3880 (define_insn "dsync"
3881 [(unspec_volatile [(const_int 2)] UNSPEC_SYNC)
3882 (clobber (mem:BLK (scratch)))]
3885 [(set_attr "type" "br")])
3888 ;; convert between any two modes, avoiding any GCC assumptions
;; Modeless register "conversion": reinterpret op1's bits in op0's mode.
;; Generates _spu_convert and retags the UNSPEC with op0's mode.
3889 (define_expand "spu_convert"
3890 [(set (match_operand 0 "spu_reg_operand" "")
3891 (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))]
3894 rtx c = gen__spu_convert (operands[0], operands[1]);
3895 PUT_MODE (SET_SRC (c), GET_MODE (operands[0]));
;; The convert insn: constraint "0" ties source and destination to the
;; same register, and length 0 means it emits no code — a pure
;; reinterpretation.
3900 (define_insn "_spu_convert"
3901 [(set (match_operand 0 "spu_reg_operand" "=r")
3902 (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
3905 [(set_attr "type" "convert")
3906 (set_attr "length" "0")])
;; Split that deletes a _spu_convert entirely (replacing it with a
;; harmless (use (const_int 0))) — the conversion needs no code.
;; NOTE(review): the opening (define_split ...) line is not visible in
;; this extraction.
3909 [(set (match_operand 0 "spu_reg_operand")
3910 (unspec [(match_operand 1 "spu_reg_operand")] UNSPEC_CONVERT))]
3912 [(use (const_int 0))]
3917 (include "spu-builtins.md")
;; V4SF max: cgt produces an all-ones/all-zeros per-element mask for
;; op1 > op2; selb then picks op1 where the mask is set, op2 elsewhere.
3920 (define_expand "smaxv4sf3"
3921 [(set (match_operand:V4SF 0 "register_operand" "=r")
3922 (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
3923 (match_operand:V4SF 2 "register_operand" "r")))]
3927 rtx mask = gen_reg_rtx (V4SImode);
3929 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
3930 emit_insn (gen_selb (operands[0], operands[2], operands[1], mask));
;; V4SF min: same cgt mask as smax, but selb arguments are swapped so
;; the smaller element is selected.
3934 (define_expand "sminv4sf3"
3935 [(set (match_operand:V4SF 0 "register_operand" "=r")
3936 (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
3937 (match_operand:V4SF 2 "register_operand" "r")))]
3941 rtx mask = gen_reg_rtx (V4SImode);
3943 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
3944 emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
;; V2DF max: cgt_v2df yields a V2DI mask which must be viewed as V4SI
;; (via spu_gen_subreg) before feeding selb.
3948 (define_expand "smaxv2df3"
3949 [(set (match_operand:V2DF 0 "register_operand" "=r")
3950 (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
3951 (match_operand:V2DF 2 "register_operand" "r")))]
3955 rtx mask = gen_reg_rtx (V2DImode);
3956 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
3957 emit_insn (gen_selb (operands[0], operands[2], operands[1],
3958 spu_gen_subreg (V4SImode, mask)));
;; V2DF min: as smaxv2df3 but with the selb sources swapped.
3962 (define_expand "sminv2df3"
3963 [(set (match_operand:V2DF 0 "register_operand" "=r")
3964 (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
3965 (match_operand:V2DF 2 "register_operand" "r")))]
3969 rtx mask = gen_reg_rtx (V2DImode);
3970 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
3971 emit_insn (gen_selb (operands[0], operands[1], operands[2],
3972 spu_gen_subreg (V4SImode, mask)));
;; Widening unsigned multiply of the HIGH half (elements 0-3) of two
;; V8HI vectors into V4SI: mpyhhu multiplies the even (high) halfwords,
;; mpyu the odd ones, and a shufb interleaves the two partial results
;; into element order using the constant byte-selection mask below.
3976 (define_expand "vec_widen_umult_hi_v8hi"
3977 [(set (match_operand:V4SI 0 "register_operand" "=r")
3981 (match_operand:V8HI 1 "register_operand" "r")
3982 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
3985 (match_operand:V8HI 2 "register_operand" "r")
3986 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
3990 rtx ve = gen_reg_rtx (V4SImode);
3991 rtx vo = gen_reg_rtx (V4SImode);
3992 rtx mask = gen_reg_rtx (TImode);
;; Bytes 0x0n select from ve (first shufb source), 0x1n from vo (second).
3993 unsigned char arr[16] = {
3994 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
3995 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
3997 emit_move_insn (mask, array_to_constant (TImode, arr));
3998 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
3999 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
4000 emit_insn (gen_shufb (operands[0], ve, vo, mask));
;; Widening unsigned multiply of the LOW half (elements 4-7) of two V8HI
;; vectors into V4SI; same mpyhhu/mpyu + shufb scheme as the _hi variant,
;; with the mask selecting the lower-half product words.
4004 (define_expand "vec_widen_umult_lo_v8hi"
4005 [(set (match_operand:V4SI 0 "register_operand" "=r")
4009 (match_operand:V8HI 1 "register_operand" "r")
4010 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4013 (match_operand:V8HI 2 "register_operand" "r")
4014 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4018 rtx ve = gen_reg_rtx (V4SImode);
4019 rtx vo = gen_reg_rtx (V4SImode);
4020 rtx mask = gen_reg_rtx (TImode);
;; Bytes 0x0n select from ve, 0x1n from vo (second shufb source).
4021 unsigned char arr[16] = {
4022 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4023 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4025 emit_move_insn (mask, array_to_constant (TImode, arr));
4026 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
4027 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
4028 emit_insn (gen_shufb (operands[0], ve, vo, mask));
;; Signed counterpart of vec_widen_umult_hi_v8hi: mpyhh/mpy replace
;; mpyhhu/mpyu; mask and interleave are identical.
4032 (define_expand "vec_widen_smult_hi_v8hi"
4033 [(set (match_operand:V4SI 0 "register_operand" "=r")
4037 (match_operand:V8HI 1 "register_operand" "r")
4038 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4041 (match_operand:V8HI 2 "register_operand" "r")
4042 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4046 rtx ve = gen_reg_rtx (V4SImode);
4047 rtx vo = gen_reg_rtx (V4SImode);
4048 rtx mask = gen_reg_rtx (TImode);
4049 unsigned char arr[16] = {
4050 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4051 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4053 emit_move_insn (mask, array_to_constant (TImode, arr));
4054 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
4055 emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
4056 emit_insn (gen_shufb (operands[0], ve, vo, mask));
;; Signed counterpart of vec_widen_umult_lo_v8hi: mpyhh/mpy replace
;; mpyhhu/mpyu; mask and interleave are identical.
4060 (define_expand "vec_widen_smult_lo_v8hi"
4061 [(set (match_operand:V4SI 0 "register_operand" "=r")
4065 (match_operand:V8HI 1 "register_operand" "r")
4066 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4069 (match_operand:V8HI 2 "register_operand" "r")
4070 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4074 rtx ve = gen_reg_rtx (V4SImode);
4075 rtx vo = gen_reg_rtx (V4SImode);
4076 rtx mask = gen_reg_rtx (TImode);
4077 unsigned char arr[16] = {
4078 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4079 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4081 emit_move_insn (mask, array_to_constant (TImode, arr));
4082 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
4083 emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
4084 emit_insn (gen_shufb (operands[0], ve, vo, mask));
;; Realign a misaligned vector load: combine the two quadwords op1/op2
;; under permute mask op3 (as produced by spu_lvsr) with a single shufb.
4088 (define_expand "vec_realign_load_<mode>"
4089 [(set (match_operand:ALL 0 "register_operand" "=r")
4090 (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
4091 (match_operand:ALL 2 "register_operand" "r")
4092 (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
4096 emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
4100 (define_expand "spu_lvsr"
4101 [(set (match_operand:V16QI 0 "register_operand" "")
4102 (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
4107 rtx offset = gen_reg_rtx (V8HImode);
4108 rtx addr_bits = gen_reg_rtx (SImode);
4109 rtx addr_bits_vec = gen_reg_rtx (V8HImode);
4110 rtx splatqi = gen_reg_rtx (TImode);
4111 rtx result = gen_reg_rtx (V8HImode);
4112 unsigned char arr[16] = {
4113 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
4114 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
4115 unsigned char arr2[16] = {
4116 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
4117 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
4119 emit_move_insn (offset, array_to_constant (V8HImode, arr));
4120 emit_move_insn (splatqi, array_to_constant (TImode, arr2));
4122 gcc_assert (GET_CODE (operands[1]) == MEM);
4123 addr = force_reg (Pmode, XEXP (operands[1], 0));
4124 emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
4125 emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
4127 /* offset - (addr & 0xF)
4128 It is safe to use a single sfh, because each byte of offset is > 15 and
4129 each byte of addr is <= 15. */
4130 emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
4132 result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
4133 emit_move_insn (operands[0], result);