;; Target CPU.
(define_attr "cpu"
- "sh1,sh2,sh2e,sh3,sh3e,sh4,sh5"
+ "sh1,sh2,sh2e,sh2a,sh3,sh3e,sh4,sh4a,sh5"
(const (symbol_ref "sh_cpu_attr")))
(define_attr "endian" "big,little"
;; ftrc_s fix_truncsfsi2_i4
;; dfdiv double precision floating point divide (or square root)
;; cwb ic_invalidate_line_i
+;; movua SH4a unaligned load
+;; fsrra square root reciprocal approximate
+;; fsca sine and cosine approximate
;; tls_load load TLS related address
;; arith_media SHmedia arithmetic, logical, and shift instructions
;; cbranch_media SHmedia conditional branch instructions
;; nil no-op move, will be deleted.
(define_attr "type"
- "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
+ "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
(const_string "other"))
;; We define a new attribute namely "insn_class".We use
(cond [(eq_attr "med_branch_p" "yes")
(const_int 2)
(and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
- (symbol_ref "INSN"))
- (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
- (symbol_ref "code_for_indirect_jump_scratch")))
- (if_then_else (eq_attr "braf_branch_p" "yes")
- (const_int 6)
- (const_int 10))
+ (symbol_ref "INSN"))
+ (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
+ (symbol_ref "code_for_indirect_jump_scratch")))
+ (cond [(eq_attr "braf_branch_p" "yes")
+ (const_int 6)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 10)
+ (ne (symbol_ref "TARGET_SH2") (const_int 0))
+ (const_int 10)] (const_int 18))
(eq_attr "braf_branch_p" "yes")
(const_int 10)
;; ??? using pc is not computed transitively.
(const_int 4)
(const_int 2))))
-;; Load and store instructions save a cycle if they are aligned on a
-;; four byte boundary. Using a function unit for stores encourages
-;; gcc to separate load and store instructions by one instruction,
-;; which makes it more likely that the linker will be able to word
-;; align them when relaxing.
-
-;; SH-1 scheduling
-
-(define_automaton "sh1")
-(define_cpu_unit "sh1memory,sh1int,sh1mpy,sh1fp" "sh1")
-
-;; Loads have a latency of two.
-;; However, call insns can have a delay slot, so that we want one more
-;; insn to be scheduled between the load of the function address and the call.
-;; This is equivalent to a latency of three.
-;; ADJUST_COST can only properly handle reductions of the cost, so we
-;; use a latency of three here.
-;; We only do this for SImode loads of general registers, to make the work
-;; for ADJUST_COST easier.
-(define_insn_reservation "sh1_load_si" 3
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "load_si,pcload_si"))
- "sh1memory*2")
-
-(define_insn_reservation "sh1_load_store" 2
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "load,pcload,pload,store,pstore"))
- "sh1memory*2")
-
-(define_insn_reservation "sh1_arith3" 3
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "arith3,arith3b"))
- "sh1int*3")
-
-(define_insn_reservation "sh1_dyn_shift" 2
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "dyn_shift"))
- "sh1int*2")
-
-(define_insn_reservation "sh1_int" 1
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "!arith3,arith3b,dyn_shift"))
- "sh1int")
-
-;; ??? These are approximations.
-(define_insn_reservation "sh1_smpy" 2
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "smpy"))
- "sh1mpy*2")
-
-(define_insn_reservation "sh1_dmpy" 3
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "dmpy"))
- "sh1mpy*3")
-
-(define_insn_reservation "sh1_fp" 2
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "fp,fmove"))
- "sh1fp")
-
-(define_insn_reservation "sh1_fdiv" 13
- (and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "fdiv"))
- "sh1fp*12")
-
-;; SH-5 SHmedia scheduling
-;; When executing SHmedia code, the SH-5 is a fairly straightforward
-;; single-issue machine. It has four pipelines, the branch unit (br),
-;; the integer and multimedia unit (imu), the load/store unit (lsu), and
-;; the floating point unit (fpu).
-;;
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
-
-;; Every instruction on SH-5 occupies the issue resource for at least one
-;; cycle.
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media")) 1 1)
-
-;; Here model the instructions with a latency greater than one cycle.
-
-;; Specify the various types of instruction which have latency > 1
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "mcmp_media")) 2 1)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media")) 3 1)
-;; but see sh_adjust_cost for mac_media exception.
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "fload_media,fmove_media")) 4 1)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "d2mpy_media")) 4 2)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "pt_media,ptabs_media")) 5 1)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media")) 6 1)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media")
- (eq_attr "type" "invalidate_line_media")) 7 7)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfmul_media")) 9 4)
-
-(define_function_unit "sh5issue" 1 0
- (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "atrans_media")) 10 5)
+;; DFA descriptions for the pipelines
-;; Floating-point divide and square-root occupy an additional resource,
-;; which is not internally pipelined. However, other instructions
-;; can continue to issue.
-(define_function_unit "sh5fds" 1 0
- (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "fdiv_media")) 19 19)
+(include "sh1.md")
+(include "shmedia.md")
+(include "sh4.md")
-(define_function_unit "sh5fds" 1 0
- (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfdiv_media")) 35 35)
-
-; Definitions for filling branch delay slots.
+;; Definitions for filling delay slots
(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
;; Say that we have annulled true branches, since this gives smaller and
;; faster code when branches are predicted as not taken.
+;; ??? The non-annulled condition should really be "in_delay_slot",
+;; but insns that can be filled in non-annulled get priority over insns
+;; that can only be filled in anulled.
+
(define_delay
(and (eq_attr "type" "cbranch")
(ne (symbol_ref "TARGET_SH2") (const_int 0)))
;; SH2e has a hardware bug that pretty much prohibits the use of
;; annuled delay slots.
- [(eq_attr "in_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes")
+ [(eq_attr "cond_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes")
(not (eq_attr "cpu" "sh2e"))) (nil)])
\f
;; -------------------------------------------------------------------------
""
[(set_attr "length" "0")])
+(define_insn "udivsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divu %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than
;; hard register 0. If we used hard register 0, then the next instruction
;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg
emit_insn (gen_udivsi3_i4_media (operands[0], operands[1], operands[2]));
DONE;
}
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
else if (TARGET_SH5)
{
emit_move_insn (operands[3],
DONE;
}")
+(define_insn "divsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (div:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divs %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
(define_insn "divsi3_i1"
[(set (match_operand:SI 0 "register_operand" "=z")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
else
last = gen_divsi3_i4 (operands[0], operands[3]);
}
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_divsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
else if (TARGET_SHMEDIA_FPU)
{
operands[1] = force_reg (SImode, operands[1]);
"TARGET_SH1"
"")
+(define_insn "mul_r"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "mulr %2,%0"
+ [(set_attr "type" "dmpy")])
+
(define_insn "mul_l"
[(set (reg:SI MACL_REG)
(mult:SI (match_operand:SI 0 "arith_reg_operand" "r")
;;
;; shift left
+(define_insn "ashlsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH2A"
+ "shad %2,%0"
+ [(set_attr "type" "arith")
+ (set_attr "length" "4")])
+
;; This pattern is used by init_expmed for computing the costs of shift
;; insns.
; arithmetic shift right
;
+(define_insn "ashrsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2A"
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")
+ (set_attr "length" "4")])
+
(define_insn "ashrsi3_k"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
;; logical shift right
+(define_insn "lshrsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2A"
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")
+ (set_attr "length" "4")])
+
(define_insn "lshrsi3_d"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
"Q,rI08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))]
"TARGET_SH1
&& ! TARGET_SH2E
+ && ! TARGET_SH2A
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
;; TARGET_FMOVD is in effect, and mode switching is done before reload.
(define_insn "movsi_ie"
[(set (match_operand:SI 0 "general_movdst_operand"
- "=r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
+ "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
(match_operand:SI 1 "general_movsrc_operand"
- "Q,rI08,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
- "TARGET_SH2E
+ "Q,rI08,I20,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
+ "(TARGET_SH2E || TARGET_SH2A)
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
mov.l %1,%0
mov %1,%0
+ movi20 %1,%0
cmp/pl %1
mov.l %1,%0
sts %1,%0
flds %1,fpul
fmov %1,%0
! move optimized away"
- [(set_attr "type" "pcload_si,move,*,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,load,store,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
- (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
- (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
+ [(set_attr "type" "pcload_si,move,move,*,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,load,store,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
+ (set_attr "length" "*,*,4,*,4,*,*,*,4,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
(define_insn "movsi_i_lowpart"
[(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r"))
(unspec:SI [(match_operand 1 "register_operand" "")] UNSPEC_RA))]
"TARGET_SH1"
"#"
- "&& ! rtx_equal_function_value_matters"
+ "&& ! currently_expanding_to_rtl"
[(set (match_dup 0) (match_dup 1))]
"
{
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
DONE;
}
+ else if (TARGET_SH4A_ARCH)
+ {
+ emit_insn (gen_ic_invalidate_line_sh4a (operands[0]));
+ DONE;
+ }
operands[0] = force_reg (Pmode, operands[0]);
operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008,
Pmode)));
[(set_attr "length" "8")
(set_attr "type" "cwb")])
+(define_insn "ic_invalidate_line_sh4a"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SH4A_ARCH"
+ "ocbwb\\t@%0\;synco\;icbi\\t@%0"
+ [(set_attr "length" "16")
+ (set_attr "type" "cwb")])
+
;; ??? could make arg 0 an offsettable memory operand to allow to save
;; an add in the code that calculates the address.
(define_insn "ic_invalidate_line_media"
}
/* Try movi / mshflo.l */
val2 = (HOST_WIDE_INT) val >> 32;
- if (val2 == trunc_int_for_mode (val, SImode))
+ if (val2 == ((unsigned HOST_WIDE_INT)
+ trunc_int_for_mode (val, SImode)))
{
operands[1] = gen_mshflo_l_di (operands[0], operands[0],
operands[0]);
[(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
(match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
"TARGET_SH1
- && (! TARGET_SH4 || reload_completed
+ && (! (TARGET_SH4 || TARGET_SH2A_DOUBLE) || reload_completed
/* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
|| (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3)
|| (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3))
(match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
(use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c"))
(clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))]
- "TARGET_SH4
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
&& (arith_reg_operand (operands[0], DFmode)
|| arith_reg_operand (operands[1], DFmode))"
"@
[(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4))
(const_int 4)
(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
- (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
- (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
(const_int 4)
(const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
;; We can't use 4-byte push/pop on SHcompact, so we have to
(match_operand:DF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 "=X"))]
- "TARGET_SH4 && reload_completed
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed
&& (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
[(const_int 0)]
"
(match_operand:DF 1 "general_movsrc_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 ""))]
- "TARGET_SH4
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
&& reload_completed
&& true_regnum (operands[0]) < 16
&& true_regnum (operands[1]) < 16"
(match_operand:DF 1 "memory_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (reg:SI R0_REG))]
- "TARGET_SH4 && reload_completed"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed"
[(parallel [(set (match_dup 0) (match_dup 1))
(use (match_dup 2))
(clobber (scratch:SI))])]
(mem:DF (match_operand:SI 1 "register_operand" "")))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 ""))]
- "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
&& FP_OR_XD_REGISTER_P (true_regnum (operands[0]))
&& find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))"
[(const_int 0)]
(match_operand:DF 1 "memory_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 ""))]
- "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
&& FP_OR_XD_REGISTER_P (true_regnum (operands[0]))"
[(const_int 0)]
"
(match_operand:DF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 ""))]
- "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
&& FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
[(const_int 0)]
"
emit_insn (gen_movdf_media_nofpu (operands[0], operands[1]));
DONE;
}
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
DONE;
! move optimized away"
[(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,store,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,store,load,nil")
(set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*")
- (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,2,2,0")
+ (set_attr "length" "*,*,*,*,4,4,4,*,*,*,2,2,2,4,2,2,2,2,0")
(set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
(const_string "single")
(const_string "none")))])
if (GET_MODE (operands[0]) != DImode)
operands[0] = gen_rtx_SUBREG (DImode, operands[0], 0);
}
- else
+ else if (TARGET_SHMEDIA64)
{
operands[0] = shallow_copy_rtx (operands[0]);
PUT_MODE (operands[0], DImode);
}
+ else
+ {
+ rtx reg = gen_reg_rtx (DImode);
+
+ operands[0] = copy_to_mode_reg (SImode, operands[0]);
+ emit_insn (gen_extendsidi2 (reg, operands[0]));
+ operands[0] = reg;
+ }
}
if (! target_reg_operand (operands[0], DImode))
operands[0] = copy_to_mode_reg (DImode, operands[0]);
if (GET_MODE (operands[1]) != DImode)
operands[1] = gen_rtx_SUBREG (DImode, operands[1], 0);
}
- else
+ else if (TARGET_SHMEDIA64)
{
operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], DImode);
}
+ else
+ {
+ rtx reg = gen_reg_rtx (DImode);
+
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ emit_insn (gen_extendsidi2 (reg, operands[1]));
+ operands[1] = reg;
+ }
}
if (! target_reg_operand (operands[1], DImode))
operands[1] = copy_to_mode_reg (DImode, operands[1]);
(const_int 0))
(match_operand 1 "" "")
(match_operand 2 "" "")])]
- "TARGET_SH2E || TARGET_SHMEDIA"
+ "(TARGET_SH2E || TARGET_SH2A) || TARGET_SHMEDIA"
"
{
int i;
PUT_MODE (gotsym, Pmode);
insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
- RTX_UNCHANGING_P (SET_SRC (PATTERN (insn))) = 1;
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
DONE;
}")
(define_insn "tls_global_dynamic"
[(set (match_operand:SI 0 "register_operand" "=&z")
- (call (unspec:SI [(match_operand:SI 1 "" "")]
- UNSPEC_TLSGD)
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSGD))
(const_int 0)))
(use (reg:PSI FPSCR_REG))
(use (reg:SI PIC_REG))
(define_insn "tls_local_dynamic"
[(set (match_operand:SI 0 "register_operand" "=&z")
- (call (unspec:SI [(match_operand:SI 1 "" "")]
- UNSPEC_TLSLDM)
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSLDM))
(const_int 0)))
(use (reg:PSI FPSCR_REG))
(use (reg:SI PIC_REG))
"*
{
rtx diff_vec = PATTERN (next_real_insn (operands[2]));
- char *load;
+ const char *load;
if (GET_CODE (diff_vec) != ADDR_DIFF_VEC)
abort ();
{
rtx r18 = gen_rtx_REG (DImode, PR_MEDIA_REG);
- if (! call_used_regs[TR0_REG] || fixed_regs[TR0_REG])
+ if (! call_really_used_regs[TR0_REG] || fixed_regs[TR0_REG])
abort ();
tr_regno = TR0_REG;
tr = gen_rtx_REG (DImode, tr_regno);
[(use (match_operand 0 "register_operand" ""))]
""
{
- rtx tmp, ra = operands[0];
+ rtx ra = operands[0];
if (TARGET_SHMEDIA64)
emit_insn (gen_eh_set_ra_di (ra));
}
if (sh_expand_t_scc (EQ, operands[0]))
DONE;
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (EQ);
}")
}
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (LT);
}")
}
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (GT);
}")
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
sh_compare_op0, sh_compare_op1));
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (GTU);
}")
sh_compare_op1, sh_compare_op0));
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (LTU);
}")
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (LEU);
}")
DONE;
}
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (GEU);
}")
if (sh_expand_t_scc (NE, operands[0]))
DONE;
- if (! rtx_equal_function_value_matters)
+ if (! currently_expanding_to_rtl)
FAIL;
operands[1] = prepare_scc_operands (EQ);
operands[2] = gen_reg_rtx (SImode);
(define_expand "fpu_switch0"
[(set (match_operand:SI 0 "" "") (match_dup 2))
(set (match_dup 1) (mem:PSI (match_dup 0)))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"
{
operands[1] = get_fpscr_rtx ();
[(set (match_operand:SI 0 "" "") (match_dup 2))
(set (match_dup 3) (plus:SI (match_dup 0) (const_int 4)))
(set (match_dup 1) (mem:PSI (match_dup 3)))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"
{
operands[1] = get_fpscr_rtx ();
(define_expand "movpsi"
[(set (match_operand:PSI 0 "register_operand" "")
(match_operand:PSI 1 "general_movsrc_operand" ""))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"")
;; The c / m alternative is a fake to guide reload to load directly into
(define_split
[(set (reg:PSI FPSCR_REG)
(mem:PSI (match_operand:SI 0 "register_operand" "")))]
- "TARGET_SH4 && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
[(set (match_dup 0) (match_dup 0))]
"
{
(define_split
[(set (reg:PSI FPSCR_REG)
(mem:PSI (match_operand:SI 0 "register_operand" "")))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
[(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))]
"
{
(define_insn "toggle_sz"
[(set (reg:PSI FPSCR_REG)
(xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fschg"
- [(set_attr "fp_set" "unknown")])
+ [(set_attr "type" "fp") (set_attr "fp_set" "unknown")])
+
+;; There's no way we can use it today, since optimize mode switching
+;; doesn't enable us to know from which mode we're switching to the
+;; mode it requests, to tell whether we can use a relative mode switch
+;; (like toggle_pr) or an absolute switch (like loading fpscr from
+;; memory).
+(define_insn "toggle_pr"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
+ "fpchg"
+ [(set_attr "type" "fp")])
(define_expand "addsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "")
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
expand_sf_binop (&gen_mulsf3_i4, operands);
else if (TARGET_SH2E)
emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
(match_operand:SF 2 "fp_arith_reg_operand" "f")))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fmul %2,%0"
[(set_attr "type" "fp")])
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
{
emit_sf_insn (gen_floatsisf2_i4 (operands[0], operands[1], get_fpscr_rtx ()));
DONE;
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(float:SF (match_operand:SI 1 "fpul_operand" "y")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"float %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
(define_insn "*floatsisf2_ie"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(float:SF (match_operand:SI 1 "fpul_operand" "y")))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"float %1,%0"
[(set_attr "type" "fp")])
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
{
emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[0], operands[1], get_fpscr_rtx ()));
DONE;
[(set (match_operand:SI 0 "fpul_operand" "=y")
(fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"ftrc %1,%0"
[(set_attr "type" "ftrc_s")
(set_attr "fp_mode" "single")])
(define_insn "*fixsfsi"
[(set (match_operand:SI 0 "fpul_operand" "=y")
(fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"ftrc %1,%0"
[(set_attr "type" "fp")])
[(set (reg:SI T_REG)
(gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f")))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/gt %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
[(set (reg:SI T_REG)
(eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f")))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/eq %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
(ior:SI (reg:SI T_REG)
(eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f"))))]
- "TARGET_SH2E && TARGET_IEEE && ! TARGET_SH4"
+ "TARGET_SH2E && TARGET_IEEE && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")])
(gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/gt %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
(eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/eq %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
(eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
(match_operand:SF 1 "fp_arith_reg_operand" "f"))))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_IEEE && TARGET_SH4"
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")
(set_attr "fp_mode" "single")])
[(set_attr "type" "fdiv")
(set_attr "fp_mode" "single")])
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "immediate_operand" "i")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "0"))))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[1] == CONST1_RTX (SFmode)"
+ "fsrra %0"
+ [(set_attr "type" "fsrra")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "fsca"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (unspec:SF [(mult:SF
+ (float:SF (match_operand:SI 1 "fpul_operand" "y"))
+ (match_operand:SF 2 "immediate_operand" "i"))
+ ] UNSPEC_FSINA)
+ (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2))
+ ] UNSPEC_FCOSA)))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[2] == sh_fsca_int2sf ()"
+ "fsca fpul,%d0"
+ [(set_attr "type" "fsca")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "sinsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0));
+ DONE;
+}")
+
+(define_expand "cossf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4));
+ DONE;
+}")
+
+(define_expand "sindf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
+(define_expand "cosdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
(define_expand "abssf2"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_binop (&gen_adddf3_i, operands);
DONE;
(plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
(match_operand:DF 2 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fadd %2,%0"
[(set_attr "type" "dfp_arith")
(set_attr "fp_mode" "double")])
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_binop (&gen_subdf3_i, operands);
DONE;
(minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
(match_operand:DF 2 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fsub %2,%0"
[(set_attr "type" "dfp_arith")
(set_attr "fp_mode" "double")])
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_binop (&gen_muldf3_i, operands);
DONE;
(mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
(match_operand:DF 2 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fmul %2,%0"
[(set_attr "type" "dfp_arith")
(set_attr "fp_mode" "double")])
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(div:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_binop (&gen_divdf3_i, operands);
DONE;
(div:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
(match_operand:DF 2 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fdiv %2,%0"
[(set_attr "type" "dfdiv")
(set_attr "fp_mode" "double")])
(define_expand "floatsidf2"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(float:DF (match_operand:SI 1 "fpul_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
emit_df_insn (gen_floatsidf2_i (operands[0], operands[1],
get_fpscr_rtx ()));
[(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
(float:DF (match_operand:SI 1 "fpul_operand" "y")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"float %1,%0"
[(set_attr "type" "dfp_conv")
(set_attr "fp_mode" "double")])
(define_expand "fix_truncdfsi2"
[(set (match_operand:SI 0 "fpul_operand" "")
(fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
emit_df_insn (gen_fix_truncdfsi2_i (operands[0], operands[1],
get_fpscr_rtx ()));
[(set (match_operand:SI 0 "fpul_operand" "=y")
(fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"ftrc %1,%0"
[(set_attr "type" "dfp_conv")
(set_attr "dfp_comp" "no")
(gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fcmp/gt %1,%0"
[(set_attr "type" "dfp_cmp")
(set_attr "fp_mode" "double")])
(eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fcmp/eq %1,%0"
[(set_attr "type" "dfp_cmp")
(set_attr "fp_mode" "double")])
(eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f"))))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_IEEE && TARGET_SH4"
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")
(set_attr "fp_mode" "double")])
[(set (reg:SI T_REG)
(compare (match_operand:DF 0 "arith_operand" "")
(match_operand:DF 1 "arith_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
(define_expand "negdf2"
[(set (match_operand:DF 0 "arith_reg_operand" "")
(neg:DF (match_operand:DF 1 "arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_unop (&gen_negdf2_i, operands);
DONE;
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(neg:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fneg %0"
[(set_attr "type" "fmove")
(set_attr "fp_mode" "double")])
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "arith_reg_operand" "")
(sqrt:DF (match_operand:DF 1 "arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_unop (&gen_sqrtdf2_i, operands);
DONE;
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fsqrt %0"
[(set_attr "type" "dfdiv")
(set_attr "fp_mode" "double")])
(define_expand "absdf2"
[(set (match_operand:DF 0 "arith_reg_operand" "")
(abs:DF (match_operand:DF 1 "arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
expand_df_unop (&gen_absdf2_i, operands);
DONE;
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(abs:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fabs %0"
[(set_attr "type" "fmove")
(set_attr "fp_mode" "double")])
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
emit_df_insn (gen_extendsfdf2_i4 (operands[0], operands[1],
get_fpscr_rtx ()));
[(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
(float_extend:DF (match_operand:SF 1 "fpul_operand" "y")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fcnvsd %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "fpul_operand" "")
(float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
"
{
- if (TARGET_SH4)
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
{
emit_df_insn (gen_truncdfsf2_i4 (operands[0], operands[1],
get_fpscr_rtx ()));
[(set (match_operand:SF 0 "fpul_operand" "=y")
(float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
- "TARGET_SH4"
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fcnvds %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
"
{
rtx addr_target, orig_address, shift_reg, qi_val;
- HOST_WIDE_INT bitsize, size, v;
+ HOST_WIDE_INT bitsize, size, v = 0;
rtx x = operands[3];
/* ??? expmed doesn't care for non-register predicates. */
DONE;
}")
+
+(define_insn "movua"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extract:SI (match_operand:SI 1 "unaligned_load_operand" "Sua>")
+ (const_int 32) (const_int 0)))]
+ "TARGET_SH4A_ARCH"
+ "movua.l %1,%0"
+ [(set_attr "type" "movua")])
+
+;; We shouldn't need this, but cse replaces increments with references
+;; to other regs before flow has a chance to create post_inc
+;; addressing modes, and only postreload's cse_move2add brings the
+;; increments back to a usable form.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" ""))
+ (const_int 32) (const_int 0)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (post_inc:SI
+ (match_operand:SI 1 "register_operand" "")))
+ (const_int 32) (const_int 0)))]
+ "")
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH"
+{
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
+ {
+ emit_insn (gen_movua (operands[0],
+ adjust_address (operands[1], SImode, 0)));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH"
+{
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
+ {
+ emit_insn (gen_movua (operands[0],
+ adjust_address (operands[1], SImode, 0)));
+ DONE;
+ }
+
+ FAIL;
+})
+
\f
;; -------------------------------------------------------------------------
;; Peepholes
(match_operand 1 "sh_rep_vec" ""))]
"TARGET_SHMEDIA && reload_completed
&& GET_MODE (operands[0]) == GET_MODE (operands[1])
- && VECTOR_MODE_SUPPORTED_P (GET_MODE (operands[0]))
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))
&& GET_MODE_SIZE (GET_MODE (operands[0])) == 8
&& (XVECEXP (operands[1], 0, 0) != const0_rtx
|| XVECEXP (operands[1], 0, 1) != const0_rtx)
(match_operand 1 "sh_const_vec" ""))]
"TARGET_SHMEDIA && reload_completed
&& GET_MODE (operands[0]) == GET_MODE (operands[1])
- && VECTOR_MODE_SUPPORTED_P (GET_MODE (operands[0]))
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))
&& operands[1] != CONST0_RTX (GET_MODE (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
"
"byterev %1, %0"
[(set_attr "type" "arith_media")])
-(define_insn "prefetch"
+(define_insn "prefetch_media"
[(prefetch (match_operand:QI 0 "address_operand" "p")
(match_operand:SI 1 "const_int_operand" "n")
(match_operand:SI 2 "const_int_operand" "n"))]
}"
[(set_attr "type" "other")])
-;; The following description models the
-;; SH4 pipeline using the DFA based scheduler.
-;; The DFA based description is better way to model
-;; a superscalar pipeline as compared to function unit
-;; reservation model.
-;; 1. The function unit based model is oriented to describe at most one
-;; unit reservation by each insn. It is difficult to model unit reservations in multiple
-;; pipeline units by same insn. This can be done using DFA based description.
-;; 2. The execution performance of DFA based scheduler does not depend on processor complexity.
-;; 3. Writing all unit reservations for an instruction class is more natural description
-;; of the pipeline and makes interface of the hazard recognizer simpler than the
-;; old function unit based model.
-;; 4. The DFA model is richer and is a part of greater overall framework of RCSP.
-
-
-;; Two automata are defined to reduce number of states
-;; which a single large automaton will have.(Factoring)
-
-(define_automaton "inst_pipeline,fpu_pipe")
-
-;; This unit is basically the decode unit of the processor.
-;; Since SH4 is a dual issue machine,it is as if there are two
-;; units so that any insn can be processed by either one
-;; of the decoding unit.
-
-(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
-
-
-;; The fixed point arithmetic calculator(?? EX Unit).
-
-(define_cpu_unit "int" "inst_pipeline")
-
-;; f1_1 and f1_2 are floating point units.Actually there is
-;; a f1 unit which can overlap with other f1 unit but
-;; not another F1 unit.It is as though there were two
-;; f1 units.
-
-(define_cpu_unit "f1_1,f1_2" "fpu_pipe")
-
-;; The floating point units (except FS - F2 always precedes it.)
-
-(define_cpu_unit "F0,F1,F2,F3" "fpu_pipe")
-
-;; This is basically the MA unit of SH4
-;; used in LOAD/STORE pipeline.
-
-(define_cpu_unit "memory" "inst_pipeline")
-
-;; However, there are LS group insns that don't use it, even ones that
-;; complete in 0 cycles. So we use an extra unit for the issue of LS insns.
-(define_cpu_unit "load_store" "inst_pipeline")
-
-;; The address calculator used for branch instructions.
-;; This will be reserved after "issue" of branch instructions
-;; and this is to make sure that no two branch instructions
-;; can be issued in parallel.
-
-(define_cpu_unit "pcr_addrcalc" "inst_pipeline")
-
-;; ----------------------------------------------------
-;; This reservation is to simplify the dual issue description.
-
-(define_reservation "issue" "pipe_01|pipe_02")
-
-;; This is to express the locking of D stage.
-;; Note that the issue of a CO group insn also effectively locks the D stage.
-
-(define_reservation "d_lock" "pipe_01+pipe_02")
-
-;; Every FE instruction but fipr / ftrv starts with issue and this.
-(define_reservation "F01" "F0+F1")
-
-;; This is to simplify description where F1,F2,FS
-;; are used simultaneously.
-
-(define_reservation "fpu" "F1+F2")
-
-;; This is to highlight the fact that f1
-;; cannot overlap with F1.
-
-(exclusion_set "f1_1,f1_2" "F1")
-
-(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
-
-;; Although reg moves have a latency of zero
-;; we need to highlight that they use D stage
-;; for one cycle.
-
-;; Group: MT
-
-(define_insn_reservation "reg_mov" 0
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "move"))
- "issue")
-
-;; Group: LS
+(define_insn "prefetch_i4"
+ [(prefetch (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_HARD_SH4"
+ "*
+{
+ return \"pref @%0\";
+}"
+ [(set_attr "type" "other")])
-(define_insn_reservation "freg_mov" 0
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fmove"))
- "issue+load_store")
-
-;; We don't model all pipeline stages; we model the issue ('D') stage
-;; inasmuch as we allow only two instructions to issue simultaneously,
-;; and CO instructions prevent any simultaneous issue of another instruction.
-;; (This uses pipe_01 and pipe_02).
-;; Double issue of EX insns is prevented by using the int unit in the EX stage.
-;; Double issue of EX / BR insns is prevented by using the int unit /
-;; pcr_addrcalc unit in the EX stage.
-;; Double issue of BR / LS instructions is prevented by using the
-;; pcr_addrcalc / load_store unit in the issue cycle.
-;; Double issue of FE instructions is prevented by using F0 in the first
-;; pipeline stage after the first D stage.
-;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage
-;; (except in the cases outlined above), nor to describe the FS stage after
-;; the F2 stage.
-
-;; Other MT group instructions(1 step operations)
-;; Group: MT
-;; Latency: 1
-;; Issue Rate: 1
-
-(define_insn_reservation "mt" 1
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "mt_group"))
- "issue")
-
-;; Fixed Point Arithmetic Instructions(1 step operations)
-;; Group: EX
-;; Latency: 1
-;; Issue Rate: 1
-
-(define_insn_reservation "sh4_simple_arith" 1
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "insn_class" "ex_group"))
- "issue,int")
-
-;; Load and store instructions have no alignment peculiarities for the SH4,
-;; but they use the load-store unit, which they share with the fmove type
-;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) .
-;; Loads have a latency of two.
-;; However, call insns can only paired with a preceding insn, and have
-;; a delay slot, so that we want two more insns to be scheduled between the
-;; load of the function address and the call. This is equivalent to a
-;; latency of three.
-;; ADJUST_COST can only properly handle reductions of the cost, so we
-;; use a latency of three here, which gets multiplied by 10 to yield 30.
-;; We only do this for SImode loads of general registers, to make the work
-;; for ADJUST_COST easier.
-
-;; Load Store instructions. (MOV.[BWL]@(d,GBR)
-;; Group: LS
-;; Latency: 2
-;; Issue Rate: 1
-
-(define_insn_reservation "sh4_load" 2
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "load,pcload"))
- "issue+load_store,nothing,memory")
-
-;; calls / sfuncs need an extra instruction for their delay slot.
-;; Moreover, estimating the latency for SImode loads as 3 will also allow
-;; adjust_cost to meaningfully bump it back up to 3 if they load the shift
-;; count of a dynamic shift.
-(define_insn_reservation "sh4_load_si" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "load_si,pcload_si"))
- "issue+load_store,nothing,memory")
-
-;; (define_bypass 2 "sh4_load_si" "!sh4_call")
-
-;; The load latency is upped to three higher if the dependent insn does
-;; double precision computation. We want the 'default' latency to reflect
-;; that increased latency because otherwise the insn priorities won't
-;; allow proper scheduling.
-(define_insn_reservation "sh4_fload" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fload,pcfload"))
- "issue+load_store,nothing,memory")
-
-;; (define_bypass 2 "sh4_fload" "!")
-
-(define_insn_reservation "sh4_store" 1
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "store"))
- "issue+load_store,nothing,memory")
-
-;; Load Store instructions.
-;; Group: LS
-;; Latency: 1
-;; Issue Rate: 1
-
-(define_insn_reservation "sh4_gp_fpul" 1
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "gp_fpul"))
- "issue+load_store")
-
-;; Load Store instructions.
-;; Group: LS
-;; Latency: 3
-;; Issue Rate: 1
-
-(define_insn_reservation "sh4_fpul_gp" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fpul_gp"))
- "issue+load_store")
-
-;; Branch (BF,BF/S,BT,BT/S,BRA)
-;; Group: BR
-;; Latency when taken: 2 (or 1)
-;; Issue Rate: 1
-;; The latency is 1 when displacement is 0.
-;; We can't really do much with the latency, even if we could express it,
-;; but the pairing restrictions are useful to take into account.
-;; ??? If the branch is likely, we might want to fill the delay slot;
-;; if the branch is likely, but not very likely, should we pretend to use
-;; a resource that CO instructions use, to get a pairable delay slot insn?
-
-(define_insn_reservation "sh4_branch" 1
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "cbranch,jump"))
- "issue+pcr_addrcalc")
-
-;; Branch Far (JMP,RTS,BRAF)
-;; Group: CO
-;; Latency: 3
-;; Issue Rate: 2
-;; ??? Scheduling happens before branch shortening, and hence jmp and braf
-;; can't be distinguished from bra for the "jump" pattern.
-
-(define_insn_reservation "sh4_return" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "return,jump_ind"))
- "d_lock*2")
-
-;; RTE
-;; Group: CO
-;; Latency: 5
-;; Issue Rate: 5
-;; this instruction can be executed in any of the pipelines
-;; and blocks the pipeline for next 4 stages.
-
-(define_insn_reservation "sh4_return_from_exp" 5
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "rte"))
- "d_lock*5")
-
-;; OCBP, OCBWB
-;; Group: CO
-;; Latency: 1-5
-;; Issue Rate: 1
-
-;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2
-;; ocbwb on its own would be "d_lock,nothing,memory*5"
-(define_insn_reservation "ocbwb" 6
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "cwb"))
- "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2")
-
-;; LDS to PR,JSR
-;; Group: CO
-;; Latency: 3
-;; Issue Rate: 2
-;; The SX stage is blocked for last 2 cycles.
-;; OTOH, the only time that has an effect for insns generated by the compiler
-;; is when lds to PR is followed by sts from PR - and that is highly unlikely -
-;; or when we are doing a function call - and we don't do inter-function
-;; scheduling. For the function call case, it's really best that we end with
-;; something that models an rts.
-
-(define_insn_reservation "sh4_lds_to_pr" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "prset") )
- "d_lock*2")
-
-;; calls introduce a longisch delay that is likely to flush the pipelines
-;; of the caller's instructions. Ordinary functions tend to end with a
-;; load to restore a register (in the delay slot of rts), while sfuncs
-;; tend to end with an EX or MT insn. But that is not actually relevant,
-;; since there are no instructions that contend for memory access early.
-;; We could, of course, provide exact scheduling information for specific
-;; sfuncs, if that should prove useful.
-
-(define_insn_reservation "sh4_call" 16
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "call,sfunc"))
- "d_lock*16")
-
-;; LDS.L to PR
-;; Group: CO
-;; Latency: 3
-;; Issue Rate: 2
-;; The SX unit is blocked for last 2 cycles.
-
-(define_insn_reservation "ldsmem_to_pr" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "pload"))
- "d_lock*2")
-
-;; STS from PR
-;; Group: CO
-;; Latency: 2
-;; Issue Rate: 2
-;; The SX unit in second and third cycles.
-
-(define_insn_reservation "sts_from_pr" 2
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "prget"))
- "d_lock*2")
-
-;; STS.L from PR
-;; Group: CO
-;; Latency: 2
-;; Issue Rate: 2
-
-(define_insn_reservation "sh4_prstore_mem" 2
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "pstore"))
- "d_lock*2,nothing,memory")
-
-;; LDS to FPSCR
-;; Group: CO
-;; Latency: 4
-;; Issue Rate: 1
-;; F1 is blocked for last three cycles.
-
-(define_insn_reservation "fpscr_load" 4
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "gp_fpscr"))
- "d_lock,nothing,F1*3")
-
-;; LDS.L to FPSCR
-;; Group: CO
-;; Latency: 1 / 4
-;; Latency to update Rn is 1 and latency to update FPSCR is 4
-;; Issue Rate: 1
-;; F1 is blocked for last three cycles.
-
-(define_insn_reservation "fpscr_load_mem" 4
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "mem_fpscr"))
- "d_lock,nothing,(F1+memory),F1*2")
+(define_expand "prefetch"
+ [(prefetch (match_operand:QI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SHMEDIA || TARGET_HARD_SH4"
+ "
+{
+ if (TARGET_HARD_SH4 && ! register_operand (operands[0], SImode))
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_move_insn (reg, operands[0]);
+ operands[0] = reg;
+ }
-\f
-;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
-;; Group: CO
-;; Latency: 4 / 4
-;; Issue Rate: 1
-
-(define_insn_reservation "multi" 4
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "smpy,dmpy"))
- "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")
-
-;; Fixed STS from MACL / MACH
-;; Group: CO
-;; Latency: 3
-;; Issue Rate: 1
-
-(define_insn_reservation "sh4_mac_gp" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "mac_gp"))
- "d_lock")
-
-
-;; Single precision floating point computation FCMP/EQ,
-;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG
-;; Group: FE
-;; Latency: 3/4
-;; Issue Rate: 1
-
-(define_insn_reservation "fp_arith" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fp"))
- "issue,F01,F2")
-
-(define_insn_reservation "fp_arith_ftrc" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "ftrc_s"))
- "issue,F01,F2")
-
-(define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp")
-
-;; Single Precision FDIV/SQRT
-;; Group: FE
-;; Latency: 12/13 (FDIV); 11/12 (FSQRT)
-;; Issue Rate: 1
-;; We describe fdiv here; fsqrt is actually one cycle faster.
-
-(define_insn_reservation "fp_div" 12
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fdiv"))
- "issue,F01+F3,F2+F3,F3*7,F1+F3,F2")
-
-;; Double Precision floating point computation
-;; (FCNVDS, FCNVSD, FLOAT, FTRC)
-;; Group: FE
-;; Latency: (3,4)/5
-;; Issue Rate: 1
-
-(define_insn_reservation "dp_float" 4
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "dfp_conv"))
- "issue,F01,F1+F2,F2")
-
-;; Double-precision floating-point (FADD,FMUL,FSUB)
-;; Group: FE
-;; Latency: (7,8)/9
-;; Issue Rate: 1
-
-(define_insn_reservation "fp_double_arith" 8
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "dfp_arith"))
- "issue,F01,F1+F2,fpu*4,F2")
-
-;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
-;; Group: CO
-;; Latency: 3/5
-;; Issue Rate: 2
-
-(define_insn_reservation "fp_double_cmp" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "dfp_cmp"))
- "d_lock,(d_lock+F01),F1+F2,F2")
-
-;; Double precision FDIV/SQRT
-;; Group: FE
-;; Latency: (24,25)/26
-;; Issue Rate: 1
-
-(define_insn_reservation "dp_div" 25
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "dfdiv"))
- "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2")
-
-
-;; Use the branch-not-taken case to model arith3 insns. For the branch taken
-;; case, we'd get a d_lock instead of issue at the end.
-(define_insn_reservation "arith3" 3
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "arith3"))
- "issue,d_lock+pcr_addrcalc,issue")
-
-;; arith3b insns schedule the same no matter if the branch is taken or not.
-(define_insn_reservation "arith3b" 2
- (and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "arith3"))
- "issue,d_lock+pcr_addrcalc")
+ emit_insn ((TARGET_SHMEDIA ? gen_prefetch_media : gen_prefetch_i4)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")