+2003-06-02 Ben Elliston <bje@wasabisystems.com>
+
+ * config/arm/arm.c (arm_use_dfa_pipeline_interface): Declare.
+ (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): Define if not already defined.
+ (arm_use_dfa_pipeline_interface): Implement.
+ * config/arm/arm.md (arm): New automaton.
+ (write_buf): Remove function units; new cpu unit.
+ (write_blockage): Remove function units; new cpu unit.
+ (core): Remove function units; new cpu unit.
+ (r_mem_f_wbuf): New instruction reservation.
+ (store1_wbuf, store2_wbuf, store3_wbuf, store4_wbuf): Likewise.
+ (store1_ldsched, store2, store3, store4): Likewise.
+ (load_ldsched, load_ldsched_xscale, load_or_store): Likewise.
+ (mult, mult_ldsched, mult_ldsched_strongarm): Likewise.
+ (multi_cycle, single_cycle): Likewise.
+ * config/arm/fpa.md (armfp): New automaton.
+ (fpa): Remove function units; new cpu unit.
+ (fpa_mem): Remove function unit; new cpu unit.
+ (fdivx, fdivd, fdivs, fmul, ffmul, farith, ffarith): New reservations.
+ (r_2_f, f_2_r, f_load, f_store, r_mem_f, f_mem_r): Likewise.
+
2003-06-01 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* builtin-attrs.def (ATTR_ASM_FPRINTF): New.
;; distant label. Only applicable to Thumb code.
(define_attr "far_jump" "yes,no" (const_string "no"))
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
+(define_automaton "arm")
-;;--------------------------------------------------------------------
;; Write buffer
-;;--------------------------------------------------------------------
+;
; Strictly, we should model a 4-deep write buffer for ARM7xx based chips
;
; The write buffer on some of the arm6 processors is hard to model exactly.
; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous
; (they aren't allowed to be at present) then there is a startup cost of 1MCLK
; cycle to add as well.
+(define_cpu_unit "write_buf" "arm")
-(define_function_unit "write_buf" 1 2
- (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store1,r_mem_f")) 5 3)
-(define_function_unit "write_buf" 1 2
- (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store2")) 7 4)
-(define_function_unit "write_buf" 1 2
- (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store3")) 9 5)
-(define_function_unit "write_buf" 1 2
- (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store4")) 11 6)
-
-;;--------------------------------------------------------------------
;; Write blockage unit
-;;--------------------------------------------------------------------
+;
; The write_blockage unit models (partially), the fact that reads will stall
; until the write buffer empties.
; The f_mem_r and r_mem_f could also block, but they are to the stack,
; so we don't model them here
-(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store1")) 5 5
- [(eq_attr "write_conflict" "yes")])
-(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store2")) 7 7
- [(eq_attr "write_conflict" "yes")])
-(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
- (eq_attr "type" "store3")) 9 9
- [(eq_attr "write_conflict" "yes")])
-(define_function_unit "write_blockage" 1 0
- (and (eq_attr "model_wbuf" "yes") (eq_attr "type" "store4")) 11 11
- [(eq_attr "write_conflict" "yes")])
-(define_function_unit "write_blockage" 1 0
- (and (eq_attr "model_wbuf" "yes")
- (eq_attr "write_conflict" "yes")) 1 1)
-
-;;--------------------------------------------------------------------
-;; Core unit
-;;--------------------------------------------------------------------
-; Everything must spend at least one cycle in the core unit
-(define_function_unit "core" 1 0 (eq_attr "core_cycles" "single") 1 1)
-
-(define_function_unit "core" 1 0
- (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1")) 1 1)
-
-(define_function_unit "core" 1 0
- (and (eq_attr "ldsched" "yes") (eq_attr "type" "load")) 2 1)
-
-;; We do not need to conditionalize the define_function_unit immediately
-;; above. This one will be ignored for anything other than xscale
-;; compiles and for xscale compiles it provides a larger delay
-;; and the scheduler will DTRT.
-;; FIXME: this test needs to be revamped to not depend on this feature
-;; of the scheduler.
-
-(define_function_unit "core" 1 0
- (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
- (eq_attr "is_xscale" "yes"))
- 3 1)
+(define_cpu_unit "write_blockage" "arm")
-(define_function_unit "core" 1 0
- (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1")) 2 2)
+;; Core
+;
+(define_cpu_unit "core" "arm")
-(define_function_unit "core" 1 0
- (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_load")) 3 3)
+(define_insn_reservation "r_mem_f_wbuf" 5
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "r_mem_f"))
+ "core+write_buf*3")
-(define_function_unit "core" 1 0
- (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_store")) 4 4)
+(define_insn_reservation "store1_wbuf" 5
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store1"))
+ "core+write_buf*3+write_blockage*5")
-(define_function_unit "core" 1 0
- (and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f")) 6 6)
+(define_insn_reservation "store2_wbuf" 7
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store2"))
+ "core+write_buf*4+write_blockage*7")
-(define_function_unit "core" 1 0
- (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r")) 7 7)
+(define_insn_reservation "store3_wbuf" 9
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store3"))
+ "core+write_buf*5+write_blockage*9")
-(define_function_unit "core" 1 0
- (and (eq_attr "ldsched" "no") (eq_attr "type" "mult")) 16 16)
+(define_insn_reservation "store4_wbuf" 11
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store4"))
+ "core+write_buf*6+write_blockage*11")
-(define_function_unit "core" 1 0
- (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "no"))
- (eq_attr "type" "mult")) 4 4)
+(define_insn_reservation "store2" 3
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store2"))
+ "core*3")
-(define_function_unit "core" 1 0
- (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "yes"))
- (eq_attr "type" "mult")) 3 2)
+(define_insn_reservation "store3" 4
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store3"))
+ "core*4")
+
+(define_insn_reservation "store4" 5
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store4"))
+ "core*5")
+
+(define_insn_reservation "store1_ldsched" 1
+ (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1"))
+ "core")
+
+(define_insn_reservation "load_ldsched_xscale" 3
+ (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
+ (eq_attr "is_xscale" "yes"))
+ "core")
-(define_function_unit "core" 1 0 (eq_attr "type" "store2") 3 3)
+(define_insn_reservation "load_ldsched" 2
+ (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
+ (eq_attr "is_xscale" "no"))
+ "core")
+
+(define_insn_reservation "load_or_store" 2
+ (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1"))
+ "core*2")
-(define_function_unit "core" 1 0 (eq_attr "type" "store3") 4 4)
+(define_insn_reservation "mult" 16
+ (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))
+ "core*16")
-(define_function_unit "core" 1 0 (eq_attr "type" "store4") 5 5)
+(define_insn_reservation "mult_ldsched_strongarm" 3
+ (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "yes"))
+ (eq_attr "type" "mult"))
+ "core*2")
-(define_function_unit "core" 1 0
+(define_insn_reservation "mult_ldsched" 4
+ (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "no"))
+ (eq_attr "type" "mult"))
+ "core*4")
+
+(define_insn_reservation "multi_cycle" 32
(and (eq_attr "core_cycles" "multi")
- (eq_attr "type" "!mult,load,store1,store2,store3,store4")) 32 32)
+ (eq_attr "type" "!mult,load,store1,store2,store3,store4"))
+ "core*32")
+
+(define_insn_reservation "single_cycle" 1
+ (eq_attr "core_cycles" "single")
+ "core")
+
\f
;;---------------------------------------------------------------------------
;; Insn patterns
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
-;;--------------------------------------------------------------------
-;; Floating point unit (FPA)
-;;--------------------------------------------------------------------
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "fdivx")) 71 69)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "fdivd")) 59 57)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "fdivs")) 31 29)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "fmul")) 9 7)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "ffmul")) 6 4)
+;; FPA automaton.
+(define_automaton "armfp")
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "farith")) 4 2)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "ffarith")) 2 2)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "r_2_f")) 5 3)
-
-(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "f_2_r")) 1 2)
-
-; The fpa10 doesn't really have a memory read unit, but it can start to
-; speculatively execute the instruction in the pipeline, provided the data
-; is already loaded, so pretend reads have a delay of 2 (and that the
-; pipeline is infinite).
+;; Floating point unit (FPA)
+(define_cpu_unit "fpa" "armfp")
+
+; The fpa10 doesn't really have a memory read unit, but it can start
+; to speculatively execute the instruction in the pipeline, provided
+; the data is already loaded, so pretend reads have a delay of 2 (and
+; that the pipeline is infinite).
+(define_cpu_unit "fpa_mem" "arm")
+
+(define_insn_reservation "fdivx" 71
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivx"))
+ "core+fpa*69")
+
+(define_insn_reservation "fdivd" 59
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivd"))
+ "core+fpa*57")
+
+(define_insn_reservation "fdivs" 31
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivs"))
+ "core+fpa*29")
+
+(define_insn_reservation "fmul" 9
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fmul"))
+ "core+fpa*7")
+
+(define_insn_reservation "ffmul" 6
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "ffmul"))
+ "core+fpa*4")
+
+(define_insn_reservation "farith" 4
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "farith"))
+ "core+fpa*2")
+
+(define_insn_reservation "ffarith" 2
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "ffarith"))
+ "core+fpa*2")
+
+(define_insn_reservation "r_2_f" 5
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "r_2_f"))
+ "core+fpa*3")
+
+(define_insn_reservation "f_2_r" 1
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "f_2_r"))
+ "core+fpa*2")
+
+(define_insn_reservation "f_load" 3
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_load"))
+ "fpa_mem+core*3")
+
+(define_insn_reservation "f_store" 4
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_store"))
+ "core*4")
+
+(define_insn_reservation "r_mem_f" 6
+ (and (eq_attr "model_wbuf" "no")
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f")))
+ "core*6")
+
+(define_insn_reservation "f_mem_r" 7
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r"))
+ "core*7")
-(define_function_unit "fpa_mem" 1 0 (and (eq_attr "fpu" "fpa")
- (eq_attr "type" "f_load")) 3 1)
(define_insn "*addsf3_fpa"
[(set (match_operand:SF 0 "s_register_operand" "=f,f")