;;- Machine description for HP PA-RISC architecture for GNU C compiler
-;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
-;; Free Software Foundation, Inc.
+;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+;; 2002 Free Software Foundation, Inc.
;; Contributed by the Center for Software Science at the University
;; of Utah.
;;
;; FIXME: Add 800 scheduling for completeness?
-(define_attr "cpu" "700,7100,7100LC,7200,8000" (const (symbol_ref "pa_cpu_attr")))
+(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr")))
;; Length (in # of bytes).
(define_attr "length" ""
(const_int 0)))
[(eq_attr "in_branch_delay" "true") (nil) (nil)])
-;; Function units of the HPPA. The following data is for the 700 CPUs
-;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
-;; Scheduling instructions for PA-83 machines according to the Snake
-;; constraints shouldn't hurt.
-
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
-
-;; The integer ALU.
-;; (Noted only for documentation; units that take one cycle do not need to
-;; be specified.)
-
-;; (define_function_unit "alu" 1 0
-;; (and (eq_attr "type" "unary,shift,nullshift,binary,move,address")
-;; (eq_attr "cpu" "700"))
-;; 1 0)
-
-
;; Memory. Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "700")) 2 0)
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "700")) 3 3)
-
;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fdiv,dbl 12 MPY 12
;; fsqrt,sgl 14 MPY 14
;; fsqrt,dbl 18 MPY 18
+;;
+;; We don't model fmpyadd/fmpysub properly as those instructions
+;; keep both the FP ALU and MPY units busy. Given that these
+;; processors are obsolete, I'm not going to spend the time to
+;; model those instructions correctly.
+
+;; DFA for the 700.  mem_700, fpalu_700 and fpmpy_700 are reserved by
+;; the memory and FP reservations below; dummy_700 is reserved only by
+;; the catch-all reservation "W11".
+(define_automaton "pa700")
+(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")
-(define_function_unit "pa700fp_alu" 1 0
+(define_insn_reservation "W0" 4
(and (eq_attr "type" "fpcc")
- (eq_attr "cpu" "700")) 4 2)
-(define_function_unit "pa700fp_alu" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W1" 3
(and (eq_attr "type" "fpalu")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W2" 3
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*2")
+
+(define_insn_reservation "W3" 10
(and (eq_attr "type" "fpdivsgl")
- (eq_attr "cpu" "700")) 10 10)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*10")
+
+(define_insn_reservation "W4" 12
(and (eq_attr "type" "fpdivdbl")
- (eq_attr "cpu" "700")) 12 12)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*12")
+
+(define_insn_reservation "W5" 14
(and (eq_attr "type" "fpsqrtsgl")
- (eq_attr "cpu" "700")) 14 14)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*14")
+
+(define_insn_reservation "W6" 18
(and (eq_attr "type" "fpsqrtdbl")
- (eq_attr "cpu" "700")) 18 18)
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*18")
+
+;; Memory operations on the 700.  Loads (integer and FP) have a default
+;; latency of 2 and reserve the memory unit for a single cycle.
+;; NOTE(review): the Mustang timing table lists fpload as 3 cycles, but
+;; "W8" uses 2 -- confirm which value is intended.
+(define_insn_reservation "W7" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+(define_insn_reservation "W8" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+;; Stores (integer and FP) have a default latency of 3 and keep the
+;; memory unit reserved for all three cycles, so no other memory
+;; operation can be issued while a store is in progress.
+(define_insn_reservation "W9" 3
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+(define_insn_reservation "W10" 3
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+;; Catch-all for the 700: every insn type not listed in the negated
+;; ("!") type list has a default latency of 1 and reserves only the
+;; dummy unit for one cycle.
+(define_insn_reservation "W11" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "700"))
+ "dummy_700")
+
+;; We have a bypass for all computations in the FP unit which feed an
+;; FP store as long as the sizes are the same.
+;; Each bypass latency is one cycle less than the default latency of
+;; the producing reservation (W1/W2: 3, W3: 10, W4: 12, W5: 14, W6: 18).
+;; The bypass only applies when hppa_fpstore_bypass_p accepts the pair.
+(define_bypass 2 "W1,W2" "W10" "hppa_fpstore_bypass_p")
+(define_bypass 9 "W3" "W10" "hppa_fpstore_bypass_p")
+(define_bypass 11 "W4" "W10" "hppa_fpstore_bypass_p")
+(define_bypass 13 "W5" "W10" "hppa_fpstore_bypass_p")
+(define_bypass 17 "W6" "W10" "hppa_fpstore_bypass_p")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+;; Here the latency (4) is larger than the default latency of the FP
+;; load reservation "W8" (2).
+(define_bypass 4 "W8" "W10" "hppa_fpstore_bypass_p")
;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
-
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "7100,7100LC")) 2 0)
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "7100,7100LC")) 2 2)
-
+;;
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
+;; There's no value in modeling the ALU and MUL separately though
+;; since there can never be a functional unit conflict given the
+;; latency and issue rates for those units.
+;;
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fcpy 2 ALU 1
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
-(define_function_unit "pa7100fp_alu" 1 0
- (and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_mpy" 1 0
- (and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_div" 1 0
- (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100")) 8 8)
-(define_function_unit "pa7100fp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 15 15)
+;; DFA for the 7100/7150.  i_7100 is reserved by loads, stores and all
+;; other non-FP insns; f_7100 is reserved by FP computations, so one
+;; insn of each class can be issued in the same cycle.  fpmac_7100 is
+;; the combined FP ALU/MUL unit, fpdivsqrt_7100 the divide/sqrt unit
+;; and mem_7100 the memory unit.
+(define_automaton "pa7100")
+(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
-;; To encourage dual issue we define function units corresponding to
-;; the instructions which can be dual issued. This is a rather crude
-;; approximation, the "pa7100nonflop" test in particular could be refined.
-(define_function_unit "pa7100flop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
-
-(define_function_unit "pa7100nonflop" 1 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
+;; FP compare, ALU and multiply insns: default latency 2.  The insn
+;; reserves f_7100 in its first cycle and fpmac_7100 in the next one.
+(define_insn_reservation "X0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100,fpmac_7100")
+;; Single precision divide/sqrt: default latency 8.  f_7100 is reserved
+;; for the first cycle only; fpdivsqrt_7100 is reserved for all 8 cycles
+;; (the first cycle plus 7 more).
+(define_insn_reservation "X1" 8
+ (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")
-;; Memory subsystem works just like 7100/7150 (except for cache miss times which
-;; we don't model here).
+;; Double precision divide/sqrt: default latency 15.  f_7100 is
+;; reserved for the first cycle only; fpdivsqrt_7100 is reserved for all
+;; 15 cycles (the first cycle plus 14 more).
+(define_insn_reservation "X2" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
+
+;; Loads (integer and FP): default latency 2; i_7100 and mem_7100 are
+;; both reserved for one cycle.
+(define_insn_reservation "X3" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+
+(define_insn_reservation "X4" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+
+;; Stores (integer and FP): default latency 2; i_7100 is reserved for
+;; one cycle and mem_7100 for two, so no other memory operation can be
+;; issued in the cycle after a store.
+(define_insn_reservation "X5" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
+
+(define_insn_reservation "X6" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
+
+;; Catch-all for the 7100: every insn type not listed in the negated
+;; ("!") type list has a default latency of 1 and reserves i_7100 for
+;; one cycle.
+(define_insn_reservation "X7" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100")
+
+;; We have a bypass for all computations in the FP unit which feed an
+;; FP store as long as the sizes are the same.
+;; Each bypass latency is one cycle less than the default latency of
+;; the producing reservation (X0: 2, X1: 8, X2: 15).  The bypass only
+;; applies when hppa_fpstore_bypass_p accepts the pair.
+(define_bypass 1 "X0" "X6" "hppa_fpstore_bypass_p")
+(define_bypass 7 "X1" "X6" "hppa_fpstore_bypass_p")
+(define_bypass 14 "X2" "X6" "hppa_fpstore_bypass_p")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+;; Here the latency (3) is larger than the default latency of the FP
+;; load reservation "X4" (2).
+(define_bypass 3 "X4" "X6" "hppa_fpstore_bypass_p")
;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
-;; Note divides and sqrt flops lock the cpu until the flop is
-;; finished. fmpy and xmpyu (fmpyi) lock the cpu for one cycle.
-;; There's no way to avoid the penalty.
+;; There's no value in modeling the ALU and MUL separately though
+;; since there can never be a functional unit conflict that
+;; can be avoided given the latency, issue rates and mandatory
+;; one cycle cpu-wide lock for a double precision fp multiply.
+;;
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fcpy 2 ALU 1
;; fdiv,dbl 15 DIV 15
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
+;;
+;; The PA7200 is just like the PA7100LC except that there is
+;; no store-store penalty.
+;;
+;; The PA7300 is just like the PA7200 except that there is
+;; no store-load penalty.
+;;
+;; Note there are some aspects of the 7100LC we are not modeling
+;; at the moment. I'll be reviewing the 7100LC scheduling info
+;; shortly and updating this description.
+;;
+;; load-load pairs
+;; store-store pairs
+;; other issue modeling
+
+;; DFA shared by the 7100LC, 7200 and 7300.  Loads, stores and shifts
+;; always reserve i1_7100lc; the remaining non-FP insns may reserve
+;; either i0_7100lc or i1_7100lc; FP insns reserve f_7100lc.
+;; fpmac_7100lc is the combined FP ALU/MUL unit and mem_7100lc the
+;; memory unit.
+(define_automaton "pa7100lc")
+(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
+(define_cpu_unit "fpmac_7100lc" "pa7100lc")
+(define_cpu_unit "mem_7100lc" "pa7100lc")
+
+;; Double precision multiplies lock the entire CPU for one
+;; cycle. There is no way to avoid this lock and trying to
+;; schedule around the lock is pointless and thus there is no
+;; value in trying to model this lock.
+;;
+;; Not modeling the lock allows us to treat fp multiplies just
+;; like any other FP alu instruction. It allows for a smaller
+;; DFA and may reduce register pressure.
+(define_insn_reservation "Y0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmac_7100lc")
+
+;; fp division and sqrt instructions lock the entire CPU for
+;; 7 cycles (single precision) or 14 cycles (double precision).
+;; There is no way to avoid this lock and trying to schedule
+;; around the lock is pointless and thus there is no value in
+;; trying to model this lock. Not modeling the lock allows
+;; for a smaller DFA and may reduce register pressure.
+(define_insn_reservation "Y1" 1
+ (and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc")
+
+;; Loads (integer and FP) on the 7100LC, 7200 and 7300: default latency
+;; 2; i1_7100lc and mem_7100lc are both reserved for one cycle.
+(define_insn_reservation "Y2" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
+
+(define_insn_reservation "Y3" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
+
+;; Stores (integer and FP) on the 7100LC only: default latency 2;
+;; i1_7100lc is reserved for one cycle and mem_7100lc for two.  The
+;; 7200 and 7300 have their own store reservations.
+(define_insn_reservation "Y4" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y5" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
+
+;; Shifts can only be issued to i1_7100lc.
+(define_insn_reservation "Y6" 1
+ (and (eq_attr "type" "shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc")
+
+;; Catch-all: every insn type not listed in the negated ("!") type list
+;; has a default latency of 1 and may use either integer unit.
+(define_insn_reservation "Y7" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "(i0_7100lc|i1_7100lc)")
+
+;; The 7200 has a store-load penalty
+(define_insn_reservation "Y8" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y9" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc")
+
+;; The 7300 has no penalty for store-store or store-load
+(define_insn_reservation "Y10" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc")
+
+(define_insn_reservation "Y11" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+(define_bypass 3 "Y3" "Y5,Y9,Y11" "hppa_fpstore_bypass_p")
-(define_function_unit "pa7100LCfp_alu" 1 0
- (and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
- (and (eq_attr "type" "fpmulsgl")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
- (and (eq_attr "type" "fpmuldbl")
- (eq_attr "cpu" "7100LC,7200")) 3 2)
-(define_function_unit "pa7100LCfp_div" 1 0
- (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100LC,7200")) 8 8)
-(define_function_unit "pa7100LCfp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 15 15)
-
-;; Define the various functional units for dual-issue.
-
-;; There's only one floating point unit.
-(define_function_unit "pa7100LCflop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
-
-;; Shifts and memory ops execute in only one of the integer ALUs
-(define_function_unit "pa7100LCshiftmem" 1 1
- (and
- (eq_attr "type" "shift,nullshift,load,fpload,store,fpstore")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
-
-;; We have two basic ALUs.
-(define_function_unit "pa7100LCalu" 2 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
-
-;; I don't have complete information on the PA7200; however, most of
-;; what I've heard makes it look like a 7100LC without the store-store
-;; penalty. So that's how we'll model it.
-
-;; Memory. Disregarding Cache misses, memory loads and stores take
-;; two cycles. Any special cases are handled in pa_adjust_cost.
-(define_function_unit "pa7200memory" 1 0
- (and (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "7200")) 2 0)
-
-;; I don't have detailed information on the PA7200 FP pipeline, so I
-;; treat it just like the 7100LC pipeline.
-;; Similarly for the multi-issue fake units.
-
-;;
;; Scheduling for the PA8000 is somewhat different than scheduling for a
;; traditional architecture.
;;
;; The PA8000 has a large (56) entry reorder buffer that is split between
;; memory and non-memory operations.
;;
-;; The PA800 can issue two memory and two non-memory operations per cycle to
-;; the function units. Similarly, the PA8000 can retire two memory and two
-;; non-memory operations per cycle.
+;; The PA8000 can issue two memory and two non-memory operations per cycle to
+;; the function units, with the exception of branches and multi-output
+;; instructions. The PA8000 can retire two non-memory operations per cycle
+;; and two memory operations per cycle, only one of which may be a store.
;;
;; Given the large reorder buffer, the processor can hide most latencies.
;; According to HP, they've got the best results by scheduling for retirement
;; bandwidth with limited latency scheduling for floating point operations.
;; Latency for integer operations and memory references is ignored.
;;
-;; We claim floating point operations have a 2 cycle latency and are
-;; fully pipelined, except for div and sqrt which are not pipelined.
;;
-;; It is not necessary to define the shifter and integer alu units.
+;; We claim floating point operations have a 2 cycle latency and are
+;; fully pipelined, except for div and sqrt which are not pipelined and
+;; take from 17 to 31 cycles to complete.
;;
-;; These first two define_unit_unit descriptions model retirement from
-;; the reorder buffer.
-(define_function_unit "pa8000lsu" 2 1
+;; It's worth noting that there is no way to saturate all the functional
+;; units on the PA8000 as there is not enough issue bandwidth.
+
+;; DFA for the PA8000.  Unit naming: inm*/im* are the non-memory and
+;; memory issue slots and rnm*/rm* the non-memory and memory retirement
+;; slots (two of each).  store_8000 is reserved by every store at
+;; retirement, which limits store retirement to one per cycle.
+;; f*_8000 are the pipelined FP units and fdivsqrt*_8000 the
+;; divide/sqrt units.
+(define_automaton "pa8000")
+(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
+(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
+(define_cpu_unit "store_8000" "pa8000")
+(define_cpu_unit "f0_8000, f1_8000" "pa8000")
+(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")
+;; Each named reservation below selects either member of a unit pair.
+(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
+(define_reservation "im_8000" "im0_8000 | im1_8000")
+(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
+(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
+(define_reservation "f_8000" "f0_8000 | f1_8000")
+(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")
+
+;; We can issue any two memops per cycle, but we can only retire
+;; one memory store per cycle. We assume that the reorder buffer
+;; will hide any memory latencies per HP's recommendation.
+(define_insn_reservation "Z0" 0
(and
- (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
+ (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000")
-(define_function_unit "pa8000alu" 2 1
+(define_insn_reservation "Z1" 0
(and
- (eq_attr "type" "!load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
-
-;; Claim floating point ops have a 2 cycle latency, excluding div and
-;; sqrt, which are not pipelined and issue to different units.
-(define_function_unit "pa8000fmac" 2 0
+ (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000+store_8000")
+
+;; We can issue and retire two non-memory operations per cycle with
+;; a few exceptions (branches). This group catches those we want
+;; to assume have zero latency.
+(define_insn_reservation "Z2" 0
(and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "8000")) 2 1)
+ (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
+;; Branches, calls, and multi/millicode insns use both slots in the
+;; non-memory issue and retirement units.
+(define_insn_reservation "Z3" 0
(and
- (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "8000")) 17 17)
+ (eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+ (eq_attr "cpu" "8000"))
+ "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
+
+;; We partially latency-schedule the floating point units.
+;; They can issue/retire two at a time in the non-memory
+;; units. We fix their latency at 2 cycles and they
+;; are fully pipelined.
+;;
+;; The default latency below must stay in sync with the 2 cycle
+;; latency claimed here and in the PA8000 overview comment.
+(define_insn_reservation "Z4" 2
+ (and
+ (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,f_8000,rnm_8000")
+
+;; The fdivsqrt units are not pipelined and have a very long latency.
+;; To keep the DFA from exploding, we do not show all the
+;; reservations for the divsqrt unit.
+(define_insn_reservation "Z5" 17
+ (and
+ (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
+
+(define_insn_reservation "Z6" 31
+ (and
+ (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
- (and
- (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "8000")) 31 31)
\f
;; Compare instructions.
[(set_attr "length" "4")
(set_attr "type" "fpcc")])
+;; The following two patterns are optimization placeholders. In almost
+;; all cases, the user of the condition code will be simplified and the
+;; original condition code setting insn should be eliminated.
+
+;; Set the FP condition-code register (reg:CCFP 0) to the constant 0
+;; by comparing %fr0 with itself using the "!=" condition.  Only
+;; available when hardware floating point is in use.
+(define_insn "*setccfp0"
+ [(set (reg:CCFP 0)
+ (const_int 0))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,dbl,!= %%fr0,%%fr0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
+;; Set the FP condition-code register (reg:CCFP 0) to the constant 1
+;; by comparing %fr0 with itself using the "=" condition.  Only
+;; available when hardware floating point is in use.
+(define_insn "*setccfp1"
+ [(set (reg:CCFP 0)
+ (const_int 1))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,dbl,= %%fr0,%%fr0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
;; scc insns.
(define_expand "seq"
DONE;
}")
-;;; pic symbol references
-
-(define_insn ""
- [(set (match_operand:SI 0 "register_operand" "=r")
- (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "symbolic_operand" ""))))]
- "flag_pic && operands[1] == pic_offset_table_rtx"
- "ldw T'%2(%1),%0"
- [(set_attr "type" "load")
- (set_attr "length" "4")])
-
(define_insn ""
[(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand"
- "=r,r,r,r,r,Q,*q,!f,f,*TR")
+ "=r,r,r,r,r,r,Q,*q,!f,f,*TR")
(match_operand:SI 1 "move_operand"
- "r,J,N,K,RQ,rM,rM,!fM,*RT,f"))]
+ "A,r,J,N,K,RQ,rM,rM,!fM,*RT,f"))]
"(register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))
&& ! TARGET_SOFT_FLOAT"
"@
+ ldw RT'%A1,%0
copy %1,%0
ldi %1,%0
ldil L'%1,%0
fcpy,sgl %f1,%0
fldw%F1 %1,%0
fstw%F0 %1,%0"
- [(set_attr "type" "move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4")])
(define_insn ""
[(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand"
- "=r,r,r,r,r,Q,*q")
+ "=r,r,r,r,r,r,Q,*q")
(match_operand:SI 1 "move_operand"
- "r,J,N,K,RQ,rM,rM"))]
+ "A,r,J,N,K,RQ,rM,rM"))]
"(register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))
&& TARGET_SOFT_FLOAT"
"@
+ ldw RT'%A1,%0
copy %1,%0
ldi %1,%0
ldil L'%1,%0
ldw%M1 %1,%0
stw%M0 %r1,%0
mtsar %r1"
- [(set_attr "type" "move,move,move,move,load,store,move")
+ [(set_attr "type" "load,move,move,move,move,load,store,move")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4")])
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=r")
""
"*
{
- rtx label_rtx = gen_label_rtx ();
rtx xoperands[3];
extern FILE *asm_out_file;
xoperands[0] = operands[0];
xoperands[1] = operands[1];
- xoperands[2] = label_rtx;
+ if (TARGET_SOM || ! TARGET_GAS)
+ xoperands[2] = gen_label_rtx ();
+
output_asm_insn (\"{bl|b,l} .+8,%0\", xoperands);
output_asm_insn (\"{depi|depwi} 0,31,2,%0\", xoperands);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
- CODE_LABEL_NUMBER (label_rtx));
+ if (TARGET_SOM || ! TARGET_GAS)
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[2]));
/* If we're trying to load the address of a label that happens to be
close, then we can use a shorter sequence. */
{
/* Prefixing with R% here is wrong, it extracts just 11 bits and is
always non-negative. */
- output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
+ if (TARGET_SOM || ! TARGET_GAS)
+ output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
+ else
+ output_asm_insn (\"ldo %1-$PIC_pcrel$0+8(%0),%0\", xoperands);
}
else
{
- output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
- output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
+ if (TARGET_SOM || ! TARGET_GAS)
+ {
+ output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
+ output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
+ }
+ else
+ {
+ output_asm_insn (\"addil L%%%1-$PIC_pcrel$0+8,%0\", xoperands);
+ output_asm_insn (\"ldo R%%%1-$PIC_pcrel$0+12(%0),%0\",
+ xoperands);
+ }
}
return \"\";
}"
(high:SI (match_operand 2 "" ""))))]
"symbolic_operand (operands[2], Pmode)
&& ! function_label_operand (operands[2], Pmode)
- && flag_pic == 2"
+ && flag_pic"
"addil LT'%G2,%1"
[(set_attr "type" "binary")
(set_attr "length" "4")])
"symbolic_operand (operands[2], Pmode)
&& ! function_label_operand (operands[2], Pmode)
&& TARGET_64BIT
- && flag_pic == 2"
+ && flag_pic"
"addil LT'%G2,%1"
[(set_attr "type" "binary")
(set_attr "length" "4")])
-; We need this to make sure CSE doesn't simplify a memory load with a
-; symbolic address, whose content it think it knows. For PIC, what CSE
-; think is the real value will be the address of that value.
-(define_insn ""
- [(set (match_operand:SI 0 "register_operand" "=r")
- (mem:SI
- (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
- (unspec:SI
- [(match_operand:SI 2 "symbolic_operand" "")] 0))))]
- ""
- "*
-{
- if (flag_pic != 2)
- abort ();
- return \"ldw RT'%G2(%1),%0\";
-}"
- [(set_attr "type" "load")
- (set_attr "length" "4")])
-
-(define_insn ""
- [(set (match_operand:DI 0 "register_operand" "=r")
- (mem:DI
- (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
- (unspec:DI
- [(match_operand:DI 2 "symbolic_operand" "")] 0))))]
- "TARGET_64BIT"
- "*
-{
- if (flag_pic != 2)
- abort ();
- return \"ldd RT'%G2(%1),%0\";
-}"
- [(set_attr "type" "load")
- (set_attr "length" "4")])
-
;; Always use addil rather than ldil;add sequences. This allows the
;; HP linker to eliminate the dp relocation if the symbolic operand
;; lives in the TEXT space.
(define_insn ""
[(set (match_operand:HI 0 "register_operand" "=r")
- (high:HI (match_operand 1 "const_int_operand" "")))]
- ""
- "ldil L'%G1,%0"
- [(set_attr "type" "move")
- (set_attr "length" "4")])
-
-(define_insn ""
- [(set (match_operand:HI 0 "register_operand" "=r")
- (lo_sum:HI (match_operand:HI 1 "register_operand" "r")
- (match_operand 2 "const_int_operand" "")))]
+ (plus:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "J")))]
""
- "ldo R'%G2(%1),%0"
- [(set_attr "type" "move")
+ "ldo %2(%1),%0"
+ [(set_attr "type" "binary")
+ (set_attr "pa_combine_type" "addmove")
(set_attr "length" "4")])
(define_expand "movqi"
/* Fall through means we're going to use our block move pattern. */
operands[0]
- = change_address (operands[0], VOIDmode,
- copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
operands[1]
- = change_address (operands[1], VOIDmode,
- copy_to_mode_reg (SImode, XEXP (operands[1], 0)));
+ = replace_equiv_address (operands[1],
+ copy_to_mode_reg (SImode, XEXP (operands[1], 0)));
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
operands[6] = gen_reg_rtx (SImode);
}")
(define_expand "reload_outdi"
- [(set (match_operand:DI 0 "general_operand" "")
+ [(set (match_operand:DI 0 "non_hard_reg_operand" "")
(match_operand:DI 1 "register_operand" "Z"))
(clobber (match_operand:SI 2 "register_operand" "=&r"))]
""
(define_insn ""
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand"
- "=r,r,r,r,r,Q,*q,!f,f,*TR")
+ "=r,r,r,r,r,r,Q,*q,!f,f,*TR")
(match_operand:DI 1 "move_operand"
- "r,J,N,K,RQ,rM,rM,!fM,*RT,f"))]
+ "A,r,J,N,K,RQ,rM,rM,!fM,*RT,f"))]
"(register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))
&& ! TARGET_SOFT_FLOAT && TARGET_64BIT"
"@
+ ldd RT'%A1,%0
copy %1,%0
ldi %1,%0
ldil L'%1,%0
fcpy,dbl %f1,%0
fldd%F1 %1,%0
fstd%F0 %1,%0"
- [(set_attr "type" "move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4")])
(define_insn ""
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand"
(set_attr "length" "4")])
(define_expand "floatunssisf2"
- [(set (subreg:SI (match_dup 2) 1)
+ [(set (subreg:SI (match_dup 2) 4)
(match_operand:SI 1 "register_operand" ""))
(set (subreg:SI (match_dup 2) 0)
(const_int 0))
}")
(define_expand "floatunssidf2"
- [(set (subreg:SI (match_dup 2) 1)
+ [(set (subreg:SI (match_dup 2) 4)
(match_operand:SI 1 "register_operand" ""))
(set (subreg:SI (match_dup 2) 0)
(const_int 0))
(define_expand "adddi3"
[(set (match_operand:DI 0 "register_operand" "")
(plus:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:DI 2 "arith_operand" "")))]
+ (match_operand:DI 2 "adddi3_operand" "")))]
""
"")
-;; We allow arith_operand for operands2, even though strictly speaking it
-;; we would prefer to us arith11_operand since that's what the hardware
-;; can actually support.
-;;
-;; But the price of the extra reload in that case is worth the simplicity
-;; we get by allowing a trivial adddi3 expander to be used for both
-;; PA64 and PA32.
-
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "register_operand" "%r")
- (match_operand:DI 2 "arith_operand" "rI")))]
+ (match_operand:DI 2 "arith11_operand" "rI")))]
"!TARGET_64BIT"
"*
{
(clobber (match_dup 3))
(clobber (reg:SI 26))
(clobber (reg:SI 25))
- (clobber (reg:SI 31))])
+ (clobber (match_dup 4))])
(set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
""
"
{
+ operands[4] = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
if (TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT)
{
rtx scratch = gen_reg_rtx (DImode);
operands[1] = force_reg (SImode, operands[1]);
operands[2] = force_reg (SImode, operands[2]);
emit_insn (gen_umulsidi3 (scratch, operands[1], operands[2]));
- /* We do not want (subreg:SI (XX:DI) 1)) for TARGET_64BIT since
- that has no real meaning. */
- if (TARGET_64BIT)
- {
- emit_insn (gen_rtx_SET (VOIDmode,
- operands[0],
- gen_rtx_SUBREG (SImode, scratch, 0)));
- DONE;
-
- }
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_SUBREG (SImode, scratch, 1)));
+ gen_rtx_SUBREG (SImode, scratch, GET_MODE_SIZE (SImode))));
DONE;
}
operands[3] = gen_reg_rtx (SImode);
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))]
- ""
+ "!TARGET_64BIT"
"* return output_mul_insn (0, insn);"
[(set_attr "type" "milli")
(set (attr "length")
(const_int 0)))
(const_int 4)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
(const_int 0))
- (const_int 8)
+ (const_int 24)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (const_int 20)]
-;; Out of range and either PIC or PORTABLE_RUNTIME
- (const_int 24)))])
+;; Out of reach, can use ble
+ (const_int 12)))])
+
+;; TARGET_64BIT form of the millicode multiply: reg 29 = reg 26 * reg 25,
+;; emitted through output_mul_insn.  Hard register 2 is clobbered here
+;; (the mulsi3 expander picks register 2 instead of 31 when
+;; TARGET_64BIT), and the length is always 4.
+(define_insn ""
+ [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "* return output_mul_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length") (const_int 4))])
(define_expand "muldi3"
[(set (match_operand:DI 0 "register_operand" "")
GEN_INT (32)));
emit_move_insn (op2shifted, gen_rtx_LSHIFTRT (DImode, operands[2],
GEN_INT (32)));
- op1r = gen_rtx_SUBREG (SImode, operands[1], 0);
- op2r = gen_rtx_SUBREG (SImode, operands[2], 0);
- op1l = gen_rtx_SUBREG (SImode, op1shifted, 0);
- op2l = gen_rtx_SUBREG (SImode, op2shifted, 0);
+ op1r = gen_rtx_SUBREG (SImode, operands[1], 4);
+ op2r = gen_rtx_SUBREG (SImode, operands[2], 4);
+ op1l = gen_rtx_SUBREG (SImode, op1shifted, 4);
+ op2l = gen_rtx_SUBREG (SImode, op2shifted, 4);
/* Emit multiplies for the cross products. */
emit_insn (gen_umulsidi3 (cross_product1, op2r, op1l));
(clobber (match_dup 4))
(clobber (reg:SI 26))
(clobber (reg:SI 25))
- (clobber (reg:SI 31))])
+ (clobber (match_dup 5))])
(set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
""
"
{
operands[3] = gen_reg_rtx (SImode);
- operands[4] = gen_reg_rtx (SImode);
if (TARGET_64BIT)
- operands[4] = gen_rtx_REG (SImode, 2);
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 0))
DONE;
}")
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))]
- ""
+ "!TARGET_64BIT"
"*
return output_div_insn (operands, 0, insn);"
[(set_attr "type" "milli")
(const_int 0)))
(const_int 4)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
(const_int 0))
- (const_int 8)
+ (const_int 24)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (const_int 20)]
+
+;; Out of reach, can use ble
+ (const_int 12)))])
-;; Out of range and either PIC or PORTABLE_RUNTIME
- (const_int 24)))])
;; Signed SImode divide, matched only when TARGET_64BIT.
;; The dividend is hard register 26, the divisor is operand 0 (a
;; div_operand), and the quotient is produced in hard register 29.
;; Operand 1 (constraint "a") and operand 2 (early-clobber "r") are
;; scratch registers.  Hard register 2 is clobbered here, where the
;; !TARGET_64BIT form of this pattern clobbers hard register 31.
;; output_div_insn receives 0 as its second argument; the udiv pattern
;; passes 1.  The length is a fixed 4 bytes.
+(define_insn ""
+ [(set (reg:SI 29)
+ (div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length") (const_int 4))])
(define_expand "udivsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
(clobber (match_dup 4))
(clobber (reg:SI 26))
(clobber (reg:SI 25))
- (clobber (reg:SI 31))])
+ (clobber (match_dup 5))])
(set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
""
"
{
operands[3] = gen_reg_rtx (SImode);
- operands[4] = gen_reg_rtx (SImode);
if (TARGET_64BIT)
- operands[4] = gen_rtx_REG (SImode, 2);
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 1))
DONE;
}")
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))]
- ""
+ "!TARGET_64BIT"
"*
return output_div_insn (operands, 1, insn);"
[(set_attr "type" "milli")
(const_int 0)))
(const_int 4)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
(const_int 0))
- (const_int 8)
+ (const_int 24)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (const_int 20)]
-;; Out of range and either PIC or PORTABLE_RUNTIME
- (const_int 24)))])
+;; Out of reach, can use ble
+ (const_int 12)))])
+
;; Unsigned SImode divide, matched only when TARGET_64BIT.
;; The dividend is hard register 26, the divisor is operand 0 (a
;; div_operand), and the quotient is produced in hard register 29.
;; Operand 1 (constraint "a") and operand 2 (early-clobber "r") are
;; scratch registers.  Hard register 2 is clobbered here, where the
;; !TARGET_64BIT form of this pattern clobbers hard register 31.
;; output_div_insn receives 1 as its second argument; the signed div
;; pattern passes 0.  The length is a fixed 4 bytes.
+(define_insn ""
+ [(set (reg:SI 29)
+ (udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length") (const_int 4))])
(define_expand "modsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
(clobber (match_dup 4))
(clobber (reg:SI 26))
(clobber (reg:SI 25))
- (clobber (reg:SI 31))])
+ (clobber (match_dup 5))])
(set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
""
"
{
- operands[4] = gen_reg_rtx (SImode);
if (TARGET_64BIT)
- operands[4] = gen_rtx_REG (SImode, 2);
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
operands[3] = gen_reg_rtx (SImode);
}")
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))]
- ""
+ "!TARGET_64BIT"
"*
return output_mod_insn (0, insn);"
[(set_attr "type" "milli")
(const_int 0)))
(const_int 4)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
(const_int 0))
- (const_int 8)
+ (const_int 24)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (const_int 20)]
+
+;; Out of reach, can use ble
+ (const_int 12)))])
-;; Out of range and either PIC or PORTABLE_RUNTIME
- (const_int 24)))])
;; Signed SImode remainder, matched only when TARGET_64BIT.
;; Hard register 29 receives (mod reg26 reg25).  Operand 0 (constraint
;; "a") and operand 1 (early-clobber "r") are scratch registers.
;; Hard register 2 is clobbered here, where the !TARGET_64BIT form of this
;; pattern clobbers hard register 31.
;; output_mod_insn receives 0 as its first argument; the umod pattern
;; passes 1.  The length is a fixed 4 bytes.
+(define_insn ""
+ [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_mod_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length") (const_int 4))])
(define_expand "umodsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
(clobber (match_dup 4))
(clobber (reg:SI 26))
(clobber (reg:SI 25))
- (clobber (reg:SI 31))])
+ (clobber (match_dup 5))])
(set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
""
"
{
- operands[4] = gen_reg_rtx (SImode);
if (TARGET_64BIT)
- operands[4] = gen_rtx_REG (SImode, 2);
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
operands[3] = gen_reg_rtx (SImode);
}")
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))]
- ""
+ "!TARGET_64BIT"
"*
return output_mod_insn (1, insn);"
[(set_attr "type" "milli")
(const_int 0)))
(const_int 4)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
(const_int 0))
- (const_int 8)
+ (const_int 24)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (const_int 20)]
-;; Out of range and either PIC or PORTABLE_RUNTIME
- (const_int 24)))])
+;; Out of reach, can use ble
+ (const_int 12)))])
+
;; Unsigned SImode remainder, matched only when TARGET_64BIT.
;; Hard register 29 receives (umod reg26 reg25).  Operand 0 (constraint
;; "a") and operand 1 (early-clobber "r") are scratch registers.
;; Hard register 2 is clobbered here, where the !TARGET_64BIT form of this
;; pattern clobbers hard register 31.
;; output_mod_insn receives 1 as its first argument; the signed mod
;; pattern passes 0.  The length is a fixed 4 bytes.
+(define_insn ""
+ [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_mod_insn (1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length") (const_int 4))])
;;- and instructions
;; We define DImode `and` so with DImode `not` we can get
[(set_attr "type" "fpdivsgl")
(set_attr "length" "4")])
-(define_insn "negdf2"
+;; Processors prior to PA 2.0 don't have a fneg instruction. Fast
+;; negation can be done by subtracting from plus zero. However, this
+;; violates the IEEE standard when negating plus and minus zero.
;;
;; This expander therefore emits negdf2_fast only when TARGET_PA_20 or
;; flag_unsafe_math_optimizations is set.  Otherwise it forces the
;; constant dconstm1 (GCC's -1.0) into a DFmode register, stores it in
;; operands[2], and emits muldf3 of operand 1 by that register.
;; The preparation code always ends in DONE, so the RTL template below,
;; including the (use (match_dup 2)), is never emitted as written.
+(define_expand "negdf2"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "register_operand" "")))
+ (use (match_dup 2))])]
+ "! TARGET_SOFT_FLOAT"
+{
+ if (TARGET_PA_20 || flag_unsafe_math_optimizations)
+ emit_insn (gen_negdf2_fast (operands[0], operands[1]));
+ else
+ {
+ operands[2] = force_reg (DFmode,
+ CONST_DOUBLE_FROM_REAL_VALUE (dconstm1, DFmode));
+ emit_insn (gen_muldf3 (operands[0], operands[1], operands[2]));
+ }
+ DONE;
+})
+
+(define_insn "negdf2_fast"
[(set (match_operand:DF 0 "register_operand" "=f")
(neg:DF (match_operand:DF 1 "register_operand" "f")))]
- "! TARGET_SOFT_FLOAT"
+ "! TARGET_SOFT_FLOAT && (TARGET_PA_20 || flag_unsafe_math_optimizations)"
"*
{
if (TARGET_PA_20)
[(set_attr "type" "fpalu")
(set_attr "length" "4")])
-(define_insn "negsf2"
;; SFmode negation expander, enabled when ! TARGET_SOFT_FLOAT.
;; It emits negsf2_fast only when TARGET_PA_20 or
;; flag_unsafe_math_optimizations is set.  Otherwise it forces the
;; constant dconstm1 (GCC's -1.0) into an SFmode register, stores it in
;; operands[2], and emits mulsf3 of operand 1 by that register.
;; The preparation code always ends in DONE, so the RTL template below,
;; including the (use (match_dup 2)), is never emitted as written.
+(define_expand "negsf2"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (match_operand:SF 1 "register_operand" "")))
+ (use (match_dup 2))])]
+ "! TARGET_SOFT_FLOAT"
+{
+ if (TARGET_PA_20 || flag_unsafe_math_optimizations)
+ emit_insn (gen_negsf2_fast (operands[0], operands[1]));
+ else
+ {
+ operands[2] = force_reg (SFmode,
+ CONST_DOUBLE_FROM_REAL_VALUE (dconstm1, SFmode));
+ emit_insn (gen_mulsf3 (operands[0], operands[1], operands[2]));
+ }
+ DONE;
+})
+
+(define_insn "negsf2_fast"
[(set (match_operand:SF 0 "register_operand" "=f")
(neg:SF (match_operand:SF 1 "register_operand" "f")))]
- "! TARGET_SOFT_FLOAT"
+ "! TARGET_SOFT_FLOAT && (TARGET_PA_20 || flag_unsafe_math_optimizations)"
"*
{
if (TARGET_PA_20)
;; We want to split this up during scheduling since we want both insns
;; to schedule independently.
(define_split
- [(set (match_operand:DF 0 "register_operand" "=f")
- (plus:DF (mult:DF (match_operand:DF 1 "register_operand" "f")
- (match_operand:DF 2 "register_operand" "f"))
- (match_operand:DF 3 "register_operand" "f")))
- (set (match_operand:DF 4 "register_operand" "=&f")
+ [(set (match_operand:DF 0 "register_operand" "")
+ (plus:DF (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))
+ (match_operand:DF 3 "register_operand" "")))
+ (set (match_operand:DF 4 "register_operand" "")
(mult:DF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:DF (match_dup 1) (match_dup 2)))
;; We want to split this up during scheduling since we want both insns
;; to schedule independently.
(define_split
- [(set (match_operand:SF 0 "register_operand" "=f")
- (plus:SF (mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "register_operand" "f"))
- (match_operand:SF 3 "register_operand" "f")))
- (set (match_operand:SF 4 "register_operand" "=&f")
+ [(set (match_operand:SF 0 "register_operand" "")
+ (plus:SF (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))
+ (match_operand:SF 3 "register_operand" "")))
+ (set (match_operand:SF 4 "register_operand" "")
(mult:SF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:SF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:DF 0 "register_operand" "=f")
- (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f")
- (match_operand:DF 2 "register_operand" "f"))))
- (set (match_operand:DF 3 "register_operand" "=&f")
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))))
+ (set (match_operand:DF 3 "register_operand" "")
(mult:DF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 3) (mult:DF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:SF 0 "register_operand" "=f")
- (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "register_operand" "f"))))
- (set (match_operand:SF 3 "register_operand" "=&f")
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))))
+ (set (match_operand:SF 3 "register_operand" "")
(mult:SF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 3) (mult:SF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:DF 0 "register_operand" "=f")
- (plus:DF (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f")
- (match_operand:DF 2 "register_operand" "f")))
- (match_operand:DF 3 "register_operand" "f")))
- (set (match_operand:DF 4 "register_operand" "=&f")
+ [(set (match_operand:DF 0 "register_operand" "")
+ (plus:DF (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" "")))
+ (match_operand:DF 3 "register_operand" "")))
+ (set (match_operand:DF 4 "register_operand" "")
(mult:DF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:DF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:SF 0 "register_operand" "=f")
- (plus:SF (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "register_operand" "f")))
- (match_operand:SF 3 "register_operand" "f")))
- (set (match_operand:SF 4 "register_operand" "=&f")
+ [(set (match_operand:SF 0 "register_operand" "")
+ (plus:SF (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" "")))
+ (match_operand:SF 3 "register_operand" "")))
+ (set (match_operand:SF 4 "register_operand" "")
(mult:SF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:SF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:DF 0 "register_operand" "=f")
- (minus:DF (match_operand:DF 3 "register_operand" "f")
- (mult:DF (match_operand:DF 1 "register_operand" "f")
- (match_operand:DF 2 "register_operand" "f"))))
- (set (match_operand:DF 4 "register_operand" "=&f")
+ [(set (match_operand:DF 0 "register_operand" "")
+ (minus:DF (match_operand:DF 3 "register_operand" "")
+ (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))))
+ (set (match_operand:DF 4 "register_operand" "")
(mult:DF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:DF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:SF 0 "register_operand" "=f")
- (minus:SF (match_operand:SF 3 "register_operand" "f")
- (mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "register_operand" "f"))))
- (set (match_operand:SF 4 "register_operand" "=&f")
+ [(set (match_operand:SF 0 "register_operand" "")
+ (minus:SF (match_operand:SF 3 "register_operand" "")
+ (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))))
+ (set (match_operand:SF 4 "register_operand" "")
(mult:SF (match_dup 1) (match_dup 2)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 4) (mult:SF (match_dup 1) (match_dup 2)))
(set_attr "length" "8")])
(define_split
- [(set (match_operand:DF 0 "register_operand" "=f")
- (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))
- (set (match_operand:DF 2 "register_operand" "=&f") (abs:DF (match_dup 1)))]
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (abs:DF (match_operand:DF 1 "register_operand" ""))))
+ (set (match_operand:DF 2 "register_operand" "") (abs:DF (match_dup 1)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 2) (abs:DF (match_dup 1)))
(set (match_dup 0) (neg:DF (abs:DF (match_dup 1))))]
(set_attr "length" "8")])
(define_split
- [(set (match_operand:SF 0 "register_operand" "=f")
- (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))
- (set (match_operand:SF 2 "register_operand" "=&f") (abs:SF (match_dup 1)))]
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (abs:SF (match_operand:SF 1 "register_operand" ""))))
+ (set (match_operand:SF 2 "register_operand" "") (abs:SF (match_dup 1)))]
"! TARGET_SOFT_FLOAT && TARGET_PA_20"
[(set (match_dup 2) (abs:SF (match_dup 1)))
(set (match_dup 0) (neg:SF (abs:SF (match_dup 1))))]
;; Unconditional and other jump instructions.
;; This can only be used in a leaf function, so we do
-;; not need to use the PIC register.
+;; not need to use the PIC register when generating PIC code.
(define_insn "return"
[(return)
(use (reg:SI 2))
[(set_attr "type" "branch")
(set_attr "length" "4")])
-;; Use a different pattern for functions which have non-trivial
+;; Emit a different pattern for functions which have non-trivial
;; epilogues so as not to confuse jump and reorg.
-;;
-;; We use the PIC register to ensure it's restored after a
-;; call in PIC mode. This can be non-optimal for non-PIC
-;; code but the real world cost should be unmeasurable.
;; Non-PIC form of the epilogue return: the condition is "! flag_pic" and
;; no PIC register operand appears in the pattern.  The return goes
;; through %r2, using bve when TARGET_PA_20 and bv otherwise.
;; NOTE(review): the (const_int 1) presumably only keeps this pattern
;; distinct from the plain "return" pattern -- confirm.
(define_insn "return_internal"
[(return)
- (use (match_operand 0 "register_operand" "r"))
(use (reg:SI 2))
(const_int 1)]
- "true_regnum (operands[0]) == PIC_OFFSET_TABLE_REGNUM"
+ "! flag_pic"
+ "*
+{
+ if (TARGET_PA_20)
+ return \"bve%* (%%r2)\";
+ return \"bv%* %%r0(%%r2)\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;; Use the PIC register to ensure it's restored after a
+;; call in PIC mode.
+(define_insn "return_internal_pic"
+ [(return)
+ (use (match_operand 0 "register_operand" "r"))
+ (use (reg:SI 2))]
+ "flag_pic && true_regnum (operands[0]) == PIC_OFFSET_TABLE_REGNUM"
"*
{
if (TARGET_PA_20)
[(set_attr "type" "branch")
(set_attr "length" "4")])
+;; Use the PIC register to ensure it's restored after a
+;; call in PIC mode. This is used for eh returns which
+;; bypass the return stub.
;;
;; Matched only when flag_pic and current_function_calls_eh_return are
;; set and operand 0 is the PIC register.  The template is three
;; instructions (length 12).  ldsid and mtsp load %sr0 with the space
;; identifier of the address in %r2, using %r1 as scratch (hence the
;; clobber of hard register 1).  be then branches through %sr0 to %r2.
+(define_insn "return_external_pic"
+ [(return)
+ (use (match_operand 0 "register_operand" "r"))
+ (use (reg:SI 2))
+ (clobber (reg:SI 1))]
+ "flag_pic
+ && current_function_calls_eh_return
+ && true_regnum (operands[0]) == PIC_OFFSET_TABLE_REGNUM"
+ "ldsid (%%sr0,%%r2),%%r1\;mtsp %%r1,%%sr0\;be%* 0(%%sr0,%%r2)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "12")])
+
(define_expand "prologue"
[(const_int 0)]
""
/* Try to use the trivial return first. Else use the full
epilogue. */
if (hppa_can_use_return_insn_p ())
- emit_jump_insn (gen_return ());
+ emit_jump_insn (gen_return ());
else
{
rtx x;
hppa_expand_epilogue ();
- x = gen_return_internal (gen_rtx_REG (word_mode,
- PIC_OFFSET_TABLE_REGNUM));
+ if (flag_pic)
+ {
+ rtx pic = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
+
+ /* EH returns bypass the normal return stub. Thus, we must do an
+ interspace branch to return from functions that call eh_return.
+ This is only a problem for returns from shared code. */
+ if (current_function_calls_eh_return)
+ x = gen_return_external_pic (pic);
+ else
+ x = gen_return_internal_pic (pic);
+ }
+ else
+ x = gen_return_internal ();
emit_jump_insn (x);
}
DONE;
;; from within its delay slot to set the value for the 2nd parameter to
;; the call.
;; The new pattern is a call to operand 0 that uses hard registers 25 and
;; 26 and clobbers hard register 2.  The output code first emits the
;; argument descriptor, then a branch-and-link to %0 with an ldo that
;; sets %r25 to %r2 plus (operand 2 minus a fresh internal label).
;; That label is emitted immediately after the two instructions.
(define_insn "call_profiler"
- [(unspec_volatile [(const_int 0)] 0)
- (use (match_operand:SI 0 "const_int_operand" ""))]
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (reg:SI 25))
+ (use (reg:SI 26))
+ (clobber (reg:SI 2))]
""
- "{bl|b,l} _mcount,%%r2\;ldo %0(%%r2),%%r25"
+ "*
+{
+ rtx xoperands[3];
+
+ output_arg_descriptor (insn);
+
+ xoperands[0] = operands[0];
+ xoperands[1] = operands[2];
+ xoperands[2] = gen_label_rtx ();
+ output_asm_insn (\"{bl|b,l} %0,%%r2\;ldo %1-%2(%%r2),%%r25\", xoperands);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[2]));
+ return \"\";
+}"
[(set_attr "type" "multi")
(set_attr "length" "8")])
{
rtx xoperands[2];
xoperands[0] = operands[0];
- xoperands[1] = gen_label_rtx ();
+ if (TARGET_SOM || ! TARGET_GAS)
+ {
+ xoperands[1] = gen_label_rtx ();
- output_asm_insn (\"{bl|b,l} .+8,%%r1\\n\\taddil L'%l0-%l1,%%r1\",
- xoperands);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
- CODE_LABEL_NUMBER (xoperands[1]));
- output_asm_insn (\"ldo R'%l0-%l1(%%r1),%%r1\\n\\tbv %%r0(%%r1)\",
- xoperands);
+ output_asm_insn (\"{bl|b,l} .+8,%%r1\\n\\taddil L'%l0-%l1,%%r1\",
+ xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn (\"ldo R'%l0-%l1(%%r1),%%r1\", xoperands);
+ }
+ else
+ {
+ output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
+ output_asm_insn (\"addil L'%l0-$PIC_pcrel$0+4,%%r1\", xoperands);
+ output_asm_insn (\"ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1\", xoperands);
+ }
+ output_asm_insn (\"bv %%r0(%%r1)\", xoperands);
}
else
output_asm_insn (\"ldil L'%l0,%%r1\\n\\tbe R'%l0(%%sr4,%%r1)\", operands);;
{
rtx reg = gen_reg_rtx (DImode);
emit_insn (gen_extendsidi2 (reg, operands[0]));
- operands[0] = gen_rtx_SUBREG (SImode, reg, 0);
+ operands[0] = gen_rtx_SUBREG (SImode, reg, 4);
}
if (!INT_5_BITS (operands[2]))
/* After each call we must restore the PIC register, even if it
doesn't appear to be used. */
- emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
}
DONE;
}")
[(set_attr "type" "call")
(set (attr "length")
;; If we're sure that we can either reach the target or that the
-;; linker can use a long-branch stub, then the length is 4 bytes.
+;; linker can use a long-branch stub, then the length is at most
+;; 8 bytes.
;;
-;; For long-calls the length will be either 52 bytes (non-pic)
-;; or 68 bytes (pic). */
+;; For long-calls the length will be at most 68 bytes (non-pic)
+;; or 84 bytes (pic).
;; Else we have to use a long-call;
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
- (const_int 4)
+ (const_int 8)
(if_then_else (eq (symbol_ref "flag_pic")
(const_int 0))
- (const_int 52)
- (const_int 68))))])
+ (const_int 68)
+ (const_int 84))))])
(define_insn "call_internal_reg_64bit"
[(call (mem:SI (match_operand:DI 0 "register_operand" "r"))
"TARGET_64BIT"
"*
{
- rtx xoperands[2];
-
/* ??? Needs more work. Length computation, split into multiple insns,
do not use %r22 directly, expose delay slot. */
return \"ldd 16(%0),%%r2\;ldd 24(%0),%%r27\;bve,l (%%r2),%%r2\;nop\";
rtx xoperands[2];
/* First the special case for kernels, level 0 systems, etc. */
- if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)
+ if (TARGET_FAST_INDIRECT_CALLS)
return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";
/* Now the normal case -- we can reach $$dyncall directly or
/* If we're generating PIC code. */
xoperands[0] = operands[0];
- xoperands[1] = gen_label_rtx ();
+ if (TARGET_SOM || ! TARGET_GAS)
+ xoperands[1] = gen_label_rtx ();
output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
- output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
- CODE_LABEL_NUMBER (xoperands[1]));
- output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
+ if (TARGET_SOM || ! TARGET_GAS)
+ {
+ output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
+ }
+ else
+ {
+ output_asm_insn (\"addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1\", xoperands);
+ output_asm_insn (\"ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1\",
+ xoperands);
+ }
output_asm_insn (\"blr %%r0,%%r2\", xoperands);
output_asm_insn (\"bv,n %%r0(%%r1)\\n\\tnop\", xoperands);
return \"\";
[(set_attr "type" "dyncall")
(set (attr "length")
(cond [
-;; First NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; First FAST_INDIRECT_CALLS
+ (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
(const_int 0))
(const_int 8)
(const_int 0)))
(const_int 8)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 12)
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
+ (const_int 0))
+ (const_int 24)
+;; Out of reach PORTABLE_RUNTIME
(ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(const_int 20)]
-;; Out of range PIC case
- (const_int 24)))])
+;; Out of reach, can use ble
+ (const_int 12)))])
(define_expand "call_value"
[(parallel [(set (match_operand 0 "" "")
/* After each call we must restore the PIC register, even if it
doesn't appear to be used. */
- emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
}
DONE;
}")
[(set_attr "type" "call")
(set (attr "length")
;; If we're sure that we can either reach the target or that the
-;; linker can use a long-branch stub, then the length is 4 bytes.
+;; linker can use a long-branch stub, then the length is at most
+;; 8 bytes.
;;
-;; For long-calls the length will be either 52 bytes (non-pic)
-;; or 68 bytes (pic). */
+;; For long-calls the length will be at most 68 bytes (non-pic)
+;; or 84 bytes (pic).
;; Else we have to use a long-call;
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
- (const_int 4)
+ (const_int 8)
(if_then_else (eq (symbol_ref "flag_pic")
(const_int 0))
- (const_int 52)
- (const_int 68))))])
+ (const_int 68)
+ (const_int 84))))])
(define_insn "call_value_internal_reg_64bit"
[(set (match_operand 0 "" "=rf")
rtx xoperands[2];
/* First the special case for kernels, level 0 systems, etc. */
- if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)
+ if (TARGET_FAST_INDIRECT_CALLS)
return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";
/* Now the normal case -- we can reach $$dyncall directly or
/* If we're generating PIC code. */
xoperands[0] = operands[1];
- xoperands[1] = gen_label_rtx ();
+ if (TARGET_SOM || ! TARGET_GAS)
+ xoperands[1] = gen_label_rtx ();
output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
- output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
- CODE_LABEL_NUMBER (xoperands[1]));
- output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
+ if (TARGET_SOM || ! TARGET_GAS)
+ {
+ output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
+ }
+ else
+ {
+ output_asm_insn (\"addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1\", xoperands);
+ output_asm_insn (\"ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1\",
+ xoperands);
+ }
output_asm_insn (\"blr %%r0,%%r2\", xoperands);
output_asm_insn (\"bv,n %%r0(%%r1)\\n\\tnop\", xoperands);
return \"\";
[(set_attr "type" "dyncall")
(set (attr "length")
(cond [
-;; First NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; First FAST_INDIRECT_CALLS
+ (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
(const_int 0))
(const_int 8)
(const_int 0)))
(const_int 8)
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 12)
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
+ (const_int 0))
+ (const_int 24)
+;; Out of reach PORTABLE_RUNTIME
(ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(const_int 20)]
-;; Out of range PIC case
- (const_int 24)))])
+;; Out of reach, can use ble
+ (const_int 12)))])
;; Call subroutine returning any type.
/* After each call we must restore the PIC register, even if it
doesn't appear to be used. */
- emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
}
DONE;
}")
[(set_attr "type" "call")
(set (attr "length")
;; If we're sure that we can either reach the target or that the
-;; linker can use a long-branch stub, then the length is 4 bytes.
+;; linker can use a long-branch stub, then the length is at most
+;; 8 bytes.
;;
-;; For long-calls the length will be either 52 bytes (non-pic)
-;; or 68 bytes (pic). */
+;; For long-calls the length will be at most 68 bytes (non-pic)
+;; or 84 bytes (pic).
;; Else we have to use a long-call;
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
- (const_int 4)
+ (const_int 8)
(if_then_else (eq (symbol_ref "flag_pic")
(const_int 0))
- (const_int 52)
- (const_int 68))))])
+ (const_int 68)
+ (const_int 84))))])
(define_expand "sibcall_value"
[(parallel [(set (match_operand 0 "" "")
/* After each call we must restore the PIC register, even if it
doesn't appear to be used. */
- emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
}
DONE;
}")
[(set_attr "type" "call")
(set (attr "length")
;; If we're sure that we can either reach the target or that the
-;; linker can use a long-branch stub, then the length is 4 bytes.
+;; linker can use a long-branch stub, then the length is at most
+;; 8 bytes.
;;
-;; For long-calls the length will be either 52 bytes (non-pic)
-;; or 68 bytes (pic). */
+;; For long-calls the length will be at most 68 bytes (non-pic)
+;; or 84 bytes (pic).
;; Else we have to use a long-call;
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
- (const_int 4)
+ (const_int 8)
(if_then_else (eq (symbol_ref "flag_pic")
(const_int 0))
- (const_int 52)
- (const_int 68))))])
+ (const_int 68)
+ (const_int 84))))])
(define_insn "nop"
[(const_int 0)]
;;; EH does longjmp's from and within the data section. Thus,
;;; an interspace branch is required for the longjmp implementation.
-;;; Registers r1 and r2 are not saved in the jmpbuf environment.
-;;; Thus, they can be used as scratch registers for the jump.
+;;; Registers r1 and r2 are used as scratch registers for the jump.
(define_expand "interspace_jump"
[(parallel
[(set (pc) (match_operand 0 "pmode_register_operand" "a"))
{
/* The elements of the buffer are, in order: */
rtx fp = gen_rtx_MEM (Pmode, operands[0]);
- rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0], 4));
- rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0], 8));
+ rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0],
+ POINTER_SIZE / BITS_PER_UNIT));
+ rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0],
+ (POINTER_SIZE * 2) / BITS_PER_UNIT));
rtx pv = gen_rtx_REG (Pmode, 1);
/* This bit is the same as expand_builtin_longjmp. */
where to look for it when we get back to setjmp's function for
restoring the gp. */
emit_move_insn (pv, lab);
+
+ /* Prevent the insns above from being scheduled into the delay slot
+ of the interspace jump because the space register could change. */
+ emit_insn (gen_blockage ());
+
emit_jump_insn (gen_interspace_jump (pv));
emit_barrier ();
DONE;
}")
+
;;; Hope this is only within a function...
(define_insn "indirect_jump"
[(set (pc) (match_operand 0 "register_operand" "r"))]
emit_insn (gen_extzv_64 (operands[0], operands[1],
operands[2], operands[3]));
else
- emit_insn (gen_extzv_32 (operands[0], operands[1],
- operands[2], operands[3]));
+ {
+ if (! uint5_operand (operands[2], SImode)
+ || ! uint5_operand (operands[3], SImode))
+ FAIL;
+ emit_insn (gen_extzv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ }
DONE;
}")
emit_insn (gen_extv_64 (operands[0], operands[1],
operands[2], operands[3]));
else
- emit_insn (gen_extv_32 (operands[0], operands[1],
- operands[2], operands[3]));
+ {
+ if (! uint5_operand (operands[2], SImode)
+ || ! uint5_operand (operands[3], SImode))
+ FAIL;
+ emit_insn (gen_extv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ }
DONE;
}")
emit_insn (gen_insv_64 (operands[0], operands[1],
operands[2], operands[3]));
else
- emit_insn (gen_insv_32 (operands[0], operands[1],
- operands[2], operands[3]));
+ {
+ if (! uint5_operand (operands[2], SImode)
+ || ! uint5_operand (operands[3], SImode))
+ FAIL;
+ emit_insn (gen_insv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ }
DONE;
}")
[(set (pc)
(if_then_else
(match_operator 2 "comparison_operator"
- [(plus:SI (match_operand:SI 0 "register_operand" "+!r,!*f,!*m")
- (match_operand:SI 1 "int5_operand" "L,L,L"))
+ [(plus:SI
+ (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m")
+ (match_operand:SI 1 "int5_operand" "L,L,L"))
(const_int 0)])
(label_ref (match_operand 3 "" ""))
(pc)))
[(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
(label_ref (match_operand 3 "" ""))
(pc)))
- (set (match_operand:SI 0 "register_operand" "=!r,!*f,!*m,!*q")
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
(match_dup 1))]
""
"* return output_movb (operands, insn, which_alternative, 0); "
[(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
(pc)
(label_ref (match_operand 3 "" ""))))
- (set (match_operand:SI 0 "register_operand" "=!r,!*f,!*m,!*q")
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
(match_dup 1))]
""
"* return output_movb (operands, insn, which_alternative, 1); "
(clobber (reg:SI 31))])
(set (match_operand:SI 0 "register_operand" "")
(reg:SI 29))]
- "! TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "! TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && !TARGET_ELF32"
"
{
operands[2] = gen_reg_rtx (SImode);
(const_int 0)))
(const_int 28)
-;; NO_SPACE_REGS
- (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
- (const_int 0))
- (const_int 32)
-
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
- (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
- (const_int 0))
- (eq (symbol_ref "flag_pic")
- (const_int 0)))
- (const_int 32)
+;; Out of reach PIC
+ (ne (symbol_ref "flag_pic")
+ (const_int 0))
+ (const_int 44)
-;; PORTABLE_RUNTIME
+;; Out of reach PORTABLE_RUNTIME
(ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(const_int 40)]
-;; Out of range and PIC
- (const_int 44)))])
+;; Out of reach, can use ble
+ (const_int 32)))])
;; On the PA, the PIC register is call clobbered, so it must
;; be saved & restored around calls by the caller. If the call
;; restore the PIC register.
(define_expand "exception_receiver"
[(const_int 4)]
- "!TARGET_PORTABLE_RUNTIME && flag_pic"
+ "flag_pic"
"
{
- /* Load the PIC register from the stack slot (in our caller's
- frame). */
- emit_move_insn (pic_offset_table_rtx,
- gen_rtx_MEM (SImode,
- plus_constant (stack_pointer_rtx, -32)));
- emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
- emit_insn (gen_blockage ());
+ /* On the 64-bit port, we need a blockage because there is
+ confusion regarding the dependence of the restore on the
+ frame pointer. As a result, the frame pointer and pic
+ register restores sometimes are interchanged erroneously. */
+ if (TARGET_64BIT)
+ emit_insn (gen_blockage ());
+ /* Restore the PIC register using hppa_pic_save_rtx (). The
+ PIC register is not saved in the frame in 64-bit ABI. */
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+ /* Everything required has been emitted explicitly above; DONE ends
+ the expansion here (GCC define_expand convention), so the
+ (const_int 4) template is presumably only a placeholder. */
+ DONE;
+}")
+
+;; Expander for the builtin_setjmp_receiver pattern. It is enabled only
+;; when flag_pic is set and reloads the PIC register from the rtx
+;; returned by hppa_pic_save_rtx (). Operand 0 is matched inside a
+;; label_ref but is not referenced by the preparation code below.
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "flag_pic"
+ "
+{
+ /* Emit a blockage first on the 64-bit port. The exception_receiver
+ expander in this file does the same and explains it as keeping the
+ frame pointer and PIC register restores from being interchanged;
+ presumably the same reasoning applies here. */
+ if (TARGET_64BIT)
+ emit_insn (gen_blockage ());
+ /* Restore the PIC register. Hopefully, this will always be from
+ a stack slot. The only registers that are valid after a
+ builtin_longjmp are the stack and frame pointers. */
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
DONE;
}")