From: mgretton Date: Wed, 30 Nov 2011 13:51:35 +0000 (+0000) Subject: * gcc/config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=65f2f7584a675a5b881bf06fafddd8a5f15629e5 * gcc/config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. * gcc/config/arm/arm.md (mul64): New attribute. (generic_sched): Cortex-A15 is not scheduled generically. (cortex-a15.md): Include. * gcc/config/arm/cortex-a15.md: New machine description. * gcc/config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@181843 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 53ac833bc84..606cbe9dbbf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2011-11-30 Matthew Gretton-Dann + * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. + * config/arm/arm.md (mul64): New attribute. + (generic_sched): Cortex-A15 is not scheduled generically. + (cortex-a15.md): Include. + * config/arm/cortex-a15.md: New machine description. + * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. + +2011-11-30 Matthew Gretton-Dann + * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed. 2011-11-30 Iain Sandoe diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index ee26c51b0a0..a57494c4c3e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24127,6 +24127,9 @@ arm_issue_rate (void) { switch (arm_tune) { + case cortexa15: + return 3; + case cortexr4: case cortexr4f: case cortexr5: diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 7ac3f5c8565..8ec9b2288bb 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -355,6 +355,13 @@ (const_string "mult") (const_string "alu"))) +; Is this an (integer side) multiply with a 64-bit result? +(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) + ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) @@ -518,7 +525,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -544,6 +551,7 @@ (include "cortex-a5.md") (include "cortex-a8.md") (include "cortex-a9.md") +(include "cortex-a15.md") (include "cortex-r4.md") (include "cortex-r4f.md") (include "cortex-m4.md") diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md new file mode 100644 index 00000000000..ccab7cbe906 --- /dev/null +++ b/gcc/config/arm/cortex-a15.md @@ -0,0 +1,186 @@ +;; ARM Cortex-A15 pipeline description +;; Copyright (C) 2011 Free Software Foundation, Inc. +;; +;; Written by Matthew Gretton-Dann + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a15") + +;; The Cortex-A15 core is modelled as a triple issue pipeline that has +;; the following dispatch units. +;; 1. Two pipelines for simple integer operations: SX1, SX2 +;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2 +;; 3. One pipeline for branch operations: BX +;; 4. One pipeline for integer multiply and divide operations: MX +;; 5. Two pipelines for load and store operations: LS1, LS2 +;; +;; We can issue into three pipelines per-cycle. +;; +;; We assume that where we have unit pairs xx1 is always filled before xx2. + +;; The three issue units +(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") + +(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") +(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") +(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") +(final_presence_set "ca15_i1" "ca15_i0") +(final_presence_set "ca15_i2" "ca15_i1") + +;; The main dispatch units +(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") +(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15") +(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") +(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") + +(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") + +;; The extended load-store pipeline +(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") + +;; The extended ALU pipeline +(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") +(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") + +;; Simple Execution Unit: +;; +;; Simple ALU without shift +(define_insn_reservation "cortex_a15_alu" 2 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu") + (eq_attr "neon_type" "none"))) + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") + +;; ALU ops with immediate shift +(define_insn_reservation "cortex_a15_alu_shift" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu_shift") + (eq_attr "neon_type" "none"))) + "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ + |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") + +;; ALU ops with register controlled shift +(define_insn_reservation "cortex_a15_alu_shift_reg" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu_shift_reg") + (eq_attr "neon_type" "none"))) + "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ + |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ + |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") + +;; Multiply Execution Unit: +;; +;; 32-bit multiplies +(define_insn_reservation "cortex_a15_mult32" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "mult") + (and (eq_attr "neon_type" "none") + (eq_attr "mul64" "no")))) + "ca15_issue1,ca15_mx") + +;; 64-bit multiplies +(define_insn_reservation "cortex_a15_mult64" 4 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "mult") + (and (eq_attr "neon_type" "none") + (eq_attr "mul64" "yes")))) + "ca15_issue1,ca15_mx*2") + +;; Integer divide +(define_insn_reservation "cortex_a15_udiv" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "insn" "udiv")) + "ca15_issue1,ca15_mx") + +(define_insn_reservation "cortex_a15_sdiv" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "insn" "sdiv")) + "ca15_issue1,ca15_mx") + +;; Block all issue pipes for a cycle +(define_insn_reservation "cortex_a15_block" 1 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "block") + (eq_attr "neon_type" "none"))) + "ca15_issue3") + +;; Branch execution Unit +;; +;; Branches take one issue slot. +;; No latency as there is no result +(define_insn_reservation "cortex_a15_branch" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "branch") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_bx") + + +;; We lie with calls. They take up all issue slots, and form a block in the +;; pipeline. The result however is available the next cycle. +;; +;; Addition of new units requires this to be updated. +(define_insn_reservation "cortex_a15_call" 1 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "call") + (eq_attr "neon_type" "none"))) + "ca15_issue3,\ + ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\ + ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\ + +ca15_sx2_sat+ca15_ldr+ca15_str") + +;; Load-store execution Unit +;; +;; Loads of up to two words. +(define_insn_reservation "cortex_a15_load1" 4 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "load_byte,load1,load2") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_ls,ca15_ldr,nothing") + +;; Loads of three or four words. +(define_insn_reservation "cortex_a15_load3" 5 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "load3,load4") + (eq_attr "neon_type" "none"))) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") + +;; Stores of up to two words. +(define_insn_reservation "cortex_a15_store1" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "store1,store2") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_ls,ca15_str") + +;; Stores of three or four words. +(define_insn_reservation "cortex_a15_store3" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "store3,store4") + (eq_attr "neon_type" "none"))) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") + +;; Simple execution unit bypasses +(define_bypass 1 "cortex_a15_alu" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_load1,cortex_a15_load3") diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 4b94a7e0baa..1128d1904b0 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -31,6 +31,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/cirrus.md \ $(srcdir)/config/arm/constraints.md \ + $(srcdir)/config/arm/cortex-a15.md \ $(srcdir)/config/arm/cortex-a5.md \ $(srcdir)/config/arm/cortex-a8.md \ $(srcdir)/config/arm/cortex-a8-neon.md \