2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>

author gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>

Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)

committer gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>

Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)
author gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)
committer gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 8a49fd8..6691859 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,17 @@
  2013-01-29  Greta Yorsh  <Greta.Yorsh@arm.com>
  
+        * config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
+        (cortex_a7_neon_mla): Likewise.
+        (cortex_a7_fpfmad): New reservation.
+        (cortex_a7_fpmacs): Use ffmas and update required units.
+        (cortex_a7_fpmuld): Update required units and latency.
+        (cortex_a7_fpmacd): Likewise.
+        (cortex_a7_fdivs, cortex_a7_fdivd): Likewise.
+        (cortex_a7_neon): Likewise.
+        (bypass): Update participating units.
+
+2013-01-29  Greta Yorsh  <Greta.Yorsh@arm.com>
+
          * config/arm/arm.md (type): Add ffmas and ffmad to "type" attribute.
          * config/arm/vfp.md (fma,fmsub,fnmsub,fnmadd): Change type
          from fmac to ffma.
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md

index 58fe4bf..a55efeb 100644 (file)
--- a/gcc/config/arm/cortex-a7.md
+++ b/gcc/config/arm/cortex-a7.md
@@ -202,6 +202,9 @@
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Floating-point arithmetic.
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Neon integer, neon floating point, and single-precision floating
+;; point instructions of the same type have the same timing
+;; characteristics, but neon instructions cannot dual-issue.
  
  (define_insn_reservation "cortex_a7_fpalu" 4
    (and (eq_attr "tune" "cortexa7")
@@ -229,18 +232,37 @@
              (eq_attr "neon_type" "none")))
    "cortex_a7_ex1+cortex_a7_fpmul_pipe")
  
-;; For single-precision multiply-accumulate, the add (accumulate) is issued
-;; whilst the multiply is in F4.  The multiply result can then be forwarded
-;; from F5 to F1.  The issue unit is only used once (when we first start
-;; processing the instruction), but the usage of the FP add pipeline could
-;; block other instructions attempting to use it simultaneously.  We try to
-;; avoid that using cortex_a7_fpadd_pipe.
+(define_insn_reservation "cortex_a7_neon_mul" 4
+  (and (eq_attr "tune" "cortexa7")
+       (eq_attr "neon_type"
+                "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+                 neon_mul_qqq_8_16_32_ddd_32,\
+                 neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\
+                 neon_mul_ddd_16_scalar_32_16_long_scalar,\
+                 neon_mul_qqd_32_scalar,\
+                 neon_fp_vmul_ddd,\
+                 neon_fp_vmul_qqd"))
+  "(cortex_a7_both+cortex_a7_fpmul_pipe)*2")
  
  (define_insn_reservation "cortex_a7_fpmacs" 8
    (and (eq_attr "tune" "cortexa7")
-       (and (eq_attr "type" "fmacs")
+       (and (eq_attr "type" "fmacs,ffmas")
              (eq_attr "neon_type" "none")))
-  "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+  "cortex_a7_ex1+cortex_a7_fpmul_pipe")
+
+(define_insn_reservation "cortex_a7_neon_mla" 8
+  (and (eq_attr "tune" "cortexa7")
+       (eq_attr "neon_type"
+                "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+                 neon_mla_qqq_8_16,\
+                 neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\
+                 neon_mla_qqq_32_qqd_32_scalar,\
+                 neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\
+                 neon_fp_vmla_ddd,\
+                 neon_fp_vmla_qqq,\
+                 neon_fp_vmla_ddd_scalar,\
+                 neon_fp_vmla_qqq_scalar"))
+  "cortex_a7_both+cortex_a7_fpmul_pipe")
  
  ;; Non-multiply instructions can issue between two cycles of a
  ;; double-precision multiply. 
@@ -249,15 +271,19 @@
    (and (eq_attr "tune" "cortexa7")
         (and (eq_attr "type" "fmuld")
              (eq_attr "neon_type" "none")))
-  "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
-   cortex_a7_ex1+cortex_a7_fpmul_pipe")
+  "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3")
  
  (define_insn_reservation "cortex_a7_fpmacd" 11
    (and (eq_attr "tune" "cortexa7")
         (and (eq_attr "type" "fmacd")
              (eq_attr "neon_type" "none")))
-  "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
-   cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+  "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3")
+
+(define_insn_reservation "cortex_a7_fpfmad" 8
+  (and (eq_attr "tune" "cortexa7")
+       (and (eq_attr "type" "ffmad")
+            (eq_attr "neon_type" "none")))
+  "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Floating-point divide/square root instructions.
@@ -267,13 +293,13 @@
    (and (eq_attr "tune" "cortexa7")
         (and (eq_attr "type" "fdivs")
              (eq_attr "neon_type" "none")))
-  "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
+  "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13")
  
-(define_insn_reservation "cortex_a7_fdivd" 29
+(define_insn_reservation "cortex_a7_fdivd" 31
    (and (eq_attr "tune" "cortexa7")
         (and (eq_attr "type" "fdivd")
              (eq_attr "neon_type" "none")))
-  "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
+  "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; VFP to/from core transfers.
@@ -338,16 +364,36 @@
  ;; i.e. a latency of two.
  
  (define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
-                 "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
-                 cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
-                 cortex_a7_f2r")
+                  "cortex_a7_fpalu,\
+                   cortex_a7_fpmuls,cortex_a7_fpmacs,\
+                   cortex_a7_fpmuld,cortex_a7_fpmacd, cortex_a7_fpfmad,\
+                   cortex_a7_fdivs, cortex_a7_fdivd,\
+                  cortex_a7_f2r")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; NEON load/store.
+;; NEON
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  
+;; Simple modeling for all neon instructions not covered earlier.
  
  (define_insn_reservation "cortex_a7_neon" 4
    (and (eq_attr "tune" "cortexa7")
-       (eq_attr "neon_type" "!none"))
+       (eq_attr "neon_type"
+                "!none,\
+                  neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+                  neon_mul_qqq_8_16_32_ddd_32,\
+                  neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\
+                  neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+                  neon_mla_qqq_8_16,\
+                  neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\
+                  neon_mla_qqq_32_qqd_32_scalar,\
+                  neon_mul_ddd_16_scalar_32_16_long_scalar,\
+                  neon_mul_qqd_32_scalar,\
+                  neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\
+                  neon_fp_vmul_ddd,\
+                  neon_fp_vmul_qqd,\
+                  neon_fp_vmla_ddd,\
+                  neon_fp_vmla_qqq,\
+                  neon_fp_vmla_ddd_scalar,\
+                  neon_fp_vmla_qqq_scalar"))
    "cortex_a7_both*2")
author	gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>
	Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)
committer	gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>
	Tue, 29 Jan 2013 18:36:53 +0000 (18:36 +0000)
gcc/ChangeLog		patch | blob | history
gcc/config/arm/cortex-a7.md		patch | blob | history