OSDN Git Service

gcc/
authorjules <jules@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 26 Jul 2007 12:04:02 +0000 (12:04 +0000)
committerjules <jules@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 26 Jul 2007 12:04:02 +0000 (12:04 +0000)
* config/arm/arm.c (arm_mac_accumulator_is_mul_result): New.
* config/arm/arm-protos.h (arm_mac_accumulator_is_mul_result): New.
* config/arm/cortex-a8.md: New.
* config/arm/cortex-a8-neon.md: New.
* config/arm/neon-schedgen.ml: New.
* config/arm/neon.md (vqh_mnem): New.
(neon_type): New.
(Is_float_mode): New.
(Scalar_mul_8_16): New.
(Is_d_reg): New.
(V_mode_nunits): New.
(All instruction patterns): Annotate with neon_type attribute
values.
* config/arm/arm.md: Include cortex-a8.md.
(insn): Add smmla, umaal, smlald, smlsld, clz, mrs, msr and xtab
values.
Annotate instruction patterns accordingly.
(generic_sched): Do not use generic scheduling for Cortex-A8.
(generic_vfp): Do not use generic VFP scheduling for Cortex-A8.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@126953 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/arm/arm-protos.h
gcc/config/arm/arm.c
gcc/config/arm/arm.md
gcc/config/arm/cortex-a8-neon.md [new file with mode: 0644]
gcc/config/arm/cortex-a8.md [new file with mode: 0644]
gcc/config/arm/neon-schedgen.ml [new file with mode: 0644]
gcc/config/arm/neon.md

index 37ece5c..aa1e9d3 100644 (file)
@@ -1,3 +1,26 @@
+2007-07-26  Mark Shinwell  <shinwell@codesourcery.com>
+           Julian Brown  <julian@codesourcery.com>
+
+       * config/arm/arm.c (arm_mac_accumulator_is_mul_result): New.
+       * config/arm/arm-protos.h (arm_mac_accumulator_is_mul_result): New.
+       * config/arm/cortex-a8.md: New.
+       * config/arm/cortex-a8-neon.md: New.
+       * config/arm/neon-schedgen.ml: New.
+       * config/arm/neon.md (vqh_mnem): New.
+       (neon_type): New.
+       (Is_float_mode): New.
+       (Scalar_mul_8_16): New.
+       (Is_d_reg): New.
+       (V_mode_nunits): New.
+       (All instruction patterns): Annotate with neon_type attribute
+       values.
+       * config/arm/arm.md: Include cortex-a8.md.
+       (insn): Add smmla, umaal, smlald, smlsld, clz, mrs, msr and xtab
+       values.
+       Annotate instruction patterns accordingly.
+       (generic_sched): Do not use generic scheduling for Cortex-A8.
+       (generic_vfp): Do not use generic VFP scheduling for Cortex-A8.
+
 2007-07-26  Daniel Jacobowitz  <dan@codesourcery.com>
 
        * fold-const.c (fold_read_from_constant_string): Use
index 000775d..f238026 100644 (file)
@@ -94,6 +94,7 @@ extern int arm_no_early_store_addr_dep (rtx, rtx);
 extern int arm_no_early_alu_shift_dep (rtx, rtx);
 extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
 extern int arm_no_early_mul_dep (rtx, rtx);
+extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
 
 extern int tls_mentioned_p (rtx);
 extern int symbol_mentioned_p (rtx);
index 68aa89e..de0fb41 100644 (file)
@@ -18167,6 +18167,39 @@ arm_cxx_guard_type (void)
   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
 }
 
+/* Return non-zero if the consumer (a multiply-accumulate instruction)
+   has an accumulator dependency on the result of the producer (a
+   multiplication instruction) and no other dependency on that result.
+   Used by scheduling descriptions (e.g. cortex-a8.md) to model the
+   cheaper mul->mac accumulator-forwarding path.  */
+int
+arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
+{
+  rtx mul = PATTERN (producer);
+  rtx mac = PATTERN (consumer);
+  rtx mul_result;
+  rtx mac_op0, mac_op1, mac_acc;
+
+  /* Strip any conditional-execution wrapper so we inspect the
+     underlying SET of each instruction.  */
+  if (GET_CODE (mul) == COND_EXEC)
+    mul = COND_EXEC_CODE (mul);
+  if (GET_CODE (mac) == COND_EXEC)
+    mac = COND_EXEC_CODE (mac);
+
+  /* Check that mul is of the form (set (...) (mult ...))
+     and mac is of the form (set (...) (plus (mult ...) (...))).  */
+  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
+      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
+          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
+    return 0;
+
+  mul_result = XEXP (mul, 0);
+  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
+  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
+  mac_acc = XEXP (XEXP (mac, 1), 1);
+
+  /* The multiply's destination must feed ONLY the accumulator input of
+     the MAC: an overlap with either multiply operand would take the
+     slower full-result forwarding path, so reject that case.  */
+  return (reg_overlap_mentioned_p (mul_result, mac_acc)
+          && !reg_overlap_mentioned_p (mul_result, mac_op0)
+          && !reg_overlap_mentioned_p (mul_result, mac_op1));
+}
+
 
 /* The EABI says test the least significant bit of a guard variable.  */
 
index ddc8bed..1d5313e 100644 (file)
 ;; scheduling information.
 
 (define_attr "insn"
-        "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,other"
+        "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,other"
         (const_string "other"))
 
 ; TYPE attribute is used to detect floating point instructions which, if
 ; mav_farith   Floating point arithmetic (4 cycle)
 ; mav_dmult    Double multiplies (7 cycle)
 ;
+
 (define_attr "type"
-       "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" 
+       "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult"
        (if_then_else 
         (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
         (const_string "mult")
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else 
-          (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs") 
+          (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8")
           (const_string "no")
           (const_string "yes"))))
 
 (define_attr "generic_vfp" "yes,no"
   (const (if_then_else
          (and (eq_attr "fpu" "vfp")
-              (eq_attr "tune" "!arm1020e,arm1022e"))
+              (eq_attr "tune" "!arm1020e,arm1022e,cortexa8"))
          (const_string "yes")
          (const_string "no"))))
 
 (include "arm1020e.md")
 (include "arm1026ejs.md")
 (include "arm1136jfs.md")
+(include "cortex-a8.md")
 
 \f
 ;;---------------------------------------------------------------------------
   "TARGET_INT_SIMD"
   "uxtab%?\\t%0, %2, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "insn" "xtab")
    (set_attr "type" "alu_shift")]
 )
 
   "TARGET_INT_SIMD"
   "sxtab%?\\t%0, %2, %1"
   [(set_attr "type" "alu_shift")
+   (set_attr "insn" "xtab")
    (set_attr "predicable" "yes")]
 )
 
        (clz:SI (match_operand:SI 1 "s_register_operand" "r")))]
   "TARGET_32BIT && arm_arch5"
   "clz%?\\t%0, %1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "insn" "clz")])
 
 (define_expand "ffssi2"
   [(set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md
new file mode 100644 (file)
index 0000000..ed97ed1
--- /dev/null
@@ -0,0 +1,1307 @@
+;; ARM Cortex-A8 NEON scheduling description.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+(define_automaton "cortex_a8_neon")
+
+;; Only one load, store, permute, MCR or MRC instruction can be issued
+;; per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")
+
+;; Only one data-processing instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")
+
+;; The VFPLite unit (non-pipelined).
+(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")
+
+;; We need a special mutual exclusion (to be used in addition to
+;; cortex_a8_neon_issue_dp) for the case when an instruction such as
+;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
+;; E2 of the floating-point add pipeline.  On the cycle previous to that
+;; forward we must prevent issue of any instruction to the floating-point
+;; add pipeline, but still allow issue of a data-processing instruction
+;; to any of the other pipelines.
+(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")
+
+;; Patterns of reservation.
+;; We model the NEON issue units as running in parallel with the core ones.
+;; We assume that multi-cycle NEON instructions get decomposed into
+;; micro-ops as they are issued into the NEON pipeline, and not as they
+;; are issued into the ARM pipeline.  Dual issue may not occur except
+;; upon the first and last cycles of a multi-cycle instruction, but it
+;; is unclear whether two multi-cycle instructions can issue together (in
+;; this model they cannot).  It is also unclear whether a pair of
+;; a multi-cycle and single-cycle instructions, that could potentially
+;; issue together, only do so if (say) the single-cycle one precedes
+;; the other.
+
+(define_reservation "cortex_a8_neon_dp"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp")
+(define_reservation "cortex_a8_neon_dp_2"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+                     cortex_a8_neon_issue_dp")
+(define_reservation "cortex_a8_neon_dp_4"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp")
+
+(define_reservation "cortex_a8_neon_fadd"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+                     cortex_a8_neon_issue_fadd")
+(define_reservation "cortex_a8_neon_fadd_2"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+                     cortex_a8_neon_issue_fadd,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd")
+
+(define_reservation "cortex_a8_neon_perm"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+\
+                     cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_perm_2"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+\
+                     cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_perm_3"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+\
+                     cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+
+(define_reservation "cortex_a8_neon_ls"
+                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_2"
+                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_3"
+                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_4"
+                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_5"
+                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+                     cortex_a8_neon_issue_perm")
+
+(define_reservation "cortex_a8_neon_fmul_then_fadd"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+                    nothing*3,\
+                    cortex_a8_neon_issue_fadd")
+(define_reservation "cortex_a8_neon_fmul_then_fadd_2"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+                    cortex_a8_neon_issue_dp,\
+                    nothing*2,\
+                    cortex_a8_neon_issue_fadd,\
+                    cortex_a8_neon_issue_fadd")
+
+;; VFP instructions can only be single-issued into the NEON pipeline.
+(define_reservation "cortex_a8_vfp"
+                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+                     cortex_a8_neon_issue_perm+cortex_a8_vfplite")
+
+;; VFP instructions.
+;; The VFPLite unit that executes these isn't pipelined; we give the
+;; worst-case latencies (and choose the double-precision ones where we
+;; do not distinguish on precision).  We assume RunFast mode is not
+;; enabled and therefore do not model the possible VFP instruction
+;; execution in the NEON floating point pipelines, nor additional
+;; latencies for the processing of subnormals.
+;;
+;; TODO: RunFast mode could potentially be enabled when -ffast-math
+;; is specified.
+
+(define_insn_reservation "cortex_a8_vfp_add_sub" 10
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "farith"))
+  "cortex_a8_vfp,cortex_a8_vfplite*9")
+
+(define_insn_reservation "cortex_a8_vfp_muls" 12
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fmuls"))
+  "cortex_a8_vfp,cortex_a8_vfplite*11")
+
+(define_insn_reservation "cortex_a8_vfp_muld" 17
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fmuld"))
+  "cortex_a8_vfp,cortex_a8_vfplite*16")
+
+(define_insn_reservation "cortex_a8_vfp_macs" 21
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fmacs"))
+  "cortex_a8_vfp,cortex_a8_vfplite*20")
+
+(define_insn_reservation "cortex_a8_vfp_macd" 26
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fmacd"))
+  "cortex_a8_vfp,cortex_a8_vfplite*25")
+
+(define_insn_reservation "cortex_a8_vfp_divs" 37
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fdivs"))
+  "cortex_a8_vfp,cortex_a8_vfplite*36")
+
+(define_insn_reservation "cortex_a8_vfp_divd" 65
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "fdivd"))
+  "cortex_a8_vfp,cortex_a8_vfplite*64")
+
+;; Comparisons can actually take 7 cycles sometimes instead of four,
+;; but given all the other instructions lumped into type=ffarith that
+;; take four cycles, we pick that latency.
+(define_insn_reservation "cortex_a8_vfp_farith" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "ffarith"))
+  "cortex_a8_vfp,cortex_a8_vfplite*3")
+
+(define_insn_reservation "cortex_a8_vfp_cvt" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "f_cvt"))
+  "cortex_a8_vfp,cortex_a8_vfplite*6")
+
+;; NEON -> core transfers.
+
+(define_insn_reservation "neon_mrc" 20
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mrc"))
+  "cortex_a8_neon_ls")
+
+(define_insn_reservation "neon_mrrc" 21
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mrrc"))
+  "cortex_a8_neon_ls_2")
+
+;; The remainder of this file is auto-generated by neon-schedgen.
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N3.
+(define_insn_reservation "neon_int_1" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_int_1"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N3.
+(define_insn_reservation "neon_int_2" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_int_2"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "neon_int_3" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_int_3"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N4.
+(define_insn_reservation "neon_int_4" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_int_4"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N4.
+(define_insn_reservation "neon_int_5" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_int_5"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "neon_vqneg_vqabs" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vqneg_vqabs"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation produce a result at N3.
+(define_insn_reservation "neon_vmov" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vmov"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "neon_vaba" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vaba"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "neon_vaba_qqq" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vaba_qqq"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "neon_vsma" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vsma"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6.
+(define_insn_reservation "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "neon_mul_qqq_8_16_32_ddd_32" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
+(define_insn_reservation "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "neon_mla_qqq_8_16" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mla_qqq_8_16"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 4.
+(define_insn_reservation "neon_mla_qqq_32_qqd_32_scalar" 9
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
+  "cortex_a8_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6.
+(define_insn_reservation "neon_mul_ddd_16_scalar_32_16_long_scalar" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
+(define_insn_reservation "neon_mul_qqd_32_scalar" 9
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mul_qqd_32_scalar"))
+  "cortex_a8_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "neon_shift_1" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_shift_1"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "neon_shift_2" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_shift_2"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3 on cycle 2.
+(define_insn_reservation "neon_shift_3" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_shift_3"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N1.
+(define_insn_reservation "neon_vshl_ddd" 1
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vshl_ddd"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4 on cycle 2.
+(define_insn_reservation "neon_vqshl_vrshl_vqrshl_qqq" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "neon_vsra_vrsra" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vsra_vrsra"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5.
+(define_insn_reservation "neon_fp_vadd_ddd_vabs_dd" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd"))
+  "cortex_a8_neon_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5 on cycle 2.
+(define_insn_reservation "neon_fp_vadd_qqq_vabs_qq" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq"))
+  "cortex_a8_neon_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N5.
+(define_insn_reservation "neon_fp_vsum" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vsum"))
+  "cortex_a8_neon_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5.
+(define_insn_reservation "neon_fp_vmul_ddd" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmul_ddd"))
+  "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2.
+(define_insn_reservation "neon_fp_vmul_qqd" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmul_qqd"))
+  "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "neon_fp_vmla_ddd" 9
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmla_ddd"))
+  "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "neon_fp_vmla_qqq" 10
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmla_qqq"))
+  "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "neon_fp_vmla_ddd_scalar" 9
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar"))
+  "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "neon_fp_vmla_qqq_scalar" 10
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar"))
+  "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9.
+(define_insn_reservation "neon_fp_vrecps_vrsqrts_ddd" 9
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
+  "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "neon_fp_vrecps_vrsqrts_qqq" 10
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq"))
+  "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2.
+(define_insn_reservation "neon_bp_simple" 2
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_bp_simple"))
+  "cortex_a8_neon_perm")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 2.
+(define_insn_reservation "neon_bp_2cycle" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_bp_2cycle"))
+  "cortex_a8_neon_perm_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "neon_bp_3cycle" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_bp_3cycle"))
+  "cortex_a8_neon_perm_3")
+
+;; Instructions using this reservation produce a result at N1.
+(define_insn_reservation "neon_ldr" 1
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_ldr"))
+  "cortex_a8_neon_ls")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_str" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_str"))
+  "cortex_a8_neon_ls")
+
+;; Instructions using this reservation produce a result at N1 on cycle 2.
+(define_insn_reservation "neon_vld1_1_2_regs" 2
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld1_1_2_regs"))
+  "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N1 on cycle 3.
+(define_insn_reservation "neon_vld1_3_4_regs" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld1_3_4_regs"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "neon_vld2_2_regs_vld1_vld2_all_lanes" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
+  "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N2 on cycle 3.
+(define_insn_reservation "neon_vld2_4_regs" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld2_4_regs"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 4.
+(define_insn_reservation "neon_vld3_vld4" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld3_vld4"))
+  "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_vst1_1_2_regs_vst2_2_regs" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
+  "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+;; Stores produce no register result, hence the latency of zero.
+(define_insn_reservation "neon_vst1_3_4_regs" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst1_3_4_regs"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_vst2_4_regs_vst3_vst4" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4"))
+  "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_vst3_vst4" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst3_vst4"))
+  "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "neon_vld1_vld2_lane" 4
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld1_vld2_lane"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 5.
+(define_insn_reservation "neon_vld3_vld4_lane" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld3_vld4_lane"))
+  "cortex_a8_neon_ls_5")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_vst1_vst2_lane" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst1_vst2_lane"))
+  "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "neon_vst3_vst4_lane" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vst3_vst4_lane"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "neon_vld3_vld4_all_lanes" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_vld3_vld4_all_lanes"))
+  "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2.
+;; (MCR: core-to-NEON register transfer, handled by the permute unit.)
+(define_insn_reservation "neon_mcr" 2
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mcr"))
+  "cortex_a8_neon_perm")
+
+;; Instructions using this reservation produce a result at N2.
+;; (Two MCRs or an MCRR, using the two-cycle permute reservation.)
+(define_insn_reservation "neon_mcr_2_mcrr" 2
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "neon_type" "neon_mcr_2_mcrr"))
+  "cortex_a8_neon_perm_2")
+
+;; Exceptions to the default latencies.
+;;
+;; Each define_bypass below overrides the producer's default latency
+;; (given in its define_insn_reservation above) when the result feeds
+;; one of the listed consumers.  The consumer list is identical for
+;; every bypass.
+;; NOTE(review): this section appears to be generated by
+;; neon-schedgen.ml -- prefer regenerating over hand-editing entries.
+
+;; Bypasses for core-to-NEON register transfers.
+(define_bypass 1 "neon_mcr_2_mcrr"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "neon_mcr"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for results produced by NEON loads.
+(define_bypass 2 "neon_vld3_vld4_all_lanes"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_vld3_vld4_lane"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_vld1_vld2_lane"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "neon_vld3_vld4"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_vld2_4_regs"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_vld2_2_regs_vld1_vld2_all_lanes"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_vld1_3_4_regs"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "neon_vld1_1_2_regs"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "neon_ldr"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for results produced by the permute pipeline.
+(define_bypass 3 "neon_bp_3cycle"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_bp_2cycle"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "neon_bp_simple"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for floating-point results.
+(define_bypass 9 "neon_fp_vrecps_vrsqrts_qqq"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "neon_fp_vrecps_vrsqrts_ddd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "neon_fp_vmla_qqq_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "neon_fp_vmla_ddd_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "neon_fp_vmla_qqq"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "neon_fp_vmla_ddd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_fp_vmul_qqd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "neon_fp_vmul_ddd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "neon_fp_vsum"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_fp_vadd_qqq_vabs_qq"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "neon_fp_vadd_ddd_vabs_dd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for shift results.
+(define_bypass 5 "neon_vsra_vrsra"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "neon_vqshl_vrshl_vqrshl_qqq"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "neon_vshl_ddd"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_shift_3"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_shift_2"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_shift_1"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for multiply and multiply-accumulate results.
+(define_bypass 5 "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "neon_mul_qqd_32_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_mul_ddd_16_scalar_32_16_long_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "neon_mla_qqq_32_qqd_32_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "neon_mla_qqq_8_16"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "neon_mul_qqq_8_16_32_ddd_32"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+;; Bypasses for the remaining integer-pipeline results.
+(define_bypass 5 "neon_vsma"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "neon_vaba_qqq"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "neon_vaba"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_vmov"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_vqneg_vqabs"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_int_5"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "neon_int_4"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_int_3"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_int_2"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "neon_int_1"
+               "neon_int_1,\
+               neon_int_4,\
+               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mul_qqq_8_16_32_ddd_32,\
+               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+               neon_mla_qqq_8_16,\
+               neon_fp_vadd_ddd_vabs_dd,\
+               neon_fp_vadd_qqq_vabs_qq,\
+               neon_fp_vmla_ddd,\
+               neon_fp_vmla_qqq,\
+               neon_fp_vrecps_vrsqrts_ddd,\
+               neon_fp_vrecps_vrsqrts_qqq")
+
diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md
new file mode 100644 (file)
index 0000000..69d44de
--- /dev/null
@@ -0,0 +1,272 @@
+;; ARM Cortex-A8 scheduling description.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+(define_automaton "cortex_a8")
+
+;; Only one load/store instruction can be issued per cycle
+;; (although reservation of this unit is only required for single
+;; loads and stores -- see below).
+(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")
+
+;; Only one branch instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")
+
+;; The two ALU pipelines.
+(define_cpu_unit "cortex_a8_alu0" "cortex_a8")
+(define_cpu_unit "cortex_a8_alu1" "cortex_a8")
+
+;; The usual flow of an instruction through the pipelines.
+(define_reservation "cortex_a8_default"
+                    "cortex_a8_alu0|cortex_a8_alu1")
+
+;; The flow of a branch instruction through the pipelines.
+(define_reservation "cortex_a8_branch"
+                    "(cortex_a8_alu0+cortex_a8_issue_branch)|\
+                     (cortex_a8_alu1+cortex_a8_issue_branch)")
+
+;; The flow of a load or store instruction through the pipeline in
+;; the case where that instruction consists of only one micro-op...
+(define_reservation "cortex_a8_load_store_1"
+                    "(cortex_a8_alu0+cortex_a8_issue_ls)|\
+                     (cortex_a8_alu1+cortex_a8_issue_ls)")
+
+;; ...and in the case of two micro-ops.  We don't need to reserve
+;; cortex_a8_issue_ls here because dual issue is altogether forbidden
+;; during the issue cycle of the first micro-op.  (Instead of modelling
+;; a separate issue unit, we reserve alu0 and alu1 to
+;; prevent any other instructions from being issued upon that first cycle.)
+;; Even though the load/store pipeline is usually available in either
+;; ALU pipe, multi-cycle instructions always issue in pipeline 0.  This
+;; reservation is therefore the same as cortex_a8_multiply_2 below.
+(define_reservation "cortex_a8_load_store_2"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")
+
+;; The flow of a single-cycle multiplication.
+(define_reservation "cortex_a8_multiply"
+                    "cortex_a8_alu0")
+
+;; The flow of a multiplication instruction that gets decomposed into
+;; two micro-ops.  The two micro-ops will be issued to pipeline 0 on
+;; successive cycles.  Dual issue cannot happen at the same time as the
+;; first of the micro-ops.
+(define_reservation "cortex_a8_multiply_2"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")
+
+;; Similarly, the flow of a multiplication instruction that gets
+;; decomposed into three micro-ops.  Dual issue cannot occur except on
+;; the cycle upon which the third micro-op is issued.
+(define_reservation "cortex_a8_multiply_3"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")
+
+;; The model given here assumes that all instructions are unconditional.
+
+;; Data processing instructions, but not move instructions.
+
+;; We include CLZ with these since it has the same execution pattern
+;; (source read in E2 and destination available at the end of that cycle).
+(define_insn_reservation "cortex_a8_alu" 2
+  (and (eq_attr "tune" "cortexa8")
+       (ior (and (eq_attr "type" "alu")
+                (not (eq_attr "insn" "mov,mvn")))
+            (eq_attr "insn" "clz")))
+  "cortex_a8_default")
+
+(define_insn_reservation "cortex_a8_alu_shift" 2
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu_shift")
+            (not (eq_attr "insn" "mov,mvn"))))
+  "cortex_a8_default")
+
+(define_insn_reservation "cortex_a8_alu_shift_reg" 2
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu_shift_reg")
+            (not (eq_attr "insn" "mov,mvn"))))
+  "cortex_a8_default")
+
+;; Move instructions.
+
+(define_insn_reservation "cortex_a8_mov" 1
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu,alu_shift,alu_shift_reg")
+            (eq_attr "insn" "mov,mvn")))
+  "cortex_a8_default")
+
+;; Exceptions to the default latencies for data processing instructions.
+
+;; A move followed by an ALU instruction with no early dep.
+;; (Such a pair can be issued in parallel, hence latency zero.)
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")
+
+;; An ALU instruction followed by an ALU instruction with no early dep.
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")
+
+;; Multiplication instructions.  These are categorized according to their
+;; reservation behaviour and the need below to distinguish certain
+;; varieties for bypasses.  Results are available at the E5 stage
+;; (but some of these are multi-cycle instructions which explains the
+;; latencies below).
+
+(define_insn_reservation "cortex_a8_mul" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "mul,smulxy,smmul"))
+  "cortex_a8_multiply_2")
+
+(define_insn_reservation "cortex_a8_mla" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
+  "cortex_a8_multiply_2")
+
+(define_insn_reservation "cortex_a8_mull" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
+  "cortex_a8_multiply_3")
+
+(define_insn_reservation "cortex_a8_smulwy" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smulwy,smuad,smusd"))
+  "cortex_a8_multiply")
+
+;; smlald and smlsld are multiply-accumulate instructions but do not
+;; receive bypassed data from other multiplication results; thus, they
+;; cannot go in cortex_a8_mla above.  (See below for bypass details.)
+(define_insn_reservation "cortex_a8_smlald" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smlald,smlsld"))
+  "cortex_a8_multiply_2")
+
+;; A multiply with a single-register result or an MLA, followed by an
+;; MLA with an accumulator dependency, has its result forwarded so two
+;; such instructions can issue back-to-back.
+(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy"
+               "cortex_a8_mla"
+               "arm_mac_accumulator_is_mul_result")
+
+;; A multiply followed by an ALU instruction needing the multiply
+;; result only at E2 has lower latency than one needing it at E1.
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+                  cortex_a8_smulwy,cortex_a8_smlald"
+               "cortex_a8_alu")
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+                  cortex_a8_smulwy,cortex_a8_smlald"
+               "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+                  cortex_a8_smulwy,cortex_a8_smlald"
+               "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")
+
+;; Load instructions.
+;; The presence of any register writeback is ignored here.
+
+;; A load result has latency 3 unless the dependent instruction has
+;; no early dep, in which case it is only latency two.
+;; We assume 64-bit alignment for doubleword loads.
+(define_insn_reservation "cortex_a8_load1_2" 3
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "load1,load2,load_byte"))
+  "cortex_a8_load_store_1")
+
+(define_bypass 2 "cortex_a8_load1_2"
+               "cortex_a8_alu")
+(define_bypass 2 "cortex_a8_load1_2"
+               "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_a8_load1_2"
+               "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")
+
+;; We do not currently model the fact that loads with scaled register
+;; offsets that are not LSL #2 have an extra cycle latency (they issue
+;; as two micro-ops).
+
+;; A load multiple of three registers is usually issued as two micro-ops.
+;; The first register will be available at E3 of the first iteration,
+;; the second at E3 of the second iteration, and the third at E4 of
+;; the second iteration.  A load multiple of four registers is usually
+;; issued as two micro-ops.
+(define_insn_reservation "cortex_a8_load3_4" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "load3,load4"))
+  "cortex_a8_load_store_2")
+
+(define_bypass 4 "cortex_a8_load3_4"
+               "cortex_a8_alu")
+(define_bypass 4 "cortex_a8_load3_4"
+               "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 4 "cortex_a8_load3_4"
+               "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")
+
+;; Store instructions.
+;; Writeback is again ignored.
+
+(define_insn_reservation "cortex_a8_store1_2" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "store1,store2"))
+  "cortex_a8_load_store_1")
+
+(define_insn_reservation "cortex_a8_store3_4" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "store3,store4"))
+  "cortex_a8_load_store_2")
+
+;; An ALU instruction acting as a producer for a store instruction
+;; that only uses the result as the value to be stored (as opposed to
+;; using it to calculate the address) has latency zero; the store
+;; reads the value to be stored at the start of E3 and the ALU insn
+;; writes it at the end of E2.  Move instructions actually produce the
+;; result at the end of E1, but since we don't have delay slots, the
+;; scheduling behaviour will be the same.
+(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\
+                  cortex_a8_alu_shift_reg,cortex_a8_mov"
+               "cortex_a8_store1_2,cortex_a8_store3_4"
+               "arm_no_early_store_addr_dep")
+
+;; Branch instructions
+
+(define_insn_reservation "cortex_a8_branch" 0
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "branch"))
+  "cortex_a8_branch")
+
+;; Call latencies are not predictable.  A semi-arbitrary very large
+;; number is used as "positive infinity" so that everything should be
+;; finished by the time of return.
+(define_insn_reservation "cortex_a8_call" 32
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "type" "call"))
+  "cortex_a8_issue_branch")
+
+;; NEON (including VFP) instructions.
+
+(include "cortex-a8-neon.md")
+
diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml
new file mode 100644 (file)
index 0000000..b47a0ae
--- /dev/null
@@ -0,0 +1,497 @@
+(* Emission of the core of the Cortex-A8 NEON scheduling description.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   Contributed by CodeSourcery.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+*)
+
+(* This scheduling description generator works as follows.
+   - Each group of instructions has source and destination requirements
+     specified.  The source requirements may be specified using
+     Source (the stage at which all source operands not otherwise
+     described are read), Source_m (the stage at which Rm operands are
+     read), Source_n (likewise for Rn) and Source_d (likewise for Rd).
+   - For each group of instructions the earliest stage where a source
+     operand may be required is calculated.
+   - Each group of instructions is selected in turn as a producer.
+     The latencies between this group and every other group are then
+     calculated, yielding up to four values for each combination:
+       1. Producer -> consumer Rn latency
+       2. Producer -> consumer Rm latency
+       3. Producer -> consumer Rd (as a source) latency
+       4. Producer -> consumer worst-case latency.
+     Value 4 is calculated from the destination availability requirements
+     of the consumer and the earliest source availability requirements
+     of the producer.
+   - The largest Value 4 calculated for the current producer is the
+     worst-case latency, L, for that instruction group.  This value is written
+     out in a define_insn_reservation for the producer group.
+   - For each producer and consumer pair, the latencies calculated above
+     are collated.  The average (of up to four values) is calculated and
+     if this average is different from the worst-case latency, an
+     unguarded define_bypass construction is issued for that pair.
+     (For each pair only one define_bypass construction will be emitted,
+     and at present we do not emit specific guards.)
+*)
+
+open Utils
+
+let n1 = 1 and n2 = 2 and n3 = 3 and n4 = 4 and n5 = 5 and n6 = 6
+    and n7 = 7 and n8 = 8 and n9 = 9
+
+type availability = Source of int
+                  | Source_n of int
+                  | Source_m of int
+                  | Source_d of int
+                  | Dest of int
+                 | Dest_n_after of int * int
+
+type guard = Guard_none | Guard_only_m | Guard_only_n | Guard_only_d
+
+(* Reservation behaviours.  All but the last row here correspond to one
+   pipeline each.  Each constructor will correspond to one
+   define_reservation.  *)
+type reservation =
+  Mul | Mul_2cycle | Mul_4cycle
+| Shift | Shift_2cycle
+| ALU | ALU_2cycle
+| Fmul | Fmul_2cycle
+| Fadd | Fadd_2cycle
+(* | VFP *)
+| Permute of int
+| Ls of int
+| Fmul_then_fadd | Fmul_then_fadd_2
+
+(* This table must be kept as short as possible by conflating
+   entries with the same availability behaviour.
+
+   First components: instruction group names
+   Second components: availability requirements, in the order in which
+   they should appear in the comments in the .md file.
+   Third components: reservation info
+*)
+let availability_table = [
+  (* NEON integer ALU instructions.  *)
+  (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr
+     veor vbic vorn ddd qqq *)
+  "neon_int_1", [Source n2; Dest n3], ALU;
+  (* vadd vsub qqd vsub ddd qqq *)
+  "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU;
+  (* vsum vneg dd qq vadd vsub qdd *)
+  "neon_int_3", [Source n1; Dest n3], ALU;
+  (* vabs vceqz vcgez vcgtz vclez vcltz vadh vradh vsbh vrsbh dqq *)
+  (* vhadd vrhadd vqadd vtst ddd qqq *)
+  "neon_int_4", [Source n2; Dest n4], ALU;
+  (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *)
+  "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU;
+  (* vqneg vqabs dd qq *)
+  "neon_vqneg_vqabs", [Source n1; Dest n4], ALU;
+  (* vmov vmvn *)
+  "neon_vmov", [Dest n3], ALU;
+  (* vaba *)
+  "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU;
+  "neon_vaba_qqq",
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ALU_2cycle;
+  (* vsma *)
+  "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU;
+
+  (* NEON integer multiply instructions.  *)
+  (* vmul, vqdmlh, vqrdmlh *)
+  (* vmul, vqdmul, qdd 16/8 long 32/16 long *)
+  "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], Mul;
+  "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], Mul_2cycle;
+  (* vmul, vqdmul again *)
+  "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar",
+    [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle;
+  (* vmla, vmls *)
+  "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long",
+    [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul;
+  "neon_mla_qqq_8_16",
+    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
+  "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long",
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
+  "neon_mla_qqq_32_qqd_32_scalar",
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], Mul_4cycle;
+  (* vmul, vqdmulh, vqrdmulh *)
+  (* vmul, vqdmul *)
+  "neon_mul_ddd_16_scalar_32_16_long_scalar",
+    [Source_n n2; Source_m n1; Dest n6], Mul;
+  "neon_mul_qqd_32_scalar",
+    [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle;
+  (* vmla, vmls *)
+  (* vmla, vmla, vqdmla, vqdmls *)
+  "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar",
+    [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul;
+
+  (* NEON integer shift instructions.  *)
+  (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *)
+  "neon_shift_1", [Source n1; Dest n3], Shift;
+  (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow;
+     vqshl_vrshl_vqrshl_ddd *)
+  "neon_shift_2", [Source n1; Dest n4], Shift;
+  (* vsli, vsri and vshl for qqq *)
+  "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle;
+  "neon_vshl_ddd", [Source n1; Dest n1], Shift;
+  "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)],
+    Shift_2cycle;
+  "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift;
+
+  (* NEON floating-point instructions.  *)
+  (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *)
+  (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *)
+  "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd;
+  "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)],
+    Fadd_2cycle;
+  (* vsum, vfmx, vfmn *)
+  "neon_fp_vsum", [Source n1; Dest n5], Fadd;
+  "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul;
+  "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)],
+    Fmul_2cycle;
+  (* vmla, vmls *)
+  "neon_fp_vmla_ddd",
+    [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd;
+  "neon_fp_vmla_qqq",
+    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)],
+    Fmul_then_fadd_2;
+  "neon_fp_vmla_ddd_scalar",
+    [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd;
+  "neon_fp_vmla_qqq_scalar",
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)],
+    Fmul_then_fadd_2;
+  "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd;
+  "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)],
+    Fmul_then_fadd_2;
+
+  (* NEON byte permute instructions.  *)
+  (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *)
+  "neon_bp_simple", [Source n1; Dest n2], Permute 1;
+  (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1};
+     similarly for vtbx *)
+  "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2;
+  (* all the rest *)
+  "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3;
+
+  (* NEON load/store instructions.  *)
+  "neon_ldr", [Dest n1], Ls 1;
+  "neon_str", [Source n1], Ls 1;
+  "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2;
+  "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3;
+  "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2;
+  "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3;
+  "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4;
+  "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2;
+  "neon_vst1_3_4_regs", [Source n1], Ls 3;
+  "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4;
+  "neon_vst3_vst4", [Source n1], Ls 4;
+  "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3;
+  "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5;
+  "neon_vst1_vst2_lane", [Source n1], Ls 2;
+  "neon_vst3_vst4_lane", [Source n1], Ls 3;
+  "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3;
+
+  (* NEON register transfer instructions.  *)
+  "neon_mcr", [Dest n2], Permute 1;
+  "neon_mcr_2_mcrr", [Dest n2], Permute 2;
+  (* MRC instructions are in the .tpl file.  *)
+]
+
+(* Augment the tuples in the availability table with an extra component
+   that describes the earliest stage where a source operand may be
+   required.  (It is also possible that an entry in the table has no
+   source requirements.)  *)
+let calculate_sources =
+  List.map (fun (name, avail, res) ->
+              let earliest_stage =
+                List.fold_left
+                  (fun cur -> fun info ->
+                     match info with
+                       Source stage
+                     | Source_n stage
+                     | Source_m stage
+                     | Source_d stage ->
+                         (match cur with
+                           None -> Some stage
+                         | Some stage' when stage < stage' -> Some stage
+                         | _ -> cur)
+                     | _ -> cur) None avail
+              in
+                (name, avail, res, earliest_stage))
+
+(* Find the stage, if any, at the end of which a group produces a result.  *)
+let find_dest (attr, avail, _, _) =
+  try
+    find_with_result
+      (fun av -> match av with
+                   Dest st -> Some (Some st)
+                 | Dest_n_after (after, st) -> Some (Some (after + st))
+                 | _ -> None) avail
+  with Not_found -> None
+
+(* Find the worst-case latency between a producer and a consumer.  *)
+let worst_case_latency producer (_, _, _, earliest_required) =
+  let dest = find_dest producer in
+    match earliest_required, dest with
+      None, _ ->
+        (* The consumer doesn't have any source requirements.  *)
+        None
+    | _, None ->
+        (* The producer doesn't produce any results (e.g. a store insn).  *)
+        None
+    | Some consumed, Some produced -> Some (produced - consumed + 1)
+
+(* Helper function for below.  *)
+let latency_calc f producer (_, avail, _, _) =
+  try
+    let source_avail = find_with_result f avail in
+      match find_dest producer with
+        None ->
+          (* The producer does not produce a result.  *)
+          Some 0
+      | Some produced ->
+          let latency = produced - source_avail + 1 in
+            (* Latencies below zero are raised to zero since we don't have
+               delay slots.  *)
+            if latency < 0 then Some 0 else Some latency
+  with Not_found -> None
+
+(* Find any Rm latency between a producer and a consumer.  If no
+   Rm source requirement is explicitly specified for the consumer,
+   return "positive infinity".  Also return "positive infinity" if
+   the latency matches the supplied worst-case latency for this
+   producer.  *)
+let get_m_latency producer consumer =
+  match latency_calc (fun av -> match av with Source_m stage -> Some stage
+                                            | _ -> None) producer consumer
+  with None -> [] | Some latency -> [(Guard_only_m, latency)]
+
+(* Likewise for Rn.  *)
+let get_n_latency producer consumer =
+  match latency_calc (fun av -> match av with Source_n stage -> Some stage
+                                            | _ -> None) producer consumer
+  with None -> [] | Some latency -> [(Guard_only_n, latency)]
+
+(* Likewise for Rd.  *)
+let get_d_latency producer consumer =
+  match
+    latency_calc (fun av -> match av with Source_d stage -> Some stage
+                                        | _ -> None) producer consumer
+  with None -> [] | Some latency -> [(Guard_only_d, latency)]
+
+(* Given a producer and a consumer, work out the latency of the producer
+   to the consumer in each of the four cases (availability information
+   permitting) identified at the top of this file.  Return the
+   consumer, the worst-case unguarded latency and any guarded latencies.  *)
+let calculate_latencies producer consumer =
+  let worst = worst_case_latency producer consumer in
+  let m_latency = get_m_latency producer consumer in
+  let n_latency = get_n_latency producer consumer in
+  let d_latency = get_d_latency producer consumer in
+    (consumer, worst, m_latency @ n_latency @ d_latency)
+
+(* Helper function for below.  *)
+let pick_latency largest worst guards =
+  let guards =
+    match worst with
+      None -> guards
+    | Some worst -> (Guard_none, worst) :: guards
+  in
+  if List.length guards = 0 then None else
+    let total_latency =
+      List.fold_left (fun acc -> fun (_, latency) -> acc + latency) 0 guards
+    in
+    let average_latency = (float_of_int total_latency) /.
+                          (float_of_int (List.length guards)) in
+    let rounded_latency = int_of_float (ceil average_latency) in
+      if rounded_latency = largest then None
+      else Some (Guard_none, rounded_latency)
+
+(* Collate all bypasses for a particular producer as required in
+   worst_case_latencies_and_bypasses.  (By this stage there is a maximum
+   of one bypass from this producer to any particular consumer listed
+   in LATENCIES.)  Use a hash table to collate bypasses with the
+   same latency and guard.  *)
+let collate_bypasses (producer_name, _, _, _) largest latencies =
+  let ht = Hashtbl.create 42 in
+  let keys = ref [] in
+    List.iter (
+      fun ((consumer, _, _, _), worst, guards) ->
+        (* Find out which latency to use.  Ignoring latencies that match
+           the *overall* worst-case latency for this producer (which will
+           be in define_insn_reservation), we have to examine:
+          1. the latency with no guard between this producer and this
+              consumer; and
+          2. any guarded latency.  *)
+        let guard_latency_opt = pick_latency largest worst guards in
+          match guard_latency_opt with
+            None -> ()
+          | Some (guard, latency) ->
+            begin
+              (if (try ignore (Hashtbl.find ht (guard, latency)); false
+                   with Not_found -> true) then
+                 keys := (guard, latency) :: !keys);
+              Hashtbl.add ht (guard, latency) consumer
+            end
+    ) latencies;
+    (* The hash table now has bypasses collated so that ones with the
+       same latency and guard have the same keys.  Walk through all the
+       keys, extract the associated bypasses, and concatenate the names
+       of the consumers for each bypass.  *)
+    List.map (
+      fun ((guard, latency) as key) ->
+        let consumers = Hashtbl.find_all ht key in
+          (producer_name,
+           String.concat ",\\\n               " consumers,
+           latency,
+           guard)
+      ) !keys
+
+(* For every producer, find the worst-case latency between it and
+   *any* consumer.  Also determine (if such a thing exists) the
+   lowest-latency bypass from each producer to each consumer.  Group
+   the output in such a way that all bypasses with the same producer
+   and latency are together, and so that bypasses with the worst-case
+   latency are ignored.  *)
+let worst_case_latencies_and_bypasses =
+  let rec f (worst_acc, bypasses_acc) prev xs =
+    match xs with
+      [] -> (worst_acc, bypasses_acc)
+    | ((producer_name, producer_avail, res_string, _) as producer)::next ->
+      (* For this particular producer, work out the latencies between
+         it and every consumer.  *)
+      let latencies =
+        List.fold_left (fun acc -> fun consumer ->
+                          (calculate_latencies producer consumer) :: acc)
+                       [] (prev @ xs)
+      in
+        (* Now work out what the overall worst case latency was for this
+           particular producer.  *)
+        match latencies with
+          [] -> assert false
+        | _ ->
+          let comp_fn (_, l1, _) (_, l2, _) =
+            if l1 > l2 then -1 else if l1 = l2 then 0 else 1
+          in
+          let largest =
+            match List.hd (List.sort comp_fn latencies) with
+              (_, None, _) -> 0 (* Producer has no consumers. *)
+            | (_, Some worst, _) -> worst
+          in
+          (* Having got the largest latency, collect all bypasses for
+             this producer and filter out those with that largest
+             latency.  Record the others for later emission.  *)
+          let bypasses = collate_bypasses producer largest latencies in
+            (* Go on to process remaining producers, having noted
+               the result for this one.  *)
+            f ((producer_name, producer_avail, largest,
+                res_string) :: worst_acc,
+               bypasses @ bypasses_acc)
+              (prev @ [producer]) next
+  in
+    f ([], []) []
+
+(* Emit a helpful comment for a define_insn_reservation.  *)
+let write_comment producer avail =
+  let seen_source = ref false in
+  let describe info =
+    let read = if !seen_source then "" else "read " in
+    match info with
+      Source stage ->
+        seen_source := true;
+       Printf.printf "%stheir source operands at N%d" read stage
+    | Source_n stage ->
+        seen_source := true;
+       Printf.printf "%stheir (D|Q)n operands at N%d" read stage
+    | Source_m stage ->
+        seen_source := true;
+       Printf.printf "%stheir (D|Q)m operands at N%d" read stage
+    | Source_d stage ->
+       Printf.printf "%stheir (D|Q)d operands at N%d" read stage
+    | Dest stage ->
+       Printf.printf "produce a result at N%d" stage
+    | Dest_n_after (after, stage) ->
+       Printf.printf "produce a result at N%d on cycle %d" stage (after + 1)
+  in
+    Printf.printf ";; Instructions using this reservation ";
+    let rec f infos x =
+      let sep = if x mod 2 = 1 then "" else "\n;;" in
+      match infos with
+        [] -> assert false
+      | [info] -> describe info; Printf.printf ".\n"
+      | info::(_::[] as infos) ->
+          describe info; Printf.printf ", and%s " sep; f infos (x+1)
+      | info::infos -> describe info; Printf.printf ",%s " sep; f infos (x+1)
+    in
+      f avail 0
+
+(* Emit a define_insn_reservation for each producer.  The latency
+   written in will be its worst-case latency.  *)
+let emit_insn_reservations =
+  List.iter (
+     fun (producer, avail, latency, reservation) ->
+        write_comment producer avail;
+        Printf.printf "(define_insn_reservation \"%s\" %d\n" producer latency;
+        Printf.printf "  (and (eq_attr \"tune\" \"cortexa8\")\n";
+        Printf.printf "       (eq_attr \"neon_type\" \"%s\"))\n" producer;
+        let str =
+          match reservation with
+           Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4"
+         | Shift -> "dp" | Shift_2cycle -> "dp_2"
+         | ALU -> "dp" | ALU_2cycle -> "dp_2"
+         | Fmul -> "dp" | Fmul_2cycle -> "dp_2"
+         | Fadd -> "fadd" | Fadd_2cycle -> "fadd_2"
+         | Ls 1 -> "ls"
+          | Ls n -> "ls_" ^ (string_of_int n)
+         | Permute 1 -> "perm"
+          | Permute n -> "perm_" ^ (string_of_int n)
+         | Fmul_then_fadd -> "fmul_then_fadd"
+         | Fmul_then_fadd_2 -> "fmul_then_fadd_2"
+        in
+          Printf.printf "  \"cortex_a8_neon_%s\")\n\n" str
+    )
+
+(* Given a guard description, return the name of the C function to
+   be used as the guard for define_bypass.  *)
+let guard_fn g =
+  match g with
+    Guard_only_m -> "arm_neon_only_m_dependency"
+  | Guard_only_n -> "arm_neon_only_n_dependency"
+  | Guard_only_d -> "arm_neon_only_d_dependency"
+  | Guard_none -> assert false
+
+(* Emit a define_bypass for each bypass.  *)
+let emit_bypasses =
+  List.iter (
+      fun (producer, consumers, latency, guard) ->
+        Printf.printf "(define_bypass %d \"%s\"\n" latency producer;
+        if guard = Guard_none then
+          Printf.printf "               \"%s\")\n\n" consumers
+        else
+          begin
+            Printf.printf "               \"%s\"\n" consumers;
+            Printf.printf "               \"%s\")\n\n" (guard_fn guard)
+          end
+    )
+
+(* Program entry point.  *)
+let main =
+  let table = calculate_sources availability_table in
+  let worst_cases, bypasses = worst_case_latencies_and_bypasses table in
+    emit_insn_reservations (List.rev worst_cases);
+    Printf.printf ";; Exceptions to the default latencies.\n\n";
+    emit_bypasses bypasses
+
index 48b4e2a..c62ffc3 100644 (file)
 (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t")
                                          (V8HI "x") (V4SI "t") (V4SF "t")])
 
+;; Attribute used to permit string comparisons against <VQH_mnem> in
+;; neon_type attribute definitions.
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
+
+;; Classification of NEON instructions for scheduling purposes.
+;; Do not set this attribute and the "type" attribute together in
+;; any one instruction pattern.
+(define_attr "neon_type"
+   "neon_int_1,\
+   neon_int_2,\
+   neon_int_3,\
+   neon_int_4,\
+   neon_int_5,\
+   neon_vqneg_vqabs,\
+   neon_vmov,\
+   neon_vaba,\
+   neon_vsma,\
+   neon_vaba_qqq,\
+   neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+   neon_mul_qqq_8_16_32_ddd_32,\
+   neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\
+   neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+   neon_mla_qqq_8_16,\
+   neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\
+   neon_mla_qqq_32_qqd_32_scalar,\
+   neon_mul_ddd_16_scalar_32_16_long_scalar,\
+   neon_mul_qqd_32_scalar,\
+   neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\
+   neon_shift_1,\
+   neon_shift_2,\
+   neon_shift_3,\
+   neon_vshl_ddd,\
+   neon_vqshl_vrshl_vqrshl_qqq,\
+   neon_vsra_vrsra,\
+   neon_fp_vadd_ddd_vabs_dd,\
+   neon_fp_vadd_qqq_vabs_qq,\
+   neon_fp_vsum,\
+   neon_fp_vmul_ddd,\
+   neon_fp_vmul_qqd,\
+   neon_fp_vmla_ddd,\
+   neon_fp_vmla_qqq,\
+   neon_fp_vmla_ddd_scalar,\
+   neon_fp_vmla_qqq_scalar,\
+   neon_fp_vrecps_vrsqrts_ddd,\
+   neon_fp_vrecps_vrsqrts_qqq,\
+   neon_bp_simple,\
+   neon_bp_2cycle,\
+   neon_bp_3cycle,\
+   neon_ldr,\
+   neon_str,\
+   neon_vld1_1_2_regs,\
+   neon_vld1_3_4_regs,\
+   neon_vld2_2_regs_vld1_vld2_all_lanes,\
+   neon_vld2_4_regs,\
+   neon_vld3_vld4,\
+   neon_vst1_1_2_regs_vst2_2_regs,\
+   neon_vst1_3_4_regs,\
+   neon_vst2_4_regs_vst3_vst4,\
+   neon_vst3_vst4,\
+   neon_vld1_vld2_lane,\
+   neon_vld3_vld4_lane,\
+   neon_vst1_vst2_lane,\
+   neon_vst3_vst4_lane,\
+   neon_vld3_vld4_all_lanes,\
+   neon_mcr,\
+   neon_mcr_2_mcrr,\
+   neon_mrc,\
+   neon_mrrc,\
+   neon_ldm_2,\
+   neon_stm_2,\
+   none"
+ (const_string "none"))
+
+;; Predicates used for setting the above attribute.
+
+(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
+                                (V4HI "false") (V8HI "false")
+                                (V2SI "false") (V4SI "false")
+                                (V2SF "true") (V4SF "true")
+                                (DI "false") (V2DI "false")])
+
+(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
+                                  (V4HI "true") (V8HI "true")
+                                  (V2SI "false") (V4SI "false")
+                                  (V2SF "false") (V4SF "false")
+                                  (DI "false") (V2DI "false")])
+
+
+(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
+                            (V4HI "true") (V8HI  "false")
+                            (V2SI "true") (V4SI  "false")
+                            (V2SF "true") (V4SF  "false")
+                            (DI   "true") (V2DI  "false")])
+
+(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+                                 (V4HI "4") (V8HI "8")
+                                 (V2SI "2") (V4SI "4")
+                                 (V2SF "2") (V4SF "4")
+                                 (DI "1")   (V2DI "2")])
+
 (define_insn "*neon_mov<mode>"
   [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w,Uv,w, w,  ?r,?w,?r,?r, ?Us")
     default: return output_move_double (operands);
     }
 }
-  [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
-   (set_attr "length" "4,4,4,4,4,4,8,8,8")
-   (set_attr "pool_range"     "*,*,*,1020,*,*,*,1020,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
+ [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+  (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2")
+  (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
+  (set_attr "length" "4,4,4,4,4,4,8,8,8")
+  (set_attr "pool_range"     "*,*,*,1020,*,*,*,1020,*")
+  (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
 
 (define_insn "*neon_mov<mode>"
   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
     default: return output_move_quad (operands);
     }
 }
-  [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
+  [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\
+                          neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+   (set_attr "type" "*,*,*,*,*,*,alu,load4,store4")
+   (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
    (set_attr "length" "4,8,4,8,8,8,16,8,16")
    (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
                      (match_operand:SI 2 "immediate_operand" "i"))))]
   "TARGET_NEON"
   "vmov%?.<V_uf_sclr>\t%P0[%c2], %1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_mcr")]
+)
 
 (define_insn "vec_set<mode>"
   [(set (match_operand:VQ 0 "s_register_operand" "+w")
 
   return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_mcr")]
+)
 
 (define_insn "vec_setv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "+w")
 
   return "vmov%?.64\t%P0, %Q1, %R1";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_mcr_2_mcrr")]
+)
 
 (define_insn "vec_extract<mode>"
   [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
           (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
   "TARGET_NEON"
   "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "vec_extract<mode>"
   [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
 
   return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "vec_extractv2di"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 
   return "vmov%?.64\t%Q0, %R0, %P1";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_int_1")]
+)
 
 (define_expand "vec_init<mode>"
   [(match_operand:VDQ 0 "s_register_operand" "")
         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_1")))]
+)
 
 (define_insn "*sub<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                    (match_operand:VDQ 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_2")))]
+)
 
 (define_insn "*mul<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (if_then_else
+                                    (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+                                  (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32")
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+)
 
 (define_insn "ior<mode>3"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
     default: gcc_unreachable ();
     }
-})
+}
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "iordi3_neon"
   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
                     DImode, 0, VALID_NEON_QREG_MODE (DImode));
     default: gcc_unreachable ();
     }
-})
+}
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
 ;; vorr. We support the pseudo-instruction vand instead, because that
                     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
     default: gcc_unreachable ();
     }
-})
+}
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "anddi3_neon"
   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
                     DImode, 1, VALID_NEON_QREG_MODE (DImode));
     default: gcc_unreachable ();
     }
-})
+}
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "orn<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
   "TARGET_NEON"
-  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "orndi3_neon"
   [(set (match_operand:DI 0 "s_register_operand" "=w")
                    (match_operand:DI 2 "s_register_operand" "w")]
                     UNSPEC_VORN))]
   "TARGET_NEON"
-  "vorn\t%P0, %P1, %P2")
+  "vorn\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "bic<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
   "TARGET_NEON"
-  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "bicdi3_neon"
   [(set (match_operand:DI 0 "s_register_operand" "=w")
                     (match_operand:DI 2 "s_register_operand" "w")]
                     UNSPEC_VBIC))]
   "TARGET_NEON"
-  "vbic\t%P0, %P1, %P2")
+  "vbic\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "xor<mode>3"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "xordi3_neon"
   [(set (match_operand:DI 0 "s_register_operand" "=w")
                     (match_operand:DI 2 "s_register_operand" "w")]
                     UNSPEC_VEOR))]
   "TARGET_NEON"
-  "veor\t%P0, %P1, %P2")
+  "veor\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "one_cmpl<mode>2"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmvn\t%<V_reg>0, %<V_reg>1")
+  "vmvn\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "abs<mode>2"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_3")))]
+)
 
 (define_insn "neg<mode>2"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_3")))]
+)
 
 (define_insn "*umin<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "*umax<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "*smin<mode>3_neon"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_int_5")))]
+)
 
 (define_insn "*smax<mode>3_neon"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_int_5")))]
+)
 
 ; TODO: V2DI shifts are current disabled because there are bugs in the
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                      (match_operand:VDQIW 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
 
 ; Used for implementing logical shift-right, which is a left-shift by a negative
 ; amount, with signed operands. This is essentially the same as ashl<mode>3
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
   "TARGET_NEON"
-  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
 
 ; Used for implementing logical shift-right, which is a left-shift by a negative
 ; amount, with unsigned operands.
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
   "TARGET_NEON"
-  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
 
 (define_expand "ashr<mode>3"
   [(set (match_operand:VDQIW 0 "s_register_operand" "")
                          (match_operand:VW 1 "s_register_operand" "%w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vaddw.<V_s_elem>\t%q0, %q2, %P1")
+  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
+  [(set_attr "neon_type" "neon_int_3")]
+)
 
 (define_insn "widen_usum<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                          (match_operand:VW 1 "s_register_operand" "%w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vaddw.<V_u_elem>\t%q0, %q2, %P1")
+  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
+  [(set_attr "neon_type" "neon_int_3")]
+)
 
 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
 ;; shift-count granularity. That's good enough for the middle-end's current
           (vec_select:V2SI (match_dup 1)
                            (parallel [(const_int 2) (const_int 3)]))))]
   "TARGET_NEON"
-  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1")
+  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
+  [(set_attr "vqh_mnem" "<VQH_mnem>")
+   (set (attr "neon_type")
+      (if_then_else (eq_attr "vqh_mnem" "vadd")
+                    (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
 
 (define_insn "quad_halves_<code>v4sf"
   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
           (vec_select:V2SF (match_dup 1)
                            (parallel [(const_int 2) (const_int 3)]))))]
   "TARGET_NEON"
-  "<VQH_mnem>.f32\t%P0, %e1, %f1")
+  "<VQH_mnem>.f32\t%P0, %e1, %f1"
+  [(set_attr "vqh_mnem" "<VQH_mnem>")
+   (set (attr "neon_type")
+      (if_then_else (eq_attr "vqh_mnem" "vadd")
+                    (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
 
 (define_insn "quad_halves_<code>v8hi"
   [(set (match_operand:V4HI 0 "s_register_operand" "+w")
                            (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
   "TARGET_NEON"
-  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1")
+  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
+  [(set_attr "vqh_mnem" "<VQH_mnem>")
+   (set (attr "neon_type")
+      (if_then_else (eq_attr "vqh_mnem" "vadd")
+                    (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
 
 (define_insn "quad_halves_<code>v16qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "+w")
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
   "TARGET_NEON"
-  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1")
+  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
+  [(set_attr "vqh_mnem" "<VQH_mnem>")
+   (set (attr "neon_type")
+      (if_then_else (eq_attr "vqh_mnem" "vadd")
+                    (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
 
 ; FIXME: We wouldn't need the following insns if we could write subregs of
 ; vector registers. Make an attempt at removing unnecessary moves, though
     return "vmov\t%e0, %P1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "move_lo_quad_v4sf"
   [(set (match_operand:V4SF 0 "s_register_operand" "+w")
     return "vmov\t%e0, %P1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "move_lo_quad_v8hi"
   [(set (match_operand:V8HI 0 "s_register_operand" "+w")
     return "vmov\t%e0, %P1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "move_lo_quad_v16qi"
   [(set (match_operand:V16QI 0 "s_register_operand" "+w")
     return "vmov\t%e0, %P1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 ;; Reduction operations
 
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
   "TARGET_NEON"
-  "vadd.i64\t%e0, %e1, %f1")
+  "vadd.i64\t%e0, %e1, %f1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 ;; NEON does not distinguish between signed and unsigned addition except on
 ;; widening operations.
                    (match_operand:VD 2 "s_register_operand" "w")]
                    UNSPEC_VPADD))]
   "TARGET_NEON"
-  "vpadd.<V_if_elem>\t%P0, %P1, %P2")
+  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
+  ;; Assume this schedules like vadd.
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_1")))]
+)
 
 (define_insn "neon_vpsmin<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                    UNSPEC_VPSMIN))]
   "TARGET_NEON"
-  "vpmin.<V_s_elem>\t%P0, %P1, %P2")
+  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
+  ;; Assume this schedules like vmin.
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vpsmax<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                    UNSPEC_VPSMAX))]
   "TARGET_NEON"
-  "vpmax.<V_s_elem>\t%P0, %P1, %P2")
+  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
+  ;; Assume this schedules like vmax.
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vpumin<mode>"
   [(set (match_operand:VDI 0 "s_register_operand" "=w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMIN))]
   "TARGET_NEON"
-  "vpmin.<V_u_elem>\t%P0, %P1, %P2")
+  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
+  ;; Assume this schedules like umin.
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "neon_vpumax<mode>"
   [(set (match_operand:VDI 0 "s_register_operand" "=w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMAX))]
   "TARGET_NEON"
-  "vpmax.<V_u_elem>\t%P0, %P1, %P2")
+  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
+  ;; Assume this schedules like umax.
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 ;; Saturating arithmetic
 
        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vqadd.<V_s_elem>\t%P0, %P1, %P2")
+  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "*us_add<mode>_neon"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vqadd.<V_u_elem>\t%P0, %P1, %P2")
+  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "*ss_sub<mode>_neon"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vqsub.<V_s_elem>\t%P0, %P1, %P2")
+  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "*us_sub<mode>_neon"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
   "TARGET_NEON"
-  "vqsub.<V_u_elem>\t%P0, %P1, %P2")
+  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 ;; Patterns for builtins.
 
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VADD))]
   "TARGET_NEON"
-  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_1")))]
+)
 
 ; operand 3 represents in bits:
 ;  bit 0: signed (vs unsigned).
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VADDL))]
   "TARGET_NEON"
-  "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2")
+  "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_3")]
+)
 
 (define_insn "neon_vaddw<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VADDW))]
   "TARGET_NEON"
-  "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2")
+  "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
+  [(set_attr "neon_type" "neon_int_2")]
+)
 
 ; vhadd and vrhadd.
 
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VHADD))]
   "TARGET_NEON"
-  "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "neon_vqadd<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VQADD))]
   "TARGET_NEON"
-  "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "neon_vaddhn<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                             (match_operand:SI 3 "immediate_operand" "i")]
                            UNSPEC_VADDHN))]
   "TARGET_NEON"
-  "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2")
+  "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "neon_vmul<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VMUL))]
   "TARGET_NEON"
-  "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (if_then_else
+                                    (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+                                  (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32")
+                                    (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+)
 
 (define_insn "neon_vmla<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA))]
   "TARGET_NEON"
-  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3")
+  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vmla_ddd")
+                                  (const_string "neon_fp_vmla_qqq"))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (if_then_else
+                                    (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                                    (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+                                  (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mla_qqq_8_16")
+                                    (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
 
 (define_insn "neon_vmlal<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VMLAL))]
   "TARGET_NEON"
-  "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3")
+  "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vmls<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLS))]
   "TARGET_NEON"
-  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3")
+  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vmla_ddd")
+                                  (const_string "neon_fp_vmla_qqq"))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (if_then_else
+                                    (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                                    (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+                                  (if_then_else
+                                    (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                    (const_string "neon_mla_qqq_8_16")
+                                    (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
 
 (define_insn "neon_vmlsl<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VMLSL))]
   "TARGET_NEON"
-  "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3")
+  "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vqdmulh<mode>"
   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VQDMULH))]
   "TARGET_NEON"
-  "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+        (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                      (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+                      (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+        (const_string "neon_mul_qqq_8_16_32_ddd_32")))]
+)
 
 (define_insn "neon_vqdmlal<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VQDMLAL))]
   "TARGET_NEON"
-  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3")
+  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vqdmlsl<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VQDMLSL))]
   "TARGET_NEON"
-  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3")
+  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vmull<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VMULL))]
   "TARGET_NEON"
-  "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2")
+  "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
 
 (define_insn "neon_vqdmull<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VQDMULL))]
   "TARGET_NEON"
-  "vqdmull.<V_s_elem>\t%q0, %P1, %P2")
+  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
 
 (define_insn "neon_vsub<mode>"
   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSUB))]
   "TARGET_NEON"
-  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_2")))]
+)
 
 (define_insn "neon_vsubl<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VSUBL))]
   "TARGET_NEON"
-  "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2")
+  "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_2")]
+)
 
 (define_insn "neon_vsubw<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VSUBW))]
   "TARGET_NEON"
-  "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2")
+  "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
+  [(set_attr "neon_type" "neon_int_2")]
+)
 
 (define_insn "neon_vqsub<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VQSUB))]
   "TARGET_NEON"
-  "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "neon_vhsub<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VHSUB))]
   "TARGET_NEON"
-  "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "neon_vsubhn<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                             (match_operand:SI 3 "immediate_operand" "i")]
                            UNSPEC_VSUBHN))]
   "TARGET_NEON"
-  "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2")
+  "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "neon_vceq<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
                                 (match_operand:SI 3 "immediate_operand" "i")]
                                UNSPEC_VCEQ))]
   "TARGET_NEON"
-  "vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                  (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                    (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vcge<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
                                 (match_operand:SI 3 "immediate_operand" "i")]
                                UNSPEC_VCGE))]
   "TARGET_NEON"
-  "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                 (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                 (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vcgt<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
                                 (match_operand:SI 3 "immediate_operand" "i")]
                                UNSPEC_VCGT))]
   "TARGET_NEON"
-  "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                 (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                 (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vcage<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
                                 (match_operand:SI 3 "immediate_operand" "i")]
                                UNSPEC_VCAGE))]
   "TARGET_NEON"
-  "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vcagt<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
                                 (match_operand:SI 3 "immediate_operand" "i")]
                                UNSPEC_VCAGT))]
   "TARGET_NEON"
-  "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vtst<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VTST))]
   "TARGET_NEON"
-  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_4")]
+)
 
 (define_insn "neon_vabd<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VABD))]
   "TARGET_NEON"
-  "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                 (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                 (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vabdl<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                           UNSPEC_VABDL))]
   "TARGET_NEON"
-  "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2")
+  "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
 
 (define_insn "neon_vaba<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                        (match_operand:SI 4 "immediate_operand" "i")]
                      UNSPEC_VABA))]
   "TARGET_NEON"
-  "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3")
+  "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_vaba") (const_string "neon_vaba_qqq")))]
+)
 
 (define_insn "neon_vabal<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VABAL))]
   "TARGET_NEON"
-  "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3")
+  "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+  [(set_attr "neon_type" "neon_vaba")]
+)
 
 (define_insn "neon_vmax<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VMAX))]
   "TARGET_NEON"
-  "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+    (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                  (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                  (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vmin<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VMIN))]
   "TARGET_NEON"
-  "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+    (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                  (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                  (const_string "neon_int_5")))]
+)
 
 (define_expand "neon_vpadd<mode>"
   [(match_operand:VD 0 "s_register_operand" "=w")
                                   (match_operand:SI 2 "immediate_operand" "i")]
                                  UNSPEC_VPADDL))]
   "TARGET_NEON"
-  "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1")
+  "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+  ;; Assume this schedules like vaddl.
+  [(set_attr "neon_type" "neon_int_3")]
+)
 
 (define_insn "neon_vpadal<mode>"
   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
                                   (match_operand:SI 3 "immediate_operand" "i")]
                                  UNSPEC_VPADAL))]
   "TARGET_NEON"
-  "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2")
+  "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  ;; Assume this schedules like vpadd.
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "neon_vpmax<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VPMAX))]
   "TARGET_NEON"
-  "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  ;; Assume this schedules like vmax.
+  [(set (attr "neon_type")
+    (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                  (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vpmin<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VPMIN))]
   "TARGET_NEON"
-  "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  ;; Assume this schedules like vmin.
+  [(set (attr "neon_type")
+    (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                  (const_string "neon_fp_vadd_ddd_vabs_dd")
+                  (const_string "neon_int_5")))]
+)
 
 (define_insn "neon_vrecps<mode>"
   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VRECPS))]
   "TARGET_NEON"
-  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_fp_vrecps_vrsqrts_ddd")
+                    (const_string "neon_fp_vrecps_vrsqrts_qqq")))]
+)
 
 (define_insn "neon_vrsqrts<mode>"
   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VRSQRTS))]
   "TARGET_NEON"
-  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_fp_vrecps_vrsqrts_ddd")
+                    (const_string "neon_fp_vrecps_vrsqrts_qqq")))]
+)
 
 (define_insn "neon_vabs<mode>"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
                      (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VABS))]
   "TARGET_NEON"
-  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else
+                      (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                      (const_string "neon_fp_vadd_ddd_vabs_dd")
+                      (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_vqneg_vqabs")))]
+)
 
 (define_insn "neon_vqabs<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQABS))]
   "TARGET_NEON"
-  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_vqneg_vqabs")]
+)
 
 (define_expand "neon_vneg<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQNEG))]
   "TARGET_NEON"
-  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_vqneg_vqabs")]
+)
 
 (define_insn "neon_vcls<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VCLS))]
   "TARGET_NEON"
-  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "neon_vclz<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VCLZ))]
   "TARGET_NEON"
-  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1")
+  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "neon_vcnt<mode>"
   [(set (match_operand:VE 0 "s_register_operand" "=w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_VCNT))]
   "TARGET_NEON"
-  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1")
+  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_insn "neon_vrecpe<mode>"
   [(set (match_operand:V32 0 "s_register_operand" "=w")
                      (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_VRECPE))]
   "TARGET_NEON"
-  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1")
+  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vrsqrte<mode>"
   [(set (match_operand:V32 0 "s_register_operand" "=w")
                      (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_VRSQRTE))]
   "TARGET_NEON"
-  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1")
+  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_fp_vadd_ddd_vabs_dd")
+                    (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_expand "neon_vmvn<mode>"
   [(match_operand:VDQIW 0 "s_register_operand" "")
                          UNSPEC_VGET_LANE))]
   "TARGET_NEON"
   "vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 ; Operand 2 (lane number) is ignored because we can only extract the zeroth lane
 ; with this insn. Operand 3 (info word) is ignored because it does nothing
                   UNSPEC_VGET_LANE))]
   "TARGET_NEON"
   "vmov%?\t%Q0, %R0, %P1  @ di"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vget_lane<mode>"
   [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
 
   return "";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vget_lanev2di"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 
   return "";
 }
-  [(set_attr "predicable" "yes")])
-
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vset_lane<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                    UNSPEC_VSET_LANE))]
   "TARGET_NEON"
   "vmov%?.<V_sz_elem>\t%P0[%c3], %1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
 
                    UNSPEC_VSET_LANE))]
   "TARGET_NEON"
   "vmov%?\t%P0, %Q1, %R1  @ di"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vset_lane<mode>"
   [(set (match_operand:VQ 0 "s_register_operand" "=w")
 
   return "";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vset_lanev2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
 
   return "";
 }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_expand "neon_vcreate<mode>"
   [(match_operand:VDX 0 "s_register_operand" "")
                     UNSPEC_VDUP_N))]
   "TARGET_NEON"
   "vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
-  [(set_attr "predicable" "yes")])
+  ;; Assume this schedules like vmov.
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vdup_ndi"
   [(set (match_operand:DI 0 "s_register_operand" "=w")
                    UNSPEC_VDUP_N))]
   "TARGET_NEON"
   "vmov%?\t%P0, %Q1, %R1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vdup_nv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
   "TARGET_NEON"
   "vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1"
   [(set_attr "predicable" "yes")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vdup_lane<mode>"
   [(set (match_operand:VD 0 "s_register_operand" "=w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_VDUP_LANE))]
   "TARGET_NEON"
-  "vdup.<V_sz_elem>\t%P0, %P1[%c2]")
+  "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
+  ;; Assume this schedules like vmov.
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vdup_lane<mode>"
   [(set (match_operand:VQ 0 "s_register_operand" "=w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_VDUP_LANE))]
   "TARGET_NEON"
-  "vdup.<V_sz_elem>\t%q0, %P1[%c2]")
+  "vdup.<V_sz_elem>\t%q0, %P1[%c2]"
+  ;; Assume this schedules like vmov.
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 ; Scalar index is ignored, since only zero is valid here.
 (define_expand "neon_vdup_lanedi"
                      UNSPEC_VDUP_LANE))]
   "TARGET_NEON"
   "vmov\t%e0, %P1\;vmov\t%f0, %P1"
-  [(set_attr "length" "8")])
+  [(set_attr "length" "8")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
 ;; dest vector.
 
   return "";
 }
-  [(set_attr "length" "8")])
+  ;; We set the neon_type attribute based on the vmov instructions above.
+  [(set_attr "length" "8")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vget_high<mode>"
   [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w")
     return "vmov\t%P0, %f1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vget_low<mode>"
   [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w")
     return "vmov\t%P0, %e1";
   else
     return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          UNSPEC_VCVT))]
   "TARGET_NEON"
-  "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1")
+  "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          UNSPEC_VCVT))]
   "TARGET_NEON"
-  "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1")
+  "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vcvt_n<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VCVT_N))]
   "TARGET_NEON"
-  "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2")
+  "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vcvt_n<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VCVT_N))]
   "TARGET_NEON"
-  "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2")
+  "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                   (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
 
 (define_insn "neon_vmovn<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                            UNSPEC_VMOVN))]
   "TARGET_NEON"
-  "vmovn.<V_if_elem>\t%P0, %q1")
+  "vmovn.<V_if_elem>\t%P0, %q1"
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vqmovn<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                            UNSPEC_VQMOVN))]
   "TARGET_NEON"
-  "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1")
+  "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vqmovun<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                            UNSPEC_VQMOVUN))]
   "TARGET_NEON"
-  "vqmovun.<V_s_elem>\t%P0, %q1")
+  "vqmovun.<V_s_elem>\t%P0, %q1"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vmovl<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                           UNSPEC_VMOVL))]
   "TARGET_NEON"
-  "vmovl.%T2%#<V_sz_elem>\t%q0, %P1")
+  "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
 
 (define_insn "neon_vmul_lane<mode>"
   [(set (match_operand:VMD 0 "s_register_operand" "=w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
-  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]")
+  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmul_ddd")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+                                 (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))))]
+)
 
 (define_insn "neon_vmul_lane<mode>"
   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
-  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]")
+  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmul_qqd")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
+                                 (const_string "neon_mul_qqd_32_scalar"))))]
+)
 
 (define_insn "neon_vmull_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VMULL_LANE))]
   "TARGET_NEON"
-  "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]")
+  "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
 
 (define_insn "neon_vqdmull_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 4 "immediate_operand" "i")]
                           UNSPEC_VQDMULL_LANE))]
   "TARGET_NEON"
-  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]")
+  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
 
 (define_insn "neon_vqdmulh_lane<mode>"
   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
                       (match_operand:SI 4 "immediate_operand" "i")]
                       UNSPEC_VQDMULH_LANE))]
   "TARGET_NEON"
-  "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]")
+  "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
+                   (const_string "neon_mul_qqd_32_scalar")))]
+)
 
 (define_insn "neon_vqdmulh_lane<mode>"
   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
                       (match_operand:SI 4 "immediate_operand" "i")]
                       UNSPEC_VQDMULH_LANE))]
   "TARGET_NEON"
-  "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]")
+  "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+                   (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
 
 (define_insn "neon_vmla_lane<mode>"
   [(set (match_operand:VMD 0 "s_register_operand" "=w")
                      (match_operand:SI 5 "immediate_operand" "i")]
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
-  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]")
+  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmla_ddd_scalar")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                                 (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))]
+)
 
 (define_insn "neon_vmla_lane<mode>"
   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
                      (match_operand:SI 5 "immediate_operand" "i")]
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
-  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]")
+  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmla_qqq_scalar")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
+                                 (const_string "neon_mla_qqq_32_qqd_32_scalar"))))]
+)
 
 (define_insn "neon_vmlal_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 5 "immediate_operand" "i")]
                           UNSPEC_VMLAL_LANE))]
   "TARGET_NEON"
-  "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]")
+  "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vqdmlal_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 5 "immediate_operand" "i")]
                           UNSPEC_VQDMLAL_LANE))]
   "TARGET_NEON"
-  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]")
+  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vmls_lane<mode>"
   [(set (match_operand:VMD 0 "s_register_operand" "=w")
                      (match_operand:SI 5 "immediate_operand" "i")]
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
-  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]")
+  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmla_ddd_scalar")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                                 (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))]
+)
 
 (define_insn "neon_vmls_lane<mode>"
   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
                      (match_operand:SI 5 "immediate_operand" "i")]
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
-  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]")
+  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (const_string "neon_fp_vmla_qqq_scalar")
+                   (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                                 (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
+                                 (const_string "neon_mla_qqq_32_qqd_32_scalar"))))]
+)
 
 (define_insn "neon_vmlsl_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 5 "immediate_operand" "i")]
                           UNSPEC_VMLSL_LANE))]
   "TARGET_NEON"
-  "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]")
+  "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 (define_insn "neon_vqdmlsl_lane<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                            (match_operand:SI 5 "immediate_operand" "i")]
                           UNSPEC_VQDMLSL_LANE))]
   "TARGET_NEON"
-  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]")
+  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+                   (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+                   (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
 
 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
 ; core register into a temp register, then use a scalar taken from that. This
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VEXT))]
   "TARGET_NEON"
-  "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3")
+  "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_2cycle")))]
+)
 
 (define_insn "neon_vrev64<mode>"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_VREV64))]
   "TARGET_NEON"
-  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1")
+  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vrev32<mode>"
   [(set (match_operand:VX 0 "s_register_operand" "=w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_VREV32))]
   "TARGET_NEON"
-  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1")
+  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 (define_insn "neon_vrev16<mode>"
   [(set (match_operand:VE 0 "s_register_operand" "=w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_VREV16))]
   "TARGET_NEON"
-  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1")
+  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "neon_type" "neon_bp_simple")]
+)
 
 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
 ; allocation. For an intrinsic of form:
   "@
   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
-  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1")
+  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
+  [(set_attr "neon_type" "neon_int_1")]
+)
 
 (define_expand "neon_vbsl<mode>"
   [(set (match_operand:VDQX 0 "s_register_operand" "")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VSHL))]
   "TARGET_NEON"
-  "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
 
 (define_insn "neon_vqshl<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VQSHL))]
   "TARGET_NEON"
-  "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2")
+  "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_shift_2")
+                    (const_string "neon_vqshl_vrshl_vqrshl_qqq")))]
+)
 
 (define_insn "neon_vshr_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VSHR_N))]
   "TARGET_NEON"
-  "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2")
+  "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
 
 (define_insn "neon_vshrn_n<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                            UNSPEC_VSHRN_N))]
   "TARGET_NEON"
-  "v%O3shrn.<V_if_elem>\t%P0, %q1, %2")
+  "v%O3shrn.<V_if_elem>\t%P0, %q1, %2"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
 
 (define_insn "neon_vqshrn_n<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                            UNSPEC_VQSHRN_N))]
   "TARGET_NEON"
-  "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2")
+  "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vqshrun_n<mode>"
   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
                            (match_operand:SI 3 "immediate_operand" "i")]
                            UNSPEC_VQSHRUN_N))]
   "TARGET_NEON"
-  "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2")
+  "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vshl_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VSHL_N))]
   "TARGET_NEON"
-  "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2")
+  "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
 
 (define_insn "neon_vqshl_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VQSHL_N))]
   "TARGET_NEON"
-  "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2")
+  "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vqshlu_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VQSHLU_N))]
   "TARGET_NEON"
-  "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2")
+  "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"
+  [(set_attr "neon_type" "neon_shift_2")]
+)
 
 (define_insn "neon_vshll_n<mode>"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VSHLL_N))]
   "TARGET_NEON"
-  "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2")
+  "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
 
 (define_insn "neon_vsra_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 4 "immediate_operand" "i")]
                       UNSPEC_VSRA_N))]
   "TARGET_NEON"
-  "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3")
+  "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"
+  [(set_attr "neon_type" "neon_vsra_vrsra")]
+)
 
 (define_insn "neon_vsri_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VSRI))]
   "TARGET_NEON"
-  "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3")
+  "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_shift_1")
+                    (const_string "neon_shift_3")))]
+)
 
 (define_insn "neon_vsli_n<mode>"
   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
                        (match_operand:SI 3 "immediate_operand" "i")]
                       UNSPEC_VSLI))]
   "TARGET_NEON"
-  "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3")
+  "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_shift_1")
+                    (const_string "neon_shift_3")))]
+)
 
 (define_insn "neon_vtbl1v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
   "TARGET_NEON"
-  "vtbl.8\t%P0, {%P1}, %P2")
+  "vtbl.8\t%P0, {%P1}, %P2"
+  [(set_attr "neon_type" "neon_bp_2cycle")]
+)
 
 (define_insn "neon_vtbl2v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_2cycle")]
+)
 
 (define_insn "neon_vtbl3v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_3cycle")]
+)
 
 (define_insn "neon_vtbl4v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_3cycle")]
+)
 
 (define_insn "neon_vtbx1v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                      UNSPEC_VTBX))]
   "TARGET_NEON"
-  "vtbx.8\t%P0, {%P2}, %P3")
+  "vtbx.8\t%P0, {%P2}, %P3"
+  [(set_attr "neon_type" "neon_bp_2cycle")]
+)
 
 (define_insn "neon_vtbx2v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_2cycle")]
+)
 
 (define_insn "neon_vtbx3v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_3cycle")]
+)
 
 (define_insn "neon_vtbx4v8qi"
   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
 
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_bp_3cycle")]
+)
 
 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
         (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2")
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_3cycle")))]
+)
 
 (define_expand "neon_vtrn<mode>"
   [(match_operand:SI 0 "s_register_operand" "r")
         (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2")
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_3cycle")))]
+)
 
 (define_expand "neon_vzip<mode>"
   [(match_operand:SI 0 "s_register_operand" "r")
         (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2")
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_3cycle")))]
+)
 
 (define_expand "neon_vuzp<mode>"
   [(match_operand:SI 0 "s_register_operand" "r")
         (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))]
                     UNSPEC_VLD1))]
   "TARGET_NEON"
-  "vld1.<V_sz_elem>\t%h0, [%1]")
+  "vld1.<V_sz_elem>\t%h0, [%1]"
+  [(set_attr "neon_type" "neon_vld1_1_2_regs")]
+)
 
 (define_insn "neon_vld1_lane<mode>"
   [(set (match_operand:VDX 0 "s_register_operand" "=w")
     return "vld1.<V_sz_elem>\t%P0, [%1]";
   else
     return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
+                    (const_string "neon_vld1_1_2_regs")
+                    (const_string "neon_vld1_vld2_lane")))]
+)
 
 (define_insn "neon_vld1_lane<mode>"
   [(set (match_operand:VQX 0 "s_register_operand" "=w")
     return "vld1.<V_sz_elem>\t%P0, [%1]";
   else
     return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
+                    (const_string "neon_vld1_1_2_regs")
+                    (const_string "neon_vld1_vld2_lane")))]
+)
 
 (define_insn "neon_vld1_dup<mode>"
   [(set (match_operand:VDX 0 "s_register_operand" "=w")
     return "vld1.<V_sz_elem>\t{%P0[]}, [%1]";
   else
     return "vld1.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+                    (const_string "neon_vld1_1_2_regs")))]
+)
 
 (define_insn "neon_vld1_dup<mode>"
   [(set (match_operand:VQX 0 "s_register_operand" "=w")
     return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
   else
     return "vld1.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+                    (const_string "neon_vld1_1_2_regs")))]
+)
 
 (define_insn "neon_vst1<mode>"
   [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r"))
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
   "TARGET_NEON"
-  "vst1.<V_sz_elem>\t%h1, [%0]")
+  "vst1.<V_sz_elem>\t%h1, [%0]"
+  [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
 
 (define_insn "neon_vst1_lane<mode>"
   [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
     return "vst1.<V_sz_elem>\t{%P1}, [%0]";
   else
     return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1))
+                    (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+                    (const_string "neon_vst1_vst2_lane")))])
 
 (define_insn "neon_vst1_lane<mode>"
   [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
     return "vst1.<V_sz_elem>\t{%P1}, [%0]";
   else
     return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
-})
+}
+  [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
 
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
     return "vld1.64\t%h0, [%1]";
   else
     return "vld2.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vld1_1_2_regs")
+                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))]
+)
 
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
-  "vld2.<V_sz_elem>\t%h0, [%1]")
+  "vld2.<V_sz_elem>\t%h0, [%1]"
+  [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
 
 (define_insn "neon_vld2_lane<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
   ops[3] = operands[3];
   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld1_vld2_lane")]
+)
 
 (define_insn "neon_vld2_lane<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
   ops[3] = GEN_INT (lane);
   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld1_vld2_lane")]
+)
 
 (define_insn "neon_vld2_dup<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
     return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
   else
     return "vld1.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+                    (const_string "neon_vld1_1_2_regs")))]
+)
 
 (define_insn "neon_vst2<mode>"
   [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r"))
     return "vst1.64\t%h1, [%0]";
   else
     return "vst2.<V_sz_elem>\t%h1, [%0]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+                    (const_string "neon_vst1_1_2_regs_vst2_2_regs")))]
+)
 
 (define_insn "neon_vst2<mode>"
   [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
   "TARGET_NEON"
-  "vst2.<V_sz_elem>\t%h1, [%0]")
+  "vst2.<V_sz_elem>\t%h1, [%0]"
+  [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]
+)
 
 (define_insn "neon_vst2_lane<mode>"
   [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
   ops[3] = operands[2];
   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
 
 (define_insn "neon_vst2_lane<mode>"
   [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
   ops[3] = GEN_INT (lane);
   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
 
 (define_insn "neon_vld3<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
     return "vld1.64\t%h0, [%1]";
   else
     return "vld3.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vld1_1_2_regs")
+                    (const_string "neon_vld3_vld4")))]
+)
 
 (define_expand "neon_vld3<mode>"
   [(match_operand:CI 0 "s_register_operand" "=w")
   ops[3] = operands[2];
   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4")]
+)
 
 (define_insn "neon_vld3qb<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
   ops[3] = operands[2];
   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4")]
+)
 
 (define_insn "neon_vld3_lane<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
 
 (define_insn "neon_vld3_lane<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
 
 (define_insn "neon_vld3_dup<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
     }
   else
     return "vld1.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+                    (const_string "neon_vld3_vld4_all_lanes")
+                    (const_string "neon_vld1_1_2_regs")))])
 
 (define_insn "neon_vst3<mode>"
   [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r"))
     return "vst1.64\t%h1, [%0]";
   else
     return "vst3.<V_sz_elem>\t%h1, [%0]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+                    (const_string "neon_vst2_4_regs_vst3_vst4")))])
 
 (define_expand "neon_vst3<mode>"
   [(match_operand:SI 0 "s_register_operand" "+r")
   ops[3] = gen_rtx_REG (DImode, regno + 8);
   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
 
 (define_insn "neon_vst3qb<mode>"
   [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
   ops[3] = gen_rtx_REG (DImode, regno + 10);
   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
 
 (define_insn "neon_vst3_lane<mode>"
   [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
 
 (define_insn "neon_vst3_lane<mode>"
   [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
                    ops);
   return "";
-})
+}
+[(set_attr "neon_type" "neon_vst3_vst4_lane")])
 
 (define_insn "neon_vld4<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
     return "vld1.64\t%h0, [%1]";
   else
     return "vld4.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vld1_1_2_regs")
+                    (const_string "neon_vld3_vld4")))]
+)
 
 (define_expand "neon_vld4<mode>"
   [(match_operand:XI 0 "s_register_operand" "=w")
   ops[4] = operands[2];
   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4")]
+)
 
 (define_insn "neon_vld4qb<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
   ops[4] = operands[2];
   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4")]
+)
 
 (define_insn "neon_vld4_lane<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
 
 (define_insn "neon_vld4_lane<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
 
 (define_insn "neon_vld4_dup<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
     }
   else
     return "vld1.<V_sz_elem>\t%h0, [%1]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+                    (const_string "neon_vld3_vld4_all_lanes")
+                    (const_string "neon_vld1_1_2_regs")))]
+)
 
 (define_insn "neon_vst4<mode>"
   [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
     return "vst1.64\t%h1, [%0]";
   else
     return "vst4.<V_sz_elem>\t%h1, [%0]";
-})
+}
+  [(set (attr "neon_type")
+      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+                    (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+                    (const_string "neon_vst2_4_regs_vst3_vst4")))]
+)
 
 (define_expand "neon_vst4<mode>"
   [(match_operand:SI 0 "s_register_operand" "+r")
   ops[4] = gen_rtx_REG (DImode, regno + 12);
   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
 
 (define_insn "neon_vst4qb<mode>"
   [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
   ops[4] = gen_rtx_REG (DImode, regno + 14);
   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
 
 (define_insn "neon_vst4_lane<mode>"
   [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
 
 (define_insn "neon_vst4_lane<mode>"
   [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
                    ops);
   return "";
-})
+}
+  [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
 
 (define_expand "neon_vand<mode>"
   [(match_operand:VDQX 0 "s_register_operand" "")