From: jules Date: Thu, 26 Jul 2007 12:04:02 +0000 (+0000) Subject: gcc/ X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=bcaec148afbe1e0cb3fc6798496a09d0d3e56326 gcc/ * config/arm/arm.c (arm_mac_accumulator_is_mul_result): New. * config/arm/arm-protos.h (arm_mac_accumulator_is_mul_result): New. * config/arm/cortex-a8.md: New. * config/arm/cortex-a8-neon.md: New. * config/arm/neon-schedgen.ml: New. * config/arm/neon.md (vqh_mnem): New. (neon_type): New. (Is_float_mode): New. (Scalar_mul_8_16): New. (Is_d_reg): New. (V_mode_nunits): New. (All instruction patterns): Annotate with neon_type attribute values. * config/arm/arm.md: Include cortex-a8.md. (insn): Add smmla, umaal, smlald, smlsld, clz, mrs, msr and xtab values. Annotate instruction patterns accordingly. (generic_sched): Do not use generic scheduling for Cortex-A8. (generic_vfp): Do not use generic VFP scheduling for Cortex-A8. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@126953 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 37ece5cc937..aa1e9d30fcf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2007-07-26 Mark Shinwell + Julian Brown + + * config/arm/arm.c (arm_mac_accumulator_is_mul_result): New. + * config/arm/arm-protos.h (arm_mac_accumulator_is_mul_result): New. + * config/arm/cortex-a8.md: New. + * config/arm/cortex-a8-neon.md: New. + * config/arm/neon-schedgen.ml: New. + * config/arm/neon.md (vqh_mnem): New. + (neon_type): New. + (Is_float_mode): New. + (Scalar_mul_8_16): New. + (Is_d_reg): New. + (V_mode_nunits): New. + (All instruction patterns): Annotate with neon_type attribute + values. + * config/arm/arm.md: Include cortex-a8.md. + (insn): Add smmla, umaal, smlald, smlsld, clz, mrs, msr and xtab + values. + Annotate instruction patterns accordingly. + (generic_sched): Do not use generic scheduling for Cortex-A8. + (generic_vfp): Do not use generic VFP scheduling for Cortex-A8. + 2007-07-26 Daniel Jacobowitz * fold-const.c (fold_read_from_constant_string): Use diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 000775d7894..f2380264eee 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -94,6 +94,7 @@ extern int arm_no_early_store_addr_dep (rtx, rtx); extern int arm_no_early_alu_shift_dep (rtx, rtx); extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); extern int tls_mentioned_p (rtx); extern int symbol_mentioned_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 68aa89e49f5..de0fb41c309 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -18167,6 +18167,39 @@ arm_cxx_guard_type (void) return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; } +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. */ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). 
*/ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} + /* The EABI says test the least significant bit of a guard variable. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index ddc8bed2858..1d5313e409f 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -184,7 +184,7 @@ ;; scheduling information. (define_attr "insn" - "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,other" + "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,other" (const_string "other")) ; TYPE attribute is used to detect floating point instructions which, if @@ -235,8 +235,9 @@ ; mav_farith Floating point arithmetic (4 cycle) ; mav_dmult Double multiplies (7 cycle) ; + (define_attr "type" - "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" + "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" (if_then_else (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") (const_string "mult") @@ -332,14 +333,14 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs") + (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8") (const_string "no") (const_string "yes")))) (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e")) + (eq_attr "tune" "!arm1020e,arm1022e,cortexa8")) (const_string "yes") (const_string "no")))) @@ -348,6 +349,7 @@ (include "arm1020e.md") (include "arm1026ejs.md") (include "arm1136jfs.md") +(include "cortex-a8.md") ;;--------------------------------------------------------------------------- @@ -3869,6 +3871,7 @@ "TARGET_INT_SIMD" "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") + (set_attr "insn" "xtab") (set_attr "type" "alu_shift")] ) @@ -4242,6 +4245,7 @@ "TARGET_INT_SIMD" "sxtab%?\\t%0, %2, %1" [(set_attr "type" "alu_shift") + (set_attr "insn" "xtab") (set_attr "predicable" "yes")] ) @@ -10772,7 +10776,8 @@ (clz:SI (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_32BIT && arm_arch5" "clz%?\\t%0, %1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "insn" "clz")]) (define_expand "ffssi2" [(set (match_operand:SI 0 "s_register_operand" "") diff --git 
a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md
new file mode 100644
index 00000000000..ed97ed18a7d
--- /dev/null
+++ b/gcc/config/arm/cortex-a8-neon.md
@@ -0,0 +1,1307 @@
+;; ARM Cortex-A8 NEON scheduling description.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.

+;; This file is part of GCC.

+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.

+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.

+(define_automaton "cortex_a8_neon")

+;; Only one load, store, permute, MCR or MRC instruction can be issued
+;; per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")

+;; Only one data-processing instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")

+;; The VFPLite unit (non-pipelined).
+(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")

+;; We need a special mutual exclusion (to be used in addition to
+;; cortex_a8_neon_issue_dp) for the case when an instruction such as
+;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
+;; E2 of the floating-point add pipeline.  On the cycle before that
+;; forward we must prevent issue of any instruction to the floating-point
+;; add pipeline, but still allow issue of a data-processing instruction
+;; to any of the other pipelines.
+(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")

+;; Patterns of reservation.
+;; We model the NEON issue units as running in parallel with the core ones.
+;; We assume that multi-cycle NEON instructions get decomposed into
+;; micro-ops as they are issued into the NEON pipeline, and not as they
+;; are issued into the ARM pipeline.  Dual issue may not occur except
+;; upon the first and last cycles of a multi-cycle instruction, but it
+;; is unclear whether two multi-cycle instructions can issue together
+;; (in this model they cannot).  It is also unclear whether a
+;; multi-cycle and a single-cycle instruction that could potentially
+;; issue together only do so if (say) the single-cycle one precedes
+;; the other.
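+;;
+;; As a worked example of the reservation notation used below: in
+;; cortex_a8_neon_dp_2, the first micro-op claims either core ALU pipe
+;; together with the NEON data-processing issue unit, and the second
+;; micro-op claims the issue unit alone on the following cycle, so no
+;; other data-processing instruction can issue on either of those
+;; cycles.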
+ +(define_reservation "cortex_a8_neon_dp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_4" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp") + +(define_reservation "cortex_a8_neon_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd") + +(define_reservation "cortex_a8_neon_perm" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_2" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_3" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_ls" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_2" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_3" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_4" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_5" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_fmul_then_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + nothing*3,\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fmul_then_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp,\ + nothing*2,\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_fadd") + +;; VFP instructions can only be single-issued into the NEON pipeline. +(define_reservation "cortex_a8_vfp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_perm+cortex_a8_vfplite") + +;; VFP instructions. +;; The VFPLite unit that executes these isn't pipelined; we give the +;; worst-case latencies (and choose the double-precision ones where we +;; do not distinguish on precision). We assume RunFast mode is not +;; enabled and therefore do not model the possible VFP instruction +;; execution in the NEON floating point pipelines, nor additional +;; latencies for the processing of subnormals. +;; +;; TODO: RunFast mode could potentially be enabled when -ffast-math +;; is specified. 
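+;;
+;; For example, in cortex_a8_vfp_add_sub below, issue occupies the
+;; cortex_a8_vfp reservation for one cycle and then the non-pipelined
+;; VFPLite unit for nine further cycles, so a second VFP instruction
+;; issued straight afterwards stalls until VFPLite is free again.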
+ +(define_insn_reservation "cortex_a8_vfp_add_sub" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "farith")) + "cortex_a8_vfp,cortex_a8_vfplite*9") + +(define_insn_reservation "cortex_a8_vfp_muls" 12 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuls")) + "cortex_a8_vfp,cortex_a8_vfplite*11") + +(define_insn_reservation "cortex_a8_vfp_muld" 17 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuld")) + "cortex_a8_vfp,cortex_a8_vfplite*16") + +(define_insn_reservation "cortex_a8_vfp_macs" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacs")) + "cortex_a8_vfp,cortex_a8_vfplite*20") + +(define_insn_reservation "cortex_a8_vfp_macd" 26 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacd")) + "cortex_a8_vfp,cortex_a8_vfplite*25") + +(define_insn_reservation "cortex_a8_vfp_divs" 37 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivs")) + "cortex_a8_vfp,cortex_a8_vfplite*36") + +(define_insn_reservation "cortex_a8_vfp_divd" 65 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivd")) + "cortex_a8_vfp,cortex_a8_vfplite*64") + +;; Comparisons can actually take 7 cycles sometimes instead of four, +;; but given all the other instructions lumped into type=ffarith that +;; take four cycles, we pick that latency. +(define_insn_reservation "cortex_a8_vfp_farith" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "ffarith")) + "cortex_a8_vfp,cortex_a8_vfplite*3") + +(define_insn_reservation "cortex_a8_vfp_cvt" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "f_cvt")) + "cortex_a8_vfp,cortex_a8_vfplite*6") + +;; NEON -> core transfers. + +(define_insn_reservation "neon_mrc" 20 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrc")) + "cortex_a8_neon_ls") + +(define_insn_reservation "neon_mrrc" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrrc")) + "cortex_a8_neon_ls_2") + +;; The remainder of this file is auto-generated by neon-schedgen. + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. +(define_insn_reservation "neon_int_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "neon_int_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "neon_int_3" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_3")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "neon_int_4" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_4")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "neon_int_5" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_5")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. 
+(define_insn_reservation "neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqneg_vqabs")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "neon_vmov" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "neon_vaba" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "neon_vsma" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsma")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_8_16")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. 
+(define_insn_reservation "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "neon_shift_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "neon_shift_2" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "neon_shift_3" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_3")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N1. +(define_insn_reservation "neon_vshl_ddd" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vshl_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsra_vrsra")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. 
+(define_insn_reservation "neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a8_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vsum")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_qqd")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. 
+(define_insn_reservation "neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_simple")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_2cycle")) + "cortex_a8_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_3cycle")) + "cortex_a8_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "neon_ldr" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_ldr")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_str" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_str")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_1_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. 
+(define_insn_reservation "neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_vld2_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_lane")) + "cortex_a8_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_vst2_lane")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "neon_mcr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr_2_mcrr")) + "cortex_a8_neon_perm_2") + +;; Exceptions to the default latencies. + +(define_bypass 1 "neon_mcr_2_mcrr" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "neon_mcr" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_vld3_vld4_all_lanes" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_vld3_vld4_lane" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_vld1_vld2_lane" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "neon_vld3_vld4" + 
"neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_vld2_4_regs" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_vld2_2_regs_vld1_vld2_all_lanes" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_vld1_3_4_regs" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "neon_vld1_1_2_regs" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "neon_ldr" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_bp_3cycle" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_bp_2cycle" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "neon_bp_simple" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "neon_fp_vrecps_vrsqrts_qqq" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "neon_fp_vrecps_vrsqrts_ddd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "neon_fp_vmla_qqq_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "neon_fp_vmla_ddd_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "neon_fp_vmla_qqq" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "neon_fp_vmla_ddd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_fp_vmul_qqd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "neon_fp_vmul_ddd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "neon_fp_vsum" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_fp_vadd_qqq_vabs_qq" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + 
neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "neon_fp_vadd_ddd_vabs_dd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_vsra_vrsra" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "neon_vqshl_vrshl_vqrshl_qqq" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "neon_vshl_ddd" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_shift_3" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_shift_2" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_shift_1" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "neon_mul_qqd_32_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + 
neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_mul_ddd_16_scalar_32_16_long_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "neon_mla_qqq_32_qqd_32_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "neon_mla_qqq_8_16" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "neon_mul_qqq_8_16_32_ddd_32" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_vsma" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + 
neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "neon_vaba_qqq" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "neon_vaba" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_vmov" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_vqneg_vqabs" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_int_5" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "neon_int_4" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_int_3" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_int_2" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "neon_int_1" + "neon_int_1,\ + neon_int_4,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc/config/arm/cortex-a8.md 
b/gcc/config/arm/cortex-a8.md
new file mode 100644
index 00000000000..69d44de5720
--- /dev/null
+++ b/gcc/config/arm/cortex-a8.md
@@ -0,0 +1,272 @@
+;; ARM Cortex-A8 scheduling description.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.

+;; This file is part of GCC.

+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.

+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.

+(define_automaton "cortex_a8")

+;; Only one load/store instruction can be issued per cycle
+;; (although reservation of this unit is only required for single
+;; loads and stores -- see below).
+(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")

+;; Only one branch instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")

+;; The two ALU pipelines.
+(define_cpu_unit "cortex_a8_alu0" "cortex_a8")
+(define_cpu_unit "cortex_a8_alu1" "cortex_a8")

+;; The usual flow of an instruction through the pipelines.
+(define_reservation "cortex_a8_default"
+                    "cortex_a8_alu0|cortex_a8_alu1")

+;; The flow of a branch instruction through the pipelines.
+(define_reservation "cortex_a8_branch"
+                    "(cortex_a8_alu0+cortex_a8_issue_branch)|\
+                     (cortex_a8_alu1+cortex_a8_issue_branch)")

+;; The flow of a load or store instruction through the pipeline in
+;; the case where that instruction consists of only one micro-op...
+(define_reservation "cortex_a8_load_store_1"
+                    "(cortex_a8_alu0+cortex_a8_issue_ls)|\
+                     (cortex_a8_alu1+cortex_a8_issue_ls)")

+;; ...and in the case of two micro-ops.  We don't need to reserve
+;; cortex_a8_issue_ls here because dual issue is altogether forbidden
+;; during the issue cycle of the first micro-op.  (Instead of modelling
+;; a separate issue unit, we reserve alu0 and alu1 to prevent any
+;; other instructions from being issued upon that first cycle.)
+;; Even though the load/store pipeline is usually available in either
+;; ALU pipe, multi-cycle instructions always issue in pipeline 0.  This
+;; reservation is therefore the same as cortex_a8_multiply_2 below.
+(define_reservation "cortex_a8_load_store_2"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")

+;; The flow of a single-cycle multiplication.
+(define_reservation "cortex_a8_multiply"
+                    "cortex_a8_alu0")

+;; The flow of a multiplication instruction that gets decomposed into
+;; two micro-ops.  The two micro-ops will be issued to pipeline 0 on
+;; successive cycles.  Dual issue cannot happen at the same time as the
+;; first of the micro-ops.
+(define_reservation "cortex_a8_multiply_2"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")

+;; Similarly, the flow of a multiplication instruction that gets
+;; decomposed into three micro-ops.  Dual issue cannot occur except on
+;; the cycle upon which the third micro-op is issued.
+(define_reservation "cortex_a8_multiply_3"
+                    "cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0+cortex_a8_alu1,\
+                     cortex_a8_alu0")

+;; The model given here assumes that all instructions are unconditional.

+;; Data processing instructions, but not move instructions.
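+;; (These reservations rely on the "mov" and "mvn" values added to the
+;; "insn" attribute in arm.md, which let plain moves be split out into
+;; the separate single-cycle cortex_a8_mov reservation below.)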
+
+;; We include CLZ with these since it has the same execution pattern
+;; (source read in E2 and destination available at the end of that cycle).
+(define_insn_reservation "cortex_a8_alu" 2
+  (and (eq_attr "tune" "cortexa8")
+       (ior (and (eq_attr "type" "alu")
+                 (not (eq_attr "insn" "mov,mvn")))
+            (eq_attr "insn" "clz")))
+  "cortex_a8_default")

+(define_insn_reservation "cortex_a8_alu_shift" 2
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu_shift")
+            (not (eq_attr "insn" "mov,mvn"))))
+  "cortex_a8_default")

+(define_insn_reservation "cortex_a8_alu_shift_reg" 2
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu_shift_reg")
+            (not (eq_attr "insn" "mov,mvn"))))
+  "cortex_a8_default")

+;; Move instructions.

+(define_insn_reservation "cortex_a8_mov" 1
+  (and (eq_attr "tune" "cortexa8")
+       (and (eq_attr "type" "alu,alu_shift,alu_shift_reg")
+            (eq_attr "insn" "mov,mvn")))
+  "cortex_a8_default")

+;; Exceptions to the default latencies for data processing instructions.

+;; A move followed by an ALU instruction with no early dep.
+;; (Such a pair can be issued in parallel, hence latency zero.)
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")

+;; An ALU instruction followed by an ALU instruction with no early dep.
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu_shift"
+               "arm_no_early_alu_shift_dep")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+               "cortex_a8_alu_shift_reg"
+               "arm_no_early_alu_shift_value_dep")

+;; Multiplication instructions.  These are categorized according to their
+;; reservation behaviour and the need below to distinguish certain
+;; varieties for bypasses.  Results are available at the E5 stage
+;; (but some of these are multi-cycle instructions, which explains the
+;; latencies below).

+(define_insn_reservation "cortex_a8_mul" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "mul,smulxy,smmul"))
+  "cortex_a8_multiply_2")

+(define_insn_reservation "cortex_a8_mla" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
+  "cortex_a8_multiply_2")

+(define_insn_reservation "cortex_a8_mull" 7
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
+  "cortex_a8_multiply_3")

+(define_insn_reservation "cortex_a8_smulwy" 5
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smulwy,smuad,smusd"))
+  "cortex_a8_multiply")

+;; smlald and smlsld are multiply-accumulate instructions but do not
+;; receive bypassed data from other multiplication results; thus, they
+;; cannot go in cortex_a8_mla above.  (See below for bypass details.)
+(define_insn_reservation "cortex_a8_smlald" 6
+  (and (eq_attr "tune" "cortexa8")
+       (eq_attr "insn" "smlald,smlsld"))
+  "cortex_a8_multiply_2")

+;; A multiply with a single-register result or an MLA, followed by an
+;; MLA with an accumulator dependency, has its result forwarded so two
+;; such instructions can issue back-to-back.
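+;; For example (a hypothetical pair):
+;;   mul  r1, r2, r3
+;;   mla  r4, r5, r6, r1
+;; Here the arm_mac_accumulator_is_mul_result guard from arm.c accepts
+;; the forward, since r1 feeds only the accumulator of the mla; it
+;; would reject "mla r4, r1, r6, r7", where the multiplication result
+;; is used as a multiplier operand instead.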
+(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy" + "cortex_a8_mla" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at E2 has lower latency than one needing it at E1. +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Load instructions. +;; The presence of any register writeback is ignored here. + +;; A load result has latency 3 unless the dependent instruction has +;; no early dep, in which case it is only latency two. +;; We assume 64-bit alignment for doubleword loads. +(define_insn_reservation "cortex_a8_load1_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load1,load2,load_byte")) + "cortex_a8_load_store_1") + +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; We do not currently model the fact that loads with scaled register +;; offsets that are not LSL #2 have an extra cycle latency (they issue +;; as two micro-ops). + +;; A load multiple of three registers is usually issued as two micro-ops. +;; The first register will be available at E3 of the first iteration, +;; the second at E3 of the second iteration, and the third at E4 of +;; the second iteration. A load multiple of four registers is usually +;; issued as two micro-ops. +(define_insn_reservation "cortex_a8_load3_4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load3,load4")) + "cortex_a8_load_store_2") + +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Store instructions. +;; Writeback is again ignored. + +(define_insn_reservation "cortex_a8_store1_2" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store1,store2")) + "cortex_a8_load_store_1") + +(define_insn_reservation "cortex_a8_store3_4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store3,store4")) + "cortex_a8_load_store_2") + +;; An ALU instruction acting as a producer for a store instruction +;; that only uses the result as the value to be stored (as opposed to +;; using it to calculate the address) has latency zero; the store +;; reads the value to be stored at the start of E3 and the ALU insn +;; writes it at the end of E2. Move instructions actually produce the +;; result at the end of E1, but since we don't have delay slots, the +;; scheduling behaviour will be the same. +(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\ + cortex_a8_alu_shift_reg,cortex_a8_mov" + "cortex_a8_store1_2,cortex_a8_store3_4" + "arm_no_early_store_addr_dep") + +;; Branch instructions + +(define_insn_reservation "cortex_a8_branch" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "branch")) + "cortex_a8_branch") + +;; Call latencies are not predictable. 
A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_a8_call" 32 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "call")) + "cortex_a8_issue_branch") + +;; NEON (including VFP) instructions. + +(include "cortex-a8-neon.md") + diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml new file mode 100644 index 00000000000..b47a0ae7d3c --- /dev/null +++ b/gcc/config/arm/neon-schedgen.ml @@ -0,0 +1,497 @@ +(* Emission of the core of the Cortex-A8 NEON scheduling description. + Copyright (C) 2007 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*) + +(* This scheduling description generator works as follows. + - Each group of instructions has source and destination requirements + specified. The source requirements may be specified using + Source (the stage at which all source operands not otherwise + described are read), Source_m (the stage at which Rm operands are + read), Source_n (likewise for Rn) and Source_d (likewise for Rd). + - For each group of instructions the earliest stage where a source + operand may be required is calculated. + - Each group of instructions is selected in turn as a producer. + The latencies between this group and every other group are then + calculated, yielding up to four values for each combination: + 1. Producer -> consumer Rn latency + 2. Producer -> consumer Rm latency + 3. Producer -> consumer Rd (as a source) latency + 4. Producer -> consumer worst-case latency. + Value 4 is calculated from the destination availability requirements + of the producer and the earliest source availability requirements + of the consumer. + - The largest Value 4 calculated for the current producer is the + worst-case latency, L, for that instruction group. This value is written + out in a define_insn_reservation for the producer group. + - For each producer and consumer pair, the latencies calculated above + are collated. The average (of up to four values) is calculated and + if this average is different from the worst-case latency, an + unguarded define_bypass construction is issued for that pair. + (For each pair only one define_bypass construction will be emitted, + and at present we do not emit specific guards.) +*) + +open Utils + +let n1 = 1 and n2 = 2 and n3 = 3 and n4 = 4 and n5 = 5 and n6 = 6 + and n7 = 7 and n8 = 8 and n9 = 9 + +type availability = Source of int + | Source_n of int + | Source_m of int + | Source_d of int + | Dest of int + | Dest_n_after of int * int + +type guard = Guard_none | Guard_only_m | Guard_only_n | Guard_only_d + +(* Reservation behaviours. All but the last row here correspond to one + pipeline each. Each constructor will correspond to one + define_reservation.
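+ For example, ALU maps onto the single-cycle NEON data-processing reservation and ALU_2cycle onto its two-cycle form: emit_insn_reservations below names them "cortex_a8_neon_dp" and "cortex_a8_neon_dp_2", and cortex-a8-neon.md is expected to provide define_reservations with those names.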
*) +type reservation = + Mul | Mul_2cycle | Mul_4cycle +| Shift | Shift_2cycle +| ALU | ALU_2cycle +| Fmul | Fmul_2cycle +| Fadd | Fadd_2cycle +(* | VFP *) +| Permute of int +| Ls of int +| Fmul_then_fadd | Fmul_then_fadd_2 + +(* This table must be kept as short as possible by conflating + entries with the same availability behaviour. + + First components: instruction group names + Second components: availability requirements, in the order in which + they should appear in the comments in the .md file. + Third components: reservation info +*) +let availability_table = [ + (* NEON integer ALU instructions. *) + (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr + veor vbic vorn ddd qqq *) + "neon_int_1", [Source n2; Dest n3], ALU; + (* vadd vsub qqd vsub ddd qqq *) + "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU; + (* vsum vneg dd qq vadd vsub qdd *) + "neon_int_3", [Source n1; Dest n3], ALU; + (* vabs vceqz vcgez vcbtz vclez vcltz vadh vradh vsbh vrsbh dqq *) + (* vhadd vrhadd vqadd vtst ddd qqq *) + "neon_int_4", [Source n2; Dest n4], ALU; + (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *) + "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU; + (* vqneg vqabs dd qq *) + "neon_vqneg_vqabs", [Source n1; Dest n4], ALU; + (* vmov vmvn *) + "neon_vmov", [Dest n3], ALU; + (* vaba *) + "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU; + "neon_vaba_qqq", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ALU_2cycle; + (* vsma *) + "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU; + + (* NEON integer multiply instructions. *) + (* vmul, vqdmlh, vqrdmlh *) + (* vmul, vqdmul, qdd 16/8 long 32/16 long *) + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], Mul; + "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], Mul_2cycle; + (* vmul, vqdmul again *) + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar", + [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle; + (* vmla, vmls *) + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long", + [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul; + "neon_mla_qqq_8_16", + [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle; + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle; + "neon_mla_qqq_32_qqd_32_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], Mul_4cycle; + (* vmul, vqdmulh, vqrdmulh *) + (* vmul, vqdmul *) + "neon_mul_ddd_16_scalar_32_16_long_scalar", + [Source_n n2; Source_m n1; Dest n6], Mul; + "neon_mul_qqd_32_scalar", + [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle; + (* vmla, vmls *) + (* vmla, vmla, vqdmla, vqdmls *) + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul; + + (* NEON integer shift instructions. *) + (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *) + "neon_shift_1", [Source n1; Dest n3], Shift; + (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow; + vqshl_vrshl_vqrshl_ddd *) + "neon_shift_2", [Source n1; Dest n4], Shift; + (* vsli, vsri and vshl for qqq *) + "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle; + "neon_vshl_ddd", [Source n1; Dest n1], Shift; + "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)], + Shift_2cycle; + "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift; + + (* NEON floating-point instructions. 
*) + (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *) + (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *) + "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd; + "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)], + Fadd_2cycle; + (* vsum, fvmx, vfmn *) + "neon_fp_vsum", [Source n1; Dest n5], Fadd; + "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul; + "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)], + Fmul_2cycle; + (* vmla, vmls *) + "neon_fp_vmla_ddd", + [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd; + "neon_fp_vmla_qqq", + [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)], + Fmul_then_fadd_2; + "neon_fp_vmla_ddd_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd; + "neon_fp_vmla_qqq_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)], + Fmul_then_fadd_2; + "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd; + "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)], + Fmul_then_fadd_2; + + (* NEON byte permute instructions. *) + (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *) + "neon_bp_simple", [Source n1; Dest n2], Permute 1; + (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}; + similarly for vtbx *) + "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2; + (* all the rest *) + "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3; + + (* NEON load/store instructions. *) + "neon_ldr", [Dest n1], Ls 1; + "neon_str", [Source n1], Ls 1; + "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2; + "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3; + "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2; + "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3; + "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4; + "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2; + "neon_vst1_3_4_regs", [Source n1], Ls 3; + "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4; + "neon_vst3_vst4", [Source n1], Ls 4; + "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3; + "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5; + "neon_vst1_vst2_lane", [Source n1], Ls 2; + "neon_vst3_vst4_lane", [Source n1], Ls 3; + "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3; + + (* NEON register transfer instructions. *) + "neon_mcr", [Dest n2], Permute 1; + "neon_mcr_2_mcrr", [Dest n2], Permute 2; + (* MRC instructions are in the .tpl file. *) +] + +(* Augment the tuples in the availability table with an extra component + that describes the earliest stage where a source operand may be + required. (It is also possible that an entry in the table has no + source requirements.) *) +let calculate_sources = + List.map (fun (name, avail, res) -> + let earliest_stage = + List.fold_left + (fun cur -> fun info -> + match info with + Source stage + | Source_n stage + | Source_m stage + | Source_d stage -> + (match cur with + None -> Some stage + | Some stage' when stage < stage' -> Some stage + | _ -> cur) + | _ -> cur) None avail + in + (name, avail, res, earliest_stage)) + +(* Find the stage, if any, at the end of which a group produces a result. *) +let find_dest (attr, avail, _, _) = + try + find_with_result + (fun av -> match av with + Dest st -> Some (Some st) + | Dest_n_after (after, st) -> Some (Some (after + st)) + | _ -> None) avail + with Not_found -> None + +(* Find the worst-case latency between a producer and a consumer. 
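+ The rule applied below is produced - consumed + 1: the same arithmetic the core bypasses in cortex-a8.md follow (a multiply result available at E5 feeding an ALU source read at E2 gives 5 - 2 + 1 = 4 there). Here, for instance, a producer with Dest n6 feeding a consumer whose earliest source stage is n2 gets 6 - 2 + 1 = 5.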
*) +let worst_case_latency producer (_, _, _, earliest_required) = + let dest = find_dest producer in + match earliest_required, dest with + None, _ -> + (* The consumer doesn't have any source requirements. *) + None + | _, None -> + (* The producer doesn't produce any results (e.g. a store insn). *) + None + | Some consumed, Some produced -> Some (produced - consumed + 1) + +(* Helper function for below. *) +let latency_calc f producer (_, avail, _, _) = + try + let source_avail = find_with_result f avail in + match find_dest producer with + None -> + (* The producer does not produce a result. *) + Some 0 + | Some produced -> + let latency = produced - source_avail + 1 in + (* Latencies below zero are raised to zero since we don't have + delay slots. *) + if latency < 0 then Some 0 else Some latency + with Not_found -> None + +(* Find any Rm latency between a producer and a consumer. If no + Rm source requirement is explicitly specified for the consumer, + return "positive infinity". Also return "positive infinity" if + the latency matches the supplied worst-case latency for this + producer. *) +let get_m_latency producer consumer = + match latency_calc (fun av -> match av with Source_m stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_m, latency)] + +(* Likewise for Rn. *) +let get_n_latency producer consumer = + match latency_calc (fun av -> match av with Source_n stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_n, latency)] + +(* Likewise for Rd. *) +let get_d_latency producer consumer = + match + latency_calc (fun av -> match av with Source_d stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_d, latency)] + +(* Given a producer and a consumer, work out the latency of the producer + to the consumer in each of the four cases (availability information + permitting) identified at the top of this file. Return the + consumer, the worst-case unguarded latency and any guarded latencies. *) +let calculate_latencies producer consumer = + let worst = worst_case_latency producer consumer in + let m_latency = get_m_latency producer consumer in + let n_latency = get_n_latency producer consumer in + let d_latency = get_d_latency producer consumer in + (consumer, worst, m_latency @ n_latency @ d_latency) + +(* Helper function for below. *) +let pick_latency largest worst guards = + let guards = + match worst with + None -> guards + | Some worst -> (Guard_none, worst) :: guards + in + if List.length guards = 0 then None else + let total_latency = + List.fold_left (fun acc -> fun (_, latency) -> acc + latency) 0 guards + in + let average_latency = (float_of_int total_latency) /. + (float_of_int (List.length guards)) in + let rounded_latency = int_of_float (ceil average_latency) in + if rounded_latency = largest then None + else Some (Guard_none, rounded_latency) + +(* Collate all bypasses for a particular producer as required in + worst_case_latencies_and_bypasses. (By this stage there is a maximum + of one bypass from this producer to any particular consumer listed + in LATENCIES.) Use a hash table to collate bypasses with the + same latency and guard. *) +let collate_bypasses (producer_name, _, _, _) largest latencies = + let ht = Hashtbl.create 42 in + let keys = ref [] in + List.iter ( + fun ((consumer, _, _, _), worst, guards) -> + (* Find out which latency to use. 
Ignoring latencies that match + the *overall* worst-case latency for this producer (which will + be in define_insn_reservation), we have to examine: + 1. the latency with no guard between this producer and this + consumer; and + 2. any guarded latency. *) + let guard_latency_opt = pick_latency largest worst guards in + match guard_latency_opt with + None -> () + | Some (guard, latency) -> + begin + (if (try ignore (Hashtbl.find ht (guard, latency)); false + with Not_found -> true) then + keys := (guard, latency) :: !keys); + Hashtbl.add ht (guard, latency) consumer + end + ) latencies; + (* The hash table now has bypasses collated so that ones with the + same latency and guard have the same keys. Walk through all the + keys, extract the associated bypasses, and concatenate the names + of the consumers for each bypass. *) + List.map ( + fun ((guard, latency) as key) -> + let consumers = Hashtbl.find_all ht key in + (producer_name, + String.concat ",\\\n " consumers, + latency, + guard) + ) !keys + +(* For every producer, find the worst-case latency between it and + *any* consumer. Also determine (if such a thing exists) the + lowest-latency bypass from each producer to each consumer. Group + the output in such a way that all bypasses with the same producer + and latency are together, and so that bypasses with the worst-case + latency are ignored. *) +let worst_case_latencies_and_bypasses = + let rec f (worst_acc, bypasses_acc) prev xs = + match xs with + [] -> (worst_acc, bypasses_acc) + | ((producer_name, producer_avail, res_string, _) as producer)::next -> + (* For this particular producer, work out the latencies between + it and every consumer. *) + let latencies = + List.fold_left (fun acc -> fun consumer -> + (calculate_latencies producer consumer) :: acc) + [] (prev @ xs) + in + (* Now work out what the overall worst-case latency was for this + particular producer. *) + match latencies with + [] -> assert false + | _ -> + let comp_fn (_, l1, _) (_, l2, _) = + if l1 > l2 then -1 else if l1 = l2 then 0 else 1 + in + let largest = + match List.hd (List.sort comp_fn latencies) with + (_, None, _) -> 0 (* Producer has no consumers. *) + | (_, Some worst, _) -> worst + in + (* Having got the largest latency, collect all bypasses for + this producer and filter out those with that largest + latency. Record the others for later emission. *) + let bypasses = collate_bypasses producer largest latencies in + (* Go on to process remaining producers, having noted + the result for this one. *) + f ((producer_name, producer_avail, largest, + res_string) :: worst_acc, + bypasses @ bypasses_acc) + (prev @ [producer]) next + in + f ([], []) [] + +(* Emit a helpful comment for a define_insn_reservation.
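+ For the neon_int_1 entry above ([Source n2; Dest n3]) the emitted comment reads, modulo line wrapping: ";; Instructions using this reservation read their source operands at N2, and produce a result at N3."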
*) +let write_comment producer avail = + let seen_source = ref false in + let describe info = + let read = if !seen_source then "" else "read " in + match info with + Source stage -> + seen_source := true; + Printf.printf "%stheir source operands at N%d" read stage + | Source_n stage -> + seen_source := true; + Printf.printf "%stheir (D|Q)n operands at N%d" read stage + | Source_m stage -> + seen_source := true; + Printf.printf "%stheir (D|Q)m operands at N%d" read stage + | Source_d stage -> + Printf.printf "%stheir (D|Q)d operands at N%d" read stage + | Dest stage -> + Printf.printf "produce a result at N%d" stage + | Dest_n_after (after, stage) -> + Printf.printf "produce a result at N%d on cycle %d" stage (after + 1) + in + Printf.printf ";; Instructions using this reservation "; + let rec f infos x = + let sep = if x mod 2 = 1 then "" else "\n;;" in + match infos with + [] -> assert false + | [info] -> describe info; Printf.printf ".\n" + | info::(_::[] as infos) -> + describe info; Printf.printf ", and%s " sep; f infos (x+1) + | info::infos -> describe info; Printf.printf ",%s " sep; f infos (x+1) + in + f avail 0 + +(* Emit a define_insn_reservation for each producer. The latency + written in will be its worst-case latency. *) +let emit_insn_reservations = + List.iter ( + fun (producer, avail, latency, reservation) -> + write_comment producer avail; + Printf.printf "(define_insn_reservation \"%s\" %d\n" producer latency; + Printf.printf " (and (eq_attr \"tune\" \"cortexa8\")\n"; + Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer; + let str = + match reservation with + Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4" + | Shift -> "dp" | Shift_2cycle -> "dp_2" + | ALU -> "dp" | ALU_2cycle -> "dp_2" + | Fmul -> "dp" | Fmul_2cycle -> "dp_2" + | Fadd -> "fadd" | Fadd_2cycle -> "fadd_2" + | Ls 1 -> "ls" + | Ls n -> "ls_" ^ (string_of_int n) + | Permute 1 -> "perm" + | Permute n -> "perm_" ^ (string_of_int n) + | Fmul_then_fadd -> "fmul_then_fadd" + | Fmul_then_fadd_2 -> "fmul_then_fadd_2" + in + Printf.printf " \"cortex_a8_neon_%s\")\n\n" str + ) + +(* Given a guard description, return the name of the C function to + be used as the guard for define_bypass. *) +let guard_fn g = + match g with + Guard_only_m -> "arm_neon_only_m_dependency" + | Guard_only_n -> "arm_neon_only_n_dependency" + | Guard_only_d -> "arm_neon_only_d_dependency" + | Guard_none -> assert false + +(* Emit a define_bypass for each bypass. *) +let emit_bypasses = + List.iter ( + fun (producer, consumers, latency, guard) -> + Printf.printf "(define_bypass %d \"%s\"\n" latency producer; + if guard = Guard_none then + Printf.printf " \"%s\")\n\n" consumers + else + begin + Printf.printf " \"%s\"\n" consumers; + Printf.printf " \"%s\")\n\n" (guard_fn guard) + end + ) + +(* Program entry point. *) +let main = + let table = calculate_sources availability_table in + let worst_cases, bypasses = worst_case_latencies_and_bypasses table in + emit_insn_reservations (List.rev worst_cases); + Printf.printf ";; Exceptions to the default latencies.\n\n"; + emit_bypasses bypasses + diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 48b4e2a2c94..c62ffc3cff6 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -416,6 +416,106 @@ (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") (V8HI "x") (V4SI "t") (V4SF "t")]) +;; Attribute used to permit string comparisons against <vqh_mnem> in +;; neon_type attribute definitions.
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) + +;; Classification of NEON instructions for scheduling purposes. +;; Do not set this attribute and the "type" attribute together in +;; any one instruction pattern. +(define_attr "neon_type" + "neon_int_1,\ + neon_int_2,\ + neon_int_3,\ + neon_int_4,\ + neon_int_5,\ + neon_vqneg_vqabs,\ + neon_vmov,\ + neon_vaba,\ + neon_vsma,\ + neon_vaba_qqq,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_shift_1,\ + neon_shift_2,\ + neon_shift_3,\ + neon_vshl_ddd,\ + neon_vqshl_vrshl_vqrshl_qqq,\ + neon_vsra_vrsra,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vsum,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq,\ + neon_bp_simple,\ + neon_bp_2cycle,\ + neon_bp_3cycle,\ + neon_ldr,\ + neon_str,\ + neon_vld1_1_2_regs,\ + neon_vld1_3_4_regs,\ + neon_vld2_2_regs_vld1_vld2_all_lanes,\ + neon_vld2_4_regs,\ + neon_vld3_vld4,\ + neon_vst1_1_2_regs_vst2_2_regs,\ + neon_vst1_3_4_regs,\ + neon_vst2_4_regs_vst3_vst4,\ + neon_vst3_vst4,\ + neon_vld1_vld2_lane,\ + neon_vld3_vld4_lane,\ + neon_vst1_vst2_lane,\ + neon_vst3_vst4_lane,\ + neon_vld3_vld4_all_lanes,\ + neon_mcr,\ + neon_mcr_2_mcrr,\ + neon_mrc,\ + neon_mrrc,\ + neon_ldm_2,\ + neon_stm_2,\ + none" + (const_string "none")) + +;; Predicates used for setting the above attribute. 
+ +(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") + (V4HI "false") (V8HI "false") + (V2SI "false") (V4SI "false") + (V2SF "true") (V4SF "true") + (DI "false") (V2DI "false")]) + +(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true") + (V4HI "true") (V8HI "true") + (V2SI "false") (V4SI "false") + (V2SF "false") (V4SF "false") + (DI "false") (V2DI "false")]) + + +(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false") + (V4HI "true") (V8HI "false") + (V2SI "true") (V4SI "false") + (V2SF "true") (V4SF "false") + (DI "true") (V2DI "false")]) + +(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") + (V2SF "2") (V4SF "4") + (DI "1") (V2DI "2")]) + (define_insn "*neon_mov" [(set (match_operand:VD 0 "nonimmediate_operand" "=w,Uv,w, w, ?r,?w,?r,?r, ?Us") @@ -456,10 +556,12 @@ default: return output_move_double (operands); } } - [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2") - (set_attr "length" "4,4,4,4,4,4,8,8,8") - (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*") - (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")]) + [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*") + (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2") + (set_attr "insn" "*,*,*,*,*,*,mov,*,*") + (set_attr "length" "4,4,4,4,4,4,8,8,8") + (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")]) (define_insn "*neon_mov" [(set (match_operand:VQXMOV 0 "nonimmediate_operand" @@ -496,7 +598,10 @@ default: return output_move_quad (operands); } } - [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2") + [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ + neon_mrrc,neon_mcr_2_mcrr,*,*,*") + (set_attr "type" "*,*,*,*,*,*,alu,load4,store4") + (set_attr "insn" "*,*,*,*,*,*,mov,*,*") (set_attr "length" "4,8,4,8,8,8,16,8,16") (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")]) @@ -624,7 +729,9 @@ (match_operand:SI 2 "immediate_operand" "i"))))] "TARGET_NEON" "vmov%?.\t%P0[%c2], %1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr")] +) (define_insn "vec_set" [(set (match_operand:VQ 0 "s_register_operand" "+w") @@ -646,7 +753,9 @@ return "vmov%?.\t%P0[%c2], %1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr")] +) (define_insn "vec_setv2di" [(set (match_operand:V2DI 0 "s_register_operand" "+w") @@ -664,7 +773,9 @@ return "vmov%?.64\t%P0, %Q1, %R1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr_2_mcrr")] +) (define_insn "vec_extract" [(set (match_operand: 0 "s_register_operand" "=r") @@ -673,7 +784,9 @@ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] "TARGET_NEON" "vmov%?.\t%0, %P1[%c2]" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "vec_extract" [(set (match_operand: 0 "s_register_operand" "=r") @@ -692,7 +805,9 @@ return "vmov%?.\t%0, %P1[%c2]"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "vec_extractv2di" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -707,7 +822,9 @@ return "vmov%?.64\t%Q0, %R0, %P1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr 
"neon_type" "neon_int_1")] +) (define_expand "vec_init" [(match_operand:VDQ 0 "s_register_operand" "") @@ -731,21 +848,49 @@ (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" - "vadd.\t%0, %1, %2") + "vadd.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) (define_insn "*sub3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" - "vsub.\t%0, %1, %2") + "vsub.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_2")))] +) (define_insn "*mul3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" - "vmul.\t%0, %1, %2") + "vmul.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] +) (define_insn "ior3" [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") @@ -760,7 +905,9 @@ mode, 0, VALID_NEON_QREG_MODE (mode)); default: gcc_unreachable (); } -}) +} + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "iordi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w,w") @@ -776,7 +923,9 @@ DImode, 0, VALID_NEON_QREG_MODE (DImode)); default: gcc_unreachable (); } -}) +} + [(set_attr "neon_type" "neon_int_1")] +) ;; The concrete forms of the Neon immediate-logic instructions are vbic and ;; vorr. 
We support the pseudo-instruction vand instead, because that @@ -796,7 +945,9 @@ mode, 1, VALID_NEON_QREG_MODE (mode)); default: gcc_unreachable (); } -}) +} + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "anddi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w,w") @@ -812,14 +963,18 @@ DImode, 1, VALID_NEON_QREG_MODE (DImode)); default: gcc_unreachable (); } -}) +} + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "orn3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] "TARGET_NEON" - "vorn\t%0, %1, %2") + "vorn\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "orndi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w") @@ -827,14 +982,18 @@ (match_operand:DI 2 "s_register_operand" "w")] UNSPEC_VORN))] "TARGET_NEON" - "vorn\t%P0, %P1, %P2") + "vorn\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "bic3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] "TARGET_NEON" - "vbic\t%0, %1, %2") + "vbic\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "bicdi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w") @@ -842,14 +1001,18 @@ (match_operand:DI 2 "s_register_operand" "w")] UNSPEC_VBIC))] "TARGET_NEON" - "vbic\t%P0, %P1, %P2") + "vbic\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "xor3" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" - "veor\t%0, %1, %2") + "veor\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "xordi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w") @@ -857,53 +1020,85 @@ (match_operand:DI 2 "s_register_operand" "w")] UNSPEC_VEOR))] "TARGET_NEON" - "veor\t%P0, %P1, %P2") + "veor\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "one_cmpl2" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" - "vmvn\t%0, %1") + "vmvn\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "abs2" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" - "vabs.\t%0, %1") + "vabs.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_3")))] +) (define_insn "neg2" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" - "vneg.\t%0, %1") + "vneg.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_3")))] +) (define_insn "*umin3_neon" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" - "vmin.\t%0, %1, %2") + "vmin.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "*umax3_neon" [(set 
(match_operand:VDQIW 0 "s_register_operand" "=w") (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" - "vmax.\t%0, %1, %2") + "vmax.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "*smin3_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" - "vmin.\t%0, %1, %2") + "vmin.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) (define_insn "*smax3_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" - "vmax.\t%0, %1, %2") + "vmax.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) ; TODO: V2DI shifts are current disabled because there are bugs in the ; generic vectorizer code. It ends up creating a V2DI constructor with @@ -914,7 +1109,12 @@ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" - "vshl.\t%0, %1, %2") + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) ; Used for implementing logical shift-right, which is a left-shift by a negative ; amount, with signed operands. This is essentially the same as ashl3 @@ -927,7 +1127,12 @@ (match_operand:VDQI 2 "s_register_operand" "w")] UNSPEC_ASHIFT_SIGNED))] "TARGET_NEON" - "vshl.\t%0, %1, %2") + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) ; Used for implementing logical shift-right, which is a left-shift by a negative ; amount, with unsigned operands. @@ -938,7 +1143,12 @@ (match_operand:VDQI 2 "s_register_operand" "w")] UNSPEC_ASHIFT_UNSIGNED))] "TARGET_NEON" - "vshl.\t%0, %1, %2") + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) (define_expand "ashr3" [(set (match_operand:VDQIW 0 "s_register_operand" "") @@ -976,7 +1186,9 @@ (match_operand:VW 1 "s_register_operand" "%w")) (match_operand: 2 "s_register_operand" "w")))] "TARGET_NEON" - "vaddw.\t%q0, %q2, %P1") + "vaddw.\t%q0, %q2, %P1" + [(set_attr "neon_type" "neon_int_3")] +) (define_insn "widen_usum3" [(set (match_operand: 0 "s_register_operand" "=w") @@ -984,7 +1196,9 @@ (match_operand:VW 1 "s_register_operand" "%w")) (match_operand: 2 "s_register_operand" "w")))] "TARGET_NEON" - "vaddw.\t%q0, %q2, %P1") + "vaddw.\t%q0, %q2, %P1" + [(set_attr "neon_type" "neon_int_3")] +) ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit ;; shift-count granularity. 
That's good enough for the middle-end's current @@ -1062,7 +1276,12 @@ (vec_select:V2SI (match_dup 1) (parallel [(const_int 2) (const_int 3)]))))] "TARGET_NEON" - ".32\t%P0, %e1, %f1") + ".32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) (define_insn "quad_halves_v4sf" [(set (match_operand:V2SF 0 "s_register_operand" "=w") @@ -1072,7 +1291,12 @@ (vec_select:V2SF (match_dup 1) (parallel [(const_int 2) (const_int 3)]))))] "TARGET_NEON" - ".f32\t%P0, %e1, %f1") + ".f32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) (define_insn "quad_halves_v8hi" [(set (match_operand:V4HI 0 "s_register_operand" "+w") @@ -1084,7 +1308,12 @@ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_NEON" - ".16\t%P0, %e1, %f1") + ".16\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) (define_insn "quad_halves_v16qi" [(set (match_operand:V8QI 0 "s_register_operand" "+w") @@ -1100,7 +1329,12 @@ (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_NEON" - ".8\t%P0, %e1, %f1") + ".8\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) ; FIXME: We wouldn't need the following insns if we could write subregs of ; vector registers. Make an attempt at removing unnecessary moves, though @@ -1121,7 +1355,9 @@ return "vmov\t%e0, %P1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "move_lo_quad_v4sf" [(set (match_operand:V4SF 0 "s_register_operand" "+w") @@ -1138,7 +1374,9 @@ return "vmov\t%e0, %P1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "move_lo_quad_v8hi" [(set (match_operand:V8HI 0 "s_register_operand" "+w") @@ -1156,7 +1394,9 @@ return "vmov\t%e0, %P1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "move_lo_quad_v16qi" [(set (match_operand:V16QI 0 "s_register_operand" "+w") @@ -1176,7 +1416,9 @@ return "vmov\t%e0, %P1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) ;; Reduction operations @@ -1210,7 +1452,9 @@ (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] UNSPEC_VPADD))] "TARGET_NEON" - "vadd.i64\t%e0, %e1, %f1") + "vadd.i64\t%e0, %e1, %f1" + [(set_attr "neon_type" "neon_int_1")] +) ;; NEON does not distinguish between signed and unsigned addition except on ;; widening operations. @@ -1329,7 +1573,15 @@ (match_operand:VD 2 "s_register_operand" "w")] UNSPEC_VPADD))] "TARGET_NEON" - "vpadd.\t%P0, %P1, %P2") + "vpadd.\t%P0, %P1, %P2" + ;; Assume this schedules like vadd. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) (define_insn "neon_vpsmin" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -1337,7 +1589,13 @@ (match_operand:VD 2 "s_register_operand" "w")] UNSPEC_VPSMIN))] "TARGET_NEON" - "vpmin.\t%P0, %P1, %P2") + "vpmin.\t%P0, %P1, %P2" + ;; Assume this schedules like vmin. 
+ [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) (define_insn "neon_vpsmax" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -1345,7 +1603,13 @@ (match_operand:VD 2 "s_register_operand" "w")] UNSPEC_VPSMAX))] "TARGET_NEON" - "vpmax.\t%P0, %P1, %P2") + "vpmax.\t%P0, %P1, %P2" + ;; Assume this schedules like vmax. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) (define_insn "neon_vpumin" [(set (match_operand:VDI 0 "s_register_operand" "=w") @@ -1353,7 +1617,10 @@ (match_operand:VDI 2 "s_register_operand" "w")] UNSPEC_VPUMIN))] "TARGET_NEON" - "vpmin.\t%P0, %P1, %P2") + "vpmin.\t%P0, %P1, %P2" + ;; Assume this schedules like umin. + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "neon_vpumax" [(set (match_operand:VDI 0 "s_register_operand" "=w") @@ -1361,7 +1628,10 @@ (match_operand:VDI 2 "s_register_operand" "w")] UNSPEC_VPUMAX))] "TARGET_NEON" - "vpmax.\t%P0, %P1, %P2") + "vpmax.\t%P0, %P1, %P2" + ;; Assume this schedules like umax. + [(set_attr "neon_type" "neon_int_5")] +) ;; Saturating arithmetic @@ -1376,28 +1646,36 @@ (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" - "vqadd.\t%P0, %P1, %P2") + "vqadd.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "*us_add_neon" [(set (match_operand:VD 0 "s_register_operand" "=w") (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" - "vqadd.\t%P0, %P1, %P2") + "vqadd.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "*ss_sub_neon" [(set (match_operand:VD 0 "s_register_operand" "=w") (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" - "vqsub.\t%P0, %P1, %P2") + "vqsub.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "*us_sub_neon" [(set (match_operand:VD 0 "s_register_operand" "=w") (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" - "vqsub.\t%P0, %P1, %P2") + "vqsub.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) ;; Patterns for builtins. @@ -1410,7 +1688,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VADD))] "TARGET_NEON" - "vadd.\t%0, %1, %2") + "vadd.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) ; operand 3 represents in bits: ; bit 0: signed (vs unsigned). @@ -1423,7 +1708,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VADDL))] "TARGET_NEON" - "vaddl.%T3%#\t%q0, %P1, %P2") + "vaddl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_3")] +) (define_insn "neon_vaddw" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1432,7 +1719,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VADDW))] "TARGET_NEON" - "vaddw.%T3%#\t%q0, %q1, %P2") + "vaddw.%T3%#\t%q0, %q1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) ; vhadd and vrhadd. 
@@ -1443,7 +1732,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VHADD))] "TARGET_NEON" - "v%O3hadd.%T3%#\t%0, %1, %2") + "v%O3hadd.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "neon_vqadd" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -1452,7 +1743,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQADD))] "TARGET_NEON" - "vqadd.%T3%#\t%0, %1, %2") + "vqadd.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "neon_vaddhn" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1461,7 +1754,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VADDHN))] "TARGET_NEON" - "v%O3addhn.\t%P0, %q1, %q2") + "v%O3addhn.\t%P0, %q1, %q2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "neon_vmul" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1470,7 +1765,21 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VMUL))] "TARGET_NEON" - "vmul.%F3%#\t%0, %1, %2") + "vmul.%F3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] +) (define_insn "neon_vmla" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1480,7 +1789,21 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMLA))] "TARGET_NEON" - "vmla.\t%0, %2, %3") + "vmla.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) (define_insn "neon_vmlal" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1490,7 +1813,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMLAL))] "TARGET_NEON" - "vmlal.%T4%#\t%q0, %P2, %P3") + "vmlal.%T4%#\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vmls" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1500,7 +1828,22 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMLS))] "TARGET_NEON" - "vmls.\t%0, %2, %3") + "vmls.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string 
"neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) (define_insn "neon_vmlsl" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1510,7 +1853,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMLSL))] "TARGET_NEON" - "vmlsl.%T4%#\t%q0, %P2, %P3") + "vmlsl.%T4%#\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vqdmulh" [(set (match_operand:VMDQI 0 "s_register_operand" "=w") @@ -1519,7 +1867,16 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQDMULH))] "TARGET_NEON" - "vq%O3dmulh.\t%0, %1, %2") + "vq%O3dmulh.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32"))))] +) (define_insn "neon_vqdmlal" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1529,7 +1886,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VQDMLAL))] "TARGET_NEON" - "vqdmlal.\t%q0, %P2, %P3") + "vqdmlal.\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vqdmlsl" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1539,7 +1901,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VQDMLSL))] "TARGET_NEON" - "vqdmlsl.\t%q0, %P2, %P3") + "vqdmlsl.\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vmull" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1548,7 +1915,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VMULL))] "TARGET_NEON" - "vmull.%T3%#\t%q0, %P1, %P2") + "vmull.%T3%#\t%q0, %P1, %P2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) (define_insn "neon_vqdmull" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1557,7 +1929,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQDMULL))] "TARGET_NEON" - "vqdmull.\t%q0, %P1, %P2") + "vqdmull.\t%q0, %P1, %P2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) (define_insn "neon_vsub" [(set (match_operand:VDQX 0 "s_register_operand" "=w") @@ -1566,7 +1943,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSUB))] "TARGET_NEON" - "vsub.\t%0, %1, %2") + "vsub.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref 
"") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_2")))] +) (define_insn "neon_vsubl" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1575,7 +1959,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSUBL))] "TARGET_NEON" - "vsubl.%T3%#\t%q0, %P1, %P2") + "vsubl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) (define_insn "neon_vsubw" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1584,7 +1970,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSUBW))] "TARGET_NEON" - "vsubw.%T3%#\t%q0, %q1, %P2") + "vsubw.%T3%#\t%q0, %q1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) (define_insn "neon_vqsub" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -1593,7 +1981,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSUB))] "TARGET_NEON" - "vqsub.%T3%#\t%0, %1, %2") + "vqsub.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "neon_vhsub" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1602,7 +1992,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VHSUB))] "TARGET_NEON" - "vhsub.%T3%#\t%0, %1, %2") + "vhsub.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "neon_vsubhn" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1611,7 +2003,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSUBHN))] "TARGET_NEON" - "v%O3subhn.\t%P0, %q1, %q2") + "v%O3subhn.\t%P0, %q1, %q2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "neon_vceq" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1620,7 +2014,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCEQ))] "TARGET_NEON" - "vceq.\t%0, %1, %2") + "vceq.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_insn "neon_vcge" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1629,7 +2030,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCGE))] "TARGET_NEON" - "vcge.%T3%#\t%0, %1, %2") + "vcge.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_insn "neon_vcgt" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1638,7 +2046,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCGT))] "TARGET_NEON" - "vcgt.%T3%#\t%0, %1, %2") + "vcgt.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_insn "neon_vcage" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1647,7 +2062,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCAGE))] "TARGET_NEON" - "vacge.\t%0, %1, %2") + "vacge.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vcagt" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1656,7 +2076,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCAGT))] 
"TARGET_NEON" - "vacgt.\t%0, %1, %2") + "vacgt.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vtst" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1665,7 +2090,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VTST))] "TARGET_NEON" - "vtst.\t%0, %1, %2") + "vtst.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) (define_insn "neon_vabd" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1674,7 +2101,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VABD))] "TARGET_NEON" - "vabd.%T3%#\t%0, %1, %2") + "vabd.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_insn "neon_vabdl" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1683,7 +2117,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VABDL))] "TARGET_NEON" - "vabdl.%T3%#\t%q0, %P1, %P2") + "vabdl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) (define_insn "neon_vaba" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1693,7 +2129,11 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VABA))] "TARGET_NEON" - "vaba.%T4%#\t%0, %2, %3") + "vaba.%T4%#\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vaba") (const_string "neon_vaba_qqq")))] +) (define_insn "neon_vabal" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1703,7 +2143,9 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VABAL))] "TARGET_NEON" - "vabal.%T4%#\t%q0, %P2, %P3") + "vabal.%T4%#\t%q0, %P2, %P3" + [(set_attr "neon_type" "neon_vaba")] +) (define_insn "neon_vmax" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1712,7 +2154,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VMAX))] "TARGET_NEON" - "vmax.%T3%#\t%0, %1, %2") + "vmax.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_insn "neon_vmin" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1721,7 +2170,14 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VMIN))] "TARGET_NEON" - "vmin.%T3%#\t%0, %1, %2") + "vmin.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) (define_expand "neon_vpadd" [(match_operand:VD 0 "s_register_operand" "=w") @@ -1741,7 +2197,10 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VPADDL))] "TARGET_NEON" - "vpaddl.%T2%#\t%0, %1") + "vpaddl.%T2%#\t%0, %1" + ;; Assume this schedules like vaddl. + [(set_attr "neon_type" "neon_int_3")] +) (define_insn "neon_vpadal" [(set (match_operand: 0 "s_register_operand" "=w") @@ -1750,7 +2209,10 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VPADAL))] "TARGET_NEON" - "vpadal.%T3%#\t%0, %2") + "vpadal.%T3%#\t%0, %2" + ;; Assume this schedules like vpadd. 
+ [(set_attr "neon_type" "neon_int_1")] +) (define_insn "neon_vpmax" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -1759,7 +2221,13 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VPMAX))] "TARGET_NEON" - "vpmax.%T3%#\t%0, %1, %2") + "vpmax.%T3%#\t%0, %1, %2" + ;; Assume this schedules like vmax. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) (define_insn "neon_vpmin" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -1768,7 +2236,13 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VPMIN))] "TARGET_NEON" - "vpmin.%T3%#\t%0, %1, %2") + "vpmin.%T3%#\t%0, %1, %2" + ;; Assume this schedules like vmin. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) (define_insn "neon_vrecps" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") @@ -1777,7 +2251,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VRECPS))] "TARGET_NEON" - "vrecps.\t%0, %1, %2") + "vrecps.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (const_string "neon_fp_vrecps_vrsqrts_qqq")))] +) (define_insn "neon_vrsqrts" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") @@ -1786,7 +2265,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VRSQRTS))] "TARGET_NEON" - "vrsqrts.\t%0, %1, %2") + "vrsqrts.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (const_string "neon_fp_vrecps_vrsqrts_qqq")))] +) (define_insn "neon_vabs" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -1794,7 +2278,16 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VABS))] "TARGET_NEON" - "vabs.\t%0, %1") + "vabs.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ior (ne (symbol_ref "") (const_int 0)) + (ne (symbol_ref "") (const_int 0))) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_vqneg_vqabs")))] +) (define_insn "neon_vqabs" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1802,7 +2295,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VQABS))] "TARGET_NEON" - "vqabs.\t%0, %1") + "vqabs.\t%0, %1" + [(set_attr "neon_type" "neon_vqneg_vqabs")] +) (define_expand "neon_vneg" [(match_operand:VDQW 0 "s_register_operand" "") @@ -1820,7 +2315,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VQNEG))] "TARGET_NEON" - "vqneg.\t%0, %1") + "vqneg.\t%0, %1" + [(set_attr "neon_type" "neon_vqneg_vqabs")] +) (define_insn "neon_vcls" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1828,7 +2325,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VCLS))] "TARGET_NEON" - "vcls.\t%0, %1") + "vcls.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "neon_vclz" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -1836,7 +2335,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VCLZ))] "TARGET_NEON" - "vclz.\t%0, %1") + "vclz.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) (define_insn "neon_vcnt" [(set (match_operand:VE 0 "s_register_operand" "=w") @@ -1844,7 +2345,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VCNT))] "TARGET_NEON" - "vcnt.\t%0, %1") + "vcnt.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) 
(define_insn "neon_vrecpe" [(set (match_operand:V32 0 "s_register_operand" "=w") @@ -1852,7 +2355,12 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VRECPE))] "TARGET_NEON" - "vrecpe.\t%0, %1") + "vrecpe.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vrsqrte" [(set (match_operand:V32 0 "s_register_operand" "=w") @@ -1860,7 +2368,12 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VRSQRTE))] "TARGET_NEON" - "vrsqrte.\t%0, %1") + "vrsqrte.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_expand "neon_vmvn" [(match_operand:VDQIW 0 "s_register_operand" "") @@ -1883,7 +2396,9 @@ UNSPEC_VGET_LANE))] "TARGET_NEON" "vmov%?.%t3%#\t%0, %P1[%c2]" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) ; Operand 2 (lane number) is ignored because we can only extract the zeroth lane ; with this insn. Operand 3 (info word) is ignored because it does nothing @@ -1897,7 +2412,9 @@ UNSPEC_VGET_LANE))] "TARGET_NEON" "vmov%?\t%Q0, %R0, %P1 @ di" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vget_lane" [(set (match_operand: 0 "s_register_operand" "=r") @@ -1920,7 +2437,9 @@ return ""; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vget_lanev2di" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -1940,8 +2459,9 @@ return ""; } - [(set_attr "predicable" "yes")]) - + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vset_lane" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -1951,7 +2471,9 @@ UNSPEC_VSET_LANE))] "TARGET_NEON" "vmov%?.\t%P0[%c3], %1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. @@ -1963,7 +2485,9 @@ UNSPEC_VSET_LANE))] "TARGET_NEON" "vmov%?\t%P0, %Q1, %R1 @ di" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vset_lane" [(set (match_operand:VQ 0 "s_register_operand" "=w") @@ -1985,7 +2509,9 @@ return ""; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vset_lanev2di" [(set (match_operand:V2DI 0 "s_register_operand" "=w") @@ -2005,7 +2531,9 @@ return ""; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_expand "neon_vcreate" [(match_operand:VDX 0 "s_register_operand" "") @@ -2023,7 +2551,10 @@ UNSPEC_VDUP_N))] "TARGET_NEON" "vdup%?.\t%0, %1" - [(set_attr "predicable" "yes")]) + ;; Assume this schedules like vmov. 
+ [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vdup_ndi" [(set (match_operand:DI 0 "s_register_operand" "=w") @@ -2031,7 +2562,9 @@ UNSPEC_VDUP_N))] "TARGET_NEON" "vmov%?\t%P0, %Q1, %R1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vdup_nv2di" [(set (match_operand:V2DI 0 "s_register_operand" "=w") @@ -2040,7 +2573,9 @@ "TARGET_NEON" "vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1" [(set_attr "predicable" "yes") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vdup_lane" [(set (match_operand:VD 0 "s_register_operand" "=w") @@ -2048,7 +2583,10 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VDUP_LANE))] "TARGET_NEON" - "vdup.\t%P0, %P1[%c2]") + "vdup.\t%P0, %P1[%c2]" + ;; Assume this schedules like vmov. + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vdup_lane" [(set (match_operand:VQ 0 "s_register_operand" "=w") @@ -2056,7 +2594,10 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VDUP_LANE))] "TARGET_NEON" - "vdup.\t%q0, %P1[%c2]") + "vdup.\t%q0, %P1[%c2]" + ;; Assume this schedules like vmov. + [(set_attr "neon_type" "neon_bp_simple")] +) ; Scalar index is ignored, since only zero is valid here. (define_expand "neon_vdup_lanedi" @@ -2078,7 +2619,9 @@ UNSPEC_VDUP_LANE))] "TARGET_NEON" "vmov\t%e0, %P1\;vmov\t%f0, %P1" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "neon_type" "neon_bp_simple")] +) ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. @@ -2126,7 +2669,10 @@ return ""; } - [(set_attr "length" "8")]) + ;; We set the neon_type attribute based on the vmov instructions above. 
+ [(set_attr "length" "8") + (set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vget_high" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2141,7 +2687,9 @@ return "vmov\t%P0, %f1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vget_low" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2156,7 +2704,9 @@ return "vmov\t%P0, %e1"; else return ""; -}) +} + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vcvt" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2164,7 +2714,12 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VCVT))] "TARGET_NEON" - "vcvt.%T2%#32.f32\t%0, %1") + "vcvt.%T2%#32.f32\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vcvt" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2172,7 +2727,12 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VCVT))] "TARGET_NEON" - "vcvt.f32.%T2%#32\t%0, %1") + "vcvt.f32.%T2%#32\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2181,7 +2741,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCVT_N))] "TARGET_NEON" - "vcvt.%T3%#32.f32\t%0, %1, %2") + "vcvt.%T3%#32.f32\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2190,7 +2755,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VCVT_N))] "TARGET_NEON" - "vcvt.f32.%T3%#32\t%0, %1, %2") + "vcvt.f32.%T3%#32\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) (define_insn "neon_vmovn" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2198,7 +2768,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VMOVN))] "TARGET_NEON" - "vmovn.\t%P0, %q1") + "vmovn.\t%P0, %q1" + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vqmovn" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2206,7 +2778,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VQMOVN))] "TARGET_NEON" - "vqmovn.%T2%#\t%P0, %q1") + "vqmovn.%T2%#\t%P0, %q1" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vqmovun" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2214,7 +2788,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VQMOVUN))] "TARGET_NEON" - "vqmovun.\t%P0, %q1") + "vqmovun.\t%P0, %q1" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vmovl" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2222,7 +2798,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VMOVL))] "TARGET_NEON" - "vmovl.%T2%#\t%q0, %P1") + "vmovl.%T2%#\t%q0, %P1" + [(set_attr "neon_type" "neon_shift_1")] +) (define_insn "neon_vmul_lane" [(set (match_operand:VMD 0 "s_register_operand" "=w") @@ -2233,7 +2811,14 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMUL_LANE))] "TARGET_NEON" - "vmul.\t%P0, %P1, %P2[%c3]") + "vmul.\t%P0, %P1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string 
"neon_fp_vmul_ddd") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))))] +) (define_insn "neon_vmul_lane" [(set (match_operand:VMQ 0 "s_register_operand" "=w") @@ -2244,7 +2829,14 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMUL_LANE))] "TARGET_NEON" - "vmul.\t%q0, %q1, %P2[%c3]") + "vmul.\t%q0, %q1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmul_qqd") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (const_string "neon_mul_qqd_32_scalar"))))] +) (define_insn "neon_vmull_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2255,7 +2847,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VMULL_LANE))] "TARGET_NEON" - "vmull.%T4%#\t%q0, %P1, %P2[%c3]") + "vmull.%T4%#\t%q0, %P1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) (define_insn "neon_vqdmull_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2266,7 +2863,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VQDMULL_LANE))] "TARGET_NEON" - "vqdmull.\t%q0, %P1, %P2[%c3]") + "vqdmull.\t%q0, %P1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) (define_insn "neon_vqdmulh_lane" [(set (match_operand:VMQI 0 "s_register_operand" "=w") @@ -2277,7 +2879,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VQDMULH_LANE))] "TARGET_NEON" - "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]") + "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (const_string "neon_mul_qqd_32_scalar")))] +) (define_insn "neon_vqdmulh_lane" [(set (match_operand:VMDI 0 "s_register_operand" "=w") @@ -2288,7 +2895,12 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VQDMULH_LANE))] "TARGET_NEON" - "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]") + "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) (define_insn "neon_vmla_lane" [(set (match_operand:VMD 0 "s_register_operand" "=w") @@ -2300,7 +2912,14 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLA_LANE))] "TARGET_NEON" - "vmla.\t%P0, %P2, %P3[%c4]") + "vmla.\t%P0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))] +) (define_insn "neon_vmla_lane" [(set (match_operand:VMQ 0 "s_register_operand" "=w") @@ -2312,7 +2931,14 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLA_LANE))] "TARGET_NEON" - "vmla.\t%q0, %q2, %P3[%c4]") + "vmla.\t%q0, %q2, %P3[%c4]" + 
[(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_qqq_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (const_string "neon_mla_qqq_32_qqd_32_scalar"))))] +) (define_insn "neon_vmlal_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2324,7 +2950,12 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLAL_LANE))] "TARGET_NEON" - "vmlal.%T5%#\t%q0, %P2, %P3[%c4]") + "vmlal.%T5%#\t%q0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vqdmlal_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2336,7 +2967,12 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VQDMLAL_LANE))] "TARGET_NEON" - "vqdmlal.\t%q0, %P2, %P3[%c4]") + "vqdmlal.\t%q0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vmls_lane" [(set (match_operand:VMD 0 "s_register_operand" "=w") @@ -2348,7 +2984,14 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLS_LANE))] "TARGET_NEON" - "vmls.\t%P0, %P2, %P3[%c4]") + "vmls.\t%P0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))] +) (define_insn "neon_vmls_lane" [(set (match_operand:VMQ 0 "s_register_operand" "=w") @@ -2360,7 +3003,14 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLS_LANE))] "TARGET_NEON" - "vmls.\t%q0, %q2, %P3[%c4]") + "vmls.\t%q0, %q2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_qqq_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (const_string "neon_mla_qqq_32_qqd_32_scalar"))))] +) (define_insn "neon_vmlsl_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2372,7 +3022,12 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VMLSL_LANE))] "TARGET_NEON" - "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]") + "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) (define_insn "neon_vqdmlsl_lane" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2384,7 +3039,12 @@ (match_operand:SI 5 "immediate_operand" "i")] UNSPEC_VQDMLSL_LANE))] "TARGET_NEON" - "vqdmlsl.\t%q0, %P2, %P3[%c4]") + "vqdmlsl.\t%q0, %P2, %P3[%c4]" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a ; core 
register into a temp register, then use a scalar taken from that. This @@ -2604,7 +3264,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VEXT))] "TARGET_NEON" - "vext.\t%0, %1, %2, %3") + "vext.\t%0, %1, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_2cycle")))] +) (define_insn "neon_vrev64" [(set (match_operand:VDQ 0 "s_register_operand" "=w") @@ -2612,7 +3277,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VREV64))] "TARGET_NEON" - "vrev64.\t%0, %1") + "vrev64.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vrev32" [(set (match_operand:VX 0 "s_register_operand" "=w") @@ -2620,7 +3287,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VREV32))] "TARGET_NEON" - "vrev32.\t%0, %1") + "vrev32.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) (define_insn "neon_vrev16" [(set (match_operand:VE 0 "s_register_operand" "=w") @@ -2628,7 +3297,9 @@ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VREV16))] "TARGET_NEON" - "vrev16.\t%0, %1") + "vrev16.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register ; allocation. For an intrinsic of form: @@ -2648,7 +3319,9 @@ "@ vbsl\t%0, %2, %3 vbit\t%0, %2, %1 - vbif\t%0, %3, %1") + vbif\t%0, %3, %1" + [(set_attr "neon_type" "neon_int_1")] +) (define_expand "neon_vbsl" [(set (match_operand:VDQX 0 "s_register_operand" "") @@ -2669,7 +3342,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSHL))] "TARGET_NEON" - "v%O3shl.%T3%#\t%0, %1, %2") + "v%O3shl.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) (define_insn "neon_vqshl" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2678,7 +3356,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSHL))] "TARGET_NEON" - "vq%O3shl.%T3%#\t%0, %1, %2") + "vq%O3shl.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_2") + (const_string "neon_vqshl_vrshl_vqrshl_qqq")))] +) (define_insn "neon_vshr_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2687,7 +3370,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSHR_N))] "TARGET_NEON" - "v%O3shr.%T3%#\t%0, %1, %2") + "v%O3shr.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_shift_1")] +) (define_insn "neon_vshrn_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2696,7 +3381,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSHRN_N))] "TARGET_NEON" - "v%O3shrn.\t%P0, %q1, %2") + "v%O3shrn.\t%P0, %q1, %2" + [(set_attr "neon_type" "neon_shift_1")] +) (define_insn "neon_vqshrn_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2705,7 +3392,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSHRN_N))] "TARGET_NEON" - "vq%O3shrn.%T3%#\t%P0, %q1, %2") + "vq%O3shrn.%T3%#\t%P0, %q1, %2" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vqshrun_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2714,7 +3403,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSHRUN_N))] "TARGET_NEON" - "vq%O3shrun.%T3%#\t%P0, %q1, %2") + "vq%O3shrun.%T3%#\t%P0, %q1, %2" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vshl_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2723,7 +3414,9 @@ (match_operand:SI 3 "immediate_operand" "i")] 
UNSPEC_VSHL_N))] "TARGET_NEON" - "vshl.\t%0, %1, %2") + "vshl.\t%0, %1, %2" + [(set_attr "neon_type" "neon_shift_1")] +) (define_insn "neon_vqshl_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2732,7 +3425,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSHL_N))] "TARGET_NEON" - "vqshl.%T3%#\t%0, %1, %2") + "vqshl.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vqshlu_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2741,7 +3436,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VQSHLU_N))] "TARGET_NEON" - "vqshlu.%T3%#\t%0, %1, %2") + "vqshlu.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_shift_2")] +) (define_insn "neon_vshll_n" [(set (match_operand: 0 "s_register_operand" "=w") @@ -2750,7 +3447,9 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSHLL_N))] "TARGET_NEON" - "vshll.%T3%#\t%q0, %P1, %2") + "vshll.%T3%#\t%q0, %P1, %2" + [(set_attr "neon_type" "neon_shift_1")] +) (define_insn "neon_vsra_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2760,7 +3459,9 @@ (match_operand:SI 4 "immediate_operand" "i")] UNSPEC_VSRA_N))] "TARGET_NEON" - "v%O4sra.%T4%#\t%0, %2, %3") + "v%O4sra.%T4%#\t%0, %2, %3" + [(set_attr "neon_type" "neon_vsra_vrsra")] +) (define_insn "neon_vsri_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2769,7 +3470,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSRI))] "TARGET_NEON" - "vsri.\t%0, %2, %3") + "vsri.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_1") + (const_string "neon_shift_3")))] +) (define_insn "neon_vsli_n" [(set (match_operand:VDQIX 0 "s_register_operand" "=w") @@ -2778,7 +3484,12 @@ (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VSLI))] "TARGET_NEON" - "vsli.\t%0, %2, %3") + "vsli.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_1") + (const_string "neon_shift_3")))] +) (define_insn "neon_vtbl1v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2786,7 +3497,9 @@ (match_operand:V8QI 2 "s_register_operand" "w")] UNSPEC_VTBL))] "TARGET_NEON" - "vtbl.8\t%P0, {%P1}, %P2") + "vtbl.8\t%P0, {%P1}, %P2" + [(set_attr "neon_type" "neon_bp_2cycle")] +) (define_insn "neon_vtbl2v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2805,7 +3518,9 @@ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_2cycle")] +) (define_insn "neon_vtbl3v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2825,7 +3540,9 @@ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) (define_insn "neon_vtbl4v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2846,7 +3563,9 @@ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) (define_insn "neon_vtbx1v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2855,7 +3574,9 @@ (match_operand:V8QI 3 "s_register_operand" "w")] UNSPEC_VTBX))] "TARGET_NEON" - "vtbx.8\t%P0, {%P2}, %P3") + "vtbx.8\t%P0, {%P2}, %P3" + [(set_attr "neon_type" "neon_bp_2cycle")] +) (define_insn "neon_vtbx2v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2875,7 +3596,9 @@ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_2cycle")] +) (define_insn "neon_vtbx3v8qi" 
[(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2896,7 +3619,9 @@ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) (define_insn "neon_vtbx4v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") @@ -2918,7 +3643,9 @@ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) (define_insn "neon_vtrn_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") @@ -2928,7 +3655,12 @@ (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VTRN2))] "TARGET_NEON" - "vtrn.\t%0, %2") + "vtrn.\t%0, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) (define_expand "neon_vtrn" [(match_operand:SI 0 "s_register_operand" "r") @@ -2949,7 +3681,12 @@ (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VZIP2))] "TARGET_NEON" - "vzip.\t%0, %2") + "vzip.\t%0, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) (define_expand "neon_vzip" [(match_operand:SI 0 "s_register_operand" "r") @@ -2970,7 +3707,12 @@ (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VUZP2))] "TARGET_NEON" - "vuzp.\t%0, %2") + "vuzp.\t%0, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) (define_expand "neon_vuzp" [(match_operand:SI 0 "s_register_operand" "r") @@ -3078,7 +3820,9 @@ (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] UNSPEC_VLD1))] "TARGET_NEON" - "vld1.\t%h0, [%1]") + "vld1.\t%h0, [%1]" + [(set_attr "neon_type" "neon_vld1_1_2_regs")] +) (define_insn "neon_vld1_lane" [(set (match_operand:VDX 0 "s_register_operand" "=w") @@ -3096,7 +3840,12 @@ return "vld1.\t%P0, [%1]"; else return "vld1.\t{%P0[%c3]}, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 2)) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld1_vld2_lane")))] +) (define_insn "neon_vld1_lane" [(set (match_operand:VQX 0 "s_register_operand" "=w") @@ -3122,7 +3871,12 @@ return "vld1.\t%P0, [%1]"; else return "vld1.\t{%P0[%c3]}, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 2)) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld1_vld2_lane")))] +) (define_insn "neon_vld1_dup" [(set (match_operand:VDX 0 "s_register_operand" "=w") @@ -3134,7 +3888,12 @@ return "vld1.\t{%P0[]}, [%1]"; else return "vld1.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) (define_insn "neon_vld1_dup" [(set (match_operand:VQX 0 "s_register_operand" "=w") @@ -3146,14 +3905,20 @@ return "vld1.\t{%e0[], %f0[]}, [%1]"; else return "vld1.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) (define_insn "neon_vst1" [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] UNSPEC_VST1))] "TARGET_NEON" - "vst1.\t%h1, [%0]") + "vst1.\t%h1, [%0]" + [(set_attr "neon_type" 
"neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "neon_vst1_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3170,7 +3935,11 @@ return "vst1.\t{%P1}, [%0]"; else return "vst1.\t{%P1[%c2]}, [%0]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 1)) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst1_vst2_lane")))]) (define_insn "neon_vst1_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3195,7 +3964,9 @@ return "vst1.\t{%P1}, [%0]"; else return "vst1.\t{%P1[%c2]}, [%0]"; -}) +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) (define_insn "neon_vld2" [(set (match_operand:TI 0 "s_register_operand" "=w") @@ -3208,7 +3979,12 @@ return "vld1.64\t%h0, [%1]"; else return "vld2.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] +) (define_insn "neon_vld2" [(set (match_operand:OI 0 "s_register_operand" "=w") @@ -3216,7 +3992,8 @@ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" - "vld2.\t%h0, [%1]") + "vld2.\t%h0, [%1]" + [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) (define_insn "neon_vld2_lane" [(set (match_operand:TI 0 "s_register_operand" "=w") @@ -3239,7 +4016,9 @@ ops[3] = operands[3]; output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, [%2]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +) (define_insn "neon_vld2_lane" [(set (match_operand:OI 0 "s_register_operand" "=w") @@ -3267,7 +4046,9 @@ ops[3] = GEN_INT (lane); output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, [%2]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +) (define_insn "neon_vld2_dup" [(set (match_operand:TI 0 "s_register_operand" "=w") @@ -3280,7 +4061,12 @@ return "vld2.\t{%e0[], %f0[]}, [%1]"; else return "vld1.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) (define_insn "neon_vst2" [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) @@ -3293,7 +4079,12 @@ return "vst1.64\t%h1, [%0]"; else return "vst2.\t%h1, [%0]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] +) (define_insn "neon_vst2" [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) @@ -3301,7 +4092,9 @@ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" - "vst2.\t%h1, [%0]") + "vst2.\t%h1, [%0]" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] +) (define_insn "neon_vst2_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3324,7 +4117,9 @@ ops[3] = operands[2]; output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, [%0]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) (define_insn "neon_vst2_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3352,7 +4147,9 @@ ops[3] = GEN_INT (lane); output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, [%0]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) (define_insn "neon_vld3" [(set (match_operand:EI 0 "s_register_operand" "=w") @@ -3365,7 +4162,12 @@ return "vld1.64\t%h0, [%1]"; else return "vld3.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + 
(if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld3_vld4")))] +) (define_expand "neon_vld3" [(match_operand:CI 0 "s_register_operand" "=w") @@ -3399,7 +4201,9 @@ ops[3] = operands[2]; output_asm_insn ("vld3.\t{%P0, %P1, %P2}, [%3]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) (define_insn "neon_vld3qb" [(set (match_operand:CI 0 "s_register_operand" "=w") @@ -3420,7 +4224,9 @@ ops[3] = operands[2]; output_asm_insn ("vld3.\t{%P0, %P1, %P2}, [%3]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) (define_insn "neon_vld3_lane" [(set (match_operand:EI 0 "s_register_operand" "=w") @@ -3445,7 +4251,9 @@ output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) (define_insn "neon_vld3_lane" [(set (match_operand:CI 0 "s_register_operand" "=w") @@ -3475,7 +4283,9 @@ output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) (define_insn "neon_vld3_dup" [(set (match_operand:EI 0 "s_register_operand" "=w") @@ -3497,7 +4307,11 @@ } else return "vld1.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))]) (define_insn "neon_vst3" [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) @@ -3510,7 +4324,11 @@ return "vst1.64\t%h1, [%0]"; else return "vst3.\t%h1, [%0]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))]) (define_expand "neon_vst3" [(match_operand:SI 0 "s_register_operand" "+r") @@ -3541,7 +4359,9 @@ ops[3] = gen_rtx_REG (DImode, regno + 8); output_asm_insn ("vst3.\t{%P1, %P2, %P3}, [%0]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) (define_insn "neon_vst3qb" [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) @@ -3561,7 +4381,9 @@ ops[3] = gen_rtx_REG (DImode, regno + 10); output_asm_insn ("vst3.\t{%P1, %P2, %P3}, [%0]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) (define_insn "neon_vst3_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3586,7 +4408,9 @@ output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) (define_insn "neon_vst3_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3616,7 +4440,8 @@ output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ops); return ""; -}) +} +[(set_attr "neon_type" "neon_vst3_vst4_lane")]) (define_insn "neon_vld4" [(set (match_operand:OI 0 "s_register_operand" "=w") @@ -3629,7 +4454,12 @@ return "vld1.64\t%h0, [%1]"; else return "vld4.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld3_vld4")))] +) (define_expand "neon_vld4" [(match_operand:XI 0 "s_register_operand" "=w") @@ -3664,7 +4494,9 @@ ops[4] = operands[2]; output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, [%4]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) (define_insn "neon_vld4qb" [(set (match_operand:XI 0 "s_register_operand" "=w") @@ -3686,7 +4518,9 @@ ops[4] = operands[2]; 
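/* ops[0]-ops[3] are the four destination D registers for this half of the structure load; ops[4] is the base address register (operand 2), post-incremented by the trailing "!" in the template below.  */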
output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, [%4]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) (define_insn "neon_vld4_lane" [(set (match_operand:OI 0 "s_register_operand" "=w") @@ -3712,7 +4546,9 @@ output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) (define_insn "neon_vld4_lane" [(set (match_operand:XI 0 "s_register_operand" "=w") @@ -3743,7 +4579,9 @@ output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) (define_insn "neon_vld4_dup" [(set (match_operand:OI 0 "s_register_operand" "=w") @@ -3767,7 +4605,12 @@ } else return "vld1.\t%h0, [%1]"; -}) +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) (define_insn "neon_vst4" [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) @@ -3780,7 +4623,12 @@ return "vst1.64\t%h1, [%0]"; else return "vst4.\t%h1, [%0]"; -}) +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))] +) (define_expand "neon_vst4" [(match_operand:SI 0 "s_register_operand" "+r") @@ -3812,7 +4660,9 @@ ops[4] = gen_rtx_REG (DImode, regno + 12); output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, [%0]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) (define_insn "neon_vst4qb" [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) @@ -3833,7 +4683,9 @@ ops[4] = gen_rtx_REG (DImode, regno + 14); output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, [%0]!", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) (define_insn "neon_vst4_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3859,7 +4711,9 @@ output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) (define_insn "neon_vst4_lane" [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) @@ -3890,7 +4744,9 @@ output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ops); return ""; -}) +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) (define_expand "neon_vand" [(match_operand:VDQX 0 "s_register_operand" "")