gcc/config/arm/arm-fixed.md

   1 ;; Copyright 2011 Free Software Foundation, Inc.
   2 ;;
   3 ;; This file is part of GCC.
   4 ;;
   5 ;; GCC is free software; you can redistribute it and/or modify it
   6 ;; under the terms of the GNU General Public License as published
   7 ;; by the Free Software Foundation; either version 3, or (at your
   8 ;; option) any later version.
   9 ;;
  10 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  11 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  13 ;; License for more details.
  14 ;;
  15 ;; You should have received a copy of the GNU General Public License
  16 ;; along with GCC; see the file COPYING3.  If not see
  17 ;; <http://www.gnu.org/licenses/>.
  18 ;;
  19 ;; This file contains ARM instructions that support fixed-point operations.
  20
  21 (define_insn "add<mode>3"
  22   [(set (match_operand:FIXED 0 "s_register_operand" "=r")
  23         (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
  24                     (match_operand:FIXED 2 "s_register_operand" "r")))]
  25   "TARGET_32BIT"
  26   "add%?\\t%0, %1, %2"
  27   [(set_attr "predicable" "yes")])
  28
  29 (define_insn "add<mode>3"
  30   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
  31         (plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
  32                      (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  33   "TARGET_INT_SIMD"
  34   "sadd<qaddsub_suf>%?\\t%0, %1, %2"
  35   [(set_attr "predicable" "yes")])
  36
  37 (define_insn "usadd<mode>3"
  38   [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
  39         (us_plus:UQADDSUB (match_operand:UQADDSUB 1 "s_register_operand" "r")
  40                           (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  41   "TARGET_INT_SIMD"
  42   "uqadd<qaddsub_suf>%?\\t%0, %1, %2"
  43   [(set_attr "predicable" "yes")])
  44
  45 (define_insn "ssadd<mode>3"
  46   [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
  47         (ss_plus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r")
  48                          (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  49   "TARGET_INT_SIMD"
  50   "qadd<qaddsub_suf>%?\\t%0, %1, %2"
  51   [(set_attr "predicable" "yes")])
  52
  53 (define_insn "sub<mode>3"
  54   [(set (match_operand:FIXED 0 "s_register_operand" "=r")
  55         (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
  56                      (match_operand:FIXED 2 "s_register_operand" "r")))]
  57   "TARGET_32BIT"
  58   "sub%?\\t%0, %1, %2"
  59   [(set_attr "predicable" "yes")])
  60
  61 (define_insn "sub<mode>3"
  62   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
  63         (minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
  64                       (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  65   "TARGET_INT_SIMD"
  66   "ssub<qaddsub_suf>%?\\t%0, %1, %2"
  67   [(set_attr "predicable" "yes")])
  68
  69 (define_insn "ussub<mode>3"
  70   [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
  71         (us_minus:UQADDSUB
  72           (match_operand:UQADDSUB 1 "s_register_operand" "r")
  73           (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  74   "TARGET_INT_SIMD"
  75   "uqsub<qaddsub_suf>%?\\t%0, %1, %2"
  76   [(set_attr "predicable" "yes")])
  77
  78 (define_insn "sssub<mode>3"
  79   [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
  80         (ss_minus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r")
  81                           (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  82   "TARGET_INT_SIMD"
  83   "qsub<qaddsub_suf>%?\\t%0, %1, %2"
  84   [(set_attr "predicable" "yes")])
  85
  86 ;; Fractional multiplies.
  87
  88 ; Note: none of these do any rounding.
  89
  90 (define_expand "mulqq3"
  91   [(set (match_operand:QQ 0 "s_register_operand" "")
  92         (mult:QQ (match_operand:QQ 1 "s_register_operand" "")
  93                  (match_operand:QQ 2 "s_register_operand" "")))]
  94   "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
  95 {
  96   rtx tmp1 = gen_reg_rtx (HImode);
  97   rtx tmp2 = gen_reg_rtx (HImode);
  98   rtx tmp3 = gen_reg_rtx (SImode);
  99
 100   emit_insn (gen_extendqihi2 (tmp1, gen_lowpart (QImode, operands[1])));
 101   emit_insn (gen_extendqihi2 (tmp2, gen_lowpart (QImode, operands[2])));
 102   emit_insn (gen_mulhisi3 (tmp3, tmp1, tmp2));
 103   emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp3, GEN_INT (8),
 104                        GEN_INT (7)));
 105   DONE;
 106 })
 107
 108 (define_expand "mulhq3"
 109   [(set (match_operand:HQ 0 "s_register_operand" "")
 110         (mult:HQ (match_operand:HQ 1 "s_register_operand" "")
 111                  (match_operand:HQ 2 "s_register_operand" "")))]
 112   "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
 113 {
 114   rtx tmp = gen_reg_rtx (SImode);
 115
 116   emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]),
 117                            gen_lowpart (HImode, operands[2])));
 118   /* We're doing a s.15 * s.15 multiplication, getting an s.30 result.  Extract
 119      an s.15 value from that.  This won't overflow/saturate for _Fract
 120      values.  */
 121   emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp,
 122                        GEN_INT (16), GEN_INT (15)));
 123   DONE;
 124 })
 125
 126 (define_expand "mulsq3"
 127   [(set (match_operand:SQ 0 "s_register_operand" "")
 128         (mult:SQ (match_operand:SQ 1 "s_register_operand" "")
 129                  (match_operand:SQ 2 "s_register_operand" "")))]
 130   "TARGET_32BIT && arm_arch3m"
 131 {
 132   rtx tmp1 = gen_reg_rtx (DImode);
 133   rtx tmp2 = gen_reg_rtx (SImode);
 134   rtx tmp3 = gen_reg_rtx (SImode);
 135
 136   /* s.31 * s.31 -> s.62 multiplication.  */
 137   emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]),
 138                            gen_lowpart (SImode, operands[2])));
 139   emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (31)));
 140   emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (1)));
 141   emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3));
 142
 143   DONE;
 144 })
 145
 146 ;; Accumulator multiplies.
 147
 148 (define_expand "mulsa3"
 149   [(set (match_operand:SA 0 "s_register_operand" "")
 150         (mult:SA (match_operand:SA 1 "s_register_operand" "")
 151                  (match_operand:SA 2 "s_register_operand" "")))]
 152   "TARGET_32BIT && arm_arch3m"
 153 {
 154   rtx tmp1 = gen_reg_rtx (DImode);
 155   rtx tmp2 = gen_reg_rtx (SImode);
 156   rtx tmp3 = gen_reg_rtx (SImode);
 157
 158   emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]),
 159                            gen_lowpart (SImode, operands[2])));
 160   emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (15)));
 161   emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (17)));
 162   emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3));
 163
 164   DONE;
 165 })
 166
 167 (define_expand "mulusa3"
 168   [(set (match_operand:USA 0 "s_register_operand" "")
 169         (mult:USA (match_operand:USA 1 "s_register_operand" "")
 170                   (match_operand:USA 2 "s_register_operand" "")))]
 171   "TARGET_32BIT && arm_arch3m"
 172 {
 173   rtx tmp1 = gen_reg_rtx (DImode);
 174   rtx tmp2 = gen_reg_rtx (SImode);
 175   rtx tmp3 = gen_reg_rtx (SImode);
 176
 177   emit_insn (gen_umulsidi3 (tmp1, gen_lowpart (SImode, operands[1]),
 178                             gen_lowpart (SImode, operands[2])));
 179   emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (16)));
 180   emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (16)));
 181   emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3));
 182
 183   DONE;
 184 })
 185
 186 ;; The code sequence emitted by this insn pattern uses the Q flag, which GCC
 187 ;; doesn't generally know about, so we don't bother expanding to individual
 188 ;; instructions.  It may be better to just use an out-of-line asm libcall for
 189 ;; this.
 190
 191 (define_insn "ssmulsa3"
 192   [(set (match_operand:SA 0 "s_register_operand" "=r")
 193         (ss_mult:SA (match_operand:SA 1 "s_register_operand" "r")
 194                     (match_operand:SA 2 "s_register_operand" "r")))
 195    (clobber (match_scratch:DI 3 "=r"))
 196    (clobber (match_scratch:SI 4 "=r"))
 197    (clobber (reg:CC CC_REGNUM))]
 198   "TARGET_32BIT && arm_arch6"
 199 {
 200   /* s16.15 * s16.15 -> s32.30.  */
 201   output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands);
 202
 203   if (TARGET_ARM)
 204     output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);
 205   else
 206     {
 207       output_asm_insn ("mov\\t%4, #0", operands);
 208       output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
 209     }
 210
 211   /* We have:
 212       31  high word  0     31  low word  0
 213
 214     [ S i i .... i i i ] [ i f f f ... f f ]
 215                         |
 216                         v
 217              [ S i ... i f ... f f ]
 218
 219     Need 16 integral bits, so saturate at 15th bit of high word.  */
 220
 221   output_asm_insn ("ssat\\t%R3, #15, %R3", operands);
 222   output_asm_insn ("mrs\\t%4, APSR", operands);
 223   output_asm_insn ("tst\\t%4, #1<<27", operands);
 224   if (TARGET_THUMB2)
 225     output_asm_insn ("it\\tne", operands);
 226   output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands);
 227   output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands);
 228   output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands);
 229   return "";
 230 }
 231   [(set_attr "conds" "clob")
 232    (set (attr "length")
 233         (if_then_else (eq_attr "is_thumb" "yes")
 234                       (const_int 38)
 235                       (const_int 32)))])
 236
 237 ;; Same goes for this.
 238
 239 (define_insn "usmulusa3"
 240   [(set (match_operand:USA 0 "s_register_operand" "=r")
 241         (us_mult:USA (match_operand:USA 1 "s_register_operand" "r")
 242                      (match_operand:USA 2 "s_register_operand" "r")))
 243    (clobber (match_scratch:DI 3 "=r"))
 244    (clobber (match_scratch:SI 4 "=r"))
 245    (clobber (reg:CC CC_REGNUM))]
 246   "TARGET_32BIT && arm_arch6"
 247 {
 248   /* 16.16 * 16.16 -> 32.32.  */
 249   output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands);
 250
 251   if (TARGET_ARM)
 252     output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);
 253   else
 254     {
 255       output_asm_insn ("mov\\t%4, #0", operands);
 256       output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
 257     }
 258
 259   /* We have:
 260       31  high word  0     31  low word  0
 261
 262     [ i i i .... i i i ] [ f f f f ... f f ]
 263                         |
 264                         v
 265              [ i i ... i f ... f f ]
 266
 267     Need 16 integral bits, so saturate at 16th bit of high word.  */
 268
 269   output_asm_insn ("usat\\t%R3, #16, %R3", operands);
 270   output_asm_insn ("mrs\\t%4, APSR", operands);
 271   output_asm_insn ("tst\\t%4, #1<<27", operands);
 272   if (TARGET_THUMB2)
 273     output_asm_insn ("it\\tne", operands);
 274   output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands);
 275   output_asm_insn ("lsr\\t%0, %Q3, #16", operands);
 276   output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands);
 277   return "";
 278 }
 279   [(set_attr "conds" "clob")
 280    (set (attr "length")
 281         (if_then_else (eq_attr "is_thumb" "yes")
 282                       (const_int 38)
 283                       (const_int 32)))])
 284
 285 (define_expand "mulha3"
 286   [(set (match_operand:HA 0 "s_register_operand" "")
 287         (mult:HA (match_operand:HA 1 "s_register_operand" "")
 288                  (match_operand:HA 2 "s_register_operand" "")))]
 289   "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
 290 {
 291   rtx tmp = gen_reg_rtx (SImode);
 292
 293   emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]),
 294                            gen_lowpart (HImode, operands[2])));
 295   emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, GEN_INT (16),
 296                        GEN_INT (7)));
 297
 298   DONE;
 299 })
 300
 301 (define_expand "muluha3"
 302   [(set (match_operand:UHA 0 "s_register_operand" "")
 303         (mult:UHA (match_operand:UHA 1 "s_register_operand" "")
 304                   (match_operand:UHA 2 "s_register_operand" "")))]
 305   "TARGET_DSP_MULTIPLY"
 306 {
 307   rtx tmp1 = gen_reg_rtx (SImode);
 308   rtx tmp2 = gen_reg_rtx (SImode);
 309   rtx tmp3 = gen_reg_rtx (SImode);
 310
 311   /* 8.8 * 8.8 -> 16.16 multiply.  */
 312   emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1])));
 313   emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2])));
 314   emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2));
 315   emit_insn (gen_extzv (gen_lowpart (SImode, operands[0]), tmp3,
 316                         GEN_INT (16), GEN_INT (8)));
 317
 318   DONE;
 319 })
 320
 321 (define_expand "ssmulha3"
 322   [(set (match_operand:HA 0 "s_register_operand" "")
 323         (ss_mult:HA (match_operand:HA 1 "s_register_operand" "")
 324                     (match_operand:HA 2 "s_register_operand" "")))]
 325   "TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6"
 326 {
 327   rtx tmp = gen_reg_rtx (SImode);
 328   rtx rshift;
 329
 330   emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]),
 331                            gen_lowpart (HImode, operands[2])));
 332
 333   rshift = gen_rtx_ASHIFTRT (SImode, tmp, GEN_INT (7));
 334
 335   emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (HImode, operands[0]),
 336                           gen_rtx_SS_TRUNCATE (HImode, rshift)));
 337
 338   DONE;
 339 })
 340
 341 (define_expand "usmuluha3"
 342   [(set (match_operand:UHA 0 "s_register_operand" "")
 343         (us_mult:UHA (match_operand:UHA 1 "s_register_operand" "")
 344                      (match_operand:UHA 2 "s_register_operand" "")))]
 345   "TARGET_INT_SIMD"
 346 {
 347   rtx tmp1 = gen_reg_rtx (SImode);
 348   rtx tmp2 = gen_reg_rtx (SImode);
 349   rtx tmp3 = gen_reg_rtx (SImode);
 350   rtx rshift_tmp = gen_reg_rtx (SImode);
 351
 352   /* Note: there's no smul[bt][bt] equivalent for unsigned multiplies.  Use a
 353      normal 32x32->32-bit multiply instead.  */
 354   emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1])));
 355   emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2])));
 356
 357   emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2));
 358
 359   /* The operand to "usat" is signed, so we cannot use the "..., asr #8"
 360      form of that instruction since the multiplication result TMP3 may have the
 361      top bit set, thus be negative and saturate to zero.  Use a separate
 362      logical right-shift instead.  */
 363   emit_insn (gen_lshrsi3 (rshift_tmp, tmp3, GEN_INT (8)));
 364   emit_insn (gen_arm_usatsihi (gen_lowpart (HImode, operands[0]), rshift_tmp));
 365
 366   DONE;
 367 })
 368
 369 (define_insn "arm_ssatsihi_shift"
 370   [(set (match_operand:HI 0 "s_register_operand" "=r")
 371         (ss_truncate:HI (match_operator:SI 1 "sat_shift_operator"
 372                           [(match_operand:SI 2 "s_register_operand" "r")
 373                            (match_operand:SI 3 "immediate_operand" "I")])))]
 374   "TARGET_32BIT && arm_arch6"
 375   "ssat%?\\t%0, #16, %2%S1"
 376   [(set_attr "predicable" "yes")
 377    (set_attr "type" "alu_shift")])
 378
 379 (define_insn "arm_usatsihi"
 380   [(set (match_operand:HI 0 "s_register_operand" "=r")
 381         (us_truncate:HI (match_operand:SI 1 "s_register_operand")))]
 382   "TARGET_INT_SIMD"
 383   "usat%?\\t%0, #16, %1"
 384   [(set_attr "predicable" "yes")])