1 (* Common code for ARM NEON header file, documentation and test case
4 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements.  Prefixes denote element class:
   S = signed, U = unsigned, F = float, P = polynomial, I = plain int,
   B = width-only bits.  Conv and Cast pair a destination element type with
   a source element type; NoElts marks operations that carry no element
   type at all. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts
(* The class of an element type, with ConvClass pairing the classes of a
   conversion's destination and source, and NoType for class-less items. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
31 (* These vector types correspond directly to C types. *)
32 type vectype = T_int8x8 | T_int8x16
33 | T_int16x4 | T_int16x8
34 | T_int32x2 | T_int32x4
35 | T_int64x1 | T_int64x2
36 | T_uint8x8 | T_uint8x16
37 | T_uint16x4 | T_uint16x8
38 | T_uint32x2 | T_uint32x4
39 | T_uint64x1 | T_uint64x2
40 | T_float32x2 | T_float32x4
41 | T_poly8x8 | T_poly8x16
42 | T_poly16x4 | T_poly16x8
43 | T_immediate of int * int
49 | T_float32 | T_arrayof of int * vectype
50 | T_ptrto of vectype | T_const of vectype
55 (* The meanings of the following are:
56 TImode : "Tetra", two registers (four words).
57 EImode : "hExa", three registers (six words).
58 OImode : "Octa", four registers (eight words).
59 CImode : "dodeCa", six registers (twelve words).
60 XImode : "heXadeca", eight registers (sixteen words).
63 type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand slot in an instruction shape: a D or Q vector register, an
   ARM core register, an immediate, an array of registers, or a (possibly
   const) pointer to one of these. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
(* These next ones are used only in the test generator. *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)
73 type shape_form = All of int * shape_elt
75 | Long_noreg of shape_elt
77 | Wide_noreg of shape_elt
81 | Binary_imm of shape_elt
82 | Use_operands of shape_elt array
83 | By_scalar of shape_elt
84 | Unary_scalar of shape_elt
87 | Pair_result of shape_elt
89 type arity = Arity0 of vectype
90 | Arity1 of vectype * vectype
91 | Arity2 of vectype * vectype * vectype
92 | Arity3 of vectype * vectype * vectype * vectype
93 | Arity4 of vectype * vectype * vectype * vectype * vectype
95 type vecmode = V8QI | V4HI | V2SI | V2SF | DI
96 | V16QI | V8HI | V4SI | V4SF | V2DI
139 (* Ops with scalar. *)
161 (* Vector extract. *)
163 (* Reverse elements. *)
167 (* Transposition ops. *)
171 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
177 (* Set/extract lanes from a vector. *)
180 (* Initialize vector from bit pattern. *)
182 (* Set all lanes to same value. *)
184 | Vmov_n (* Is this the same? *)
185 (* Duplicate scalar to all lanes of vector. *)
187 (* Combine vectors. *)
189 (* Get quadword high/low parts. *)
192 (* Convert vectors. *)
195 (* Narrow/lengthen vectors. *)
201 (* Reinterpret casts. *)
204 (* Features used for documentation, to distinguish between some instruction
205 variants, and to signal special requirements (e.g. swapping arguments). *)
214 | Flipped of string (* Builtin name to use with flipped arguments. *)
215 | InfoWord (* Pass an extra word for signage/rounding etc. (always passed
for All _, Long, Wide, Narrow shape_forms).  *)
217 | ReturnPtr (* Pass explicit pointer to return value as first argument. *)
218 (* A specification as to the shape of instruction expected upon
219 disassembly, used if it differs from the shape used to build the
220 intrinsic prototype. Multiple entries in the constructor's argument
221 indicate that the intrinsic expands to more than one assembly
222 instruction, each with a corresponding shape specified here. *)
223 | Disassembles_as of shape_form list
224 | Builtin_name of string (* Override the name of the builtin. *)
225 (* Override the name of the instruction. If more than one name
226 is specified, it means that the instruction can have any of those
228 | Instruction_name of string list
229 (* Mark that the intrinsic yields no instructions, or expands to yield
230 behavior that the test generator cannot test. *)
232 (* Mark that the intrinsic has constant arguments that cannot be set
233 to the defaults (zero for pointers and one otherwise) in the test
234 cases. The function supplied must return the integer to be written
235 into the testcase for the argument number (0-based) supplied to it. *)
236 | Const_valuator of (int -> int)
(* Raised when the two halves of a mixed element type (e.g. a Cast) are
   handed to a function that needs a single element type, as in elt_width. *)
exception MixedMode of elts * elts
241 let rec elt_width = function
242 S8 | U8 | P8 | I8 | B8 -> 8
243 | S16 | U16 | P16 | I16 | B16 -> 16
244 | S32 | F32 | U32 | I32 | B32 -> 32
245 | S64 | U64 | I64 | B64 -> 64
247 let wa = elt_width a and wb = elt_width b in
248 if wa = wb then wa else failwith "element width?"
249 | Cast (a, b) -> raise (MixedMode (a, b))
250 | NoElts -> failwith "No elts"
252 let rec elt_class = function
253 S8 | S16 | S32 | S64 -> Signed
254 | U8 | U16 | U32 | U64 -> Unsigned
257 | I8 | I16 | I32 | I64 -> Int
258 | B8 | B16 | B32 | B64 -> Bits
259 | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
262 let elt_of_class_width c w =
270 | Unsigned, 16 -> U16
271 | Unsigned, 32 -> U32
272 | Unsigned, 64 -> U64
283 | _ -> failwith "Bad element type"
(* Return unsigned integer element the same width as argument. *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width
(* Return a signed integer element with the same width as the argument. *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width
(* Return untyped bits element the same width as argument. *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
296 let non_signed_variant = function
307 let poly_unsigned_variant v =
308 let elclass = match elt_class v with
311 elt_of_class_width elclass (elt_width v)
314 let w = elt_width elt
315 and c = elt_class elt in
316 elt_of_class_width c (w * 2)
319 let w = elt_width elt
320 and c = elt_class elt in
321 elt_of_class_width c (w / 2)
323 (* If we're trying to find a mode from a "Use_operands" instruction, use the
324 last vector operand as the dominant mode used to invoke the correct builtin.
325 We must stick to this rule in neon.md. *)
326 let find_key_operand operands =
328 match operands.(opno) with
331 | VecArray (_, Qreg) -> Qreg
332 | VecArray (_, Dreg) -> Dreg
335 scan ((Array.length operands) - 1)
337 let rec mode_of_elt elt shape =
338 let flt = match elt_class elt with
339 Float | ConvClass(_, Float) -> true | _ -> false in
341 match elt_width elt with
342 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
343 | _ -> failwith "Bad element width"
345 All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
346 | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
347 [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
348 | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
349 | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
350 [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
351 | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
352 [| QI; HI; if flt then SF else SI; DI |].(idx)
353 | Long | Wide | Wide_lane | Wide_scalar
355 [| V8QI; V4HI; V2SI; DI |].(idx)
356 | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
357 | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
358 | _ -> failwith "invalid shape"
360 (* Modify an element type dependent on the shape of the instruction and the
363 let shapemap shape no =
364 let ident = fun x -> x in
366 All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
367 | Binary_imm _ -> ident
368 | Long | Long_noreg _ | Wide_scalar | Long_imm ->
369 [| widen_elt; ident; ident |].(no)
370 | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
371 | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
372 | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
374 (* Register type (D/Q) of an operand, based on shape and operand number. *)
376 let regmap shape no =
378 All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
379 | Long -> [| Qreg; Dreg; Dreg |].(no)
380 | Wide -> [| Qreg; Qreg; Dreg |].(no)
381 | Narrow -> [| Dreg; Qreg; Qreg |].(no)
382 | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
383 | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
384 | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
385 | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
386 | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
387 | Binary_imm reg -> [| reg; reg; Immed |].(no)
388 | Long_imm -> [| Qreg; Dreg; Immed |].(no)
389 | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
390 | Use_operands these -> these.(no)
392 let type_for_elt shape elt no =
393 let elt = (shapemap shape no) elt in
394 let reg = regmap shape no in
395 let rec type_for_reg_elt reg elt =
410 | _ -> failwith "Bad elt type"
425 | _ -> failwith "Bad elt type"
440 | _ -> failwith "Bad elt type"
444 | VecArray (num, sub) ->
445 T_arrayof (num, type_for_reg_elt sub elt)
447 T_ptrto (type_for_reg_elt x elt)
449 T_ptrto (T_const (type_for_reg_elt x elt))
450 (* Anything else is solely for the use of the test generator. *)
453 type_for_reg_elt reg elt
(* Return size of a vector type, in bits.  Raises Not_found for any
   vectype that is not a 64-bit or 128-bit NEON vector. *)
let vectype_size vt =
  match vt with
  | T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found
465 let inttype_for_array num elttype =
466 let eltsize = vectype_size elttype in
467 let numwords = (num * eltsize) / 32 in
474 | _ -> failwith ("no int type for size " ^ string_of_int numwords)
476 (* These functions return pairs of (internal, external) types, where "internal"
477 types are those seen by GCC, and "external" are those seen by the assembler.
478 These types aren't necessarily the same, since the intrinsics can munge more
479 than one C type into each assembler opcode. *)
(* Wrap a type-generating function [func], stripping signedness from the
   element type it reports.  Used for operations whose behaviour does not
   depend on whether operands are signed or unsigned. *)
let make_sign_invariant func shape elt =
  let result = func shape elt in
  fst result, non_signed_variant (snd result)
(* Don't restrict any types.  [make_arity] is given a function from operand
   number to C type; the element type passes through unchanged. *)
let elts_same make_arity shape elt =
  make_arity (type_for_elt shape elt), elt
(* As sign_invar_*, but when sign matters.  In/out accumulator plus lane:
   result, accumulator (same type), two operands and a lane index. *)
let elts_same_io_lane =
  elts_same (fun t -> Arity4 (t 0, t 0, t 1, t 2, t 3))
496 elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))
(* Two operands plus a lane index. *)
let elts_same_2_lane =
  elts_same (fun t -> Arity3 (t 0, t 1, t 2, t 3))

(* Plain three-operand form is structurally the same as the lane variant. *)
let elts_same_3 = elts_same_2_lane
504 elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))
507 elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
509 (* Use for signed/unsigned invariant operations (i.e. where the operation
doesn't depend on the sign of the data).  *)
(* Wrappers that strip signedness from the element type reported by the
   corresponding elts_same_* builder. *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison.  The result vector is the unsigned variant of
   the operand element type (same width); the element type is unchanged. *)
let cmp_sign_matters shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), elt
525 (* Signed/unsigned invariant comparison. *)
527 let cmp_sign_invar shape elt =
528 let shape', elt' = cmp_sign_matters shape elt in
530 match non_signed_variant elt' with
(* Comparison (VTST) where only the element width matters.  The recorded
   element type is the width-only "bits" variant of the operand type. *)
let cmp_bits shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), bits_of_elt elt
(* Register-controlled shift: operand 2 is a signed vector of the same
   width as the data, regardless of the data's own signedness. *)
let reg_shift shape elt =
  let vec = type_for_elt shape elt in
  let shift_operand = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vec 0, vec 1, shift_operand), elt
549 (* Genericised constant-shift type-generating function. *)
551 let const_shift mkimm ?arity ?result shape elt =
552 let op2type = (shapemap shape 2) elt in
553 let op2width = elt_width op2type in
554 let op2 = mkimm op2width
555 and op1 = type_for_elt shape elt 1
559 | Some restriction -> restriction elt in
560 let rtype = type_for_elt shape r_elt 0 in
562 None -> Arity2 (rtype, op1, op2), elt
563 | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  T_immediate (1, w) presumably bounds
   the shift count between 1 and the element width -- confirm against
   T_immediate's consumers. *)
let shift_right shape elt =
  const_shift (fun width -> T_immediate (1, width)) shape elt
(* Immediate right-shift with accumulation: the destination type also
   appears as the first operand (in/out accumulator). *)
let shift_right_acc shape elt =
  const_shift (fun width -> T_immediate (1, width))
    ~arity:(fun acc src count -> Arity3 (acc, acc, src, count)) shape elt
(* Use for immediate right-shifts when the operation doesn't care about
   the sign of the data. *)
let shift_right_sign_invar =
  make_sign_invariant shift_right
580 (* Immediate right-shift; result is unsigned even when operand is signed. *)
582 let shift_right_to_uns shape elt =
583 const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
(* Immediate left-shift.  T_immediate (0, w - 1) presumably bounds the
   shift count between 0 and width - 1 -- confirm against T_immediate's
   consumers. *)
let shift_left shape elt =
  const_shift (fun width -> T_immediate (0, width - 1)) shape elt
591 (* Immediate left-shift, unsigned result. *)
593 let shift_left_to_uns shape elt =
594 const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
(* Immediate left-shift, don't care about signs. *)
let shift_left_sign_invar =
  make_sign_invariant shift_left
602 (* Shift left/right and insert: only element size matters. *)
604 let shift_insert shape elt =
606 const_shift (fun imm -> T_immediate (1, imm))
607 ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
608 arity, bits_of_elt elt
(* Get a single lane from a vector.  The recorded element type maps
   polynomial elements to unsigned and all 32-bit elements to width-only
   bits; everything else passes through. *)
let get_lane shape elt =
  let t = type_for_elt shape elt in
  let lane_elt =
    match elt with
      P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other in
  Arity2 (t 0, t 1, t 2), lane_elt
(* Set a single lane in a vector; only the element width is recorded. *)
let set_lane shape elt =
  let t = type_for_elt shape elt in
  Arity3 (t 0, t 1, t 2, t 3), bits_of_elt elt
(* As set_lane, but no element type is recorded at all. *)
let set_lane_notype shape elt =
  let t = type_for_elt shape elt in
  Arity3 (t 0, t 1, t 2, t 3), NoElts
(* Build a vector of element type [elt] from a 64-bit (U64) bit pattern. *)
let create_vector shape elt =
  let source = type_for_elt shape U64 1 in
  let result = type_for_elt shape elt 0 in
  Arity1 (result, source), elt
(* Conversion operations.  The element type must be a Conv or Cast pairing
   the destination element type with the source element type. *)
let conv make_arity shape elt =
  match elt with
  | Conv (edest, esrc) | Cast (edest, esrc) ->
      let vtype = type_for_elt shape esrc in
      let rtype = type_for_elt shape edest 0 in
      make_arity rtype vtype, elt
  | _ -> failwith "Non-conversion element in conversion"
(* One- and two-operand conversion builders. *)
let conv_1 = conv (fun r v -> Arity1 (r, v 1))
let conv_2 = conv (fun r v -> Arity2 (r, v 1, v 2))
(* Operation has an unsigned result even if operands are signed. *)
let dst_unsign make_arity shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity result operand, elt

let dst_unsign_1 = dst_unsign (fun r v -> Arity1 (r, v 1))
(* Wrap [func] so that the element type it reports keeps only its width
   (the B* "bits" variant). *)
let make_bits_only func shape elt =
  let result = func shape elt in
  fst result, bits_of_elt (snd result)
(* Extend operation: three operands plus result, recording only the
   element width. *)
let extend shape elt =
  let t = type_for_elt shape elt in
  Arity3 (t 0, t 1, t 2, t 3), bits_of_elt elt
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops; only the element width is recorded. *)
let table mkarity shape elt =
  let t = type_for_elt shape elt in
  let op2ty = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity t op2ty, bits_of_elt elt

let table_2 = table (fun t op2 -> Arity2 (t 0, t 1, op2))
let table_io = table (fun t op2 -> Arity3 (t 0, t 0, t 1, op2))
(* Operations where only bits matter: the reported element type is reduced
   to its width-only B* variant. *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Two-operand store: the intrinsic returns void, and only the element
   width is recorded. *)
let store_1 shape elt =
  let t = type_for_elt shape elt in
  Arity2 (T_void, t 0, t 1), bits_of_elt elt
(* Three-operand store: returns void, records only the element width. *)
let store_3 shape elt =
  let t = type_for_elt shape elt in
  Arity3 (T_void, t 0, t 1, t 2), bits_of_elt elt
686 let make_notype func shape elt =
687 let arity, _ = func shape elt in
(* Variants that discard the element type reported by the underlying
   builder. *)
let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int). *)
let bit_select shape elt =
  let vec = type_for_elt shape elt in
  let selector = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vec 0, selector 1, vec 2, vec 3), NoElts
(* Common lists of supported element types.  Names encode the element
   classes included (s = signed, u = unsigned, p = poly, f = float) and the
   range of element widths in bits. *)
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
712 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
713 Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
714 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
715 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
716 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
717 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
718 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
719 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
720 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
721 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
722 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
723 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
724 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
725 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
726 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
727 Narrow, "vRaddhn", sign_invar_2, su_16_64;
729 (* Multiplication. *)
730 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
731 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
732 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
733 elts_same_2, [S16; S32];
734 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
735 elts_same_2, [S16; S32];
737 [Saturating; Rounding; Doubling; High_half;
738 Instruction_name ["vqrdmulh"]],
739 All (3, Dreg), "vqRdmulh",
740 elts_same_2, [S16; S32];
742 [Saturating; Rounding; Doubling; High_half;
743 Instruction_name ["vqrdmulh"]],
744 All (3, Qreg), "vqRdmulhQ",
745 elts_same_2, [S16; S32];
746 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
747 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
749 (* Multiply-accumulate. *)
750 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
751 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
752 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
753 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
755 (* Multiply-subtract. *)
756 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
757 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
758 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
759 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
762 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
763 Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
764 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
765 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
766 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
767 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
768 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
769 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
770 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
771 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
772 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
773 Narrow, "vRsubhn", sign_invar_2, su_16_64;
775 (* Comparison, equal. *)
776 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
777 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
779 (* Comparison, greater-than or equal. *)
780 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
781 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
783 (* Comparison, less-than or equal. *)
784 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
786 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
787 All (3, Qreg), "vcleQ", cmp_sign_matters,
790 (* Comparison, greater-than. *)
791 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
792 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
794 (* Comparison, less-than. *)
795 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
797 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
798 All (3, Qreg), "vcltQ", cmp_sign_matters,
801 (* Compare absolute greater-than or equal. *)
802 Vcage, [Instruction_name ["vacge"]],
803 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
804 Vcage, [Instruction_name ["vacge"]],
805 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
807 (* Compare absolute less-than or equal. *)
808 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
809 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
810 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
811 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
813 (* Compare absolute greater-than or equal. *)
814 Vcagt, [Instruction_name ["vacgt"]],
815 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
816 Vcagt, [Instruction_name ["vacgt"]],
817 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
819 (* Compare absolute less-than or equal. *)
820 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
821 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
822 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
823 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
826 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
827 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
829 (* Absolute difference. *)
830 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
831 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
832 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
834 (* Absolute difference and accumulate. *)
835 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
836 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
837 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
840 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
841 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
844 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
845 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
848 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
849 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
850 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
852 (* Pairwise add, widen and accumulate. *)
853 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
854 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
856 (* Folding maximum, minimum. *)
857 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
858 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
860 (* Reciprocal step. *)
861 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
862 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
863 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
864 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
866 (* Vector shift left. *)
867 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
868 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
869 Vshl, [Instruction_name ["vrshl"]; Rounding],
870 All (3, Dreg), "vRshl", reg_shift, su_8_64;
871 Vshl, [Instruction_name ["vrshl"]; Rounding],
872 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
873 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
874 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
875 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
876 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
877 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
878 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
880 (* Vector shift right by constant. *)
881 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
882 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
883 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
884 "vRshr_n", shift_right, su_8_64;
885 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
886 "vRshrQ_n", shift_right, su_8_64;
887 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
888 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
889 shift_right_sign_invar, su_16_64;
890 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
891 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
892 "vqRshrn_n", shift_right, su_16_64;
893 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
894 shift_right_to_uns, [S16; S32; S64];
895 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
896 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
898 (* Vector shift left by constant. *)
899 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
900 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
901 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
902 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
903 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
904 shift_left_to_uns, [S8; S16; S32; S64];
905 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
906 shift_left_to_uns, [S8; S16; S32; S64];
907 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
909 (* Vector shift right by constant and accumulate. *)
910 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
911 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
912 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
913 "vRsra_n", shift_right_acc, su_8_64;
914 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
915 "vRsraQ_n", shift_right_acc, su_8_64;
917 (* Vector shift right and insert. *)
918 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
919 P8 :: P16 :: su_8_64;
920 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
921 P8 :: P16 :: su_8_64;
923 (* Vector shift left and insert. *)
924 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
925 P8 :: P16 :: su_8_64;
926 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
927 P8 :: P16 :: su_8_64;
929 (* Absolute value. *)
930 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
931 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
932 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
933 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
936 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
937 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
938 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
939 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
942 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
943 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
945 (* Count leading sign bits. *)
946 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
947 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
949 (* Count leading zeros. *)
950 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
951 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
953 (* Count number of set bits. *)
954 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
955 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
957 (* Reciprocal estimate. *)
958 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
959 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
961 (* Reciprocal square-root estimate. *)
962 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
963 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
965 (* Get lanes from a vector. *)
967 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
968 Instruction_name ["vmov"]],
969 Use_operands [| Corereg; Dreg; Immed |],
970 "vget_lane", get_lane, pf_su_8_32;
974 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
975 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
976 Use_operands [| Corereg; Dreg; Immed |],
977 "vget_lane", notype_2, [S64; U64];
979 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
980 Instruction_name ["vmov"]],
981 Use_operands [| Corereg; Qreg; Immed |],
982 "vgetQ_lane", get_lane, pf_su_8_32;
985 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
986 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
987 Use_operands [| Corereg; Qreg; Immed |],
988 "vgetQ_lane", notype_2, [S64; U64];
990 (* Set lanes in a vector. *)
991 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
992 Instruction_name ["vmov"]],
993 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
994 set_lane, pf_su_8_32;
996 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
997 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
998 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
999 set_lane_notype, [S64; U64];
1000 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1001 Instruction_name ["vmov"]],
1002 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1003 set_lane, pf_su_8_32;
1004 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1005 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1006 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1007 set_lane_notype, [S64; U64];
1009 (* Create vector from literal bit pattern. *)
1011 [No_op], (* Not really, but it can yield various things that are too
1012 hard for the test generator at this time. *)
1013 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1016 (* Set all lanes to the same value. *)
1018 [Disassembles_as [Use_operands [| Dreg;
1019 Alternatives [ Corereg;
1020 Element_of_dreg ] |]]],
1021 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1025 Instruction_name ["vmov"];
1026 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1027 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1030 [Disassembles_as [Use_operands [| Qreg;
1031 Alternatives [ Corereg;
1032 Element_of_dreg ] |]]],
1033 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1037 Instruction_name ["vmov"];
1038 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1039 Use_operands [| Dreg; Corereg; Corereg |]]],
1040 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1043 (* These are just aliases for the above. *)
1045 [Builtin_name "vdup_n";
1046 Disassembles_as [Use_operands [| Dreg;
1047 Alternatives [ Corereg;
1048 Element_of_dreg ] |]]],
1049 Use_operands [| Dreg; Corereg |],
1050 "vmov_n", bits_1, pf_su_8_32;
1053 Builtin_name "vdup_n";
1054 Instruction_name ["vmov"];
1055 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1056 Use_operands [| Dreg; Corereg |],
1057 "vmov_n", notype_1, [S64; U64];
1059 [Builtin_name "vdupQ_n";
1060 Disassembles_as [Use_operands [| Qreg;
1061 Alternatives [ Corereg;
1062 Element_of_dreg ] |]]],
1063 Use_operands [| Qreg; Corereg |],
1064 "vmovQ_n", bits_1, pf_su_8_32;
1067 Builtin_name "vdupQ_n";
1068 Instruction_name ["vmov"];
1069 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1070 Use_operands [| Dreg; Corereg; Corereg |]]],
1071 Use_operands [| Qreg; Corereg |],
1072 "vmovQ_n", notype_1, [S64; U64];
1074 (* Duplicate, lane version. We can't use Use_operands here because the
1075 rightmost register (always Dreg) would be picked up by find_key_operand,
1076 when we want the leftmost register to be used in this case (otherwise
the modes are indistinguishable in neon.md, etc.). *)
1079 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1080 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1082 [No_op; Const_valuator (fun _ -> 0)],
1083 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1085 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1086 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1088 [No_op; Const_valuator (fun _ -> 0)],
1089 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1091 (* Combining vectors. *)
1093 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1096 (* Splitting vectors. *)
1098 Use_operands [| Dreg; Qreg |], "vget_high",
1099 notype_1, pf_su_8_64;
1100 Vget_low, [Instruction_name ["vmov"];
1101 Disassembles_as [Use_operands [| Dreg; Dreg |]];
1103 Use_operands [| Dreg; Qreg |], "vget_low",
1104 notype_1, pf_su_8_32;
1106 Use_operands [| Dreg; Qreg |], "vget_low",
1107 notype_1, [S64; U64];
1110 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1111 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1112 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1113 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1114 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1115 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1116 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1117 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1119 (* Move, narrowing. *)
1120 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1121 Narrow, "vmovn", sign_invar_1, su_16_64;
1122 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1123 Narrow, "vqmovn", elts_same_1, su_16_64;
1125 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1126 Narrow, "vqmovun", dst_unsign_1,
1130 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1131 Long, "vmovl", elts_same_1, su_8_32;
1135 [Instruction_name ["vtbl"];
1136 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1137 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1138 Vtbl 2, [Instruction_name ["vtbl"]],
1139 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1141 Vtbl 3, [Instruction_name ["vtbl"]],
1142 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1144 Vtbl 4, [Instruction_name ["vtbl"]],
1145 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1148 (* Extended table lookup. *)
1150 [Instruction_name ["vtbx"];
1151 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1152 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1153 Vtbx 2, [Instruction_name ["vtbx"]],
1154 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1156 Vtbx 3, [Instruction_name ["vtbx"]],
1157 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1159 Vtbx 4, [Instruction_name ["vtbx"]],
1160 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1163 (* Multiply, lane. (note: these were undocumented at the time of
1165 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1166 [S16; S32; U16; U32; F32];
1167 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1168 [S16; S32; U16; U32; F32];
1170 (* Multiply-accumulate, lane. *)
1171 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1172 [S16; S32; U16; U32; F32];
1173 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1174 [S16; S32; U16; U32; F32];
1175 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1176 [S16; S32; U16; U32];
1177 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1178 elts_same_io_lane, [S16; S32];
1180 (* Multiply-subtract, lane. *)
1181 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1182 [S16; S32; U16; U32; F32];
1183 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1184 [S16; S32; U16; U32; F32];
1185 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1186 [S16; S32; U16; U32];
1187 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1188 elts_same_io_lane, [S16; S32];
1190 (* Long multiply, lane. *)
1192 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1194 (* Saturating doubling long multiply, lane. *)
1195 Vqdmull_lane, [Saturating; Doubling],
1196 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1198 (* Saturating doubling long multiply high, lane. *)
1199 Vqdmulh_lane, [Saturating; Halving],
1200 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1201 Vqdmulh_lane, [Saturating; Halving],
1202 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1203 Vqdmulh_lane, [Saturating; Halving; Rounding;
1204 Instruction_name ["vqrdmulh"]],
1205 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1206 Vqdmulh_lane, [Saturating; Halving; Rounding;
1207 Instruction_name ["vqrdmulh"]],
1208 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1210 (* Vector multiply by scalar. *)
1212 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1213 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1214 sign_invar_2, [S16; S32; U16; U32; F32];
1216 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1217 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1218 sign_invar_2, [S16; S32; U16; U32; F32];
1220 (* Vector long multiply by scalar. *)
1221 Vmull_n, [Instruction_name ["vmull"];
1222 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1223 Wide_scalar, "vmull_n",
1224 elts_same_2, [S16; S32; U16; U32];
1226 (* Vector saturating doubling long multiply by scalar. *)
1227 Vqdmull_n, [Saturating; Doubling;
1228 Disassembles_as [Use_operands [| Qreg; Dreg;
1229 Element_of_dreg |]]],
1230 Wide_scalar, "vqdmull_n",
1231 elts_same_2, [S16; S32];
1233 (* Vector saturating doubling long multiply high by scalar. *)
1235 [Saturating; Halving; InfoWord;
1236 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1237 Use_operands [| Qreg; Qreg; Corereg |],
1238 "vqdmulhQ_n", elts_same_2, [S16; S32];
1240 [Saturating; Halving; InfoWord;
1241 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1242 Use_operands [| Dreg; Dreg; Corereg |],
1243 "vqdmulh_n", elts_same_2, [S16; S32];
1245 [Saturating; Halving; Rounding; InfoWord;
1246 Instruction_name ["vqrdmulh"];
1247 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1248 Use_operands [| Qreg; Qreg; Corereg |],
1249 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1251 [Saturating; Halving; Rounding; InfoWord;
1252 Instruction_name ["vqrdmulh"];
1253 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1254 Use_operands [| Dreg; Dreg; Corereg |],
1255 "vqRdmulh_n", elts_same_2, [S16; S32];
1257 (* Vector multiply-accumulate by scalar. *)
1259 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1260 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1261 sign_invar_io, [S16; S32; U16; U32; F32];
1263 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1264 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1265 sign_invar_io, [S16; S32; U16; U32; F32];
1266 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1267 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1270 (* Vector multiply subtract by scalar. *)
1272 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1273 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1274 sign_invar_io, [S16; S32; U16; U32; F32];
1276 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1277 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1278 sign_invar_io, [S16; S32; U16; U32; F32];
1279 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1280 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1283 (* Vector extract. *)
1284 Vext, [Const_valuator (fun _ -> 0)],
1285 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1287 Vext, [Const_valuator (fun _ -> 0)],
1288 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1291 (* Reverse elements. *)
1292 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1293 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1294 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1295 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1296 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1297 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1299 (* Bit selection. *)
1301 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1302 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1303 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1306 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1307 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1308 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1311 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1312 generating good code for intrinsics which return structure types --
1313 builtins work well by themselves (and understand that the values being
1314 stored on e.g. the stack also reside in registers, so can optimise the
1315 stores away entirely if the results are used immediately), but
1316 intrinsics are very much less efficient. Maybe something can be improved
1317 re: inlining, or tweaking the ABI used for intrinsics (a special call
1320 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1321 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1324 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1325 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1327 (* Unzip elements. *)
1328 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1329 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1331 (* Element/structure loads. VLD1 variants. *)
1333 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1334 CstPtrTo Corereg |]]],
1335 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1337 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1338 CstPtrTo Corereg |]]],
1339 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1343 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1344 CstPtrTo Corereg |]]],
1345 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1346 "vld1_lane", bits_3, pf_su_8_32;
1348 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1349 CstPtrTo Corereg |]];
1350 Const_valuator (fun _ -> 0)],
1351 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1352 "vld1_lane", bits_3, [S64; U64];
1354 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1355 CstPtrTo Corereg |]]],
1356 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1357 "vld1Q_lane", bits_3, pf_su_8_32;
1359 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1360 CstPtrTo Corereg |]]],
1361 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1362 "vld1Q_lane", bits_3, [S64; U64];
1365 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1366 CstPtrTo Corereg |]]],
1367 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1370 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1371 CstPtrTo Corereg |]]],
1372 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1375 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1376 CstPtrTo Corereg |]]],
1377 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1380 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1381 CstPtrTo Corereg |]]],
1382 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1385 (* VST1 variants. *)
1386 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1388 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1389 store_1, pf_su_8_64;
1390 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1392 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1393 store_1, pf_su_8_64;
1396 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1397 CstPtrTo Corereg |]]],
1398 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1399 "vst1_lane", store_3, pf_su_8_32;
1401 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1402 CstPtrTo Corereg |]];
1403 Const_valuator (fun _ -> 0)],
1404 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1405 "vst1_lane", store_3, [U64; S64];
1407 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1408 CstPtrTo Corereg |]]],
1409 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1410 "vst1Q_lane", store_3, pf_su_8_32;
1412 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1413 CstPtrTo Corereg |]]],
1414 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1415 "vst1Q_lane", store_3, [U64; S64];
1417 (* VLD2 variants. *)
1418 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1419 "vld2", bits_1, pf_su_8_32;
1420 Vldx 2, [Instruction_name ["vld1"]],
1421 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1422 "vld2", bits_1, [S64; U64];
1423 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1424 CstPtrTo Corereg |];
1425 Use_operands [| VecArray (2, Dreg);
1426 CstPtrTo Corereg |]]],
1427 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1428 "vld2Q", bits_1, pf_su_8_32;
1431 [Disassembles_as [Use_operands
1432 [| VecArray (2, Element_of_dreg);
1433 CstPtrTo Corereg |]]],
1434 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1435 VecArray (2, Dreg); Immed |],
1436 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1438 [Disassembles_as [Use_operands
1439 [| VecArray (2, Element_of_dreg);
1440 CstPtrTo Corereg |]]],
1441 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1442 VecArray (2, Qreg); Immed |],
1443 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1446 [Disassembles_as [Use_operands
1447 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1448 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1449 "vld2_dup", bits_1, pf_su_8_32;
1451 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1452 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1453 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1454 "vld2_dup", bits_1, [S64; U64];
1456 (* VST2 variants. *)
1457 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1459 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1460 store_1, pf_su_8_32;
1461 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1463 Instruction_name ["vst1"]],
1464 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1465 store_1, [S64; U64];
1466 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1468 Use_operands [| VecArray (2, Dreg);
1470 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1471 store_1, pf_su_8_32;
1474 [Disassembles_as [Use_operands
1475 [| VecArray (2, Element_of_dreg);
1476 CstPtrTo Corereg |]]],
1477 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1478 store_3, P8 :: P16 :: F32 :: su_8_32;
1480 [Disassembles_as [Use_operands
1481 [| VecArray (2, Element_of_dreg);
1482 CstPtrTo Corereg |]]],
1483 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1484 store_3, [P16; F32; U16; U32; S16; S32];
1486 (* VLD3 variants. *)
1487 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1488 "vld3", bits_1, pf_su_8_32;
1489 Vldx 3, [Instruction_name ["vld1"]],
1490 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1491 "vld3", bits_1, [S64; U64];
1492 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1493 CstPtrTo Corereg |];
1494 Use_operands [| VecArray (3, Dreg);
1495 CstPtrTo Corereg |]]],
1496 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1497 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1500 [Disassembles_as [Use_operands
1501 [| VecArray (3, Element_of_dreg);
1502 CstPtrTo Corereg |]]],
1503 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1504 VecArray (3, Dreg); Immed |],
1505 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1507 [Disassembles_as [Use_operands
1508 [| VecArray (3, Element_of_dreg);
1509 CstPtrTo Corereg |]]],
1510 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1511 VecArray (3, Qreg); Immed |],
1512 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1515 [Disassembles_as [Use_operands
1516 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1517 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1518 "vld3_dup", bits_1, pf_su_8_32;
1520 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1521 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1522 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1523 "vld3_dup", bits_1, [S64; U64];
1525 (* VST3 variants. *)
1526 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1528 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1529 store_1, pf_su_8_32;
1530 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1532 Instruction_name ["vst1"]],
1533 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1534 store_1, [S64; U64];
1535 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1537 Use_operands [| VecArray (3, Dreg);
1539 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1540 store_1, pf_su_8_32;
1543 [Disassembles_as [Use_operands
1544 [| VecArray (3, Element_of_dreg);
1545 CstPtrTo Corereg |]]],
1546 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1547 store_3, P8 :: P16 :: F32 :: su_8_32;
1549 [Disassembles_as [Use_operands
1550 [| VecArray (3, Element_of_dreg);
1551 CstPtrTo Corereg |]]],
1552 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1553 store_3, [P16; F32; U16; U32; S16; S32];
1555 (* VLD4/VST4 variants. *)
1556 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1557 "vld4", bits_1, pf_su_8_32;
1558 Vldx 4, [Instruction_name ["vld1"]],
1559 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1560 "vld4", bits_1, [S64; U64];
1561 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1562 CstPtrTo Corereg |];
1563 Use_operands [| VecArray (4, Dreg);
1564 CstPtrTo Corereg |]]],
1565 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1566 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1569 [Disassembles_as [Use_operands
1570 [| VecArray (4, Element_of_dreg);
1571 CstPtrTo Corereg |]]],
1572 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1573 VecArray (4, Dreg); Immed |],
1574 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1576 [Disassembles_as [Use_operands
1577 [| VecArray (4, Element_of_dreg);
1578 CstPtrTo Corereg |]]],
1579 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1580 VecArray (4, Qreg); Immed |],
1581 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1584 [Disassembles_as [Use_operands
1585 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1586 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1587 "vld4_dup", bits_1, pf_su_8_32;
1589 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1590 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1591 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1592 "vld4_dup", bits_1, [S64; U64];
1594 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1596 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1597 store_1, pf_su_8_32;
1598 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1600 Instruction_name ["vst1"]],
1601 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1602 store_1, [S64; U64];
1603 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1605 Use_operands [| VecArray (4, Dreg);
1607 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1608 store_1, pf_su_8_32;
1611 [Disassembles_as [Use_operands
1612 [| VecArray (4, Element_of_dreg);
1613 CstPtrTo Corereg |]]],
1614 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1615 store_3, P8 :: P16 :: F32 :: su_8_32;
1617 [Disassembles_as [Use_operands
1618 [| VecArray (4, Element_of_dreg);
1619 CstPtrTo Corereg |]]],
1620 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1621 store_3, [P16; F32; U16; U32; S16; S32];
1623 (* Logical operations. And. *)
1624 Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
1625 Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
1626 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1629 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
1630 Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
1631 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1634 Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
1635 Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
1636 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1638 (* Bic (And-not). *)
1639 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
1640 Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
1641 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1644 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
1645 Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
1646 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1650 let elems = P8 :: P16 :: F32 :: su_8_64 in
1653 let types = List.fold_right
1654 (fun convfrom acc ->
1655 if convfrom <> convto then
1656 Cast (convto, convfrom) :: acc
1662 let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
1663 "vreinterpret", conv_1, types
1664 and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
1665 "vreinterpretQ", conv_1, types in
1666 dconv :: qconv :: acc)
1670 (* Output routines. *)
(* Render an element type as the textual suffix used in intrinsic and
   builtin names, e.g. S8 -> "s8", B32 -> "32".  A Conv or Cast joins the
   spellings of its two sides with an underscore.  Raises Failure on
   NoElts, which has no spelling. *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"   | U8 -> "u8"   | I8 -> "i8"   | B8 -> "8"   | P8 -> "p8"
  | S16 -> "s16" | U16 -> "u16" | I16 -> "i16" | B16 -> "16" | P16 -> "p16"
  | S32 -> "s32" | U32 -> "u32" | I32 -> "i32" | B32 -> "32" | F32 -> "f32"
  | S64 -> "s64" | U64 -> "u64" | I64 -> "i64" | B64 -> "64"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
(* Like string_of_elt, except that the two sides of a Conv or Cast are
   joined with '.' rather than '_'.  All other element types render
   exactly as string_of_elt renders them. *)
let string_of_elt_dots elt =
  (* NOTE(review): the "match elt with" dispatch line was missing from
     this copy of the file (the arms followed the binding directly);
     reinstated so the function parses. *)
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
(* Render a vectype as its C-level type name, e.g. "int8x8_t".
   The inner [name] worker threads an [affix] function over the base
   spelling: the outermost call appends "_t", while some cases (raw
   builtin types, immediates) ignore the affix entirely. *)
let string_of_vectype vt =
  let rec name affix = function
    T_int8x8 -> affix "int8x8"
  | T_int8x16 -> affix "int8x16"
  | T_int16x4 -> affix "int16x4"
  | T_int16x8 -> affix "int16x8"
  | T_int32x2 -> affix "int32x2"
  | T_int32x4 -> affix "int32x4"
  | T_int64x1 -> affix "int64x1"
  | T_int64x2 -> affix "int64x2"
  | T_uint8x8 -> affix "uint8x8"
  | T_uint8x16 -> affix "uint8x16"
  | T_uint16x4 -> affix "uint16x4"
  | T_uint16x8 -> affix "uint16x8"
  | T_uint32x2 -> affix "uint32x2"
  | T_uint32x4 -> affix "uint32x4"
  | T_uint64x1 -> affix "uint64x1"
  | T_uint64x2 -> affix "uint64x2"
  | T_float32x2 -> affix "float32x2"
  | T_float32x4 -> affix "float32x4"
  | T_poly8x8 -> affix "poly8x8"
  | T_poly8x16 -> affix "poly8x16"
  | T_poly16x4 -> affix "poly16x4"
  | T_poly16x8 -> affix "poly16x8"
  | T_int8 -> affix "int8"
  | T_int16 -> affix "int16"
  | T_int32 -> affix "int32"
  | T_int64 -> affix "int64"
  | T_uint8 -> affix "uint8"
  | T_uint16 -> affix "uint16"
  | T_uint32 -> affix "uint32"
  | T_uint64 -> affix "uint64"
  | T_poly8 -> affix "poly8"
  | T_poly16 -> affix "poly16"
  | T_float32 -> affix "float32"
  (* The remaining cases bypass [affix]: they are spelled directly and
     never receive a "_t" suffix. *)
  | T_immediate _ -> "const int"
  | T_intQI -> "__builtin_neon_qi"
  | T_intHI -> "__builtin_neon_hi"
  | T_intSI -> "__builtin_neon_si"
  | T_intDI -> "__builtin_neon_di"
  | T_floatSF -> "__builtin_neon_sf"
  | T_arrayof (num, base) ->
      (* Arrays render the base type bare, then apply the affix to
         "<base>x<num>", e.g. an array of 2 int8x8 -> "int8x8x2_t". *)
      let basename = name (fun x -> x) base in
      affix (Printf.sprintf "%sx%d" basename num)
      (* NOTE(review): the match-arm patterns introducing [x] for the
         next two cases (presumably T_ptrto and T_const, given the type
         declaration and the "%s *" / "const %s" formats below) appear
         to be missing from this copy of the file, as does the closing
         "in" of the worker -- verify against the original. *)
      let basename = name affix x in
      Printf.sprintf "%s *" basename
      let basename = name affix x in
      Printf.sprintf "const %s" basename
  name (fun x -> x ^ "_t") vt
(* Map a multi-register backing mode to the corresponding internal
   __builtin_neon_* type name. *)
let string_of_inttype it =
  match it with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Render a machine mode as its lowercase name, e.g. V8QI -> "v8qi".
   NOTE(review): this copy of the file may be truncated here -- check the
   original for further arms (e.g. a scalar float mode) before assuming
   this match is exhaustive. *)
let string_of_mode = function
    V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
  | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
  | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
1753 (* Use uppercase chars for letters which form part of the intrinsic name, but
1754 should be omitted from the builtin name (the info is passed in an extra
1755 argument, instead). *)
1756 let intrinsic_name name = String.lowercase name
1758 (* Allow the name of the builtin to be overridden by things (e.g. Flipped)
1759 found in the features list. *)
let builtin_name features name =
  (* A Flipped or Builtin_name entry in the features list replaces
     [name] outright.
     NOTE(review): the fold's function header and its trailing
     "... features name in" arguments appear to be missing from this
     copy of the file -- verify against the original. *)
  let name = List.fold_right
    Flipped x | Builtin_name x -> x
  (* Keep only the characters left unchanged by String.lowercase
     (lowercase letters, digits, underscores); uppercase letters are
     dropped from the builtin name. *)
  let islower x = let str = String.make 1 x in (String.lowercase str) = str
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  (* NOTE(review): the final result expression (presumably
     "Buffer.contents buf") also appears to be missing here. *)
1772 (* Transform an arity into a list of strings. *)
(* Transform an arity into the list of its vector-type names, in the
   order the types appear in the arity constructor. *)
let strings_of_arity a =
  (* NOTE(review): the "match a with" dispatch line was missing from this
     copy of the file; reinstated here.  The repeated per-arm calls to
     string_of_vectype are also factored into a single List.map. *)
  let vts =
    match a with
      Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vts
1790 (* Suffixes on the end of builtin names that are to be stripped in order
1791 to obtain the name used as an instruction. They are only stripped if
1792 preceded immediately by an underscore. *)
1793 let suffixes_to_strip = [ "n"; "lane"; "dup" ]
1795 (* Get the possible names of an instruction corresponding to a "name" from the
1796 ops table. This is done by getting the equivalent builtin name and
1797 stripping any suffixes from the list at the top of this file, unless
the features list contains an Instruction_name entry, in which
case that is used; or unless the features list contains a Flipped
entry, in which case that is used. If both such entries are present,
1801 the first in the list will be chosen. *)
let get_insn_names features name =
  (* NOTE(review): several structural lines appear to be missing from
     this copy of the file -- the binding of the found feature, the
     Flipped detection in the predicate, a fallthrough match arm, and
     the try framing around both the feature search and the suffix
     stripping.  Verify against the original before relying on this. *)
  match List.find (fun feature -> match feature with
    Instruction_name _ -> true
  | _ -> false) features
    Instruction_name names -> names
  | Flipped name -> [name]
  (* No explicit instruction name: fall back to the builtin name. *)
  with Not_found -> [builtin_name features name]
  (* For each candidate, if it ends in "_<suffix>" for a suffix listed in
     suffixes_to_strip, drop that suffix to obtain the instruction name;
     otherwise keep the name unchanged. *)
  List.map (fun name' ->
    let underscore = String.rindex name' '_' in
    let our_suffix = String.sub name' (underscore + 1)
      ((String.length name') - underscore - 1)
    let rec strip remaining_suffixes =
      match remaining_suffixes with
    | s::ss when our_suffix = s -> String.sub name' 0 underscore
    strip suffixes_to_strip
  (* Names without '_' (Not_found) or with odd shapes pass through. *)
  with (Not_found | Invalid_argument _) -> name') names
1833 (* Apply a function to each element of a list and then comma-separate
1834 the resulting strings. *)
(* [commas f elts acc] applies [f] to each element of [elts] and appends
   the results to [acc], separated by ", " with no trailing separator.
   Returns [acc] unchanged for an empty list. *)
let rec commas f elts acc =
  (* NOTE(review): the "match elts with", the empty-list arm, and the
     cons-arm header were missing from this copy of the file; they are
     reinstated here (the two surviving arms fix their content). *)
  match elts with
    [] -> acc
  | [elt] -> acc ^ (f elt)
  | elt::elts ->
      (* More elements follow, so a separator is needed after this one. *)
      commas f elts (acc ^ (f elt) ^ ", ")
1842 (* Given a list of features and the shape specified in the "ops" table, apply
1843 a function to each possible shape that the instruction may have.
1844 By default, this is the "shape" entry in "ops". If the features list
1845 contains a Disassembles_as entry, the shapes contained in that entry are
1846 mapped to corresponding outputs and returned in a list. If there is more
1847 than one Disassembles_as entry, only the first is used. *)
let analyze_all_shapes features shape f =
  (* NOTE(review): the try framing and some match arms (the predicate's
     false case, the binding of the found feature) appear to be missing
     from this copy of the file -- verify against the original. *)
  match List.find (fun feature ->
    match feature with Disassembles_as _ -> true
  (* Only the first Disassembles_as entry is used; map [f] over each of
     the shapes it lists. *)
  Disassembles_as shapes -> List.map f shapes
  (* No Disassembles_as entry: use the single default shape from the
     ops table. *)
  with Not_found -> [f shape]