1 (* Common code for ARM NEON header file, documentation and test case
4 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements.  The prefix gives the class
   (S/U = signed/unsigned integer, F = float, P = polynomial,
   I = sign-agnostic integer, B = untyped bits) and the suffix is the
   element width in bits.  [Conv] and [Cast] pair a destination and a
   source element type for conversion operations. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
| Cast of elts * elts | NoElts
(* Classes of element type, one per constructor family of [elts];
   see [elt_class] below for the mapping. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
| ConvClass of eltclass * eltclass | NoType
(* These vector types correspond directly to C types.  The NxM forms are
   D-register (64-bit) and Q-register (128-bit) NEON vector types (see
   [vectype_size] below); the trailing constructors build derived types.
   NOTE(review): T_void and the scalar element types used elsewhere in
   this file are not visible in this declaration -- confirm against the
   full source. *)
type vectype = T_int8x8 | T_int8x16
| T_int16x4 | T_int16x8
| T_int32x2 | T_int32x4
| T_int64x1 | T_int64x2
| T_uint8x8 | T_uint8x16
| T_uint16x4 | T_uint16x8
| T_uint32x2 | T_uint32x4
| T_uint64x1 | T_uint64x2
| T_float32x2 | T_float32x4
| T_poly8x8 | T_poly8x16
| T_poly16x4 | T_poly16x8
| T_immediate of int * int (* Immediate operand; presumably the two ints
                              are its (low, high) bounds -- see the shift
                              builders below.  TODO confirm. *)
| T_float32 | T_arrayof of int * vectype
| T_ptrto of vectype | T_const of vectype
55 (* The meanings of the following are:
56 TImode : "Tetra", two registers (four words).
57 EImode : "hExa", three registers (six words).
58 OImode : "Octa", four registers (eight words).
59 CImode : "dodeCa", six registers (twelve words).
XImode : "heXadeca", eight registers (sixteen words). *)
63 type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* A shape element describes a single operand of an instruction: its
   register class (D/Q/core), an immediate, or a derived form. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
| PtrTo of shape_elt | CstPtrTo of shape_elt
(* These next ones are used only in the test generator. *)
| Element_of_dreg (* Used for "lane" variants. *)
| Element_of_qreg (* Likewise. *)
| All_elements_of_dreg (* Used for "dup" variants. *)
| Alternatives of shape_elt list (* Used for multiple valid operands *)
(* The overall shape of an instruction: how many operands it has and
   what form each one takes.
   NOTE(review): several constructors referenced elsewhere in this file
   (Long, Wide, Narrow, Wide_lane, Wide_scalar, Long_imm, Narrow_imm)
   are not visible in this declaration -- confirm against the full
   source. *)
type shape_form = All of int * shape_elt
| Long_noreg of shape_elt
| Wide_noreg of shape_elt
| Binary_imm of shape_elt
| Use_operands of shape_elt array
| By_scalar of shape_elt
| Unary_scalar of shape_elt
| Pair_result of shape_elt
(* Arity of an intrinsic: the first [vectype] is the return type, the
   remaining ones (0-4 of them) are the argument types. *)
type arity = Arity0 of vectype
| Arity1 of vectype * vectype
| Arity2 of vectype * vectype * vectype
| Arity3 of vectype * vectype * vectype * vectype
| Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes for vector operands (first row: 64-bit D-register modes,
   second row: 128-bit Q-register modes).
   NOTE(review): [mode_of_elt] below also uses QI/HI/SI/SF, which are not
   visible in this declaration -- confirm against the full source. *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
| V16QI | V8HI | V4SI | V4SF | V2DI
139 (* Ops with scalar. *)
161 (* Vector extract. *)
163 (* Reverse elements. *)
167 (* Transposition ops. *)
171 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
177 (* Set/extract lanes from a vector. *)
180 (* Initialize vector from bit pattern. *)
182 (* Set all lanes to same value. *)
184 | Vmov_n (* Is this the same? *)
185 (* Duplicate scalar to all lanes of vector. *)
187 (* Combine vectors. *)
189 (* Get quadword high/low parts. *)
192 (* Convert vectors. *)
195 (* Narrow/lengthen vectors. *)
201 (* Reinterpret casts. *)
204 (* Features used for documentation, to distinguish between some instruction
205 variants, and to signal special requirements (e.g. swapping arguments). *)
214 | Flipped of string (* Builtin name to use with flipped arguments. *)
215 | InfoWord (* Pass an extra word for signage/rounding etc. (always passed
216 for All _, Long, Wide, Narrow shape_forms. *)
217 | ReturnPtr (* Pass explicit pointer to return value as first argument. *)
218 (* A specification as to the shape of instruction expected upon
219 disassembly, used if it differs from the shape used to build the
220 intrinsic prototype. Multiple entries in the constructor's argument
221 indicate that the intrinsic expands to more than one assembly
222 instruction, each with a corresponding shape specified here. *)
223 | Disassembles_as of shape_form list
224 | Builtin_name of string (* Override the name of the builtin. *)
225 (* Override the name of the instruction. If more than one name
is specified, it means that the instruction can have any of those names. *)
228 | Instruction_name of string list
229 (* Mark that the intrinsic yields no instructions, or expands to yield
230 behavior that the test generator cannot test. *)
232 (* Mark that the intrinsic has constant arguments that cannot be set
233 to the defaults (zero for pointers and one otherwise) in the test
234 cases. The function supplied must return the integer to be written
235 into the testcase for the argument number (0-based) supplied to it. *)
236 | Const_valuator of (int -> int)
238 exception MixedMode of elts * elts
240 let rec elt_width = function
241 S8 | U8 | P8 | I8 | B8 -> 8
242 | S16 | U16 | P16 | I16 | B16 -> 16
243 | S32 | F32 | U32 | I32 | B32 -> 32
244 | S64 | U64 | I64 | B64 -> 64
246 let wa = elt_width a and wb = elt_width b in
247 if wa = wb then wa else failwith "element width?"
248 | Cast (a, b) -> raise (MixedMode (a, b))
249 | NoElts -> failwith "No elts"
(* Classify an element type into its [eltclass]; conversions and casts
   classify both sides.
   NOTE(review): F32, P8/P16 and NoElts are not matched in the arms
   visible here -- confirm the full source handles them. *)
let rec elt_class = function
S8 | S16 | S32 | S64 -> Signed
| U8 | U16 | U32 | U64 -> Unsigned
| I8 | I16 | I32 | I64 -> Int
| B8 | B16 | B32 | B64 -> Bits
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
261 let elt_of_class_width c w =
269 | Unsigned, 16 -> U16
270 | Unsigned, 32 -> U32
271 | Unsigned, 64 -> U64
282 | _ -> failwith "Bad element type"
(* Map an element type to the unsigned integer element of equal width. *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width
(* Map an element type to the signed integer element of equal width. *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width
(* Map an element type to the untyped-bits element of equal width. *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
295 let non_signed_variant = function
306 let poly_unsigned_variant v =
307 let elclass = match elt_class v with
310 elt_of_class_width elclass (elt_width v)
313 let w = elt_width elt
314 and c = elt_class elt in
315 elt_of_class_width c (w * 2)
318 let w = elt_width elt
319 and c = elt_class elt in
320 elt_of_class_width c (w / 2)
322 (* If we're trying to find a mode from a "Use_operands" instruction, use the
323 last vector operand as the dominant mode used to invoke the correct builtin.
324 We must stick to this rule in neon.md. *)
325 let find_key_operand operands =
327 match operands.(opno) with
330 | VecArray (_, Qreg) -> Qreg
331 | VecArray (_, Dreg) -> Dreg
334 scan ((Array.length operands) - 1)
336 let rec mode_of_elt elt shape =
337 let flt = match elt_class elt with
338 Float | ConvClass(_, Float) -> true | _ -> false in
340 match elt_width elt with
341 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
342 | _ -> failwith "Bad element width"
344 All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
345 | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
346 [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
347 | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
348 | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
349 [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
350 | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
351 [| QI; HI; if flt then SF else SI; DI |].(idx)
352 | Long | Wide | Wide_lane | Wide_scalar
354 [| V8QI; V4HI; V2SI; DI |].(idx)
355 | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
356 | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
357 | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number. *)
362 let shapemap shape no =
363 let ident = fun x -> x in
365 All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
366 | Binary_imm _ -> ident
367 | Long | Long_noreg _ | Wide_scalar | Long_imm ->
368 [| widen_elt; ident; ident |].(no)
369 | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
370 | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
371 | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
373 (* Register type (D/Q) of an operand, based on shape and operand number. *)
375 let regmap shape no =
377 All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
378 | Long -> [| Qreg; Dreg; Dreg |].(no)
379 | Wide -> [| Qreg; Qreg; Dreg |].(no)
380 | Narrow -> [| Dreg; Qreg; Qreg |].(no)
381 | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
382 | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
383 | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
384 | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
385 | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
386 | Binary_imm reg -> [| reg; reg; Immed |].(no)
387 | Long_imm -> [| Qreg; Dreg; Immed |].(no)
388 | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
389 | Use_operands these -> these.(no)
391 let type_for_elt shape elt no =
392 let elt = (shapemap shape no) elt in
393 let reg = regmap shape no in
394 let rec type_for_reg_elt reg elt =
409 | _ -> failwith "Bad elt type"
424 | _ -> failwith "Bad elt type"
439 | _ -> failwith "Bad elt type"
443 | VecArray (num, sub) ->
444 T_arrayof (num, type_for_reg_elt sub elt)
446 T_ptrto (type_for_reg_elt x elt)
448 T_ptrto (T_const (type_for_reg_elt x elt))
449 (* Anything else is solely for the use of the test generator. *)
452 type_for_reg_elt reg elt
(* Return the size of a vector type, in bits: 64 for the D-register
   types, 128 for the Q-register types.  Raises Not_found for any
   non-vector type. *)
let vectype_size = function
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
| T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
| _ -> raise Not_found
464 let inttype_for_array num elttype =
465 let eltsize = vectype_size elttype in
466 let numwords = (num * eltsize) / 32 in
473 | _ -> failwith ("no int type for size " ^ string_of_int numwords)
475 (* These functions return pairs of (internal, external) types, where "internal"
476 types are those seen by GCC, and "external" are those seen by the assembler.
477 These types aren't necessarily the same, since the intrinsics can munge more
478 than one C type into each assembler opcode. *)
(* Wrap the type-deriving function [func] so that the element type it
   returns is replaced by its sign-agnostic variant. *)
let make_sign_invariant func shape elt =
  let (arity, derived) = func shape elt in
  (arity, non_signed_variant derived)
(* Don't restrict any types: build an arity from the natural types for
   [shape]/[elt] and pair it with [elt] unchanged. *)
let elts_same make_arity shape elt =
  let type_of = type_for_elt shape elt in
  (make_arity type_of, elt)
(* As sign_invar_*, but when sign matters.  The result type (operand 0)
   is repeated as the first argument -- presumably an in/out accumulator,
   hence the "_io" name; the trailing operand is presumably the lane
   index. *)
let elts_same_io_lane =
elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))
495 elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))
(* Two-input form with a trailing fourth operand (presumably the lane
   index). *)
let elts_same_2_lane =
elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))
(* The plain three-operand form has the same shape as the 2-input lane
   form. *)
let elts_same_3 = elts_same_2_lane
503 elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))
506 elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: two operands of the natural type, with a
   result whose elements are the unsigned type of the same width. *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rtype, vtype 1, vtype 2), elt)
524 (* Signed/unsigned invariant comparison. *)
526 let cmp_sign_invar shape elt =
527 let shape', elt' = cmp_sign_matters shape elt in
529 match non_signed_variant elt' with
(* Comparison (VTST) where only the element width matters: like
   [cmp_sign_matters], but the recorded element type is untyped bits. *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rtype, vtype 1, vtype 2), bits_of_elt elt)
(* Register-shift form: operand 2 uses the signed variant of the element
   type regardless of the sign of [elt]. *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt in
  let shift_ty = type_for_elt shape (signed_of_elt elt) 2 in
  (Arity2 (vtype 0, vtype 1, shift_ty), elt)
548 (* Genericised constant-shift type-generating function. *)
550 let const_shift mkimm ?arity ?result shape elt =
551 let op2type = (shapemap shape 2) elt in
552 let op2width = elt_width op2type in
553 let op2 = mkimm op2width
554 and op1 = type_for_elt shape elt 1
558 | Some restriction -> restriction elt in
559 let rtype = type_for_elt shape r_elt 0 in
561 None -> Arity2 (rtype, op1, op2), elt
562 | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts: the immediate operand is built as
   T_immediate (1, w) for element width w. *)
let shift_right shape elt =
  let make_imm width = T_immediate (1, width) in
  const_shift make_imm shape elt
(* Immediate right-shift whose destination type also appears as the
   first argument (presumably shift-and-accumulate, e.g. vsra). *)
let shift_right_acc shape elt =
  let make_imm width = T_immediate (1, width) in
  let make_arity dst op1 op2 = Arity3 (dst, dst, op1, op2) in
  const_shift make_imm ~arity:make_arity shape elt
(* Use for immediate right-shifts when the operation doesn't care about
   signedness. *)
576 let shift_right_sign_invar =
577 make_sign_invariant shift_right
579 (* Immediate right-shift; result is unsigned even when operand is signed. *)
581 let shift_right_to_uns shape elt =
582 const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
(* Immediate left-shift: the immediate operand is built as
   T_immediate (0, w - 1) for element width w. *)
let shift_left shape elt =
  let make_imm width = T_immediate (0, width - 1) in
  const_shift make_imm shape elt
590 (* Immediate left-shift, unsigned result. *)
592 let shift_left_to_uns shape elt =
593 const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
(* Immediate left-shift, don't care about signs: as [shift_left], but the
   recorded element type is made sign-agnostic. *)
let shift_left_sign_invar =
make_sign_invariant shift_left
601 (* Shift left/right and insert: only element size matters. *)
603 let shift_insert shape elt =
605 const_shift (fun imm -> T_immediate (1, imm))
606 ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
607 arity, bits_of_elt elt
(* Extract a lane.  Polynomial elements are recorded as unsigned of the
   same width, and every 32-bit element as untyped bits. *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  let recorded =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other
  in
  (Arity2 (vtype 0, vtype 1, vtype 2), recorded)
(* Set a lane: four operands (cf. the vset_lane entries below), element
   recorded as untyped bits. *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  let arity = Arity3 (vtype 0, vtype 1, vtype 2, vtype 3) in
  (arity, bits_of_elt elt)
(* As [set_lane], but records no element type (NoElts). *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  let arity = Arity3 (vtype 0, vtype 1, vtype 2, vtype 3) in
  (arity, NoElts)
(* Create a vector from a bit pattern: a single U64 argument, with a
   result of the requested element type. *)
let create_vector shape elt =
  let rtype = type_for_elt shape elt 0 in
  let vtype = type_for_elt shape U64 1 in
  (Arity1 (rtype, vtype), elt)
(* Conversion: [elt] must be a Conv or Cast pair.  The source side
   determines the argument types, the destination side the result type.
   Raises Failure on any other element type. *)
let conv make_arity shape elt =
  let dest, src =
    match elt with
    | Conv (d, s) -> d, s
    | Cast (d, s) -> d, s
    | _ -> failwith "Non-conversion element in conversion"
  in
  let rtype = type_for_elt shape dest 0 in
  let vtype = type_for_elt shape src in
  (make_arity rtype vtype, elt)
(* One- and two-argument conversion forms. *)
let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
(* Operation has an unsigned result even if the operands are signed. *)
let dst_unsign make_arity shape elt =
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  let vtype = type_for_elt shape elt in
  (make_arity rtype vtype, elt)
647 let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Wrap [func] so the element type it returns is reduced to untyped
   bits. *)
let make_bits_only func shape elt =
  let (arity, derived) = func shape elt in
  (arity, bits_of_elt derived)
(* Extend operation: four operands, element recorded as untyped bits. *)
let extend shape elt =
  let vtype = type_for_elt shape elt in
  let arity = Arity3 (vtype 0, vtype 1, vtype 2, vtype 3) in
  (arity, bits_of_elt elt)
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops; the element type is recorded as untyped bits. *)
let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let index_ty = type_for_elt shape (poly_unsigned_variant elt) 2 in
  (mkarity vtype index_ty, bits_of_elt elt)
(* Two-operand table look-up, and the "_io" form that repeats the result
   type as the first argument. *)
let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
(* Operations where only bits matter: same shapes as elts_same_*, but
   the recorded element type is reduced to untyped bits. *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store with two operands: void result, element recorded as untyped
   bits. *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  let arity = Arity2 (T_void, vtype 0, vtype 1) in
  (arity, bits_of_elt elt)
(* Store with three operands: void result, element recorded as untyped
   bits. *)
let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  let arity = Arity3 (T_void, vtype 0, vtype 1, vtype 2) in
  (arity, bits_of_elt elt)
685 let make_notype func shape elt =
686 let arity, _ = func shape elt in
(* Forms built with [make_notype], which discards the derived element
   type. *)
let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int); records no
   element type. *)
let bit_select shape elt =
  let vtype = type_for_elt shape elt in
  let itype = type_for_elt shape (unsigned_of_elt elt) in
  let arity = Arity3 (vtype 0, itype 1, vtype 2, vtype 3) in
  (arity, NoElts)
(* Common lists of supported element types. *)
let su_8_32 = [S8; S16; S32; U8; U16; U32] (* signed+unsigned, 8/16/32-bit *)
let su_8_64 = S64 :: U64 :: su_8_32 (* as above, plus the 64-bit variants *)
let su_16_64 = [S16; S32; S64; U16; U32; U64] (* signed+unsigned, 16-64-bit *)
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32 (* plus poly and float *)
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 (* plus poly and float *)
711 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
712 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
713 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
714 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
715 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
716 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
717 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
718 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
719 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
720 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
721 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
722 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
723 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
724 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
725 Narrow, "vRaddhn", sign_invar_2, su_16_64;
727 (* Multiplication. *)
728 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
729 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
730 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
731 elts_same_2, [S16; S32];
732 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
733 elts_same_2, [S16; S32];
735 [Saturating; Rounding; Doubling; High_half;
736 Instruction_name ["vqrdmulh"]],
737 All (3, Dreg), "vqRdmulh",
738 elts_same_2, [S16; S32];
740 [Saturating; Rounding; Doubling; High_half;
741 Instruction_name ["vqrdmulh"]],
742 All (3, Qreg), "vqRdmulhQ",
743 elts_same_2, [S16; S32];
744 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
745 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
747 (* Multiply-accumulate. *)
748 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
749 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
750 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
751 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
753 (* Multiply-subtract. *)
754 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
755 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
756 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
757 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
760 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
761 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
762 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
763 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
764 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
765 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
766 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
767 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
768 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
769 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
770 Narrow, "vRsubhn", sign_invar_2, su_16_64;
772 (* Comparison, equal. *)
773 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
774 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
776 (* Comparison, greater-than or equal. *)
777 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
778 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
780 (* Comparison, less-than or equal. *)
781 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
783 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
784 All (3, Qreg), "vcleQ", cmp_sign_matters,
787 (* Comparison, greater-than. *)
788 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
789 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
791 (* Comparison, less-than. *)
792 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
794 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
795 All (3, Qreg), "vcltQ", cmp_sign_matters,
798 (* Compare absolute greater-than or equal. *)
799 Vcage, [Instruction_name ["vacge"]],
800 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
801 Vcage, [Instruction_name ["vacge"]],
802 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
804 (* Compare absolute less-than or equal. *)
805 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
806 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
807 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
808 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
810 (* Compare absolute greater-than or equal. *)
811 Vcagt, [Instruction_name ["vacgt"]],
812 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
813 Vcagt, [Instruction_name ["vacgt"]],
814 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
816 (* Compare absolute less-than or equal. *)
817 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
818 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
819 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
820 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
823 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
824 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
826 (* Absolute difference. *)
827 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
828 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
829 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
831 (* Absolute difference and accumulate. *)
832 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
833 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
834 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
837 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
838 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
841 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
842 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
845 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
846 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
847 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
849 (* Pairwise add, widen and accumulate. *)
850 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
851 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
853 (* Folding maximum, minimum. *)
854 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
855 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
857 (* Reciprocal step. *)
858 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
859 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
860 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
861 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
863 (* Vector shift left. *)
864 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
865 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
866 Vshl, [Instruction_name ["vrshl"]; Rounding],
867 All (3, Dreg), "vRshl", reg_shift, su_8_64;
868 Vshl, [Instruction_name ["vrshl"]; Rounding],
869 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
870 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
871 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
872 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
873 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
874 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
875 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
877 (* Vector shift right by constant. *)
878 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
879 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
880 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
881 "vRshr_n", shift_right, su_8_64;
882 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
883 "vRshrQ_n", shift_right, su_8_64;
884 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
885 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
886 shift_right_sign_invar, su_16_64;
887 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
888 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
889 "vqRshrn_n", shift_right, su_16_64;
890 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
891 shift_right_to_uns, [S16; S32; S64];
892 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
893 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
895 (* Vector shift left by constant. *)
896 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
897 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
898 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
899 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
900 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
901 shift_left_to_uns, [S8; S16; S32; S64];
902 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
903 shift_left_to_uns, [S8; S16; S32; S64];
904 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
906 (* Vector shift right by constant and accumulate. *)
907 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
908 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
909 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
910 "vRsra_n", shift_right_acc, su_8_64;
911 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
912 "vRsraQ_n", shift_right_acc, su_8_64;
914 (* Vector shift right and insert. *)
915 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
916 P8 :: P16 :: su_8_64;
917 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
918 P8 :: P16 :: su_8_64;
920 (* Vector shift left and insert. *)
921 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
922 P8 :: P16 :: su_8_64;
923 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
924 P8 :: P16 :: su_8_64;
926 (* Absolute value. *)
927 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
928 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
929 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
930 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
933 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
934 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
935 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
936 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
939 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
940 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
942 (* Count leading sign bits. *)
943 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
944 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
946 (* Count leading zeros. *)
947 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
948 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
950 (* Count number of set bits. *)
951 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
952 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
954 (* Reciprocal estimate. *)
955 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
956 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
958 (* Reciprocal square-root estimate. *)
959 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
960 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
962 (* Get lanes from a vector. *)
964 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
965 Instruction_name ["vmov"]],
966 Use_operands [| Corereg; Dreg; Immed |],
967 "vget_lane", get_lane, pf_su_8_32;
970 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
971 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
972 Use_operands [| Corereg; Dreg; Immed |],
973 "vget_lane", notype_2, [S64; U64];
975 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
976 Instruction_name ["vmov"]],
977 Use_operands [| Corereg; Qreg; Immed |],
978 "vgetQ_lane", get_lane, pf_su_8_32;
981 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
982 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
983 Use_operands [| Corereg; Qreg; Immed |],
984 "vgetQ_lane", notype_2, [S64; U64];
986 (* Set lanes in a vector. *)
987 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
988 Instruction_name ["vmov"]],
989 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
990 set_lane, pf_su_8_32;
991 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
992 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
993 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
994 set_lane_notype, [S64; U64];
995 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
996 Instruction_name ["vmov"]],
997 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
998 set_lane, pf_su_8_32;
999 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1000 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1001 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1002 set_lane_notype, [S64; U64];
1004 (* Create vector from literal bit pattern. *)
1006 [No_op], (* Not really, but it can yield various things that are too
1007 hard for the test generator at this time. *)
1008 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1011 (* Set all lanes to the same value. *)
1013 [Disassembles_as [Use_operands [| Dreg;
1014 Alternatives [ Corereg;
1015 Element_of_dreg ] |]]],
1016 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1019 [Instruction_name ["vmov"];
1020 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1021 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1024 [Disassembles_as [Use_operands [| Qreg;
1025 Alternatives [ Corereg;
1026 Element_of_dreg ] |]]],
1027 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1030 [Instruction_name ["vmov"];
1031 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1032 Use_operands [| Dreg; Corereg; Corereg |]]],
1033 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1036 (* These are just aliases for the above. *)
1038 [Builtin_name "vdup_n";
1039 Disassembles_as [Use_operands [| Dreg;
1040 Alternatives [ Corereg;
1041 Element_of_dreg ] |]]],
1042 Use_operands [| Dreg; Corereg |],
1043 "vmov_n", bits_1, pf_su_8_32;
1045 [Builtin_name "vdup_n";
1046 Instruction_name ["vmov"];
1047 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1048 Use_operands [| Dreg; Corereg |],
1049 "vmov_n", notype_1, [S64; U64];
1051 [Builtin_name "vdupQ_n";
1052 Disassembles_as [Use_operands [| Qreg;
1053 Alternatives [ Corereg;
1054 Element_of_dreg ] |]]],
1055 Use_operands [| Qreg; Corereg |],
1056 "vmovQ_n", bits_1, pf_su_8_32;
1058 [Builtin_name "vdupQ_n";
1059 Instruction_name ["vmov"];
1060 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1061 Use_operands [| Dreg; Corereg; Corereg |]]],
1062 Use_operands [| Qreg; Corereg |],
1063 "vmovQ_n", notype_1, [S64; U64];
1065 (* Duplicate, lane version. We can't use Use_operands here because the
1066 rightmost register (always Dreg) would be picked up by find_key_operand,
1067 when we want the leftmost register to be used in this case (otherwise
1068      the modes are indistinguishable in neon.md, etc.). *)
1070 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1071 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1073 [No_op; Const_valuator (fun _ -> 0)],
1074 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1076 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1077 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1079 [No_op; Const_valuator (fun _ -> 0)],
1080 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1082 (* Combining vectors. *)
1084 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1087 (* Splitting vectors. *)
1089 Use_operands [| Dreg; Qreg |], "vget_high",
1090 notype_1, pf_su_8_64;
1091 Vget_low, [Instruction_name ["vmov"];
1092 Disassembles_as [Use_operands [| Dreg; Dreg |]]],
1093 Use_operands [| Dreg; Qreg |], "vget_low",
1094 notype_1, pf_su_8_64;
1097 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1098 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1099 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1100 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1101 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1102 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1103 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1104 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1106 (* Move, narrowing. *)
1107 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1108 Narrow, "vmovn", sign_invar_1, su_16_64;
1109 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1110 Narrow, "vqmovn", elts_same_1, su_16_64;
1112 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1113 Narrow, "vqmovun", dst_unsign_1,
1117 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1118 Long, "vmovl", elts_same_1, su_8_32;
1122 [Instruction_name ["vtbl"];
1123 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1124 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1125 Vtbl 2, [Instruction_name ["vtbl"]],
1126 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1128 Vtbl 3, [Instruction_name ["vtbl"]],
1129 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1131 Vtbl 4, [Instruction_name ["vtbl"]],
1132 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1135 (* Extended table lookup. *)
1137 [Instruction_name ["vtbx"];
1138 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1139 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1140 Vtbx 2, [Instruction_name ["vtbx"]],
1141 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1143 Vtbx 3, [Instruction_name ["vtbx"]],
1144 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1146 Vtbx 4, [Instruction_name ["vtbx"]],
1147 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1150 (* Multiply, lane. (note: these were undocumented at the time of
1152 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1153 [S16; S32; U16; U32; F32];
1154 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1155 [S16; S32; U16; U32; F32];
1157 (* Multiply-accumulate, lane. *)
1158 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1159 [S16; S32; U16; U32; F32];
1160 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1161 [S16; S32; U16; U32; F32];
1162 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1163 [S16; S32; U16; U32];
1164 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1165 elts_same_io_lane, [S16; S32];
1167 (* Multiply-subtract, lane. *)
1168 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1169 [S16; S32; U16; U32; F32];
1170 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1171 [S16; S32; U16; U32; F32];
1172 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1173 [S16; S32; U16; U32];
1174 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1175 elts_same_io_lane, [S16; S32];
1177 (* Long multiply, lane. *)
1179 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1181 (* Saturating doubling long multiply, lane. *)
1182 Vqdmull_lane, [Saturating; Doubling],
1183 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1185 (* Saturating doubling long multiply high, lane. *)
1186 Vqdmulh_lane, [Saturating; Halving],
1187 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1188 Vqdmulh_lane, [Saturating; Halving],
1189 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1190 Vqdmulh_lane, [Saturating; Halving; Rounding;
1191 Instruction_name ["vqrdmulh"]],
1192 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1193 Vqdmulh_lane, [Saturating; Halving; Rounding;
1194 Instruction_name ["vqrdmulh"]],
1195 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1197 (* Vector multiply by scalar. *)
1199 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1200 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1201 sign_invar_2, [S16; S32; U16; U32; F32];
1203 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1204 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1205 sign_invar_2, [S16; S32; U16; U32; F32];
1207 (* Vector long multiply by scalar. *)
1208 Vmull_n, [Instruction_name ["vmull"];
1209 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1210 Wide_scalar, "vmull_n",
1211 elts_same_2, [S16; S32; U16; U32];
1213 (* Vector saturating doubling long multiply by scalar. *)
1214 Vqdmull_n, [Saturating; Doubling;
1215 Disassembles_as [Use_operands [| Qreg; Dreg;
1216 Element_of_dreg |]]],
1217 Wide_scalar, "vqdmull_n",
1218 elts_same_2, [S16; S32];
1220 (* Vector saturating doubling long multiply high by scalar. *)
1222 [Saturating; Halving; InfoWord;
1223 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1224 Use_operands [| Qreg; Qreg; Corereg |],
1225 "vqdmulhQ_n", elts_same_2, [S16; S32];
1227 [Saturating; Halving; InfoWord;
1228 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1229 Use_operands [| Dreg; Dreg; Corereg |],
1230 "vqdmulh_n", elts_same_2, [S16; S32];
1232 [Saturating; Halving; Rounding; InfoWord;
1233 Instruction_name ["vqrdmulh"];
1234 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1235 Use_operands [| Qreg; Qreg; Corereg |],
1236 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1238 [Saturating; Halving; Rounding; InfoWord;
1239 Instruction_name ["vqrdmulh"];
1240 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1241 Use_operands [| Dreg; Dreg; Corereg |],
1242 "vqRdmulh_n", elts_same_2, [S16; S32];
1244 (* Vector multiply-accumulate by scalar. *)
1246 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1247 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1248 sign_invar_io, [S16; S32; U16; U32; F32];
1250 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1251 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1252 sign_invar_io, [S16; S32; U16; U32; F32];
1253 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1254 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1257 (* Vector multiply subtract by scalar. *)
1259 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1260 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1261 sign_invar_io, [S16; S32; U16; U32; F32];
1263 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1264 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1265 sign_invar_io, [S16; S32; U16; U32; F32];
1266 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1267 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1270 (* Vector extract. *)
1271 Vext, [Const_valuator (fun _ -> 0)],
1272 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1274 Vext, [Const_valuator (fun _ -> 0)],
1275 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1278 (* Reverse elements. *)
1279 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1280 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1281 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1282 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1283 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1284 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1286 (* Bit selection. *)
1288 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1289 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1290 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1293 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1294 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1295 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1298 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1299 generating good code for intrinsics which return structure types --
1300 builtins work well by themselves (and understand that the values being
1301 stored on e.g. the stack also reside in registers, so can optimise the
1302 stores away entirely if the results are used immediately), but
1303 intrinsics are very much less efficient. Maybe something can be improved
1304 re: inlining, or tweaking the ABI used for intrinsics (a special call
1307 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1308 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1311 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1312 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1314 (* Unzip elements. *)
1315 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1316 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1318 (* Element/structure loads. VLD1 variants. *)
1320 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1321 CstPtrTo Corereg |]]],
1322 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1324 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1325 CstPtrTo Corereg |]]],
1326 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1330 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1331 CstPtrTo Corereg |]]],
1332 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1333 "vld1_lane", bits_3, pf_su_8_32;
1335 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1336 CstPtrTo Corereg |]];
1337 Const_valuator (fun _ -> 0)],
1338 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1339 "vld1_lane", bits_3, [S64; U64];
1341 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1342 CstPtrTo Corereg |]]],
1343 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1344 "vld1Q_lane", bits_3, pf_su_8_32;
1346 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1347 CstPtrTo Corereg |]]],
1348 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1349 "vld1Q_lane", bits_3, [S64; U64];
1352 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1353 CstPtrTo Corereg |]]],
1354 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1357 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1358 CstPtrTo Corereg |]]],
1359 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1362 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1363 CstPtrTo Corereg |]]],
1364 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1367 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1368 CstPtrTo Corereg |]]],
1369 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1372 (* VST1 variants. *)
1373 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1375 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1376 store_1, pf_su_8_64;
1377 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1379 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1380 store_1, pf_su_8_64;
1383 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1384 CstPtrTo Corereg |]]],
1385 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1386 "vst1_lane", store_3, pf_su_8_32;
1388 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1389 CstPtrTo Corereg |]];
1390 Const_valuator (fun _ -> 0)],
1391 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1392 "vst1_lane", store_3, [U64; S64];
1394 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1395 CstPtrTo Corereg |]]],
1396 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1397 "vst1Q_lane", store_3, pf_su_8_32;
1399 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1400 CstPtrTo Corereg |]]],
1401 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1402 "vst1Q_lane", store_3, [U64; S64];
1404 (* VLD2 variants. *)
1405 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1406 "vld2", bits_1, pf_su_8_32;
1407 Vldx 2, [Instruction_name ["vld1"]],
1408 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1409 "vld2", bits_1, [S64; U64];
1410 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1411 CstPtrTo Corereg |];
1412 Use_operands [| VecArray (2, Dreg);
1413 CstPtrTo Corereg |]]],
1414 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1415 "vld2Q", bits_1, pf_su_8_32;
1418 [Disassembles_as [Use_operands
1419 [| VecArray (2, Element_of_dreg);
1420 CstPtrTo Corereg |]]],
1421 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1422 VecArray (2, Dreg); Immed |],
1423 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1425 [Disassembles_as [Use_operands
1426 [| VecArray (2, Element_of_dreg);
1427 CstPtrTo Corereg |]]],
1428 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1429 VecArray (2, Qreg); Immed |],
1430 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1433 [Disassembles_as [Use_operands
1434 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1435 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1436 "vld2_dup", bits_1, pf_su_8_32;
1438 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1439 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1440 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1441 "vld2_dup", bits_1, [S64; U64];
1443 (* VST2 variants. *)
1444 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1446 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1447 store_1, pf_su_8_32;
1448 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1450 Instruction_name ["vst1"]],
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1452 store_1, [S64; U64];
1453 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1455 Use_operands [| VecArray (2, Dreg);
1457 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1458 store_1, pf_su_8_32;
1461 [Disassembles_as [Use_operands
1462 [| VecArray (2, Element_of_dreg);
1463 CstPtrTo Corereg |]]],
1464 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1465 store_3, P8 :: P16 :: F32 :: su_8_32;
1467 [Disassembles_as [Use_operands
1468 [| VecArray (2, Element_of_dreg);
1469 CstPtrTo Corereg |]]],
1470 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1471 store_3, [P16; F32; U16; U32; S16; S32];
1473 (* VLD3 variants. *)
1474 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1475 "vld3", bits_1, pf_su_8_32;
1476 Vldx 3, [Instruction_name ["vld1"]],
1477 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1478 "vld3", bits_1, [S64; U64];
1479 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1480 CstPtrTo Corereg |];
1481 Use_operands [| VecArray (3, Dreg);
1482 CstPtrTo Corereg |]]],
1483 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1484 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1487 [Disassembles_as [Use_operands
1488 [| VecArray (3, Element_of_dreg);
1489 CstPtrTo Corereg |]]],
1490 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1491 VecArray (3, Dreg); Immed |],
1492 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1494 [Disassembles_as [Use_operands
1495 [| VecArray (3, Element_of_dreg);
1496 CstPtrTo Corereg |]]],
1497 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1498 VecArray (3, Qreg); Immed |],
1499 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1502 [Disassembles_as [Use_operands
1503 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1504 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1505 "vld3_dup", bits_1, pf_su_8_32;
1507 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1508 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1509 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1510 "vld3_dup", bits_1, [S64; U64];
1512 (* VST3 variants. *)
1513 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1515 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1516 store_1, pf_su_8_32;
1517 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1519 Instruction_name ["vst1"]],
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1521 store_1, [S64; U64];
1522 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1524 Use_operands [| VecArray (3, Dreg);
1526 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1527 store_1, pf_su_8_32;
1530 [Disassembles_as [Use_operands
1531 [| VecArray (3, Element_of_dreg);
1532 CstPtrTo Corereg |]]],
1533 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1534 store_3, P8 :: P16 :: F32 :: su_8_32;
1536 [Disassembles_as [Use_operands
1537 [| VecArray (3, Element_of_dreg);
1538 CstPtrTo Corereg |]]],
1539 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1540 store_3, [P16; F32; U16; U32; S16; S32];
1542 (* VLD4/VST4 variants. *)
1543 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1544 "vld4", bits_1, pf_su_8_32;
1545 Vldx 4, [Instruction_name ["vld1"]],
1546 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1547 "vld4", bits_1, [S64; U64];
1548 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1549 CstPtrTo Corereg |];
1550 Use_operands [| VecArray (4, Dreg);
1551 CstPtrTo Corereg |]]],
1552 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1553 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1556 [Disassembles_as [Use_operands
1557 [| VecArray (4, Element_of_dreg);
1558 CstPtrTo Corereg |]]],
1559 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1560 VecArray (4, Dreg); Immed |],
1561 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1563 [Disassembles_as [Use_operands
1564 [| VecArray (4, Element_of_dreg);
1565 CstPtrTo Corereg |]]],
1566 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1567 VecArray (4, Qreg); Immed |],
1568 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1571 [Disassembles_as [Use_operands
1572 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1573 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1574 "vld4_dup", bits_1, pf_su_8_32;
1576 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1577 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1578 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1579 "vld4_dup", bits_1, [S64; U64];
1581 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1583 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1584 store_1, pf_su_8_32;
1585 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1587 Instruction_name ["vst1"]],
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1589 store_1, [S64; U64];
1590 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1592 Use_operands [| VecArray (4, Dreg);
1594 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1595 store_1, pf_su_8_32;
1598 [Disassembles_as [Use_operands
1599 [| VecArray (4, Element_of_dreg);
1600 CstPtrTo Corereg |]]],
1601 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1602 store_3, P8 :: P16 :: F32 :: su_8_32;
1604 [Disassembles_as [Use_operands
1605 [| VecArray (4, Element_of_dreg);
1606 CstPtrTo Corereg |]]],
1607 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1608 store_3, [P16; F32; U16; U32; S16; S32];
1610 (* Logical operations. And. *)
1611 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
1612 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1615 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
1616 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1619 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
1620 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1622 (* Bic (And-not). *)
1623 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
1624 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1627 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
1628 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1632 let elems = P8 :: P16 :: F32 :: su_8_64 in
1635 let types = List.fold_right
1636 (fun convfrom acc ->
1637 if convfrom <> convto then
1638 Cast (convto, convfrom) :: acc
1644 let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
1645 "vreinterpret", conv_1, types
1646 and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
1647 "vreinterpretQ", conv_1, types in
1648 dconv :: qconv :: acc)
1652 (* Output routines. *)
(* Render an element type as the suffix used in intrinsic/builtin names,
   e.g. S8 -> "s8".  Bit-pattern elements (B8..B64) carry no class letter;
   a conversion or cast prints both halves joined by '_'.  NoElts is a
   programming error.  *)
let rec string_of_elt elt =
  match elt with
    S8 -> "s8"   | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8"   | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8"   | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | P8 -> "p8"   | P16 -> "p16" | F32 -> "f32"
  | B8 -> "8"    | B16 -> "16"  | B32 -> "32"  | B64 -> "64"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
(* Like string_of_elt, but join the two halves of a conversion/cast with '.'
   rather than '_'.  The [match elt with] line was lost in this copy of the
   file (the listing numbering jumps 1663 -> 1665); restored here so the
   definition is well-formed again.  *)
let string_of_elt_dots elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
(* Map a vector type to its C source-level name.  Public types get a "_t"
   suffix, applied through the [affix] callback; builtin internal types and
   immediates are emitted verbatim.  *)
let string_of_vectype vt =
  let rec name affix = function
    T_int8x8 -> affix "int8x8"
  | T_int8x16 -> affix "int8x16"
  | T_int16x4 -> affix "int16x4"
  | T_int16x8 -> affix "int16x8"
  | T_int32x2 -> affix "int32x2"
  | T_int32x4 -> affix "int32x4"
  | T_int64x1 -> affix "int64x1"
  | T_int64x2 -> affix "int64x2"
  | T_uint8x8 -> affix "uint8x8"
  | T_uint8x16 -> affix "uint8x16"
  | T_uint16x4 -> affix "uint16x4"
  | T_uint16x8 -> affix "uint16x8"
  | T_uint32x2 -> affix "uint32x2"
  | T_uint32x4 -> affix "uint32x4"
  | T_uint64x1 -> affix "uint64x1"
  | T_uint64x2 -> affix "uint64x2"
  | T_float32x2 -> affix "float32x2"
  | T_float32x4 -> affix "float32x4"
  | T_poly8x8 -> affix "poly8x8"
  | T_poly8x16 -> affix "poly8x16"
  | T_poly16x4 -> affix "poly16x4"
  | T_poly16x8 -> affix "poly16x8"
  | T_int8 -> affix "int8"
  | T_int16 -> affix "int16"
  | T_int32 -> affix "int32"
  | T_int64 -> affix "int64"
  | T_uint8 -> affix "uint8"
  | T_uint16 -> affix "uint16"
  | T_uint32 -> affix "uint32"
  | T_uint64 -> affix "uint64"
  | T_poly8 -> affix "poly8"
  | T_poly16 -> affix "poly16"
  | T_float32 -> affix "float32"
  | T_immediate _ -> "const int"
  (* NOTE(review): this copy of the file skips one source line here (the
     listing numbering jumps 1703 -> 1705); an arm may have been elided
     between the immediate and builtin cases -- confirm against the
     original.  *)
  | T_intQI -> "__builtin_neon_qi"
  | T_intHI -> "__builtin_neon_hi"
  | T_intSI -> "__builtin_neon_si"
  | T_intDI -> "__builtin_neon_di"
  | T_floatSF -> "__builtin_neon_sf"
  | T_arrayof (num, base) ->
      (* Arrays use the base name without its "_t", then get the suffix
         themselves, e.g. "int8x8" + "x2" -> "int8x8x2_t".  *)
      let basename = name (fun x -> x) base in
      affix (Printf.sprintf "%sx%d" basename num)
  | T_ptrto x ->
      (* Restored arm: this "%s *" body was orphaned by an elided line;
         T_ptrto is declared in the vectype type above.  *)
      let basename = name affix x in
      Printf.sprintf "%s *" basename
  | T_const x ->
      (* Restored arm: likewise for the "const %s" body and T_const.  *)
      let basename = name affix x in
      Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
(* Name of the GCC builtin type backing an opaque multi-register value
   (see the TImode/EImode/... comment near the inttype declaration).  *)
let string_of_inttype it =
  match it with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lower-case spelling of each machine mode, as used in builtin suffixes.  *)
(* NOTE(review): this copy of the file skips source lines 1733-1734 here;
   one or more trailing match arms (e.g. a scalar-float mode, cf. T_floatSF
   above) may have been elided -- confirm against the original.  *)
let string_of_mode = function
    V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
  | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
  | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
1735 (* Use uppercase chars for letters which form part of the intrinsic name, but
1736 should be omitted from the builtin name (the info is passed in an extra
1737 argument, instead). *)
(* The intrinsic's public name is simply the table name lower-cased; the
   comment above explains why upper-case characters appear at all.  *)
let intrinsic_name name = String.lowercase name
1740 (* Allow the name of the builtin to be overridden by things (e.g. Flipped)
1741 found in the features list. *)
(* Compute the builtin's name from [name], honouring any Flipped or
   Builtin_name override found in [features], then dropping every character
   that lower-casing would change (i.e. upper-case letters; lower-case
   letters, digits and '_' survive).  Several interior lines were elided in
   this copy of the file (listing numbers 1744-45, 1747-48 and the final
   1752 [Buffer.contents buf]); they are restored here so the function is
   complete -- confirm against the original.  *)
let builtin_name features name =
  (* The last Flipped/Builtin_name entry encountered by fold_right (i.e. the
     first in list order) wins outright.  *)
  let name = List.fold_right
               (fun feature acc ->
                  match feature with
                    Flipped x | Builtin_name x -> x
                  | _ -> acc) features name in
  let islower x = let str = String.make 1 x in (String.lowercase str) = str
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  Buffer.contents buf
1754 (* Transform an arity into a list of strings. *)
(* Transform an arity into the list of C type names for each of its slots,
   in declaration order.  The [match a with] line was elided in this copy of
   the file (listing numbering jumps 1755 -> 1757); restored here.  *)
let strings_of_arity a =
  match a with
  | Arity0 vt -> [string_of_vectype vt]
  | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
  | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
                               string_of_vectype vt2;
                               string_of_vectype vt3]
  | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
                                    string_of_vectype vt2;
                                    string_of_vectype vt3;
                                    string_of_vectype vt4]
  | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
                                         string_of_vectype vt2;
                                         string_of_vectype vt3;
                                         string_of_vectype vt4;
                                         string_of_vectype vt5]
1772 (* Suffixes on the end of builtin names that are to be stripped in order
1773 to obtain the name used as an instruction. They are only stripped if
1774 preceded immediately by an underscore. *)
(* Consumed by get_insn_names below when recovering an instruction name.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
1777 (* Get the possible names of an instruction corresponding to a "name" from the
1778 ops table. This is done by getting the equivalent builtin name and
1779 stripping any suffixes from the list at the top of this file, unless
1780 the features list presents with an Instruction_name entry, in which
1781 case that is used; or unless the features list presents with a Flipped
1782 entry, in which case that is used. If both such entries are present,
1783 the first in the list will be chosen. *)
(* Possible instruction names for [name]: an Instruction_name or Flipped
   feature wins (first such entry in the list); otherwise the builtin name
   with any "_n"/"_lane"/"_dup" suffix stripped.  Numerous scaffolding lines
   were elided in this copy of the file (listing numbers 1785-86, 1789,
   1791, 1794-95, 1797-98, 1800, 1804, 1807, 1809-10); they are restored
   here from the visible arms and the descriptive comment above -- confirm
   against the original.  *)
let get_insn_names features name =
  let names =
    try
      begin
        match List.find (fun feature -> match feature with
                                          Instruction_name _ -> true
                                        | Flipped _ -> true
                                        | _ -> false) features
        with
          Instruction_name names -> names
        | Flipped name -> [name]
        | _ -> assert false
      end
    with Not_found -> [builtin_name features name]
  in
    (* Strip a recognized suffix only when it immediately follows the last
       underscore; any name without an underscore is left unchanged.  *)
    List.map (fun name' ->
      try
        let underscore = String.rindex name' '_' in
        let our_suffix = String.sub name' (underscore + 1)
                                   ((String.length name') - underscore - 1)
        in
          let rec strip remaining_suffixes =
            match remaining_suffixes with
              [] -> name'
            | s::ss when our_suffix = s -> String.sub name' 0 underscore
            | _::ss -> strip ss
          in
            strip suffixes_to_strip
      with (Not_found | Invalid_argument _) -> name') names
1815 (* Apply a function to each element of a list and then comma-separate
1816 the resulting strings. *)
(* Apply [f] to each element of [elts] and comma-separate the results,
   appended to the accumulator [acc].  The match header and the empty/cons
   cases were elided in this copy of the file (listing numbers 1818-19,
   1821); restored here, as implied by the surviving singleton case.  *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [elt] -> acc ^ (f elt)
  | elt::elts ->
      commas f elts (acc ^ (f elt) ^ ", ")
1824 (* Given a list of features and the shape specified in the "ops" table, apply
1825 a function to each possible shape that the instruction may have.
1826 By default, this is the "shape" entry in "ops". If the features list
1827 contains a Disassembles_as entry, the shapes contained in that entry are
1828 mapped to corresponding outputs and returned in a list. If there is more
1829 than one Disassembles_as entry, only the first is used. *)
(* Apply [f] to each shape the instruction may take: the shapes of the first
   Disassembles_as entry in [features] if there is one, else just [shape]
   (see the descriptive comment above).  The [try], predicate closer and
   impossible-case arm were elided in this copy of the file (listing numbers
   1831, 1834-35, 1837); restored here -- confirm against the original.  *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
                       match feature with Disassembles_as _ -> true
                                        | _ -> false) features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]