1 /* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.
3 Copyright (C) 2008 Free Software Foundation, Inc.
4 Contributed by CodeSourcery.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 2, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the
20 Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */
30 #ifndef _GCC_LOONGSON_H
31 #define _GCC_LOONGSON_H
33 #if !defined(__mips_loongson_vector_rev)
34 # error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
/* 8-byte vectors with unsigned elements: eight bytes, four halfwords
   or two words.  */
typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));

/* 8-byte vectors with signed elements: eight bytes, four halfwords
   or two words.  */
typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
/* SIMD intrinsics.
   Unless otherwise noted, calls to the functions below will expand into
   precisely one machine instruction, modulo any moves required to
   satisfy register allocation constraints.  */
58 /* Pack with signed saturation. */
59 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
60 packsswh (int32x2_t s, int32x2_t t)
62 return __builtin_loongson_packsswh (s, t);
65 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
66 packsshb (int16x4_t s, int16x4_t t)
68 return __builtin_loongson_packsshb (s, t);
71 /* Pack with unsigned saturation. */
72 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
73 packushb (uint16x4_t s, uint16x4_t t)
75 return __builtin_loongson_packushb (s, t);
78 /* Vector addition, treating overflow by wraparound. */
79 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
80 paddw_u (uint32x2_t s, uint32x2_t t)
82 return __builtin_loongson_paddw_u (s, t);
85 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
86 paddh_u (uint16x4_t s, uint16x4_t t)
88 return __builtin_loongson_paddh_u (s, t);
91 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
92 paddb_u (uint8x8_t s, uint8x8_t t)
94 return __builtin_loongson_paddb_u (s, t);
97 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
98 paddw_s (int32x2_t s, int32x2_t t)
100 return __builtin_loongson_paddw_s (s, t);
103 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
104 paddh_s (int16x4_t s, int16x4_t t)
106 return __builtin_loongson_paddh_s (s, t);
109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
110 paddb_s (int8x8_t s, int8x8_t t)
112 return __builtin_loongson_paddb_s (s, t);
115 /* Addition of doubleword integers, treating overflow by wraparound. */
/* Wraparound addition of two unsigned doublewords, via
   __builtin_loongson_paddd_u.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  uint64_t sum = __builtin_loongson_paddd_u (s, t);
  return sum;
}
/* Wraparound addition of two signed doublewords, via
   __builtin_loongson_paddd_s.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  int64_t sum = __builtin_loongson_paddd_s (s, t);
  return sum;
}
128 /* Vector addition, treating overflow by signed saturation. */
129 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
130 paddsh (int16x4_t s, int16x4_t t)
132 return __builtin_loongson_paddsh (s, t);
135 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
136 paddsb (int8x8_t s, int8x8_t t)
138 return __builtin_loongson_paddsb (s, t);
141 /* Vector addition, treating overflow by unsigned saturation. */
142 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
143 paddush (uint16x4_t s, uint16x4_t t)
145 return __builtin_loongson_paddush (s, t);
148 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
149 paddusb (uint8x8_t s, uint8x8_t t)
151 return __builtin_loongson_paddusb (s, t);
154 /* Logical AND NOT. */
/* Logical AND NOT on two unsigned doublewords; S and T are passed in
   that order to __builtin_loongson_pandn_ud.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  uint64_t masked = __builtin_loongson_pandn_ud (s, t);
  return masked;
}
161 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
162 pandn_uw (uint32x2_t s, uint32x2_t t)
164 return __builtin_loongson_pandn_uw (s, t);
167 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
168 pandn_uh (uint16x4_t s, uint16x4_t t)
170 return __builtin_loongson_pandn_uh (s, t);
173 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
174 pandn_ub (uint8x8_t s, uint8x8_t t)
176 return __builtin_loongson_pandn_ub (s, t);
/* Logical AND NOT on two signed doublewords; S and T are passed in
   that order to __builtin_loongson_pandn_sd.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  int64_t masked = __builtin_loongson_pandn_sd (s, t);
  return masked;
}
185 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
186 pandn_sw (int32x2_t s, int32x2_t t)
188 return __builtin_loongson_pandn_sw (s, t);
191 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
192 pandn_sh (int16x4_t s, int16x4_t t)
194 return __builtin_loongson_pandn_sh (s, t);
197 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
198 pandn_sb (int8x8_t s, int8x8_t t)
200 return __builtin_loongson_pandn_sb (s, t);
204 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
205 pavgh (uint16x4_t s, uint16x4_t t)
207 return __builtin_loongson_pavgh (s, t);
210 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
211 pavgb (uint8x8_t s, uint8x8_t t)
213 return __builtin_loongson_pavgb (s, t);
217 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
218 pcmpeqw_u (uint32x2_t s, uint32x2_t t)
220 return __builtin_loongson_pcmpeqw_u (s, t);
223 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
224 pcmpeqh_u (uint16x4_t s, uint16x4_t t)
226 return __builtin_loongson_pcmpeqh_u (s, t);
229 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
230 pcmpeqb_u (uint8x8_t s, uint8x8_t t)
232 return __builtin_loongson_pcmpeqb_u (s, t);
235 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
236 pcmpeqw_s (int32x2_t s, int32x2_t t)
238 return __builtin_loongson_pcmpeqw_s (s, t);
241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
242 pcmpeqh_s (int16x4_t s, int16x4_t t)
244 return __builtin_loongson_pcmpeqh_s (s, t);
247 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
248 pcmpeqb_s (int8x8_t s, int8x8_t t)
250 return __builtin_loongson_pcmpeqb_s (s, t);
253 /* Greater-than test. */
254 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
255 pcmpgtw_u (uint32x2_t s, uint32x2_t t)
257 return __builtin_loongson_pcmpgtw_u (s, t);
260 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
261 pcmpgth_u (uint16x4_t s, uint16x4_t t)
263 return __builtin_loongson_pcmpgth_u (s, t);
266 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
267 pcmpgtb_u (uint8x8_t s, uint8x8_t t)
269 return __builtin_loongson_pcmpgtb_u (s, t);
272 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
273 pcmpgtw_s (int32x2_t s, int32x2_t t)
275 return __builtin_loongson_pcmpgtw_s (s, t);
278 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
279 pcmpgth_s (int16x4_t s, int16x4_t t)
281 return __builtin_loongson_pcmpgth_s (s, t);
284 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
285 pcmpgtb_s (int8x8_t s, int8x8_t t)
287 return __builtin_loongson_pcmpgtb_s (s, t);
290 /* Extract halfword. */
291 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
292 pextrh_u (uint16x4_t s, int field /* 0--3 */)
294 return __builtin_loongson_pextrh_u (s, field);
297 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
298 pextrh_s (int16x4_t s, int field /* 0--3 */)
300 return __builtin_loongson_pextrh_s (s, field);
303 /* Insert halfword. */
304 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
305 pinsrh_0_u (uint16x4_t s, uint16x4_t t)
307 return __builtin_loongson_pinsrh_0_u (s, t);
310 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
311 pinsrh_1_u (uint16x4_t s, uint16x4_t t)
313 return __builtin_loongson_pinsrh_1_u (s, t);
316 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
317 pinsrh_2_u (uint16x4_t s, uint16x4_t t)
319 return __builtin_loongson_pinsrh_2_u (s, t);
322 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
323 pinsrh_3_u (uint16x4_t s, uint16x4_t t)
325 return __builtin_loongson_pinsrh_3_u (s, t);
328 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
329 pinsrh_0_s (int16x4_t s, int16x4_t t)
331 return __builtin_loongson_pinsrh_0_s (s, t);
334 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
335 pinsrh_1_s (int16x4_t s, int16x4_t t)
337 return __builtin_loongson_pinsrh_1_s (s, t);
340 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
341 pinsrh_2_s (int16x4_t s, int16x4_t t)
343 return __builtin_loongson_pinsrh_2_s (s, t);
346 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
347 pinsrh_3_s (int16x4_t s, int16x4_t t)
349 return __builtin_loongson_pinsrh_3_s (s, t);
352 /* Multiply and add. */
353 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
354 pmaddhw (int16x4_t s, int16x4_t t)
356 return __builtin_loongson_pmaddhw (s, t);
359 /* Maximum of signed halfwords. */
360 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
361 pmaxsh (int16x4_t s, int16x4_t t)
363 return __builtin_loongson_pmaxsh (s, t);
366 /* Maximum of unsigned bytes. */
367 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
368 pmaxub (uint8x8_t s, uint8x8_t t)
370 return __builtin_loongson_pmaxub (s, t);
373 /* Minimum of signed halfwords. */
374 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
375 pminsh (int16x4_t s, int16x4_t t)
377 return __builtin_loongson_pminsh (s, t);
380 /* Minimum of unsigned bytes. */
381 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
382 pminub (uint8x8_t s, uint8x8_t t)
384 return __builtin_loongson_pminub (s, t);
387 /* Move byte mask. */
388 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
389 pmovmskb_u (uint8x8_t s)
391 return __builtin_loongson_pmovmskb_u (s);
394 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
395 pmovmskb_s (int8x8_t s)
397 return __builtin_loongson_pmovmskb_s (s);
400 /* Multiply unsigned integers and store high result. */
401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
402 pmulhuh (uint16x4_t s, uint16x4_t t)
404 return __builtin_loongson_pmulhuh (s, t);
407 /* Multiply signed integers and store high result. */
408 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
409 pmulhh (int16x4_t s, int16x4_t t)
411 return __builtin_loongson_pmulhh (s, t);
414 /* Multiply signed integers and store low result. */
415 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
416 pmullh (int16x4_t s, int16x4_t t)
418 return __builtin_loongson_pmullh (s, t);
421 /* Multiply unsigned word integers. */
422 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
423 pmuluw (uint32x2_t s, uint32x2_t t)
425 return __builtin_loongson_pmuluw (s, t);
428 /* Absolute difference. */
429 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
430 pasubub (uint8x8_t s, uint8x8_t t)
432 return __builtin_loongson_pasubub (s, t);
435 /* Sum of unsigned byte integers. */
436 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
439 return __builtin_loongson_biadd (s);
442 /* Sum of absolute differences.
443 Note that this intrinsic expands into two machine instructions:
444 PASUBUB followed by BIADD. */
445 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
446 psadbh (uint8x8_t s, uint8x8_t t)
448 return __builtin_loongson_psadbh (s, t);
451 /* Shuffle halfwords. */
452 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
453 pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
455 return __builtin_loongson_pshufh_u (dest, s, order);
458 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
459 pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
461 return __builtin_loongson_pshufh_s (dest, s, order);
464 /* Shift left logical. */
465 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
466 psllh_u (uint16x4_t s, uint8_t amount)
468 return __builtin_loongson_psllh_u (s, amount);
471 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
472 psllh_s (int16x4_t s, uint8_t amount)
474 return __builtin_loongson_psllh_s (s, amount);
477 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
478 psllw_u (uint32x2_t s, uint8_t amount)
480 return __builtin_loongson_psllw_u (s, amount);
483 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
484 psllw_s (int32x2_t s, uint8_t amount)
486 return __builtin_loongson_psllw_s (s, amount);
489 /* Shift right logical. */
490 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
491 psrlh_u (uint16x4_t s, uint8_t amount)
493 return __builtin_loongson_psrlh_u (s, amount);
496 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
497 psrlh_s (int16x4_t s, uint8_t amount)
499 return __builtin_loongson_psrlh_s (s, amount);
502 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
503 psrlw_u (uint32x2_t s, uint8_t amount)
505 return __builtin_loongson_psrlw_u (s, amount);
508 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
509 psrlw_s (int32x2_t s, uint8_t amount)
511 return __builtin_loongson_psrlw_s (s, amount);
514 /* Shift right arithmetic. */
515 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
516 psrah_u (uint16x4_t s, uint8_t amount)
518 return __builtin_loongson_psrah_u (s, amount);
521 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
522 psrah_s (int16x4_t s, uint8_t amount)
524 return __builtin_loongson_psrah_s (s, amount);
527 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
528 psraw_u (uint32x2_t s, uint8_t amount)
530 return __builtin_loongson_psraw_u (s, amount);
533 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
534 psraw_s (int32x2_t s, uint8_t amount)
536 return __builtin_loongson_psraw_s (s, amount);
539 /* Vector subtraction, treating overflow by wraparound. */
540 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
541 psubw_u (uint32x2_t s, uint32x2_t t)
543 return __builtin_loongson_psubw_u (s, t);
546 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
547 psubh_u (uint16x4_t s, uint16x4_t t)
549 return __builtin_loongson_psubh_u (s, t);
552 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
553 psubb_u (uint8x8_t s, uint8x8_t t)
555 return __builtin_loongson_psubb_u (s, t);
558 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
559 psubw_s (int32x2_t s, int32x2_t t)
561 return __builtin_loongson_psubw_s (s, t);
564 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
565 psubh_s (int16x4_t s, int16x4_t t)
567 return __builtin_loongson_psubh_s (s, t);
570 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
571 psubb_s (int8x8_t s, int8x8_t t)
573 return __builtin_loongson_psubb_s (s, t);
576 /* Subtraction of doubleword integers, treating overflow by wraparound. */
/* Wraparound subtraction on two unsigned doublewords; S and T are
   passed in that order to __builtin_loongson_psubd_u.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  uint64_t difference = __builtin_loongson_psubd_u (s, t);
  return difference;
}
/* Wraparound subtraction on two signed doublewords; S and T are
   passed in that order to __builtin_loongson_psubd_s.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  int64_t difference = __builtin_loongson_psubd_s (s, t);
  return difference;
}
589 /* Vector subtraction, treating overflow by signed saturation. */
590 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
591 psubsh (int16x4_t s, int16x4_t t)
593 return __builtin_loongson_psubsh (s, t);
596 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
597 psubsb (int8x8_t s, int8x8_t t)
599 return __builtin_loongson_psubsb (s, t);
602 /* Vector subtraction, treating overflow by unsigned saturation. */
603 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
604 psubush (uint16x4_t s, uint16x4_t t)
606 return __builtin_loongson_psubush (s, t);
609 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
610 psubusb (uint8x8_t s, uint8x8_t t)
612 return __builtin_loongson_psubusb (s, t);
615 /* Unpack high data. */
616 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
617 punpckhwd_u (uint32x2_t s, uint32x2_t t)
619 return __builtin_loongson_punpckhwd_u (s, t);
622 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
623 punpckhhw_u (uint16x4_t s, uint16x4_t t)
625 return __builtin_loongson_punpckhhw_u (s, t);
628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
629 punpckhbh_u (uint8x8_t s, uint8x8_t t)
631 return __builtin_loongson_punpckhbh_u (s, t);
634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
635 punpckhwd_s (int32x2_t s, int32x2_t t)
637 return __builtin_loongson_punpckhwd_s (s, t);
640 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
641 punpckhhw_s (int16x4_t s, int16x4_t t)
643 return __builtin_loongson_punpckhhw_s (s, t);
646 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
647 punpckhbh_s (int8x8_t s, int8x8_t t)
649 return __builtin_loongson_punpckhbh_s (s, t);
652 /* Unpack low data. */
653 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
654 punpcklwd_u (uint32x2_t s, uint32x2_t t)
656 return __builtin_loongson_punpcklwd_u (s, t);
659 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
660 punpcklhw_u (uint16x4_t s, uint16x4_t t)
662 return __builtin_loongson_punpcklhw_u (s, t);
665 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
666 punpcklbh_u (uint8x8_t s, uint8x8_t t)
668 return __builtin_loongson_punpcklbh_u (s, t);
671 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
672 punpcklwd_s (int32x2_t s, int32x2_t t)
674 return __builtin_loongson_punpcklwd_s (s, t);
677 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
678 punpcklhw_s (int16x4_t s, int16x4_t t)
680 return __builtin_loongson_punpcklhw_s (s, t);
683 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
684 punpcklbh_s (int8x8_t s, int8x8_t t)
686 return __builtin_loongson_punpcklbh_s (s, t);