/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.

   This file is part of GNU CC.

   GNU CC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU CC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 8.0.  */
30 #ifndef _MMINTRIN_H_INCLUDED
31 #define _MMINTRIN_H_INCLUDED
34 # error "MMX instruction set not enabled"
36 /* The data type intended for user use. */
37 typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
39 /* Internal data types for implementing the intrinsics. */
40 typedef int __v2si __attribute__ ((__mode__ (__V2SI__)));
41 typedef int __v4hi __attribute__ ((__mode__ (__V4HI__)));
42 typedef int __v8qi __attribute__ ((__mode__ (__V8QI__)));
/* Empty the multimedia state (executes the EMMS instruction), making the
   x87 FPU usable again after MMX code.  */
static __inline void
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
51 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
53 _mm_cvtsi32_si64 (int __i)
55 long long __tmp = (unsigned int)__i;
60 /* Convert I to a __m64 object. */
62 _mm_cvtsi64x_si64 (long long __i)
67 /* Convert I to a __m64 object. */
69 _mm_set_pi64x (long long __i)
75 /* Convert the lower 32 bits of the __m64 object into an integer. */
77 _mm_cvtsi64_si32 (__m64 __i)
79 long long __tmp = (long long)__i;
84 /* Convert the lower 32 bits of the __m64 object into an integer. */
85 static __inline long long
86 _mm_cvtsi64_si64x (__m64 __i)
88 return (long long)__i;
92 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
93 the result, and the four 16-bit values from M2 into the upper four 8-bit
94 values of the result, all with signed saturation. */
96 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
98 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
101 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
102 the result, and the two 32-bit values from M2 into the upper two 16-bit
103 values of the result, all with signed saturation. */
104 static __inline __m64
105 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
107 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
110 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
111 the result, and the four 16-bit values from M2 into the upper four 8-bit
112 values of the result, all with unsigned saturation. */
113 static __inline __m64
114 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
116 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
119 /* Interleave the four 8-bit values from the high half of M1 with the four
120 8-bit values from the high half of M2. */
121 static __inline __m64
122 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
124 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
127 /* Interleave the two 16-bit values from the high half of M1 with the two
128 16-bit values from the high half of M2. */
129 static __inline __m64
130 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
132 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
135 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
136 value from the high half of M2. */
137 static __inline __m64
138 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
140 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
143 /* Interleave the four 8-bit values from the low half of M1 with the four
144 8-bit values from the low half of M2. */
145 static __inline __m64
146 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
148 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
151 /* Interleave the two 16-bit values from the low half of M1 with the two
152 16-bit values from the low half of M2. */
153 static __inline __m64
154 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
156 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
159 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
160 value from the low half of M2. */
161 static __inline __m64
162 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
164 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
167 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
168 static __inline __m64
169 _mm_add_pi8 (__m64 __m1, __m64 __m2)
171 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
174 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
175 static __inline __m64
176 _mm_add_pi16 (__m64 __m1, __m64 __m2)
178 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
181 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
182 static __inline __m64
183 _mm_add_pi32 (__m64 __m1, __m64 __m2)
185 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
188 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
189 static __inline __m64
190 _mm_add_si64 (__m64 __m1, __m64 __m2)
192 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
195 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
196 saturated arithmetic. */
197 static __inline __m64
198 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
200 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
203 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
204 saturated arithmetic. */
205 static __inline __m64
206 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
208 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
211 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
212 saturated arithmetic. */
213 static __inline __m64
214 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
216 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
219 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
220 saturated arithmetic. */
221 static __inline __m64
222 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
224 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
227 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
228 static __inline __m64
229 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
231 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
234 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
235 static __inline __m64
236 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
238 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
241 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
242 static __inline __m64
243 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
245 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
248 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
249 static __inline __m64
250 _mm_sub_si64 (__m64 __m1, __m64 __m2)
252 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
255 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
256 saturating arithmetic. */
257 static __inline __m64
258 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
260 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
263 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
264 signed saturating arithmetic. */
265 static __inline __m64
266 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
268 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
271 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
272 unsigned saturating arithmetic. */
273 static __inline __m64
274 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
276 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
279 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
280 unsigned saturating arithmetic. */
281 static __inline __m64
282 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
284 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
287 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
288 four 32-bit intermediate results, which are then summed by pairs to
289 produce two 32-bit results. */
290 static __inline __m64
291 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
293 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
296 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
297 M2 and produce the high 16 bits of the 32-bit results. */
298 static __inline __m64
299 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
301 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
304 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
305 the low 16 bits of the results. */
306 static __inline __m64
307 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
309 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
312 /* Shift four 16-bit values in M left by COUNT. */
313 static __inline __m64
314 _mm_sll_pi16 (__m64 __m, __m64 __count)
316 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
319 static __inline __m64
320 _mm_slli_pi16 (__m64 __m, int __count)
322 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
325 /* Shift two 32-bit values in M left by COUNT. */
326 static __inline __m64
327 _mm_sll_pi32 (__m64 __m, __m64 __count)
329 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
332 static __inline __m64
333 _mm_slli_pi32 (__m64 __m, int __count)
335 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
338 /* Shift the 64-bit value in M left by COUNT. */
339 static __inline __m64
340 _mm_sll_si64 (__m64 __m, __m64 __count)
342 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
345 static __inline __m64
346 _mm_slli_si64 (__m64 __m, int __count)
348 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
351 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
352 static __inline __m64
353 _mm_sra_pi16 (__m64 __m, __m64 __count)
355 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
358 static __inline __m64
359 _mm_srai_pi16 (__m64 __m, int __count)
361 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
364 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
365 static __inline __m64
366 _mm_sra_pi32 (__m64 __m, __m64 __count)
368 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
371 static __inline __m64
372 _mm_srai_pi32 (__m64 __m, int __count)
374 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
377 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
378 static __inline __m64
379 _mm_srl_pi16 (__m64 __m, __m64 __count)
381 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
384 static __inline __m64
385 _mm_srli_pi16 (__m64 __m, int __count)
387 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
390 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
391 static __inline __m64
392 _mm_srl_pi32 (__m64 __m, __m64 __count)
394 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
397 static __inline __m64
398 _mm_srli_pi32 (__m64 __m, int __count)
400 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
403 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
404 static __inline __m64
405 _mm_srl_si64 (__m64 __m, __m64 __count)
407 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
410 static __inline __m64
411 _mm_srli_si64 (__m64 __m, int __count)
413 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
416 /* Bit-wise AND the 64-bit values in M1 and M2. */
417 static __inline __m64
418 _mm_and_si64 (__m64 __m1, __m64 __m2)
420 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2);
423 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
424 64-bit value in M2. */
425 static __inline __m64
426 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
428 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2);
431 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
432 static __inline __m64
433 _mm_or_si64 (__m64 __m1, __m64 __m2)
435 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2);
438 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
439 static __inline __m64
440 _mm_xor_si64 (__m64 __m1, __m64 __m2)
442 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2);
445 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
446 test is true and zero if false. */
447 static __inline __m64
448 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
450 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
453 static __inline __m64
454 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
456 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
459 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
460 the test is true and zero if false. */
461 static __inline __m64
462 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
464 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
467 static __inline __m64
468 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
470 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
473 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
474 the test is true and zero if false. */
475 static __inline __m64
476 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
478 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
481 static __inline __m64
482 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
484 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
487 /* Creates a 64-bit zero. */
488 static __inline __m64
489 _mm_setzero_si64 (void)
491 return (__m64)__builtin_ia32_mmx_zero ();
494 /* Creates a vector of two 32-bit values; I0 is least significant. */
495 static __inline __m64
496 _mm_set_pi32 (int __i1, int __i0)
512 /* Creates a vector of four 16-bit values; W0 is least significant. */
513 static __inline __m64
514 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
516 unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2;
517 unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0;
518 return _mm_set_pi32 (__i1, __i0);
522 /* Creates a vector of eight 8-bit values; B0 is least significant. */
523 static __inline __m64
524 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
525 char __b3, char __b2, char __b1, char __b0)
527 unsigned int __i1, __i0;
529 __i1 = (unsigned char)__b7;
530 __i1 = __i1 << 8 | (unsigned char)__b6;
531 __i1 = __i1 << 8 | (unsigned char)__b5;
532 __i1 = __i1 << 8 | (unsigned char)__b4;
534 __i0 = (unsigned char)__b3;
535 __i0 = __i0 << 8 | (unsigned char)__b2;
536 __i0 = __i0 << 8 | (unsigned char)__b1;
537 __i0 = __i0 << 8 | (unsigned char)__b0;
539 return _mm_set_pi32 (__i1, __i0);
542 /* Similar, but with the arguments in reverse order. */
543 static __inline __m64
544 _mm_setr_pi32 (int __i0, int __i1)
546 return _mm_set_pi32 (__i1, __i0);
549 static __inline __m64
550 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
552 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
555 static __inline __m64
556 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
557 char __b4, char __b5, char __b6, char __b7)
559 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
562 /* Creates a vector of two 32-bit values, both elements containing I. */
563 static __inline __m64
564 _mm_set1_pi32 (int __i)
566 return _mm_set_pi32 (__i, __i);
569 /* Creates a vector of four 16-bit values, all elements containing W. */
570 static __inline __m64
571 _mm_set1_pi16 (short __w)
573 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w;
574 return _mm_set1_pi32 (__i);
577 /* Creates a vector of four 16-bit values, all elements containing B. */
578 static __inline __m64
579 _mm_set1_pi8 (char __b)
581 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
582 unsigned int __i = __w << 16 | __w;
583 return _mm_set1_pi32 (__i);
/* Alternate intrinsic name definitions (the _m_* instruction-mnemonic
   aliases used by some vendor headers).  */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_to_int _mm_cvtsi64_si32
/* Pack/unpack.  */
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32
/* Arithmetic.  */
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
/* Shifts.  */
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64
/* Logical.  */
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64
/* Comparisons.  */
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
644 #endif /* _MMINTRIN_H_INCLUDED */