The miscellaneous intrinsics for Streaming SIMD Extensions 2 (SSE2) are listed in the following table followed by their descriptions.
The prototypes for SSE2 intrinsics are in the emmintrin.h header file.
Intrinsic | Corresponding Instruction | Operation |
---|---|---|
_mm_packs_epi16 | PACKSSWB | Packed Saturation |
_mm_packs_epi32 | PACKSSDW | Packed Saturation |
_mm_packus_epi16 | PACKUSWB | Packed Saturation |
_mm_extract_epi16 | PEXTRW | Extraction |
_mm_insert_epi16 | PINSRW | Insertion |
_mm_movemask_epi8 | PMOVMSKB | Mask Creation |
_mm_shuffle_epi32 | PSHUFD | Shuffle |
_mm_shufflehi_epi16 | PSHUFHW | Shuffle |
_mm_shufflelo_epi16 | PSHUFLW | Shuffle |
_mm_unpackhi_epi8 | PUNPCKHBW | Interleave |
_mm_unpackhi_epi16 | PUNPCKHWD | Interleave |
_mm_unpackhi_epi32 | PUNPCKHDQ | Interleave |
_mm_unpackhi_epi64 | PUNPCKHQDQ | Interleave |
_mm_unpacklo_epi8 | PUNPCKLBW | Interleave |
_mm_unpacklo_epi16 | PUNPCKLWD | Interleave |
_mm_unpacklo_epi32 | PUNPCKLDQ | Interleave |
_mm_unpacklo_epi64 | PUNPCKLQDQ | Interleave |
_mm_movepi64_pi64 | MOVDQ2Q | move |
_mm_movpi64_epi64 | MOVQ2DQ | move |
_mm_move_epi64 | MOVQ | move |
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Packs the 16 signed 16-bit integers from a
and b into 8-bit integers and saturates.
r0 := SignedSaturate(a0)
r1 := SignedSaturate(a1)
...
r7 := SignedSaturate(a7)
r8 := SignedSaturate(b0)
r9 := SignedSaturate(b1)
...
r15 := SignedSaturate(b7)
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Packs the 8 signed 32-bit integers from a
and b into signed 16-bit integers and saturates.
r0 := SignedSaturate(a0)
r1 := SignedSaturate(a1)
r2 := SignedSaturate(a2)
r3 := SignedSaturate(a3)
r4 := SignedSaturate(b0)
r5 := SignedSaturate(b1)
r6 := SignedSaturate(b2)
r7 := SignedSaturate(b3)
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Packs the 16 signed 16-bit integers from a
and b into 8-bit unsigned integers and saturates.
r0 := UnsignedSaturate(a0)
r1 := UnsignedSaturate(a1)
...
r7 := UnsignedSaturate(a7)
r8 := UnsignedSaturate(b0)
r9 := UnsignedSaturate(b1)
...
r15 := UnsignedSaturate(b7)
int _mm_extract_epi16(__m128i a, int imm)
Extracts the selected signed or unsigned 16-bit integer
from a and zero extends. The selector imm
must be an immediate.
r := (imm == 0) ? a0 :
( (imm == 1) ? a1 :
...
(imm == 7) ? a7 )
__m128i _mm_insert_epi16(__m128i a, int b, int imm)
Inserts the least significant 16 bits of b
into the selected 16-bit integer of a. The selector
imm must be an immediate.
r0 := (imm == 0) ? b : a0;
r1 := (imm == 1) ? b : a1;
...
r7 := (imm == 7) ? b : a7;
int _mm_movemask_epi8(__m128i a)
Creates a 16-bit mask from the most significant bits
of the 16 signed or unsigned 8-bit integers in a
and zero extends the upper bits.
r := a15[7] << 15 |
a14[7] << 14 |
...
a1[7] << 1 |
a0[7]
__m128i _mm_shuffle_epi32(__m128i a, int imm)
Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
__m128i _mm_shufflehi_epi16(__m128i a, int imm)
Shuffles the upper 4 signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
__m128i _mm_shufflelo_epi16(__m128i a, int imm)
Shuffles the lower 4 signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Interleaves the upper 8 signed or unsigned 8-bit integers
in a with the upper 8 signed or unsigned 8-bit
integers in b.
r0 := a8 ; r1 := b8
r2 := a9 ; r3 := b9
...
r14 := a15 ; r15 := b15
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Interleaves the upper 4 signed or unsigned 16-bit integers
in a with the upper 4 signed or unsigned 16-bit
integers in b.
r0 := a4 ; r1 := b4
r2 := a5 ; r3 := b5
r4 := a6 ; r5 := b6
r6 := a7 ; r7 := b7
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Interleaves the upper 2 signed or unsigned 32-bit integers
in a with the upper 2 signed or unsigned 32-bit
integers in b.
r0 := a2 ; r1 := b2
r2 := a3 ; r3 := b3
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Interleaves the upper signed or unsigned 64-bit integer
in a with the upper signed or unsigned 64-bit
integer in b.
r0 := a1 ; r1 := b1
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Interleaves the lower 8 signed or unsigned 8-bit integers
in a with the lower 8 signed or unsigned 8-bit
integers in b.
r0 := a0 ; r1 := b0
r2 := a1 ; r3 := b1
...
r14 := a7 ; r15 := b7
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Interleaves the lower 4 signed or unsigned 16-bit integers
in a with the lower 4 signed or unsigned 16-bit
integers in b.
r0 := a0 ; r1 := b0
r2 := a1 ; r3 := b1
r4 := a2 ; r5 := b2
r6 := a3 ; r7 := b3
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Interleaves the lower 2 signed or unsigned 32-bit integers
in a with the lower 2 signed or unsigned 32-bit
integers in b.
r0 := a0 ; r1 := b0
r2 := a1 ; r3 := b1
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Interleaves the lower signed or unsigned 64-bit integer
in a with the lower signed or unsigned 64-bit
integer in b.
r0 := a0 ; r1 := b0
__m64 _mm_movepi64_pi64(__m128i a)
Returns the lower 64 bits of a
as an __m64 type.
r0 := a0 ;
__m128i _mm_movpi64_epi64(__m64 a)
Moves the 64 bits of a to the
lower 64 bits of the result, zeroing the upper bits.
r0 := a0 ; r1 := 0X0 ;
__m128i _mm_move_epi64(__m128i a)
Moves the lower 64 bits of a
to the lower 64 bits of the result, zeroing the upper bits.
r0 := a0 ; r1 := 0X0 ;
The prototypes for Streaming SIMD Extensions 2 (SSE2) intrinsics are in the emmintrin.h header file.
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
(uses UNPCKHPD) Interleaves
the upper DP FP values of a and b.
r0 := a1
r1 := b1
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
(uses UNPCKLPD) Interleaves
the lower DP FP values of a and b.
r0 := a0
r1 := b0
int _mm_movemask_pd(__m128d a)
(uses MOVMSKPD) Creates a two-bit
mask from the sign bits of the two DP FP values of a.
r := sign(a1) << 1 | sign(a0)
__m128d _mm_shuffle_pd(__m128d a, __m128d b, int i)
(uses SHUFPD) Selects two specific DP FP values from a and b, based on the mask i. The mask must be an immediate. See Macro Function for Shuffle for a description of the shuffle semantics.
This version of the Intel C++ Compiler supports casting between various SP, DP, and INT vector types. These intrinsics do not convert values; they just change the type.
extern __m128 _mm_castpd_ps(__m128d in);
extern __m128i _mm_castpd_si128(__m128d in);
extern __m128d _mm_castps_pd(__m128 in);
extern __m128i _mm_castps_si128(__m128 in);
extern __m128 _mm_castsi128_ps(__m128i in);
extern __m128d _mm_castsi128_pd(__m128i in);