SPRUI04F July 2015 – April 2023
The C6000 compiler recognizes a number of intrinsic operators. Intrinsics allow you to express the meaning of certain assembly statements that would otherwise be cumbersome or inexpressible in C/C++. Intrinsics are used like functions; you can use C/C++ variables with these intrinsics, just as you would with any normal function.
The intrinsics are specified with a leading underscore, and are accessed by calling them as you do a function. For example:
int x1, x2, y;
y = _sadd(x1, x2);
In some instances, an intrinsic’s exact corresponding assembly language instruction may not be used by the compiler. When this is the case, the meaning of the program does not change.
The tables that list intrinsics apply to device families as follows:
Table 8-4 provides a summary of the C6000 intrinsics clarifying which devices support which intrinsics.
Intrinsic | C6400+ | C6740 | C6600 |
---|---|---|---|
_abs | Yes | Yes | Yes |
_abs2 | Yes | Yes | Yes |
_add2 | Yes | Yes | Yes |
_add4 | Yes | Yes | Yes |
_addsub | Yes | Yes | Yes |
_addsub2 | Yes | Yes | Yes |
_amem2 | Yes | Yes | Yes |
_amem2_const | Yes | Yes | Yes |
_amem4 | Yes | Yes | Yes |
_amem4_const | Yes | Yes | Yes |
_amem8 | Yes | Yes | Yes |
_amem8_const | Yes | Yes | Yes |
_amem8_f2 | Yes | Yes | Yes |
_amem8_f2_const | Yes | Yes | Yes |
_amemd8 | Yes | Yes | Yes |
_amemd8_const | Yes | Yes | Yes |
_avg2 | Yes | Yes | Yes |
_avgu4 | Yes | Yes | Yes |
_bitc4 | Yes | Yes | Yes |
_bitr | Yes | Yes | Yes |
_ccmatmpy | | | Yes |
_ccmatmpyr1 | | | Yes |
_ccmpy32r1 | | | Yes |
_clr | Yes | Yes | Yes |
_clrr | Yes | Yes | Yes |
_cmatmpy | | | Yes |
_cmatmpyr1 | | | Yes |
_cmpeq2 | Yes | Yes | Yes |
_cmpeq4 | Yes | Yes | Yes |
_cmpgt2 | Yes | Yes | Yes |
_cmpgtu4 | Yes | Yes | Yes |
_cmplt2 | Yes | Yes | Yes |
_cmpltu4 | Yes | Yes | Yes |
_cmpy | Yes | Yes | Yes |
_cmpy32r1 | | | Yes |
_cmpyr | Yes | Yes | Yes |
_cmpyr1 | Yes | Yes | Yes |
_cmpysp | | | Yes |
_complex_conjugate_mpysp | | | Yes |
_complex_mpysp | | | Yes |
_crot270 | | | Yes |
_crot90 | | | Yes |
_dadd | | | Yes |
_dadd2 | | | Yes |
_daddsp | | | Yes |
_dadd_c | | | Yes |
_dapys2 | | | Yes |
_davg2 | | | Yes |
_davgnr2 | | | Yes |
_davgnru4 | | | Yes |
_davgu4 | | | Yes |
_dccmpyr1 | | | Yes |
_dcmpeq2 | | | Yes |
_dcmpeq4 | | | Yes |
_dcmpgt2 | | | Yes |
_dcmpgtu4 | | | Yes |
_dccmpy | | | Yes |
_dcmpy | | | Yes |
_dcmpyr1 | | | Yes |
_dcrot90 | | | Yes |
_dcrot270 | | | Yes |
_ddotp4 | Yes | Yes | Yes |
_ddotp4h | | | Yes |
_ddotph2 | Yes | Yes | Yes |
_ddotph2r | Yes | Yes | Yes |
_ddotpl2 | Yes | Yes | Yes |
_ddotpl2r | Yes | Yes | Yes |
_ddotpsu4h | | | Yes |
_deal | Yes | Yes | Yes |
_dinthsp | | | Yes |
_dinthspu | | | Yes |
_dintsp | | | Yes |
_dintspu | | | Yes |
_dmax2 | | | Yes |
_dmaxu4 | | | Yes |
_dmin2 | | | Yes |
_dminu4 | | | Yes |
_dmpy2 | | | Yes |
_dmpysp | | | Yes |
_dmpysu4 | | | Yes |
_dmpyu2 | | | Yes |
_dmpyu4 | | | Yes |
_dmv | Yes | Yes | Yes |
_dmvd | | | Yes |
_dotp2 | Yes | Yes | Yes |
_dotp4h | | | Yes |
_dotp4hll | | | Yes |
_dotpn2 | Yes | Yes | Yes |
_dotpnrsu2 | Yes | Yes | Yes |
_dotpnrus2 | Yes | Yes | Yes |
_dotprsu2 | Yes | Yes | Yes |
_dotpsu4 | Yes | Yes | Yes |
_dotpus4 | Yes | Yes | Yes |
_dotpsu4h | | | Yes |
_dotpsu4hll | | | Yes |
_dotpu4 | Yes | Yes | Yes |
_dpack2 | Yes | Yes | Yes |
_dpackh2 | | | Yes |
_dpackh4 | | | Yes |
_dpacklh2 | | | Yes |
_dpacklh4 | | | Yes |
_dpackl2 | | | Yes |
_dpackl4 | | | Yes |
_dpackx2 | Yes | Yes | Yes |
_dpint | | Yes | Yes |
_dsadd | | | Yes |
_dsadd2 | | | Yes |
_dshl | | | Yes |
_dshl2 | | | Yes |
_dshr | | | Yes |
_dshr2 | | | Yes |
_dshru | | | Yes |
_dshru2 | | | Yes |
_dsmpy2 | | | Yes |
_dspacku4 | | | Yes |
_dspint | | | Yes |
_dspinth | | | Yes |
_dssub | | | Yes |
_dssub2 | | | Yes |
_dsub | | | Yes |
_dsub2 | | | Yes |
_dsubsp | | | Yes |
_dtol | Yes | Yes | Yes |
_dtoll | Yes | Yes | Yes |
_dxpnd2 | | | Yes |
_dxpnd4 | | | Yes |
_ext | Yes | Yes | Yes |
_extr | Yes | Yes | Yes |
_extu | Yes | Yes | Yes |
_extur | Yes | Yes | Yes |
_f2tol | | Yes | Yes |
_f2toll | | Yes | Yes |
_fabs | | Yes | Yes |
_fabsf | | Yes | Yes |
_fdmvd_f2 | | | Yes |
_fdmv_f2 | Yes | Yes | Yes |
_ftoi | Yes | Yes | Yes |
_gmpy | Yes | Yes | Yes |
_gmpy4 | Yes | Yes | Yes |
_hi | Yes | Yes | Yes |
_hill | Yes | Yes | Yes |
_itod | Yes | Yes | Yes |
_itof | Yes | Yes | Yes |
_itoll | Yes | Yes | Yes |
_labs | Yes | Yes | Yes |
_land | | | Yes |
_landn | | | Yes |
_ldotp2 | Yes | Yes | Yes |
_lmbd | Yes | Yes | Yes |
_lnorm | Yes | Yes | Yes |
_lo | Yes | Yes | Yes |
_loll | Yes | Yes | Yes |
_lor | | | Yes |
_lsadd | Yes | Yes | Yes |
_lssub | Yes | Yes | Yes |
_ltod | Yes | Yes | Yes |
_lltod | Yes | Yes | Yes |
_lltof2 | | Yes | Yes |
_ltof2 | | Yes | Yes |
_max2 | Yes | Yes | Yes |
_maxu4 | Yes | Yes | Yes |
_mfence | | | Yes |
_min2 | Yes | Yes | Yes |
_minu4 | Yes | Yes | Yes |
_mem2 | Yes | Yes | Yes |
_mem2_const | Yes | Yes | Yes |
_mem4 | Yes | Yes | Yes |
_mem4_const | Yes | Yes | Yes |
_mem8 | Yes | Yes | Yes |
_mem8_const | Yes | Yes | Yes |
_mem8_f2 | | Yes | Yes |
_mem8_f2_const | | Yes | Yes |
_memd8 | Yes | Yes | Yes |
_memd8_const | Yes | Yes | Yes |
_mpy | Yes | Yes | Yes |
_mpy2ir | Yes | Yes | Yes |
_mpy2ll | Yes | Yes | Yes |
_mpy32 | Yes | Yes | Yes |
_mpy32ll | Yes | Yes | Yes |
_mpy32su | Yes | Yes | Yes |
_mpy32u | Yes | Yes | Yes |
_mpy32us | Yes | Yes | Yes |
_mpyh | Yes | Yes | Yes |
_mpyhill | Yes | Yes | Yes |
_mpyihll | Yes | Yes | Yes |
_mpyilll | Yes | Yes | Yes |
_mpyhir | Yes | Yes | Yes |
_mpyihr | Yes | Yes | Yes |
_mpyilr | Yes | Yes | Yes |
_mpyhl | Yes | Yes | Yes |
_mpyhlu | Yes | Yes | Yes |
_mpyhslu | Yes | Yes | Yes |
_mpyhsu | Yes | Yes | Yes |
_mpyhu | Yes | Yes | Yes |
_mpyhuls | Yes | Yes | Yes |
_mpyhus | Yes | Yes | Yes |
_mpyidll | | Yes | Yes |
_mpylh | Yes | Yes | Yes |
_mpylhu | Yes | Yes | Yes |
_mpylill | Yes | Yes | Yes |
_mpylir | Yes | Yes | Yes |
_mpylshu | Yes | Yes | Yes |
_mpyluhs | Yes | Yes | Yes |
_mpysp2dp | | Yes | Yes |
_mpyspdp | | Yes | Yes |
_mpysu | Yes | Yes | Yes |
_mpysu4ll | Yes | Yes | Yes |
_mpyus4ll | Yes | Yes | Yes |
_mpyu | Yes | Yes | Yes |
_mpyu2 | | | Yes |
_mpyu4ll | Yes | Yes | Yes |
_mpyus | Yes | Yes | Yes |
_mvd | Yes | Yes | Yes |
_nassert | Yes | Yes | Yes |
_norm | Yes | Yes | Yes |
_pack2 | Yes | Yes | Yes |
_packh2 | Yes | Yes | Yes |
_packh4 | Yes | Yes | Yes |
_packhl2 | Yes | Yes | Yes |
_packl4 | Yes | Yes | Yes |
_packlh2 | Yes | Yes | Yes |
_qmpy32 | | | Yes |
_qmpysp | | | Yes |
_qsmpy32r1 | | | Yes |
_rcpdp | | Yes | Yes |
_rcpsp | | Yes | Yes |
_rsqrdp | | Yes | Yes |
_rsqrsp | | Yes | Yes |
_rotl | Yes | Yes | Yes |
_rpack2 | Yes | Yes | Yes |
_sadd | Yes | Yes | Yes |
_sadd2 | Yes | Yes | Yes |
_saddsub | Yes | Yes | Yes |
_saddsub2 | Yes | Yes | Yes |
_saddu4 | Yes | Yes | Yes |
_saddus2 | Yes | Yes | Yes |
_saddsu2 | Yes | Yes | Yes |
_sat | Yes | Yes | Yes |
_set | Yes | Yes | Yes |
_setr | Yes | Yes | Yes |
_shfl | Yes | Yes | Yes |
_shfl3 | Yes | Yes | Yes |
_shl2 | | | Yes |
_shlmb | Yes | Yes | Yes |
_shr2 | Yes | Yes | Yes |
_shrmb | Yes | Yes | Yes |
_shru2 | Yes | Yes | Yes |
_smpy | Yes | Yes | Yes |
_smpy2ll | Yes | Yes | Yes |
_smpy32 | Yes | Yes | Yes |
_smpyh | Yes | Yes | Yes |
_smpyhl | Yes | Yes | Yes |
_smpylh | Yes | Yes | Yes |
_spack2 | Yes | Yes | Yes |
_spacku4 | Yes | Yes | Yes |
_spint | | Yes | Yes |
_sshl | Yes | Yes | Yes |
_sshvl | Yes | Yes | Yes |
_sshvr | Yes | Yes | Yes |
_ssub | Yes | Yes | Yes |
_ssub2 | Yes | Yes | Yes |
_sub2 | Yes | Yes | Yes |
_sub4 | Yes | Yes | Yes |
_subabs4 | Yes | Yes | Yes |
_subc | Yes | Yes | Yes |
_swap2 | Yes | Yes | Yes |
_swap4 | Yes | Yes | Yes |
_unpkbu4 | | | Yes |
_unpkh2 | | | Yes |
_unpkhu2 | | | Yes |
_unpkhu4 | Yes | Yes | Yes |
_unpklu4 | Yes | Yes | Yes |
_xorll_c | | | Yes |
_xormpy | Yes | Yes | Yes |
_xpnd2 | Yes | Yes | Yes |
_xpnd4 | Yes | Yes | Yes |
The intrinsics listed in Table 8-5 can be used on all C6000 devices. They correspond to the indicated C6000 assembly language instruction(s). See the TMS320C6000 CPU and Instruction Set Reference Guide for more information.
See Table 8-6 for a list of intrinsics that are specific to C6740 and C6600. See Table 8-7 for a list of C6600-specific intrinsics.
Some items listed in the following tables are actually defined in the c6x.h header file as macros that point to intrinsics. This header file is provided in the compiler's "include" directory. Your code must include this header file in order to use the noted macros.
C/C++ Compiler Intrinsic | Assembly Instruction | Description |
---|---|---|
int _abs (int src); __int40_t _labs (__int40_t src); | ABS | Returns the saturated absolute value of src |
int _abs2 (int src); | ABS2 | Calculates the absolute value for each 16-bit value |
int _add2 (int src1, int src2); | ADD2 | Adds the upper and lower halves of src1 to the upper and lower halves of src2 and returns the result. Any overflow from the lower half add does not affect the upper half add. |
int _add4 (int src1, int src2); | ADD4 | Performs 2s-complement addition to pairs of packed 8-bit numbers |
long long _addsub (int src1, int src2); | ADDSUB | Performs an addition and subtraction in parallel. |
long long _addsub2 (int src1, int src2); | ADDSUB2 | Performs an ADD2 and SUB2 in parallel. |
ushort & _amem2 (void *ptr); | LDHU STH | Allows aligned loads and stores of 2 bytes to memory. The pointer must be aligned to a two-byte boundary.(1) |
const ushort & _amem2_const (const void *ptr); | LDHU | Allows aligned loads of 2 bytes from memory. The pointer must be aligned to a two-byte boundary.(1) |
unsigned & _amem4 (void *ptr); | LDW STW | Allows aligned loads and stores of 4 bytes to memory. The pointer must be aligned to a four-byte boundary.(1) |
const unsigned & _amem4_const (const void *ptr); | LDW | Allows aligned loads of 4 bytes from memory. The pointer must be aligned to a four-byte boundary.(1) |
long long & _amem8 (void *ptr); | LDDW STDW | Allows aligned loads and stores of 8 bytes to memory. The pointer must be aligned to an eight-byte boundary. An LDDW or STDW instruction will be used. |
const long long & _amem8_const (const void *ptr); | LDW/LDW LDDW | Allows aligned loads of 8 bytes from memory. The pointer must be aligned to an eight-byte boundary.(2) |
__float2_t & _amem8_f2(void * ptr); | LDDW STDW | Allows aligned loads and stores of 8 bytes to memory. The pointer must be aligned to an eight-byte boundary. This is defined as a macro. You must include c6x.h. (2)(1) |
const __float2_t & _amem8_f2_const(void * ptr); | LDDW | Allows aligned loads of 8 bytes from memory. The pointer must be aligned to an eight-byte boundary. This is defined as a macro. You must include c6x.h. (2)(1) |
double & _amemd8 (void *ptr); | LDDW STDW | Allows aligned loads and stores of 8 bytes to memory. The pointer must be aligned to an eight-byte boundary.(1)(2) An LDDW or STDW instruction will be used. |
const double & _amemd8_const (const void *ptr); | LDW/LDW LDDW | Allows aligned loads of 8 bytes from memory. The pointer must be aligned to an eight-byte boundary.(1)(2) |
int _avg2 (int src1, int src2); | AVG2 | Calculates the average for each pair of signed 16-bit values |
unsigned _avgu4 (unsigned src1, unsigned src2); | AVGU4 | Calculates the average for each pair of unsigned 8-bit values |
unsigned _bitc4 (unsigned src); | BITC4 | For each of the 8-bit quantities in src, the number of 1 bits is written to the corresponding position in the return value |
unsigned _bitr (unsigned src); | BITR | Reverses the order of the bits |
unsigned _clr (unsigned src2, unsigned csta, unsigned cstb); | CLR | Clears the specified field in src2. The beginning and ending bits of the field to be cleared are specified by csta and cstb, respectively. |
unsigned _clrr (unsigned src2, int src1); | CLR | Clears the specified field in src2. The beginning and ending bits of the field to be cleared are specified by the lower 10 bits of src1. |
int _cmpeq2 (int src1, int src2); | CMPEQ2 | Performs equality comparisons on each pair of 16-bit values. Equality results are packed into the two least-significant bits of the return value. |
int _cmpeq4 (int src1, int src2); | CMPEQ4 | Performs equality comparisons on each pair of 8-bit values. Equality results are packed into the four least-significant bits of the return value. |
int _cmpgt2 (int src1, int src2); | CMPGT2 | Compares each pair of signed 16-bit values. Results are packed into the two least-significant bits of the return value. |
unsigned _cmpgtu4 (unsigned src1, unsigned src2); | CMPGTU4 | Compares each pair of unsigned 8-bit values. Results are packed into the four least-significant bits of the return value. |
int _cmplt2 (int src1, int src2); | CMPLT2 | Swaps operands and calls _cmpgt2. This is defined as a macro. You must include c6x.h. |
unsigned _cmpltu4 (unsigned src1, unsigned src2); | CMPLTU4 | Swaps operands and calls _cmpgtu4. This is defined as a macro. You must include c6x.h. |
long long _cmpy (unsigned src1, unsigned src2); unsigned _cmpyr (unsigned src1, unsigned src2); unsigned _cmpyr1 (unsigned src1, unsigned src2 ); | CMPY CMPYR CMPYR1 | Performs various complex multiply operations. |
long long _ddotp4 (unsigned src1, unsigned src2); | DDOTP4 | Performs two DOTP2 operations simultaneously. |
long long _ddotph2 (long long src1, unsigned src2); long long _ddotpl2 (long long src1, unsigned src2); unsigned _ddotph2r (long long src1, unsigned src2); unsigned _ddotpl2r (long long src1, unsigned src2); | DDOTPH2 DDOTPL2 DDOTPH2R DDOTPL2R | Performs various dual dot-product operations between two pairs of signed, packed 16-bit values. |
unsigned _deal (unsigned src); | DEAL | The odd and even bits of src are extracted into two separate 16-bit values. |
long long _dmv (int src1, int src2); | DMV | Places src1 in the 32 MSBs of the long long and src2 in the 32 LSBs of the long long. See also _itoll(). |
int _dotp2 (int src1, int src2); __int40_t _ldotp2 (int src1, int src2); | DOTP2 DOTP2 | The product of the signed lower 16-bit values of src1 and src2 is added to the product of the signed upper 16-bit values of src1 and src2. In the case of _dotp2, the signed result is written to a single 32-bit register. In the case of _ldotp2, the signed result is written to a 64-bit register pair. |
int _dotpn2 (int src1, int src2); | DOTPN2 | The product of the signed lower 16-bit values of src1 and src2 is subtracted from the product of the signed upper 16-bit values of src1 and src2. |
int _dotpnrsu2 (int src1, unsigned src2); | DOTPNRSU2 | The product of the lower 16-bit values of src1 and src2 is subtracted from the product of the upper 16-bit values of src1 and src2. The values in src1 are treated as signed packed quantities; the values in src2 are treated as unsigned packed quantities. 2^15 is added and the result is sign shifted right by 16. |
int _dotpnrus2 (unsigned src1, int src2); | DOTPNRUS2 | Swaps the operands and calls _dotpnrsu2. This is defined as a macro. You must include c6x.h. |
int _dotprsu2 (int src1, unsigned src2); | DOTPRSU2 | The product of the lower 16-bit values of src1 and src2 is added to the product of the upper 16-bit values of src1 and src2. The values in src1 are treated as signed packed quantities; the values in src2 are treated as unsigned packed quantities. 2^15 is added and the result is sign shifted right by 16. |
int _dotpsu4 (int src1, unsigned src2); int _dotpus4 (unsigned src1, int src2); unsigned _dotpu4 (unsigned src1, unsigned src2); | DOTPSU4 DOTPUS4 DOTPU4 | For each pair of 8-bit values in src1 and src2, the 8-bit value from src1 is multiplied with the 8-bit value from src2. The four products are summed together. _dotpus4 is defined as a macro. You must include c6x.h. |
long long _dpack2 (unsigned src1, unsigned src2); | DPACK2 | PACK2 and PACKH2 operations performed in parallel. |
long long _dpackx2 (unsigned src1, unsigned src2); | DPACKX2 | PACKLH2 and PACKX2 operations performed in parallel. |
__int40_t _dtol (double src); | Reinterprets double register pair src as an __int40_t (stored as a register pair). | |
long long _dtoll (double src); | Reinterprets double register pair src as a long long register pair. | |
int _ext (int src2, unsigned csta, unsigned cstb); | EXT | Extracts the specified field in src2, sign-extended to 32 bits. The extract is performed by a shift left followed by a signed shift right; csta and cstb are the shift left and shift right amounts, respectively. |
int _extr (int src2, int src1); | EXT | Extracts the specified field in src2, sign-extended to 32 bits. The extract is performed by a shift left followed by a signed shift right; the shift left and shift right amounts are specified by the lower 10 bits of src1. |
unsigned _extu (unsigned src2, unsigned csta , unsigned cstb); | EXTU | Extracts the specified field in src2, zero-extended to 32 bits. The extract is performed by a shift left followed by an unsigned shift right; csta and cstb are the shift left and shift right amounts, respectively. |
unsigned _extur (unsigned src2, int src1); | EXTU | Extracts the specified field in src2, zero-extended to 32 bits. The extract is performed by a shift left followed by an unsigned shift right; the shift left and shift right amounts are specified by the lower 10 bits of src1. |
__float2_t _fdmv_f2(float src1, float src2); | DMV | Places src1 in the 32 LSBs of the __float2_t and src2 in the 32 MSBs of the __float2_t. See also _itoll(). This is defined as a macro. You must include c6x.h. |
unsigned _ftoi (float src); | Reinterprets the bits in the float as an unsigned. For example: _ftoi (1.0) == 1065353216U | |
unsigned _gmpy (unsigned src1, unsigned src2); | GMPY | Performs the Galois Field multiply. |
int _gmpy4 (int src1, int src2); | GMPY4 | Performs the Galois Field multiply on four values in src1 with four parallel values in src2. The four products are packed into the return value. |
unsigned _hi (double src); | Returns the high (odd) register of a double register pair | |
unsigned _hill (long long src); | Returns the high (odd) register of a long long register pair | |
double _itod (unsigned src2, unsigned src1); | Builds a new double register pair by reinterpreting two unsigned values, where src2 is the high (odd) register and src1 is the low (even) register | |
float _itof (unsigned src); | Reinterprets the bits in the unsigned as a float. For example: _itof (0x3f800000) = 1.0 | |
long long _itoll (unsigned src2, unsigned src1); | Builds a new long long register pair by reinterpreting two unsigned values, where src2 is the high (odd) register and src1 is the low (even) register | |
unsigned _lmbd (unsigned src1, unsigned src2); | LMBD | Searches for a leftmost 1 or 0 of src2 determined by the LSB of src1. Returns the number of bits up to the bit change. |
unsigned _lo (double src); | Returns the low (even) register of a double register pair | |
unsigned _loll (long long src); | Returns the low (even) register of a long long register pair | |
double _ltod (__int40_t src); | Reinterprets an __int40_t register pair src as a double register pair. | |
double _lltod (long long src); | Reinterprets long long register pair src as a double register pair. | |
int _max2 (int src1, int src2); int _min2 (int src1, int src2); unsigned _maxu4 (unsigned src1, unsigned src2); unsigned _minu4 (unsigned src1, unsigned src2); | MAX2 MIN2 MAXU4 MINU4 | Places the larger/smaller of each pair of values in the corresponding position in the return value. Values can be 16-bit signed or 8-bit unsigned. |
ushort & _mem2 (void * ptr); | LDB/LDB STB/STB | Allows unaligned loads and stores of 2 bytes to memory(1) |
const ushort & _mem2_const (const void * ptr); | LDB/LDB | Allows unaligned loads of 2 bytes from memory(1) |
unsigned & _mem4 (void * ptr); | LDNW STNW | Allows unaligned loads and stores of 4 bytes to memory(1) |
const unsigned & _mem4_const (const void * ptr); | LDNW | Allows unaligned loads of 4 bytes from memory(1) |
long long & _mem8 (void * ptr); | LDNDW STNDW | Allows unaligned loads and stores of 8 bytes to memory(1) |
const long long & _mem8_const (const void * ptr); | LDNDW | Allows unaligned loads of 8 bytes from memory(1) |
double & _memd8 (void * ptr); | LDNDW STNDW | Allows unaligned loads and stores of 8 bytes to memory(2)(1) |
const double & _memd8_const (const void * ptr); | LDNDW | Allows unaligned loads of 8 bytes from memory(2)(1) |
int _mpy (int src1, int src2); int _mpyus (unsigned src1, int src2); int _mpysu (int src1, unsigned src2); unsigned _mpyu (unsigned src1, unsigned src2); | MPY MPYUS MPYSU MPYU | Multiplies the 16 LSBs of src1 by the 16 LSBs of src2 and returns the result. Values can be signed or unsigned. |
long long _mpy2ir (int src1, int src2); | MPY2IR | Performs two 16 by 32 multiplies. Both results are shifted right by 15 bits to produce a rounded result. |
long long _mpy2ll (int src1, int src2); | MPY2 | Returns the products of the lower and higher 16-bit values in src1 and src2 |
int _mpy32 (int src1, int src2); | MPY32 | Returns the 32 LSBs of a 32 by 32 multiply. |
long long _mpy32ll (int src1, int src2); long long _mpy32su (int src1, int src2); long long _mpy32us (unsigned src1, int src2); long long _mpy32u (unsigned src1, unsigned src2); | MPY32 MPY32SU MPY32US MPY32U | Returns all 64 bits of a 32 by 32 multiply. Values can be signed or unsigned. |
int _mpyh (int src1, int src2); int _mpyhus (unsigned src1, int src2); int _mpyhsu (int src1, unsigned src2); unsigned _mpyhu (unsigned src1, unsigned src2); | MPYH MPYHUS MPYHSU MPYHU | Multiplies the 16 MSBs of src1 by the 16 MSBs of src2 and returns the result. Values can be signed or unsigned. |
long long _mpyhill (int src1, int src2); long long _mpylill (int src1, int src2); | MPYHI MPYLI | Produces a 16 by 32 multiply. The result is placed into the lower 48 bits of the return type. Can use the upper or lower 16 bits of src1. |
int _mpyhir (int src1, int src2); int _mpylir (int src1, int src2); | MPYHIR MPYLIR | Produces a signed 16 by 32 multiply. The result is shifted right by 15 bits. Can use the upper or lower 16 bits of src1. |
int _mpyhl (int src1, int src2); int _mpyhuls (unsigned src1, int src2); int _mpyhslu (int src1, unsigned src2); unsigned _mpyhlu (unsigned src1, unsigned src2); | MPYHL MPYHULS MPYHSLU MPYHLU | Multiplies the 16 MSBs of src1 by the 16 LSBs of src2 and returns the result. Values can be signed or unsigned. |
long long _mpyihll (int src1, int src2); long long _mpyilll (int src1, int src2); | MPYIH MPYIL | Swaps operands and calls _mpyhill. This is defined as a macro. You must include c6x.h. Swaps operands and calls _mpylill. This is defined as a macro. You must include c6x.h. |
int _mpyihr (int src1, int src2); int _mpyilr (int src1, int src2); | MPYIHR MPYILR | Swaps operands and calls _mpyhir. This is defined as a macro. You must include c6x.h. Swaps operands and calls _mpylir. This is defined as a macro. You must include c6x.h. |
int _mpylh (int src1, int src2); int _mpyluhs (unsigned src1, int src2); int _mpylshu (int src1, unsigned src2); unsigned _mpylhu (unsigned src1, unsigned src2); | MPYLH MPYLUHS MPYLSHU MPYLHU | Multiplies the 16 LSBs of src1 by the 16 MSBs of src2 and returns the result. Values can be signed or unsigned. |
long long _mpysu4ll (int src1, unsigned src2); long long _mpyus4ll (unsigned src1, int src2); long long _mpyu4ll (unsigned src1, unsigned src2); | MPYSU4 MPYUS4 MPYU4 | For each 8-bit quantity in src1 and src2, performs an 8-bit by 8-bit multiply. The four 16-bit results are packed into a 64-bit result. The results can be signed or unsigned. _mpyus4ll is defined as a macro. You must include c6x.h. |
int _mvd (int src2); | MVD | Moves the data from src2 to the return value over four cycles using the multiplier pipeline |
void _nassert (int src); | Generates no code. Tells the optimizer that the expression declared with the assert function is true; this gives a hint to the optimizer as to what optimizations might be valid. | |
unsigned _norm (int src); unsigned _lnorm (__int40_t src); | NORM | Returns the number of bits up to the first nonredundant sign bit of src |
unsigned _pack2 (unsigned src1, unsigned src2); unsigned _packh2 (unsigned src1, unsigned src2); | PACK2 PACKH2 | The lower/upper halfwords of src1 and src2 are placed in the return value. |
unsigned _packh4 (unsigned src1, unsigned src2); unsigned _packl4 (unsigned src1, unsigned src2); | PACKH4 PACKL4 | Packs alternate bytes into return value. Can pack high or low bytes. |
unsigned _packhl2 (unsigned src1, unsigned src2); unsigned _packlh2 (unsigned src1, unsigned src2); | PACKHL2 PACKLH2 | The upper/lower halfword of src1 is placed in the upper halfword of the return value. The lower/upper halfword of src2 is placed in the lower halfword of the return value. |
unsigned _rotl (unsigned src1, unsigned src2); | ROTL | Rotates src1 to the left by the amount in src2 |
int _rpack2 (int src1, int src2); | RPACK2 | Shifts src1 and src2 left by 1 with saturation. The 16 MSBs of the shifted src1 are placed in the 16 MSBs of the 32-bit output. The 16 MSBs of the shifted src2 are placed in the 16 LSBs of the 32-bit output. |
int _sadd (int src1, int src2); __int40_t _lsadd (int src1, __int40_t src2); | SADD | Adds src1 to src2 and saturates the result. Returns the result. |
int _sadd2 (int src1, int src2); int _saddus2 (unsigned src1, int src2); int _saddsu2 (int src1, unsigned src2); | SADD2 SADDUS2 SADDSU2 | Performs saturated addition between pairs of 16-bit values in src1 and src2. Values for src1 can be signed or unsigned. _saddsu2 is defined as a macro. You must include c6x.h. |
long long _saddsub (unsigned src1, unsigned src2); | SADDSUB | Performs a saturated addition and a saturated subtraction in parallel. |
long long _saddsub2 (unsigned src1, unsigned src2); | SADDSUB2 | Performs a SADD2 and a SSUB2 in parallel. |
unsigned _saddu4 (unsigned src1, unsigned src2); | SADDU4 | Performs saturated addition between pairs of 8-bit unsigned values in src1 and src2. |
int _sat (__int40_t src2); | SAT | Converts a 40-bit long to a 32-bit signed int and saturates if necessary. |
unsigned _set (unsigned src2, unsigned csta , unsigned cstb); | SET | Sets the specified field in src2 to all 1s and returns the src2 value. The beginning and ending bits of the field to be set are specified by csta and cstb, respectively. |
unsigned _setr (unsigned src2, int src1); | SET | Sets the specified field in src2 to all 1s and returns the src2 value. The beginning and ending bits of the field to be set are specified by the lower ten bits of src1. |
unsigned _shfl (unsigned src2); | SHFL | The lower 16 bits of src2 are placed in the even bit positions, and the upper 16 bits of src2 are placed in the odd bit positions. |
long long _shfl3 (unsigned src1, unsigned src2); | SHFL3 | Takes two 16-bit values from src1 and 16 LSBs from src2 to perform a 3-way interleave, creating a 48-bit result. |
unsigned _shlmb (unsigned src1, unsigned src2); unsigned _shrmb (unsigned src1, unsigned src2); | SHLMB SHRMB | Shifts src2 left/right by one byte, and the most/least significant byte of src1 is merged into the least/most significant byte position. |
int _shr2 (int src1, unsigned src2); unsigned _shru2 (unsigned src1, unsigned src2); | SHR2 SHRU2 | For each 16-bit quantity in src1, the quantity is arithmetically or logically shifted right by src2 number of bits. src1 can contain signed or unsigned values. |
int _smpy (int src1, int src2); int _smpyh (int src1, int src2); int _smpyhl (int src1, int src2); int _smpylh (int src1, int src2); | SMPY SMPYH SMPYHL SMPYLH | Multiplies src1 by src2, left shifts the result by 1, and returns the result. If the result is 0x80000000, saturates the result to 0x7FFFFFFF |
long long _smpy2ll (int src1, int src2); | SMPY2 | Performs 16-bit multiplication between pairs of signed packed 16-bit values, with an additional 1 bit left-shift and saturate into a 64-bit result. |
int _smpy32 (int src1, int src2); | SMPY32 | Returns the 32 MSBs of a 32 by 32 multiply shifted left by 1. |
int _spack2 (int src1, int src2); | SPACK2 | Two signed 32-bit values are saturated to 16-bit values and packed into the return value |
unsigned _spacku4 (int src1, int src2); | SPACKU4 | Four signed 16-bit values are saturated to 8-bit values and packed into the return value |
int _sshl (int src2, unsigned src1); | SSHL | Shifts src2 left by the contents of src1, saturates the result to 32 bits, and returns the result |
int _sshvl (int src2, int src1); int _sshvr (int src2, int src1); | SSHVL SSHVR | Shifts src2 to the left/right src1 bits. Saturates the result if the shifted value is greater than MAX_INT or less than MIN_INT. |
int _ssub (int src1, int src2); __int40_t _lssub (int src1, __int40_t src2); | SSUB | Subtracts src2 from src1, saturates the result, and returns the result. |
int _ssub2 (int src1, int src2); | SSUB2 | Subtracts the upper and lower halves of src2 from the upper and lower halves of src1 and saturates each result. |
int _sub4 (int src1, int src2); | SUB4 | Performs 2s-complement subtraction between pairs of packed 8-bit values |
int _subabs4 (int src1, int src2); | SUBABS4 | Calculates the absolute value of the differences for each pair of packed unsigned 8-bit values |
unsigned _subc (unsigned src1, unsigned src2); | SUBC | Conditional subtract divide step |
int _sub2 (int src1, int src2); | SUB2 | Subtracts the upper and lower halves of src2 from the upper and lower halves of src1, and returns the result. Borrowing in the lower half subtract does not affect the upper half subtract. |
unsigned _swap4 (unsigned src); | SWAP4 | Exchanges pairs of bytes (an endian swap) within each 16-bit value. |
unsigned _swap2 (unsigned src); | SWAP2 | Calls _packlh2. This is defined as a macro. You must include c6x.h. |
unsigned _unpkhu4 (unsigned src); | UNPKHU4 | Unpacks the two high unsigned 8-bit values into unsigned packed 16-bit values |
unsigned _unpklu4 (unsigned src); | UNPKLU4 | Unpacks the two low unsigned 8-bit values into unsigned packed 16-bit values |
unsigned _xormpy (unsigned src1, unsigned src2); | XORMPY | Performs a Galois Field multiply |
unsigned _xpnd2 (unsigned src); | XPND2 | Bits 1 and 0 of src are replicated to the upper and lower halfwords of the result, respectively. |
unsigned _xpnd4 (unsigned src); | XPND4 | Bits 3 through 0 of src are replicated to bytes 3 through 0 of the result. |
The intrinsics listed in Table 8-6 can be used for C6740 and C6600 devices, but not C6400+ devices. The intrinsics listed correspond to the indicated C6000 assembly language instruction(s). See the TMS320C6000 CPU and Instruction Set Reference Guide for more information.
See Table 8-5 for a list of generic C6000 intrinsics. See Table 8-7 for a list of C6600-specific intrinsics.
C/C++ Compiler Intrinsic | Assembly Instruction | Description |
---|---|---|
int _dpint (double src); | DPINT | Converts 64-bit double to 32-bit signed integer, using the rounding mode set by the CSR register. |
__int40_t _f2tol(__float2_t src); | Reinterprets a __float2_t register pair src as an __int40_t (stored as a register pair). This is defined as a macro. You must include c6x.h. | |
long long _f2toll(__float2_t src); | Reinterprets a __float2_t register pair as a long long register pair. This is defined as a macro. You must include c6x.h. | |
double _fabs (double src); float _fabsf (float src); | ABSDP ABSSP | Returns absolute value of src. |
__float2_t _lltof2(long long src); | Reinterprets a long long register pair as a __float2_t register pair. This is defined as a macro. You must include c6x.h. | |
__float2_t _ltof2(__int40_t src); | Reinterprets an __int40_t register pair as a __float2_t register pair. This is defined as a macro. You must include c6x.h. | |
__float2_t & _mem8_f2(void * ptr); | LDNDW STNDW | Allows unaligned loads and stores of 8 bytes to memory.(1) This is defined as a macro. You must include c6x.h. |
const __float2_t & _mem8_f2_const(void * ptr); | LDNDW STNDW | Allows unaligned loads of 8 bytes from memory.(1) This is defined as a macro. You must include c6x.h. |
long long _mpyidll (int src1, int src2); | MPYID | Produces a signed integer multiply. The result is placed in a register pair. |
double _mpysp2dp (float src1, float src2); | MPYSP2DP | Produces a double-precision floating-point multiply. The result is placed in a register pair. |
double _mpyspdp (float src1, double src2); | MPYSPDP | Produces a double-precision floating-point multiply. The result is placed in a register pair. |
double _rcpdp (double src); | RCPDP | Computes the approximate 64-bit double reciprocal. |
float _rcpsp (float src); | RCPSP | Computes the approximate 32-bit float reciprocal. |
double _rsqrdp (double src); | RSQRDP | Computes the approximate 64-bit double square root reciprocal. |
float _rsqrsp (float src); | RSQRSP | Computes the approximate 32-bit float square root reciprocal. |
int _spint (float src); | SPINT | Converts 32-bit float to 32-bit signed integer, using the rounding mode set by the CSR register. |
The intrinsics listed in Table 8-7 are supported only for C6600 devices. These intrinsics are in addition to those listed in Table 8-5 and Table 8-6. The intrinsics listed correspond to the indicated assembly language instruction(s). See the TMS320C6000 CPU and Instruction Set Reference Guide for more information.
C/C++ Compiler Intrinsic | Assembly Instruction | Description |
---|---|---|
| | ADDDP | No intrinsic. Use native C: a + b where a and b are doubles. |
| | ADDSP | No intrinsic. Use native C: a + b where a and b are floats. |
| | AND | No intrinsic. Use native C: "a & b" where a and b are long longs. |
| | ANDN | No intrinsic. Use native C: "a & ~b" where a and b are long longs. |
| | FMPYDP | No intrinsic. Use native C: a * b where a and b are doubles. |
| | OR | No intrinsic. Use native C: "a \| b" where a and b are long longs. |
| | SUBDP | No intrinsic. Use native C: a - b where a and b are doubles. |
| | SUBSP | No intrinsic. Use native C: a - b where a and b are floats. |
| | XOR | No intrinsic. Use native C: "a ^ b" where a and b are long longs. See also _xorll_c(). |
__x128_t _ccmatmpy (long long src1, __x128_t src2); | CCMATMPY | Multiply the conjugate of a 1x2 complex vector by a 2x2 complex matrix, producing two 64-bit complex results. For details on the __x128_t container type see Section 8.6.7. |
long long _ccmatmpyr1 (long long src1, __x128_t src2); | CCMATMPYR1 | Multiply the complex conjugate of a 1x2 complex vector by a 2x2 complex matrix, producing two 32-bit complex results. |
long long _ccmpy32r1 (long long src1, long long src2); | CCMPY32R1 | 32-bit complex conjugate multiply of Q31 numbers with rounding. |
__x128_t _cmatmpy (long long src1, __x128_t src2); | CMATMPY | Multiply a 1x2 complex vector by a 2x2 complex matrix, producing two 64-bit complex results. |
long long _cmatmpyr1 (long long src1, __x128_t src2); | CMATMPYR1 | Multiply a 1x2 complex vector by a 2x2 complex matrix, producing two 32-bit complex results. |
long long _cmpy32r1 (long long src1, long long src2); | CMPY32R1 | 32-bit complex multiply of Q31 numbers with rounding. |
__x128_t _cmpysp (__float2_t src1, __float2_t src2); | CMPYSP | Perform the multiply operations for a complex multiply of two complex numbers. (See also _complex_mpysp and _complex_conjugate_mpysp.) |
double _complex_conjugate_mpysp (double src1, double src2); | CMPYSP DSUBSP | Performs a complex conjugate multiply by performing a CMPYSP and DSUBSP. |
double _complex_mpysp (double src1, double src2); | CMPYSP DADDSP | Performs a complex multiply by performing a CMPYSP and DADDSP. |
int _crot90 (int src); | CROT90 | Rotate complex number by 90 degrees. |
int _crot270 (int src); | CROT270 | Rotate complex number by 270 degrees. |
long long _dadd (long long src1, long long src2); | DADD | Two-way SIMD addition of signed 32-bit values producing two signed 32-bit results. |
long long _dadd2 (long long src1, long long src2); | DADD2 | Four-way SIMD addition of packed signed 16-bit values producing four signed 16-bit results. (Two-way _add2) |
__float2_t _daddsp (__float2_t src1, __float2_t src2); | DADDSP | Two-way SIMD addition of 32-bit single precision numbers. |
long long _dadd_c (scst5 immediate src1, long long src2); | DADD | Addition of a single constant in src1 (-16 to 15) to each of the two signed 32-bit values in src2, producing two signed 32-bit results. |
long long _dapys2 (long long src1, long long src2); | DAPYS2 | Use the sign bit of src1 to determine whether to multiply the four 16-bit values in src2 by 1 or -1. Yields four signed 16-bit results. (If src1 and src2 are the same register pair, it is equivalent to a two-way _abs2). |
long long _davg2 (long long src1, long long src2); | DAVG2 | Four-way SIMD average of signed 16-bit values, with rounding. (Two-way _avg2) |
long long _davgnr2 (long long src1, long long src2); | DAVGNR2 | Four-way SIMD average of signed 16-bit values, without rounding. |
long long _davgnru4 (long long src1, long long src2); | DAVGNRU4 | Eight-way SIMD average of unsigned 8-bit values, without rounding. |
long long _davgu4 (long long src1, long long src2); | DAVGU4 | Eight-way SIMD average of unsigned 8-bit values, with rounding. (Two-way _avgu4) |
long long _dccmpyr1 (long long src1, long long src2); | DCCMPYR1 | Two-way SIMD complex multiply with rounding (_cmpyr1) with complex conjugate of src2. |
unsigned _dcmpeq2 (long long src1, long long src2); | DCMPEQ2 | Four-way SIMD comparison of signed 16-bit values. Results are packed into the four least-significant bits of the return value. (Two-way _cmpeq2) |
unsigned _dcmpeq4 (long long src1, long long src2); | DCMPEQ4 | Eight-way SIMD comparison of unsigned 8-bit values. Results are packed into the eight least-significant bits of the return value. (Two-way _cmpeq4) |
unsigned _dcmpgt2 (long long src1, long long src2); | DCMPGT2 | Four-way SIMD comparison of signed 16-bit values. Results are packed into the four least-significant bits of the return value. (Two-way _cmpgt2) |
unsigned _dcmpgtu4 (long long src1, long long src2); | DCMPGTU4 | Eight-way SIMD comparison of unsigned 8-bit values. Results are packed into the eight least-significant bits of the return value. (Two-way _cmpgtu4) |
__x128_t _dccmpy (long long src1, long long src2); | DCCMPY | Two complex multiply operations on two sets of packed complex numbers, with complex conjugate of src2. |
__x128_t _dcmpy (long long src1, long long src2); | DCMPY | Performs two complex multiply operations on two sets of packed complex numbers. (Two-way SIMD _cmpy) |
long long _dcmpyr1 (long long src1, long long src2); | DCMPYR1 | Two-way SIMD complex multiply with rounding (_cmpyr1). |
long long _dcrot90 (long long src); | DCROT90 | Two-way SIMD version of _crot90. |
long long _dcrot270 (long long src); | DCROT270 | Two-way SIMD version of _crot270. |
long long _ddotp4h (__x128_t src1, __x128_t src2 ); | DDOTP4H | Performs two dot-products between four sets of packed 16-bit values. (Two-way _dotp4h) |
long long _ddotpsu4h (__x128_t src1, __x128_t src2 ); | DDOTPSU4H | Performs two dot-products between four sets of packed 16-bit values. (Two-way _dotpsu4h) |
__float2_t _dinthsp (int src); | DINTHSP | Converts two packed signed 16-bit values into two single-precision floating point values. |
__float2_t _dinthspu (unsigned src); | DINTHSPU | Converts two packed unsigned 16-bit values into two single-precision floating point values. |
__float2_t _dintsp(long long src); | DINTSP | Converts two 32-bit signed integers to two single-precision floating point values. |
__float2_t _dintspu(long long src); | DINTSPU | Converts two 32-bit unsigned integers to two single-precision floating point values. |
long long _dmax2 (long long src1, long long src2); | DMAX2 | Four-way SIMD maximum of 16-bit signed values producing four signed 16-bit results. (Two-way _max2) |
long long _dmaxu4 (long long src1, long long src2); | DMAXU4 | 8-way SIMD maximum of unsigned 8-bit values producing eight unsigned 8-bit results. (Two-way _maxu4) |
long long _dmin2 (long long src1, long long src2); | DMIN2 | Four-way SIMD minimum of signed 16-bit values producing four signed 16-bit results. (Two-way _min2) |
long long _dminu4 (long long src1, long long src2); | DMINU4 | 8-way SIMD minimum of unsigned 8-bit values producing eight unsigned 8-bit results. (Two-way _minu4) |
__x128_t _dmpy2 (long long src1, long long src2); | DMPY2 | Four-way SIMD multiply of signed 16-bit values producing four signed 32-bit results. (Two-way _mpy2) |
__float2_t _dmpysp (__float2_t src1, __float2_t src2); | DMPYSP | Two-way single precision floating point multiply producing two single-precision results. |
__x128_t _dmpysu4 (long long src1, long long src2); | DMPYSU4 | Eight-way SIMD multiply of signed 8-bit values by unsigned 8-bit values producing eight signed 16-bit results. (Two-way _mpysu4) |
__x128_t _dmpyu2 (long long src1, long long src2); | DMPYU2 | Four-way SIMD multiply of unsigned 16-bit values producing four unsigned 32-bit results. (Two-way _mpyu2) |
__x128_t _dmpyu4 (long long src1, long long src2); | DMPYU4 | Eight-way SIMD multiply of unsigned 8-bit values producing eight unsigned 16-bit results. (Two-way _mpyu4) |
long long _dmvd (int src1, int src2 ); | DMVD | Places src1 in the low register of the long long and src2 in the high register of the long long. Takes four cycles. See also _dmv(), _fdmv_f2, and _itoll(). |
int _dotp4h (long long src1, long long src2 ); | DOTP4H | Multiply two sets of four signed 16-bit values and return the 32-bit sum. |
long long _dotp4hll (long long src1, long long src2 ); | DOTP4H | Multiply two sets of four signed 16-bit values and return the 64-bit sum. |
int _dotpsu4h (long long src1, long long src2); | DOTPSU4H | Multiply four signed 16-bit values by four unsigned 16-bit values and return the 32-bit sum. |
long long _dotpsu4hll (long long src1, long long src2); | DOTPSU4H | Multiply four signed 16-bit values by four unsigned 16-bit values and return the 64-bit sum. |
long long _dpackh2 (long long src1, long long src2); | DPACKH2 | Two-way _packh2. |
long long _dpackh4 (long long src1, long long src2); | DPACKH4 | Two-way _packh4. |
long long _dpacklh2 (long long src1, long long src2); | DPACKLH2 | Two-way _packlh2. |
long long _dpacklh4 (unsigned src1, unsigned src2); | DPACKLH4 | Performs a _packl4 and a _packh4. The output of the _packl4 is in the low register of the result and the output of the _packh4 is in the high register of the result. |
long long _dpackl2 (long long src1, long long src2); | DPACKL2 | Two-way _packl2. |
long long _dpackl4 (long long src1, long long src2); | DPACKL4 | Two-way _packl4. |
long long _dsadd (long long src1, long long src2); | DSADD | Two-way SIMD saturated addition of signed 32-bit values producing two signed 32-bit results. (Two-way _sadd) |
long long _dsadd2 (long long src1, long long src2); | DSADD2 | Four-way SIMD saturated addition of signed 16-bit values producing four signed 16-bit results. (Two-way _sadd2) |
long long _dshl (long long src1, unsigned src2); | DSHL | Shift-left of two signed 32-bit values by a single value in the src2 argument. |
long long _dshl2 (long long src1, unsigned src2); | DSHL2 | Shift-left of four signed 16-bit values by a single value in the src2 argument. (Two-way _shl2) |
long long _dshr (long long src1, unsigned src2); | DSHR | Shift-right of two signed 32-bit values by a single value in the src2 argument. |
long long _dshr2 (long long src1, unsigned src2); | DSHR2 | Shift-right of four signed 16-bit values by a single value in the src2 argument. (Two-way _shr2) |
long long _dshru (long long src1, unsigned src2); | DSHRU | Shift-right of two unsigned 32-bit values by a single value in the src2 argument. |
long long _dshru2 (long long src1, unsigned src2); | DSHRU2 | Shift-right of four unsigned 16-bit values by a single value in the src2 argument. (Two-way _shru2) |
__x128_t _dsmpy2 (long long src1, long long src2); | DSMPY2 | Four-way SIMD multiply of signed 16-bit values with 1-bit left-shift and saturate producing four signed 32-bit results. (Two-way _smpy2) |
long long _dspacku4 (long long src1, long long src2); | DSPACKU4 | Two-way _spacku4. |
long long _dspint (__float2_t src); | DSPINT | Converts two packed single-precision floating point values to two signed 32-bit values. |
unsigned _dspinth (__float2_t src); | DSPINTH | Converts two packed single-precision floating point values to two packed signed 16-bit values. |
long long _dssub (long long src1, long long src2); | DSSUB | Two-way SIMD saturated subtraction of 32-bit signed values producing two signed 32-bit results. |
long long _dssub2 (long long src1, long long src2); | DSSUB2 | Four-way SIMD saturated subtraction of signed 16-bit values producing four signed 16-bit results. (Two-way _ssub2) |
long long _dsub (long long src1, long long src2); | DSUB | Two-way SIMD subtraction of 32-bit signed values producing two signed 32-bit results. |
long long _dsub2 (long long src1, long long src2); | DSUB2 | Four-way SIMD subtraction of signed 16-bit values producing four signed 16-bit results. (Two-way _sub2) |
__float2_t _dsubsp (__float2_t src1, __float2_t src2); | DSUBSP | Two-way SIMD subtraction of 32-bit single precision numbers. |
long long _dxpnd2 (unsigned src); | DXPND2 | Expand four lower bits to four 16-bit fields. |
long long _dxpnd4 (unsigned src); | DXPND4 | Expand eight lower bits to eight 8-bit fields. |
__float2_t _fdmvd_f2(float src1, float src2); | DMVD | Places src1 in the low register of the __float2_t and src2 in the high register of the __float2_t. Takes four cycles. See also _dmv(), _dmvd(), and _itoll(). This is defined as a macro. You must include c6x.h. |
int _land (int src1, int src2); | LAND | Logical AND of src1 and src2. |
int _landn (int src1, int src2); | LANDN | Logical AND of src1 and the logical NOT of src2; i.e., src1 && !src2. |
int _lor (int src1, int src2); | LOR | Logical OR of src1 and src2. |
void _mfence(); | MFENCE | Stall CPU while memory system is busy. |
long long _mpyu2 (unsigned src1, unsigned src2 ); | MPYU2 | Two-way SIMD multiply of unsigned 16-bit values producing two unsigned 32-bit results. |
__x128_t _qmpy32 (__x128_t src1, __x128_t src2); | QMPY32 | Four-way SIMD multiply of signed 32-bit values producing four 32-bit results. (Four-way _mpy32) |
__x128_t _qmpysp (__x128_t src1, __x128_t src2); | QMPYSP | Four-way SIMD 32-bit single precision multiply producing four 32-bit single precision results. |
__x128_t _qsmpy32r1 (__x128_t src1, __x128_t src2); | QSMPY32R1 | 4-way SIMD fractional 32-bit by 32-bit multiply where each result value is shifted right by 31 bits and rounded. This normalizes the result to lie within -1 and 1 in a Q31 fractional number system. |
unsigned _shl2 (unsigned src1, unsigned src2); | SHL2 | Shift-left of two signed 16-bit values by a single value in the src2 argument. |
long long _unpkbu4 (unsigned src); | UNPKBU4 | Unpack four unsigned 8-bit values into four unsigned 16-bit values. (See also _unpklu4 and _unpkhu4) |
long long _unpkh2 (unsigned src); | UNPKH2 | Unpack two signed 16-bit values to two signed 32-bit values. |
long long _unpkhu2 (unsigned src); | UNPKHU2 | Unpack two unsigned 16-bit values to two unsigned 32-bit values. |
long long _xorll_c (scst5 immediate src1, long long src2); | XOR | XOR src1 with the upper and lower 32-bit portions of src2 (SIMD XOR by constant). |