INTRINSIC FUNCTIONS

Intrinsic functions are functions that the compiler handles directly, generating inline code instead of a call. VectorC will only recognise a function as an intrinsic if it is correctly prototyped.
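For example (a sketch, not taken from the manual - the function name "hypotenuse" is hypothetical), declaring the standard prototype for "sqrt", or including math.h, is enough for the call below to be compiled inline:

double sqrt (double);   /* correct prototype, so the call is treated as an intrinsic */

double hypotenuse (double a, double b)
{
    return sqrt (a * a + b * b);   /* compiled inline, no library call */
}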


Standard Intrinsic Functions

int abs (int) returns the absolute value of an integer
double fabs (double) returns the absolute value of a floating-point value. With the single-precision command-line option, this function returns a float if the parameter and the surrounding expression are single precision.
float fabs (float) If you declare this prototype instead, "fabs" is always single precision.
double sqrt (double) returns the square root of a floating-point value. With the single-precision command-line option, this function returns a float if the parameter and the surrounding expression are single precision. This function can also have __hint__((precision(12))) applied to it - see hints.
__alloca (int) Allocates space on the stack. Usually, there will be a macro called "alloca" which calls this intrinsic function.
memcpy (void *, void*, unsigned int) Copies a section of memory.
double cos (double) returns the cosine of an angle given in radians
double sin (double) returns the sine of an angle given in radians
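As a sketch of how the stack-allocation and copy intrinsics above might be used (the "alloca" macro, the void * return types and the function "copy_to_stack" are assumptions, not taken from the manual):

void *memcpy (void *, void *, unsigned int);   /* return type assumed */
void *__alloca (int);                          /* return type assumed */
#define alloca(n) __alloca(n)                  /* the macro usually provided for __alloca */

void copy_to_stack (int *src, unsigned int count)
{
    /* reserve space on the stack; it is released automatically on return */
    int *tmp = (int *) alloca ((int) (count * sizeof (int)));
    memcpy (tmp, src, count * (unsigned int) sizeof (int));
    /* ... work on the temporary copy here ... */
}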


VectorC-Specific Intrinsic Functions

int __cpu_supported (void) returns 1 if the program is running on a processor that supports the instruction set of the target processor selected at compile time. Call it right at the start of the program, before the processor has had a chance to execute any possibly unsupported instructions, and use it to give a message such as: "This program will not run on this processor".
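A sketch of the recommended usage (main and the message text are illustrative only):

#include <stdio.h>

int __cpu_supported (void);   /* prototype so VectorC recognises the intrinsic */

int main (void)
{
    if (!__cpu_supported ()) {
        printf ("This program will not run on this processor.\n");
        return 1;
    }
    /* ... the rest of the program, which may use target-specific instructions ... */
    return 0;
}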


MMX Intrinsic Functions

These functions are specified by Intel and are now supported by many compilers. Use them as an alternative to VectorC's vectorizer, but remember that the resulting code will require an MMX processor and must be compiled for an MMX target processor. A short usage sketch follows the table.

Function    MMX instruction    Comments
__m64 _m_paddb (__m64, __m64) paddb vector addition
__m64 _m_paddw (__m64, __m64) paddw
__m64 _m_paddd (__m64, __m64) paddd
__m64 _m_psubb (__m64, __m64) psubb vector subtraction
__m64 _m_psubw (__m64, __m64) psubw
__m64 _m_psubd (__m64, __m64) psubd
__m64 _m_pand (__m64, __m64) pand bitwise logical operations performed on 64-bit values
__m64 _m_pandn (__m64, __m64) pandn
__m64 _m_por (__m64, __m64) por
__m64 _m_pxor (__m64, __m64) pxor
__m64 _m_paddsb (__m64, __m64) paddsb saturated arithmetic
__m64 _m_psubsb (__m64, __m64) psubsb
__m64 _m_paddusb (__m64, __m64) paddusb
__m64 _m_psubusb (__m64, __m64) psubusb
__m64 _m_paddsw (__m64, __m64) paddsw
__m64 _m_psubsw (__m64, __m64) psubsw
__m64 _m_paddusw (__m64, __m64) paddusw
__m64 _m_psubusw (__m64, __m64) psubusw
__m64 _m_packsswb (__m64, __m64) packsswb convert from a large component type to a smaller component type with saturation
__m64 _m_packssdw (__m64, __m64) packssdw
__m64 _m_packuswb (__m64, __m64) packuswb
__m64 _m_punpcklbw (__m64, __m64) punpcklbw interleave (unpack) components from the low halves (punpckl*) or high halves (punpckh*) of the two vectors
__m64 _m_punpcklwd (__m64, __m64) punpcklwd
__m64 _m_punpckldq (__m64, __m64) punpckldq
__m64 _m_punpckhbw (__m64, __m64) punpckhbw
__m64 _m_punpckhwd (__m64, __m64) punpckhwd
__m64 _m_punpckhdq (__m64, __m64) punpckhdq
__m64 _m_pmullw (__m64, __m64) pmullw multiply 16-bit components, keeping the low 16 bits of each result
__m64 _m_pmulhw (__m64, __m64) pmulhw multiply 16-bit by 16-bit with 32-bit result, then arithmetic shift right by 16
__m64 _m_pmaddwd (__m64, __m64) pmaddwd multiply 16-bit components to 32-bit products, then add adjacent pairs to give 2 32-bit results
__m64 _m_psllw (__m64, __m64) psllw shift instructions
__m64 _m_pslld (__m64, __m64) pslld
__m64 _m_psllq (__m64, __m64) psllq
__m64 _m_psraw (__m64, __m64) psraw
__m64 _m_psrad (__m64, __m64) psrad
__m64 _m_psrlw (__m64, __m64) psrlw
__m64 _m_psrld (__m64, __m64) psrld
__m64 _m_psrlq (__m64, __m64) psrlq
__m64 _m_pcmpeqb (__m64, __m64) pcmpeqb vector comparison instructions. Can be used with "pand", "pandn" and "por" to create a vector conditional move
__m64 _m_pcmpeqw (__m64, __m64) pcmpeqw
__m64 _m_pcmpeqd (__m64, __m64) pcmpeqd
__m64 _m_pcmpgtb (__m64, __m64) pcmpgtb
__m64 _m_pcmpgtw (__m64, __m64) pcmpgtw
__m64 _m_pcmpgtd (__m64, __m64) pcmpgtd
__m64 _m_psllwi (__m64, int) psllw Alternative form of shift instructions using an int shift value
__m64 _m_pslldi (__m64, int) pslld
__m64 _m_psllqi (__m64, int) psllq
__m64 _m_psrawi (__m64, int) psraw
__m64 _m_psradi (__m64, int) psrad
__m64 _m_psrlwi (__m64, int) psrlw
__m64 _m_psrldi (__m64, int) psrld
__m64 _m_psrlqi (__m64, int) psrlq
__m64 _m_from_int (int) movd Create a 64-bit MMX value from an integer - the high 32 bits are zero
int _m_to_int (__m64) movd Extract the low 32 bits from an MMX value
void _m_empty (void) emms Ignored by VectorC - the placement of emms instructions is done automatically
__m64 _mm_set_pi32 (int, int)   Create a 64-bit MMX vector from components. The last value is in the lowest position in the vector
__m64 _mm_set_pi16 (short, short, short, short)  
__m64 _mm_set_pi8 (char, char, char, char, char, char, char, char)  
__m64 _mm_set1_pi32 (int)   Create a 64-bit MMX vector from a single component. The component is duplicated into every position
__m64 _mm_set1_pi16 (short)  
__m64 _mm_set1_pi8 (char)  
__m64 _mm_setr_pi32 (int, int)   Create a 64-bit MMX vector from components. The first value is in the lowest position in the vector
__m64 _mm_setr_pi16 (short, short, short, short)  
__m64 _mm_setr_pi8 (char, char, char, char, char, char, char, char)  
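As mentioned above, a short sketch of calling the MMX intrinsics directly (the header name, the function "add_saturate_u8" and the assumption that the arrays are a whole number of 8-byte groups are mine, not the manual's):

#include <mmintrin.h>   /* assumed to supply __m64 and the prototypes above */

void add_saturate_u8 (__m64 *dst, __m64 *a, __m64 *b, int groups)
{
    int i;
    for (i = 0; i < groups; i++)
        dst[i] = _m_paddusb (a[i], b[i]);   /* add 8 unsigned bytes with saturation */
    /* no _m_empty call is needed - VectorC places the emms instruction automatically */
}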


SSE Intrinsic Functions

These functions are specified by Intel and are now supported by many compilers. Use them as an alternative to VectorC's vectorizer, but remember that the resulting code will require an SSE processor and must be compiled for an SSE target processor. A short usage sketch follows the table.

Function    SSE instruction    Comments
__m64 _m_pmulhuw (__m64, __m64) pmulhuw multiply 16-bit unsigned by 16-bit unsigned to a 32-bit result, then shift right by 16
__m64 _m_pmaxsw (__m64, __m64) pmaxsw maximum signed 16-bit
__m64 _m_pminsw (__m64, __m64) pminsw minimum signed 16-bit
__m64 _m_pmaxub (__m64, __m64) pmaxub maximum unsigned 8-bit
__m64 _m_pminub (__m64, __m64) pminub minimum unsigned 8-bit
__m64 _m_pavgb (__m64, __m64) pavgb average unsigned 8-bit
__m64 _m_pavgw (__m64, __m64) pavgw average unsigned 16-bit
__m64 _m_psadbw (__m64, __m64) psadbw sum of absolute differences
__m64 _m_pshufw (__m64, const int) pshufw shuffle 16-bit words
int _m_pmovmskb (__m64) pmovmskb returns an 8-bit mask made from the most significant bit of each of the 8 bytes in the vector
void _mm_stream_pi (_m64 *, _m64) movntq store without going through cache
int _m_pextrw (_m64, const int) pextrw extract 16-bit value from vector
_m64 _m_pinsrw (_m64, int, const int) pinsrw put a 16-bit value into a vector
_m128 _mm_set_ps (float, float, float, float)   creates a float vector from 4 floats. Last value is in the lowest position in the vector
_m128 _mm_setr_ps (float, float, float, float)   creates a float vector from 4 floats. First value is in the lowest position in the vector
_m128 _mm_set_ps1 (float)   creates a float vector from a float - all components have the same value
_m128 _mm_set_ss (float)   creates a float vector from a float - the 3 highest components are zero
_m128 _mm_cvt_si2ss (int) cvtsi2ss convert integer to float
_m128 _mm_sqrt_ss (_m128) sqrtss square root of the lowest component
_m128 _mm_rsqrt_ss (_m128) rsqrtss approximate reciprocal square root of the lowest component
_m128 _mm_rcp_ss (_m128) rcpss approximate reciprocal of the lowest component
_m128 _mm_sqrt_ps (_m128) sqrtps square root of each component
_m128 _mm_rsqrt_ps (_m128) rsqrtps approximate reciprocal square root of each component
_m128 _mm_rcp_ps (_m128) rcpps approximate reciprocal of each component
_m128 _mm_loadu_ps (_m128 *) movups un-aligned load
void _mm_storeu_ps (_m128 *, _m128) movups un-aligned store
_m128 _mm_load_ps (_m128 *) movaps aligned load
_m128 _mm_loadr_ps (_m128 *) movaps, shufps aligned load and reverse order of vector
void _mm_prefetch (void *, const int) prefetcht0, prefetcht1, prefetcht2, prefetchnta prefetch data into cache
void _mm_stream_ps (_m128 *, _m128) movntps store without going through cache
_m128 _mm_load_ss (float *) movss load float
_m128 _mm_load_ps1 (float *) movss, shufps load float and duplicate into vector
void _mm_store_ps (_m128 *, _m128) movaps store float vector aligned
void _mm_store_ps1 (_m128 *, _m128) shufps, movaps store the lowest float to all four positions of an aligned vector
void _mm_storer_ps (_m128 *, _m128) shufps, movaps store float vector aligned after reversing components
void _mm_store_ss (float *, _m128) movss store float
_m128 _mm_add_ps (_m128, _m128) addps vector arithmetic
_m128 _mm_sub_ps (_m128, _m128) subps
_m128 _mm_mul_ps (_m128, _m128) mulps
_m128 _mm_div_ps (_m128, _m128) divps
_m128 _mm_add_ss (_m128, _m128) addss float arithmetic
_m128 _mm_mul_ss (_m128, _m128) mulss
_m128 _mm_div_ss (_m128, _m128) divss
_m128 _mm_and_ps (_m128, _m128) andps bitwise logical operations
_m128 _mm_andnot_ps (_m128, _m128) andnps
_m128 _mm_xor_ps (_m128, _m128) xorps
_m128 _mm_or_ps (_m128, _m128) orps
_m128 _mm_min_ss (_m128, _m128) minss minimum float
_m128 _mm_max_ss (_m128, _m128) maxss maximum float
_m128 _mm_cmpeq_ss (_m128, _m128) cmpss comparisons
_m128 _mm_cmpneq_ss (_m128, _m128)
_m128 _mm_cmplt_ss (_m128, _m128)
_m128 _mm_cmpgt_ss (_m128, _m128)
_m128 _mm_cmple_ss (_m128, _m128)
_m128 _mm_cmpge_ss (_m128, _m128)
_m128 _mm_cmpnlt_ss (_m128, _m128)
_m128 _mm_cmpnle_ss (_m128, _m128)
_m128 _mm_cmpngt_ss (_m128, _m128)
_m128 _mm_cmpnge_ss (_m128, _m128)
_m128 _mm_min_ps (_m128, _m128) minps minimum float vector
_m128 _mm_max_ps (_m128, _m128) maxps maximum float vector
_m128 _mm_cmpeq_ps (_m128, _m128) cmpps comparison of float vectors
_m128 _mm_cmpneq_ps (_m128, _m128)
_m128 _mm_cmplt_ps (_m128, _m128)
_m128 _mm_cmpnlt_ps (_m128, _m128)
_m128 _mm_cmpnle_ps (_m128, _m128)
_m128 _mm_cmpngt_ps (_m128, _m128)
_m128 _mm_cmpnge_ps (_m128, _m128)
_m128 _mm_cmpgt_ps (_m128, _m128)
_m128 _mm_cmple_ps (_m128, _m128)
_m128 _mm_cmpge_ps (_m128, _m128)
_m128 _mm_cmpord_ps (_m128, _m128)
_m128 _mm_cmpunord_ps (_m128, _m128)
_m128 _mm_unpackhi_ps (_m128, _m128) unpckhps interleave the upper two components of the two vectors
_m128 _mm_unpacklo_ps (_m128, _m128) unpcklps interleave the lower two components of the two vectors
_m128 _mm_movehl_ps (_m128, _m128) movhlps move high to low
_m128 _mm_movelh_ps (_m128, _m128) movlhps move low to high
_m128 _mm_move_ss (_m128, _m128) movss move the lowest floating-point value of the second vector into the lowest position of the first - the upper three values come from the first vector
int _mm_comieq_ss (_m128, _m128) comiss floating-point comparisons
int _mm_comilt_ss (_m128, _m128)
int _mm_comineq_ss (_m128, _m128)
int _mm_comile_ss (_m128, _m128)
int _mm_comigt_ss (_m128, _m128)
int _mm_comige_ss (_m128, _m128)
int _mm_ucomieq_ss (_m128, _m128) ucomiss
int _mm_ucomilt_ss (_m128, _m128)
int _mm_ucomineq_ss (_m128, _m128)
int _mm_ucomile_ss (_m128, _m128)
int _mm_ucomigt_ss (_m128, _m128)
int _mm_ucomige_ss (_m128, _m128)
_m128 _mm_loadh_pi (_m128, _m64 *) movhps load high 64-bits
_m128 _mm_loadl_pi (_m128, _m64 *) movlps load low 64-bits
void _mm_storeh_pi (_m64 *, _m128) movhps store high 64-bits
void _mm_storel_pi (_m64 *, _m128) movlps store low 64-bits
int _mm_cvt_ss2si (_m128) cvtss2si convert float to int
int _mm_cvtt_ss2si (_m128) cvttss2si convert float to int with truncation
_m128 _mm_cvt_si2ss (_m128, int) cvtsi2ss convert int to float
_m128 _mm_cvt_pi2ps (_m128, _m64) cvtpi2ps convert int vector to float vector
_m128 _mm_shuffle_ps (_m128, _m128, const int) shufps shuffle
void _mm_sfence (void) sfence store fence
int _mm_getcsr (void)   get SSE control register
void _mm_setcsr (int)   set SSE control register
_m128 _mm_setzero_ps (void) xorps set an SSE register to 0
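As mentioned above, a short sketch of calling the SSE intrinsics directly (the header name, the function "add_arrays" and the assumptions of 16-byte aligned data and a length that is a multiple of 4 are mine, not the manual's):

#include <xmmintrin.h>   /* assumed to supply the vector type and the prototypes above */

void add_arrays (float *dst, float *a, float *b, int count)
{
    int i;
    for (i = 0; i < count; i += 4) {
        __m128 va = _mm_load_ps (a + i);               /* aligned load (movaps) */
        __m128 vb = _mm_load_ps (b + i);
        _mm_store_ps (dst + i, _mm_add_ps (va, vb));   /* addps, then aligned store */
    }
}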

