// GCC lets you use SSE instructions manually through builtin functions.
// Compile this with: g++ main.cpp
// To get the assembly code, use: g++ -S main.cpp
#include <iostream>
#include <stdio.h>
#include <math.h>

// GCC vector-extension types: vector_size(N) declares an N-byte vector whose
// lanes have the given base type.
typedef int v2si __attribute__((vector_size(8)));
typedef char v8qi __attribute__((vector_size(8)));
typedef int v4si __attribute__((vector_size(16)));
typedef float v4sf __attribute__((vector_size(16)));

// Adds 8-lane char vectors with the explicit __builtin_ia32_paddb builtin
// (c = b + a, then c = c + a) and prints the first four lanes of the result.
void explicit_mmx()
{
    std::cout << "*********** MMX addition ***********" << std::endl;

    // Initialise every lane: the builtin operates on all 8 bytes, so leaving
    // lanes 4..7 unset would feed indeterminate values into the addition.
    v8qi a = {0, 1, 2, 3, 0, 0, 0, 0};
    v8qi b = {0, 1, 2, 3, 0, 0, 0, 0};

    v8qi c = __builtin_ia32_paddb(b, a);
    c = __builtin_ia32_paddb(c, a);

    // Lanes are chars; convert to int so they print as numbers, not characters.
    for (int i = 0; i < 4; i++)
        std::cout << i << ":" << static_cast<int>(c[i]) << std::endl;
}

// Same computation as explicit_mmx(), but written with the ordinary '+'
// operator on vector types so the compiler selects the instructions itself.
void implicit_mmx()
{
    std::cout << "*********** MMX addition ***********" << std::endl;

    v8qi a = {0, 1, 2, 3, 0, 0, 0, 0};
    v8qi b = {0, 1, 2, 3, 0, 0, 0, 0};

    v8qi c = b + a;
    c = c + a;

    for (int i = 0; i < 4; i++)
        std::cout << i << ":" << static_cast<int>(c[i]) << std::endl;
}

// Adds {v, v, v, v} to {1, 2, 3, 4} using the '+' operator on 4-lane int
// vectors and prints each lane of the sum.
void implicit_sse(int v)
{
    std::cout << "*********** implicit SSE addition ***********" << std::endl;

    v4si a = {v, v, v, v};
    v4si b = {1, 2, 3, 4};
    v4si c = a + b;
    // c = c + b;

    // Read lanes via vector subscripting rather than casting to int*.
    for (int i = 0; i < 4; i++)
        std::cout << i << ":" << c[i] << std::endl;
}

// Adds {1v, 2v, 3v, 4v} to {1, 2, 2, 3} with the explicit
// __builtin_ia32_addps builtin and prints each lane of the sum.
void explicit_sse(float v)
{
    std::cout << "*********** explicit SSE addition ***********" << std::endl;

    v4sf U = {1 * v, 2 * v, 3 * v, 4 * v};
    v4sf V = {1.0f, 2.0f, 2.0f, 3.0f};
    v4sf W = __builtin_ia32_addps(U, V);

    for (int i = 0; i < 4; i++)
        std::cout << i << ":" << W[i] << std::endl;
}

// Runs the four demos in order, then waits for a character on stdin before
// exiting.
int main()
{
    explicit_mmx();
    implicit_mmx();

    // Re-allow floating point operations (MMX and FPU registers are shared).
    __builtin_ia32_emms();

    implicit_sse(2);
    explicit_sse(3.141592);

    getchar();
    return 0;
}
