返回

SSE运算居然比C还慢!

下面这段程序使用 SSE 对三维向量做单位化(归一化)计算,但实测 C 版本比 SSE 版本快很多,不知道是什么原因,请各位高手帮忙看看。

#include <stdio.h>#include <stdlib.h>#include <math.h>#include <time.h>#include <conio.h>typedef _declspec(align(16)) float vec3_t[3]; inline void vec_normalize_sse(vec3_t vec){ _asm { mov esi, vec movups xmm0, [esi] movups xmm1, xmm0 mulps xmm1, xmm1

movups xmm2, xmm1 shufps xmm2, xmm1, 0xe1 movups xmm3, xmm1 shufps xmm3, xmm1, 0xc6 addps xmm1, xmm2 addps xmm1, xmm3

shufps xmm1, xmm1, 0x00 sqrtps xmm1, xmm1 divps xmm0, xmm1

movups [esi], xmm0 }}inline void vec_normalize_c(vec3_t vec){ float len; len = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2]; len = (float)sqrt(len); len = 1.0f/len; vec[0] *= len; vec[1] *= len; vec[2] *= len;}int main(){ int i, s, e, count; vec3_t vec; count = 1000000; vec[0] = 1.0f; vec[1] = 2.0f; vec[2] = 3.0f; s = clock(); for (i = 0; i < count; i++) { vec[0] += 0.1f; vec[1] += 0.1f; vec[2] += 0.1f; vec_normalize_sse(vec); } e = clock(); printf("sse = %d, %f, %f, %f\n", e - s, vec[0], vec[1], vec[2]);

vec[0] = 1.0f; vec[1] = 2.0f; vec[2] = 3.0f; s = clock(); for (i = 0; i < count; i++) { vec[0] += 0.1f; vec[1] += 0.1f; vec[2] += 0.1f; vec_normalize_c(vec); } e = clock(); printf("c = %d, %f, %f, %f\n", e - s, vec[0], vec[1], vec[2]); getch(); return 0;}

名字: 自动排版 密码:

回复 | (1052) | leo1981816 | 2006-04-10 05:42:16