返回

SSE居然比C还慢

这是一段把向量归一化为单位向量的代码，发现 C 的版本比 SSE 的版本要快得多！真是费解，不知道是程序写得有问题，还是其他什么原因。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <conio.h>
/*
 * Three-component float vector (12 bytes of data).
 * _declspec(align(16)) is an MSVC extension requesting 16-byte alignment,
 * so the vector can be the operand of aligned SSE memory instructions.
 * The alignment does not add a fourth element: only indices 0..2 belong
 * to the array.
 */
typedef _declspec(align(16)) float vec3_t[3];


/*
 * Normalize a 3-component vector in place using SSE (MSVC x86 inline asm).
 *
 * vec: three floats (x, y, z); each is overwritten with its value divided
 *      by sqrt(x*x + y*y + z*z).
 *
 * Only the 12 bytes that make up vec are read and written. A full 16-byte
 * movaps load/store would also touch the 4 bytes located after the array:
 * whatever is stored there would be pulled through mulps/sqrtps/divps and
 * then overwritten with the result of that lane.
 * NOTE(review): arbitrary bits in a fourth lane can be a denormal or NaN
 * when viewed as a float, and such operands can make SSE arithmetic much
 * slower -- possibly a factor in timing comparisons; confirm by measuring.
 *
 * There is no zero-length check: a zero vector produces NaN components
 * (0/0).
 */
inline void vec_normalize_sse(vec3_t vec)
{
	_asm {
		mov     esi, vec

		; Load exactly three floats: xmm0 = (x, y, z, 0).
		movss   xmm1, dword ptr [esi+8]   ; xmm1 = (z, 0, 0, 0)
		movlps  xmm0, qword ptr [esi]     ; low half of xmm0 = (x, y)
		movlhps xmm0, xmm1                ; high half of xmm0 = (z, 0)

		; xmm1 = (x*x, y*y, z*z, 0)
		movaps  xmm1, xmm0
		mulps   xmm1, xmm1

		; Two shuffled copies rotate y*y and z*z into lane 0 so that two
		; vertical adds leave x*x + y*y + z*z in lane 0.
		movaps  xmm2, xmm1
		shufps  xmm2, xmm1, 0xe1          ; (y*y, x*x, z*z, 0)
		movaps  xmm3, xmm1
		shufps  xmm3, xmm1, 0xc6          ; (z*z, y*y, x*x, 0)
		addps   xmm1, xmm2
		addps   xmm1, xmm3

		; Broadcast the squared length, take the root, divide every lane.
		shufps  xmm1, xmm1, 0x00
		sqrtps  xmm1, xmm1
		divps   xmm0, xmm1

		; Store exactly three floats back.
		movlps  qword ptr [esi], xmm0     ; x, y
		movhlps xmm0, xmm0                ; lane 0 = z
		movss   dword ptr [esi+8], xmm0   ; z
	}
}


/*
 * Normalize a 3-component vector in place (plain C reference version).
 *
 * vec: three floats; each is multiplied by 1/sqrt(x*x + y*y + z*z).
 *
 * The squared length is rounded to float before the square root, and the
 * reciprocal is computed once and reused for all three components.
 * There is no zero-length check.
 */
inline void vec_normalize_c(vec3_t vec)
{
	float sq_len;
	float inv_len;
	int   k;

	sq_len  = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2];
	inv_len = 1.0f / (float)sqrt(sq_len);

	for (k = 0; k < 3; k++) {
		vec[k] *= inv_len;
	}
}

/*
 * Benchmark driver.
 *
 * Resets the vector to (1, 2, 3), calls vec_normalize_sse on it
 * 10,000,000 times and prints the elapsed clock() ticks together with the
 * final components; then does the same for vec_normalize_c. Finally waits
 * for a key press via getch() before returning 0.
 *
 * Each timed loop keeps re-normalizing the same vector, so every call
 * after the first works on the output of the previous call.
 */
int main()
{
	int n, t_begin, t_end, iterations;
	vec3_t v;

	iterations = 10000000;

	/* --- SSE variant --- */
	v[0] = 1.0f;
	v[1] = 2.0f;
	v[2] = 3.0f;
	t_begin = clock();
	n = 0;
	while (n < iterations) {
		vec_normalize_sse(v);
		n++;
	}
	t_end = clock();
	printf("sse = %d, %f, %f, %f\n", t_end - t_begin, v[0], v[1], v[2]);

	/* --- plain C variant --- */
	v[0] = 1.0f;
	v[1] = 2.0f;
	v[2] = 3.0f;
	t_begin = clock();
	n = 0;
	while (n < iterations) {
		vec_normalize_c(v);
		n++;
	}
	t_end = clock();
	printf("c = %d, %f, %f, %f\n", t_end - t_begin, v[0], v[1], v[2]);

	/* Keep the console window open until a key is pressed. */
	getch();
	return 0;
}

名字: 自动排版 密码:

回复 | (1054) | leo1981816 | 2006-04-11 09:03:28