返回

为什么 SSE 如此之慢?

这是一个对数组中的元素进行归一化的程序。两段程序代码基本一样,只是第二段多了一条 mulps 指令,而速度却慢了很多。请高手帮忙解答一下,谢谢!

#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "math.h"
#include "time.h"
#include "windows.h"

/* Number of 3-component vectors stored in vec and visited by each timed loop. */
#define NUM                  10000000

/*
 * Three-float vector type, declared with a 16-byte alignment request via the
 * MSVC-specific _declspec(align(16)) attribute.
 * NOTE(review): the resulting element stride of an array of vec3_t (12 vs. 16
 * bytes) depends on the compiler -- confirm before relying on aligned loads.
 */
typedef _declspec(align (16)) float vec3_t[3];
/* Global array of NUM vectors that the benchmark fills and then processes. */
vec3_t vec[NUM];


/*
 * Fill every element of the global vec array with pseudo-random components
 * in the range 0.00 .. 0.99 (three rand() calls per element, in x, y, z order).
 */
static void fill_random_vectors(void)
{
	int j;

	for (j = 0; j < NUM; j++) {
		vec[j][0] = (rand()%100)/100.0f;
		vec[j][1] = (rand()%100)/100.0f;
		vec[j][2] = (rand()%100)/100.0f;
	}
}

/*
 * Micro-benchmark comparing two partial SSE sequences of a vector
 * normalisation routine (MSVC x86 inline assembly).
 *
 * Each of the 5 rounds refills vec with random data and times two loops over
 * all NUM elements with clock():
 *   pass 1: movups load + movaps register copy
 *   pass 2: the same, followed by one mulps (squaring the components)
 * The elapsed clock ticks of each pass are printed, pass 2 followed by a
 * blank line. The program then waits for a key press.
 *
 * The scalar operation the SSE code is meant to implement eventually is:
 *   len = 1.0f / (float)sqrt(x*x + y*y + z*z);  x *= len; y *= len; z *= len;
 *
 * The remaining, currently disabled, SSE steps after the mulps are:
 *   movaps  xmm2, xmm1
 *   movaps  xmm3, xmm1
 *   shufps  xmm2, xmm2, 0xc9    ; rotate lanes 0..2 one way
 *   shufps  xmm3, xmm3, 0xd2    ; rotate lanes 0..2 the other way
 *   addps   xmm1, xmm2
 *   addps   xmm1, xmm3          ; lanes 0..2 = x*x + y*y + z*z
 *   rsqrtps xmm1, xmm1          ; approximate 1/sqrt
 *   mulps   xmm0, xmm1
 *   movups  [ecx], xmm0         ; store the normalised vector back
 *
 * NOTE(review): movups reads 16 bytes, i.e. one float more than the three
 * components of a vec3_t; confirm the element stride/padding so that the load
 * for the last element does not run past the end of vec.
 *
 * Returns 0.
 */
int main(void)
{
	int i, j;
	int s, e;

	srand((unsigned)time(NULL));

	for (i = 0; i < 5; i++) {
		/* Pass 1: load and register copy only. */
		fill_random_vectors();
		s = clock();
		for (j = 0; j < NUM; j++) {
			/*
			 * Compute the element address in C. Inside an _asm block the
			 * [] operator is the MASM unscaled byte-offset operator, not a
			 * C subscript, so writing vec[j] there does not address
			 * element j.
			 */
			float *elem = vec[j];

			_asm {
				mov		ecx, elem
				movups	xmm0, [ecx]
				movaps	xmm1, xmm0
			}
		}
		e = clock();
		printf("%d\n", e - s);

		/* Pass 2: identical to pass 1 plus a single mulps. */
		fill_random_vectors();
		s = clock();
		for (j = 0; j < NUM; j++) {
			/* Element pointer computed in C for the same reason as above. */
			float *elem = vec[j];

			_asm {
				mov		ecx, elem
				movups	xmm0, [ecx]
				movaps	xmm1, xmm0
				mulps	xmm1, xmm1  // this instruction is very slow -- why? (author's question)
			}
		}
		e = clock();
		printf("%d\n\n", e - s);
	}

	/* Keep the console window open until a key is pressed. */
	getch();

	return 0;
}

名字: 自动排版 密码:

回复 | (1069) | kitty | 2006-04-24 12:43:30