From d2ae02a8551c5d0fac4b21c4162e8d06f93d0fa1 Mon Sep 17 00:00:00 2001
From: Shyotl <Shyotl@gmail.com>
Date: Thu, 18 Aug 2011 00:33:59 -0500
Subject: [PATCH] Slight cleanup. Also, index arrays are always non-strided, so
 now using vectorization.

---
 indra/llcommon/llstrider.h    |  3 ++-
 indra/llprimitive/llmodel.cpp |  6 +++---
 indra/newview/llface.cpp      | 24 ++++++++++++++++++++----
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/indra/llcommon/llstrider.h b/indra/llcommon/llstrider.h
index 2afaacf3b..b9f0da206 100644
--- a/indra/llcommon/llstrider.h
+++ b/indra/llcommon/llstrider.h
@@ -52,6 +52,7 @@ public:
 	void setStride (S32 skipBytes)	{ mSkip = (skipBytes ? skipBytes : sizeof(Object));}
 	void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); }
 
+	bool isStrided() const		   { return mTypeSize != mSkip; } // True when the per-element byte step (mSkip) differs from the element byte size (mTypeSize), i.e. elements are not tightly packed.
 	void skip(const U32 index)     { mBytep += mSkip*index;}
 	U32 getSkip() const			   { return mSkip; }
 	Object* get()                  { return mObjectp; }
@@ -70,7 +71,7 @@ public:
 		//stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size.
 		//Because LLStrider is often passed an LLVector3 even if the reprensentation is LLVector4 in the vertex buffer, mTypeSize is set to 
 		//the TRUE vbo datatype size via VertexBufferStrider::get
-		if(mTypeSize == mSkip && mTypeSize == elem_size)	
+		if(!isStrided() && mTypeSize == elem_size)	
 		{
 			if(bytes >= sizeof(LLVector4) * 4)	//Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial.
 			{
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index ef126c770..f7a0ae72b 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -1019,13 +1019,13 @@ void LLModel::setVolumeFaceData(
 	face.resizeVertices(num_verts);
 	face.resizeIndices(num_indices);
 
-	if(pos.getSkip() == sizeof(LLVector4a))
+	if(!pos.isStrided())
 		LLVector4a::memcpyNonAliased16((F32*) face.mPositions, (F32*) pos.get(), num_verts*4*sizeof(F32));
 	else 
 		for(U32 i=0;i<num_verts;++i)	face.mPositions[i].load3(pos[i].mV);
 	if (norm.get())
 	{
-		if(norm.getSkip() == sizeof(LLVector4a))
+		if(!norm.isStrided())
 			LLVector4a::memcpyNonAliased16((F32*) face.mNormals, (F32*) norm.get(), num_verts*4*sizeof(F32));
 		else
 			for(U32 i=0;i<num_verts;++i)	face.mNormals[i].load3(norm[i].mV);
@@ -1038,7 +1038,7 @@ void LLModel::setVolumeFaceData(
 
 	if (tc.get())
 	{
-		if(tc.getSkip() == sizeof(LLVector2))
+		if(!tc.isStrided())
 			LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32));
 		else
 			for(U32 i=0;i<num_verts;++i)	face.mTexCoords[i] = tc[i].mV;
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 594173c17..aa5137960 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1157,13 +1157,29 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
     // INDICES
 	if (full_rebuild)
 	{
-		mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex);
-		for (U32 i = 0; i < (U32) num_indices; i++)
+		mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex, mIndicesCount, map_range);
+
+		__m128i* dst = (__m128i*) indicesp.get();
+		__m128i* src = (__m128i*) vf.mIndices;
+		__m128i offset = _mm_set1_epi16(index_offset);
+
+		S32 end = num_indices/8;
+		
+		for (S32 i = 0; i < end; i++)
 		{
-			indicesp[i] = vf.mIndices[i] + index_offset;
+			__m128i res = _mm_add_epi16(src[i], offset);
+			_mm_storeu_si128(dst+i, res);
 		}
 
-		//mVertexBuffer->setBuffer(0);
+		for (S32 i = end*8; i < num_indices; ++i)
+		{
+			indicesp[i] = vf.mIndices[i]+index_offset;
+		}
+
+		if (map_range)
+		{
+			mVertexBuffer->setBuffer(0);
+		}
 	}
 	
 	LLMatrix4a mat_normal;