From d2ae02a8551c5d0fac4b21c4162e8d06f93d0fa1 Mon Sep 17 00:00:00 2001 From: Shyotl Date: Thu, 18 Aug 2011 00:33:59 -0500 Subject: [PATCH] Slight cleanup. Also, index arrays are always non-strided, so now using vectorization. --- indra/llcommon/llstrider.h | 3 ++- indra/llprimitive/llmodel.cpp | 6 +++--- indra/newview/llface.cpp | 24 ++++++++++++++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/indra/llcommon/llstrider.h b/indra/llcommon/llstrider.h index 2afaacf3b..b9f0da206 100644 --- a/indra/llcommon/llstrider.h +++ b/indra/llcommon/llstrider.h @@ -52,6 +52,7 @@ public: void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));} void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); } + bool isStrided() const { return mTypeSize != mSkip; } void skip(const U32 index) { mBytep += mSkip*index;} U32 getSkip() const { return mSkip; } Object* get() { return mObjectp; } @@ -70,7 +71,7 @@ public: //stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size. //Because LLStrider is often passed an LLVector3 even if the reprensentation is LLVector4 in the vertex buffer, mTypeSize is set to //the TRUE vbo datatype size via VertexBufferStrider::get - if(mTypeSize == mSkip && mTypeSize == elem_size) + if(!isStrided() && mTypeSize == elem_size) { if(bytes >= sizeof(LLVector4) * 4) //Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial. { diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index ef126c770..f7a0ae72b 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1019,13 +1019,13 @@ void LLModel::setVolumeFaceData( face.resizeVertices(num_verts); face.resizeIndices(num_indices); - if(pos.getSkip() == sizeof(LLVector4a)) + if(!pos.isStrided()) LLVector4a::memcpyNonAliased16((F32*) face.mPositions, (F32*) pos.get(), num_verts*4*sizeof(F32)); else for(U32 i=0;igetIndexStrider(indicesp, mIndicesIndex); - for (U32 i = 0; i < (U32) num_indices; i++) + mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex, mIndicesCount, map_range); + + __m128i* dst = (__m128i*) indicesp.get(); + __m128i* src = (__m128i*) vf.mIndices; + __m128i offset = _mm_set1_epi16(index_offset); + + S32 end = num_indices/8; + + for (S32 i = 0; i < end; i++) { - indicesp[i] = vf.mIndices[i] + index_offset; + __m128i res = _mm_add_epi16(src[i], offset); + _mm_storeu_si128(dst+i, res); } - //mVertexBuffer->setBuffer(0); + for (S32 i = end*8; i < num_indices; ++i) + { + indicesp[i] = vf.mIndices[i]+index_offset; + } + + if (map_range) + { + mVertexBuffer->setBuffer(0); + } } LLMatrix4a mat_normal;