diff --git a/indra/llcommon/llstrider.h b/indra/llcommon/llstrider.h index 444820c67..2afaacf3b 100644 --- a/indra/llcommon/llstrider.h +++ b/indra/llcommon/llstrider.h @@ -42,13 +42,15 @@ template class LLStrider U8* mBytep; }; U32 mSkip; + U32 mTypeSize; public: - LLStrider() { mObjectp = NULL; mSkip = sizeof(Object); } + LLStrider() { mObjectp = NULL; mTypeSize = mSkip = sizeof(Object); } ~LLStrider() { } const LLStrider& operator = (Object *first) { mObjectp = first; return *this;} void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));} + void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); } void skip(const U32 index) { mBytep += mSkip*index;} U32 getSkip() const { return mSkip; } @@ -58,18 +60,68 @@ public: Object* operator ++(int) { Object* old = mObjectp; mBytep += mSkip; return old; } Object* operator +=(int i) { mBytep += mSkip*i; return mObjectp; } Object& operator[](U32 index) { return *(Object*)(mBytep + (mSkip * index)); } - void assignArray(U8* buff, size_t elem_size, size_t elem_count) + void assignArray(U8* __restrict source, const size_t elem_size, const size_t elem_count) { llassert_always(sizeof(Object) <= elem_size); - if(sizeof(Object) == mSkip && sizeof(Object) == elem_size) //No stride. No difference in element size. - LLVector4a::memcpyNonAliased16((F32*) mBytep, (F32*) buff, elem_size * elem_count); + + U8* __restrict dest = mBytep; //refer to dest instead of mBytep to benefit from __restrict hint + const U32 bytes = elem_size * elem_count; //total bytes to copy from source to dest + + //stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size. 
+ //Because LLStrider is often passed an LLVector3 even if the representation is LLVector4 in the vertex buffer, mTypeSize is set to + //the TRUE vbo datatype size via VertexBufferStrider::get + if(mTypeSize == mSkip && mTypeSize == elem_size) + { + if(bytes >= sizeof(LLVector4) * 4) //Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial. + { + U8* __restrict aligned_source = LL_NEXT_ALIGNED_ADDRESS(source); + U8* __restrict aligned_dest = LL_NEXT_ALIGNED_ADDRESS(dest); + const U32 source_offset = aligned_source - source; //Offset to first aligned location in source buffer. + const U32 dest_offset = aligned_dest - dest; //Offset to first aligned location in dest buffer. + llassert_always(source_offset < 16); + llassert_always(dest_offset < 16); + if(source_offset == dest_offset) //delta to aligned location matches between source and destination! _mm_*_ps should be viable. + { + const U32 end_offset = (bytes - source_offset) % sizeof(LLVector4); //buffers may not neatly end on a 16byte alignment boundary. + const U32 aligned_bytes = bytes - source_offset - end_offset; //how many bytes to copy from aligned start to aligned end. + + llassert_always(aligned_bytes > 0); + + if(source_offset) //memcpy up to the aligned location if needed + memcpy(dest,source,source_offset); + LLVector4a::memcpyNonAliased16((F32*) aligned_dest, (F32*) aligned_source, aligned_bytes); + if(end_offset) //memcpy to the very end if needed. + memcpy(aligned_dest+aligned_bytes,aligned_source+aligned_bytes,end_offset); + } + else //buffers non-uniformly offset from aligned location. Using _mm_*u_ps. 
+ { + U32 end = bytes/sizeof(LLVector4); //sizeof(LLVector4) = 16 bytes = 128 bits + + llassert_always(end > 0); + + __m128* dst = (__m128*) dest; + __m128* src = (__m128*) source; + + for (U32 i = 0; i < end; i++) //copy 128bit chunks + { + __m128 res = _mm_loadu_ps((F32*)&src[i]); + _mm_storeu_ps((F32*)&dst[i], res); + } + end*=16;//Convert to real byte offset + if(end < bytes) //just memcopy the rest + memcpy(dest+end,source+end,bytes-end); + } + } + else //Too small. just do a simple memcpy. + memcpy(dest,source,bytes); + } else { for(U32 i=0;ifirst, pos ) ) + { + return iterPos->second; + } + } + + //2. Otherwise we'll use the older implementation weight_map::iterator iter = mSkinWeights.find(pos); if (iter != mSkinWeights.end()) diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h index fe37d3420..58975e55b 100644 --- a/indra/llprimitive/llmodel.h +++ b/indra/llprimitive/llmodel.h @@ -223,20 +223,17 @@ public: }; - struct JointPositionalCompare + //Are the doubles the same w/in epsilon specified tolerance + bool areEqual( double a, double b ) { - //Are the doubles the same w/in epsilon specified tolerance - bool areEqual( double a, double b ) - { - const float epsilon = 1e-5f; - return (abs((int)(a - b)) < epsilon) && (a < b); - } - //Make sure that we return false for any values that are within the tolerance for equivalence - bool operator() ( const LLVector3& a, const LLVector3& b ) - { - return ( areEqual( a[0],b[0]) && areEqual( a[1],b[1] ) && areEqual( a[2],b[2]) ) ? false : true; - } - }; + const float epsilon = 1e-5f; + return (fabs((a - b)) < epsilon) ? true : false ; + } + //Make sure that we return false for any values that are within the tolerance for equivalence + bool jointPositionalLookup( const LLVector3& a, const LLVector3& b ) + { + return ( areEqual( a[0],b[0]) && areEqual( a[1],b[1] ) && areEqual( a[2],b[2]) ) ? 
true : false; + } //copy of position array for this model -- mPosition[idx].mV[X,Y,Z] std::vector mPosition; diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index e0c469544..301a3f056 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -1192,12 +1192,14 @@ template struct VertexBufferStrider strider = (T*)(ptr + index*sizeof(T)); strider.setStride(0); + strider.setTypeSize(0); return TRUE; } else if (vbo.hasDataType(type)) { S32 stride = vbo.getStride(); volatile U8* ptr = vbo.mapVertexBuffer(type); + S32 size = LLVertexBuffer::sTypeSize[type]; if (ptr == NULL) { @@ -1207,6 +1209,7 @@ template struct VertexBufferStrider strider = (T*)(ptr + vbo.getOffset(type) + index*stride); strider.setStride(stride); + strider.setTypeSize(size); return TRUE; } else @@ -1575,11 +1578,3 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const llglassertok(); } -void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count) -{ - // TODO: use GL_APPLE_flush_buffer_range here - /*if (useVBOs() && !mFilthy) - { - - }*/ -} diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index f59f92518..fef966151 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -213,8 +213,6 @@ public: void setStride(S32 type, S32 new_stride); - void markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count); - void draw(U32 mode, U32 count, U32 indices_offset) const; void drawArrays(U32 mode, U32 offset, U32 count) const; void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const; diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp index c67249ebf..86500e297 100644 --- a/indra/newview/llmeshrepository.cpp +++ b/indra/newview/llmeshrepository.cpp @@ -3180,7 +3180,8 @@ void LLPhysicsDecomp::doDecompositionSingleHull() return; #endif //!MESH_IMPORT #if MESH_IMPORT 
- + LLConvexDecomposition* decomp = LLConvexDecomposition::getInstance(); + if (decomp == NULL) { //stub. do nothing. diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index d86205019..3c040c5dd 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -3678,12 +3678,8 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: U32 te_idx = facep->getTEOffset(); - if (facep->getGeometryVolume(*volume, te_idx, - vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset)) - { - buffer->markDirty(facep->getGeomIndex(), facep->getGeomCount(), - facep->getIndicesStart(), facep->getIndicesCount()); - } + facep->getGeometryVolume(*volume, te_idx, + vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset); } } @@ -3709,9 +3705,9 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: // can we safely treat this as an alpha mask? if (facep->canRenderAsMask()) { - const LLDrawable* drawablep = facep->getDrawable(); - const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL; - if (te->getFullbright() || (vobj && vobj->isHUDAttachment())) + //const LLDrawable* drawablep = facep->getDrawable(); + //const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL; + if (te->getFullbright() /*|| (vobj && vobj->isHUDAttachment())*/) { registerFace(group, facep, LLRenderPass::PASS_FULLBRIGHT_ALPHA_MASK); }