Shyotl
2011-08-05 01:18:27 -05:00
parent 2c489d7741
commit b75a28ec15
7 changed files with 92 additions and 40 deletions

View File

@@ -42,13 +42,15 @@ template <class Object> class LLStrider
U8* mBytep;
};
U32 mSkip;
U32 mTypeSize;
public:
LLStrider() { mObjectp = NULL; mSkip = sizeof(Object); }
// Default ctor: null element pointer; stride and true element size both start as the packed sizeof(Object).
LLStrider() { mObjectp = NULL; mTypeSize = mSkip = sizeof(Object); }
~LLStrider() { }
// Point the strider at the first element of an array (stride/type size are left unchanged).
const LLStrider<Object>& operator = (Object *first) { mObjectp = first; return *this;}
// Distance in bytes between consecutive elements; 0 restores the packed sizeof(Object) stride.
void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));}
// True size in bytes of one element in the underlying buffer; 0 restores sizeof(Object).
// May differ from sizeof(Object), e.g. an LLVector3 strider over LLVector4-sized vbo data.
void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); }
// Advance the strider by index elements (index * stride bytes).
void skip(const U32 index) { mBytep += mSkip*index;}
// Current stride in bytes.
U32 getSkip() const { return mSkip; }
@@ -58,18 +60,68 @@ public:
// Post-increment: advance the byte pointer by one stride; returns the element pointer from before the step.
Object* operator ++(int) { Object* old = mObjectp; mBytep += mSkip; return old; }
// Advance the byte pointer by i strides; returns the current element pointer.
Object* operator +=(int i) { mBytep += mSkip*i; return mObjectp; }
// Random access by element index, honoring the configured stride.
Object& operator[](U32 index) { return *(Object*)(mBytep + (mSkip * index)); }
void assignArray(U8* buff, size_t elem_size, size_t elem_count)
void assignArray(U8* __restrict source, const size_t elem_size, const size_t elem_count)
{
llassert_always(sizeof(Object) <= elem_size);
if(sizeof(Object) == mSkip && sizeof(Object) == elem_size) //No stride. No difference in element size.
LLVector4a::memcpyNonAliased16((F32*) mBytep, (F32*) buff, elem_size * elem_count);
U8* __restrict dest = mBytep; //refer to dest instead of mBytep to benefit from __restrict hint
const U32 bytes = elem_size * elem_count; //total bytes to copy from source to dest
//stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size.
//Because LLStrider is often passed an LLVector3 even if the reprensentation is LLVector4 in the vertex buffer, mTypeSize is set to
//the TRUE vbo datatype size via VertexBufferStrider::get
if(mTypeSize == mSkip && mTypeSize == elem_size)
{
if(bytes >= sizeof(LLVector4) * 4) //Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial.
{
U8* __restrict aligned_source = LL_NEXT_ALIGNED_ADDRESS(source);
U8* __restrict aligned_dest = LL_NEXT_ALIGNED_ADDRESS(dest);
const U32 source_offset = aligned_source - source; //Offset to first aligned location in source buffer.
const U32 dest_offset = aligned_dest - dest; //Offset to first aligned location in dest buffer.
llassert_always(source_offset < 16);
llassert_always(dest_offset < 16);
if(source_offset == dest_offset) //delta to aligned location matches between source and destination! _mm_*_ps should be viable.
{
const U32 end_offset = (bytes - source_offset) % sizeof(LLVector4); //buffers may not neatly end on a 16byte alignment boundary.
const U32 aligned_bytes = bytes - source_offset - end_offset; //how many bytes to copy from aligned start to aligned end.
llassert_always(aligned_bytes > 0);
if(source_offset) //memcpy up to the aligned location if needed
memcpy(dest,source,source_offset);
LLVector4a::memcpyNonAliased16((F32*) aligned_dest, (F32*) aligned_source, aligned_bytes);
if(end_offset) //memcpy to the very end if needed.
memcpy(aligned_dest+aligned_bytes,aligned_source+aligned_bytes,end_offset);
}
else //buffers non-uniformly offset from aligned location. Using _mm_*u_ps.
{
U32 end = bytes/sizeof(LLVector4); //sizeof(LLVector4) = 16 bytes = 128 bits
llassert_always(end > 0);
__m128* dst = (__m128*) dest;
__m128* src = (__m128*) source;
for (U32 i = 0; i < end; i++) //copy 128bit chunks
{
__m128 res = _mm_loadu_ps((F32*)&src[i]);
_mm_storeu_ps((F32*)&dst[i], res);
}
end*=16;//Convert to real byte offset
if(end < bytes) //just memcopy the rest
memcpy(dest+end,source+end,bytes-end);
}
}
else //Too small. just do a simple memcpy.
memcpy(dest,source,bytes);
}
else
{
for(U32 i=0;i<elem_count;i++)
{
memcpy(mBytep,buff,sizeof(Object));
mBytep+=mSkip;
buff+=elem_size;
memcpy(dest,source,sizeof(Object));
dest+=mSkip;
source+=elem_size;
}
}
}

View File

@@ -1695,6 +1695,19 @@ LLSD LLModel::writeModelToStream(std::ostream& ostr, LLSD& mdl, BOOL nowrite, BO
LLModel::weight_list& LLModel::getJointInfluences(const LLVector3& pos)
{
//1. If a vertex has been weighted then we'll find it via pos and return it's weight list
weight_map::iterator iterPos = mSkinWeights.begin();
weight_map::iterator iterEnd = mSkinWeights.end();
for ( ; iterPos!=iterEnd; ++iterPos )
{
if ( jointPositionalLookup( iterPos->first, pos ) )
{
return iterPos->second;
}
}
//2. Otherwise we'll use the older implementation
weight_map::iterator iter = mSkinWeights.find(pos);
if (iter != mSkinWeights.end())

View File

@@ -223,20 +223,17 @@ public:
};
// Returns true when two doubles are equal to within a small absolute tolerance.
// @param a first value
// @param b second value
// @return true if |a - b| < 1e-5
bool areEqual( double a, double b )
{
	// Epsilon is a double: the operands are doubles and a float literal
	// would be silently widened in the comparison anyway.
	const double epsilon = 1e-5;
	return fabs(a - b) < epsilon;
}
// Returns true when every component of a and b matches within the areEqual()
// epsilon tolerance, i.e. the two positions coincide for lookup purposes.
// (The previous comment claimed the opposite polarity; the code returns true
// on a within-tolerance match.)
bool jointPositionalLookup( const LLVector3& a, const LLVector3& b )
{
	return areEqual(a[0], b[0]) && areEqual(a[1], b[1]) && areEqual(a[2], b[2]);
}
//copy of position array for this model -- mPosition[idx].mV[X,Y,Z]
std::vector<LLVector3> mPosition;

View File

@@ -1192,12 +1192,14 @@ template <class T,S32 type> struct VertexBufferStrider
strider = (T*)(ptr + index*sizeof(T));
strider.setStride(0);
strider.setTypeSize(0);
return TRUE;
}
else if (vbo.hasDataType(type))
{
S32 stride = vbo.getStride();
volatile U8* ptr = vbo.mapVertexBuffer(type);
S32 size = LLVertexBuffer::sTypeSize[type];
if (ptr == NULL)
{
@@ -1207,6 +1209,7 @@ template <class T,S32 type> struct VertexBufferStrider
strider = (T*)(ptr + vbo.getOffset(type) + index*stride);
strider.setStride(stride);
strider.setTypeSize(size);
return TRUE;
}
else
@@ -1575,11 +1578,3 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const
llglassertok();
}
// Stub: per-range dirty marking of vertex/index data is not implemented;
// all parameters are currently ignored (see TODO below).
void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count)
{
	// TODO: use GL_APPLE_flush_buffer_range here
	/*if (useVBOs() && !mFilthy)
	{
	}*/
}

View File

@@ -213,8 +213,6 @@ public:
void setStride(S32 type, S32 new_stride);
void markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count);
void draw(U32 mode, U32 count, U32 indices_offset) const;
void drawArrays(U32 mode, U32 offset, U32 count) const;
void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;

View File

@@ -3180,7 +3180,8 @@ void LLPhysicsDecomp::doDecompositionSingleHull()
return;
#endif //!MESH_IMPORT
#if MESH_IMPORT
LLConvexDecomposition* decomp = LLConvexDecomposition::getInstance();
if (decomp == NULL)
{
//stub. do nothing.

View File

@@ -3678,12 +3678,8 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
U32 te_idx = facep->getTEOffset();
if (facep->getGeometryVolume(*volume, te_idx,
vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset))
{
buffer->markDirty(facep->getGeomIndex(), facep->getGeomCount(),
facep->getIndicesStart(), facep->getIndicesCount());
}
facep->getGeometryVolume(*volume, te_idx,
vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset);
}
}
@@ -3709,9 +3705,9 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
// can we safely treat this as an alpha mask?
if (facep->canRenderAsMask())
{
const LLDrawable* drawablep = facep->getDrawable();
const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL;
if (te->getFullbright() || (vobj && vobj->isHUDAttachment()))
//const LLDrawable* drawablep = facep->getDrawable();
//const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL;
if (te->getFullbright() /*|| (vobj && vobj->isHUDAttachment())*/)
{
registerFace(group, facep, LLRenderPass::PASS_FULLBRIGHT_ALPHA_MASK);
}