Merge branch 'future' of github-siana:siana/SingularityViewer into future

2011-08-18 19:26:22 +02:00
parent fc839cb2ff e6de1e0d5a
commit 6aff0002fa
59 changed files with 2499 additions and 1339 deletions
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -96,7 +96,7 @@ set(llcommon_HEADER_FILES
    indra_constants.h
    linden_common.h
    linked_lists.h
-    llaccountingquota.h
+    llaccountingcost.h
    llagentconstants.h
    llavatarname.h
    llapp.h
--- a/indra/llcommon/llaccountingquota.h
+++ b/indra/llcommon/llaccountingquota.h
@@ -1,5 +1,5 @@
 /** 
- * @file llaccountingquota.h
+ * @file llaccountingcost.h
 * @
 *
 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
@@ -58,22 +58,28 @@ struct ParcelQuota
 	F32 mParcelCapacity;
 };

-struct SelectionQuota
+//SelectionCost (formerly SelectionQuota) does not require an id at the moment
+struct SelectionCost
 {
-	SelectionQuota( LLUUID localId, F32 renderCost, F32 physicsCost, F32 networkCost, F32 simulationCost )
-	: mLocalId( localId)
-	, mRenderCost( renderCost )
-	, mPhysicsCost( physicsCost )
+	SelectionCost( /*LLTransactionID transactionId, */ F32 physicsCost, F32 networkCost, F32 simulationCost )
+	//: mTransactionId( transactionId)
+	: mPhysicsCost( physicsCost )
 	, mNetworkCost( networkCost )
 	, mSimulationCost( simulationCost )
 	{
 	}
-	SelectionQuota() {}
+	SelectionCost()
+	: mPhysicsCost( 0.0f )
+	, mNetworkCost( 0.0f )
+	, mSimulationCost( 0.0f )
+	{}
 	
-	F32 mRenderCost, mPhysicsCost, mNetworkCost, mSimulationCost;	
-	LLUUID mLocalId;
+	F32 mPhysicsCost, mNetworkCost, mSimulationCost;	
+	//LLTransactionID mTransactionId;
 };

+typedef enum { Roots = 0 , Prims } eSelectionType;
+
 #endif


--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -35,7 +35,9 @@

 #include <new>
 #include <cstdlib>
+#if !LL_WINDOWS
 #include <stdint.h>		// uintptr_t
+#endif

 #include "llerror.h"

--- a/indra/llcommon/llstrider.h
+++ b/indra/llcommon/llstrider.h
@@ -42,13 +42,15 @@ template <class Object> class LLStrider
 		U8*		mBytep;
 	};
 	U32     mSkip;
+	U32		mTypeSize;
 public:

-	LLStrider()  { mObjectp = NULL; mSkip = sizeof(Object); } 
+	LLStrider()  { mObjectp = NULL; mTypeSize = mSkip = sizeof(Object); } 
 	~LLStrider() { } 

 	const LLStrider<Object>& operator =  (Object *first)    { mObjectp = first; return *this;}
 	void setStride (S32 skipBytes)	{ mSkip = (skipBytes ? skipBytes : sizeof(Object));}
+	void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); }

 	void skip(const U32 index)     { mBytep += mSkip*index;}
 	U32 getSkip() const			   { return mSkip; }
@@ -58,18 +60,68 @@ public:
 	Object* operator ++(int)       { Object* old = mObjectp; mBytep += mSkip; return old; }
 	Object* operator +=(int i)     { mBytep += mSkip*i; return mObjectp; }
 	Object& operator[](U32 index)  { return *(Object*)(mBytep + (mSkip * index)); }
-	void assignArray(U8* buff, size_t elem_size, size_t elem_count)
+	void assignArray(U8* __restrict source, const size_t elem_size, const size_t elem_count)
 	{
 		llassert_always(sizeof(Object) <= elem_size);
-		if(sizeof(Object) == mSkip && sizeof(Object) == elem_size)	//No stride. No difference in element size.
-			LLVector4a::memcpyNonAliased16((F32*) mBytep, (F32*) buff, elem_size * elem_count);
+
+		U8* __restrict dest = mBytep;				//refer to dest instead of mBytep to benefit from __restrict hint
+		const U32 bytes = elem_size * elem_count;	//total bytes to copy from source to dest
+
+		//stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size.
+		//Because LLStrider is often passed an LLVector3 even if the representation is LLVector4 in the vertex buffer, mTypeSize is set to
+		//the TRUE vbo datatype size via VertexBufferStrider::get
+		if(mTypeSize == mSkip && mTypeSize == elem_size)	
+		{
+			if(bytes >= sizeof(LLVector4) * 4)	//Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial.
+			{
+				U8* __restrict aligned_source = LL_NEXT_ALIGNED_ADDRESS(source);
+				U8* __restrict aligned_dest = LL_NEXT_ALIGNED_ADDRESS(dest);
+				const U32 source_offset = aligned_source - source;	//Offset to first aligned location in source buffer.
+				const U32 dest_offset = aligned_dest - dest;		//Offset to first aligned location in dest buffer.
+				llassert_always(source_offset < 16);
+				llassert_always(dest_offset < 16);
+				if(source_offset == dest_offset)	//delta to aligned location matches between source and destination! _mm_*_ps should be viable.
+				{
+					const U32 end_offset = (bytes - source_offset) % sizeof(LLVector4);		//buffers may not neatly end on a 16byte alignment boundary.
+					const U32 aligned_bytes = bytes - source_offset - end_offset;	//how many bytes to copy from aligned start to aligned end.
+
+					llassert_always(aligned_bytes > 0);
+
+					if(source_offset)	//memcpy up to the aligned location if needed
+						memcpy(dest,source,source_offset);
+					LLVector4a::memcpyNonAliased16((F32*) aligned_dest, (F32*) aligned_source, aligned_bytes);
+					if(end_offset)		//memcpy to the very end if needed.
+						memcpy(aligned_dest+aligned_bytes,aligned_source+aligned_bytes,end_offset);
+				}
+				else	//buffers non-uniformly offset from aligned location. Using _mm_*u_ps.
+				{
+					U32 end = bytes/sizeof(LLVector4);	//sizeof(LLVector4) = 16 bytes = 128 bits
+
+					llassert_always(end > 0);
+
+					__m128* dst = (__m128*) dest;
+					__m128* src = (__m128*) source;			
+
+					for (U32 i = 0; i < end; i++)	//copy 128bit chunks
+					{
+						__m128 res = _mm_loadu_ps((F32*)&src[i]);
+						_mm_storeu_ps((F32*)&dst[i], res);
+					}
+					end*=16;//Convert to real byte offset
+					if(end < bytes)	//just memcopy the rest
+						memcpy(dest+end,source+end,bytes-end);
+				}
+			}
+			else	//Too small. just do a simple memcpy.
+				memcpy(dest,source,bytes);
+		}
 		else
 		{
 			for(U32 i=0;i<elem_count;i++)
 			{
-				memcpy(mBytep,buff,sizeof(Object));
-				mBytep+=mSkip;
-				buff+=elem_size;
+				memcpy(dest,source,sizeof(Object));
+				dest+=mSkip;
+				source+=elem_size;
 			}
 		}
 	}
--- a/indra/llcommon/llthread.cpp
+++ b/indra/llcommon/llthread.cpp
@@ -102,7 +102,7 @@ void *APR_THREAD_FUNC LLThread::staticRun(apr_thread_t *apr_threadp, void *datap
 	// Setting mStatus to STOPPED is done non-thread-safe, so it's
 	// possible that the thread is deleted by another thread at
 	// the moment it happens... therefore make a copy here.
-	volatile char const* name = threadp->mName.c_str();
+	char const* volatile name = threadp->mName.c_str();
 	
 	// We're done with the run function, this thread is done executing now.
 	threadp->mStatus = STOPPED;