From 1b89e5c9732a03289e21ca11d7f012784e55095e Mon Sep 17 00:00:00 2001 From: Shyotl Date: Tue, 16 Oct 2012 02:33:05 -0500 Subject: [PATCH] Dragged in a bunch of alignment fixes from LL. Should allow disabling of tcmalloc on windows/linux32 if such is ever implemented. --- indra/llcommon/llallocator.cpp | 2 +- indra/llcommon/llevents.cpp | 4 +- indra/llcommon/llmemory.cpp | 40 +++++++--------- indra/llcommon/llmemory.h | 60 +++++++++++++++++------ indra/llmath/llcamera.h | 12 ++--- indra/llmath/llmatrix3a.h | 2 +- indra/llmath/llmatrix4a.h | 2 +- indra/llmath/lloctree.h | 5 +- indra/llmath/llplane.h | 4 +- indra/llmath/llsimdmath.h | 3 +- indra/llmath/llvector4a.cpp | 8 +++- indra/llmath/llvector4a.h | 5 +- indra/llmath/llvector4logical.h | 2 + indra/llmath/llvolume.cpp | 32 ++++++++----- indra/llmath/llvolumeoctree.h | 25 ++++++++-- indra/llprimitive/llmodel.cpp | 3 +- indra/llrender/llrender.cpp | 18 ++++--- indra/newview/lldrawable.h | 17 +++++-- indra/newview/lldriverparam.h | 10 ++++ indra/newview/lldynamictexture.h | 12 ++++- indra/newview/llface.h | 11 +++++ indra/newview/llpolymesh.cpp | 48 +++++++++---------- indra/newview/llpolymesh.h | 10 ++++ indra/newview/llpolymorph.cpp | 72 ++++++++++++++++++++-------- indra/newview/llpolymorph.h | 13 +++++ indra/newview/llspatialpartition.cpp | 3 ++ indra/newview/llspatialpartition.h | 36 ++++++++++---- indra/newview/lltexlayerparams.h | 20 ++++++++ indra/newview/llviewercamera.h | 14 +++++- indra/newview/llvoavatar.cpp | 2 +- indra/newview/llvoavatar.h | 12 ++++- indra/newview/llvoavatarself.h | 10 ++++ 32 files changed, 381 insertions(+), 136 deletions(-) diff --git a/indra/llcommon/llallocator.cpp b/indra/llcommon/llallocator.cpp index 6f6abefc6..87654b5b9 100644 --- a/indra/llcommon/llallocator.cpp +++ b/indra/llcommon/llallocator.cpp @@ -27,7 +27,7 @@ #include "linden_common.h" #include "llallocator.h" -#if LL_USE_TCMALLOC +#if (LL_USE_TCMALLOC && LL_USE_HEAP_PROFILER) #include "google/heap-profiler.h" #include 
"google/commandlineflags_public.h" diff --git a/indra/llcommon/llevents.cpp b/indra/llcommon/llevents.cpp index 9bc61ccb5..480e94e43 100644 --- a/indra/llcommon/llevents.cpp +++ b/indra/llcommon/llevents.cpp @@ -444,13 +444,13 @@ LLBoundListener LLEventPump::listen_impl(const std::string& name, const LLEventL { // The new node isn't last. Place it between the previous node and // the successor. - newNode = (myprev + mydmi->second)/2.0; + newNode = (myprev + mydmi->second)/2.f; } else { // The new node is last. Bump myprev up to the next integer, add // 1.0 and use that. - newNode = std::ceil(myprev) + 1.0; + newNode = std::ceil(myprev) + 1.f; } // Now that newNode has a value that places it appropriately in mSignal, // connect it. diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index 446946ac4..52d0917d7 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -67,6 +67,17 @@ BOOL LLMemory::sEnableMemoryFailurePrevention = FALSE; LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker; #endif +void ll_assert_aligned_func(uintptr_t ptr,U32 alignment) +{ +#ifdef SHOW_ASSERT + // Redundant, place to set breakpoints. 
+ if (ptr%alignment!=0) + { + llwarns << "alignment check failed" << llendl; + } + llassert(ptr%alignment==0); +#endif +} //static void LLMemory::initClass() { @@ -246,21 +257,6 @@ U32 LLMemory::getAllocatedMemKB() return sAllocatedMemInKB ; } -void* ll_allocate (size_t size) -{ - if (size == 0) - { - llwarns << "Null allocation" << llendl; - } - void *p = malloc(size); - if (p == NULL) - { - LLMemory::freeReserve(); - llerrs << "Out of memory Error" << llendl; - } - return p; -} - //---------------------------------------------------------------------------- #if defined(LL_WINDOWS) @@ -1415,7 +1411,7 @@ char* LLPrivateMemoryPool::allocate(U32 size) to_log = false ; } - return (char*)malloc(size) ; + return (char*)ll_aligned_malloc_16(size) ; } return p ; @@ -1434,7 +1430,7 @@ void LLPrivateMemoryPool::freeMem(void* addr) if(!chunk) { - free(addr) ; //release from heap + ll_aligned_free_16(addr) ; //release from heap } else { @@ -1558,7 +1554,7 @@ LLPrivateMemoryPool::LLMemoryChunk* LLPrivateMemoryPool::addChunk(S32 chunk_inde mReservedPoolSize += preferred_size + overhead ; - char* buffer = (char*)malloc(preferred_size + overhead) ; + char* buffer = (char*)ll_aligned_malloc_16(preferred_size + overhead) ; if(!buffer) { return NULL ; @@ -1626,7 +1622,7 @@ void LLPrivateMemoryPool::removeChunk(LLMemoryChunk* chunk) mReservedPoolSize -= chunk->getBufferSize() ; //release memory - free(chunk->getBuffer()) ; + ll_aligned_free_16(chunk->getBuffer()) ; } U16 LLPrivateMemoryPool::findHashKey(const char* addr) @@ -1970,7 +1966,7 @@ char* LLPrivateMemoryPoolManager::allocate(LLPrivateMemoryPool* poolp, U32 size, if(!poolp) { - p = (char*)malloc(size) ; + p = (char*)ll_aligned_malloc_16(size) ; } else { @@ -1999,7 +1995,7 @@ char* LLPrivateMemoryPoolManager::allocate(LLPrivateMemoryPool* poolp, U32 size) } else { - return (char*)malloc(size) ; + return (char*)ll_aligned_malloc_16(size) ; } } #endif @@ -2024,7 +2020,7 @@ void 
LLPrivateMemoryPoolManager::freeMem(LLPrivateMemoryPool* poolp, void* addr { if(!sPrivatePoolEnabled) { - free(addr) ; //private pool is disabled. + ll_aligned_free_16(addr) ; //private pool is disabled. } else if(!sInstance) //the private memory manager is destroyed, try the dangling list { diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 19cc720b0..a28be0adb 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -39,9 +39,14 @@ #include // uintptr_t #endif -#include "llerror.h" #include "llmemtype.h" -#if LL_DEBUG + +#if LL_WINDOWS && LL_DEBUG +#define LL_CHECK_MEMORY llassert(_CrtCheckMemory()); +#else +#define LL_CHECK_MEMORY +#endif + inline void* ll_aligned_malloc( size_t size, int align ) { void* mem = malloc( size + (align - 1) + sizeof(void*) ); @@ -57,10 +62,11 @@ inline void ll_aligned_free( void* ptr ) free( ((void**)ptr)[-1] ); } +#if !LL_USE_TCMALLOC inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 16); + return _aligned_malloc(size, 16); #elif defined(LL_DARWIN) return malloc(size); // default osx malloc is 16 byte aligned. #else @@ -75,7 +81,7 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_16(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) return free(p); #else @@ -83,10 +89,35 @@ inline void ll_aligned_free_16(void *p) #endif } +inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16(). +{ +#if defined(LL_WINDOWS) + return _aligned_realloc(ptr, size, 16); +#elif defined(LL_DARWIN) + return realloc(ptr,size); // default osx malloc is 16 byte aligned. 
+#else + //FIXME: memcpy is SLOW + void* ret = ll_aligned_malloc_16(size); + if (ptr) + { + memcpy(ret, ptr, old_size); + ll_aligned_free_16(ptr); + } + return ret; +#endif +} + +#else // USE_TCMALLOC +// ll_aligned_foo_16 are not needed with tcmalloc +#define ll_aligned_malloc_16 malloc +#define ll_aligned_realloc_16(a,b,c) realloc(a,b) +#define ll_aligned_free_16 free +#endif // USE_TCMALLOC + inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 32); + return _aligned_malloc(size, 32); #elif defined(LL_DARWIN) return ll_aligned_malloc( size, 32 ); #else @@ -101,22 +132,13 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_32(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) ll_aligned_free( p ); #else free(p); // posix_memalign() is compatible with heap deallocator #endif } -#else // LL_DEBUG -// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals) -#define ll_aligned_malloc( size, align ) malloc(size) -#define ll_aligned_free( ptr ) free(ptr) -#define ll_aligned_malloc_16 malloc -#define ll_aligned_free_16 free -#define ll_aligned_malloc_32 malloc -#define ll_aligned_free_32 free -#endif // LL_DEBUG #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 @@ -521,6 +543,14 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) #endif #endif +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + //EVENTUALLY REMOVE THESE: #include "llpointer.h" #include "llsingleton.h" diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index 0459f797c..97e56b2a2 100644 --- 
a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -66,7 +66,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -114,7 +114,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -122,13 +122,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -214,7 +214,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389..9916cfd2d 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 0c61fc96f..94e5e54af 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a 
mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 6e5e51b7e..fcc0c2807 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -121,11 +121,12 @@ public: typedef typename std::vector*>::iterator tree_listener_iter; typedef LLOctreeNode** child_list; typedef LLOctreeNode** child_iter; + typedef LLTreeNode BaseType; typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -133,7 +134,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a18f63a0d..44c8327f5 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -42,6 +42,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -100,7 +102,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32..01458521e 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include #include +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a707..6edeb0fef 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + 
ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert(bytes%16==0); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509..9de0e6677 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -90,6 +92,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +316,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d4..c5698f7ce 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 54a88a570..ebf049591 100644 --- a/indra/llmath/llvolume.cpp 
+++ b/indra/llmath/llvolume.cpp @@ -29,6 +29,10 @@ #include "llmath.h" #include +#if !LL_WINDOWS +#include +#endif +#include #include "llerror.h" #include "llmemtype.h" @@ -6991,17 +6995,21 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con { S32 new_verts = mNumVertices+1; S32 new_size = new_verts*16; -// S32 old_size = mNumVertices*16; + S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); + ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); + ll_assert_aligned(mNormals,16); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + old_size = ((mNumVertices*8)+0xF) & ~0xF; + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); + ll_assert_aligned(mTexCoords,16); //just clear binormals @@ -7095,12 +7103,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, 
(mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7146,7 +7154,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index c25e37f1a..03f10814e 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,6 +37,15 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } LLVolumeTriangle() { mBinIndex = -1; @@ -58,7 +67,7 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; @@ -78,6 +87,16 @@ class LLVolumeOctreeListener : public LLOctreeListener { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode* node); ~LLVolumeOctreeListener(); @@ -104,8 +123,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler diff --git 
a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index 7d4b34ead..b3b5145c5 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1027,7 +1027,8 @@ void LLModel::setVolumeFaceData( if (tc.get()) { - LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32)); + U32 tex_size = (num_verts*2*sizeof(F32)+0xF)&~0xF; + LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), tex_size); } else { diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index 366f506a7..6c985fbb4 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -1510,20 +1510,26 @@ void LLRender::pushUIMatrix() { if (mUIOffset.empty()) { - mUIOffset.push_back(new LLVector4a(0.f)); + mUIOffset.push_back(static_cast(ll_aligned_malloc_16(sizeof(LLVector4a)))); + mUIOffset.back()->splat(0.f); } else { - mUIOffset.push_back(new LLVector4a(*mUIOffset.back())); + const LLVector4a* last_entry = mUIOffset.back(); + mUIOffset.push_back(static_cast(ll_aligned_malloc_16(sizeof(LLVector4a)))); + *mUIOffset.back() = *last_entry; } if (mUIScale.empty()) { - mUIScale.push_back(new LLVector4a(1.f)); + mUIScale.push_back(static_cast(ll_aligned_malloc_16(sizeof(LLVector4a)))); + mUIScale.back()->splat(1.f); } else { - mUIScale.push_back(new LLVector4a(*mUIScale.back())); + const LLVector4a* last_entry = mUIScale.back(); + mUIScale.push_back(static_cast(ll_aligned_malloc_16(sizeof(LLVector4a)))); + *mUIScale.back() = *last_entry; } } @@ -1533,9 +1539,9 @@ void LLRender::popUIMatrix() { llerrs << "UI offset stack blown." 
<< llendl; } - delete mUIOffset.back(); + ll_aligned_free_16(mUIOffset.back()); mUIOffset.pop_back(); - delete mUIScale.back(); + ll_aligned_free_16(mUIScale.back()); mUIScale.pop_back(); } diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index bd2d23c5f..ff6023597 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -65,6 +65,7 @@ class LLViewerTexture; const U32 SILHOUETTE_HIGHLIGHT = 0; // All data for new renderer goes into this class. +LL_ALIGN_PREFIX(16) class LLDrawable : public LLRefCount { public: @@ -81,6 +82,16 @@ public: static void initClass(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawable() { init(); } MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE); @@ -290,8 +301,8 @@ public: } EDrawableFlags; private: //aligned members - LLVector4a mExtents[2]; - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mExtents[2]); + LL_ALIGN_16(LLVector4a mPositionGroup); public: LLXformMatrix mXform; @@ -333,7 +344,7 @@ private: static U32 sNumZombieDrawables; static LLDynamicArrayPtr > sDeadList; -}; +} LL_ALIGN_POSTFIX(16); inline LLFace* LLDrawable::getFace(const S32 i) const diff --git a/indra/newview/lldriverparam.h b/indra/newview/lldriverparam.h index 7a4d711d4..c0976d1d4 100644 --- a/indra/newview/lldriverparam.h +++ b/indra/newview/lldriverparam.h @@ -83,6 +83,16 @@ public: LLDriverParam(LLWearable *wearablep); ~LLDriverParam(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + // Special: These functions are overridden by child classes LLDriverParamInfo* getInfo() const { return (LLDriverParamInfo*)mInfo; } // This sets mInfo and calls initialization functions diff --git a/indra/newview/lldynamictexture.h b/indra/newview/lldynamictexture.h index 398a41cef..33287ae1a 100644 --- 
a/indra/newview/lldynamictexture.h +++ b/indra/newview/lldynamictexture.h @@ -36,6 +36,16 @@ class LLViewerDynamicTexture : public LLViewerTexture { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + enum { LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE, @@ -85,7 +95,7 @@ protected: protected: BOOL mClamp; LLCoordGL mOrigin; - LLCamera mCamera; + LL_ALIGN_16(LLCamera mCamera); typedef std::set instance_list_t; static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ]; diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 4cab49205..b59dff8f0 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -64,6 +64,17 @@ class LLFace { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + + LLFace(const LLFace& rhs) { *this = rhs; diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index d7b342b25..04ccafcb9 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -125,28 +125,28 @@ void LLPolyMeshSharedData::setupLOD(LLPolyMeshSharedData* reference_data) //----------------------------------------------------------------------------- void LLPolyMeshSharedData::freeMeshData() { - if (!mReferenceData) - { - mNumVertices = 0; + if (!mReferenceData) + { + mNumVertices = 0; - delete [] mBaseCoords; - mBaseCoords = NULL; + ll_aligned_free_16(mBaseCoords); + mBaseCoords = NULL; - delete [] mBaseNormals; - mBaseNormals = NULL; + ll_aligned_free_16(mBaseNormals); + mBaseNormals = NULL; - delete [] mBaseBinormals; - mBaseBinormals = NULL; + ll_aligned_free_16(mBaseBinormals); + mBaseBinormals = NULL; - delete [] mTexCoords; - mTexCoords = NULL; + ll_aligned_free_16(mTexCoords); + mTexCoords = NULL; - delete [] mDetailTexCoords; - mDetailTexCoords = NULL; + 
ll_aligned_free_16(mDetailTexCoords); + mDetailTexCoords = NULL; - delete [] mWeights; - mWeights = NULL; - } + ll_aligned_free_16(mWeights); + mWeights = NULL; + } mNumFaces = 0; delete [] mFaces; @@ -228,14 +228,14 @@ U32 LLPolyMeshSharedData::getNumKB() //----------------------------------------------------------------------------- BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) { - U32 i; - mBaseCoords = new LLVector4a[ numVertices ]; - mBaseNormals = new LLVector4a[ numVertices ]; - mBaseBinormals = new LLVector4a[ numVertices ]; - mTexCoords = new LLVector2[ numVertices ]; - mDetailTexCoords = new LLVector2[ numVertices ]; - mWeights = new F32[ numVertices ]; - for (i = 0; i < numVertices; i++) + U32 i; + mBaseCoords = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); + mBaseNormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); + mBaseBinormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); + mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); + for (i = 0; i < numVertices; i++) { mBaseCoords[i].clear(); mBaseNormals[i].clear(); diff --git a/indra/newview/llpolymesh.h b/indra/newview/llpolymesh.h index d09909309..184372a95 100644 --- a/indra/newview/llpolymesh.h +++ b/indra/newview/llpolymesh.h @@ -428,6 +428,16 @@ public: LLPolySkeletalDistortion(LLVOAvatar *avatarp); ~LLPolySkeletalDistortion(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + // Special: These functions are overridden by child classes LLPolySkeletalDistortionInfo* getInfo() const { return (LLPolySkeletalDistortionInfo*)mInfo; } // This sets mInfo and calls initialization functions diff --git a/indra/newview/llpolymorph.cpp 
b/indra/newview/llpolymorph.cpp index b43837e89..c05d1c1f1 100644 --- a/indra/newview/llpolymorph.cpp +++ b/indra/newview/llpolymorph.cpp @@ -74,9 +74,9 @@ LLPolyMorphData::LLPolyMorphData(const LLPolyMorphData &rhs) : { const S32 numVertices = mNumIndices; - mCoords = new LLVector4a[numVertices]; - mNormals = new LLVector4a[numVertices]; - mBinormals = new LLVector4a[numVertices]; + mCoords = static_cast(ll_aligned_malloc_16(numVertices * sizeof(LLVector4a))); + mNormals = static_cast(ll_aligned_malloc_16(numVertices * sizeof(LLVector4a))); + mBinormals = static_cast(ll_aligned_malloc_16(numVertices * sizeof(LLVector4a))); mTexCoords = new LLVector2[numVertices]; mVertexIndices = new U32[numVertices]; @@ -90,17 +90,12 @@ LLPolyMorphData::LLPolyMorphData(const LLPolyMorphData &rhs) : } } - //----------------------------------------------------------------------------- // ~LLPolyMorphData() //----------------------------------------------------------------------------- LLPolyMorphData::~LLPolyMorphData() { - delete [] mVertexIndices; - delete [] mCoords; - delete [] mNormals; - delete [] mBinormals; - delete [] mTexCoords; + freeData(); } //----------------------------------------------------------------------------- @@ -119,12 +114,17 @@ BOOL LLPolyMorphData::loadBinary(LLFILE *fp, LLPolyMeshSharedData *mesh) return FALSE; } + //------------------------------------------------------------------------- + // free any existing data + //------------------------------------------------------------------------- + freeData(); + //------------------------------------------------------------------------- // allocate vertices //------------------------------------------------------------------------- - mCoords = new LLVector4a[numVertices]; - mNormals = new LLVector4a[numVertices]; - mBinormals = new LLVector4a[numVertices]; + mCoords = static_cast(ll_aligned_malloc_16(numVertices * sizeof(LLVector4a))); + mNormals = static_cast(ll_aligned_malloc_16(numVertices * 
sizeof(LLVector4a))); + mBinormals = static_cast(ll_aligned_malloc_16(numVertices * sizeof(LLVector4a))); mTexCoords = new LLVector2[numVertices]; // Actually, we are allocating more space than we need for the skiplist mVertexIndices = new U32[numVertices]; @@ -207,6 +207,42 @@ BOOL LLPolyMorphData::loadBinary(LLFILE *fp, LLPolyMeshSharedData *mesh) return TRUE; } +//----------------------------------------------------------------------------- +// freeData() +//----------------------------------------------------------------------------- +void LLPolyMorphData::freeData() +{ + if (mCoords != NULL) + { + ll_aligned_free_16(mCoords); + mCoords = NULL; + } + + if (mNormals != NULL) + { + ll_aligned_free_16(mNormals); + mNormals = NULL; + } + + if (mBinormals != NULL) + { + ll_aligned_free_16(mBinormals); + mBinormals = NULL; + } + + if (mTexCoords != NULL) + { + delete [] mTexCoords; + mTexCoords = NULL; + } + + if (mVertexIndices != NULL) + { + delete [] mVertexIndices; + mVertexIndices = NULL; + } +} + //----------------------------------------------------------------------------- // LLPolyMesh::saveLLM() //----------------------------------------------------------------------------- @@ -354,9 +390,9 @@ BOOL LLPolyMorphData::setMorphFromMesh(LLPolyMesh *morph) if (num_significant == 0) nindices = 1; - LLVector4a* new_coords = new LLVector4a[nindices]; - LLVector4a* new_normals = new LLVector4a[nindices]; - LLVector4a* new_binormals = new LLVector4a[nindices]; + LLVector4a* new_coords = static_cast(ll_aligned_malloc_16(nindices * sizeof(LLVector4a))); + LLVector4a* new_normals = static_cast(ll_aligned_malloc_16(nindices * sizeof(LLVector4a))); + LLVector4a* new_binormals = static_cast(ll_aligned_malloc_16(nindices * sizeof(LLVector4a))); LLVector2* new_tex_coords = new LLVector2[nindices]; U32* new_vertex_indices = new U32[nindices]; @@ -490,11 +526,7 @@ BOOL LLPolyMorphData::setMorphFromMesh(LLPolyMesh *morph) 
//------------------------------------------------------------------------- // reallocate vertices //------------------------------------------------------------------------- - delete [] mVertexIndices; - delete [] mCoords; - delete [] mNormals; - delete [] mBinormals; - delete [] mTexCoords; + freeData(); mVertexIndices = new_vertex_indices; mCoords = new_coords; diff --git a/indra/newview/llpolymorph.h b/indra/newview/llpolymorph.h index dac698437..02f5be42f 100644 --- a/indra/newview/llpolymorph.h +++ b/indra/newview/llpolymorph.h @@ -54,6 +54,16 @@ public: ~LLPolyMorphData(); LLPolyMorphData(const LLPolyMorphData &rhs); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + BOOL loadBinary(LLFILE* fp, LLPolyMeshSharedData *mesh); const std::string& getName() { return mName; } @@ -77,6 +87,9 @@ public: F32 mMaxDistortion; // maximum single vertex distortion in a given morph LLVector4a mAvgDistortion; // average vertex distortion, to infer directionality of the morph LLPolyMeshSharedData* mMesh; + +private: + void freeData(); }; //----------------------------------------------------------------------------- diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index dbc2a1961..60ad83ad2 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -412,6 +412,7 @@ void LLSpatialGroup::setVisible() void LLSpatialGroup::validate() { + ll_assert_aligned(this,64); #if LL_OCTREE_PARANOIA_CHECK sg_assert(!isState(DIRTY)); @@ -1075,6 +1076,8 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : mLastUpdateDistance(-1.f), mLastUpdateTime(gFrameTimeSeconds) { + ll_assert_aligned(this,16); + sNodeCount++; LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index e6b018087..fa3ce39e8 100644 --- 
a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -73,6 +73,16 @@ protected: ~LLDrawInfo(); public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawInfo(const LLDrawInfo& rhs) { @@ -111,7 +121,7 @@ public: F32 mPartSize; F32 mVSize; LLSpatialGroup* mGroup; - LLFace* mFace; //associated face + LL_ALIGN_16(LLFace* mFace); //associated face F32 mDistance; U32 mDrawMode; @@ -186,7 +196,7 @@ public: }; }; -LL_ALIGN_PREFIX(64) +LL_ALIGN_PREFIX(16) class LLSpatialGroup : public LLOctreeListener<LLDrawable> { friend class LLSpatialPartition; @@ -198,6 +208,16 @@ public: *this = rhs; } + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + const LLSpatialGroup& operator=(const LLSpatialGroup& rhs) { llerrs << "Illegal operation!" << llendl; @@ -359,12 +379,12 @@ public: V4_COUNT = 10 } eV4Index; - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children - LLVector4a mObjectExtents[2]; // extents (min, max) of objects in this node - LLVector4a mObjectBounds[2]; // bounding box (center, size) of objects in this node - LLVector4a mViewAngle; - LLVector4a mLastUpdateViewAngle; + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mObjectExtents[2]); // extents (min, max) of objects in this node + LL_ALIGN_16(LLVector4a mObjectBounds[2]); // bounding box (center, size) of objects in this node + LL_ALIGN_16(LLVector4a mViewAngle); + LL_ALIGN_16(LLVector4a mLastUpdateViewAngle); F32 mObjectBoxSize; //cached mObjectBounds[1].getLength3() 
diff --git a/indra/newview/lltexlayerparams.h b/indra/newview/lltexlayerparams.h index 2c0da60b4..fffe20208 100644 --- a/indra/newview/lltexlayerparams.h +++ b/indra/newview/lltexlayerparams.h @@ -67,6 +67,16 @@ public: /*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable = NULL) const; + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + // LLVisualParam Virtual functions ///*virtual*/ BOOL parseData(LLXmlTreeNode* node); /*virtual*/ void apply( ESex avatar_sex ) {} @@ -143,6 +153,16 @@ public: LLTexLayerParamColor( LLVOAvatar* avatar ); /* virtual */ ~LLTexLayerParamColor(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + /*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable = NULL) const; // LLVisualParam Virtual functions diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h index a4a05fca8..82d88bc3d 100644 --- a/indra/newview/llviewercamera.h +++ b/indra/newview/llviewercamera.h @@ -52,9 +52,20 @@ const F32 OGL_TO_CFR_ROTATION[16] = { 0.f, 0.f, -1.f, 0.f, // -Z becomes X const BOOL FOR_SELECTION = TRUE; const BOOL NOT_FOR_SELECTION = FALSE; + +LL_ALIGN_PREFIX(16) class LLViewerCamera : public LLCamera, public LLSingleton<LLViewerCamera> { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } typedef enum { @@ -138,6 +149,7 @@ protected: S16 mZoomSubregion; public: -}; +} LL_ALIGN_POSTFIX(16); + #endif // LL_LLVIEWERCAMERA_H diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index 4b27f6a2a..1b26891c8 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -3139,7 +3139,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) if (isImpostor() && !mNeedsImpostorUpdate) { - LLVector4a 
ext[2]; + LL_ALIGN_16(LLVector4a ext[2]); F32 distance; LLVector3 angle; diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index 81529a2f3..39937c9db 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -140,6 +140,16 @@ protected: **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual void markDead(); static void initClass(); // Initialize data that's only init'd once per class. @@ -261,7 +271,7 @@ public: bool isBuilt() const { return mIsBuilt; } private: //aligned members - LLVector4a mImpostorExtents[2]; + LL_ALIGN_16(LLVector4a mImpostorExtents[2]); private: BOOL mSupportsAlphaLayers; // For backwards compatibility, TRUE for 1.23+ clients diff --git a/indra/newview/llvoavatarself.h b/indra/newview/llvoavatarself.h index 0105315ff..03c3b9b75 100644 --- a/indra/newview/llvoavatarself.h +++ b/indra/newview/llvoavatarself.h @@ -49,6 +49,16 @@ class LLVOAvatarSelf : **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual ~LLVOAvatarSelf(); virtual void markDead();