From f25eb07fab9aeb4b50694bb980e5474c5df89246 Mon Sep 17 00:00:00 2001 From: Shyotl Date: Wed, 9 Oct 2013 14:47:06 -0500 Subject: [PATCH] Unstaged changes cleanup. Further vectorization. Change in binormal/bitangent calculation. --- indra/llappearance/llpolymesh.cpp | 51 + indra/llcommon/CMakeLists.txt | 1 + indra/llcommon/llalignedarray.h | 139 ++ indra/llcommon/llmemory.h | 157 +- indra/llmath/llmatrix3a.inl | 2 +- indra/llmath/llsimdmath.h | 28 - indra/llmath/llvector4a.cpp | 50 +- indra/llmath/llvector4a.h | 6 + indra/llmath/llvector4a.inl | 34 +- indra/llmath/llvolume.cpp | 2178 +++++++---------- indra/llmath/llvolume.h | 103 +- indra/llmath/llvolumeoctree.cpp | 64 +- indra/llmath/llvolumeoctree.h | 8 +- indra/llprimitive/llmodel.cpp | 63 +- indra/llprimitive/llmodel.h | 3 +- indra/llrender/llshadermgr.cpp | 2 +- indra/llrender/llvertexbuffer.cpp | 63 +- indra/llrender/llvertexbuffer.h | 7 +- .../shaders/class1/deferred/bumpSkinnedV.glsl | 6 +- .../shaders/class1/deferred/bumpV.glsl | 6 +- .../shaders/class1/deferred/fxaaF.glsl | 1 - .../shaders/class1/transform/binormalV.glsl | 6 +- indra/newview/lldrawpoolavatar.h | 2 +- indra/newview/lldrawpoolbump.cpp | 2 +- indra/newview/llface.cpp | 94 +- indra/newview/llface.h | 2 +- indra/newview/llflexibleobject.cpp | 17 +- indra/newview/llhudicon.cpp | 21 +- indra/newview/llhudicon.h | 4 +- indra/newview/llhudnametag.cpp | 19 +- indra/newview/llhudnametag.h | 2 +- indra/newview/llspatialpartition.cpp | 91 +- indra/newview/llspatialpartition.h | 10 +- indra/newview/llviewerobject.cpp | 16 +- indra/newview/llviewerobject.h | 10 +- indra/newview/llviewershadermgr.cpp | 14 +- indra/newview/llviewershadermgr.h | 3 +- indra/newview/llviewerwindow.cpp | 139 +- indra/newview/llviewerwindow.h | 23 +- indra/newview/llvoavatar.cpp | 38 +- indra/newview/llvoavatar.h | 16 +- indra/newview/llvograss.cpp | 28 +- indra/newview/llvograss.h | 8 +- indra/newview/llvosurfacepatch.cpp | 20 +- indra/newview/llvosurfacepatch.h | 8 +- indra/newview/llvotree.cpp | 13 +- indra/newview/llvotree.h | 8 +- indra/newview/llvovolume.cpp | 210 +- indra/newview/llvovolume.h | 8 +- indra/newview/pipeline.cpp | 62 +- indra/newview/pipeline.h | 16 +- 51 files changed, 1987 insertions(+), 1895 deletions(-) create mode 100644 indra/llcommon/llalignedarray.h diff --git a/indra/llappearance/llpolymesh.cpp b/indra/llappearance/llpolymesh.cpp index a6290d02a..d588d687d 100644 --- a/indra/llappearance/llpolymesh.cpp +++ b/indra/llappearance/llpolymesh.cpp @@ -1302,6 +1302,57 @@ BOOL LLPolyMesh::saveOBJ(LLFILE *fp) return TRUE; } +// Finds binormal based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_binormal_from_triangle(LLVector4a& binormal, + + const LLVector4a& pos0, + const LLVector2& tex0, + const LLVector4a& pos1, + const LLVector2& tex1, + const LLVector4a& pos2, + const LLVector2& tex2) +{ + LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a lhs, rhs; + + LLVector4a r0; + lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); + r0.setCross3(lhs, rhs); + + LLVector4a r1; + lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); + r1.setCross3(lhs, rhs); + + LLVector4a r2; + lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); + r2.setCross3(lhs, rhs); + + if( r0[VX] && r1[VX] && r2[VX] ) + { + binormal.set( + -r0[VZ] / r0[VX], + -r1[VZ] / r1[VX], + -r2[VZ] / r2[VX]); + // binormal.normVec(); + } + else + { + binormal.set( 0, 1 , 0 ); + } +} + //----------------------------------------------------------------------------- // LLPolyMesh::loadOBJ() //----------------------------------------------------------------------------- diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 6f93be79e..4cbd4d5f0 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -116,6 +116,7 @@ set(llcommon_HEADER_FILES linden_common.h linked_lists.h llaccountingcost.h + llalignedarray.h llagentconstants.h llallocator.h llallocator_heap_profile.h diff --git a/indra/llcommon/llalignedarray.h b/indra/llcommon/llalignedarray.h new file mode 100644 index 000000000..ed8fd3120 --- /dev/null +++ b/indra/llcommon/llalignedarray.h @@ -0,0 +1,139 @@ +/** + * @file llalignedarray.h + * @brief A static array which obeys alignment restrictions and mimics std::vector accessors. + * + * $LicenseInfo:firstyear=2002&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLALIGNEDARRAY_H +#define LL_LLALIGNEDARRAY_H + +#include "llmemory.h" + +template +class LLAlignedArray +{ +public: + T* mArray; + U32 mElementCount; + U32 mCapacity; + + LLAlignedArray(); + ~LLAlignedArray(); + + void push_back(const T& elem); + U32 size() const { return mElementCount; } + void resize(U32 size); + T* append(S32 N); + T& operator[](int idx); + const T& operator[](int idx) const; +}; + +template +LLAlignedArray::LLAlignedArray() +{ + llassert(alignment >= 16); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +LLAlignedArray::~LLAlignedArray() +{ + ll_aligned_free(mArray); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +void LLAlignedArray::push_back(const T& elem) +{ + T* old_buf = NULL; + if (mCapacity <= mElementCount) + { + mCapacity++; + mCapacity *= 2; + T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); + if (mArray) + { + ll_memcpy_nonaliased_aligned_16((char*)new_buf, (char*)mArray, sizeof(T)*mElementCount); + } + old_buf = mArray; + mArray = new_buf; + } + + mArray[mElementCount++] = elem; + + //delete old array here to prevent error on a.push_back(a[0]) + ll_aligned_free(old_buf); +} + +template +void LLAlignedArray::resize(U32 size) +{ + if (mCapacity < size) + { + mCapacity = size+mCapacity*2; + T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; + if (mArray) + { + ll_memcpy_nonaliased_aligned_16((char*) new_buf, (char*) mArray, sizeof(T)*mElementCount); + ll_aligned_free(mArray); + } + + /*for (U32 i = mElementCount; i < mCapacity; ++i) + { + new(new_buf+i) T(); + }*/ + mArray = new_buf; + } + + mElementCount = size; +} + + +template +T& LLAlignedArray::operator[](int idx) +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +const T& LLAlignedArray::operator[](int idx) const +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +T* LLAlignedArray::append(S32 N) +{ + U32 sz = size(); + resize(sz+N); + return &((*this)[sz]); +} + +#endif + diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 0762dc3f4..43a77d522 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -42,27 +42,77 @@ class LLMutex ; #define LL_CHECK_MEMORY #endif +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + +#include + +template T* LL_NEXT_ALIGNED_ADDRESS(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0xF) & ~0xF); +} + +template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define LL_ALIGN_PREFIX(x) +#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define LL_ALIGN_PREFIX(x) __declspec(align(x)) +#define LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + inline void* ll_aligned_malloc( size_t size, int align ) { +#if defined(LL_WINDOWS) + return _aligned_malloc(size, align); +#else void* mem = malloc( size + (align - 1) + sizeof(void*) ); char* aligned = ((char*)mem) + sizeof(void*); aligned += align - ((uintptr_t)aligned & (align - 1)); ((void**)aligned)[-1] = mem; return aligned; +#endif } inline void ll_aligned_free( void* ptr ) { - free( ((void**)ptr)[-1] ); +#if defined(LL_WINDOWS) + _aligned_free(ptr); +#else + if (ptr) + { + free( ((void**)ptr)[-1] ); + } +#endif } +#if !LL_USE_TCMALLOC inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). { -#if (LL_DARWIN || LL_USE_TCMALLOC) - return malloc(size); // default osx malloc is 16 byte aligned. -#elif LL_WINDOWS +#if defined(LL_WINDOWS) return _aligned_malloc(size, 16); +#elif defined(LL_DARWIN) + return malloc(size); // default osx malloc is 16 byte aligned. #else void *rtn; if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) @@ -74,10 +124,10 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_16(void *p) { -#if (LL_DARWIN || LL_USE_TCMALLOC) - free(p); -#elif LL_WINDOWS +#if defined(LL_WINDOWS) _aligned_free(p); +#elif defined(LL_DARWIN) + return free(p); #else free(p); // posix_memalign() is compatible with heap deallocator #endif @@ -85,10 +135,10 @@ inline void ll_aligned_free_16(void *p) inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16(). { -#if (LL_DARWIN || LL_USE_TCMALLOC) - return realloc(ptr,size); // default osx malloc is 16 byte aligned. -#elif LL_WINDOWS +#if defined(LL_WINDOWS) return _aligned_realloc(ptr, size, 16); +#elif defined(LL_DARWIN) + return realloc(ptr,size); // default osx malloc is 16 byte aligned. #else //FIXME: memcpy is SLOW void* ret = ll_aligned_malloc_16(size); @@ -105,11 +155,18 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r #endif } +#else // USE_TCMALLOC +// ll_aligned_foo_16 are not needed with tcmalloc +#define ll_aligned_malloc_16 malloc +#define ll_aligned_realloc_16(a,b,c) realloc(a,b) +#define ll_aligned_free_16 free +#endif // USE_TCMALLOC + inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { -#if LL_WINDOWS +#if defined(LL_WINDOWS) return _aligned_malloc(size, 32); -#elif LL_DARWIN +#elif defined(LL_DARWIN) return ll_aligned_malloc( size, 32 ); #else void *rtn; @@ -122,15 +179,87 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_32(void *p) { -#if LL_WINDOWS +#if defined(LL_WINDOWS) _aligned_free(p); -#elif LL_DARWIN +#elif defined(LL_DARWIN) ll_aligned_free( p ); #else free(p); // posix_memalign() is compatible with heap deallocator #endif } + +// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP. +// Source and dest must be 16-byte aligned and size must be multiple of 16. +// +inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) +{ + assert(src != NULL); + assert(dst != NULL); + assert(bytes > 0); + assert((bytes % sizeof(F32))== 0); + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + assert(bytes%16==0); + + char* end = dst + bytes; + + if (bytes > 64) + { + + // Find start of 64b aligned area within block + // + void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); + + //at least 64 bytes before the end of the destination, switch to 16 byte copies + void* end_64 = end-64; + + // Prefetch the head of the 64b area now + // + _mm_prefetch((char*)begin_64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); + + // Copy 16b chunks until we're 64b aligned + // + while (dst < begin_64) + { + + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } + + // Copy 64b chunks up to your tail + // + // might be good to shmoo the 512b prefetch offset + // (characterize performance for various values) + // + while (dst < end_64) + { + _mm_prefetch((char*)src + 512, _MM_HINT_NTA); + _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16))); + _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32))); + _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48))); + dst += 64; + src += 64; + } + } + + // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies) + // + while (dst < end) + { + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } +} + #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 #endif diff --git a/indra/llmath/llmatrix3a.inl b/indra/llmath/llmatrix3a.inl index eaf58319d..37819fea3 100644 --- a/indra/llmath/llmatrix3a.inl +++ b/indra/llmath/llmatrix3a.inl @@ -60,7 +60,7 @@ inline void LLMatrix3a::setTranspose(const LLMatrix3a& src) const LLQuad srcCol1 = src.mColumns[1]; const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 ); mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] ); - mColumns[1] = _mm_shuffle_ps( unpacklo, src.mColumns[2], _MM_SHUFFLE(0, 1, 3, 2) ); + mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) ); mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) ); } diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521e..cebd2ace7 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fef..570fa41a4 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 9de0e6677..ffd58114e 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -46,6 +46,7 @@ class LLRotation; // of this writing, July 08, 2010) about getting it implemented before you resort to // LLVector3/LLVector4. ///////////////////////////////// +class LLVector4a; LL_ALIGN_PREFIX(16) class LLVector4a @@ -235,6 +236,11 @@ public: // Note that this does not consider zero length vectors! inline void normalize3fast(); + // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed + // Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length. + // + inline void normalize3fast_checked(LLVector4a* d = 0); + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a563..69d3d01ef 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -409,6 +409,26 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } +inline void LLVector4a::normalize3fast_checked(LLVector4a* d) +{ + if (!isFinite3()) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + + if (lenSqrd.getF32ptr()[0] <= FLT_EPSILON) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { @@ -460,21 +480,19 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index fdba388b1..26767841a 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -136,6 +136,82 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } +// Finds tangent vec based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. @@ -474,7 +550,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -497,20 +573,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. // pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -518,12 +595,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -536,18 +618,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -568,7 +657,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -581,103 +670,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. @@ -693,12 +685,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector pt; + static LLAlignedArray pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;imPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1408,50 +1410,54 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); mTotal = mPath.size(); } @@ -1549,7 +1555,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1575,12 +1581,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;imPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2163,21 +2166,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2185,22 +2173,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. + LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(dst->isFinite3()); + ++dst; } } @@ -2790,13 +2795,15 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> [-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); return value; } @@ -2817,21 +2824,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2853,15 +2860,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2882,7 +2900,7 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); @@ -2890,9 +2908,13 @@ void LLVolume::sculptGeneratePlaceholder() const F32 RADIUS = (F32) 0.3; - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); + + llassert(pt.isFinite3()); } line += sizeT; @@ -2917,7 +2939,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -2974,12 +2996,15 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } + + llassert(pt.isFinite3()); } line += sizeT; @@ -3599,627 +3624,6 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const -{ - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) - { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle indices" << llendl; - num_indices = 0; - return NULL; - } - - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... - - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. - - if (open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) { //attempt to approximate the number of triangles that will result from generating a volume LoD set for the //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost @@ -4237,63 +3641,6 @@ void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) } } -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - S32 LLVolume::getNumTriangles(S32* vcount) const { @@ -4430,7 +3777,7 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, normals.push_back(LLVector3(0,0,1)); #if DEBUG_SILHOUETTE_BINORMALS vertices.push_back(face.mVertices[j].getPosition()); - vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mTangent*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); #endif @@ -4545,22 +3892,9 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, } } -S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, - S32 face, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) -{ - LLVector4a starta, enda; - starta.load3(start.mV); - enda.load3(end.mV); - - return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal); - -} - - S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent_out) { S32 hit_face = -1; @@ -4598,9 +3932,9 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (LLLineSegmentBoxIntersect(start, end, box_center, box_size)) { - if (bi_normal != NULL) // if the caller wants binormals, we may need to generate them + if (tangent_out != NULL) // if the caller wants tangents, we may need to generate them { - genBinormals(i); + genTangents(i); } if (isUnique()) @@ -4634,7 +3968,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en LLVector4a intersect = dir; intersect.mul(closest_t); intersect.add(start); - intersection->set(intersect.getF32ptr()); + *intersection = intersect; } @@ -4649,19 +3983,42 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (normal!= NULL) { - LLVector4* norm = (LLVector4*) face.mNormals; + LLVector4a* norm = face.mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); - *normal = ((1.f - a - b) * LLVector3(norm[idx0]) + - a * LLVector3(norm[idx1]) + - b * LLVector3(norm[idx2])); + n1.add(n2); + n1.add(n3); + + *normal = n1; } - if (bi_normal != NULL) + if (tangent_out != NULL) { - LLVector4* binormal = (LLVector4*) face.mBinormals; - *bi_normal = ((1.f - a - b) * LLVector3(binormal[idx0]) + - a * LLVector3(binormal[idx1]) + - b * LLVector3(binormal[idx2])); + LLVector4a* tangents = face.mTangents; + + LLVector4a t1,t2,t3; + t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *tangent_out = t1; } } } @@ -4674,7 +4031,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en face.createOctree(); } - LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, tangent_out); intersect.traverse(face.mOctree); if (intersect.mHitFace) { @@ -5217,14 +4574,16 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5240,14 +4599,16 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mCenter = mExtents+2; @@ -5277,8 +4638,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); resizeIndices(src.mNumIndices); @@ -5288,28 +4647,26 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); + + if (src.mNormals) + { LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - - if (src.mBinormals) + if (src.mTangents) { - allocateBinormals(src.mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size); + allocateTangents(src.mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size); } else { - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; } if (src.mWeights) @@ -5331,6 +4688,8 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); } + mOptimized = src.mOptimized; + //delete return *this; } @@ -5345,16 +4704,17 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; ll_aligned_free_16(mWeights); mWeights = NULL; @@ -5382,38 +4742,6 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < (U32)mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -5529,22 +4857,28 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); if (angle_cutoff > 1.f && !mNormals) { - ll_aligned_free_16(new_face.mNormals); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!mTexCoords) { - ll_aligned_free_16(new_face.mTexCoords); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices <= mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -5554,14 +4888,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5571,13 +4905,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5588,7 +4922,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5600,20 +4934,20 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } + F64 score = -1.0; - F32 score = 0.f; + if (data.mActiveTriangles >= 0) + { + score = 0.0; S32 cache_idx = data.mCacheTag; @@ -5629,15 +4963,15 @@ F32 find_vertex_score(LLVCacheVertexData& data) } else { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); } } //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5744,32 +5078,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + for (std::vector::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5786,13 +5132,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert_always(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5802,6 +5152,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5847,12 +5200,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < (U32)mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); + + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } @@ -5922,10 +5277,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5934,7 +5289,7 @@ void LLVolumeFace::cacheOptimize() } LLVector4a* binorm = NULL; - if (mBinormals) + if (mTangents) { binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } @@ -5959,9 +5314,9 @@ void LLVolumeFace::cacheOptimize() { wght[cur_idx] = mWeights[idx]; } - if (mBinormals) + if (mTangents) { - binorm[cur_idx] = mBinormals[idx]; + binorm[cur_idx] = mTangents[idx]; } cur_idx++; @@ -5973,17 +5328,16 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); - ll_aligned_free_16(mBinormals); + ll_aligned_free_16(mTangents); mPositions = pos; mNormals = norm; mTexCoords = tc; mWeights = wght; - mBinormals = binorm; + mTangents = binorm; //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); //llinfos << result << llendl; @@ -6064,7 +5418,7 @@ void LLVolumeFace::swapData(LLVolumeFace& rhs) { llswap(rhs.mPositions, mPositions); llswap(rhs.mNormals, mNormals); - llswap(rhs.mBinormals, mBinormals); + llswap(rhs.mTangents, mTangents); llswap(rhs.mTexCoords, mTexCoords); llswap(rhs.mIndices,mIndices); llswap(rhs.mNumVertices, mNumVertices); @@ -6097,8 +5451,9 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6123,9 +5478,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6153,22 +5508,11 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) corners[2].mTexCoord=swap; } - LLVector4a binormal; - - calc_binormal_from_triangle( binormal, - corners[0].getPosition(), corners[0].mTexCoord, - corners[1].getPosition(), corners[1].mTexCoord, - corners[2].getPosition(), corners[2].mTexCoord); - - binormal.normalize3fast(); - S32 size = (grid_size+1)*(grid_size+1); resizeVertices(size); - allocateBinormals(size); - + LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; - LLVector4a* binorm = (LLVector4a*) mBinormals; LLVector2* tc = (LLVector2*) mTexCoords; for(int gx = 0;gx& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6264,8 +5607,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { resizeVertices(num_vertices+1); - allocateBinormals(num_vertices+1); - + if (!partial_build) { resizeIndices(num_indices+3); @@ -6274,8 +5616,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) else { resizeVertices(num_vertices); - allocateBinormals(num_vertices); - + if (!partial_build) { resizeIndices(num_indices); @@ -6309,35 +5650,69 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector2* tc = (LLVector2*) mTexCoords; LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; - LLVector4a* binorm = (LLVector4a*) mBinormals; - + // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = (*p)[1]+0.5f; + + llassert(src->isFinite3()); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + llassert(pos->isFinite3()); + + ++p; + ++tc; + ++src; + ++pos; + } } else + { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) { // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } - - pos[i].load3(mesh[i + offset].mPos.mV); + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; - } - else - { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + llassert(src->isFinite3()); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + llassert(pos->isFinite3()); + + ++p; + ++tc; + ++src; + ++pos; } } @@ -6346,48 +5721,17 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) cuv = (min_uv + max_uv)*0.5f; - LLVector4a binormal; - calc_binormal_from_triangle(binormal, - *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); - binormal.normalize3fast(); - - LLVector4a normal; - LLVector4a d0, d1; - - - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); - - if (mTypeMask & TOP_MASK) - { - normal.setCross3(d0, d1); - } - else - { - normal.setCross3(d1, d0); - } - - normal.normalize3fast(); - VertexData vd; vd.setPosition(*mCenter); vd.mTexCoord = cuv; if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - for (S32 i = 0; i < num_vertices; i++) - { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); - } - if (partial_build) { return TRUE; @@ -6406,33 +5750,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); + + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6467,10 +5816,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, pa); + + LLVector4a d2; + d2.setSub(p2, pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6509,33 +5861,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6570,10 +5922,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6622,63 +5976,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } - + + LLVector4a d0,d1; + + d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]); + d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); + + LLVector4a normal; + normal.setCross3(d0,d1); + + if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) + { + normal.normalize3fast(); + } + else + { //degenerate, make up a value + normal.set(0,0,1); + } + + llassert(llfinite(normal.getF32ptr()[0])); + llassert(llfinite(normal.getF32ptr()[1])); + llassert(llfinite(normal.getF32ptr()[2])); + + llassert(!llisnan(normal.getF32ptr()[0])); + llassert(!llisnan(normal.getF32ptr()[1])); + llassert(!llisnan(normal.getF32ptr()[2])); + + for (S32 i = 0; i < num_vertices; i++) + { + norm[i].load4a(normal.getF32ptr()); + } + return TRUE; } -void LLVolumeFace::createBinormals() +void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, + const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent); + +void LLVolumeFace::createTangents() { - if (!mBinormals) + if (!mTangents) { - allocateBinormals(mNumVertices); + allocateTangents(mNumVertices); - //generate binormals - LLVector4a* pos = mPositions; - LLVector2* tc = (LLVector2*) mTexCoords; - LLVector4a* binorm = (LLVector4a*) mBinormals; + //generate tangents + //LLVector4a* pos = mPositions; + //LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* binorm = (LLVector4a*) mTangents; - LLVector4a* end = mBinormals+mNumVertices; + LLVector4a* end = mTangents+mNumVertices; while (binorm < end) { (*binorm++).clear(); } - binorm = mBinormals; + binorm = mTangents; - for (U32 i = 0; i < (U32)mNumIndices/3; i++) - { //for each triangle - const U16& i0 = mIndices[i*3+0]; - const U16& i1 = mIndices[i*3+1]; - const U16& i2 = mIndices[i*3+2]; - - //calculate binormal - LLVector4a binormal; - calc_binormal_from_triangle(binormal, - pos[i0], tc[i0], - pos[i1], tc[i1], - pos[i2], tc[i2]); + CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); - - //add triangle normal to vertices - binorm[i0].add(binormal); - binorm[i1].add(binormal); - binorm[i2].add(binormal); - - //even out quad contributions - if (i % 2 == 0) - { - binorm[i2].add(binormal); - } - else - { - binorm[i1].add(binormal); - } - } - - //normalize binormals - for (U32 i = 0; i < (U32)mNumVertices; i++) + //normalize tangents + for (S32 i = 0; i < mNumVertices; i++) { - binorm[i].normalize3fast(); + //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized mNormals[i].normalize3fast(); } @@ -6687,24 +6046,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer + ll_aligned_free_16(mTangents); - mBinormals = NULL; + mTangents = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6714,6 +6071,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6724,27 +6082,42 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; + + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); - + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + //just clear tangents + ll_aligned_free_16(mTangents); + mTangents = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6753,10 +6126,10 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con mNumVertices++; } -void LLVolumeFace::allocateBinormals(S32 num_verts) +void LLVolumeFace::allocateTangents(S32 num_verts) { - ll_aligned_free_16(mBinormals); - mBinormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + ll_aligned_free_16(mTangents); + mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } void LLVolumeFace::allocateWeights(S32 num_verts) @@ -6833,13 +6206,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6909,9 +6292,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; - const std::vector& path_data = volume->getPath().mPath; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; + const LLAlignedArray& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -6932,15 +6315,18 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6961,11 +6347,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -6985,20 +6371,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7015,28 +6396,62 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); + ss = profile[mBeginS + s][2] - begin_stex; + + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } - - //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; mCenter->clear(); - face_min = face_max = pos[0]; + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; - for (U32 i = 1; i < (U32)mNumVertices; ++i) + //get bounding box for this side + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + while (cur_pos < end_pos) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(face_min, face_max, *cur_pos++); } + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } + + LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); + + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; + + while (cur_tc < end_tc) + { + update_min_max(tc_min, tc_max, *cur_tc++); + } + + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7101,37 +6516,112 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + //clear normals - for (U32 i = 0; i < (U32)mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } //generate normals - for (U32 i = 0; i < (U32)mNumIndices/3; i++) //for each triangle - { - const U16* idx = &(mIndices[i*3]); - + U32 count = mNumIndices/3; - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; - - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a* norm = mNormals; + + static LLAlignedArray triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) + { + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + v2.load4a((F32*) (pos+idx[2])); //calculate triangle normal - LLVector4a a, b, c; + LLVector4a a; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); - c.setCross3(a,b); + a.setSub(b, v1); + b.sub(v2); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + + LLQuad& vector1 = *((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + llassert(v1.isFinite3()); + + v1.store4a((F32*) output); + + + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; + + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); + + n0.add(c); + n1.add(c); + n2.add(c); + + llassert(c.isFinite3()); + //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } // adjust normals based on wrapping and stitching @@ -7268,53 +6758,101 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } -// Finds binormal based on three vertices with texture coordinates. -// Fills in dummy values if the triangle has degenerate texture coordinates. -void calc_binormal_from_triangle(LLVector4a& binormal, - - const LLVector4a& pos0, - const LLVector2& tex0, - const LLVector4a& pos1, - const LLVector2& tex1, - const LLVector4a& pos2, - const LLVector2& tex2) +//adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html +void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, + const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { - LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); - - LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); + //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; + LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); - - LLVector4a lhs, rhs; + LLVector4a* tan2 = tan1 + vertexCount; - LLVector4a r0; - lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); - r0.setCross3(lhs, rhs); + memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); + + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; + + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; + + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; + + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); - LLVector4a r1; - lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); - r1.setCross3(lhs, rhs); + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; - LLVector4a r2; - lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); - r2.setCross3(lhs, rhs); + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero - if( r0[VX] && r1[VX] && r2[VX] ) - { - binormal.set( - -r0[VZ] / r0[VX], - -r1[VZ] / r1[VX], - -r2[VZ] / r2[VX]); - // binormal.normVec(); - } - else - { - binormal.set( 0, 1 , 0 ); - } + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + tan1[i1].add(sdir); + tan1[i2].add(sdir); + tan1[i3].add(sdir); + + tan2[i1].add(tdir); + tan2[i2].add(tdir); + tan2[i3].add(tdir); + } + + for (U32 a = 0; a < vertexCount; a++) + { + LLVector4a n = normal[a]; + + const LLVector4a& t = tan1[a]; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent[a] = tsubn; + } + else + { //degenerate, make up a value + tangent[a].set(0,0,1,1); + } + } + + ll_aligned_free_16(tan1); } + + diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index f6e974774..7a74d544c 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,12 +49,15 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" #include "llrefcount.h" #include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ @@ -709,16 +711,16 @@ public: LLFaceID mFaceID; }; - std::vector mProfile; - std::vector mNormals; + LLAlignedArray mProfile; + //LLAlignedArray mNormals; std::vector mFaces; - std::vector mEdgeNormals; - std::vector mEdgeCenters; + + //LLAlignedArray mEdgeNormals; + //LLAlignedArray mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -742,13 +744,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -780,7 +798,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector mPath; + LLAlignedArray mPath; protected: BOOL mOpen; @@ -845,12 +863,12 @@ private: public: BOOL create(LLVolume* volume, BOOL partial_build = FALSE); - void createBinormals(); + void createTangents(); void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform); void resizeVertices(S32 num_verts); - void allocateBinormals(S32 num_verts); + void allocateTangents(S32 num_verts); void allocateWeights(S32 num_verts); void resizeIndices(S32 num_indices); void fillFromLegacyData(std::vector& v, std::vector& idx); @@ -913,11 +931,12 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; LLVector4a* mNormals; - LLVector4a* mBinormals; + LLVector4a* mTangents; LLVector2* mTexCoords; U16* mIndices; @@ -931,9 +950,12 @@ public: // format is mWeights[vertex_index].mV[influence] = . // mWeights.size() should be empty or match mVertices.size() LLVector4a* mWeights; - + LLOctreeNode* mOctree; + //whether or not face has been cache optimized + BOOL mOptimized; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); @@ -945,15 +967,10 @@ class LLVolume : public LLRefCount friend class LLVolumeLODGroup; protected: - LLVolume(const LLVolume&); // Don't implement ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -976,13 +993,13 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } void regen(); - void genBinormals(S32 face); + void genTangents(S32 face); BOOL isConvex() const; BOOL isCap(S32 face); @@ -992,10 +1009,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1010,21 +1024,14 @@ public: //get the face index of the face that intersects with the given line segment at the point //closest to start. Moves end to the point of intersection. Returns -1 if no intersection. //Line segment must be in volume space. - S32 lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); - S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, - S32 face = 1, - LLVector3* intersection = NULL, - LLVector2* tex_coord = NULL, - LLVector3* normal = NULL, - LLVector3* bi_normal = NULL); - LLFaceID generateFaceMask(); BOOL isFaceMaskValid(LLFaceID face_mask); @@ -1068,7 +1075,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector mMesh; + LLAlignedArray mMesh; + BOOL mGenerateSingleFace; typedef std::vector face_list_t; @@ -1083,21 +1091,12 @@ public: std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -void calc_binormal_from_triangle( - LLVector4a& binormal, - const LLVector4a& pos0, - const LLVector2& tex0, - const LLVector4a& pos1, - const LLVector2& tex1, - const LLVector4a& pos2, - const LLVector2& tex2); - BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size); BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size); -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); +//BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +// F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, F32& intersection_a, F32& intersection_b, F32& intersection_t); diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index cf9aeece8..dc1f013b7 100644 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -94,14 +94,14 @@ void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode { LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0); - /*const F32* start = mStart.getF32(); - const F32* end = mEnd.getF32(); - const F32* center = vl->mBounds[0].getF32(); - const F32* size = vl->mBounds[1].getF32();*/ - - //if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) - if (LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), vl->mBounds[0].getF32ptr(), vl->mBounds[1].getF32ptr())) + if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) { node->accept(this); for (U32 i = 0; i < node->getChildCount(); ++i) @@ -152,34 +146,60 @@ void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode* n LLVector4a intersect = mDir; intersect.mul(*mClosestT); intersect.add(mStart); - mIntersection->set(intersect.getF32ptr()); + *mIntersection = intersect; } + U32 idx0 = tri->mIndex[0]; + U32 idx1 = tri->mIndex[1]; + U32 idx2 = tri->mIndex[2]; if (mTexCoord != NULL) { LLVector2* tc = (LLVector2*) mFace->mTexCoords; - *mTexCoord = ((1.f - a - b) * tc[tri->mIndex[0]] + - a * tc[tri->mIndex[1]] + - b * tc[tri->mIndex[2]]); + *mTexCoord = ((1.f - a - b) * tc[idx0] + + a * tc[idx1] + + b * tc[idx2]); } if (mNormal != NULL) { - LLVector4* norm = (LLVector4*) mFace->mNormals; + LLVector4a* norm = mFace->mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); - *mNormal = ((1.f - a - b) * LLVector3(norm[tri->mIndex[0]]) + - a * LLVector3(norm[tri->mIndex[1]]) + - b * LLVector3(norm[tri->mIndex[2]])); + n1.add(n2); + n1.add(n3); + + *mNormal = n1; } - if (mBinormal != NULL) + if (mTangent != NULL) { - LLVector4* binormal = (LLVector4*) mFace->mBinormals; - *mBinormal = ((1.f - a - b) * LLVector3(binormal[tri->mIndex[0]]) + - a * LLVector3(binormal[tri->mIndex[1]]) + - b * LLVector3(binormal[tri->mIndex[2]])); + LLVector4a* tangents = mFace->mTangents; + + LLVector4a t1,t2,t3; + t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *mTangent = t1; } } } diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 03f10814e..40d2e890c 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -134,16 +134,16 @@ public: LLVector4a mStart; LLVector4a mDir; LLVector4a mEnd; - LLVector3* mIntersection; + LLVector4a* mIntersection; LLVector2* mTexCoord; - LLVector3* mNormal; - LLVector3* mBinormal; + LLVector4a* mNormal; + LLVector4a* mTangent; F32* mClosestT; bool mHitFace; LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, const LLVolumeFace* face, F32* closest_t, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal); + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent); void traverse(const LLOctreeNode* node); diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index 5aa1ebdc3..653b0f5cb 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -263,13 +263,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //l_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -294,13 +294,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -482,13 +482,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -516,13 +516,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -716,13 +716,13 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!n) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!t) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -996,6 +996,43 @@ void LLModel::getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& tra translation_out = mNormalizedTranslation; } +LLVector3 LLModel::getTransformedCenter(const LLMatrix4& mat) +{ + LLVector3 ret; + + if (!mVolumeFaces.empty()) + { + LLMatrix4a m; + m.loadu(mat); + + LLVector4a minv,maxv; + + LLVector4a t; + m.affineTransform(mVolumeFaces[0].mPositions[0], t); + minv = maxv = t; + + for (S32 i = 0; i < (S32)mVolumeFaces.size(); ++i) + { + LLVolumeFace& face = mVolumeFaces[i]; + + for (U32 j = 0; j < (U32)face.mNumVertices; ++j) + { + m.affineTransform(face.mPositions[j],t); + update_min_max(minv, maxv, t); + } + } + + minv.add(maxv); + minv.mul(0.5f); + + ret.set(minv.getF32ptr()); + } + + return ret; +} + + + void LLModel::setNumVolumeFaces(S32 count) { mVolumeFaces.resize(count); @@ -1022,7 +1059,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mNormals); + //ll_aligned_free_16(face.mNormals); face.mNormals = NULL; } @@ -1033,7 +1070,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mTexCoords); + //ll_aligned_free_16(face.mTexCoords); face.mTexCoords = NULL; } @@ -1232,7 +1269,7 @@ void LLModel::generateNormals(F32 angle_cutoff) } else { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h index 1cf528fd9..f6063165d 100644 --- a/indra/llprimitive/llmodel.h +++ b/indra/llprimitive/llmodel.h @@ -173,7 +173,8 @@ public: void optimizeVolumeFaces(); void offsetMesh( const LLVector3& pivotPoint ); void getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& translation_out); - + LLVector3 getTransformedCenter(const LLMatrix4& mat); + //reorder face list based on mMaterialList in this and reference so //order matches that of reference (material ordering touchup) bool matchMaterialOrder(LLModel* ref, int& refFaceCnt, int& modelFaceCnt ); diff --git a/indra/llrender/llshadermgr.cpp b/indra/llrender/llshadermgr.cpp index 7b890c57a..b3e9b55c9 100644 --- a/indra/llrender/llshadermgr.cpp +++ b/indra/llrender/llshadermgr.cpp @@ -1035,7 +1035,7 @@ void LLShaderMgr::initAttribsAndUniforms() mReservedAttribs.push_back("texcoord3"); mReservedAttribs.push_back("diffuse_color"); mReservedAttribs.push_back("emissive"); - mReservedAttribs.push_back("binormal"); + mReservedAttribs.push_back("tangent"); mReservedAttribs.push_back("weight"); mReservedAttribs.push_back("weight4"); mReservedAttribs.push_back("clothing"); diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 67c271a0e..a577a722c 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -342,13 +342,32 @@ S32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] = sizeof(LLVector2), // TYPE_TEXCOORD3, sizeof(LLColor4U), // TYPE_COLOR, sizeof(LLColor4U), // TYPE_EMISSIVE, only alpha is used currently - sizeof(LLVector4), // TYPE_BINORMAL, + sizeof(LLVector4), // TYPE_TANGENT, sizeof(F32), // TYPE_WEIGHT, sizeof(LLVector4), // TYPE_WEIGHT4, sizeof(LLVector4), // TYPE_CLOTHWEIGHT, sizeof(LLVector4), // TYPE_TEXTURE_INDEX (actually exists as position.w), no extra data, but stride is 16 bytes }; +static std::string vb_type_name[] = +{ + "TYPE_VERTEX", + "TYPE_NORMAL", + "TYPE_TEXCOORD0", + "TYPE_TEXCOORD1", + "TYPE_TEXCOORD2", + "TYPE_TEXCOORD3", + "TYPE_COLOR", + "TYPE_EMISSIVE", + "TYPE_TANGENT", + "TYPE_WEIGHT", + "TYPE_WEIGHT4", + "TYPE_CLOTHWEIGHT", + "TYPE_TEXTURE_INDEX", + "TYPE_MAX", + "TYPE_INDEX", +}; + U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] = { GL_TRIANGLES, @@ -523,16 +542,16 @@ void LLVertexBuffer::setupClientArrays(U32 data_mask) } } - if (sLastMask & MAP_BINORMAL) + if (sLastMask & MAP_TANGENT) { - if (!(data_mask & MAP_BINORMAL)) + if (!(data_mask & MAP_TANGENT)) { glClientActiveTextureARB(GL_TEXTURE2_ARB); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glClientActiveTextureARB(GL_TEXTURE0_ARB); } } - else if (data_mask & MAP_BINORMAL) + else if (data_mask & MAP_TANGENT) { glClientActiveTextureARB(GL_TEXTURE2_ARB); glEnableClientState(GL_TEXTURE_COORD_ARRAY); @@ -1320,7 +1339,7 @@ void LLVertexBuffer::setupVertexArray() 2, //TYPE_TEXCOORD3, 4, //TYPE_COLOR, 4, //TYPE_EMISSIVE, - 3, //TYPE_BINORMAL, + 4, //TYPE_TANGENT, 1, //TYPE_WEIGHT, 4, //TYPE_WEIGHT4, 4, //TYPE_CLOTHWEIGHT, @@ -1337,7 +1356,7 @@ void LLVertexBuffer::setupVertexArray() GL_FLOAT, //TYPE_TEXCOORD3, GL_UNSIGNED_BYTE, //TYPE_COLOR, GL_UNSIGNED_BYTE, //TYPE_EMISSIVE, - GL_FLOAT, //TYPE_BINORMAL, + GL_FLOAT, //TYPE_TANGENT, GL_FLOAT, //TYPE_WEIGHT, GL_FLOAT, //TYPE_WEIGHT4, GL_FLOAT, //TYPE_CLOTHWEIGHT, @@ -1354,7 +1373,7 @@ void LLVertexBuffer::setupVertexArray() false, //TYPE_TEXCOORD3, false, //TYPE_COLOR, false, //TYPE_EMISSIVE, - false, //TYPE_BINORMAL, + false, //TYPE_TANGENT, false, //TYPE_WEIGHT, false, //TYPE_WEIGHT4, false, //TYPE_CLOTHWEIGHT, @@ -1371,7 +1390,7 @@ void LLVertexBuffer::setupVertexArray() GL_FALSE, //TYPE_TEXCOORD3, GL_TRUE, //TYPE_COLOR, GL_TRUE, //TYPE_EMISSIVE, - GL_FALSE, //TYPE_BINORMAL, + GL_FALSE, //TYPE_TANGENT, GL_FALSE, //TYPE_WEIGHT, GL_FALSE, //TYPE_WEIGHT4, GL_FALSE, //TYPE_CLOTHWEIGHT, @@ -2033,9 +2052,13 @@ bool LLVertexBuffer::getNormalStrider(LLStrider& strider, S32 index, { return VertexBufferStrider::get(*this, strider, index, count, map_range); } -bool LLVertexBuffer::getBinormalStrider(LLStrider& strider, S32 index, S32 count, bool map_range) +bool LLVertexBuffer::getTangentStrider(LLStrider& strider, S32 index, S32 count, bool map_range) { - return VertexBufferStrider::get(*this, strider, index, count, map_range); + return VertexBufferStrider::get(*this, strider, index, count, map_range); +} +bool LLVertexBuffer::getTangentStrider(LLStrider& strider, S32 index, S32 count, bool map_range) +{ + return VertexBufferStrider::get(*this, strider, index, count, map_range); } bool LLVertexBuffer::getColorStrider(LLStrider& strider, S32 index, S32 count, bool map_range) { @@ -2308,6 +2331,14 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) if (gDebugGL && ((data_mask & mTypeMask) != data_mask)) { + for (U32 i = 0; i < LLVertexBuffer::TYPE_MAX; ++i) + { + U32 mask = 1 << i; + if (mask & data_mask && !(mask & mTypeMask)) + { //bit set in data_mask, but not set in mTypeMask + llwarns << "Missing required component " << vb_type_name[i] << llendl; + } + } llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl; } @@ -2337,11 +2368,11 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD1]); glVertexAttribPointerARB(loc,2,GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], ptr); } - if (data_mask & MAP_BINORMAL) + if (data_mask & MAP_TANGENT) { - S32 loc = TYPE_BINORMAL; - void* ptr = (void*)(base + mOffsets[TYPE_BINORMAL]); - glVertexAttribPointerARB(loc, 3,GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_BINORMAL], ptr); + S32 loc = TYPE_TANGENT; + void* ptr = (void*)(base + mOffsets[TYPE_TANGENT]); + glVertexAttribPointerARB(loc, 4,GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TANGENT], ptr); } if (data_mask & MAP_TEXCOORD0) { @@ -2419,10 +2450,10 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1])); glClientActiveTextureARB(GL_TEXTURE0_ARB); } - if (data_mask & MAP_BINORMAL) + if (data_mask & MAP_TANGENT) { glClientActiveTextureARB(GL_TEXTURE2_ARB); - glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL])); + glTexCoordPointer(4,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_TANGENT], (void*)(base + mOffsets[TYPE_TANGENT])); glClientActiveTextureARB(GL_TEXTURE0_ARB); } if (data_mask & MAP_TEXCOORD0) diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 4801117a5..ce1604277 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -173,7 +173,7 @@ public: TYPE_TEXCOORD3, TYPE_COLOR, TYPE_EMISSIVE, - TYPE_BINORMAL, + TYPE_TANGENT, TYPE_WEIGHT, TYPE_WEIGHT4, TYPE_CLOTHWEIGHT, @@ -191,7 +191,7 @@ public: MAP_COLOR = (1<& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getTexCoord1Strider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getNormalStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); - bool getBinormalStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); + bool getTangentStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); + bool getTangentStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getColorStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getEmissiveStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getWeightStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); diff --git a/indra/newview/app_settings/shaders/class1/deferred/bumpSkinnedV.glsl b/indra/newview/app_settings/shaders/class1/deferred/bumpSkinnedV.glsl index 8ba75010a..10144f3e1 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/bumpSkinnedV.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/bumpSkinnedV.glsl @@ -30,7 +30,7 @@ ATTRIBUTE vec3 position; ATTRIBUTE vec4 diffuse_color; ATTRIBUTE vec3 normal; ATTRIBUTE vec2 texcoord0; -ATTRIBUTE vec3 binormal; +ATTRIBUTE vec4 tangent; VARYING vec3 vary_mat0; VARYING vec3 vary_mat1; @@ -52,8 +52,8 @@ void main() vec3 n = normalize((mat * vec4(normal.xyz+position.xyz, 1.0)).xyz-pos.xyz); - vec3 b = normalize((mat * vec4(binormal.xyz+position.xyz, 1.0)).xyz-pos.xyz); - vec3 t = cross(b, n); + vec3 t = normalize((mat * vec4(tangent.xyz+position.xyz, 1.0)).xyz-pos.xyz); + vec3 b = cross(n, t) * tangent.w; vary_mat0 = vec3(t.x, b.x, n.x); vary_mat1 = vec3(t.y, b.y, n.y); diff --git a/indra/newview/app_settings/shaders/class1/deferred/bumpV.glsl b/indra/newview/app_settings/shaders/class1/deferred/bumpV.glsl index c8d38bb8f..9f9749394 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/bumpV.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/bumpV.glsl @@ -31,7 +31,7 @@ ATTRIBUTE vec3 position; ATTRIBUTE vec4 diffuse_color; ATTRIBUTE vec3 normal; ATTRIBUTE vec2 texcoord0; -ATTRIBUTE vec3 binormal; +ATTRIBUTE vec4 tangent; VARYING vec3 vary_mat0; VARYING vec3 vary_mat1; @@ -46,8 +46,8 @@ void main() vary_texcoord0 = (texture_matrix0 * vec4(texcoord0,0,1)).xy; vec3 n = normalize(normal_matrix * normal); - vec3 b = normalize(normal_matrix * binormal); - vec3 t = cross(b, n); + vec3 t = normalize(normal_matrix * tangent.xyz); + vec3 b = cross(n, t) * tangent.w; vary_mat0 = vec3(t.x, b.x, n.x); vary_mat1 = vec3(t.y, b.y, n.y); diff --git a/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl b/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl index e02a7b405..2cef8f2a5 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl @@ -2093,7 +2093,6 @@ uniform sampler2D diffuseMap; uniform vec2 rcp_screen_res; uniform vec4 rcp_frame_opt; uniform vec4 rcp_frame_opt2; -uniform vec2 screen_res; VARYING vec2 vary_fragcoord; VARYING vec2 vary_tc; diff --git a/indra/newview/app_settings/shaders/class1/transform/binormalV.glsl b/indra/newview/app_settings/shaders/class1/transform/binormalV.glsl index 44f1aa34a..67bce8722 100644 --- a/indra/newview/app_settings/shaders/class1/transform/binormalV.glsl +++ b/indra/newview/app_settings/shaders/class1/transform/binormalV.glsl @@ -25,12 +25,12 @@ uniform mat3 normal_matrix; -ATTRIBUTE vec3 binormal; +ATTRIBUTE vec4 tangent; -VARYING vec4 binormal_out; +VARYING vec4 tangent_out; void main() { - binormal_out = vec4(normal_matrix * binormal, 0.0); + tangent_out = vec4(normal_matrix * tangent.xyz, tangent.w); } diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h index 880378f85..4b93f47e8 100644 --- a/indra/newview/lldrawpoolavatar.h +++ b/indra/newview/lldrawpoolavatar.h @@ -190,7 +190,7 @@ public: RIGGED_DEFERRED_BUMP_MASK = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_TEXCOORD0 | - LLVertexBuffer::MAP_BINORMAL | + LLVertexBuffer::MAP_TANGENT | LLVertexBuffer::MAP_COLOR | LLVertexBuffer::MAP_WEIGHT4, RIGGED_DEFERRED_SIMPLE_MASK = LLVertexBuffer::MAP_VERTEX | diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index 1debd4209..f601bac76 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -851,7 +851,7 @@ void LLDrawPoolBump::renderDeferred(S32 pass) LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type); LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type); - U32 mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_BINORMAL | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_COLOR; + U32 mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_TANGENT | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_COLOR; for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i) { diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 7593ad938..15e887519 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -511,7 +511,7 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color) { LLGLEnable poly_offset(GL_POLYGON_OFFSET_FILL); glPolygonOffset(-1.f,-1.f); - mVertexBuffer->setBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); + mVertexBuffer->setBuffer(mVertexBuffer->getTypeMask()); mVertexBuffer->draw(LLRender::TRIANGLES, mIndicesCount, mIndicesIndex); } @@ -789,6 +789,12 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, size.mul(scale); } + // Catch potential badness from normalization before it happens + // + llassert(mat_normal.mMatrix[0].isFinite3() && (mat_normal.mMatrix[0].dot3(mat_normal.mMatrix[0]).getF32() > F_APPROXIMATELY_ZERO)); + llassert(mat_normal.mMatrix[1].isFinite3() && (mat_normal.mMatrix[1].dot3(mat_normal.mMatrix[1]).getF32() > F_APPROXIMATELY_ZERO)); + llassert(mat_normal.mMatrix[2].isFinite3() && (mat_normal.mMatrix[2].dot3(mat_normal.mMatrix[2]).getF32() > F_APPROXIMATELY_ZERO)); + mat_normal.mMatrix[0].normalize3fast(); mat_normal.mMatrix[1].normalize3fast(); mat_normal.mMatrix[2].normalize3fast(); @@ -874,7 +880,7 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, // integrated with getGeometryVolume() for its texture coordinate // generation - but i'll leave that to someone more familiar // with the implications. -LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, LLVector3 position, LLVector3 normal) +LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& position, const LLVector4a& normal) { LLVector2 tc = surface_coord; @@ -894,7 +900,9 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, LLVector3 position, LLVector4a& center = *(mDrawablep->getVOVolume()->getVolume()->getVolumeFace(mTEOffset).mCenter); LLVector4a volume_position; - volume_position.load3(mDrawablep->getVOVolume()->agentPositionToVolume(position).mV); + LLVector3 v_position(position.getF32ptr()); + + volume_position.load3(mDrawablep->getVOVolume()->agentPositionToVolume(v_position).mV); if (!mDrawablep->getVOVolume()->isVolumeGlobal()) { @@ -904,7 +912,8 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, LLVector3 position, } LLVector4a volume_normal; - volume_normal.load3(mDrawablep->getVOVolume()->agentDirectionToVolume(normal).mV); + LLVector3 v_normal(normal.getF32ptr()); + volume_normal.load3(mDrawablep->getVOVolume()->agentDirectionToVolume(v_normal).mV); volume_normal.normalize3fast(); if (texgen == LLTextureEntry::TEX_GEN_PLANAR) @@ -937,7 +946,12 @@ void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_po const LLMatrix4& vol_mat = getWorldMatrix(); const LLVolumeFace& vf = getViewerObject()->getVolume()->getVolumeFace(mTEOffset); const LLVector4a& normal4a = vf.mNormals[0]; - const LLVector4a& binormal4a = vf.mBinormals[0]; + const LLVector4a& tangent = vf.mTangents[0]; + + LLVector4a binormal4a; + binormal4a.setCross3(normal4a, tangent); + binormal4a.mul(tangent.getF32ptr()[3]); + LLVector2 projected_binormal; planarProjection(projected_binormal, normal4a, *vf.mCenter, binormal4a); projected_binormal -= LLVector2(0.5f, 0.5f); // this normally happens in xform() @@ -1067,7 +1081,7 @@ void LLFace::cacheFaceInVRAM(const LLVolumeFace& vf) { LLFastTimer t(FTM_FACE_GEOM_VOLUME); U32 mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | - LLVertexBuffer::MAP_BINORMAL | LLVertexBuffer::MAP_NORMAL; + LLVertexBuffer::MAP_TANGENT | LLVertexBuffer::MAP_NORMAL; if (vf.mWeights) { @@ -1080,11 +1094,11 @@ void LLFace::cacheFaceInVRAM(const LLVolumeFace& vf) buff->allocateBuffer(vf.mNumVertices, 0, true); LLStrider f_vert; - LLStrider f_binorm; + LLStrider f_tangent; LLStrider f_norm; LLStrider f_tc; - buff->getBinormalStrider(f_binorm); + buff->getTangentStrider(f_tangent); buff->getVertexStrider(f_vert); buff->getNormalStrider(f_norm); buff->getTexCoord0Strider(f_tc); @@ -1092,7 +1106,7 @@ void LLFace::cacheFaceInVRAM(const LLVolumeFace& vf) for (U32 i = 0; i < (U32)vf.mNumVertices; ++i) { *f_vert++ = vf.mPositions[i]; - (*f_binorm++).set(vf.mBinormals[i].getF32ptr()); + *f_tangent++ = vf.mTangents[i]; *f_tc++ = vf.mTexCoords[i]; (*f_norm++).set(vf.mNormals[i].getF32ptr()); } @@ -1133,7 +1147,7 @@ static LLFastTimer::DeclareTimer FTM_FACE_GEOM_TEXTURE("Texture"); static LLFastTimer::DeclareTimer FTM_FACE_GEOM_COLOR("Color"); static LLFastTimer::DeclareTimer FTM_FACE_GEOM_EMISSIVE("Emissive"); static LLFastTimer::DeclareTimer FTM_FACE_GEOM_WEIGHTS("Weights"); -static LLFastTimer::DeclareTimer FTM_FACE_GEOM_BINORMAL("Binormal"); +static LLFastTimer::DeclareTimer FTM_FACE_GEOM_TANGENT("Binormal"); static LLFastTimer::DeclareTimer FTM_FACE_GEOM_FEEDBACK("Face Feedback"); static LLFastTimer::DeclareTimer FTM_FACE_GEOM_FEEDBACK_POSITION("Feedback Position"); @@ -1207,7 +1221,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLStrider tex_coords1; LLStrider norm; LLStrider colors; - LLStrider binorm; + LLStrider tangent; LLStrider indicesp; LLStrider wght; @@ -1229,7 +1243,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, bool rebuild_emissive = rebuild_color && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE); bool rebuild_tcoord = full_rebuild || mDrawablep->isState(LLDrawable::REBUILD_TCOORD); bool rebuild_normal = rebuild_pos && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); - bool rebuild_binormal = rebuild_pos && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_BINORMAL); + bool rebuild_tangent = rebuild_pos && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TANGENT); bool rebuild_weights = rebuild_pos && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_WEIGHT4); const LLTextureEntry *tep = mVObjp->getTE(f); @@ -1374,7 +1388,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices) { - mVObjp->getVolume()->genBinormals(f); + mVObjp->getVolume()->genTangents(f); LLFace::cacheFaceInVRAM(vf); buff = (LLVertexBuffer*) vf.mVertexBuffer.get(); } @@ -1459,15 +1473,15 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, glEndTransformFeedback(); } - if (rebuild_binormal) + if (rebuild_tangent) { - LLFastTimer t(FTM_FACE_GEOM_FEEDBACK_BINORMAL); - gTransformBinormalProgram.bind(); + LLFastTimer t(FTM_FACE_GEOM_TANGENT); + gTransformTangentProgram.bind(); - mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_BINORMAL, mGeomIndex, mGeomCount); + mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_TANGENT, mGeomIndex, mGeomCount); glBeginTransformFeedback(GL_POINTS); - buff->setBuffer(LLVertexBuffer::MAP_BINORMAL); + buff->setBuffer(LLVertexBuffer::MAP_TANGENT); push_for_transform(buff, vf.mNumVertices, mGeomCount); glEndTransformFeedback(); } @@ -1529,7 +1543,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (bump_code) { - mVObjp->getVolume()->genBinormals(f); + mVObjp->getVolume()->genTangents(f); F32 offset_multiple; switch( bump_code ) { @@ -1578,7 +1592,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, U8 texgen = getTextureEntry()->getTexGen(); if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT) { //planar texgen needs binormals - mVObjp->getVolume()->genBinormals(f); + mVObjp->getVolume()->genTangents(f); } U8 tex_mode = 0; @@ -1789,11 +1803,14 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, for (S32 i = 0; i < num_vertices; i++) { - LLVector4a tangent; - tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]); + LLVector4a tangent = vf.mTangents[i]; + LLVector4a binorm; + binorm.setCross3(vf.mNormals[i], tangent); + binorm.mul(tangent.getF32ptr()[3]); + LLMatrix4a tangent_to_object; - tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]); + tangent_to_object.setRows(tangent, binorm, vf.mNormals[i]); LLVector4a t; tangent_to_object.rotate(binormal_dir, t); LLVector4a binormal; @@ -1899,7 +1916,6 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLFastTimer t(FTM_FACE_GEOM_NORMAL); mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range); F32* normals = (F32*) norm.get(); - for (S32 i = 0; i < num_vertices; i++) { LLVector4a normal; @@ -1915,19 +1931,27 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, } } - if (rebuild_binormal) + if (rebuild_tangent) { - LLFastTimer t(FTM_FACE_GEOM_BINORMAL); - mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range); - F32* binormals = (F32*) binorm.get(); - + LLFastTimer t(FTM_FACE_GEOM_TANGENT); + mVertexBuffer->getTangentStrider(tangent, mGeomIndex, mGeomCount, map_range); + F32* tangents = (F32*) tangent.get(); + + mVObjp->getVolume()->genTangents(f); + + LLVector4Logical mask; + mask.clear(); + mask.setElement<3>(); + for (S32 i = 0; i < num_vertices; i++) - { - LLVector4a binormal; - mat_normal.rotate(vf.mBinormals[i], binormal); - binormal.normalize3fast(); - binormal.store4a(binormals); - binormals += 4; + { + LLVector4a tangent_out; + mat_normal.rotate(vf.mTangents[i], tangent_out); + tangent_out.normalize3fast(); + tangent_out.setSelectWithMask(mask, vf.mTangents[i], tangent_out); + tangent_out.store4a(tangents); + + tangents += 4; } if (map_range) diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 68cdcadde..d290ab292 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -121,7 +121,7 @@ public: LLXformMatrix* getXform() const { return mXform; } BOOL hasGeometry() const { return mGeomCount > 0; } LLVector3 getPositionAgent() const; - LLVector2 surfaceToTexture(LLVector2 surface_coord, LLVector3 position, LLVector3 normal); + LLVector2 surfaceToTexture(LLVector2 surface_coord, const LLVector4a& position, const LLVector4a& normal); void getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_pos, F32* scale) const; bool calcAlignedPlanarTE(const LLFace* align_to, LLVector2* st_offset, LLVector2* st_scale, F32* st_rot) const; diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index a80003991..3f34abc1d 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -293,6 +293,9 @@ void LLVolumeImplFlexible::onSetVolume(const LLVolumeParams &volume_params, cons void LLVolumeImplFlexible::updateRenderRes() { + if (!mAttributes) + return; + LLDrawable* drawablep = mVO->mDrawable; S32 new_res = mAttributes->getSimulateLOD(); @@ -424,7 +427,7 @@ void LLVolumeImplFlexible::doFlexibleUpdate() } } - if(!mInitialized) + if(!mInitialized || !mAttributes) { //the object is not visible return ; @@ -682,15 +685,17 @@ void LLVolumeImplFlexible::doFlexibleUpdate() new_point = &path->mPath[i]; LLVector3 pos = newSection[i].mPosition * rel_xform; LLQuaternion rot = mSection[i].mAxisRotation * newSection[i].mRotation * delta_rot; - - if (!mUpdated || (new_point->mPos-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) + + LLVector3 np(new_point->mPos.getF32ptr()); + + if (!mUpdated || (np-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) { - new_point->mPos = newSection[i].mPosition * rel_xform; + new_point->mPos.load3((newSection[i].mPosition * rel_xform).mV); mUpdated = FALSE; } - new_point->mRot = rot; - new_point->mScale = newSection[i].mScale; + new_point->mRot.loadu(LLMatrix3(rot)); + new_point->mScale.set(newSection[i].mScale.mV[0], newSection[i].mScale.mV[1], 0,1); new_point->mTexT = ((F32)i)/(num_render_sections); } diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp index 6b9ad6a17..b11cbaf44 100644 --- a/indra/newview/llhudicon.cpp +++ b/indra/newview/llhudicon.cpp @@ -208,7 +208,7 @@ void LLHUDIcon::render() renderIcon(FALSE); } -BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, LLVector3* intersection) +BOOL LLHUDIcon::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, LLVector4a* intersection) { if (mHidden) return FALSE; @@ -281,23 +281,18 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en LLVector4a upper_right; upper_right.setAdd(lower_right, y_scalea); - LLVector4a enda; - enda.load3(end.mV); - LLVector4a starta; - starta.load3(start.mV); LLVector4a dir; - dir.setSub(enda, starta); + dir.setSub(end, start); F32 a,b,t; - if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, a,b,t) || - LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, a,b,t)) + if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, start, dir, a,b,t) || + LLTriangleRayIntersect(upper_left, lower_left, lower_right, start, dir, a,b,t)) { if (intersection) { dir.mul(t); - starta.add(dir); - *intersection = LLVector3(starta.getF32ptr()); + intersection->setAdd(start, dir); } return TRUE; } @@ -337,12 +332,12 @@ LLHUDIcon* LLHUDIcon::handlePick(S32 pick_id) } //static -LLHUDIcon* LLHUDIcon::lineSegmentIntersectAll(const LLVector3& start, const LLVector3& end, LLVector3* intersection) +LLHUDIcon* LLHUDIcon::lineSegmentIntersectAll(const LLVector4a& start, const LLVector4a& end, LLVector4a* intersection) { icon_instance_t::iterator icon_it; - LLVector3 local_end = end; - LLVector3 position; + LLVector4a local_end = end; + LLVector4a position; LLHUDIcon* ret = NULL; for(icon_it = sIconInstances.begin(); icon_it != sIconInstances.end(); ++icon_it) diff --git a/indra/newview/llhudicon.h b/indra/newview/llhudicon.h index 5697cc94f..0a65a5092 100644 --- a/indra/newview/llhudicon.h +++ b/indra/newview/llhudicon.h @@ -68,7 +68,7 @@ public: static S32 generatePickIDs(S32 start_id, S32 step_size); static LLHUDIcon* handlePick(S32 pick_id); - static LLHUDIcon* lineSegmentIntersectAll(const LLVector3& start, const LLVector3& end, LLVector3* intersection); + static LLHUDIcon* lineSegmentIntersectAll(const LLVector4a& start, const LLVector4a& end, LLVector4a* intersection); static void updateAll(); static void cleanupDeadIcons(); @@ -79,7 +79,7 @@ public: BOOL getHidden() const { return mHidden; } void setHidden( BOOL hide ) { mHidden = hide; } - BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, LLVector3* intersection); + BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, LLVector4a* intersection); protected: LLHUDIcon(const U8 type); diff --git a/indra/newview/llhudnametag.cpp b/indra/newview/llhudnametag.cpp index cc016c07a..7abf6f56e 100644 --- a/indra/newview/llhudnametag.cpp +++ b/indra/newview/llhudnametag.cpp @@ -120,7 +120,7 @@ LLHUDNameTag::~LLHUDNameTag() } -BOOL LLHUDNameTag::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, LLVector3& intersection, BOOL debug_render) +BOOL LLHUDNameTag::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, LLVector4a& intersection, BOOL debug_render) { if (!mVisible || mHidden) { @@ -218,16 +218,23 @@ BOOL LLHUDNameTag::lineSegmentIntersect(const LLVector3& start, const LLVector3& gGL.vertex3fv(v[2].mV); gGL.end(); } - - LLVector3 dir = end-start; + LLVector4a dir; + dir.setSub(end,start); F32 a, b, t; - if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, a, b, t, FALSE) || - LLTriangleRayIntersect(v[2], v[3], v[0], start, dir, a, b, t, FALSE) ) + LLVector4a v0,v1,v2,v3; + v0.load3(v[0].mV); + v1.load3(v[1].mV); + v2.load3(v[2].mV); + v3.load3(v[3].mV); + + if (LLTriangleRayIntersect(v0, v1, v2, start, dir, a, b, t) || + LLTriangleRayIntersect(v2, v3, v0, start, dir, a, b, t) ) { if (t <= 1.f) { - intersection = start + dir*t; + dir.mul(t); + intersection.setAdd(start, dir); return TRUE; } } diff --git a/indra/newview/llhudnametag.h b/indra/newview/llhudnametag.h index 8c6185272..e238fac5a 100644 --- a/indra/newview/llhudnametag.h +++ b/indra/newview/llhudnametag.h @@ -126,7 +126,7 @@ public: void setHidden( BOOL hide ) { mHidden = hide; } void shift(const LLVector3& offset); - BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, LLVector3& intersection, BOOL debug_render = FALSE); + BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, LLVector4a& intersection, BOOL debug_render = FALSE); static void shiftAll(const LLVector3& offset); static void addPickable(std::set &pick_list); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 3ae95d6c0..249fcd51b 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -2944,9 +2944,9 @@ void renderNormals(LLDrawable* drawablep) gGL.vertex3fv(face.mPositions[j].getF32ptr()); gGL.vertex3fv(p.getF32ptr()); - if (face.mBinormals) + if (face.mTangents) { - n.setMul(face.mBinormals[j], scale); + n.setMul(face.mTangents[j], scale); p.setAdd(face.mPositions[j], n); gGL.diffuseColor4f(0,1,1,1); @@ -3694,11 +3694,17 @@ void renderRaycast(LLDrawable* drawablep) gGL.translatef(trans.mV[0], trans.mV[1], trans.mV[2]); gGL.multMatrix((F32*) vobj->getRelativeXform().mMatrix); - LLVector3 start, end; + LLVector4a start, end; if (transform) { - start = vobj->agentPositionToVolume(gDebugRaycastStart); - end = vobj->agentPositionToVolume(gDebugRaycastEnd); + LLVector3 v_start(gDebugRaycastStart.getF32ptr()); + LLVector3 v_end(gDebugRaycastEnd.getF32ptr()); + + v_start = vobj->agentPositionToVolume(v_start); + v_end = vobj->agentPositionToVolume(v_end); + + start.load3(v_start.mV); + end.load3(v_end.mV); } else { @@ -3706,11 +3712,8 @@ void renderRaycast(LLDrawable* drawablep) end = gDebugRaycastEnd; } - LLVector4a starta, enda; - starta.load3(start.mV); - enda.load3(end.mV); LLVector4a dir; - dir.setSub(enda, starta); + dir.setSub(end, start); gGL.flush(); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); @@ -3733,10 +3736,11 @@ void renderRaycast(LLDrawable* drawablep) ((LLVolumeFace*) &face)->createOctree(); } - LLRenderOctreeRaycast render(starta, dir, &t); + LLRenderOctreeRaycast render(start, dir, &t); render.traverse(face.mOctree); } + gGL.popMatrix(); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } @@ -3757,10 +3761,18 @@ void renderRaycast(LLDrawable* drawablep) // draw intersection point gGL.pushMatrix(); gGL.loadMatrix(gGLModelView); - LLVector3 translate = gDebugRaycastIntersection; + LLVector3 translate(gDebugRaycastIntersection.getF32ptr()); gGL.translatef(translate.mV[0], translate.mV[1], translate.mV[2]); LLCoordFrame orient; - orient.lookDir(gDebugRaycastNormal, gDebugRaycastBinormal); + LLVector4a debug_binormal; + + debug_binormal.setCross3(gDebugRaycastNormal, gDebugRaycastTangent); + debug_binormal.mul(gDebugRaycastTangent.getF32ptr()[3]); + + LLVector3 normal(gDebugRaycastNormal.getF32ptr()); + LLVector3 binormal(debug_binormal.getF32ptr()); + + orient.lookDir(normal, binormal); LLMatrix4 rotation; orient.getRotMatrixToParent(rotation); gGL.multMatrix((float*)rotation.mMatrix); @@ -4261,28 +4273,30 @@ BOOL LLSpatialPartition::isVisible(const LLVector3& v) return TRUE; } +LL_ALIGN_PREFIX(16) class LLOctreeIntersect : public LLSpatialGroup::OctreeTraveler { public: - LLVector3 mStart; - LLVector3 mEnd; + LL_ALIGN_16(LLVector4a mStart); + LL_ALIGN_16(LLVector4a mEnd); + S32 *mFaceHit; - LLVector3 *mIntersection; + LLVector4a *mIntersection; LLVector2 *mTexCoord; - LLVector3 *mNormal; - LLVector3 *mBinormal; + LLVector4a *mNormal; + LLVector4a *mTangent; LLDrawable* mHit; BOOL mPickTransparent; - LLOctreeIntersect(LLVector3 start, LLVector3 end, BOOL pick_transparent, - S32* face_hit, LLVector3* intersection, LLVector2* tex_coord, LLVector3* normal, LLVector3* binormal) + LLOctreeIntersect(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, + S32* face_hit, LLVector4a* intersection, LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) : mStart(start), mEnd(end), mFaceHit(face_hit), mIntersection(intersection), mTexCoord(tex_coord), mNormal(normal), - mBinormal(binormal), + mTangent(tangent), mHit(NULL), mPickTransparent(pick_transparent) { @@ -4315,23 +4329,22 @@ public: size = group->mBounds[1]; center = group->mBounds[0]; - LLVector3 local_start = mStart; - LLVector3 local_end = mEnd; + LLVector4a local_start = mStart; + LLVector4a local_end = mEnd; if (group->mSpatialPartition->isBridge()) { LLMatrix4 local_matrix = group->mSpatialPartition->asBridge()->mDrawable->getRenderMatrix(); local_matrix.invert(); - - local_start = mStart * local_matrix; - local_end = mEnd * local_matrix; + + LLMatrix4a local_matrix4a; + local_matrix4a.loadu(local_matrix); + + local_matrix4a.affineTransform(mStart, local_start); + local_matrix4a.affineTransform(mEnd, local_end); } - LLVector4a start, end; - start.load3(local_start.mV); - end.load3(local_end.mV); - - if (LLLineSegmentBoxIntersect(start, end, center, size)) + if (LLLineSegmentBoxIntersect(local_start, local_end, center, size)) { check(child); } @@ -4362,14 +4375,14 @@ public: if (vobj) { - LLVector3 intersection; + LLVector4a intersection; bool skip_check = false; if (vobj->isAvatar()) { LLVOAvatar* avatar = (LLVOAvatar*) vobj; if (avatar->isSelf() && gFloaterTools->getVisible()) { - LLViewerObject* hit = avatar->lineSegmentIntersectRiggedAttachments(mStart, mEnd, -1, mPickTransparent, mFaceHit, &intersection, mTexCoord, mNormal, mBinormal); + LLViewerObject* hit = avatar->lineSegmentIntersectRiggedAttachments(mStart, mEnd, -1, mPickTransparent, mFaceHit, &intersection, mTexCoord, mNormal, mTangent); if (hit) { mEnd = intersection; @@ -4389,7 +4402,7 @@ public: } } - if (!skip_check && vobj->lineSegmentIntersect(mStart, mEnd, -1, mPickTransparent, mFaceHit, &intersection, mTexCoord, mNormal, mBinormal)) + if (!skip_check && vobj->lineSegmentIntersect(mStart, mEnd, -1, mPickTransparent, mFaceHit, &intersection, mTexCoord, mNormal, mTangent)) { mEnd = intersection; // shorten ray so we only find CLOSER hits if (mIntersection) @@ -4404,19 +4417,19 @@ public: return false; } -}; +} LL_ALIGN_POSTFIX(16); -LLDrawable* LLSpatialPartition::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, +LLDrawable* LLSpatialPartition::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, // return the face hit - LLVector3* intersection, // return the intersection point + LLVector4a* intersection, // return the intersection point LLVector2* tex_coord, // return the texture coordinates of the intersection point - LLVector3* normal, // return the surface normal at the intersection point - LLVector3* bi_normal // return the surface bi-normal at the intersection point + LLVector4a* normal, // return the surface normal at the intersection point + LLVector4a* tangent // return the surface tangent at the intersection point ) { - LLOctreeIntersect intersect(start, end, pick_transparent, face_hit, intersection, tex_coord, normal, bi_normal); + LLOctreeIntersect intersect(start, end, pick_transparent, face_hit, intersection, tex_coord, normal, tangent); LLDrawable* drawable = intersect.check(mOctree); return drawable; diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index d6e58fdc1..f41c3a57c 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -457,13 +457,13 @@ public: LLSpatialGroup *put(LLDrawable *drawablep, BOOL was_visible = FALSE); BOOL remove(LLDrawable *drawablep, LLSpatialGroup *curp); - LLDrawable* lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + LLDrawable* lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, // return the face hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); @@ -695,7 +695,7 @@ class LLVolumeGeometryManager: public LLGeometryManager virtual void rebuildGeom(LLSpatialGroup* group); virtual void rebuildMesh(LLSpatialGroup* group); virtual void getGeometry(LLSpatialGroup* group); - void genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE); + void genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE, BOOL no_materials = FALSE); void registerFace(LLSpatialGroup* group, LLFace* facep, U32 type); }; diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp index 6429b89c7..27bf2c7f8 100644 --- a/indra/newview/llviewerobject.cpp +++ b/indra/newview/llviewerobject.cpp @@ -3904,19 +3904,19 @@ LLViewerObject* LLViewerObject::getRootEdit() const } -BOOL LLViewerObject::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, +BOOL LLViewerObject::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32* face_hit, - LLVector3* intersection, + LLVector4a* intersection, LLVector2* tex_coord, - LLVector3* normal, - LLVector3* bi_normal) + LLVector4a* normal, + LLVector4a* tangent) { return false; } -BOOL LLViewerObject::lineSegmentBoundingBox(const LLVector3& start, const LLVector3& end) +BOOL LLViewerObject::lineSegmentBoundingBox(const LLVector4a& start, const LLVector4a& end) { if (mDrawable.isNull() || mDrawable->isDead()) { @@ -3933,11 +3933,7 @@ BOOL LLViewerObject::lineSegmentBoundingBox(const LLVector3& start, const LLVect size.setSub(ext[1], ext[0]); size.mul(0.5f); - LLVector4a starta, enda; - starta.load3(start.mV); - enda.load3(end.mV); - - return LLLineSegmentBoxIntersect(starta, enda, center, size); + return LLLineSegmentBoxIntersect(start, end, center, size); } U8 LLViewerObject::getMediaType() const diff --git a/indra/newview/llviewerobject.h b/indra/newview/llviewerobject.h index 5935942b6..1d1a11d23 100644 --- a/indra/newview/llviewerobject.h +++ b/indra/newview/llviewerobject.h @@ -262,17 +262,17 @@ public: //detect if given line segment (in agent space) intersects with this viewer object. //returns TRUE if intersection detected and returns information about intersection - virtual BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + virtual BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); - virtual BOOL lineSegmentBoundingBox(const LLVector3& start, const LLVector3& end); + virtual BOOL lineSegmentBoundingBox(const LLVector4a& start, const LLVector4a& end); virtual const LLVector3d getPositionGlobal() const; virtual const LLVector3 &getPositionRegion() const; diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index b39fb6e1f..ae089335e 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -77,7 +77,7 @@ LLGLSLShader gTransformPositionProgram(LLViewerShaderMgr::SHADER_TRANSFORM); LLGLSLShader gTransformTexCoordProgram(LLViewerShaderMgr::SHADER_TRANSFORM); LLGLSLShader gTransformNormalProgram(LLViewerShaderMgr::SHADER_TRANSFORM); LLGLSLShader gTransformColorProgram(LLViewerShaderMgr::SHADER_TRANSFORM); -LLGLSLShader gTransformBinormalProgram(LLViewerShaderMgr::SHADER_TRANSFORM); +LLGLSLShader gTransformTangentProgram(LLViewerShaderMgr::SHADER_TRANSFORM); //utility shaders LLGLSLShader gOcclusionProgram(LLViewerShaderMgr::SHADER_INTERFACE); @@ -2747,16 +2747,16 @@ BOOL LLViewerShaderMgr::loadTransformShaders() if (success) { - gTransformBinormalProgram.mName = "Binormal Transform Shader"; - gTransformBinormalProgram.mShaderFiles.clear(); - gTransformBinormalProgram.mShaderFiles.push_back(make_pair("transform/binormalV.glsl", GL_VERTEX_SHADER_ARB)); - gTransformBinormalProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM]; + gTransformTangentProgram.mName = "Binormal Transform Shader"; + gTransformTangentProgram.mShaderFiles.clear(); + gTransformTangentProgram.mShaderFiles.push_back(make_pair("transform/binormalV.glsl", GL_VERTEX_SHADER_ARB)); + gTransformTangentProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM]; const char* varyings[] = { - "binormal_out", + "tangent_out", }; - success = gTransformBinormalProgram.createShader(NULL, NULL, 1, varyings); + success = gTransformTangentProgram.createShader(NULL, NULL, 1, varyings); } diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index d41fb5f78..a27200974 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -234,7 +234,7 @@ extern LLGLSLShader gTransformPositionProgram; extern LLGLSLShader gTransformTexCoordProgram; extern LLGLSLShader gTransformNormalProgram; extern LLGLSLShader gTransformColorProgram; -extern LLGLSLShader gTransformBinormalProgram; +extern LLGLSLShader gTransformTangentProgram; //utility shaders extern LLGLSLShader gOcclusionProgram; extern LLGLSLShader gOcclusionCubeProgram; @@ -244,7 +244,6 @@ extern LLGLSLShader gSplatTextureRectProgram; extern LLGLSLShader gGlowCombineFXAAProgram; extern LLGLSLShader gDebugProgram; extern LLGLSLShader gClipProgram; -extern LLGLSLShader gAlphaMaskProgram; //output tex0[tc0] + tex1[tc1] extern LLGLSLShader gTwoTextureAddProgram; diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp index ca054bae3..4f86993fe 100644 --- a/indra/newview/llviewerwindow.cpp +++ b/indra/newview/llviewerwindow.cpp @@ -218,13 +218,13 @@ BOOL gShowOverlayTitle = FALSE; BOOL gPickTransparent = TRUE; LLViewerObject* gDebugRaycastObject = NULL; -LLVector3 gDebugRaycastIntersection; -LLVector2 gDebugRaycastTexCoord; -LLVector3 gDebugRaycastNormal; -LLVector3 gDebugRaycastBinormal; -S32 gDebugRaycastFaceHit; -LLVector3 gDebugRaycastStart; -LLVector3 gDebugRaycastEnd; +LLVector4a gDebugRaycastIntersection; +LLVector2 gDebugRaycastTexCoord; +LLVector4a gDebugRaycastNormal; +LLVector4a gDebugRaycastTangent; +S32 gDebugRaycastFaceHit; +LLVector4a gDebugRaycastStart; +LLVector4a gDebugRaycastEnd; // HUD display lines in lower right BOOL gDisplayWindInfo = FALSE; @@ -3038,7 +3038,7 @@ void LLViewerWindow::updateUI() &gDebugRaycastIntersection, &gDebugRaycastTexCoord, &gDebugRaycastNormal, - &gDebugRaycastBinormal, + &gDebugRaycastTangent, &gDebugRaycastStart, &gDebugRaycastEnd); } @@ -3982,7 +3982,7 @@ LLPickInfo LLViewerWindow::pickImmediate(S32 x, S32 y_from_bot, BOOL pick_trans } LLHUDIcon* LLViewerWindow::cursorIntersectIcon(S32 mouse_x, S32 mouse_y, F32 depth, - LLVector3* intersection) + LLVector4a* intersection) { S32 x = mouse_x; S32 y = mouse_y; @@ -3999,9 +3999,11 @@ LLHUDIcon* LLViewerWindow::cursorIntersectIcon(S32 mouse_x, S32 mouse_y, F32 dep LLVector3 mouse_world_start = mouse_point_global; LLVector3 mouse_world_end = mouse_point_global + mouse_direction_global * depth; - return LLHUDIcon::lineSegmentIntersectAll(mouse_world_start, mouse_world_end, intersection); - + LLVector4a start, end; + start.load3(mouse_world_start.mV); + end.load3(mouse_world_end.mV); + return LLHUDIcon::lineSegmentIntersectAll(start, end, intersection); } LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 depth, @@ -4009,12 +4011,12 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de S32 this_face, BOOL pick_transparent, S32* face_hit, - LLVector3 *intersection, + LLVector4a *intersection, LLVector2 *uv, - LLVector3 *normal, - LLVector3 *binormal, - LLVector3* start, - LLVector3* end) + LLVector4a *normal, + LLVector4a *tangent, + LLVector4a* start, + LLVector4a* end) { S32 x = mouse_x; S32 y = mouse_y; @@ -4049,17 +4051,27 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de if (!LLViewerJoystick::getInstance()->getOverrideCamera()) { //always set raycast intersection to mouse_world_end unless //flycam is on (for DoF effect) - gDebugRaycastIntersection = mouse_world_end; + gDebugRaycastIntersection.load3(mouse_world_end.mV); } + LLVector4a mw_start; + mw_start.load3(mouse_world_start.mV); + LLVector4a mw_end; + mw_end.load3(mouse_world_end.mV); + + LLVector4a mh_start; + mh_start.load3(mouse_hud_start.mV); + LLVector4a mh_end; + mh_end.load3(mouse_hud_end.mV); + if (start) { - *start = mouse_world_start; + *start = mw_start; } if (end) { - *end = mouse_world_end; + *end = mw_end; } LLViewerObject* found = NULL; @@ -4068,25 +4080,25 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de { if (this_object->isHUDAttachment()) // is a HUD object? { - if (this_object->lineSegmentIntersect(mouse_hud_start, mouse_hud_end, this_face, pick_transparent, - face_hit, intersection, uv, normal, binormal)) + if (this_object->lineSegmentIntersect(mh_start, mh_end, this_face, pick_transparent, + face_hit, intersection, uv, normal, tangent)) { found = this_object; } - } + } else // is a world object { - if (this_object->lineSegmentIntersect(mouse_world_start, mouse_world_end, this_face, pick_transparent, - face_hit, intersection, uv, normal, binormal)) + if (this_object->lineSegmentIntersect(mw_start, mw_end, this_face, pick_transparent, + face_hit, intersection, uv, normal, tangent)) { found = this_object; } - } - } + } + } else // check ALL objects { - found = gPipeline.lineSegmentIntersectInHUD(mouse_hud_start, mouse_hud_end, pick_transparent, - face_hit, intersection, uv, normal, binormal); + found = gPipeline.lineSegmentIntersectInHUD(mh_start, mh_end, pick_transparent, + face_hit, intersection, uv, normal, tangent); // [RLVa:KB] - Checked: 2009-12-28 (RLVa-1.1.0k) | Modified: RLVa-1.1.0k if ( (rlv_handler_t::isEnabled()) && (LLToolCamera::getInstance()->hasMouseCapture()) && (gKeyboard->currentMask(TRUE) & MASK_ALT) ) @@ -4097,8 +4109,8 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de if (!found) // if not found in HUD, look in world: { - found = gPipeline.lineSegmentIntersectInWorld(mouse_world_start, mouse_world_end, pick_transparent, - face_hit, intersection, uv, normal, binormal); + found = gPipeline.lineSegmentIntersectInWorld(mw_start, mw_end, pick_transparent, + face_hit, intersection, uv, normal, tangent); // [RLVa:KB] - Checked: 2010-01-02 (RLVa-1.1.0l) | Added: RLVa-1.1.0l #ifdef RLV_EXTENSION_CMD_INTERACT @@ -4118,6 +4130,10 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de } #endif // RLV_EXTENSION_CMD_INTERACT // [/RLVa:KB] + if (found && !pick_transparent) + { + gDebugRaycastIntersection = *intersection; + } } } @@ -5815,6 +5831,7 @@ LLPickInfo::LLPickInfo() mXYCoords(-1, -1), mIntersection(), mNormal(), + mTangent(), mBinormal(), mHUDIcon(NULL), mPickTransparent(FALSE) @@ -5836,6 +5853,7 @@ LLPickInfo::LLPickInfo(const LLCoordGL& mouse_pos, mSTCoords(-1.f, -1.f), mXYCoords(-1, -1), mNormal(), + mTangent(), mBinormal(), mHUDIcon(NULL), mPickTransparent(pick_transparent) @@ -5846,19 +5864,26 @@ void LLPickInfo::fetchResults() { S32 face_hit = -1; - LLVector3 intersection, normal, binormal; + LLVector4a intersection, normal; + LLVector4a tangent; + LLVector2 uv; LLHUDIcon* hit_icon = gViewerWindow->cursorIntersectIcon(mMousePt.mX, mMousePt.mY, 512.f, &intersection); + LLVector4a origin; + origin.load3(LLViewerCamera::getInstance()->getOrigin().mV); F32 icon_dist = 0.f; if (hit_icon) { - icon_dist = (LLViewerCamera::getInstance()->getOrigin()-intersection).magVec(); + LLVector4a delta; + delta.setSub(intersection, origin); + icon_dist = delta.getLength3().getF32(); } + LLViewerObject* hit_object = gViewerWindow->cursorIntersect(mMousePt.mX, mMousePt.mY, 512.f, NULL, -1, mPickTransparent, &face_hit, - &intersection, &uv, &normal, &binormal); + &intersection, &uv, &normal, &tangent); mPickPt = mMousePt; @@ -5868,9 +5893,13 @@ void LLPickInfo::fetchResults() LLViewerObject* objectp = hit_object; + + LLVector4a delta; + delta.setSub(origin, intersection); + if (hit_icon && (!objectp || - icon_dist < (LLViewerCamera::getInstance()->getOrigin()-intersection).magVec())) + icon_dist < delta.getLength3().getF32())) { // was this name referring to a hud icon? mHUDIcon = hit_icon; @@ -5907,11 +5936,16 @@ void LLPickInfo::fetchResults() { mPickType = PICK_OBJECT; } - mObjectOffset = gAgentCamera.calcFocusOffset(objectp, intersection, mPickPt.mX, mPickPt.mY); + + LLVector3 v_intersection(intersection.getF32ptr()); + + mObjectOffset = gAgentCamera.calcFocusOffset(objectp, v_intersection, mPickPt.mX, mPickPt.mY); mObjectID = objectp->mID; mObjectFace = (te_offset == NO_FACE) ? -1 : (S32)te_offset; - mPosGlobal = gAgent.getPosGlobalFromAgent(intersection); + + + mPosGlobal = gAgent.getPosGlobalFromAgent(v_intersection); if (mWantSurfaceInfo) { @@ -5955,7 +5989,16 @@ void LLPickInfo::getSurfaceInfo() mIntersection = LLVector3(0,0,0); mNormal = LLVector3(0,0,0); mBinormal = LLVector3(0,0,0); + mTangent = LLVector4(0,0,0,0); + LLVector4a tangent; + LLVector4a intersection; + LLVector4a normal; + + tangent.clear(); + normal.clear(); + intersection.clear(); + LLViewerObject* objectp = getObject(); if (objectp) @@ -5963,10 +6006,10 @@ void LLPickInfo::getSurfaceInfo() if (gViewerWindow->cursorIntersect(llround((F32)mMousePt.mX), llround((F32)mMousePt.mY), 1024.f, objectp, -1, mPickTransparent, &mObjectFace, - &mIntersection, + &intersection, &mSTCoords, - &mNormal, - &mBinormal)) + &normal, + &tangent)) { // if we succeeded with the intersect above, compute the texture coordinates: @@ -5975,10 +6018,26 @@ void LLPickInfo::getSurfaceInfo() LLFace* facep = objectp->mDrawable->getFace(mObjectFace); if (facep) { - mUVCoords = facep->surfaceToTexture(mSTCoords, mIntersection, mNormal); + mUVCoords = facep->surfaceToTexture(mSTCoords, intersection, normal); } } + mIntersection.set(intersection.getF32ptr()); + mNormal.set(normal.getF32ptr()); + mTangent.set(tangent.getF32ptr()); + + //extrapoloate binormal from normal and tangent + + LLVector4a binormal; + binormal.setCross3(normal, tangent); + binormal.mul(tangent.getF32ptr()[3]); + + mBinormal.set(binormal.getF32ptr()); + + mBinormal.normalize(); + mNormal.normalize(); + mTangent.normalize(); + // and XY coords: updateXYCoords(); diff --git a/indra/newview/llviewerwindow.h b/indra/newview/llviewerwindow.h index 3b38f450d..801f1e61f 100644 --- a/indra/newview/llviewerwindow.h +++ b/indra/newview/llviewerwindow.h @@ -115,6 +115,7 @@ public: LLVector2 mSTCoords; LLCoordScreen mXYCoords; LLVector3 mNormal; + LLVector4 mTangent; LLVector3 mBinormal; BOOL mPickTransparent; void getSurfaceInfo(); @@ -347,19 +348,19 @@ public: static void hoverPickCallback(const LLPickInfo& pick_info); LLHUDIcon* cursorIntersectIcon(S32 mouse_x, S32 mouse_y, F32 depth, - LLVector3* intersection); + LLVector4a* intersection); LLViewerObject* cursorIntersect(S32 mouse_x = -1, S32 mouse_y = -1, F32 depth = 512.f, LLViewerObject *this_object = NULL, S32 this_face = -1, BOOL pick_transparent = FALSE, S32* face_hit = NULL, - LLVector3 *intersection = NULL, + LLVector4a *intersection = NULL, LLVector2 *uv = NULL, - LLVector3 *normal = NULL, - LLVector3 *binormal = NULL, - LLVector3* start = NULL, - LLVector3* end = NULL); + LLVector4a *normal = NULL, + LLVector4a *tangent = NULL, + LLVector4a* start = NULL, + LLVector4a* end = NULL); // Returns a pointer to the last object hit @@ -509,13 +510,13 @@ extern LLFrameTimer gAwayTimer; // tracks time before setting the avatar awa extern LLFrameTimer gAwayTriggerTimer; // how long the avatar has been away extern LLViewerObject* gDebugRaycastObject; -extern LLVector3 gDebugRaycastIntersection; +extern LLVector4a gDebugRaycastIntersection; extern LLVector2 gDebugRaycastTexCoord; -extern LLVector3 gDebugRaycastNormal; -extern LLVector3 gDebugRaycastBinormal; +extern LLVector4a gDebugRaycastNormal; +extern LLVector4a gDebugRaycastTangent; extern S32 gDebugRaycastFaceHit; -extern LLVector3 gDebugRaycastStart; -extern LLVector3 gDebugRaycastEnd; +extern LLVector4a gDebugRaycastStart; +extern LLVector4a gDebugRaycastEnd; extern BOOL gDisplayCameraPos; extern BOOL gDisplayWindInfo; diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index dfdd754ba..3076f4dc4 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -1729,19 +1729,20 @@ void LLVOAvatar::renderCollisionVolumes() if (mNameText.notNull()) { - LLVector3 unused; - mNameText->lineSegmentIntersect(LLVector3(0,0,0), LLVector3(0,0,1), unused, TRUE); + LLVector4a unused; + + mNameText->lineSegmentIntersect(unused, unused, unused, TRUE); } } -BOOL LLVOAvatar::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, +BOOL LLVOAvatar::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32* face_hit, - LLVector3* intersection, + LLVector4a* intersection, LLVector2* tex_coord, - LLVector3* normal, - LLVector3* bi_normal) + LLVector4a* normal, + LLVector4a* tangent) { if ((isSelf() && !gAgent.needsRenderAvatar()) || !LLPipeline::sPickAvatar) { @@ -1758,8 +1759,8 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector3& start, const LLVector3& e glh::matrix4f inverse = mat.inverse(); glh::matrix4f norm_mat = inverse.transpose(); - glh::vec3f p1(start.mV); - glh::vec3f p2(end.mV); + glh::vec3f p1(start.getF32ptr()); + glh::vec3f p2(end.getF32ptr()); inverse.mult_matrix_vec(p1); inverse.mult_matrix_vec(p2); @@ -1778,12 +1779,12 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector3& start, const LLVector3& e if (intersection) { - *intersection = LLVector3(res_pos.v); + intersection->load3(res_pos.v); } if (normal) { - *normal = LLVector3(res_norm.v); + normal->load3(res_norm.v); } return TRUE; @@ -1817,7 +1818,8 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector3& start, const LLVector3& e } } - LLVector3 position; + + LLVector4a position; if (mNameText.notNull() && mNameText->lineSegmentIntersect(start, end, position)) { if (intersection) @@ -1831,14 +1833,14 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector3& start, const LLVector3& e return FALSE; } -LLViewerObject* LLVOAvatar::lineSegmentIntersectRiggedAttachments(const LLVector3& start, const LLVector3& end, +LLViewerObject* LLVOAvatar::lineSegmentIntersectRiggedAttachments(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32* face_hit, - LLVector3* intersection, + LLVector4a* intersection, LLVector2* tex_coord, - LLVector3* normal, - LLVector3* bi_normal) + LLVector4a* normal, + LLVector4a* tangent) { static const LLCachedControl allow_mesh_picking("SGAllowRiggedMeshSelection"); @@ -1851,8 +1853,8 @@ LLViewerObject* LLVOAvatar::lineSegmentIntersectRiggedAttachments(const LLVector if (lineSegmentBoundingBox(start, end)) { - LLVector3 local_end = end; - LLVector3 local_intersection; + LLVector4a local_end = end; + LLVector4a local_intersection; for (attachment_map_t::iterator iter = mAttachmentPoints.begin(); iter != mAttachmentPoints.end(); @@ -1866,7 +1868,7 @@ LLViewerObject* LLVOAvatar::lineSegmentIntersectRiggedAttachments(const LLVector { LLViewerObject* attached_object = (*attachment_iter); - if (attached_object->lineSegmentIntersect(start, local_end, face, pick_transparent, face_hit, &local_intersection, tex_coord, normal, bi_normal)) + if (attached_object->lineSegmentIntersect(start, local_end, face, pick_transparent, face_hit, &local_intersection, tex_coord, normal, tangent)) { local_end = local_intersection; if (intersection) diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index 11b5c1be6..7006cacf4 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -201,22 +201,22 @@ public: /*virtual*/ void updateRegion(LLViewerRegion *regionp); /*virtual*/ void updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax); /*virtual*/ void getSpatialExtents(LLVector4a& newMin, LLVector4a& newMax); - /*virtual*/ BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL); // return the surface bi-normal at the intersection point - LLViewerObject* lineSegmentIntersectRiggedAttachments(const LLVector3& start, const LLVector3& end, + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL); // return the surface tangent at the intersection point + LLViewerObject* lineSegmentIntersectRiggedAttachments(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL); // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL); // return the surface tangent at the intersection point //-------------------------------------------------------------------- // LLCharacter interface and related diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp index f1ca2c6a3..769f75978 100644 --- a/indra/newview/llvograss.cpp +++ b/indra/newview/llvograss.cpp @@ -789,8 +789,8 @@ void LLVOGrass::updateDrawable(BOOL force_damped) } // virtual -BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, BOOL pick_transparent, S32 *face_hitp, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +BOOL LLVOGrass::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32 *face_hitp, + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) { BOOL ret = FALSE; @@ -801,7 +801,8 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en return FALSE; } - LLVector3 dir = end-start; + LLVector4a dir; + dir.setSub(end, start); mPatch = mRegionp->getLand().resolvePatchRegion(getPositionRegion()); @@ -869,23 +870,31 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en U32 idx0 = 0,idx1 = 0,idx2 = 0; - if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, a, b, t, FALSE)) + LLVector4a v0a,v1a,v2a,v3a; + + v0a.load3(v[0].mV); + v1a.load3(v[1].mV); + v2a.load3(v[2].mV); + v3a.load3(v[3].mV); + + + if (LLTriangleRayIntersect(v0a, v1a, v2a, start, dir, a, b, t)) { hit = TRUE; idx0 = 0; idx1 = 1; idx2 = 2; } - else if (LLTriangleRayIntersect(v[1], v[3], v[2], start, dir, a, b, t, FALSE)) + else if (LLTriangleRayIntersect(v1a, v3a, v2a, start, dir, a, b, t)) { hit = TRUE; idx0 = 1; idx1 = 3; idx2 = 2; } - else if (LLTriangleRayIntersect(v[2], v[1], v[0], start, dir, a, b, t, FALSE)) + else if (LLTriangleRayIntersect(v2a, v1a, v0a, start, dir, a, b, t)) { normal1 = -normal1; hit = TRUE; idx0 = 2; idx1 = 1; idx2 = 0; } - else if (LLTriangleRayIntersect(v[2], v[3], v[1], start, dir, a, b, t, FALSE)) + else if (LLTriangleRayIntersect(v2a, v3a, v1a, start, dir, a, b, t)) { normal1 = -normal1; hit = TRUE; @@ -908,7 +917,8 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en closest_t = t; if (intersection != NULL) { - *intersection = start+dir*closest_t; + dir.mul(closest_t); + intersection->setAdd(start, dir); } if (tex_coord != NULL) @@ -918,7 +928,7 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en if (normal != NULL) { - *normal = normal1; + normal->load3(normal1.mV); } ret = TRUE; } diff --git a/indra/newview/llvograss.h b/indra/newview/llvograss.h index 1e1c97ea5..941e075bf 100644 --- a/indra/newview/llvograss.h +++ b/indra/newview/llvograss.h @@ -81,14 +81,14 @@ public: /*virtual*/ BOOL isActive() const; // Whether this object needs to do an idleUpdate. /*virtual*/ void idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time); - /*virtual*/ BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); static S32 sMaxGrassSpecies; diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp index 465df0465..565c904e8 100644 --- a/indra/newview/llvosurfacepatch.cpp +++ b/indra/newview/llvosurfacepatch.cpp @@ -103,10 +103,10 @@ public: glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1])); glClientActiveTextureARB(GL_TEXTURE0_ARB); } - if (data_mask & MAP_BINORMAL) + if (data_mask & MAP_TANGENT) { glClientActiveTextureARB(GL_TEXTURE2_ARB); - glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL])); + glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeSize[TYPE_TANGENT], (void*)(base + mOffsets[TYPE_TANGENT])); glClientActiveTextureARB(GL_TEXTURE0_ARB); } if (data_mask & MAP_TEXCOORD0) @@ -942,8 +942,8 @@ void LLVOSurfacePatch::getGeomSizesEast(const S32 stride, const S32 east_stride, } } -BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, BOOL pick_transparent, S32 *face_hitp, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32 *face_hitp, + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) { @@ -952,7 +952,9 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect return FALSE; } - LLVector3 delta = end-start; + LLVector4a da; + da.setSub(end, start); + LLVector3 delta(da.getF32ptr()); LLVector3 pdelta = delta; pdelta.mV[2] = 0; @@ -961,7 +963,9 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect F32 tdelta = 1.f/plength; - LLVector3 origin = start - mRegionp->getOriginAgent(); + LLVector3 v_start(start.getF32ptr()); + + LLVector3 origin = v_start - mRegionp->getOriginAgent(); if (mRegionp->getLandHeightRegion(origin) > origin.mV[2]) { @@ -1016,12 +1020,12 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect { sample.mV[2] = mRegionp->getLandHeightRegion(sample); } - *intersection = sample + mRegionp->getOriginAgent(); + intersection->load3((sample + mRegionp->getOriginAgent()).mV); } if (normal) { - *normal = mRegionp->getLand().resolveNormalGlobal(mRegionp->getPosGlobalFromRegion(sample)); + normal->load3((mRegionp->getLand().resolveNormalGlobal(mRegionp->getPosGlobalFromRegion(sample))).mV); } return TRUE; diff --git a/indra/newview/llvosurfacepatch.h b/indra/newview/llvosurfacepatch.h index 7637be867..6818c98f0 100644 --- a/indra/newview/llvosurfacepatch.h +++ b/indra/newview/llvosurfacepatch.h @@ -85,14 +85,14 @@ public: void dirtyPatch(); void dirtyGeom(); - /*virtual*/ BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); BOOL mDirtiedPatch; diff --git a/indra/newview/llvotree.cpp b/indra/newview/llvotree.cpp index a4d57e2d3..340a9f634 100644 --- a/indra/newview/llvotree.cpp +++ b/indra/newview/llvotree.cpp @@ -1253,8 +1253,8 @@ void LLVOTree::updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) mDrawable->setPositionGroup(pos); } -BOOL LLVOTree::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, BOOL pick_transparent, S32 *face_hitp, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +BOOL LLVOTree::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32 *face_hitp, + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) { @@ -1283,16 +1283,19 @@ BOOL LLVOTree::lineSegmentIntersect(const LLVector3& start, const LLVector3& end LLVector3 pos, norm; - if (linesegment_tetrahedron(start, end, center, size, quat, pos, norm)) + LLVector3 start3(start.getF32ptr()); + LLVector3 end3(end.getF32ptr()); + + if (linesegment_tetrahedron(start3, end3, center, size, quat, pos, norm)) { if (intersection) { - *intersection = pos; + intersection->load3(pos.mV); } if (normal) { - *normal = norm; + normal->load3(norm.mV); } return TRUE; } diff --git a/indra/newview/llvotree.h b/indra/newview/llvotree.h index f90503030..fc8088823 100644 --- a/indra/newview/llvotree.h +++ b/indra/newview/llvotree.h @@ -115,14 +115,14 @@ public: U32 drawBranchPipeline(LLMatrix4& matrix, U16* indicesp, S32 trunk_LOD, S32 stop_level, U16 depth, U16 trunk_depth, F32 scale, F32 twist, F32 droop, F32 branches, F32 alpha); - /*virtual*/ BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); static S32 sMaxTreeSpecies; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 80f9dd3e5..ad0353a76 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1053,13 +1053,17 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams ¶ms_in, const S32 detail, bo { //already cached break; } - volume->genBinormals(i); + volume->genTangents(i); LLFace::cacheFaceInVRAM(face); } } + return TRUE; } + + + return FALSE; } @@ -1218,7 +1222,7 @@ BOOL LLVOVolume::calcLOD() else { distance = mDrawable->mDistanceWRTCamera; - radius = getVolume()->mLODScaleBias.scaledVec(getScale()).length(); + radius = getVolume() ? getVolume()->mLODScaleBias.scaledVec(getScale()).length() : getScale().length(); } @@ -3589,8 +3593,8 @@ LLVector3 LLVOVolume::volumeDirectionToAgent(const LLVector3& dir) const } -BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, BOOL pick_transparent, S32 *face_hitp, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +BOOL LLVOVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, BOOL pick_transparent, S32 *face_hitp, + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) { if (!mbCanSelect || @@ -3631,23 +3635,25 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e if (volume) { - LLVector3 v_start, v_end, v_dir; - + LLVector4a local_start = start; + LLVector4a local_end = end; + if (transform) { - v_start = agentPositionToVolume(start); - v_end = agentPositionToVolume(end); - } - else - { - v_start = start; - v_end = end; - } + LLVector3 v_start(start.getF32ptr()); + LLVector3 v_end(end.getF32ptr()); - LLVector3 p; - LLVector3 n; + v_start = agentPositionToVolume(v_start); + v_end = agentPositionToVolume(v_end); + + local_start.load3(v_start.mV); + local_end.load3(v_end.mV); + } + + LLVector4a p; + LLVector4a n; LLVector2 tc; - LLVector3 bn; + LLVector4a tn; if (intersection != NULL) { @@ -3664,9 +3670,9 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e n = *normal; } - if (bi_normal != NULL) + if (tangent != NULL) { - bn = *bi_normal; + tn = *tangent; } S32 face_hit = -1; @@ -3692,8 +3698,8 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e continue; } - face_hit = volume->lineSegmentIntersect(v_start, v_end, i, - &p, &tc, &n, &bn); + face_hit = volume->lineSegmentIntersect(local_start, local_end, i, + &p, &tc, &n, &tn); if (face_hit >= 0 && mDrawable->getNumFaces() > face_hit) { @@ -3702,7 +3708,7 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e if (face && (pick_transparent || !face->getTexture() || !face->getTexture()->hasGLTexture() || face->getTexture()->getMask(face->surfaceToTexture(tc, p, n)))) { - v_end = p; + local_end = p; if (face_hitp != NULL) { *face_hitp = face_hit; @@ -3712,7 +3718,9 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e { if (transform) { - *intersection = volumePositionToAgent(p); // must map back to agent space + LLVector3 v_p(p.getF32ptr()); + + intersection->load3(volumePositionToAgent(v_p).mV); // must map back to agent space } else { @@ -3724,27 +3732,36 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& e { if (transform) { - *normal = volumeDirectionToAgent(n); + LLVector3 v_n(n.getF32ptr()); + normal->load3(volumeDirectionToAgent(v_n).mV); } else { *normal = n; } - - (*normal).normVec(); + (*normal).normalize3fast(); } - if (bi_normal != NULL) + if (tangent != NULL) { if (transform) { - *bi_normal = volumeDirectionToAgent(bn); + LLVector3 v_tn(tn.getF32ptr()); + + LLVector4a trans_tangent; + trans_tangent.load3(volumeDirectionToAgent(v_tn).mV); + + LLVector4Logical mask; + mask.clear(); + mask.setElement<3>(); + + tangent->setSelectWithMask(mask, tn, trans_tangent); } else { - *bi_normal = bn; + *tangent = tn; } - (*bi_normal).normVec(); + (*tangent).normalize3fast(); } if (tex_coord != NULL) @@ -4262,11 +4279,18 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) mFaceList.clear(); - std::vector fullbright_faces; - std::vector bump_faces; - std::vector simple_faces; + const U32 MAX_FACE_COUNT = 4096; + + static LLFace** fullbright_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** bump_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** simple_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** alpha_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + + U32 fullbright_count = 0; + U32 bump_count = 0; + U32 simple_count = 0; + U32 alpha_count = 0; - std::vector alpha_faces; U32 useage = group->mSpatialPartition->mBufferUsage; static const LLCachedControl render_max_vbo_size("RenderMaxVBOSize", 512); @@ -4623,7 +4647,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { if (facep->canRenderAsMask()) { //can be treated as alpha mask - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { @@ -4631,7 +4658,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { //only treat as alpha in the pipeline if < 100% transparent drawablep->setState(LLDrawable::HAS_ALPHA); } - alpha_faces.push_back(facep); + if (alpha_count < MAX_FACE_COUNT) + { + alpha_faces[alpha_count++] = facep; + } } } else @@ -4645,34 +4675,52 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) && LLPipeline::sRenderBump) { if (te->getBumpmap()) - { //needs normal + binormal - bump_faces.push_back(facep); + { //needs normal + tangent + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if (te->getShiny() || !te->getFullbright()) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } else { if (te->getBumpmap() && LLPipeline::sRenderBump) - { //needs normal + binormal - bump_faces.push_back(facep); + { //needs normal + tangent + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if ((te->getShiny() && LLPipeline::sRenderBump) || !(te->getFullbright() || bake_sunlight)) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } } @@ -4714,18 +4762,18 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) if (batch_textures) { - bump_mask |= LLVertexBuffer::MAP_BINORMAL; - genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE); - genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE); - genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, TRUE); - genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE); + bump_mask |= LLVertexBuffer::MAP_TANGENT; + genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, simple_count, FALSE, TRUE); + genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, fullbright_count, FALSE, TRUE); + genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, bump_count, FALSE, TRUE); + genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, alpha_count, TRUE, TRUE); } else { - genDrawInfo(group, simple_mask, simple_faces); - genDrawInfo(group, fullbright_mask, fullbright_faces); - genDrawInfo(group, bump_mask, bump_faces, FALSE, TRUE); - genDrawInfo(group, alpha_mask, alpha_faces, TRUE); + genDrawInfo(group, simple_mask, simple_faces, simple_count); + genDrawInfo(group, fullbright_mask, fullbright_faces, fullbright_count); + genDrawInfo(group, bump_mask, bump_faces, bump_count, FALSE, TRUE); + genDrawInfo(group, alpha_mask, alpha_faces, alpha_count, TRUE); } @@ -4767,13 +4815,17 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { LLFastTimer ftm(FTM_REBUILD_VOLUME_VB); LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers - S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ; group->mBuilt = 1.f; - std::set mapped_buffers; - OctreeGuard guard(group->mOctreeNode); + S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ; + + const U32 MAX_BUFFER_COUNT = 4096; + static LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT]; + + U32 buffer_count = 0; + for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter) { LLDrawable* drawablep = *drawable_iter; @@ -4807,9 +4859,9 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } - if (buff->isLocked()) + if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT) { - mapped_buffers.insert(buff); + locked_buffer[buffer_count++] = buff; } } } @@ -4825,7 +4877,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } } - for (std::set::iterator iter = mapped_buffers.begin(); iter != mapped_buffers.end(); ++iter) + for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter) { (*iter)->flush(); } @@ -4904,7 +4956,7 @@ static LLFastTimer::DeclareTimer FTM_GEN_DRAW_INFO_RESIZE_VB("Resize VB"); -void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort, BOOL batch_textures) +void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort, BOOL batch_textures, BOOL no_materials) { LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); @@ -4931,17 +4983,18 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!distance_sort) { //sort faces by things that break batches - std::sort(faces.begin(), faces.end(), CompareBatchBreakerModified()); + std::sort(faces, faces+face_count, CompareBatchBreakerModified()); } else { //sort faces by distance - std::sort(faces.begin(), faces.end(), LLFace::CompareDistanceGreater()); + std::sort(faces, faces+face_count, LLFace::CompareDistanceGreater()); } } bool hud_group = group->isHUDGroup() ; - std::vector::iterator face_iter = faces.begin(); + LLFace** face_iter = faces; + LLFace** end_faces = faces+face_count; LLSpatialGroup::buffer_map_t buffer_map; @@ -4973,7 +5026,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: //NEVER use more than 16 texture index channels (workaround for prevalent driver bug) texture_index_channels = llmin(texture_index_channels, 16); - while (face_iter != faces.end()) + while (face_iter != end_faces) { //pull off next face LLFace* facep = *face_iter; @@ -4999,11 +5052,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: U32 index_count = facep->getIndicesCount(); U32 geom_count = facep->getGeomCount(); + //sum up vertices needed for this render batch - std::vector::iterator i = face_iter; + LLFace** i = face_iter; ++i; - std::vector texture_list; + const U32 MAX_TEXTURE_COUNT = 32; + static LLViewerTexture* texture_list[MAX_TEXTURE_COUNT]; + + U32 texture_count = 0; { LLFastTimer t(FTM_GEN_DRAW_INFO_FACE_SIZE); @@ -5011,11 +5068,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: { U8 cur_tex = 0; facep->setTextureIndex(cur_tex); - texture_list.push_back(tex); + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } if (can_batch_texture(facep)) - { - while (i != faces.end()) + { //populate texture_list with any textures that can be batched + //move i to the next unbatchable face + while (i != end_faces) { facep = *i; if (!can_batch_texture(facep)) @@ -5028,7 +5089,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (distance_sort) { //textures might be out of order, see if texture exists in current batch bool found = false; - for (U32 tex_idx = 0; tex_idx < texture_list.size(); ++tex_idx) + for (U32 tex_idx = 0; tex_idx < texture_count; ++tex_idx) { if (facep->getTexture() == texture_list[tex_idx]) { @@ -5040,7 +5101,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!found) { - cur_tex = texture_list.size(); + cur_tex = texture_count; } } else @@ -5055,7 +5116,10 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: tex = facep->getTexture(); - texture_list.push_back(tex); + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } } if (geom_count + facep->getGeomCount() > max_vertices) @@ -5074,7 +5138,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: } else { - while (i != faces.end() && + while (i != end_faces && (LLPipeline::sTextureBindTest || (distance_sort || (*i)->getTexture() == tex))) { facep = *i; diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h index 029096ef6..b9cb79508 100644 --- a/indra/newview/llvovolume.h +++ b/indra/newview/llvovolume.h @@ -142,14 +142,14 @@ public: /*virtual*/ U32 getTriangleCount(S32* vcount = NULL) const; /*virtual*/ U32 getHighLODTriangleCount(); - /*virtual*/ BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, S32* face_hit = NULL, // which face was hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); LLVector3 agentPositionToVolume(const LLVector3& pos) const; diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index c57ab4219..949ee92e1 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -6167,20 +6167,20 @@ BOOL LLPipeline::getRenderHighlights(void*) return sRenderHighlight; } -LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, const LLVector3& end, +LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, - LLVector3* intersection, // return the intersection point + LLVector4a* intersection, // return the intersection point LLVector2* tex_coord, // return the texture coordinates of the intersection point - LLVector3* normal, // return the surface normal at the intersection point - LLVector3* bi_normal // return the surface bi-normal at the intersection point + LLVector4a* normal, // return the surface normal at the intersection point + LLVector4a* tangent // return the surface tangent at the intersection point ) { LLDrawable* drawable = NULL; - LLVector3 local_end = end; + LLVector4a local_end = end; - LLVector3 position; + LLVector4a position; sPickAvatar = FALSE; //LLToolMgr::getInstance()->inBuildMode() ? FALSE : TRUE; @@ -6200,7 +6200,7 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, LLSpatialPartition* part = region->getSpatialPartition(j); if (part && hasRenderType(part->mDrawableType)) { - LLDrawable* hit = part->lineSegmentIntersect(start, local_end, pick_transparent, face_hit, &position, tex_coord, normal, bi_normal); + LLDrawable* hit = part->lineSegmentIntersect(start, local_end, pick_transparent, face_hit, &position, tex_coord, normal, tangent); if (hit) { drawable = hit; @@ -6215,8 +6215,8 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, { //save hit info in case we need to restore //due to attachment override - LLVector3 local_normal; - LLVector3 local_binormal; + LLVector4a local_normal; + LLVector4a local_tangent; LLVector2 local_texcoord; S32 local_face_hit = -1; @@ -6228,14 +6228,22 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, { local_texcoord = *tex_coord; } - if (bi_normal) + if (tangent) { - local_binormal = *bi_normal; + local_tangent = *tangent; + } + else + { + local_tangent.clear(); } if (normal) { local_normal = *normal; } + else + { + local_normal.clear(); + } const F32 ATTACHMENT_OVERRIDE_DIST = 0.1f; @@ -6249,12 +6257,15 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, LLSpatialPartition* part = region->getSpatialPartition(LLViewerRegion::PARTITION_BRIDGE); if (part && hasRenderType(part->mDrawableType)) { - LLDrawable* hit = part->lineSegmentIntersect(start, local_end, pick_transparent, face_hit, &position, tex_coord, normal, bi_normal); + LLDrawable* hit = part->lineSegmentIntersect(start, local_end, pick_transparent, face_hit, &position, tex_coord, normal, tangent); if (hit) { + LLVector4a delta; + delta.setSub(position, local_end); + if (!drawable || !drawable->getVObj()->isAttachment() || - (position-local_end).magVec() > ATTACHMENT_OVERRIDE_DIST) + delta.getLength3().getF32() > ATTACHMENT_OVERRIDE_DIST) { //avatar overrides if previously hit drawable is not an attachment or //attachment is far enough away from detected intersection drawable = hit; @@ -6272,9 +6283,9 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, { *tex_coord = local_texcoord; } - if (bi_normal) + if (tangent) { - *bi_normal = local_binormal; + *tangent = local_tangent; } if (normal) { @@ -6308,13 +6319,13 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInWorld(const LLVector3& start, return drawable ? drawable->getVObj().get() : NULL; } -LLViewerObject* LLPipeline::lineSegmentIntersectInHUD(const LLVector3& start, const LLVector3& end, +LLViewerObject* LLPipeline::lineSegmentIntersectInHUD(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, - LLVector3* intersection, // return the intersection point + LLVector4a* intersection, // return the intersection point LLVector2* tex_coord, // return the texture coordinates of the intersection point - LLVector3* normal, // return the surface normal at the intersection point - LLVector3* bi_normal // return the surface bi-normal at the intersection point + LLVector4a* normal, // return the surface normal at the intersection point + LLVector4a* tangent // return the surface tangent at the intersection point ) { LLDrawable* drawable = NULL; @@ -6334,7 +6345,7 @@ LLViewerObject* LLPipeline::lineSegmentIntersectInHUD(const LLVector3& start, co LLSpatialPartition* part = region->getSpatialPartition(LLViewerRegion::PARTITION_HUD); if (part) { - LLDrawable* hit = part->lineSegmentIntersect(start, end, pick_transparent, face_hit, intersection, tex_coord, normal, bi_normal); + LLDrawable* hit = part->lineSegmentIntersect(start, end, pick_transparent, face_hit, intersection, tex_coord, normal, tangent); if (hit) { drawable = hit; @@ -6815,13 +6826,18 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b { if (LLViewerJoystick::getInstance()->getOverrideCamera()) { //focus on point under cursor - focus_point = gDebugRaycastIntersection; + focus_point.set(gDebugRaycastIntersection.getF32ptr()); } else if (gAgentCamera.cameraMouselook()) { //focus on point under mouselook crosshairs + LLVector4a result; + result.clear(); + gViewerWindow->cursorIntersect(-1, -1, 512.f, NULL, -1, FALSE, - NULL, - &focus_point); + NULL, + &result); + + focus_point.set(result.getF32ptr()); } else if(gAgent.getRegion()) { diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index 3bde9fbcd..7db607b0b 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -189,21 +189,21 @@ public: void markMeshDirty(LLSpatialGroup* group); //get the object between start and end that's closest to start. - LLViewerObject* lineSegmentIntersectInWorld(const LLVector3& start, const LLVector3& end, + LLViewerObject* lineSegmentIntersectInWorld(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, // return the face hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); - LLViewerObject* lineSegmentIntersectInHUD(const LLVector3& start, const LLVector3& end, + LLViewerObject* lineSegmentIntersectInHUD(const LLVector4a& start, const LLVector4a& end, BOOL pick_transparent, S32* face_hit, // return the face hit - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); // Something about these textures has changed. Dirty them.