diff --git a/LICENSES/LEGAL-intel_matrixlib.txt b/LICENSES/LEGAL-intel_matrixlib.txt new file mode 100644 index 000000000..7ab64f595 --- /dev/null +++ b/LICENSES/LEGAL-intel_matrixlib.txt @@ -0,0 +1,29 @@ +INTEL LICENSE AGREEMENT + +IMPORTANT - READ BEFORE COPYING OR USING. +Do not use or load this library and any associated materials (collectively, +the "Software") until you have read the following terms and conditions. By +loading or using the Software, you agree to the terms of this Agreement. If +you do not wish to so agree, do not use the Software. + +LICENSE: Subject to the restrictions below, Intel Corporation ("Intel") +grants to you the permission to use, copy, distribute and prepare derivative +works of this Software for any purpose and without fee, provided, that +Intel's copyright notice appear in all copies of the Software files. +The distribution of derivative works of the Software is also subject to the +following limitations: you (i) are solely responsible to your customers for +any liability which may arise from the distribution, (ii) do not make any +statement that your product is "certified", or that its performance is +guaranteed, by Intel, and (iii) do not use Intel's name or trademarks to +market your product without written permission. + +EXCLUSION OF ALL WARRANTIES. The Software is provided "AS IS" without any +express or implies warranty of any kind including warranties of +merchantability, noninfringement, or fitness for a particular purpose. +Intel does not warrant or assume responsibility for the accuracy or +completeness of any information contained within the Software. +As this Software is given free of charge, in no event shall Intel be liable +for any damages whatsoever arising out of the use of or inability to use the +Software, even if Intel has been adviced of the possibility of such damages. +Intel does not assume any responsibility for any errors which may appear in +this Software nor any responsibility to update it. 
diff --git a/indra/llappearance/llavatarjoint.cpp b/indra/llappearance/llavatarjoint.cpp index cff862a20..353857bc9 100644 --- a/indra/llappearance/llavatarjoint.cpp +++ b/indra/llappearance/llavatarjoint.cpp @@ -260,7 +260,7 @@ void LLAvatarJointCollisionVolume::renderCollision() updateWorldMatrix(); gGL.pushMatrix(); - gGL.multMatrix( &mXform.getWorldMatrix().mMatrix[0][0] ); + gGL.multMatrix( mXform.getWorldMatrix() ); gGL.diffuseColor3f( 0.f, 0.f, 1.f ); diff --git a/indra/llappearance/llpolymesh.cpp b/indra/llappearance/llpolymesh.cpp index d588d687d..fd2a845c4 100644 --- a/indra/llappearance/llpolymesh.cpp +++ b/indra/llappearance/llpolymesh.cpp @@ -231,9 +231,9 @@ BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) mBaseCoords = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); mBaseNormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); mBaseBinormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); - mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); - mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); - mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); + mTexCoords = (LLVector2*) ll_aligned_malloc_16((numVertices+numVertices%2)*sizeof(LLVector2)); + mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16((numVertices+numVertices%2)*sizeof(LLVector2)); + mWeights = (F32*) ll_aligned_malloc_16(((numVertices)*sizeof(F32)+0xF) & ~0xF); for (i = 0; i < numVertices; i++) { mBaseCoords[i].clear(); diff --git a/indra/llappearance/llpolymesh.h b/indra/llappearance/llpolymesh.h index 87f421f33..d006e389c 100644 --- a/indra/llappearance/llpolymesh.h +++ b/indra/llappearance/llpolymesh.h @@ -146,10 +146,10 @@ public: class LLJointRenderData { public: - LLJointRenderData(const LLMatrix4* world_matrix, LLSkinJoint* skin_joint) : mWorldMatrix(world_matrix), mSkinJoint(skin_joint) {} + LLJointRenderData(const LLMatrix4a* 
world_matrix, LLSkinJoint* skin_joint) : mWorldMatrix(world_matrix), mSkinJoint(skin_joint) {} ~LLJointRenderData(){} - const LLMatrix4* mWorldMatrix; + const LLMatrix4a* mWorldMatrix; LLSkinJoint* mSkinJoint; }; diff --git a/indra/llappearance/llpolyskeletaldistortion.h b/indra/llappearance/llpolyskeletaldistortion.h index a9b843af6..0e2f6e05d 100644 --- a/indra/llappearance/llpolyskeletaldistortion.h +++ b/indra/llappearance/llpolyskeletaldistortion.h @@ -68,7 +68,16 @@ class LLPolySkeletalDistortionInfo : public LLViewerVisualParamInfo { friend class LLPolySkeletalDistortion; public: - + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLPolySkeletalDistortionInfo(); /*virtual*/ ~LLPolySkeletalDistortionInfo() {}; @@ -77,12 +86,12 @@ public: protected: typedef std::vector bone_info_list_t; bone_info_list_t mBoneInfoList; -}; - +} LL_ALIGN_POSTFIX(16); //----------------------------------------------------------------------------- // LLPolySkeletalDeformation // A set of joint scale data for deforming the avatar mesh //----------------------------------------------------------------------------- +LL_ALIGN_PREFIX(16) class LLPolySkeletalDistortion : public LLViewerVisualParam { public: diff --git a/indra/llcharacter/llcharacter.cpp b/indra/llcharacter/llcharacter.cpp index 12960ac01..2b2a193d9 100644 --- a/indra/llcharacter/llcharacter.cpp +++ b/indra/llcharacter/llcharacter.cpp @@ -72,21 +72,11 @@ LLCharacter::~LLCharacter() delete param; } - U32 i ; - U32 size = sInstances.size() ; - for(i = 0 ; i < size ; i++) - { - if(sInstances[i] == this) - { - break ; - } - } + bool erased = vector_replace_with_last(sInstances,this); - llassert_always(i < size) ; + llassert_always(erased) ; llassert_always(sAllowInstancesChange) ; - sInstances[i] = sInstances[size - 1] ; - sInstances.pop_back() ; } diff --git a/indra/llcharacter/lljoint.cpp b/indra/llcharacter/lljoint.cpp 
index f2900d4a5..e8bf2b0c5 100644 --- a/indra/llcharacter/lljoint.cpp +++ b/indra/llcharacter/lljoint.cpp @@ -312,19 +312,15 @@ void LLJoint::setWorldPosition( const LLVector3& pos ) return; } - LLMatrix4 temp_matrix = getWorldMatrix(); - temp_matrix.mMatrix[VW][VX] = pos.mV[VX]; - temp_matrix.mMatrix[VW][VY] = pos.mV[VY]; - temp_matrix.mMatrix[VW][VZ] = pos.mV[VZ]; + LLMatrix4a temp_matrix = getWorldMatrix(); + temp_matrix.setTranslate_affine(pos); - LLMatrix4 parentWorldMatrix = mParent->getWorldMatrix(); - LLMatrix4 invParentWorldMatrix = parentWorldMatrix.invert(); + LLMatrix4a invParentWorldMatrix = mParent->getWorldMatrix(); + invParentWorldMatrix.invert(); - temp_matrix *= invParentWorldMatrix; + invParentWorldMatrix.mul(temp_matrix); - LLVector3 localPos( temp_matrix.mMatrix[VW][VX], - temp_matrix.mMatrix[VW][VY], - temp_matrix.mMatrix[VW][VZ] ); + LLVector3 localPos( invParentWorldMatrix.getRow().getF32ptr() ); setPosition( localPos ); } @@ -383,19 +379,19 @@ void LLJoint::setWorldRotation( const LLQuaternion& rot ) this->setRotation( rot ); return; } + + LLMatrix4a parentWorldMatrix = mParent->getWorldMatrix(); + LLQuaternion2 rota(rot); + LLMatrix4a temp_mat(rota); - LLMatrix4 temp_mat(rot); + LLMatrix4a invParentWorldMatrix = mParent->getWorldMatrix(); + invParentWorldMatrix.setTranslate_affine(LLVector3(0.f)); - LLMatrix4 parentWorldMatrix = mParent->getWorldMatrix(); - parentWorldMatrix.mMatrix[VW][VX] = 0; - parentWorldMatrix.mMatrix[VW][VY] = 0; - parentWorldMatrix.mMatrix[VW][VZ] = 0; + invParentWorldMatrix.invert(); - LLMatrix4 invParentWorldMatrix = parentWorldMatrix.invert(); + invParentWorldMatrix.mul(temp_mat); - temp_mat *= invParentWorldMatrix; - - setRotation(LLQuaternion(temp_mat)); + setRotation(LLQuaternion(LLMatrix4(invParentWorldMatrix.getF32ptr()))); } @@ -425,7 +421,7 @@ void LLJoint::setScale( const LLVector3& scale ) //-------------------------------------------------------------------- // getWorldMatrix() 
//-------------------------------------------------------------------- -const LLMatrix4 &LLJoint::getWorldMatrix() +const LLMatrix4a &LLJoint::getWorldMatrix() { updateWorldMatrixParent(); diff --git a/indra/llcharacter/lljoint.h b/indra/llcharacter/lljoint.h index cb55e6ca7..f633fc32d 100644 --- a/indra/llcharacter/lljoint.h +++ b/indra/llcharacter/lljoint.h @@ -162,7 +162,7 @@ public: void setScale( const LLVector3& scale ); // get/set world matrix - const LLMatrix4 &getWorldMatrix(); + const LLMatrix4a &getWorldMatrix(); void setWorldMatrix( const LLMatrix4& mat ); void updateWorldMatrixChildren(); diff --git a/indra/llcharacter/lljointsolverrp3.cpp b/indra/llcharacter/lljointsolverrp3.cpp index 6599a76b1..8ce737afb 100644 --- a/indra/llcharacter/lljointsolverrp3.cpp +++ b/indra/llcharacter/lljointsolverrp3.cpp @@ -171,12 +171,14 @@ void LLJointSolverRP3::solve() //------------------------------------------------------------------------- // get the poleVector in world space //------------------------------------------------------------------------- - LLMatrix4 worldJointAParentMat; + LLVector3 poleVec = mPoleVector; if ( mJointA->getParent() ) { - worldJointAParentMat = mJointA->getParent()->getWorldMatrix(); + LLVector4a pole_veca; + pole_veca.load3(mPoleVector.mV); + mJointA->getParent()->getWorldMatrix().rotate(pole_veca,pole_veca); + poleVec.set(pole_veca.getF32ptr()); } - LLVector3 poleVec = rotate_vector( mPoleVector, worldJointAParentMat ); //------------------------------------------------------------------------- // compute the following: diff --git a/indra/llcharacter/llkeyframestandmotion.cpp b/indra/llcharacter/llkeyframestandmotion.cpp index bccc714f1..43675a4c8 100644 --- a/indra/llcharacter/llkeyframestandmotion.cpp +++ b/indra/llcharacter/llkeyframestandmotion.cpp @@ -286,40 +286,38 @@ BOOL LLKeyframeStandMotion::onUpdate(F32 time, U8* joint_mask) //------------------------------------------------------------------------- if ( mTrackAnkles ) { - 
LLVector4 dirLeft4 = mAnkleLeftJoint.getWorldMatrix().getFwdRow4(); - LLVector4 dirRight4 = mAnkleRightJoint.getWorldMatrix().getFwdRow4(); - LLVector3 dirLeft = vec4to3( dirLeft4 ); - LLVector3 dirRight = vec4to3( dirRight4 ); + const LLVector4a& dirLeft4 = mAnkleLeftJoint.getWorldMatrix().getRow(); + const LLVector4a& dirRight4 = mAnkleRightJoint.getWorldMatrix().getRow(); - LLVector3 up; - LLVector3 dir; - LLVector3 left; + LLVector4a up; + LLVector4a dir; + LLVector4a left; - up = mNormalLeft; - up.normVec(); + up.load3(mNormalLeft.mV); + up.normalize3fast(); if (mFlipFeet) { - up *= -1.0f; + up.negate(); } - dir = dirLeft; - dir.normVec(); - left = up % dir; - left.normVec(); - dir = left % up; - mRotationLeft = LLQuaternion( dir, left, up ); + dir = dirLeft4; + dir.normalize3fast(); + left.setCross3(up,dir); + left.normalize3fast(); + dir.setCross3(left,up); + mRotationLeft = LLQuaternion( LLVector3(dir.getF32ptr()), LLVector3(left.getF32ptr()), LLVector3(up.getF32ptr())); - up = mNormalRight; - up.normVec(); + up.load3(mNormalRight.mV); + up.normalize3fast(); if (mFlipFeet) { - up *= -1.0f; + up.negate(); } - dir = dirRight; - dir.normVec(); - left = up % dir; - left.normVec(); - dir = left % up; - mRotationRight = LLQuaternion( dir, left, up ); + dir = dirRight4; + dir.normalize3fast(); + left.setCross3(up,dir); + left.normalize3fast(); + dir.setCross3(left,up); + mRotationRight = LLQuaternion( LLVector3(dir.getF32ptr()), LLVector3(left.getF32ptr()), LLVector3(up.getF32ptr())); } mAnkleLeftJoint.setWorldRotation( mRotationLeft ); mAnkleRightJoint.setWorldRotation( mRotationRight ); diff --git a/indra/llcommon/llstl.h b/indra/llcommon/llstl.h index ec98bb911..2e3d52733 100644 --- a/indra/llcommon/llstl.h +++ b/indra/llcommon/llstl.h @@ -241,12 +241,12 @@ inline typename T::mapped_type get_ptr_in_map(const T& inmap, typename T::key_ty template inline typename T::iterator vector_replace_with_last(T& invec, typename T::iterator& iter) { - typename T::iterator 
last = invec.end(); --last; + typename T::iterator last = invec.end(); if (iter == invec.end()) { return iter; } - else if (iter == last) + else if (iter == --last) { invec.pop_back(); return invec.end(); diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index 9916cfd2d..6d896613c 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -40,6 +40,7 @@ // LLMatrix3a is the base class for LLRotation, which should be used instead any time you're dealing with a // rotation matrix. +LL_ALIGN_PREFIX(16) class LLMatrix3a { public: @@ -113,8 +114,9 @@ protected: LL_ALIGN_16(LLVector4a mColumns[3]); -}; +} LL_ALIGN_POSTFIX(16); +LL_ALIGN_PREFIX(16) class LLRotation : public LLMatrix3a { public: @@ -123,6 +125,6 @@ public: // Returns true if this rotation is orthonormal with det ~= 1 inline bool isOkRotation() const; -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 94e5e54af..f322c087a 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -31,10 +31,72 @@ #include "m4math.h" #include "m3math.h" +LL_ALIGN_PREFIX(16) class LLMatrix4a { -public: +private: LL_ALIGN_16(LLVector4a mMatrix[4]); +public: + enum + { + ROW_FWD = 0, + ROW_LEFT, + ROW_UP, + ROW_TRANS + }; + + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + + LLMatrix4a() + {} + LLMatrix4a(const LLQuad& q1,const LLQuad& q2,const LLQuad& q3,const LLQuad& q4) + { + mMatrix[0] = q1; + mMatrix[1] = q2; + mMatrix[2] = q3; + mMatrix[3] = q4; + } + LLMatrix4a(const LLQuaternion2& quat) + { + const LLVector4a& xyzw = quat.getVector4a(); + LLVector4a nyxwz = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(2,3,0,1)); + nyxwz.negate(); + + const LLVector4a xnyynx = _mm_unpacklo_ps(xyzw,nyxwz); + const LLVector4a znwwnz = _mm_unpackhi_ps(xyzw,nyxwz); + + LLMatrix4a mata; + mata.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, 
_MM_SHUFFLE(0,1,2,3))); + mata.setRow<1>(_mm_shuffle_ps(znwwnz, xyzw, _MM_SHUFFLE(1,0,2,3))); + mata.setRow<2>(_mm_shuffle_ps(xnyynx, xyzw, _MM_SHUFFLE(2,3,3,2))); + mata.setRow<3>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(2,3,1,3))); + + LLMatrix4a matb; + matb.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, _MM_SHUFFLE(3,1,2,3))); + matb.setRow<1>(_mm_shuffle_ps(znwwnz, xnyynx, _MM_SHUFFLE(1,0,2,3))); + matb.setRow<2>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(3,2,3,2))); + matb.setRow<3>(xyzw); + + setMul(matb,mata); + } + + inline F32* getF32ptr() + { + return mMatrix[0].getF32ptr(); + } + + inline const F32* getF32ptr() const + { + return mMatrix[0].getF32ptr(); + } inline void clear() { @@ -44,13 +106,21 @@ public: mMatrix[3].clear(); } + inline void setIdentity() + { + static __m128 ones = _mm_set_ps(1.f,0.f,0.f,1.f); + mMatrix[0] = _mm_movelh_ps(ones,_mm_setzero_ps()); + mMatrix[1] = _mm_movehl_ps(_mm_setzero_ps(),ones); + mMatrix[2] = _mm_movelh_ps(_mm_setzero_ps(),ones); + mMatrix[3] = _mm_movehl_ps(ones,_mm_setzero_ps()); + } + inline void loadu(const LLMatrix4& src) { - mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]); - mMatrix[1] = _mm_loadu_ps(src.mMatrix[1]); - mMatrix[2] = _mm_loadu_ps(src.mMatrix[2]); - mMatrix[3] = _mm_loadu_ps(src.mMatrix[3]); - + mMatrix[0].loadua(src.mMatrix[0]); + mMatrix[1].loadua(src.mMatrix[1]); + mMatrix[2].loadua(src.mMatrix[2]); + mMatrix[3].loadua(src.mMatrix[3]); } inline void loadu(const LLMatrix3& src) @@ -61,6 +131,14 @@ public: mMatrix[3].set(0,0,0,1.f); } + inline void loadu(const F32* src) + { + mMatrix[0].loadua(src+0); + mMatrix[1].loadua(src+4); + mMatrix[2].loadua(src+8); + mMatrix[3].loadua(src+12); + } + inline void add(const LLMatrix4a& rhs) { mMatrix[0].add(rhs.mMatrix[0]); @@ -69,6 +147,75 @@ public: mMatrix[3].add(rhs.mMatrix[3]); } + inline void mul(const LLMatrix4a& rhs) + { + //Not using rotate4 to avoid extra copy of *this. 
+ LLVector4a x0,y0,z0,w0; + LLVector4a x1,y1,z1,w1; + LLVector4a x2,y2,z2,w2; + LLVector4a x3,y3,z3,w3; + + //16 shuffles + x0.splat<0>(rhs.mMatrix[0]); + x1.splat<0>(rhs.mMatrix[1]); + x2.splat<0>(rhs.mMatrix[2]); + x3.splat<0>(rhs.mMatrix[3]); + + y0.splat<1>(rhs.mMatrix[0]); + y1.splat<1>(rhs.mMatrix[1]); + y2.splat<1>(rhs.mMatrix[2]); + y3.splat<1>(rhs.mMatrix[3]); + + z0.splat<2>(rhs.mMatrix[0]); + z1.splat<2>(rhs.mMatrix[1]); + z2.splat<2>(rhs.mMatrix[2]); + z3.splat<2>(rhs.mMatrix[3]); + + w0.splat<3>(rhs.mMatrix[0]); + w1.splat<3>(rhs.mMatrix[1]); + w2.splat<3>(rhs.mMatrix[2]); + w3.splat<3>(rhs.mMatrix[3]); + + //16 muls + x0.mul(mMatrix[0]); + x1.mul(mMatrix[0]); + x2.mul(mMatrix[0]); + x3.mul(mMatrix[0]); + + y0.mul(mMatrix[1]); + y1.mul(mMatrix[1]); + y2.mul(mMatrix[1]); + y3.mul(mMatrix[1]); + + z0.mul(mMatrix[2]); + z1.mul(mMatrix[2]); + z2.mul(mMatrix[2]); + z3.mul(mMatrix[2]); + + w0.mul(mMatrix[3]); + w1.mul(mMatrix[3]); + w2.mul(mMatrix[3]); + w3.mul(mMatrix[3]); + + //12 adds + x0.add(y0); + z0.add(w0); + + x1.add(y1); + z1.add(w1); + + x2.add(y2); + z2.add(w2); + + x3.add(y3); + z3.add(w3); + + mMatrix[0].setAdd(x0,z0); + mMatrix[1].setAdd(x1,z1); + mMatrix[2].setAdd(x2,z2); + mMatrix[3].setAdd(x3,z3); + } + inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) { mMatrix[0] = r0; @@ -76,6 +223,44 @@ public: mMatrix[2] = r2; } + template + inline void setRow(const LLVector4a& row) + { + mMatrix[N] = row; + } + + template + inline const LLVector4a& getRow() const + { + return mMatrix[N]; + } + + template + inline LLVector4a& getRow() + { + return mMatrix[N]; + } + + template + inline void setColumn(const LLVector4a& col) + { + mMatrix[0].copyComponent(col.getScalarAt<0>()); + mMatrix[1].copyComponent(col.getScalarAt<1>()); + mMatrix[2].copyComponent(col.getScalarAt<2>()); + mMatrix[3].copyComponent(col.getScalarAt<3>()); + } + + template + inline LLVector4a getColumn() + { + LLVector4a v; + 
v.copyComponent<0>(mMatrix[0].getScalarAt()); + v.copyComponent<1>(mMatrix[1].getScalarAt()); + v.copyComponent<2>(mMatrix[2].getScalarAt()); + v.copyComponent<3>(mMatrix[3].getScalarAt()); + return v; + } + inline void setMul(const LLMatrix4a& m, const F32 s) { mMatrix[0].setMul(m.mMatrix[0], s); @@ -84,6 +269,14 @@ public: mMatrix[3].setMul(m.mMatrix[3], s); } + inline void setMul(const LLMatrix4a& m0, const LLMatrix4a& m1) + { + m0.rotate4(m1.mMatrix[0],mMatrix[0]); + m0.rotate4(m1.mMatrix[1],mMatrix[1]); + m0.rotate4(m1.mMatrix[2],mMatrix[2]); + m0.rotate4(m1.mMatrix[3],mMatrix[3]); + } + inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w) { LLVector4a d0,d1,d2,d3; @@ -107,13 +300,14 @@ public: //Singu Note: Don't mess with this. It's intentionally different from LL's. // Note how res isn't manipulated until the very end. + //Fast(er). Treats v[VW] as 0.f inline void rotate(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z; - x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + x.splat<0>(v); + y.splat<1>(v); + z.splat<2>(v); x.mul(mMatrix[0]); y.mul(mMatrix[1]); @@ -123,14 +317,15 @@ public: res.setAdd(x,z); } + //Proper. v[VW] as v[VW] inline void rotate4(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z,w; - x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); - w = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3)); + x.splat<0>(v); + y.splat<1>(v); + z.splat<2>(v); + w.splat<3>(v); x.mul(mMatrix[0]); y.mul(mMatrix[1]); @@ -142,14 +337,15 @@ public: res.setAdd(x,z); } + //Fast(er). 
Treats v[VW] as 1.f inline void affineTransform(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z; - x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); - + x.splat<0>(v); + y.splat<1>(v); + z.splat<2>(v); + x.mul(mMatrix[0]); y.mul(mMatrix[1]); z.mul(mMatrix[2]); @@ -158,6 +354,342 @@ public: z.add(mMatrix[3]); res.setAdd(x,z); } -}; + + inline void perspectiveTransform(const LLVector4a& v, LLVector4a& res) const + { + LLVector4a x,y,z,s,t,p,q; + + x.splat<0>(v); + y.splat<1>(v); + z.splat<2>(v); + + s.splat<3>(mMatrix[0]); + t.splat<3>(mMatrix[1]); + p.splat<3>(mMatrix[2]); + q.splat<3>(mMatrix[3]); + + s.mul(x); + t.mul(y); + p.mul(z); + q.add(s); + t.add(p); + q.add(t); + + x.mul(mMatrix[0]); + y.mul(mMatrix[1]); + z.mul(mMatrix[2]); + + x.add(y); + z.add(mMatrix[3]); + res.setAdd(x,z); + res.div(q); + } + + inline void transpose() + { + __m128 q1 = _mm_unpackhi_ps(mMatrix[0],mMatrix[1]); + __m128 q2 = _mm_unpacklo_ps(mMatrix[0],mMatrix[1]); + __m128 q3 = _mm_unpacklo_ps(mMatrix[2],mMatrix[3]); + __m128 q4 = _mm_unpackhi_ps(mMatrix[2],mMatrix[3]); + + mMatrix[0] = _mm_movelh_ps(q2,q3); + mMatrix[1] = _mm_movehl_ps(q3,q2); + mMatrix[2] = _mm_movelh_ps(q1,q4); + mMatrix[3] = _mm_movehl_ps(q4,q1); + } + +// Following procedure adapted from: +// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/ +// +// License/Copyright Statement: +// +// Copyright (c) 2001 Intel Corporation. +// +// Permition is granted to use, copy, distribute and prepare derivative works +// of this library for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. +// Intel makes no representations about the suitability of this library for +// any purpose, and specifically disclaims all warranties. 
+// See LEGAL-intel_matrixlib.TXT for all the legal information. + inline float invert() + { + LL_ALIGN_16(const unsigned int Sign_PNNP[4]) = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; + + // The inverse is calculated using "Divide and Conquer" technique. The + // original matrix is divided into four 2x2 sub-matrices. Since each + // register holds four matrix elements, the smaller matrices are + // represented as registers. Hence we get a better locality of the + // calculations. + + LLVector4a A = _mm_movelh_ps(mMatrix[0], mMatrix[1]), // the four sub-matrices + B = _mm_movehl_ps(mMatrix[1], mMatrix[0]), + C = _mm_movelh_ps(mMatrix[2], mMatrix[3]), + D = _mm_movehl_ps(mMatrix[3], mMatrix[2]); + LLVector4a iA, iB, iC, iD, // partial inverse of the sub-matrices + DC, AB; + LLSimdScalar dA, dB, dC, dD; // determinant of the sub-matrices + LLSimdScalar det, d, d1, d2; + LLVector4a rd; + + // AB = A# * B + AB.setMul(_mm_shuffle_ps(A,A,0x0F), B); + AB.sub(_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E))); + // DC = D# * C + DC.setMul(_mm_shuffle_ps(D,D,0x0F), C); + DC.sub(_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E))); + + // dA = |A| + dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A); + dA -= _mm_movehl_ps(dA,dA); + // dB = |B| + dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B); + dB -= _mm_movehl_ps(dB,dB); + + // dC = |C| + dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C); + dC -= _mm_movehl_ps(dC,dC); + // dD = |D| + dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D); + dD -= _mm_movehl_ps(dD,dD); + + // d = trace(AB*DC) = trace(A#*B*D#*C) + d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB); + + // iD = C*A#*B + iD.setMul(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB)); + iD.add(_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB))); + // iA = B*D#*C + iA.setMul(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC)); + iA.add(_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC))); + + // d = trace(AB*DC) = trace(A#*B*D#*C) [continue] + d = 
_mm_add_ps(d, _mm_movehl_ps(d, d)); + d += _mm_shuffle_ps(d, d, 1); + d1 = dA*dD; + d2 = dB*dC; + + // iD = D*|A| - C*A#*B + iD.setSub(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD); + + // iA = A*|D| - B*D#*C; + iA.setSub(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA); + + // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C) + det = d1+d2-d; + + __m128 is_zero_mask = _mm_cmpeq_ps(det,_mm_setzero_ps()); + rd = _mm_div_ss(_mm_set_ss(1.f),_mm_or_ps(_mm_andnot_ps(is_zero_mask, det), _mm_and_ps(is_zero_mask, _mm_set_ss(1.f)))); +#ifdef ZERO_SINGULAR + rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd); +#endif + + // iB = D * (A#B)# = D*B#*A + iB.setMul(D, _mm_shuffle_ps(AB,AB,0x33)); + iB.sub(_mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66))); + // iC = A * (D#C)# = A*C#*D + iC.setMul(A, _mm_shuffle_ps(DC,DC,0x33)); + iC.sub(_mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66))); + + rd = _mm_shuffle_ps(rd,rd,0); + rd = _mm_xor_ps(rd, _mm_load_ps((const float*)Sign_PNNP)); + + // iB = C*|B| - D*B#*A + iB.setSub(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB); + + // iC = B*|C| - A*C#*D; + iC.setSub(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC); + + + // iX = iX / det + iA.mul(rd); + iB.mul(rd); + iC.mul(rd); + iD.mul(rd); + + mMatrix[0] = _mm_shuffle_ps(iA,iB,0x77); + mMatrix[1] = _mm_shuffle_ps(iA,iB,0x22); + mMatrix[2] = _mm_shuffle_ps(iC,iD,0x77); + mMatrix[3] = _mm_shuffle_ps(iC,iD,0x22); + + F32 ret; + _mm_store_ss(&ret,det); + return ret; + } + + //=============Affine transformation matrix only========================= + + //Multiply matrix with a pure translation matrix. 
+ inline void applyTranslation_affine(const F32& x, const F32& y, const F32& z) + { + const LLVector4a xyz0(x,y,z,0); //load + LLVector4a xxxx; + xxxx.splat<0>(xyz0); + LLVector4a yyyy; + yyyy.splat<1>(xyz0); + LLVector4a zzzz; + zzzz.splat<2>(xyz0); + + LLVector4a sum1; + LLVector4a sum2; + LLVector4a sum3; + + sum1.setMul(xxxx,mMatrix[0]); + sum2.setMul(yyyy,mMatrix[1]); + sum3.setMul(zzzz,mMatrix[2]); + + mMatrix[3].add(sum1); + mMatrix[3].add(sum2); + mMatrix[3].add(sum3); + } + + //Multiply matrix with a pure translation matrix. + inline void applyTranslation_affine(const LLVector3& trans) + { + applyTranslation_affine(trans.mV[VX],trans.mV[VY],trans.mV[VZ]); + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const F32& x, const F32& y, const F32& z) + { + const LLVector4a xyz0(x,y,z,0); //load + LLVector4a xxxx; + xxxx.splat<0>(xyz0); + LLVector4a yyyy; + yyyy.splat<1>(xyz0); + LLVector4a zzzz; + zzzz.splat<2>(xyz0); + + mMatrix[0].mul(xxxx); + mMatrix[1].mul(yyyy); + mMatrix[2].mul(zzzz); + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const LLVector3& scale) + { + applyScale_affine(scale.mV[VX],scale.mV[VY],scale.mV[VZ]); + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const F32& s) + { + const LLVector4a scale(s); //load + mMatrix[0].mul(scale); + mMatrix[1].mul(scale); + mMatrix[2].mul(scale); + } + + //Direct addition to row3. + inline void translate_affine(const LLVector3& trans) + { + LLVector4a translation; + translation.load3(trans.mV); + mMatrix[3].add(translation); + } + + //Direct assignment of row3. 
+ inline void setTranslate_affine(const LLVector3& trans) + { + static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); + + LLVector4a translation; + translation.load3(trans.mV); + + mMatrix[3].setSelectWithMask(mask,mMatrix[3],translation); + } + + inline void mul_affine(const LLMatrix4a& rhs) + { + LLVector4a x0,y0,z0; + LLVector4a x1,y1,z1; + LLVector4a x2,y2,z2; + LLVector4a x3,y3,z3; + + //12 shuffles + x0.splat<0>(rhs.mMatrix[0]); + x1.splat<0>(rhs.mMatrix[1]); + x2.splat<0>(rhs.mMatrix[2]); + x3.splat<0>(rhs.mMatrix[3]); + + y0.splat<1>(rhs.mMatrix[0]); + y1.splat<1>(rhs.mMatrix[1]); + y2.splat<1>(rhs.mMatrix[2]); + y3.splat<1>(rhs.mMatrix[3]); + + z0.splat<2>(rhs.mMatrix[0]); + z1.splat<2>(rhs.mMatrix[1]); + z2.splat<2>(rhs.mMatrix[2]); + z3.splat<2>(rhs.mMatrix[3]); + + //12 muls + x0.mul(mMatrix[0]); + x1.mul(mMatrix[0]); + x2.mul(mMatrix[0]); + x3.mul(mMatrix[0]); + + y0.mul(mMatrix[1]); + y1.mul(mMatrix[1]); + y2.mul(mMatrix[1]); + y3.mul(mMatrix[1]); + + z0.mul(mMatrix[2]); + z1.mul(mMatrix[2]); + z2.mul(mMatrix[2]); + z3.mul(mMatrix[2]); + + //9 adds + x0.add(y0); + + x1.add(y1); + + x2.add(y2); + + x3.add(y3); + z3.add(mMatrix[3]); + + mMatrix[0].setAdd(x0,z0); + mMatrix[1].setAdd(x1,z1); + mMatrix[2].setAdd(x2,z2); + mMatrix[3].setAdd(x3,z3); + } + + inline void extractRotation_affine() + { + static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); + mMatrix[0].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[0]); + mMatrix[1].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[1]); + mMatrix[2].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[2]); + mMatrix[3].setSelectWithMask(mask,LLVector4a(1.f),_mm_setzero_ps()); + } + + //======================Logic==================== + inline bool isIdentity() const + { + static LLMatrix4a mins; + static LLMatrix4a maxs; + static LLVector4a delta(0.0001f); + + static bool init_mins = ( mins.setIdentity(), + mins.getRow<0>().sub(delta), + 
mins.getRow<1>().sub(delta), + mins.getRow<2>().sub(delta), + mins.getRow<3>().sub(delta), true ); + static bool init_maxs = ( maxs.setIdentity(), + maxs.getRow<0>().add(delta), + maxs.getRow<1>().add(delta), + maxs.getRow<2>().add(delta), + maxs.getRow<3>().add(delta), true ); + + LLVector4a mask1 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[0],mins.getRow<0>()), _mm_cmplt_ps(mMatrix[0],maxs.getRow<0>())); + LLVector4a mask2 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[1],mins.getRow<1>()), _mm_cmplt_ps(mMatrix[1],maxs.getRow<1>())); + LLVector4a mask3 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[2],mins.getRow<2>()), _mm_cmplt_ps(mMatrix[2],maxs.getRow<2>())); + LLVector4a mask4 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[3],mins.getRow<3>()), _mm_cmplt_ps(mMatrix[3],maxs.getRow<3>())); + + mask1 = _mm_and_ps(mask1,mask2); + mask2 = _mm_and_ps(mask3,mask4); + + return _mm_movemask_epi8(_mm_castps_si128(_mm_and_ps(mask1, mask2))) == 0xFFFF; + } +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 981e2176f..5189d852b 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -932,10 +932,10 @@ protected: MIN = 3 } eDName; - LLVector4a mCenter; - LLVector4a mSize; - LLVector4a mMax; - LLVector4a mMin; + LL_ALIGN_16(LLVector4a mCenter); + LL_ALIGN_16(LLVector4a mSize); + LL_ALIGN_16(LLVector4a mMax); + LL_ALIGN_16(LLVector4a mMin); oct_node* mParent; U8 mOctant; @@ -964,6 +964,26 @@ public: : BaseType(center, size, parent) { } + +#ifdef LL_OCTREE_POOLS + void* operator new(size_t size) + { + return getPool(size).malloc(); + } + void operator delete(void* ptr) + { + getPool(sizeof(LLOctreeNode)).free(ptr); + } +#else + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } +#endif bool balance() { diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index 08de6771e..0cbf02c83 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -101,7 
+101,7 @@ public: } private: - LLVector4a mV; + LL_ALIGN_16(LLVector4a mV); } LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llquaternion2.h b/indra/llmath/llquaternion2.h index fd9c0cf3a..6cfe91a02 100644 --- a/indra/llmath/llquaternion2.h +++ b/indra/llmath/llquaternion2.h @@ -40,6 +40,7 @@ ///////////////////////////// #include "llquaternion.h" +LL_ALIGN_PREFIX(16) class LLQuaternion2 { public: @@ -84,6 +85,8 @@ public: // Quantize this quaternion to 16 bit precision inline void quantize16(); + inline void mul(const LLQuaternion2& b); + ///////////////////////// // Quaternion inspection ///////////////////////// @@ -98,8 +101,8 @@ public: protected: - LLVector4a mQ; + LL_ALIGN_16(LLVector4a mQ); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl index 2a6987552..52d67620f 100644 --- a/indra/llmath/llquaternion2.inl +++ b/indra/llmath/llquaternion2.inl @@ -50,6 +50,39 @@ inline LLVector4a& LLQuaternion2::getVector4aRw() return mQ; } +inline void LLQuaternion2::mul(const LLQuaternion2& b) +{ + static LL_ALIGN_16(const unsigned int signMask[4]) = { 0x0, 0x0, 0x0, 0x80000000 }; + + LLVector4a sum1, sum2, prod1, prod2, prod3, prod4; + const LLVector4a& va = mQ; + const LLVector4a& vb = b.getVector4a(); + + // [VX] [VY] [VZ] [VW] + //prod1: +wx +wy +wz +ww Bwwww*Axyzw + //prod2: +xw +yw +zw -xx Bxyzx*Awwwx [VW] sign flip + //prod3: +yz +zx +xy -yy Byzxy*Azxyy [VW] sign flip + //prod4: -zy -xz -yx -zz Bzxyz*Ayzxz + + const LLVector4a Bwwww = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(3,3,3,3)); + const LLVector4a Bxyzx = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(0,2,1,0)); + const LLVector4a Awwwx = _mm_shuffle_ps(va,va,_MM_SHUFFLE(0,3,3,3)); + const LLVector4a Byzxy = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(1,0,2,1)); + const LLVector4a Azxyy = _mm_shuffle_ps(va,va,_MM_SHUFFLE(1,1,0,2)); + const LLVector4a Bzxyz = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(2,1,0,2)); + const LLVector4a Ayzxz = _mm_shuffle_ps(va,va,_MM_SHUFFLE(2,0,2,1)); + 
prod1.setMul(Bwwww,va); + prod2.setMul(Bxyzx,Awwwx); + prod3.setMul(Byzxy,Azxyy); + prod4.setMul(Bzxyz,Ayzxz); + + sum1.setAdd(prod2,prod3); + sum1 = _mm_xor_ps(sum1, _mm_load_ps((const float*)signMask)); + sum2.setSub(prod1,prod4); + mQ.setAdd(sum1,sum2); +} + ///////////////////////// // Quaternion modification ///////////////////////// diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 79d0a4455..2e958b308 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -128,7 +128,7 @@ public: inline void loadua(const F32* src); // Load only three floats beginning at address 'src'. Slowest method. - inline void load3(const F32* src); + inline void load3(const F32* src, const F32 w=0.f); // Store to a 16-byte aligned memory address inline void store4a(F32* dst) const; @@ -170,6 +170,9 @@ public: // Set all 4 elements to element i of v, with i NOT known at compile time inline void splat(const LLVector4a& v, U32 i); + + // Sets element N to that of src's element N. Much cleaner than.. {LLVector4Logical mask; mask.clear(); mask.setElement(); target.setSelectWithMask(mask,src,target);} + template inline void copyComponent(const LLVector4a& src); // Select bits from sourceIfTrue and sourceIfFalse according to bits in mask inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ); @@ -282,6 +285,8 @@ public: void quantize8( const LLVector4a& low, const LLVector4a& high ); void quantize16( const LLVector4a& low, const LLVector4a& high ); + void negate(); + //////////////////////////////////// // LOGICAL //////////////////////////////////// diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 69d3d01ef..c3499d23d 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -41,11 +41,11 @@ inline void LLVector4a::loadua(const F32* src) } // Load only three floats beginning at address 'src'. Slowest method. 
-inline void LLVector4a::load3(const F32* src) +inline void LLVector4a::load3(const F32* src, const F32 w) { // mQ = { 0.f, src[2], src[1], src[0] } = { W, Z, Y, X } // NB: This differs from the convention of { Z, Y, X, W } - mQ = _mm_set_ps(0.f, src[2], src[1], src[0]); + mQ = _mm_set_ps(w, src[2], src[1], src[0]); } // Store to a 16-byte aligned memory address @@ -154,6 +154,13 @@ inline void LLVector4a::splat(const LLVector4a& v, U32 i) } } +// Sets element N to that of src's element N +template inline void LLVector4a::copyComponent(const LLVector4a& src) +{ + static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[N*4]); + setSelectWithMask(mask,src,mQ); +} + // Select bits from sourceIfTrue and sourceIfFalse according to bits in mask inline void LLVector4a::setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ) { @@ -529,6 +536,11 @@ inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high ) setSelectWithMask( lowMask, low, *this ); } +inline void LLVector4a::negate() +{ + static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + mQ = _mm_xor_ps(*reinterpret_cast(signMask), mQ); +} //////////////////////////////////// // LOGICAL diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index c5698f7ce..5e2cc413b 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -79,7 +79,7 @@ public: { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; ll_assert_aligned(allOnes,16); - mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); + mQ = _mm_andnot_ps( mQ, _mm_load_ps((F32*)(allOnes))); return *this; } @@ -115,7 +115,7 @@ public: template void setElement() { - mQ = _mm_or_ps( mQ, *reinterpret_cast(S_V4LOGICAL_MASK_TABLE + 4*N) ); + mQ = _mm_or_ps( mQ, _mm_load_ps( (F32*)&S_V4LOGICAL_MASK_TABLE[4*N] ) ); } private: diff --git 
a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 72c3f9693..3b5b2f148 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -50,7 +50,6 @@ #include "llstl.h" #include "llsdserialize.h" #include "llvector4a.h" -#include "llmatrix4a.h" #include "lltimer.h" #define DEBUG_SILHOUETTE_BINORMALS 0 @@ -2184,7 +2183,7 @@ BOOL LLVolume::generate() 0, 0, scale[2], 0, 0, 0, 0, 1 }; - LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 rot(mPathp->mPath[s].mRot.getF32ptr()); LLMatrix4 scale_mat(sc); scale_mat *= rot; @@ -3670,16 +3669,14 @@ S32 LLVolume::getNumTriangles(S32* vcount) const void LLVolume::generateSilhouetteVertices(std::vector &vertices, std::vector &normals, const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, + const LLMatrix4a& mat_in, + const LLMatrix4a& norm_mat_in, S32 face_mask) { - LLMatrix4a mat; - mat.loadu(mat_in); + const LLMatrix4a& mat = mat_in; + + const LLMatrix4a& norm_mat = norm_mat_in; - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); - LLVector4a obj_cam_vec; obj_cam_vec.load3(obj_cam_vec_in.mV); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 7a74d544c..2f52a5949 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -27,6 +27,9 @@ #ifndef LL_LLVOLUME_H #define LL_LLVOLUME_H +#ifdef IN_PCH +#error "llvolume.h should not be in pch include chain." 
+#endif #include class LLProfileParams; @@ -747,10 +750,10 @@ public: class PathPt { public: - LLMatrix4a mRot; - LLVector4a mPos; + LL_ALIGN_16(LLMatrix4a mRot); + LL_ALIGN_16(LLVector4a mPos); - LLVector4a mScale; + LL_ALIGN_16(LLVector4a mScale); F32 mTexT; F32 pad[3]; //for alignment PathPt() @@ -1017,8 +1020,8 @@ public: void generateSilhouetteVertices(std::vector &vertices, std::vector &normals, const LLVector3& view_vec, - const LLMatrix4& mat, - const LLMatrix3& norm_mat, + const LLMatrix4a& mat, + const LLMatrix4a& norm_mat, S32 face_index); //get the face index of the face that intersects with the given line segment at the point diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 40d2e890c..61b90f68a 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -127,13 +127,14 @@ public: LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; +LL_ALIGN_PREFIX(16) class LLOctreeTriangleRayIntersect : public LLOctreeTraveler { public: const LLVolumeFace* mFace; - LLVector4a mStart; - LLVector4a mDir; - LLVector4a mEnd; + LL_ALIGN_16(LLVector4a mStart); + LL_ALIGN_16(LLVector4a mDir); + LL_ALIGN_16(LLVector4a mEnd); LLVector4a* mIntersection; LLVector2* mTexCoord; LLVector4a* mNormal; @@ -148,7 +149,7 @@ public: void traverse(const LLOctreeNode* node); virtual void visit(const LLOctreeNode* node); -}; +} LL_ALIGN_POSTFIX(16); class LLVolumeOctreeValidate : public LLOctreeTraveler { diff --git a/indra/llmath/xform.cpp b/indra/llmath/xform.cpp index a6b3b6d2a..62033eabd 100644 --- a/indra/llmath/xform.cpp +++ b/indra/llmath/xform.cpp @@ -90,30 +90,29 @@ void LLXformMatrix::updateMatrix(BOOL update_bounds) { update(); - mWorldMatrix.initAll(mScale, mWorldRotation, mWorldPosition); + LLMatrix4 world_matrix; + world_matrix.initAll(mScale, mWorldRotation, mWorldPosition); + mWorldMatrix.loadu(world_matrix); if (update_bounds && (mChanged & MOVED)) { - mMin.mV[0] = mMax.mV[0] = 
mWorldMatrix.mMatrix[3][0]; - mMin.mV[1] = mMax.mV[1] = mWorldMatrix.mMatrix[3][1]; - mMin.mV[2] = mMax.mV[2] = mWorldMatrix.mMatrix[3][2]; + mMax = mMin = mWorldMatrix.getRow<3>(); - F32 f0 = (fabs(mWorldMatrix.mMatrix[0][0])+fabs(mWorldMatrix.mMatrix[1][0])+fabs(mWorldMatrix.mMatrix[2][0])) * 0.5f; - F32 f1 = (fabs(mWorldMatrix.mMatrix[0][1])+fabs(mWorldMatrix.mMatrix[1][1])+fabs(mWorldMatrix.mMatrix[2][1])) * 0.5f; - F32 f2 = (fabs(mWorldMatrix.mMatrix[0][2])+fabs(mWorldMatrix.mMatrix[1][2])+fabs(mWorldMatrix.mMatrix[2][2])) * 0.5f; + LLVector4a total_sum,sum1,sum2; + total_sum.setAbs(mWorldMatrix.getRow<0>()); + sum1.setAbs(mWorldMatrix.getRow<1>()); + sum2.setAbs(mWorldMatrix.getRow<2>()); + sum1.add(sum2); + total_sum.add(sum1); + total_sum.mul(.5f); - mMin.mV[0] -= f0; - mMin.mV[1] -= f1; - mMin.mV[2] -= f2; - - mMax.mV[0] += f0; - mMax.mV[1] += f1; - mMax.mV[2] += f2; + mMax.add(total_sum); + mMin.sub(total_sum); } } void LLXformMatrix::getMinMax(LLVector3& min, LLVector3& max) const { - min = mMin; - max = mMax; + min.set(mMin.getF32ptr()); + max.set(mMax.getF32ptr()); } diff --git a/indra/llmath/xform.h b/indra/llmath/xform.h index a3d0070f9..06c408fa5 100644 --- a/indra/llmath/xform.h +++ b/indra/llmath/xform.h @@ -28,6 +28,7 @@ #include "v3math.h" #include "m4math.h" +#include "llmatrix4a.h" #include "llquaternion.h" const F32 MAX_OBJECT_Z = 4096.f; // should match REGION_HEIGHT_METERS, Pre-havok4: 768.f @@ -130,20 +131,21 @@ public: const LLVector3& getWorldPosition() const { return mWorldPosition; } }; +LL_ALIGN_PREFIX(16) class LLXformMatrix : public LLXform { public: LLXformMatrix() : LLXform() {}; virtual ~LLXformMatrix(); - const LLMatrix4& getWorldMatrix() const { return mWorldMatrix; } - void setWorldMatrix (const LLMatrix4& mat) { mWorldMatrix = mat; } + const LLMatrix4a& getWorldMatrix() const { return mWorldMatrix; } + void setWorldMatrix (const LLMatrix4a& mat) { mWorldMatrix = mat; } void init() { mWorldMatrix.setIdentity(); - 
mMin.clearVec(); - mMax.clearVec(); + mMin.clear(); + mMax.clear(); LLXform::init(); } @@ -153,11 +155,11 @@ public: void getMinMax(LLVector3& min,LLVector3& max) const; protected: - LLMatrix4 mWorldMatrix; - LLVector3 mMin; - LLVector3 mMax; + LL_ALIGN_16(LLMatrix4a mWorldMatrix); + LL_ALIGN_16(LLVector4a mMin); + LL_ALIGN_16(LLVector4a mMax); -}; +} LL_ALIGN_POSTFIX(16); BOOL LLXform::setParent(LLXform* parent) { diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index a72e22899..911d461f7 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -31,6 +31,7 @@ #include "llconvexdecomposition.h" #include "llsdserialize.h" #include "llvector4a.h" +#include "llmatrix4a.h" #if LL_MSVC #pragma warning (push) #pragma warning (disable : 4068) diff --git a/indra/llrender/llcubemap.cpp b/indra/llrender/llcubemap.cpp index 45a3b1817..3fd4464fb 100644 --- a/indra/llrender/llcubemap.cpp +++ b/indra/llrender/llcubemap.cpp @@ -34,6 +34,7 @@ #include "v3dmath.h" #include "m3math.h" #include "m4math.h" +#include "llmatrix4a.h" #include "llrender.h" #include "llglslshader.h" @@ -265,18 +266,19 @@ void LLCubeMap::setMatrix(S32 stage) gGL.getTexUnit(stage)->activate(); } - LLVector3 x(gGLModelView+0); - LLVector3 y(gGLModelView+4); - LLVector3 z(gGLModelView+8); + LLVector3 x(gGLModelView.getRow<0>().getF32ptr()); + LLVector3 y(gGLModelView.getRow<1>().getF32ptr()); + LLVector3 z(gGLModelView.getRow<2>().getF32ptr()); LLMatrix3 mat3; mat3.setRows(x,y,z); - LLMatrix4 trans(mat3); + LLMatrix4a trans; + trans.loadu(mat3); trans.transpose(); gGL.matrixMode(LLRender::MM_TEXTURE); gGL.pushMatrix(); - gGL.loadMatrix((F32 *)trans.mMatrix); + gGL.loadMatrix(trans); gGL.matrixMode(LLRender::MM_MODELVIEW); /*if (stage > 0) diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 11d8fa557..1471ccfb3 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -444,6 +444,7 @@ LLGLManager::LLGLManager() : 
mHasDebugOutput(FALSE), mHasAdaptiveVsync(FALSE), + mHasTextureSwizzle(FALSE), mIsATI(FALSE), mIsNVIDIA(FALSE), @@ -1382,6 +1383,35 @@ void flush_glerror() glGetError(); } +const std::string getGLErrorString(GLenum error) +{ + switch(error) + { + case GL_NO_ERROR: + return "No Error"; + case GL_INVALID_ENUM: + return "Invalid Enum"; + case GL_INVALID_VALUE: + return "Invalid Value"; + case GL_INVALID_OPERATION: + return "Invalid Operation"; + case GL_INVALID_FRAMEBUFFER_OPERATION: + return "Invalid Framebuffer Operation"; + case GL_OUT_OF_MEMORY: + return "Out of Memory"; + case GL_STACK_UNDERFLOW: + return "Stack Underflow"; + case GL_STACK_OVERFLOW: + return "Stack Overflow"; +#ifdef GL_TABLE_TOO_LARGE + case GL_TABLE_TOO_LARGE: + return "Table too large"; +#endif + default: + return "UNKNOWN ERROR"; + } +} + //this function outputs gl error to the log file, does not crash the code. void log_glerror() { @@ -1394,17 +1424,8 @@ void log_glerror() error = glGetError(); while (LL_UNLIKELY(error)) { - GLubyte const * gl_error_msg = gluErrorString(error); - if (NULL != gl_error_msg) - { - llwarns << "GL Error: " << error << " GL Error String: " << gl_error_msg << llendl ; - } - else - { - // gluErrorString returns NULL for some extensions' error codes. - // you'll probably have to grep for the number in glext.h. 
- llwarns << "GL Error: UNKNOWN 0x" << std::hex << error << std::dec << llendl; - } + std::string gl_error_msg = getGLErrorString(error); + llwarns << "GL Error: 0x" << std::hex << error << std::dec << " GL Error String: " << gl_error_msg << llendl; error = glGetError(); } } @@ -1418,27 +1439,13 @@ void do_assert_glerror() while (LL_UNLIKELY(error)) { quit = TRUE; - GLubyte const * gl_error_msg = gluErrorString(error); - if (NULL != gl_error_msg) + + std::string gl_error_msg = getGLErrorString(error); + LL_WARNS("RenderState") << "GL Error: 0x" << std::hex << error << std::dec << LL_ENDL; + LL_WARNS("RenderState") << "GL Error String: " << gl_error_msg << LL_ENDL; + if (gDebugSession) { - LL_WARNS("RenderState") << "GL Error:" << error<< LL_ENDL; - LL_WARNS("RenderState") << "GL Error String:" << gl_error_msg << LL_ENDL; - - if (gDebugSession) - { - gFailLog << "GL Error:" << gl_error_msg << std::endl; - } - } - else - { - // gluErrorString returns NULL for some extensions' error codes. - // you'll probably have to grep for the number in glext.h. 
- LL_WARNS("RenderState") << "GL Error: UNKNOWN 0x" << std::hex << error << std::dec << LL_ENDL; - - if (gDebugSession) - { - gFailLog << "GL Error: UNKNOWN 0x" << std::hex << error << std::dec << std::endl; - } + gFailLog << "GL Error: 0x" << std::hex << error << std::dec << " GL Error String: " << gl_error_msg << std::endl; } error = glGetError(); } @@ -1662,10 +1669,6 @@ void LLGLState::checkTextureChannels(const std::string& msg) GLint stackDepth = 0; - glh::matrix4f mat; - glh::matrix4f identity; - identity.identity(); - for (GLint i = 1; i < gGLManager.mNumTextureUnits; i++) { gGL.getTexUnit(i)->activate(); @@ -1685,10 +1688,11 @@ void LLGLState::checkTextureChannels(const std::string& msg) } } - glGetFloatv(GL_TEXTURE_MATRIX, (GLfloat*) mat.m); + LLMatrix4a mat; + glGetFloatv(GL_TEXTURE_MATRIX, (GLfloat*) mat.mMatrix); stop_glerror(); - if (mat != identity) + if (!mat.isIdentity()) { error = TRUE; LL_WARNS("RenderState") << "Texture matrix in channel " << i << " corrupt." << LL_ENDL; @@ -2179,7 +2183,7 @@ void parse_glsl_version(S32& major, S32& minor) LLStringUtil::convertToS32(minor_str, minor); } -LLGLUserClipPlane::LLGLUserClipPlane(const LLPlane& p, const glh::matrix4f& modelview, const glh::matrix4f& projection, bool apply) +LLGLUserClipPlane::LLGLUserClipPlane(const LLPlane& p, const LLMatrix4a& modelview, const LLMatrix4a& projection, bool apply) { mApply = apply; @@ -2194,27 +2198,42 @@ LLGLUserClipPlane::LLGLUserClipPlane(const LLPlane& p, const glh::matrix4f& mode void LLGLUserClipPlane::setPlane(F32 a, F32 b, F32 c, F32 d) { - glh::matrix4f& P = mProjection; - glh::matrix4f& M = mModelview; - - glh::matrix4f invtrans_MVP = (P * M).inverse().transpose(); - glh::vec4f oplane(a,b,c,d); - glh::vec4f cplane; - invtrans_MVP.mult_matrix_vec(oplane, cplane); + LLMatrix4a& P = mProjection; + LLMatrix4a& M = mModelview; - cplane /= fabs(cplane[2]); // normalize such that depth is not scaled - cplane[3] -= 1; + LLMatrix4a invtrans_MVP; + 
invtrans_MVP.setMul(P,M); + invtrans_MVP.invert(); + invtrans_MVP.transpose(); - if(cplane[2] < 0) - cplane *= -1; + LLVector4a oplane(a,b,c,d); + LLVector4a cplane; + LLVector4a cplane_splat; + LLVector4a cplane_neg; + + invtrans_MVP.rotate4(oplane,cplane); + + cplane_splat.splat<2>(cplane); + cplane_splat.setAbs(cplane_splat); + cplane.div(cplane_splat); + cplane.sub(LLVector4a(0.f,0.f,0.f,1.f)); + + cplane_splat.splat<2>(cplane); + cplane_neg = cplane; + cplane_neg.negate(); + + cplane.setSelectWithMask( cplane_splat.lessThan( _mm_setzero_ps() ), cplane_neg, cplane ); + + LLMatrix4a suffix; + suffix.setIdentity(); + suffix.setColumn<2>(cplane); + LLMatrix4a newP; + newP.setMul(suffix,P); - glh::matrix4f suffix; - suffix.set_row(2, cplane); - glh::matrix4f newP = suffix * P; gGL.matrixMode(LLRender::MM_PROJECTION); gGL.pushMatrix(); - gGL.loadMatrix(newP.m); - gGLObliqueProjectionInverse = LLMatrix4(newP.inverse().transpose().m); + gGL.loadMatrix(newP); + //gGLObliqueProjectionInverse = LLMatrix4(newP.inverse().transpose().m); gGL.matrixMode(LLRender::MM_MODELVIEW); } @@ -2403,19 +2422,18 @@ void LLGLDepthTest::checkState() } } -LLGLSquashToFarClip::LLGLSquashToFarClip(glh::matrix4f P, U32 layer) +LLGLSquashToFarClip::LLGLSquashToFarClip(const LLMatrix4a& P_in, U32 layer) { - + LLMatrix4a P = P_in; F32 depth = 0.99999f - 0.0001f * layer; - for (U32 i = 0; i < 4; i++) - { - P.element(2, i) = P.element(3, i) * depth; - } + LLVector4a col = P.getColumn<3>(); + col.mul(depth); + P.setColumn<2>(col); gGL.matrixMode(LLRender::MM_PROJECTION); gGL.pushMatrix(); - gGL.loadMatrix(P.m); + gGL.loadMatrix(P); gGL.matrixMode(LLRender::MM_MODELVIEW); } diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 7b691e021..1cf65c8ca 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -38,12 +38,12 @@ #include "llstring.h" #include "stdtypes.h" #include "v4math.h" +#include "llmatrix4a.h" #include "llplane.h" #include "llgltypes.h" #include 
"llinstancetracker.h" #include "llglheaders.h" -#include "glh/glh_linear.h" extern BOOL gDebugGL; extern BOOL gDebugSession; @@ -321,21 +321,23 @@ public: Does not stack. Caches inverse of projection matrix used in gGLObliqueProjectionInverse */ +LL_ALIGN_PREFIX(16) class LLGLUserClipPlane { public: - LLGLUserClipPlane(const LLPlane& plane, const glh::matrix4f& modelview, const glh::matrix4f& projection, bool apply = true); + LLGLUserClipPlane(const LLPlane& plane, const LLMatrix4a& modelview, const LLMatrix4a& projection, bool apply = true); ~LLGLUserClipPlane(); void setPlane(F32 a, F32 b, F32 c, F32 d); private: - bool mApply; - glh::matrix4f mProjection; - glh::matrix4f mModelview; -}; + LL_ALIGN_16(LLMatrix4a mProjection); + LL_ALIGN_16(LLMatrix4a mModelview); + + bool mApply; +} LL_ALIGN_POSTFIX(16); /* Modify and load projection matrix to push depth values to far clip plane. @@ -348,7 +350,7 @@ private: class LLGLSquashToFarClip { public: - LLGLSquashToFarClip(glh::matrix4f projection, U32 layer = 0); + LLGLSquashToFarClip(const LLMatrix4a& projection, U32 layer = 0); ~LLGLSquashToFarClip(); }; @@ -455,8 +457,6 @@ public: void wait(); }; -extern LLMatrix4 gGLObliqueProjectionInverse; - #include "llglstates.h" void init_glstates(); diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h index 5aeed87ae..3a91c50ff 100644 --- a/indra/llrender/llglheaders.h +++ b/indra/llrender/llglheaders.h @@ -41,7 +41,6 @@ # include "GL/glx.h" # define GL_GLEXT_PROTOTYPES 1 # include "GL/glext.h" -# include "GL/glu.h" # include "GL/glx.h" # define GLX_GLXEXT_PROTOTYPES 1 # include "GL/glxext.h" @@ -266,7 +265,6 @@ extern PFNGLGENERATEMIPMAPEXTPROC glGenerateMipmapEXT; #define GL_GLEXT_PROTOTYPES 1 #include "GL/gl.h" #include "GL/glext.h" -#include "GL/glu.h" // The __APPLE__ kludge is to make glh_extensions.h not symbol-clash horribly # define __APPLE__ @@ -282,7 +280,6 @@ extern PFNGLGENERATEMIPMAPEXTPROC glGenerateMipmapEXT; // quotes so we get 
libraries/.../GL/ version #include "GL/gl.h" #include "GL/glext.h" -#include "GL/glu.h" #if LL_LINUX && !LL_MESA_HEADLESS @@ -551,7 +548,6 @@ extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange; //---------------------------------------------------------------------------- #include -#include // quotes so we get libraries/.../GL/ version #include "GL/glext.h" @@ -789,7 +785,6 @@ extern PFNGLGETDEBUGMESSAGELOGARBPROC glGetDebugMessageLogARB; // LL_DARWIN #include -#include #define GL_EXT_separate_specular_color 1 #include diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index 2ad5d355e..a5fb16ee0 100644 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -468,6 +468,8 @@ LLImageGL::~LLImageGL() sCount--; } +const S8 INVALID_OFFSET = -99 ; + void LLImageGL::init(BOOL usemipmaps) { // keep these members in the same order as declared in llimagehl.h @@ -484,14 +486,12 @@ void LLImageGL::init(BOOL usemipmaps) mHasExplicitFormat = FALSE; mAutoGenMips = FALSE; - mCanMask = TRUE; mIsMask = FALSE; mMaskRMSE = 1.f ; - - mNeedsAlphaAndPickMask = TRUE ; + mNeedsAlphaAndPickMask = FALSE ; mAlphaStride = 0 ; - mAlphaOffset = 0 ; + mAlphaOffset = INVALID_OFFSET ; mGLTextureCreated = FALSE ; mTexName = 0; @@ -1709,7 +1709,6 @@ void LLImageGL::setTarget(const LLGLenum target, const LLTexUnit::eTextureType b } //Used by media in V2 -const S8 INVALID_OFFSET = -99 ; void LLImageGL::setNeedsAlphaAndPickMask(BOOL need_mask) { if(mNeedsAlphaAndPickMask != need_mask) @@ -1723,7 +1722,6 @@ void LLImageGL::setNeedsAlphaAndPickMask(BOOL need_mask) else //do not need alpha mask { mAlphaOffset = INVALID_OFFSET ; - mCanMask = FALSE; } } } @@ -1746,8 +1744,7 @@ void LLImageGL::calcAlphaChannelOffsetAndStride() mAlphaStride = 2; break; case GL_RGB: - mNeedsAlphaAndPickMask = FALSE ; - mCanMask = FALSE; + setNeedsAlphaAndPickMask(FALSE); return ; //no alpha channel. 
case GL_RGBA: mAlphaStride = 4; @@ -1793,15 +1790,14 @@ void LLImageGL::calcAlphaChannelOffsetAndStride() { llwarns << "Cannot analyze alpha for image with format type " << std::hex << mFormatType << std::dec << llendl; - mNeedsAlphaAndPickMask = FALSE ; - mCanMask = FALSE; + setNeedsAlphaAndPickMask(FALSE); } } //std::map > > sTextureMaskMap; void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h) { - if(!mNeedsAlphaAndPickMask || !mCanMask) + if(!mNeedsAlphaAndPickMask) { return ; } diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h index d5632e553..ecb5222de 100644 --- a/indra/llrender/llimagegl.h +++ b/indra/llrender/llimagegl.h @@ -135,7 +135,7 @@ public: BOOL getHasGLTexture() const { return mTexName != 0; } LLGLuint getTexName() const { return mTexName; } - BOOL getIsAlphaMask(const F32 max_rmse) const { return mCanMask && (max_rmse < 0.f ? (bool)mIsMask : (mMaskRMSE <= max_rmse)); } + BOOL getIsAlphaMask(const F32 max_rmse) const { return mNeedsAlphaAndPickMask && (max_rmse < 0.f ? 
(bool)mIsMask : (mMaskRMSE <= max_rmse)); } BOOL getIsResident(BOOL test_now = FALSE); // not const @@ -185,7 +185,6 @@ private: S8 mHasExplicitFormat; // If false (default), GL format is f(mComponents) S8 mAutoGenMips; - BOOL mCanMask; BOOL mIsMask; F32 mMaskRMSE; BOOL mNeedsAlphaAndPickMask; diff --git a/indra/llrender/llpostprocess.cpp b/indra/llrender/llpostprocess.cpp index ebdbc17ef..18be01d24 100644 --- a/indra/llrender/llpostprocess.cpp +++ b/indra/llrender/llpostprocess.cpp @@ -43,6 +43,7 @@ #include "llsdutil_math.h" #include "llvertexbuffer.h" #include "llfasttimer.h" +#include "llmatrix4a.h" extern LLGLSLShader gPostColorFilterProgram; extern LLGLSLShader gPostNightVisionProgram; @@ -305,21 +306,21 @@ public: { addSetting(mStrength); } - /*virtual*/ bool isEnabled() const { return LLPostProcessShader::isEnabled() && llabs(gGLModelView[0] - gGLPreviousModelView[0]) > .0000001; } + /*virtual*/ bool isEnabled() const { return LLPostProcessShader::isEnabled() && llabs(gGLModelView.getF32ptr()[0] - gGLPreviousModelView.getF32ptr()[0]) > .0000001; } /*virtual*/ S32 getColorChannel() const { return 0; } /*virtual*/ S32 getDepthChannel() const { return 1; } /*virtual*/ QuadType preDraw() { - glh::matrix4f inv_proj(gGLModelView); - inv_proj.mult_left(gGLProjection); - inv_proj = inv_proj.inverse(); - glh::matrix4f prev_proj(gGLPreviousModelView); - prev_proj.mult_left(gGLProjection); + LLMatrix4a inv_proj; + inv_proj.setMul(gGLProjection,gGLModelView); + inv_proj.invert(); + LLMatrix4a prev_proj; + prev_proj.setMul(gGLProjection,gGLPreviousModelView); LLVector2 screen_rect = LLPostProcess::getInstance()->getDimensions(); - getShader().uniformMatrix4fv(sPrevProj, 1, GL_FALSE, prev_proj.m); - getShader().uniformMatrix4fv(sInvProj, 1, GL_FALSE, inv_proj.m); + getShader().uniformMatrix4fv(sPrevProj, 1, GL_FALSE, prev_proj.getF32ptr()); + getShader().uniformMatrix4fv(sInvProj, 1, GL_FALSE, inv_proj.getF32ptr()); getShader().uniform2fv(sScreenRes, 1, screen_rect.mV); 
getShader().uniform1i(sBlurStrength, mStrength); diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index 2439c75e6..ed8497536 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -35,17 +35,18 @@ #include "llrendertarget.h" #include "lltexture.h" #include "llshadermgr.h" +#include "llmatrix4a.h" LLRender gGL; // Handy copies of last good GL matrices //Would be best to migrate these to LLMatrix4a and LLVector4a, but that's too divergent right now. -LL_ALIGN_16(F32 gGLModelView[16]); -LL_ALIGN_16(F32 gGLLastModelView[16]); -LL_ALIGN_16(F32 gGLPreviousModelView[16]); -LL_ALIGN_16(F32 gGLLastProjection[16]); -LL_ALIGN_16(F32 gGLProjection[16]); -LL_ALIGN_16(S32 gGLViewport[4]); +LLMatrix4a gGLModelView; +LLMatrix4a gGLLastModelView; +LLMatrix4a gGLPreviousModelView; +LLMatrix4a gGLLastProjection; +LLMatrix4a gGLProjection; +S32 gGLViewport[4]; U32 LLRender::sUICalls = 0; U32 LLRender::sUIVerts = 0; @@ -928,12 +929,12 @@ void LLLightState::setPosition(const LLVector4& position) } else { //transform position by current modelview matrix - glh::vec4f pos(position.mV); + LLVector4a pos; + pos.loadua(position.mV); - const glh::matrix4f& mat = gGL.getModelviewMatrix(); - mat.mult_matrix_vec(pos); + gGL.getModelviewMatrix().rotate4(pos,pos); - mPosition.set(pos.v); + mPosition.set(pos.getF32ptr()); } } @@ -1014,12 +1015,12 @@ void LLLightState::setSpotDirection(const LLVector3& direction) } else { //transform direction by current modelview matrix - glh::vec3f dir(direction.mV); + LLVector4a dir; + dir.load3(direction.mV); - const glh::matrix4f& mat = gGL.getModelviewMatrix(); - mat.mult_matrix_dir(dir); + gGL.getModelviewMatrix().rotate(dir,dir); - mSpotDirection.set(dir.v); + mSpotDirection.set(dir.getF32ptr()); } } @@ -1066,6 +1067,18 @@ LLRender::LLRender() } mLightHash = 0; + + //Init base matrix for each mode + for(S32 i = 0; i < NUM_MATRIX_MODES; ++i) + { + mMatrix[i][0].setIdentity(); + } + + gGLModelView.setIdentity(); + 
gGLLastModelView.setIdentity(); + gGLPreviousModelView.setIdentity(); + gGLLastProjection.setIdentity(); + gGLProjection.setIdentity(); } LLRender::~LLRender() @@ -1188,12 +1201,11 @@ void LLRender::syncMatrices() }; LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr; - - static glh::matrix4f cached_mvp; + static LLMatrix4a cached_mvp; static U32 cached_mvp_mdv_hash = 0xFFFFFFFF; static U32 cached_mvp_proj_hash = 0xFFFFFFFF; - static glh::matrix4f cached_normal; + static LLMatrix4a cached_normal; static U32 cached_normal_hash = 0xFFFFFFFF; if (shader) @@ -1205,9 +1217,9 @@ void LLRender::syncMatrices() U32 i = MM_MODELVIEW; if (mMatHash[i] != shader->mMatHash[i]) { //update modelview, normal, and MVP - glh::matrix4f& mat = mMatrix[i][mMatIdx[i]]; - - shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mat.m); + const LLMatrix4a& mat = mMatrix[i][mMatIdx[i]]; + + shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mat.getF32ptr()); shader->mMatHash[i] = mMatHash[i]; //update normal matrix @@ -1216,20 +1228,20 @@ void LLRender::syncMatrices() { if (cached_normal_hash != mMatHash[i]) { - cached_normal = mat.inverse().transpose(); + cached_normal = mat; + cached_normal.invert(); + cached_normal.transpose(); cached_normal_hash = mMatHash[i]; } + + const LLMatrix4a& norm = cached_normal; - glh::matrix4f& norm = cached_normal; + LLVector3 norms[3]; + norms[0].set(norm.getRow<0>().getF32ptr()); + norms[1].set(norm.getRow<1>().getF32ptr()); + norms[2].set(norm.getRow<2>().getF32ptr()); - F32 norm_mat[] = - { - norm.m[0], norm.m[1], norm.m[2], - norm.m[4], norm.m[5], norm.m[6], - norm.m[8], norm.m[9], norm.m[10] - }; - - shader->uniformMatrix3fv(LLShaderMgr::NORMAL_MATRIX, 1, GL_FALSE, norm_mat); + shader->uniformMatrix3fv(LLShaderMgr::NORMAL_MATRIX, 1, GL_FALSE, norms[0].mV); } //update MVP matrix @@ -1241,13 +1253,12 @@ void LLRender::syncMatrices() if (cached_mvp_mdv_hash != mMatHash[i] || cached_mvp_proj_hash != mMatHash[MM_PROJECTION]) { - cached_mvp = mat; - 
cached_mvp.mult_left(mMatrix[proj][mMatIdx[proj]]); + cached_mvp.setMul(mMatrix[proj][mMatIdx[proj]], mat); cached_mvp_mdv_hash = mMatHash[i]; cached_mvp_proj_hash = mMatHash[MM_PROJECTION]; } - shader->uniformMatrix4fv(LLShaderMgr::MODELVIEW_PROJECTION_MATRIX, 1, GL_FALSE, cached_mvp.m); + shader->uniformMatrix4fv(LLShaderMgr::MODELVIEW_PROJECTION_MATRIX, 1, GL_FALSE, cached_mvp.getF32ptr()); } } @@ -1255,9 +1266,9 @@ void LLRender::syncMatrices() i = MM_PROJECTION; if (mMatHash[i] != shader->mMatHash[i]) { //update projection matrix, normal, and MVP - glh::matrix4f& mat = mMatrix[i][mMatIdx[i]]; - - shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mat.m); + const LLMatrix4a& mat = mMatrix[i][mMatIdx[i]]; + + shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mat.getF32ptr()); shader->mMatHash[i] = mMatHash[i]; if (!mvp_done) @@ -1269,13 +1280,12 @@ void LLRender::syncMatrices() if (cached_mvp_mdv_hash != mMatHash[i] || cached_mvp_proj_hash != mMatHash[MM_PROJECTION]) { U32 mdv = MM_MODELVIEW; - cached_mvp = mat; - cached_mvp.mult_right(mMatrix[mdv][mMatIdx[mdv]]); + cached_mvp.setMul(mat,mMatrix[mdv][mMatIdx[mdv]]); cached_mvp_mdv_hash = mMatHash[MM_MODELVIEW]; cached_mvp_proj_hash = mMatHash[MM_PROJECTION]; } - - shader->uniformMatrix4fv(LLShaderMgr::MODELVIEW_PROJECTION_MATRIX, 1, GL_FALSE, cached_mvp.m); + + shader->uniformMatrix4fv(LLShaderMgr::MODELVIEW_PROJECTION_MATRIX, 1, GL_FALSE, cached_mvp.getF32ptr()); } } } @@ -1284,7 +1294,7 @@ void LLRender::syncMatrices() { if (mMatHash[i] != shader->mMatHash[i]) { - shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mMatrix[i][mMatIdx[i]].m); + shader->uniformMatrix4fv(name[i], 1, GL_FALSE, mMatrix[i][mMatIdx[i]].getF32ptr()); shader->mMatHash[i] = mMatHash[i]; } } @@ -1312,7 +1322,7 @@ void LLRender::syncMatrices() if (mMatHash[i] != mCurMatHash[i]) { glMatrixMode(mode[i]); - glLoadMatrixf(mMatrix[i][mMatIdx[i]].m); + glLoadMatrixf(mMatrix[i][mMatIdx[i]].getF32ptr()); mCurMatHash[i] = mMatHash[i]; } } @@ -1323,7 +1333,7 
@@ void LLRender::syncMatrices() { gGL.getTexUnit(i-2)->activate(); glMatrixMode(mode[i]); - glLoadMatrixf(mMatrix[i][mMatIdx[i]].m); + glLoadMatrixf(mMatrix[i][mMatIdx[i]].getF32ptr()); mCurMatHash[i] = mMatHash[i]; } } @@ -1332,32 +1342,143 @@ void LLRender::syncMatrices() stop_glerror(); } +LLMatrix4a LLRender::genRot(const GLfloat& a, const LLVector4a& axis) const +{ + F32 r = a * DEG_TO_RAD; + + F32 c = cosf(r); + F32 s = sinf(r); + + F32 ic = 1.f-c; + + const LLVector4a add1(c,axis[VZ]*s,-axis[VY]*s); //1,z,-y + const LLVector4a add2(-axis[VZ]*s,c,axis[VX]*s); //-z,1,x + const LLVector4a add3(axis[VY]*s,-axis[VX]*s,c); //y,-x,1 + + LLVector4a axis_x; + axis_x.splat<0>(axis); + LLVector4a axis_y; + axis_y.splat<1>(axis); + LLVector4a axis_z; + axis_z.splat<2>(axis); + + LLVector4a c_axis; + c_axis.setMul(axis,ic); + + LLMatrix4a rot_mat; + rot_mat.getRow<0>().setMul(c_axis,axis_x); + rot_mat.getRow<0>().add(add1); + rot_mat.getRow<1>().setMul(c_axis,axis_y); + rot_mat.getRow<1>().add(add2); + rot_mat.getRow<2>().setMul(c_axis,axis_z); + rot_mat.getRow<2>().add(add3); + rot_mat.setRow<3>(LLVector4a(0,0,0,1)); + + return rot_mat; +} +LLMatrix4a LLRender::genOrtho(const GLfloat& left, const GLfloat& right, const GLfloat& bottom, const GLfloat& top, const GLfloat& zNear, const GLfloat& zFar) const +{ + LLMatrix4a ortho_mat; + ortho_mat.setRow<0>(LLVector4a(2.f/(right-left),0,0)); + ortho_mat.setRow<1>(LLVector4a(0,2.f/(top-bottom),0)); + ortho_mat.setRow<2>(LLVector4a(0,0,-2.f/(zFar-zNear))); + ortho_mat.setRow<3>(LLVector4a(-(right+left)/(right-left),-(top+bottom)/(top-bottom),-(zFar+zNear)/(zFar-zNear),1)); + + return ortho_mat; +} + +LLMatrix4a LLRender::genPersp(const GLfloat& fovy, const GLfloat& aspect, const GLfloat& zNear, const GLfloat& zFar) const +{ + GLfloat f = 1.f/tanf(DEG_TO_RAD*fovy/2.f); + + LLMatrix4a persp_mat; + persp_mat.setRow<0>(LLVector4a(f/aspect,0,0)); + persp_mat.setRow<1>(LLVector4a(0,f,0)); + 
persp_mat.setRow<2>(LLVector4a(0,0,(zFar+zNear)/(zNear-zFar),-1.f)); + persp_mat.setRow<3>(LLVector4a(0,0,(2.f*zFar*zNear)/(zNear-zFar),0)); + + return persp_mat; +} + +LLMatrix4a LLRender::genLook(const LLVector3& pos_in, const LLVector3& dir_in, const LLVector3& up_in) const +{ + const LLVector4a pos(pos_in.mV[VX],pos_in.mV[VY],pos_in.mV[VZ],1.f); + LLVector4a dir(dir_in.mV[VX],dir_in.mV[VY],dir_in.mV[VZ]); + const LLVector4a up(up_in.mV[VX],up_in.mV[VY],up_in.mV[VZ]); + + LLVector4a left_norm; + left_norm.setCross3(dir,up); + left_norm.normalize3fast(); + LLVector4a up_norm; + up_norm.setCross3(left_norm,dir); + up_norm.normalize3fast(); + LLVector4a& dir_norm = dir; + dir.normalize3fast(); + + LLVector4a left_dot; + left_dot.setAllDot3(left_norm,pos); + left_dot.negate(); + LLVector4a up_dot; + up_dot.setAllDot3(up_norm,pos); + up_dot.negate(); + LLVector4a dir_dot; + dir_dot.setAllDot3(dir_norm,pos); + + dir_norm.negate(); + + LLMatrix4a lookat_mat; + lookat_mat.setRow<0>(left_norm); + lookat_mat.setRow<1>(up_norm); + lookat_mat.setRow<2>(dir_norm); + lookat_mat.setRow<3>(LLVector4a(0,0,0,1)); + + lookat_mat.getRow<0>().copyComponent<3>(left_dot); + lookat_mat.getRow<1>().copyComponent<3>(up_dot); + lookat_mat.getRow<2>().copyComponent<3>(dir_dot); + + lookat_mat.transpose(); + + return lookat_mat; +} + +const LLMatrix4a& LLRender::genNDCtoWC() const +{ + static LLMatrix4a mat( + LLVector4a(.5f,0,0,0), + LLVector4a(0,.5f,0,0), + LLVector4a(0,0,.5f,0), + LLVector4a(.5f,.5f,.5f,1.f)); + return mat; +} + void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z) { + if( llabs(x) < F_APPROXIMATELY_ZERO && + llabs(y) < F_APPROXIMATELY_ZERO && + llabs(z) < F_APPROXIMATELY_ZERO) + { + return; + } + flush(); - { - glh::matrix4f trans_mat(1,0,0,x, - 0,1,0,y, - 0,0,1,z, - 0,0,0,1); - - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mult_right(trans_mat); - mMatHash[mMatrixMode]++; - } + 
mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].applyTranslation_affine(x,y,z); + mMatHash[mMatrixMode]++; + } void LLRender::scalef(const GLfloat& x, const GLfloat& y, const GLfloat& z) { + if( (llabs(x-1.f)) < F_APPROXIMATELY_ZERO && + (llabs(y-1.f)) < F_APPROXIMATELY_ZERO && + (llabs(z-1.f)) < F_APPROXIMATELY_ZERO) + { + return; + } flush(); { - glh::matrix4f scale_mat(x,0,0,0, - 0,y,0,0, - 0,0,z,0, - 0,0,0,1); - - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mult_right(scale_mat); + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].applyScale_affine(x,y,z); mMatHash[mMatrixMode]++; } } @@ -1366,38 +1487,156 @@ void LLRender::ortho(F32 left, F32 right, F32 bottom, F32 top, F32 zNear, F32 zF { flush(); - { + LLMatrix4a ortho_mat; + ortho_mat.setRow<0>(LLVector4a(2.f/(right-left),0,0)); + ortho_mat.setRow<1>(LLVector4a(0,2.f/(top-bottom),0)); + ortho_mat.setRow<2>(LLVector4a(0,0,-2.f/(zFar-zNear))); + ortho_mat.setRow<3>(LLVector4a(-(right+left)/(right-left),-(top+bottom)/(top-bottom),-(zFar+zNear)/(zFar-zNear),1)); - glh::matrix4f ortho_mat(2.f/(right-left),0,0, -(right+left)/(right-left), - 0,2.f/(top-bottom),0, -(top+bottom)/(top-bottom), - 0,0,-2.f/(zFar-zNear), -(zFar+zNear)/(zFar-zNear), - 0,0,0,1); - - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mult_right(ortho_mat); - mMatHash[mMatrixMode]++; - } + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mul_affine(ortho_mat); + mMatHash[mMatrixMode]++; +} + +void LLRender::rotatef(const LLMatrix4a& rot) +{ + flush(); + + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mul_affine(rot); + mMatHash[mMatrixMode]++; } void LLRender::rotatef(const GLfloat& a, const GLfloat& x, const GLfloat& y, const GLfloat& z) { + if( llabs(a) < F_APPROXIMATELY_ZERO || + llabs(a-360.f) < F_APPROXIMATELY_ZERO) + { + return; + } + flush(); - { - F32 r = a * DEG_TO_RAD; + rotatef(genRot(a,x,y,z)); +} - F32 c = cosf(r); - F32 s = sinf(r); +//LLRender::projectf & LLRender::unprojectf adapted from gluProject & gluUnproject in Mesa's GLU 9.0 library. 
+// License/Copyright Statement: +/* + * SGI FREE SOFTWARE LICENSE B (Version 2.0, Sept. 18, 2008) + * Copyright (C) 1991-2000 Silicon Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice including the dates of first publication and + * either this permission notice or a reference to + * http://oss.sgi.com/projects/FreeB/ + * shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * SILICON GRAPHICS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Except as contained in this notice, the name of Silicon Graphics, Inc. + * shall not be used in advertising or otherwise to promote the sale, use or + * other dealings in this Software without prior written authorization from + * Silicon Graphics, Inc. 
+ */ - F32 ic = 1.f-c; +bool LLRender::projectf(const LLVector3& object, const LLMatrix4a& modelview, const LLMatrix4a& projection, const LLRect& viewport, LLVector3& windowCoordinate) +{ + //Begin SSE intrinsics - glh::matrix4f rot_mat(x*x*ic+c, x*y*ic-z*s, x*z*ic+y*s, 0, - x*y*ic+z*s, y*y*ic+c, y*z*ic-x*s, 0, - x*z*ic-y*s, y*z*ic+x*s, z*z*ic+c, 0, - 0,0,0,1); + // Declare locals + const LLVector4a obj_vector(object.mV[VX],object.mV[VY],object.mV[VZ]); + const LLVector4a one(1.f); + LLVector4a temp_vec; //Scratch vector + LLVector4a w; //Splatted W-component. - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mult_right(rot_mat); - mMatHash[mMatrixMode]++; - } + modelview.affineTransform(obj_vector, temp_vec); //temp_vec = modelview * obj_vector; + + //Passing temp_matrix as v and res is safe. res not altered until after all other calculations + projection.rotate4(temp_vec, temp_vec); //temp_vec = projection * temp_vec + + w.splat<3>(temp_vec); //w = temp_vec.wwww + + //If w == 0.f, use 1.f instead. + LLVector4a div; + div.setSelectWithMask( w.equal( _mm_setzero_ps() ), one, w ); //float div = (w[N] == 0.f ? 
1.f : w[N]); + temp_vec.div(div); //temp_vec /= div; + + //Map x, y to range 0-1 + temp_vec.mul(.5f); + temp_vec.add(.5f); + + LLVector4Logical mask = temp_vec.equal(_mm_setzero_ps()); + if(mask.areAllSet(LLVector4Logical::MASK_W)) + return false; + + //End SSE intrinsics + + //Window coordinates + windowCoordinate[0]=temp_vec[VX]*viewport.getWidth()+viewport.mLeft; + windowCoordinate[1]=temp_vec[VY]*viewport.getHeight()+viewport.mBottom; + //This is only correct when glDepthRange(0.0, 1.0) + windowCoordinate[2]=temp_vec[VZ]; + + return true; +} + +bool LLRender::unprojectf(const LLVector3& windowCoordinate, const LLMatrix4a& modelview, const LLMatrix4a& projection, const LLRect& viewport, LLVector3& object) +{ + //Begin SSE intrinsics + + // Declare locals + static const LLVector4a one(1.f); + static const LLVector4a two(2.f); + LLVector4a norm_view( + ((windowCoordinate.mV[VX] - (F32)viewport.mLeft) / (F32)viewport.getWidth()), + ((windowCoordinate.mV[VY] - (F32)viewport.mBottom) / (F32)viewport.getHeight()), + windowCoordinate.mV[VZ], + 1.f); + + LLMatrix4a inv_mat; //Inverse transformation matrix + LLVector4a temp_vec; //Scratch vector + LLVector4a w; //Splatted W-component. + + inv_mat.setMul(projection,modelview); //inv_mat = projection*modelview + + float det = inv_mat.invert(); + + //Normalize. -1.0 : +1.0 + norm_view.mul(two); // norm_view *= vec4(.2f) + norm_view.sub(one); // norm_view -= vec4(1.f) + + inv_mat.rotate4(norm_view,temp_vec); //inv_mat * norm_view + + w.splat<3>(temp_vec); //w = temp_vec.wwww + + //If w == 0.f, use 1.f instead. Defer return if temp_vec.w == 0.f until after all SSE intrinsics. + LLVector4a div; + div.setSelectWithMask( w.equal( _mm_setzero_ps() ), one, w ); //float div = (w[N] == 0.f ? 
1.f : w[N]); + temp_vec.div(div); //temp_vec /= div; + + LLVector4Logical mask = temp_vec.equal(_mm_setzero_ps()); + if(mask.areAllSet(LLVector4Logical::MASK_W)) + return false; + + //End SSE intrinsics + + if(det == 0.f) + return false; + + object.set(temp_vec.getF32ptr()); + + return true; } void LLRender::pushMatrix() @@ -1433,24 +1672,21 @@ void LLRender::popMatrix() } } -void LLRender::loadMatrix(const GLfloat* m) +void LLRender::loadMatrix(const LLMatrix4a& mat) { flush(); - { - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].set_value((GLfloat*) m); - mMatHash[mMatrixMode]++; - } + + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]] = mat; + mMatHash[mMatrixMode]++; } -void LLRender::multMatrix(const GLfloat* m) +void LLRender::multMatrix(const LLMatrix4a& mat) { flush(); - { - glh::matrix4f mat((GLfloat*) m); + + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mul_affine(mat); + mMatHash[mMatrixMode]++; - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].mult_right(mat); - mMatHash[mMatrixMode]++; - } } void LLRender::matrixMode(U32 mode) @@ -1479,20 +1715,16 @@ void LLRender::loadIdentity() { flush(); - { - llassert_always(mMatrixMode < NUM_MATRIX_MODES) ; - - mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].make_identity(); - mMatHash[mMatrixMode]++; - } + mMatrix[mMatrixMode][mMatIdx[mMatrixMode]].setIdentity(); + mMatHash[mMatrixMode]++; } -const glh::matrix4f& LLRender::getModelviewMatrix() +const LLMatrix4a& LLRender::getModelviewMatrix() { return mMatrix[MM_MODELVIEW][mMatIdx[MM_MODELVIEW]]; } -const glh::matrix4f& LLRender::getProjectionMatrix() +const LLMatrix4a& LLRender::getProjectionMatrix() { return mMatrix[MM_PROJECTION][mMatIdx[MM_PROJECTION]]; } diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h index 66786d6ee..82e212370 100644 --- a/indra/llrender/llrender.h +++ b/indra/llrender/llrender.h @@ -38,18 +38,19 @@ #include "v3math.h" #include "v4coloru.h" #include "v4math.h" +#include "llmatrix4a.h" #include "llalignedarray.h" #include "llstrider.h" #include 
"llpointer.h" #include "llglheaders.h" -#include "llmatrix4a.h" -#include "glh/glh_linear.h" +#include "llrect.h" class LLVertexBuffer; class LLCubeMap; class LLImageGL; class LLRenderTarget; class LLTexture ; +class LLMatrix4a; #define LL_MATRIX_STACK_DEPTH 32 @@ -257,6 +258,8 @@ protected: F32 mSpotExponent; F32 mSpotCutoff; }; + +LL_ALIGN_PREFIX(16) class LLRender { friend class LLTexUnit; @@ -343,21 +346,32 @@ public: // Needed when the render context has changed and invalidated the current state void refreshState(void); + LLMatrix4a genRot(const GLfloat& a, const LLVector4a& axis) const; + LLMatrix4a genRot(const GLfloat& a, const GLfloat& x, const GLfloat& y, const GLfloat& z) const { return genRot(a,LLVector4a(x,y,z)); } + LLMatrix4a genOrtho(const GLfloat& left, const GLfloat& right, const GLfloat& bottom, const GLfloat& top, const GLfloat& znear, const GLfloat& zfar) const; + LLMatrix4a genPersp(const GLfloat& fovy, const GLfloat& aspect, const GLfloat& znear, const GLfloat& zfar) const; + LLMatrix4a genLook(const LLVector3& pos_in, const LLVector3& dir_in, const LLVector3& up_in) const; + const LLMatrix4a& genNDCtoWC() const; + void translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z); void scalef(const GLfloat& x, const GLfloat& y, const GLfloat& z); + //rotatef requires generation of a transform matrix involving sine/cosine. If rotating by a constant value, use genRot, store the result in a static variable, and pass that var to rotatef. 
+ void rotatef(const LLMatrix4a& rot); void rotatef(const GLfloat& a, const GLfloat& x, const GLfloat& y, const GLfloat& z); void ortho(F32 left, F32 right, F32 bottom, F32 top, F32 zNear, F32 zFar); + bool projectf(const LLVector3& object, const LLMatrix4a& modelview, const LLMatrix4a& projection, const LLRect& viewport, LLVector3& windowCoordinate); + bool unprojectf(const LLVector3& windowCoordinate, const LLMatrix4a& modelview, const LLMatrix4a& projection, const LLRect& viewport, LLVector3& object); void pushMatrix(); void popMatrix(); - void loadMatrix(const GLfloat* m); + void loadMatrix(const LLMatrix4a& mat); void loadIdentity(); - void multMatrix(const GLfloat* m); + void multMatrix(const LLMatrix4a& mat); void matrixMode(U32 mode); U32 getMatrixMode(); - const glh::matrix4f& getModelviewMatrix(); - const glh::matrix4f& getProjectionMatrix(); + const LLMatrix4a& getModelviewMatrix(); + const LLMatrix4a& getProjectionMatrix(); void syncMatrices(); void syncLightState(); @@ -447,7 +461,7 @@ private: U32 mMatrixMode; U32 mMatIdx[NUM_MATRIX_MODES]; U32 mMatHash[NUM_MATRIX_MODES]; - glh::matrix4f mMatrix[NUM_MATRIX_MODES][LL_MATRIX_STACK_DEPTH]; + LL_ALIGN_16(LLMatrix4a mMatrix[NUM_MATRIX_MODES][LL_MATRIX_STACK_DEPTH]); U32 mCurMatHash[NUM_MATRIX_MODES]; U32 mLightHash; LLColor4 mAmbientLightColor; @@ -478,13 +492,14 @@ private: LLAlignedArray mUIOffset; LLAlignedArray mUIScale; -}; +} LL_ALIGN_POSTFIX(16); -extern F32 gGLModelView[16]; -extern F32 gGLLastModelView[16]; -extern F32 gGLLastProjection[16]; -extern F32 gGLPreviousModelView[16]; -extern F32 gGLProjection[16]; + +extern LLMatrix4a gGLModelView; +extern LLMatrix4a gGLLastModelView; +extern LLMatrix4a gGLLastProjection; +extern LLMatrix4a gGLPreviousModelView; +extern LLMatrix4a gGLProjection; extern S32 gGLViewport[4]; extern LLRender gGL; diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 5171dcf87..00f4e3dff 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ 
b/indra/llrender/llvertexbuffer.cpp @@ -2044,6 +2044,10 @@ bool LLVertexBuffer::getNormalStrider(LLStrider& strider, S32 index, { return VertexBufferStrider::get(*this, strider, index, count, map_range); } +bool LLVertexBuffer::getNormalStrider(LLStrider& strider, S32 index, S32 count, bool map_range) +{ + return VertexBufferStrider::get(*this, strider, index, count, map_range); +} bool LLVertexBuffer::getTangentStrider(LLStrider& strider, S32 index, S32 count, bool map_range) { return VertexBufferStrider::get(*this, strider, index, count, map_range); diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 77c753fc9..28008b767 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -250,6 +250,7 @@ public: bool getTexCoord1Strider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getTexCoord2Strider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getNormalStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); + bool getNormalStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getTangentStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getTangentStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); bool getColorStrider(LLStrider& strider, S32 index=0, S32 count = -1, bool map_range = false); diff --git a/indra/llvfs/lldir_win32.cpp b/indra/llvfs/lldir_win32.cpp index f611c6594..9a580d50f 100644 --- a/indra/llvfs/lldir_win32.cpp +++ b/indra/llvfs/lldir_win32.cpp @@ -62,13 +62,13 @@ LLDir_Win32::LLDir_Win32() if((*pSHGetKnownFolderPath)(FOLDERID_RoamingAppData, 0, NULL, &pPath) == S_OK) wcscpy_s(w_str,pPath); else - SHGetSpecialFolderPath(NULL, w_str, CSIDL_APPDATA, TRUE); + SHGetFolderPath(NULL, CSIDL_APPDATA | CSIDL_FLAG_CREATE, NULL, SHGFP_TYPE_DEFAULT, w_str ); if(pPath) CoTaskMemFree(pPath); } else //XP doesn't support 
SHGetKnownFolderPath { - SHGetSpecialFolderPath(NULL, w_str, CSIDL_APPDATA, TRUE); + SHGetFolderPath(NULL, CSIDL_APPDATA | CSIDL_FLAG_CREATE, NULL, SHGFP_TYPE_DEFAULT, w_str ); } mOSUserDir = utf16str_to_utf8str(llutf16string(w_str)); @@ -91,13 +91,13 @@ LLDir_Win32::LLDir_Win32() if((*pSHGetKnownFolderPath)(FOLDERID_LocalAppData, 0, NULL, &pPath) == S_OK) wcscpy_s(w_str,pPath); else - SHGetSpecialFolderPath(NULL, w_str, CSIDL_LOCAL_APPDATA, TRUE); + SHGetFolderPath(NULL, CSIDL_LOCAL_APPDATA | CSIDL_FLAG_CREATE, NULL, SHGFP_TYPE_DEFAULT, w_str ); if(pPath) CoTaskMemFree(pPath); } else //XP doesn't support SHGetKnownFolderPath { - SHGetSpecialFolderPath(NULL, w_str, CSIDL_LOCAL_APPDATA, TRUE); + SHGetFolderPath(NULL, CSIDL_LOCAL_APPDATA | CSIDL_FLAG_CREATE, NULL, SHGFP_TYPE_DEFAULT, w_str ); } if(shell) diff --git a/indra/llwindow/glh/glh_linear.h b/indra/llwindow/glh/glh_linear.h deleted file mode 100644 index c46b81531..000000000 --- a/indra/llwindow/glh/glh_linear.h +++ /dev/null @@ -1,1621 +0,0 @@ -/* - glh - is a platform-indepenedent C++ OpenGL helper library - - - Copyright (c) 2000 Cass Everitt - Copyright (c) 2000 NVIDIA Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or - without modification, are permitted provided that the following - conditions are met: - - * Redistributions of source code must retain the above - copyright notice, this list of conditions and the following - disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - * The names of contributors to this software may not be used - to endorse or promote products derived from this software - without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - - Cass Everitt - cass@r3.nu -*/ - -/* -glh_linear.h -*/ - -// Author: Cass W. Everitt - -#ifndef GLH_LINEAR_H -#define GLH_LINEAR_H - -#include -#include -#include - -// only supports float for now... -#define GLH_REAL_IS_FLOAT - -#ifdef GLH_REAL_IS_FLOAT -# define GLH_REAL float -# define GLH_REAL_NAMESPACE ns_float -#endif - -#define GLH_QUATERNION_NORMALIZATION_THRESHOLD 64 - -#define GLH_RAD_TO_DEG GLH_REAL(57.2957795130823208767981548141052) -#define GLH_DEG_TO_RAD GLH_REAL(0.0174532925199432957692369076848861) -#define GLH_ZERO GLH_REAL(0.0) -#define GLH_ONE GLH_REAL(1.0) -#define GLH_TWO GLH_REAL(2.0) -#define GLH_EPSILON GLH_REAL(10e-6) -#define GLH_PI GLH_REAL(3.1415926535897932384626433832795) - - -namespace glh -{ - inline bool equivalent(GLH_REAL a, GLH_REAL b) { return b - GLH_EPSILON < a && a < b + GLH_EPSILON; } - - inline GLH_REAL to_degrees(GLH_REAL radians) { return radians*GLH_RAD_TO_DEG; } - inline GLH_REAL to_radians(GLH_REAL degrees) { return degrees*GLH_DEG_TO_RAD; } - - // forward declarations for friend template functions. - template class vec; - - // forward declarations for friend template functions. 
- template - bool operator == ( const vec & v1, const vec & v2 ); - - // forward declarations for friend template functions. - template - bool operator != ( const vec & v1, const vec & v2 ); - - template - class vec - { - public: - int size() const { return N; } - - vec(const T & t = T()) - { for(int i = 0; i < N; i++) v[i] = t; } - vec(const T * tp) - { for(int i = 0; i < N; i++) v[i] = tp[i]; } - - const T * get_value() const - { return v; } - - - T dot( const vec & rhs ) const - { - T r = 0; - for(int i = 0; i < N; i++) r += v[i]*rhs.v[i]; - return r; - } - - T length() const - { - T r = 0; - for(int i = 0; i < N; i++) r += v[i]*v[i]; - return T(sqrt(r)); - } - - T square_norm() const - { - T r = 0; - for(int i = 0; i < N; i++) r += v[i]*v[i]; - return r; - } - - void negate() - { for(int i = 0; i < N; i++) v[i] = -v[i]; } - - - T normalize() - { - T sum(0); - for(int i = 0; i < N; i++) - sum += v[i]*v[i]; - sum = T(sqrt(sum)); - if (sum > GLH_EPSILON) - for(int i = 0; i < N; i++) - v[i] /= sum; - return sum; - } - - - vec & set_value( const T * rhs ) - { for(int i = 0; i < N; i++) v[i] = rhs[i]; return *this; } - - T & operator [] ( int i ) - { return v[i]; } - - const T & operator [] ( int i ) const - { return v[i]; } - - vec & operator *= ( T d ) - { for(int i = 0; i < N; i++) v[i] *= d; return *this;} - - vec & operator *= ( const vec & u ) - { for(int i = 0; i < N; i++) v[i] *= u[i]; return *this;} - - vec & operator /= ( T d ) - { if(d == 0) return *this; for(int i = 0; i < N; i++) v[i] /= d; return *this;} - - vec & operator += ( const vec & u ) - { for(int i = 0; i < N; i++) v[i] += u.v[i]; return *this;} - - vec & operator -= ( const vec & u ) - { for(int i = 0; i < N; i++) v[i] -= u.v[i]; return *this;} - - - vec operator - () const - { vec rv = v; rv.negate(); return rv; } - - vec operator + ( const vec &v) const - { vec rt(*this); return rt += v; } - - vec operator - ( const vec &v) const - { vec rt(*this); return rt -= v; } - - vec operator * ( T d) 
const - { vec rt(*this); return rt *= d; } - - friend bool operator == <> ( const vec &v1, const vec &v2 ); - friend bool operator != <> ( const vec &v1, const vec &v2 ); - - - //protected: - T v[N]; - }; - - - - // vector friend operators - - template inline - vec operator * ( const vec & b, T d ) - { - vec rt(b); - return rt *= d; - } - - template inline - vec operator * ( T d, const vec & b ) - { return b*d; } - - template inline - vec operator * ( const vec & b, const vec & d ) - { - vec rt(b); - return rt *= d; - } - - template inline - vec operator / ( const vec & b, T d ) - { vec rt(b); return rt /= d; } - - template inline - vec operator + ( const vec & v1, const vec & v2 ) - { vec rt(v1); return rt += v2; } - - template inline - vec operator - ( const vec & v1, const vec & v2 ) - { vec rt(v1); return rt -= v2; } - - - template inline - bool operator == ( const vec & v1, const vec & v2 ) - { - for(int i = 0; i < N; i++) - if(v1.v[i] != v2.v[i]) - return false; - return true; - } - - template inline - bool operator != ( const vec & v1, const vec & v2 ) - { return !(v1 == v2); } - - - typedef vec<3,unsigned char> vec3ub; - typedef vec<4,unsigned char> vec4ub; - - - - - - namespace GLH_REAL_NAMESPACE - { - typedef GLH_REAL real; - - class line; - class plane; - class matrix4; - class quaternion; - typedef quaternion rotation; - - class vec2 : public vec<2,real> - { - public: - vec2(const real & t = real()) : vec<2,real>(t) - {} - vec2(const vec<2,real> & t) : vec<2,real>(t) - {} - vec2(const real * tp) : vec<2,real>(tp) - {} - - vec2(real x, real y ) - { v[0] = x; v[1] = y; } - - void get_value(real & x, real & y) const - { x = v[0]; y = v[1]; } - - vec2 & set_value( const real & x, const real & y) - { v[0] = x; v[1] = y; return *this; } - - }; - - - class vec3 : public vec<3,real> - { - public: - vec3(const real & t = real()) : vec<3,real>(t) - {} - vec3(const vec<3,real> & t) : vec<3,real>(t) - {} - vec3(const real * tp) : vec<3,real>(tp) - {} - - vec3(real 
x, real y, real z) - { v[0] = x; v[1] = y; v[2] = z; } - - void get_value(real & x, real & y, real & z) const - { x = v[0]; y = v[1]; z = v[2]; } - - vec3 cross( const vec3 &rhs ) const - { - vec3 rt; - rt.v[0] = v[1]*rhs.v[2]-v[2]*rhs.v[1]; - rt.v[1] = v[2]*rhs.v[0]-v[0]*rhs.v[2]; - rt.v[2] = v[0]*rhs.v[1]-v[1]*rhs.v[0]; - return rt; - } - - vec3 & set_value( const real & x, const real & y, const real & z) - { v[0] = x; v[1] = y; v[2] = z; return *this; } - - }; - - - class vec4 : public vec<4,real> - { - public: - vec4(const real & t = real()) : vec<4,real>(t) - {} - vec4(const vec<4,real> & t) : vec<4,real>(t) - {} - - vec4(const vec<3,real> & t, real fourth) - - { v[0] = t.v[0]; v[1] = t.v[1]; v[2] = t.v[2]; v[3] = fourth; } - vec4(const real * tp) : vec<4,real>(tp) - {} - vec4(real x, real y, real z, real w) - { v[0] = x; v[1] = y; v[2] = z; v[3] = w; } - - void get_value(real & x, real & y, real & z, real & w) const - { x = v[0]; y = v[1]; z = v[2]; w = v[3]; } - - vec4 & set_value( const real & x, const real & y, const real & z, const real & w) - { v[0] = x; v[1] = y; v[2] = z; v[3] = w; return *this; } - }; - - inline - vec3 homogenize(const vec4 & v) - { - vec3 rt; - assert(v.v[3] != GLH_ZERO); - rt.v[0] = v.v[0]/v.v[3]; - rt.v[1] = v.v[1]/v.v[3]; - rt.v[2] = v.v[2]/v.v[3]; - return rt; - } - - - - class line - { - public: - - line() - { set_value(vec3(0,0,0),vec3(0,0,1)); } - - line( const vec3 & p0, const vec3 &p1) - { set_value(p0,p1); } - - void set_value( const vec3 &p0, const vec3 &p1) - { - position = p0; - direction = p1-p0; - direction.normalize(); - } - - bool get_closest_points(const line &line2, - vec3 &pointOnThis, - vec3 &pointOnThat) - { - - // quick check to see if parallel -- if so, quit. 
- if(fabs(direction.dot(line2.direction)) == 1.0) - return 0; - line l2 = line2; - - // Algorithm: Brian Jean - // - register real u; - register real v; - vec3 Vr = direction; - vec3 Vs = l2.direction; - register real Vr_Dot_Vs = Vr.dot(Vs); - register real detA = real(1.0 - (Vr_Dot_Vs * Vr_Dot_Vs)); - vec3 C = l2.position - position; - register real C_Dot_Vr = C.dot(Vr); - register real C_Dot_Vs = C.dot(Vs); - - u = (C_Dot_Vr - Vr_Dot_Vs * C_Dot_Vs)/detA; - v = (C_Dot_Vr * Vr_Dot_Vs - C_Dot_Vs)/detA; - - pointOnThis = position; - pointOnThis += direction * u; - pointOnThat = l2.position; - pointOnThat += l2.direction * v; - - return 1; - } - - vec3 get_closest_point(const vec3 &point) - { - vec3 np = point - position; - vec3 rp = direction*direction.dot(np)+position; - return rp; - } - - const vec3 & get_position() const {return position;} - - const vec3 & get_direction() const {return direction;} - - //protected: - vec3 position; - vec3 direction; - }; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // matrix - - - class matrix4 - { - - public: - - matrix4() { make_identity(); } - - matrix4( real r ) - { set_value(r); } - - matrix4( real * m ) - { set_value(m); } - - matrix4( real a00, real a01, real a02, real a03, - real a10, real a11, real a12, real a13, - real a20, real a21, real a22, real a23, - real a30, real a31, real a32, real a33 ) - { - element(0,0) = a00; - element(0,1) = a01; - element(0,2) = a02; - element(0,3) = a03; - - element(1,0) = a10; - element(1,1) = a11; - element(1,2) = a12; - element(1,3) = a13; - - element(2,0) = a20; - element(2,1) = a21; - element(2,2) = a22; - element(2,3) = a23; - - element(3,0) = a30; - element(3,1) = a31; - element(3,2) = a32; - element(3,3) = a33; - } - - - void get_value( real * mp ) const - { - int c = 0; - for(int j=0; j < 4; j++) - for(int i=0; i < 4; i++) - mp[c++] = element(i,j); - } - - - const real * get_value() const - { return m; } - - void set_value( real * mp) - { - int c = 0; - for(int j=0; 
j < 4; j++) - for(int i=0; i < 4; i++) - element(i,j) = mp[c++]; - } - - void set_value( real r ) - { - for(int i=0; i < 4; i++) - for(int j=0; j < 4; j++) - element(i,j) = r; - } - - void make_identity() - { - element(0,0) = 1.0; - element(0,1) = 0.0; - element(0,2) = 0.0; - element(0,3) = 0.0; - - element(1,0) = 0.0; - element(1,1) = 1.0; - element(1,2) = 0.0; - element(1,3) = 0.0; - - element(2,0) = 0.0; - element(2,1) = 0.0; - element(2,2) = 1.0; - element(2,3) = 0.0; - - element(3,0) = 0.0; - element(3,1) = 0.0; - element(3,2) = 0.0; - element(3,3) = 1.0; - } - - - static matrix4 identity() - { - static matrix4 mident ( - 1.0, 0.0, 0.0, 0.0, - 0.0, 1.0, 0.0, 0.0, - 0.0, 0.0, 1.0, 0.0, - 0.0, 0.0, 0.0, 1.0 ); - return mident; - } - - - void set_scale( real s ) - { - element(0,0) = s; - element(1,1) = s; - element(2,2) = s; - } - - void set_scale( const vec3 & s ) - { - element(0,0) = s.v[0]; - element(1,1) = s.v[1]; - element(2,2) = s.v[2]; - } - - - void set_translate( const vec3 & t ) - { - element(0,3) = t.v[0]; - element(1,3) = t.v[1]; - element(2,3) = t.v[2]; - } - - void set_row(int r, const vec4 & t) - { - element(r,0) = t.v[0]; - element(r,1) = t.v[1]; - element(r,2) = t.v[2]; - element(r,3) = t.v[3]; - } - - void set_column(int c, const vec4 & t) - { - element(0,c) = t.v[0]; - element(1,c) = t.v[1]; - element(2,c) = t.v[2]; - element(3,c) = t.v[3]; - } - - - void get_row(int r, vec4 & t) const - { - t.v[0] = element(r,0); - t.v[1] = element(r,1); - t.v[2] = element(r,2); - t.v[3] = element(r,3); - } - - vec4 get_row(int r) const - { - vec4 v; get_row(r, v); - return v; - } - - void get_column(int c, vec4 & t) const - { - t.v[0] = element(0,c); - t.v[1] = element(1,c); - t.v[2] = element(2,c); - t.v[3] = element(3,c); - } - - vec4 get_column(int c) const - { - vec4 v; get_column(c, v); - return v; - } - - matrix4 inverse() const - { - matrix4 minv; - - real r1[8], r2[8], r3[8], r4[8]; - real *s[4], *tmprow; - - s[0] = &r1[0]; - s[1] = &r2[0]; - s[2] = 
&r3[0]; - s[3] = &r4[0]; - - register int i,j,p,jj; - for(i=0;i<4;i++) - { - for(j=0;j<4;j++) - { - s[i][j] = element(i,j); - if(i==j) s[i][j+4] = 1.0; - else s[i][j+4] = 0.0; - } - } - real scp[4]; - for(i=0;i<4;i++) - { - scp[i] = real(fabs(s[i][0])); - for(j=1;j<4;j++) - if(real(fabs(s[i][j])) > scp[i]) scp[i] = real(fabs(s[i][j])); - if(scp[i] == 0.0) return minv; // singular matrix! - } - - int pivot_to; - real scp_max; - for(i=0;i<4;i++) - { - // select pivot row - pivot_to = i; - scp_max = real(fabs(s[i][i]/scp[i])); - // find out which row should be on top - for(p=i+1;p<4;p++) - if(real(fabs(s[p][i]/scp[p])) > scp_max) - { scp_max = real(fabs(s[p][i]/scp[p])); pivot_to = p; } - // Pivot if necessary - if(pivot_to != i) - { - tmprow = s[i]; - s[i] = s[pivot_to]; - s[pivot_to] = tmprow; - real tmpscp; - tmpscp = scp[i]; - scp[i] = scp[pivot_to]; - scp[pivot_to] = tmpscp; - } - - real mji; - // perform gaussian elimination - for(j=i+1;j<4;j++) - { - mji = s[j][i]/s[i][i]; - s[j][i] = 0.0; - for(jj=i+1;jj<8;jj++) - s[j][jj] -= mji*s[i][jj]; - } - } - if(s[3][3] == 0.0) return minv; // singular matrix! - - // - // Now we have an upper triangular matrix. 
- // - // x x x x | y y y y - // 0 x x x | y y y y - // 0 0 x x | y y y y - // 0 0 0 x | y y y y - // - // we'll back substitute to get the inverse - // - // 1 0 0 0 | z z z z - // 0 1 0 0 | z z z z - // 0 0 1 0 | z z z z - // 0 0 0 1 | z z z z - // - - real mij; - for(i=3;i>0;i--) - { - for(j=i-1;j > -1; j--) - { - mij = s[j][i]/s[i][i]; - for(jj=j+1;jj<8;jj++) - s[j][jj] -= mij*s[i][jj]; - } - } - - for(i=0;i<4;i++) - for(j=0;j<4;j++) - minv(i,j) = s[i][j+4] / s[i][i]; - - return minv; - } - - - matrix4 transpose() const - { - matrix4 mtrans; - - for(int i=0;i<4;i++) - for(int j=0;j<4;j++) - mtrans(i,j) = element(j,i); - return mtrans; - } - - matrix4 & mult_right( const matrix4 & b ) - { - matrix4 mt(*this); - set_value(real(0)); - - for(int i=0; i < 4; i++) - for(int j=0; j < 4; j++) - for(int c=0; c < 4; c++) - element(i,j) += mt(i,c) * b(c,j); - return *this; - } - - matrix4 & mult_left( const matrix4 & b ) - { - matrix4 mt(*this); - set_value(real(0)); - - for(int i=0; i < 4; i++) - for(int j=0; j < 4; j++) - for(int c=0; c < 4; c++) - element(i,j) += b(i,c) * mt(c,j); - return *this; - } - - // dst = M * src - void mult_matrix_vec( const vec3 &src, vec3 &dst ) const - { - real w = ( - src.v[0] * element(3,0) + - src.v[1] * element(3,1) + - src.v[2] * element(3,2) + - element(3,3) ); - - assert(w != GLH_ZERO); - - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(0,1) + - src.v[2] * element(0,2) + - element(0,3) ) / w; - dst.v[1] = ( - src.v[0] * element(1,0) + - src.v[1] * element(1,1) + - src.v[2] * element(1,2) + - element(1,3) ) / w; - dst.v[2] = ( - src.v[0] * element(2,0) + - src.v[1] * element(2,1) + - src.v[2] * element(2,2) + - element(2,3) ) / w; - } - - void mult_matrix_vec( vec3 & src_and_dst) const - { mult_matrix_vec(vec3(src_and_dst), src_and_dst); } - - - // dst = src * M - void mult_vec_matrix( const vec3 &src, vec3 &dst ) const - { - real w = ( - src.v[0] * element(0,3) + - src.v[1] * element(1,3) + - src.v[2] * element(2,3) + - 
element(3,3) ); - - assert(w != GLH_ZERO); - - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(1,0) + - src.v[2] * element(2,0) + - element(3,0) ) / w; - dst.v[1] = ( - src.v[0] * element(0,1) + - src.v[1] * element(1,1) + - src.v[2] * element(2,1) + - element(3,1) ) / w; - dst.v[2] = ( - src.v[0] * element(0,2) + - src.v[1] * element(1,2) + - src.v[2] * element(2,2) + - element(3,2) ) / w; - } - - - void mult_vec_matrix( vec3 & src_and_dst) const - { mult_vec_matrix(vec3(src_and_dst), src_and_dst); } - - // dst = M * src - void mult_matrix_vec( const vec4 &src, vec4 &dst ) const - { - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(0,1) + - src.v[2] * element(0,2) + - src.v[3] * element(0,3)); - dst.v[1] = ( - src.v[0] * element(1,0) + - src.v[1] * element(1,1) + - src.v[2] * element(1,2) + - src.v[3] * element(1,3)); - dst.v[2] = ( - src.v[0] * element(2,0) + - src.v[1] * element(2,1) + - src.v[2] * element(2,2) + - src.v[3] * element(2,3)); - dst.v[3] = ( - src.v[0] * element(3,0) + - src.v[1] * element(3,1) + - src.v[2] * element(3,2) + - src.v[3] * element(3,3)); - } - - void mult_matrix_vec( vec4 & src_and_dst) const - { mult_matrix_vec(vec4(src_and_dst), src_and_dst); } - - - // dst = src * M - void mult_vec_matrix( const vec4 &src, vec4 &dst ) const - { - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(1,0) + - src.v[2] * element(2,0) + - src.v[3] * element(3,0)); - dst.v[1] = ( - src.v[0] * element(0,1) + - src.v[1] * element(1,1) + - src.v[2] * element(2,1) + - src.v[3] * element(3,1)); - dst.v[2] = ( - src.v[0] * element(0,2) + - src.v[1] * element(1,2) + - src.v[2] * element(2,2) + - src.v[3] * element(3,2)); - dst.v[3] = ( - src.v[0] * element(0,3) + - src.v[1] * element(1,3) + - src.v[2] * element(2,3) + - src.v[3] * element(3,3)); - } - - - void mult_vec_matrix( vec4 & src_and_dst) const - { mult_vec_matrix(vec4(src_and_dst), src_and_dst); } - - - // dst = M * src - void mult_matrix_dir( const vec3 &src, vec3 
&dst ) const - { - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(0,1) + - src.v[2] * element(0,2) ) ; - dst.v[1] = ( - src.v[0] * element(1,0) + - src.v[1] * element(1,1) + - src.v[2] * element(1,2) ) ; - dst.v[2] = ( - src.v[0] * element(2,0) + - src.v[1] * element(2,1) + - src.v[2] * element(2,2) ) ; - } - - - void mult_matrix_dir( vec3 & src_and_dst) const - { mult_matrix_dir(vec3(src_and_dst), src_and_dst); } - - - // dst = src * M - void mult_dir_matrix( const vec3 &src, vec3 &dst ) const - { - dst.v[0] = ( - src.v[0] * element(0,0) + - src.v[1] * element(1,0) + - src.v[2] * element(2,0) ) ; - dst.v[1] = ( - src.v[0] * element(0,1) + - src.v[1] * element(1,1) + - src.v[2] * element(2,1) ) ; - dst.v[2] = ( - src.v[0] * element(0,2) + - src.v[1] * element(1,2) + - src.v[2] * element(2,2) ) ; - } - - - void mult_dir_matrix( vec3 & src_and_dst) const - { mult_dir_matrix(vec3(src_and_dst), src_and_dst); } - - - real & operator () (int row, int col) - { return element(row,col); } - - const real & operator () (int row, int col) const - { return element(row,col); } - - real & element (int row, int col) - { return m[row | (col<<2)]; } - - const real & element (int row, int col) const - { return m[row | (col<<2)]; } - - matrix4 & operator *= ( const matrix4 & mat ) - { - mult_right( mat ); - return *this; - } - - matrix4 & operator *= ( const real & r ) - { - for (int i = 0; i < 4; ++i) - { - element(0,i) *= r; - element(1,i) *= r; - element(2,i) *= r; - element(3,i) *= r; - } - return *this; - } - - matrix4 & operator += ( const matrix4 & mat ) - { - for (int i = 0; i < 4; ++i) - { - element(0,i) += mat.element(0,i); - element(1,i) += mat.element(1,i); - element(2,i) += mat.element(2,i); - element(3,i) += mat.element(3,i); - } - return *this; - } - - friend matrix4 operator * ( const matrix4 & m1, const matrix4 & m2 ); - friend bool operator == ( const matrix4 & m1, const matrix4 & m2 ); - friend bool operator != ( const matrix4 & m1, const matrix4 & m2 
); - - //protected: - real m[16]; - }; - - inline - matrix4 operator * ( const matrix4 & m1, const matrix4 & m2 ) - { - matrix4 product; - - product = m1; - product.mult_right(m2); - - return product; - } - - inline - bool operator ==( const matrix4 &m1, const matrix4 &m2 ) - { - return ( - m1(0,0) == m2(0,0) && - m1(0,1) == m2(0,1) && - m1(0,2) == m2(0,2) && - m1(0,3) == m2(0,3) && - m1(1,0) == m2(1,0) && - m1(1,1) == m2(1,1) && - m1(1,2) == m2(1,2) && - m1(1,3) == m2(1,3) && - m1(2,0) == m2(2,0) && - m1(2,1) == m2(2,1) && - m1(2,2) == m2(2,2) && - m1(2,3) == m2(2,3) && - m1(3,0) == m2(3,0) && - m1(3,1) == m2(3,1) && - m1(3,2) == m2(3,2) && - m1(3,3) == m2(3,3) ); - } - - inline - bool operator != ( const matrix4 & m1, const matrix4 & m2 ) - { return !( m1 == m2 ); } - - - - - - - - - - - - - - class quaternion - { - public: - - quaternion() - { - *this = identity(); - } - - quaternion( const real v[4] ) - { - set_value( v ); - } - - - quaternion( real q0, real q1, real q2, real q3 ) - { - set_value( q0, q1, q2, q3 ); - } - - - quaternion( const matrix4 & m ) - { - set_value( m ); - } - - - quaternion( const vec3 &axis, real radians ) - { - set_value( axis, radians ); - } - - - quaternion( const vec3 &rotateFrom, const vec3 &rotateTo ) - { - set_value( rotateFrom, rotateTo ); - } - - quaternion( const vec3 & from_look, const vec3 & from_up, - const vec3 & to_look, const vec3& to_up) - { - set_value(from_look, from_up, to_look, to_up); - } - - const real * get_value() const - { - return &q[0]; - } - - void get_value( real &q0, real &q1, real &q2, real &q3 ) const - { - q0 = q[0]; - q1 = q[1]; - q2 = q[2]; - q3 = q[3]; - } - - quaternion & set_value( real q0, real q1, real q2, real q3 ) - { - q[0] = q0; - q[1] = q1; - q[2] = q2; - q[3] = q3; - counter = 0; - return *this; - } - - void get_value( vec3 &axis, real &radians ) const - { - radians = real(acos( q[3] ) * GLH_TWO); - if ( radians == GLH_ZERO ) - axis = vec3( 0.0, 0.0, 1.0 ); - else - { - axis.v[0] = q[0]; - 
axis.v[1] = q[1]; - axis.v[2] = q[2]; - axis.normalize(); - } - } - - void get_value( matrix4 & m ) const - { - real s, xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz; - - real norm = q[0] * q[0] + q[1] * q[1] + q[2] * q[2] + q[3] * q[3]; - - s = (equivalent(norm,GLH_ZERO)) ? GLH_ZERO : ( GLH_TWO / norm ); - - xs = q[0] * s; - ys = q[1] * s; - zs = q[2] * s; - - wx = q[3] * xs; - wy = q[3] * ys; - wz = q[3] * zs; - - xx = q[0] * xs; - xy = q[0] * ys; - xz = q[0] * zs; - - yy = q[1] * ys; - yz = q[1] * zs; - zz = q[2] * zs; - - m(0,0) = real( GLH_ONE - ( yy + zz )); - m(1,0) = real ( xy + wz ); - m(2,0) = real ( xz - wy ); - - m(0,1) = real ( xy - wz ); - m(1,1) = real ( GLH_ONE - ( xx + zz )); - m(2,1) = real ( yz + wx ); - - m(0,2) = real ( xz + wy ); - m(1,2) = real ( yz - wx ); - m(2,2) = real ( GLH_ONE - ( xx + yy )); - - m(3,0) = m(3,1) = m(3,2) = m(0,3) = m(1,3) = m(2,3) = GLH_ZERO; - m(3,3) = GLH_ONE; - } - - quaternion & set_value( const real * qp ) - { - memcpy(q,qp,sizeof(real) * 4); - - counter = 0; - return *this; - } - - quaternion & set_value( const matrix4 & m ) - { - real tr, s; - int i, j, k; - const int nxt[3] = { 1, 2, 0 }; - - tr = m(0,0) + m(1,1) + m(2,2); - - if ( tr > GLH_ZERO ) - { - s = real(sqrt( tr + m(3,3) )); - q[3] = real ( s * 0.5 ); - s = real(0.5) / s; - - q[0] = real ( ( m(1,2) - m(2,1) ) * s ); - q[1] = real ( ( m(2,0) - m(0,2) ) * s ); - q[2] = real ( ( m(0,1) - m(1,0) ) * s ); - } - else - { - i = 0; - if ( m(1,1) > m(0,0) ) - i = 1; - - if ( m(2,2) > m(i,i) ) - i = 2; - - j = nxt[i]; - k = nxt[j]; - - s = real(sqrt( ( m(i,j) - ( m(j,j) + m(k,k) )) + GLH_ONE )); - - q[i] = real ( s * 0.5 ); - s = real(0.5 / s); - - q[3] = real ( ( m(j,k) - m(k,j) ) * s ); - q[j] = real ( ( m(i,j) + m(j,i) ) * s ); - q[k] = real ( ( m(i,k) + m(k,i) ) * s ); - } - - counter = 0; - return *this; - } - - quaternion & set_value( const vec3 &axis, real theta ) - { - real sqnorm = axis.square_norm(); - - if (sqnorm <= GLH_EPSILON) - { - // axis too 
small. - x = y = z = 0.0; - w = 1.0; - } - else - { - theta *= real(0.5); - real sin_theta = real(sin(theta)); - - if (!equivalent(sqnorm,GLH_ONE)) - sin_theta /= real(sqrt(sqnorm)); - x = sin_theta * axis.v[0]; - y = sin_theta * axis.v[1]; - z = sin_theta * axis.v[2]; - w = real(cos(theta)); - } - return *this; - } - - quaternion & set_value( const vec3 & rotateFrom, const vec3 & rotateTo ) - { - vec3 p1, p2; - real alpha; - - p1 = rotateFrom; - p1.normalize(); - p2 = rotateTo; - p2.normalize(); - - alpha = p1.dot(p2); - - if(equivalent(alpha,GLH_ONE)) - { - *this = identity(); - return *this; - } - - // ensures that the anti-parallel case leads to a positive dot - if(equivalent(alpha,-GLH_ONE)) - { - vec3 v; - - if(p1.v[0] != p1.v[1] || p1.v[0] != p1.v[2]) - v = vec3(p1.v[1], p1.v[2], p1.v[0]); - else - v = vec3(-p1.v[0], p1.v[1], p1.v[2]); - - v -= p1 * p1.dot(v); - v.normalize(); - - set_value(v, GLH_PI); - return *this; - } - - p1 = p1.cross(p2); - p1.normalize(); - set_value(p1,real(acos(alpha))); - - counter = 0; - return *this; - } - - quaternion & set_value( const vec3 & from_look, const vec3 & from_up, - const vec3 & to_look, const vec3 & to_up) - { - quaternion r_look = quaternion(from_look, to_look); - - vec3 rotated_from_up(from_up); - r_look.mult_vec(rotated_from_up); - - quaternion r_twist = quaternion(rotated_from_up, to_up); - - *this = r_twist; - *this *= r_look; - return *this; - } - - quaternion & operator *= ( const quaternion & qr ) - { - quaternion ql(*this); - - w = ql.w * qr.w - ql.x * qr.x - ql.y * qr.y - ql.z * qr.z; - x = ql.w * qr.x + ql.x * qr.w + ql.y * qr.z - ql.z * qr.y; - y = ql.w * qr.y + ql.y * qr.w + ql.z * qr.x - ql.x * qr.z; - z = ql.w * qr.z + ql.z * qr.w + ql.x * qr.y - ql.y * qr.x; - - counter += qr.counter; - counter++; - counter_normalize(); - return *this; - } - - void normalize() - { - real rnorm = GLH_ONE / real(sqrt(w * w + x * x + y * y + z * z)); - if (equivalent(rnorm, GLH_ZERO)) - return; - x *= rnorm; - y *= 
rnorm; - z *= rnorm; - w *= rnorm; - counter = 0; - } - - friend bool operator == ( const quaternion & q1, const quaternion & q2 ); - - friend bool operator != ( const quaternion & q1, const quaternion & q2 ); - - friend quaternion operator * ( const quaternion & q1, const quaternion & q2 ); - - bool equals( const quaternion & r, real tolerance ) const - { - real t; - - t = ( - (q[0]-r.q[0])*(q[0]-r.q[0]) + - (q[1]-r.q[1])*(q[1]-r.q[1]) + - (q[2]-r.q[2])*(q[2]-r.q[2]) + - (q[3]-r.q[3])*(q[3]-r.q[3]) ); - if(t > GLH_EPSILON) - return false; - return 1; - } - - quaternion & conjugate() - { - q[0] *= -GLH_ONE; - q[1] *= -GLH_ONE; - q[2] *= -GLH_ONE; - return *this; - } - - quaternion & invert() - { - return conjugate(); - } - - quaternion inverse() const - { - quaternion r = *this; - return r.invert(); - } - - // - // Quaternion multiplication with cartesian vector - // v' = q*v*q(star) - // - void mult_vec( const vec3 &src, vec3 &dst ) const - { - real v_coef = w * w - x * x - y * y - z * z; - real u_coef = GLH_TWO * (src.v[0] * x + src.v[1] * y + src.v[2] * z); - real c_coef = GLH_TWO * w; - - dst.v[0] = v_coef * src.v[0] + u_coef * x + c_coef * (y * src.v[2] - z * src.v[1]); - dst.v[1] = v_coef * src.v[1] + u_coef * y + c_coef * (z * src.v[0] - x * src.v[2]); - dst.v[2] = v_coef * src.v[2] + u_coef * z + c_coef * (x * src.v[1] - y * src.v[0]); - } - - void mult_vec( vec3 & src_and_dst) const - { - mult_vec(vec3(src_and_dst), src_and_dst); - } - - void scale_angle( real scaleFactor ) - { - vec3 axis; - real radians; - - get_value(axis, radians); - radians *= scaleFactor; - set_value(axis, radians); - } - - static quaternion slerp( const quaternion & p, const quaternion & q, real alpha ) - { - quaternion r; - - real cos_omega = p.x * q.x + p.y * q.y + p.z * q.z + p.w * q.w; - // if B is on opposite hemisphere from A, use -B instead - - int bflip; - if ( ( bflip = (cos_omega < GLH_ZERO)) ) - cos_omega = -cos_omega; - - // complementary interpolation parameter - real 
beta = GLH_ONE - alpha; - - if(cos_omega <= GLH_ONE - GLH_EPSILON) - return p; - - real omega = real(acos(cos_omega)); - real one_over_sin_omega = GLH_ONE / real(sin(omega)); - - beta = real(sin(omega*beta) * one_over_sin_omega); - alpha = real(sin(omega*alpha) * one_over_sin_omega); - - if (bflip) - alpha = -alpha; - - r.x = beta * p.q[0]+ alpha * q.q[0]; - r.y = beta * p.q[1]+ alpha * q.q[1]; - r.z = beta * p.q[2]+ alpha * q.q[2]; - r.w = beta * p.q[3]+ alpha * q.q[3]; - return r; - } - - static quaternion identity() - { - static quaternion ident( vec3( 0.0, 0.0, 0.0 ), GLH_ONE ); - return ident; - } - - real & operator []( int i ) - { - assert(i < 4); - return q[i]; - } - - const real & operator []( int i ) const - { - assert(i < 4); - return q[i]; - } - - protected: - - void counter_normalize() - { - if (counter > GLH_QUATERNION_NORMALIZATION_THRESHOLD) - normalize(); - } - - union - { - struct - { - real q[4]; - }; - struct - { - real x; - real y; - real z; - real w; - }; - }; - - // renormalization counter - unsigned char counter; - }; - - inline - bool operator == ( const quaternion & q1, const quaternion & q2 ) - { - return (equivalent(q1.x, q2.x) && - equivalent(q1.y, q2.y) && - equivalent(q1.z, q2.z) && - equivalent(q1.w, q2.w) ); - } - - inline - bool operator != ( const quaternion & q1, const quaternion & q2 ) - { - return ! 
( q1 == q2 ); - } - - inline - quaternion operator * ( const quaternion & q1, const quaternion & q2 ) - { - quaternion r(q1); - r *= q2; - return r; - } - - - - - - - - - - - class plane - { - public: - - plane() - { - planedistance = 0.0; - planenormal.set_value( 0.0, 0.0, 1.0 ); - } - - - plane( const vec3 &p0, const vec3 &p1, const vec3 &p2 ) - { - vec3 v0 = p1 - p0; - vec3 v1 = p2 - p0; - planenormal = v0.cross(v1); - planenormal.normalize(); - planedistance = p0.dot(planenormal); - } - - plane( const vec3 &normal, real distance ) - { - planedistance = distance; - planenormal = normal; - planenormal.normalize(); - } - - plane( const vec3 &normal, const vec3 &point ) - { - planenormal = normal; - planenormal.normalize(); - planedistance = point.dot(planenormal); - } - - void offset( real d ) - { - planedistance += d; - } - - bool intersect( const line &l, vec3 &intersection ) const - { - vec3 pos, dir; - vec3 pn = planenormal; - real pd = planedistance; - - pos = l.get_position(); - dir = l.get_direction(); - - if(dir.dot(pn) == 0.0) return 0; - pos -= pn*pd; - // now we're talking about a plane passing through the origin - if(pos.dot(pn) < 0.0) pn.negate(); - if(dir.dot(pn) > 0.0) dir.negate(); - vec3 ppos = pn * pos.dot(pn); - pos = (ppos.length()/dir.dot(-pn))*dir; - intersection = l.get_position(); - intersection += pos; - return 1; - } - void transform( const matrix4 &matrix ) - { - matrix4 invtr = matrix.inverse(); - invtr = invtr.transpose(); - - vec3 pntOnplane = planenormal * planedistance; - vec3 newPntOnplane; - vec3 newnormal; - - invtr.mult_dir_matrix(planenormal, newnormal); - matrix.mult_vec_matrix(pntOnplane, newPntOnplane); - - newnormal.normalize(); - planenormal = newnormal; - planedistance = newPntOnplane.dot(planenormal); - } - - bool is_in_half_space( const vec3 &point ) const - { - - if(( point.dot(planenormal) - planedistance) < 0.0) - return 0; - return 1; - } - - - real distance( const vec3 & point ) const - { - return 
planenormal.dot(point - planenormal*planedistance); - } - - const vec3 &get_normal() const - { - return planenormal; - } - - - real get_distance_from_origin() const - { - return planedistance; - } - - - friend bool operator == ( const plane & p1, const plane & p2 ); - - - friend bool operator != ( const plane & p1, const plane & p2 ); - - //protected: - vec3 planenormal; - real planedistance; - }; - - inline - bool operator == (const plane & p1, const plane & p2 ) - { - return ( p1.planedistance == p2.planedistance && p1.planenormal == p2.planenormal); - } - - inline - bool operator != ( const plane & p1, const plane & p2 ) - { return ! (p1 == p2); } - - - - } // "ns_##GLH_REAL" - - // make common typedefs... -#ifdef GLH_REAL_IS_FLOAT - typedef GLH_REAL_NAMESPACE::vec2 vec2f; - typedef GLH_REAL_NAMESPACE::vec3 vec3f; - typedef GLH_REAL_NAMESPACE::vec4 vec4f; - typedef GLH_REAL_NAMESPACE::quaternion quaternionf; - typedef GLH_REAL_NAMESPACE::quaternion rotationf; - typedef GLH_REAL_NAMESPACE::line linef; - typedef GLH_REAL_NAMESPACE::plane planef; - typedef GLH_REAL_NAMESPACE::matrix4 matrix4f; -#endif - - - - -} // namespace glh - - - -#endif - diff --git a/indra/llwindow/llwindowmesaheadless.h b/indra/llwindow/llwindowmesaheadless.h index 438964dae..6c0e872e2 100644 --- a/indra/llwindow/llwindowmesaheadless.h +++ b/indra/llwindow/llwindowmesaheadless.h @@ -30,7 +30,6 @@ #if LL_MESA_HEADLESS #include "llwindow.h" -#include "GL/glu.h" #include "GL/osmesa.h" class LLWindowMesaHeadless : public LLWindow diff --git a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl index 6acbf0aaf..2bd85f88b 100644 --- a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl +++ b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl @@ -22,6 +22,8 @@ * $/LicenseInfo$ */ +#define FLT_MAX 3.402823466e+38 + ATTRIBUTE vec4 weight4; uniform mat3x4 matrixPalette[52]; @@ -29,6 +31,9 @@ 
uniform float maxWeight; mat4 getObjectSkinnedTransform() { + + + int i; vec4 w = fract(weight4); @@ -38,6 +43,10 @@ mat4 getObjectSkinnedTransform() index = max(index, vec4( 0.0)); float sum = (w.x+w.y+w.z+w.w); + if(sum > 0.0) + w*=1.0/sum; + else + w=vec4(FLT_MAX); int i1 = int(index.x); int i2 = int(index.y); @@ -59,7 +68,7 @@ mat4 getObjectSkinnedTransform() ret[0] = vec4(mat[0], 0); ret[1] = vec4(mat[1], 0); ret[2] = vec4(mat[2], 0); - ret[3] = vec4(trans, sum); + ret[3] = vec4(trans, 1.0); return ret; } diff --git a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl index 4f4d2cd6b..b0be02f0d 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl @@ -198,5 +198,5 @@ void main() frag_data[0] = vec4(color.rgb, color.a); // diffuse frag_data[1] = vec4(0); // speccolor, spec - frag_data[2] = vec4(encode_normal(screenspacewavef.xyz*0.5+0.5), 0.05, 0);// normalxy, 0, 0 + frag_data[2] = vec4(encode_normal(screenspacewavef.xyz), 0.05, 0);// normalxy, 0, 0 } diff --git a/indra/newview/app_settings/shaders/class1/effects/glowV.glsl b/indra/newview/app_settings/shaders/class1/effects/glowV.glsl index cdb228157..bde9a4537 100644 --- a/indra/newview/app_settings/shaders/class1/effects/glowV.glsl +++ b/indra/newview/app_settings/shaders/class1/effects/glowV.glsl @@ -26,7 +26,7 @@ uniform mat4 modelview_projection_matrix; ATTRIBUTE vec3 position; -ATTRIBUTE vec2 texcoord0; +ATTRIBUTE vec2 texcoord1; uniform vec2 glowDelta; @@ -39,12 +39,12 @@ void main() { gl_Position = modelview_projection_matrix * vec4(position, 1.0); - vary_texcoord0.xy = texcoord0 + glowDelta*(-3.5); - vary_texcoord1.xy = texcoord0 + glowDelta*(-2.5); - vary_texcoord2.xy = texcoord0 + glowDelta*(-1.5); - vary_texcoord3.xy = texcoord0 + glowDelta*(-0.5); - vary_texcoord0.zw = texcoord0 + glowDelta*(0.5); - vary_texcoord1.zw = texcoord0 + 
glowDelta*(1.5); - vary_texcoord2.zw = texcoord0 + glowDelta*(2.5); - vary_texcoord3.zw = texcoord0 + glowDelta*(3.5); + vary_texcoord0.xy = texcoord1 + glowDelta*(-3.5); + vary_texcoord1.xy = texcoord1 + glowDelta*(-2.5); + vary_texcoord2.xy = texcoord1 + glowDelta*(-1.5); + vary_texcoord3.xy = texcoord1 + glowDelta*(-0.5); + vary_texcoord0.zw = texcoord1 + glowDelta*(0.5); + vary_texcoord1.zw = texcoord1 + glowDelta*(1.5); + vary_texcoord2.zw = texcoord1 + glowDelta*(2.5); + vary_texcoord3.zw = texcoord1 + glowDelta*(3.5); } diff --git a/indra/newview/app_settings/shaders/class1/interface/glowcombineF.glsl b/indra/newview/app_settings/shaders/class1/interface/glowcombineF.glsl index 891b971f1..f76a90158 100644 --- a/indra/newview/app_settings/shaders/class1/interface/glowcombineF.glsl +++ b/indra/newview/app_settings/shaders/class1/interface/glowcombineF.glsl @@ -39,6 +39,6 @@ VARYING vec2 vary_texcoord1; void main() { - frag_color = texture2D(glowMap, vary_texcoord0.xy) + - texture2DRect(screenMap, vary_texcoord1.xy); + frag_color = texture2D(glowMap, vary_texcoord1.xy) + + texture2DRect(screenMap, vary_texcoord0.xy); } diff --git a/indra/newview/app_settings/shaders/class1/lighting/lightWaterF.glsl b/indra/newview/app_settings/shaders/class1/lighting/lightWaterF.glsl index 3586652cb..248bff55f 100644 --- a/indra/newview/app_settings/shaders/class1/lighting/lightWaterF.glsl +++ b/indra/newview/app_settings/shaders/class1/lighting/lightWaterF.glsl @@ -39,10 +39,10 @@ void default_lighting_water() { vec4 color = diffuseLookup(vary_texcoord0.xy) * vertex_color; - if(color.a < .004) + /*if(color.a < .004) { discard; - } + }*/ color.rgb = atmosLighting(color.rgb); diff --git a/indra/newview/awavefront.h b/indra/newview/awavefront.h index 3f58ee4a0..09f0dc3b9 100644 --- a/indra/newview/awavefront.h +++ b/indra/newview/awavefront.h @@ -28,6 +28,9 @@ class LLFace; class LLPolyMesh; class LLViewerObject; class LLVOAvatar; +class LLVolume; +class LLVolumeFace; +class 
LLXform; typedef std::vector > vert_t; typedef std::vector vec3_t; diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp index b96d18da4..4dfe7bc4b 100644 --- a/indra/newview/llagentcamera.cpp +++ b/indra/newview/llagentcamera.cpp @@ -395,7 +395,7 @@ void LLAgentCamera::slamLookAt(const LLVector3 &look_at) //----------------------------------------------------------------------------- LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 original_focus_point, S32 x, S32 y) { - LLMatrix4 obj_matrix = object->getRenderMatrix(); + const LLMatrix4a& obj_matrix = object->getRenderMatrix(); LLQuaternion obj_rot = object->getRenderRotation(); LLVector3 obj_pos = object->getRenderPosition(); @@ -427,24 +427,24 @@ LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 origi // find the largest ratio stored in obj_to_cam_ray_proportions // this corresponds to the object's local axial plane (XY, YZ, XZ) that is *most* facing the camera - LLVector3 longest_object_axis; + LLVector4a focus_plane_normal; // is x-axis longest? if (obj_to_cam_ray_proportions.mV[VX] > obj_to_cam_ray_proportions.mV[VY] && obj_to_cam_ray_proportions.mV[VX] > obj_to_cam_ray_proportions.mV[VZ]) { // then grab it - longest_object_axis.setVec(obj_matrix.getFwdRow4()); + focus_plane_normal = obj_matrix.getRow(); } // is y-axis longest? else if (obj_to_cam_ray_proportions.mV[VY] > obj_to_cam_ray_proportions.mV[VZ]) { // then grab it - longest_object_axis.setVec(obj_matrix.getLeftRow4()); + focus_plane_normal = obj_matrix.getRow(); } // otherwise, use z axis else { - longest_object_axis.setVec(obj_matrix.getUpRow4()); + focus_plane_normal = obj_matrix.getRow(); } // Use this axis as the normal to project mouse click on to plane with that normal, at the object center. 
@@ -453,11 +453,10 @@ LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 origi // We do this to allow the camera rotation tool to "tumble" the object by rotating the camera. // If the focus point were the object surface under the mouse, camera rotation would introduce an undesirable // eccentricity to the object orientation - LLVector3 focus_plane_normal(longest_object_axis); - focus_plane_normal.normalize(); + focus_plane_normal.normalize3fast(); LLVector3d focus_pt_global; - gViewerWindow->mousePointOnPlaneGlobal(focus_pt_global, x, y, gAgent.getPosGlobalFromAgent(obj_pos), focus_plane_normal); + gViewerWindow->mousePointOnPlaneGlobal(focus_pt_global, x, y, gAgent.getPosGlobalFromAgent(obj_pos), LLVector3(focus_plane_normal.getF32ptr())); LLVector3 focus_pt = gAgent.getPosAgentFromGlobal(focus_pt_global); // find vector from camera to focus point in object space @@ -1795,7 +1794,7 @@ LLVector3d LLAgentCamera::calcCameraPositionTargetGlobal(BOOL *hit_limit) head_offset.mdV[VX] = gAgentAvatarp->mHeadOffset.mV[VX]; head_offset.mdV[VY] = gAgentAvatarp->mHeadOffset.mV[VY]; head_offset.mdV[VZ] = gAgentAvatarp->mHeadOffset.mV[VZ] + 0.1f; - const LLMatrix4& mat = ((LLViewerObject*) gAgentAvatarp->getParent())->getRenderMatrix(); + const LLMatrix4 mat(((LLViewerObject*) gAgentAvatarp->getParent())->getRenderMatrix().getF32ptr()); camera_position_global = gAgent.getPosGlobalFromAgent ((gAgentAvatarp->getPosition()+ LLVector3(head_offset)*gAgentAvatarp->getRotation()) * mat); diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 217332797..caf66d6bc 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -98,6 +98,7 @@ #include "llprimitive.h" #include "llurlaction.h" #include "llurlentry.h" +#include "llvolumemgr.h" #include "llnotifications.h" #include "llnotificationsutil.h" #include diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index 8263c0868..56fe132d3 100644 --- 
a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -179,7 +179,7 @@ LLVOVolume* LLDrawable::getVOVolume() const } } -const LLMatrix4& LLDrawable::getRenderMatrix() const +const LLMatrix4a& LLDrawable::getRenderMatrix() const { return isRoot() ? getWorldMatrix() : getParent()->getWorldMatrix(); } @@ -1209,8 +1209,7 @@ void LLSpatialBridge::updateSpatialExtents() LLVector4a size = root->mBounds[1]; //VECTORIZE THIS - LLMatrix4a mat; - mat.loadu(mDrawable->getXform()->getWorldMatrix()); + const LLMatrix4a& mat = mDrawable->getXform()->getWorldMatrix(); LLVector4a t; t.splat(0.f); @@ -1274,27 +1273,35 @@ LLCamera LLSpatialBridge::transformCamera(LLCamera& camera) { LLCamera ret = camera; LLXformMatrix* mat = mDrawable->getXform(); - LLVector3 center = LLVector3(0,0,0) * mat->getWorldMatrix(); + const LLVector4a& center = mat->getWorldMatrix().getRow<3>(); - LLVector3 delta = ret.getOrigin() - center; - LLQuaternion rot = ~mat->getRotation(); + LLQuaternion2 invRot; + invRot.setConjugate( LLQuaternion2(mat->getRotation()) ); - delta *= rot; - LLVector3 lookAt = ret.getAtAxis(); - LLVector3 up_axis = ret.getUpAxis(); - LLVector3 left_axis = ret.getLeftAxis(); + LLVector4a delta; + delta.load3(ret.getOrigin().mV); + delta.sub(center); - lookAt *= rot; - up_axis *= rot; - left_axis *= rot; + LLVector4a lookAt; + lookAt.load3(ret.getAtAxis().mV); + LLVector4a up_axis; - if (!delta.isFinite()) + up_axis.load3(ret.getUpAxis().mV); + LLVector4a left_axis; + left_axis.load3(ret.getLeftAxis().mV); + + delta.setRotated(invRot, delta); + lookAt.setRotated(invRot, lookAt); + up_axis.setRotated(invRot, up_axis); + left_axis.setRotated(invRot, left_axis); + + if (!delta.isFinite3()) { - delta.clearVec(); + delta.clear(); } - ret.setOrigin(delta); - ret.setAxes(lookAt, left_axis, up_axis); + ret.setOrigin(LLVector3(delta.getF32ptr())); + ret.setAxes(LLVector3(lookAt.getF32ptr()), LLVector3(left_axis.getF32ptr()), LLVector3(up_axis.getF32ptr())); return ret; } @@ 
-1587,12 +1594,17 @@ const LLVector3 LLDrawable::getPositionAgent() const { if (isActive()) { - LLVector3 pos(0,0,0); if (!isRoot()) { - pos = mVObjp->getPosition(); + LLVector4a pos; + pos.load3(mVObjp->getPosition().mV); + getRenderMatrix().affineTransform(pos,pos); + return LLVector3(pos.getF32ptr()); + } + else + { + return LLVector3(getRenderMatrix().getRow<3>().getF32ptr()); } - return pos * getRenderMatrix(); } else { diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index 421d6b23b..97ad55d5b 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -110,8 +110,8 @@ public: const LLViewerObject *getVObj() const { return mVObjp; } LLVOVolume* getVOVolume() const; // cast mVObjp tp LLVOVolume if OK - const LLMatrix4& getWorldMatrix() const { return mXform.getWorldMatrix(); } - const LLMatrix4& getRenderMatrix() const; + const LLMatrix4a& getWorldMatrix() const { return mXform.getWorldMatrix(); } + const LLMatrix4a& getRenderMatrix() const; void setPosition(LLVector3 v) const { } const LLVector3& getPosition() const { return mXform.getPosition(); } const LLVector3& getWorldPosition() const { return mXform.getPositionW(); } @@ -305,7 +305,7 @@ private: //aligned members LL_ALIGN_16(LLVector4a mPositionGroup); public: - LLXformMatrix mXform; + LL_ALIGN_16(LLXformMatrix mXform); // vis data LLPointer mParent; diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp index 90701c0f4..7a6ca0e24 100644 --- a/indra/newview/lldrawpool.cpp +++ b/indra/newview/lldrawpool.cpp @@ -297,17 +297,14 @@ LLViewerTexture *LLFacePool::getTexture() void LLFacePool::removeFaceReference(LLFace *facep) { - if (facep->getReferenceIndex() != -1) + S32 idx = facep->getReferenceIndex(); + if (idx != -1) { - if (facep->getReferenceIndex() != (S32)mReferences.size()) - { - LLFace *back = mReferences.back(); - mReferences[facep->getReferenceIndex()] = back; - back->setReferenceIndex(facep->getReferenceIndex()); - } - 
mReferences.pop_back(); + facep->setReferenceIndex(-1); + std::vector::iterator iter = vector_replace_with_last(mReferences, mReferences.begin() + idx); + if(iter != mReferences.end()) + (*iter)->setReferenceIndex(idx); } - facep->setReferenceIndex(-1); } void LLFacePool::addFaceReference(LLFace *facep) @@ -449,7 +446,7 @@ void LLRenderPass::applyModelMatrix(LLDrawInfo& params) if (params.mModelMatrix) { llassert(gGL.getMatrixMode() == LLRender::MM_MODELVIEW); - gGL.multMatrix((GLfloat*) params.mModelMatrix->mMatrix); + gGL.multMatrix(*params.mModelMatrix); } gPipeline.mMatrixOpCount++; } @@ -484,7 +481,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba tex_setup = true; gGL.getTexUnit(0)->activate(); gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); gPipeline.mTextureMatrixOps++; } } diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp index 74ee302a6..2e2bf3aac 100644 --- a/indra/newview/lldrawpoolalpha.cpp +++ b/indra/newview/lldrawpoolalpha.cpp @@ -515,7 +515,7 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass) tex_setup = true; gGL.getTexUnit(0)->activate(); gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); gPipeline.mTextureMatrixOps++; } } diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index dacc26422..0c6102f20 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -155,16 +155,9 @@ void LLDrawPoolAvatar::prerender() } } -LLMatrix4& LLDrawPoolAvatar::getModelView() +const LLMatrix4a& LLDrawPoolAvatar::getModelView() { - static LLMatrix4 ret; - - ret.initRows(LLVector4(gGLModelView+0), - LLVector4(gGLModelView+4), - LLVector4(gGLModelView+8), - LLVector4(gGLModelView+12)); - - return ret; + return gGLModelView; } 
//----------------------------------------------------------------------------- @@ -1333,7 +1326,7 @@ void LLDrawPoolAvatar::renderAvatars(LLVOAvatar* single_avatar, S32 pass) { LLMatrix4 rot_mat; LLViewerCamera::getInstance()->getMatrixToLocal(rot_mat); - LLMatrix4 cfr(OGL_TO_CFR_ROTATION); + LLMatrix4 cfr(OGL_TO_CFR_ROTATION.getF32ptr()); rot_mat *= cfr; LLVector4 wind; @@ -1390,16 +1383,11 @@ void LLDrawPoolAvatar::getRiggedGeometry(LLFace* face, LLPointer U16 offset = 0; - LLMatrix4 mat_vert = skin->mBindShapeMatrix; - glh::matrix4f m((F32*) mat_vert.mMatrix); - m = m.inverse().transpose(); - - F32 mat3[] = - { m.m[0], m.m[1], m.m[2], - m.m[4], m.m[5], m.m[6], - m.m[8], m.m[9], m.m[10] }; - - LLMatrix3 mat_normal(mat3); + LLMatrix4a mat_vert; + mat_vert.loadu(skin->mBindShapeMatrix); + LLMatrix4a mat_inv_trans = mat_vert; + mat_inv_trans.invert(); + mat_inv_trans.transpose(); //let getGeometryVolume know if alpha should override shiny U32 type = gPipeline.getPoolTypeFromTE(face->getTextureEntry(), face->getTexture()); @@ -1414,7 +1402,7 @@ void LLDrawPoolAvatar::getRiggedGeometry(LLFace* face, LLPointer } //llinfos << "Rebuilt face " << face->getTEOffset() << " of " << face->getDrawable() << " at " << gFrameTimeSeconds << llendl; - face->getGeometryVolume(*volume, face->getTEOffset(), mat_vert, mat_normal, offset, true); + face->getGeometryVolume(*volume, face->getTEOffset(), mat_vert, mat_inv_trans, offset, true); buffer->flush(); } @@ -1540,8 +1528,10 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) } if (joint) { - mat[i] = skin->mInvBindMatrix[i]; - mat[i] *= joint->getWorldMatrix(); + LLMatrix4a tmp; + tmp.loadu((F32*)skin->mInvBindMatrix[i].mMatrix); + tmp.setMul(joint->getWorldMatrix(),tmp); + mat[i] = LLMatrix4(tmp.getF32ptr()); } } @@ -1665,7 +1655,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) if (face->mTextureMatrix && vobj->mTexAnimMode) { gGL.matrixMode(LLRender::MM_TEXTURE); - 
gGL.loadMatrix((F32*) face->mTextureMatrix->mMatrix); + gGL.loadMatrix(*face->mTextureMatrix); buff->setBuffer(data_mask); buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); gGL.loadIdentity(); diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h index af06c4836..bf4a14754 100644 --- a/indra/newview/lldrawpoolavatar.h +++ b/indra/newview/lldrawpoolavatar.h @@ -68,7 +68,7 @@ public: LLDrawPoolAvatar(); - static LLMatrix4& getModelView(); + static const LLMatrix4a& getModelView(); /*virtual*/ LLDrawPool *instancePool(); diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index f0d7b4241..0a0a2d610 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -373,11 +373,7 @@ void LLDrawPoolBump::bindCubeMap(LLGLSLShader* shader, S32 shader_level, S32& di { if (!invisible && shader ) { - LLMatrix4 mat; - mat.initRows(LLVector4(gGLModelView+0), - LLVector4(gGLModelView+4), - LLVector4(gGLModelView+8), - LLVector4(gGLModelView+12)); + LLMatrix4 mat(gGLModelView.getF32ptr()); LLVector3 vec = LLVector3(gShinyOrigin) * mat; LLVector4 vec4(vec, gShinyOrigin.mV[3]); shader->uniform4fv(LLViewerShaderMgr::SHINY_ORIGIN, 1, vec4.mV); @@ -521,11 +517,7 @@ void LLDrawPoolBump::beginFullbrightShiny() LLCubeMap* cube_map = gSky.mVOSkyp ? 
gSky.mVOSkyp->getCubeMap() : NULL; if( cube_map ) { - LLMatrix4 mat; - mat.initRows(LLVector4(gGLModelView+0), - LLVector4(gGLModelView+4), - LLVector4(gGLModelView+8), - LLVector4(gGLModelView+12)); + LLMatrix4 mat(gGLModelView.getF32ptr()); shader->bind(); LLVector3 vec = LLVector3(gShinyOrigin) * mat; LLVector4 vec4(vec, gShinyOrigin.mV[3]); @@ -1497,16 +1489,16 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL { gGL.getTexUnit(1)->activate(); gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); } gGL.getTexUnit(0)->activate(); gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); gPipeline.mTextureMatrixOps++; } - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); gPipeline.mTextureMatrixOps++; tex_setup = true; diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp index d1b508065..6b2998617 100644 --- a/indra/newview/lldrawpoolmaterials.cpp +++ b/indra/newview/lldrawpoolmaterials.cpp @@ -187,7 +187,7 @@ void LLDrawPoolMaterials::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, gGL.matrixMode(LLRender::MM_TEXTURE); } - gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix); + gGL.loadMatrix(*params.mTextureMatrix); gPipeline.mTextureMatrixOps++; tex_setup = true; diff --git a/indra/newview/lldrawpoolterrain.cpp b/indra/newview/lldrawpoolterrain.cpp index eb2738078..456fc3301 100644 --- a/indra/newview/lldrawpoolterrain.cpp +++ b/indra/newview/lldrawpoolterrain.cpp @@ -314,8 +314,11 @@ void LLDrawPoolTerrain::drawLoop() { LLFace *facep = *iter; - LLMatrix4* model_matrix = &(facep->getDrawable()->getRegion()->mRenderMatrix); - + LLMatrix4a* model_matrix = &(facep->getDrawable()->getRegion()->mRenderMatrix); + if(model_matrix && model_matrix->isIdentity()) + { + 
model_matrix = NULL; + } if (model_matrix != gGLLastMatrix) { llassert(gGL.getMatrixMode() == LLRender::MM_MODELVIEW); @@ -323,7 +326,7 @@ void LLDrawPoolTerrain::drawLoop() gGL.loadMatrix(gGLModelView); if (model_matrix) { - gGL.multMatrix((GLfloat*) model_matrix->mMatrix); + gGL.multMatrix(*model_matrix); } gPipeline.mMatrixOpCount++; } diff --git a/indra/newview/lldrawpooltree.cpp b/indra/newview/lldrawpooltree.cpp index bb2fb09eb..392522913 100644 --- a/indra/newview/lldrawpooltree.cpp +++ b/indra/newview/lldrawpooltree.cpp @@ -104,24 +104,46 @@ void LLDrawPoolTree::render(S32 pass) LLGLState test(GL_ALPHA_TEST, LLGLSLShader::sNoFixedFunction ? 0 : 1); LLOverrideFaceColor color(this, 1.f, 1.f, 1.f, 1.f); - static LLCachedControl sRenderAnimateTrees("RenderAnimateTrees", false); - if (sRenderAnimateTrees) - { - renderTree(); - } - else gGL.getTexUnit(sDiffTex)->bind(mTexturep); - + for (std::vector::iterator iter = mDrawFace.begin(); iter != mDrawFace.end(); iter++) { LLFace *face = *iter; + if(face->getViewerObject()) + { + LLVOTree* pTree = dynamic_cast(face->getViewerObject()); + if(pTree && !pTree->mDrawList.empty() ) + { + LLMatrix4a* model_matrix = &(face->getDrawable()->getRegion()->mRenderMatrix); + + gGL.loadMatrix(gGLModelView); + gGL.multMatrix(*model_matrix); + gPipeline.mMatrixOpCount++; + + for(std::vector >::iterator iter2 = pTree->mDrawList.begin(); + iter2 != pTree->mDrawList.end(); iter2++) + { + LLDrawInfo& params = *iter2->get(); + gGL.pushMatrix(); + gGL.multMatrix(*params.mModelMatrix); + gPipeline.mMatrixOpCount++; + params.mVertexBuffer->setBuffer(LLDrawPoolTree::VERTEX_DATA_MASK); + params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset); + gGL.popMatrix(); + } + continue; + } + } LLVertexBuffer* buff = face->getVertexBuffer(); if(buff) { - LLMatrix4* model_matrix = &(face->getDrawable()->getRegion()->mRenderMatrix); - + LLMatrix4a* model_matrix = 
&(face->getDrawable()->getRegion()->mRenderMatrix); + if(model_matrix && model_matrix->isIdentity()) + { + model_matrix = NULL; + } if (model_matrix != gGLLastMatrix) { gGLLastMatrix = model_matrix; @@ -129,7 +151,7 @@ void LLDrawPoolTree::render(S32 pass) if (model_matrix) { llassert(gGL.getMatrixMode() == LLRender::MM_MODELVIEW); - gGL.multMatrix((GLfloat*) model_matrix->mMatrix); + gGL.multMatrix(*model_matrix); } gPipeline.mMatrixOpCount++; } @@ -209,130 +231,6 @@ void LLDrawPoolTree::endShadowPass(S32 pass) gDeferredTreeShadowProgram.unbind(); } -// -void LLDrawPoolTree::renderTree(BOOL selecting) -{ - LLGLState normalize(GL_NORMALIZE, TRUE); - - // Bind the texture for this tree. - gGL.getTexUnit(sDiffTex)->bind(mTexturep.get(), TRUE); - - U32 indices_drawn = 0; - - gGL.matrixMode(LLRender::MM_MODELVIEW); - - for (std::vector::iterator iter = mDrawFace.begin(); - iter != mDrawFace.end(); iter++) - { - LLFace *face = *iter; - LLDrawable *drawablep = face->getDrawable(); - - if (drawablep->isDead() || !face->getVertexBuffer()) - { - continue; - } - - face->getVertexBuffer()->setBuffer(LLDrawPoolTree::VERTEX_DATA_MASK); - U16* indicesp = (U16*) face->getVertexBuffer()->getIndicesPointer(); - - // Render each of the trees - LLVOTree *treep = (LLVOTree *)drawablep->getVObj().get(); - - LLColor4U color(255,255,255,255); - - if (!selecting || treep->mGLName != 0) - { - if (selecting) - { - S32 name = treep->mGLName; - - color = LLColor4U((U8)(name >> 16), (U8)(name >> 8), (U8)name, 255); - } - - gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); - //gGL.pushMatrix(); - - LLMatrix4 matrix(gGLModelView); - - // Translate to tree base HACK - adjustment in Z plants tree underground - const LLVector3 &pos_agent = treep->getPositionAgent(); - //gGL.translatef(pos_agent.mV[VX], pos_agent.mV[VY], pos_agent.mV[VZ] - 0.1f); - LLMatrix4 trans_mat; - trans_mat.setTranslation(pos_agent.mV[VX], pos_agent.mV[VY], pos_agent.mV[VZ] - 0.1f); - trans_mat *= matrix; - - // Rotate to 
tree position and bend for current trunk/wind - // Note that trunk stiffness controls the amount of bend at the trunk as - // opposed to the crown of the tree - // - const F32 TRUNK_STIFF = 22.f; - - LLQuaternion rot = - LLQuaternion(treep->mTrunkBend.magVec()*TRUNK_STIFF*DEG_TO_RAD, LLVector4(treep->mTrunkBend.mV[VX], treep->mTrunkBend.mV[VY], 0)) * - LLQuaternion(90.f*DEG_TO_RAD, LLVector4(0,0,1)) * - treep->getRotation(); - - LLMatrix4 rot_mat(rot); - rot_mat *= trans_mat; - - F32 radius = treep->getScale().magVec()*0.05f; - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = - scale_mat.mMatrix[1][1] = - scale_mat.mMatrix[2][2] = radius; - - scale_mat *= rot_mat; - - //TO-DO: Make these set-able? - const F32 THRESH_ANGLE_FOR_BILLBOARD = 7.5f; //Made LoD now a little less aggressive here -Shyotl - const F32 BLEND_RANGE_FOR_BILLBOARD = 1.5f; - - F32 droop = treep->mDroop + 25.f*(1.f - treep->mTrunkBend.magVec()); - - S32 stop_depth = 0; - F32 app_angle = treep->getAppAngle()*LLVOTree::sTreeFactor; - F32 alpha = 1.0; - S32 trunk_LOD = LLVOTree::sMAX_NUM_TREE_LOD_LEVELS; - - for (S32 j = 0; j < 4; j++) - { - - if (app_angle > LLVOTree::sLODAngles[j]) - { - trunk_LOD = j; - break; - } - } - if(trunk_LOD >= LLVOTree::sMAX_NUM_TREE_LOD_LEVELS) - { - continue ; //do not render. - } - - if (app_angle < (THRESH_ANGLE_FOR_BILLBOARD - BLEND_RANGE_FOR_BILLBOARD)) - { - // - // Draw only the billboard - // - // Only the billboard, can use closer to normal alpha func. 
- stop_depth = -1; - LLFacePool::LLOverrideFaceColor clr(this, color); - indices_drawn += treep->drawBranchPipeline(scale_mat, indicesp, trunk_LOD, stop_depth, treep->mDepth, treep->mTrunkDepth, 1.0, treep->mTwist, droop, treep->mBranches, alpha); - } - else // if (app_angle > (THRESH_ANGLE_FOR_BILLBOARD + BLEND_RANGE_FOR_BILLBOARD)) - { - // - // Draw only the full geometry tree - // - LLFacePool::LLOverrideFaceColor clr(this, color); - indices_drawn += treep->drawBranchPipeline(scale_mat, indicesp, trunk_LOD, stop_depth, treep->mDepth, treep->mTrunkDepth, 1.0, treep->mTwist, droop, treep->mBranches, alpha); - } - - //gGL.popMatrix(); - } - } -}// - BOOL LLDrawPoolTree::verify() const { /* BOOL ok = TRUE; diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp index 12f91ea67..daebb322b 100644 --- a/indra/newview/lldrawpoolwater.cpp +++ b/indra/newview/lldrawpoolwater.cpp @@ -293,11 +293,10 @@ void LLDrawPoolWater::render(S32 pass) gGL.matrixMode(LLRender::MM_TEXTURE); gGL.loadIdentity(); - LLMatrix4 camera_mat = LLViewerCamera::getInstance()->getModelview(); - LLMatrix4 camera_rot(camera_mat.getMat3()); + LLMatrix4a camera_rot = LLViewerCamera::getInstance()->getModelview(); + camera_rot.extractRotation_affine(); camera_rot.invert(); - - gGL.loadMatrix((F32 *)camera_rot.mMatrix); + gGL.loadMatrix(camera_rot); gGL.matrixMode(LLRender::MM_MODELVIEW); LLOverrideFaceColor overrid(this, 1.f, 1.f, 1.f, 0.5f*up_dot); @@ -727,7 +726,7 @@ void LLDrawPoolWater::shade() gGL.getTexUnit(diffTex)->bind(face->getTexture()); sNeedsReflectionUpdate = TRUE; - + if (water->getUseTexture() || !water->getIsEdgePatch()) { sNeedsDistortionUpdate = TRUE; diff --git a/indra/newview/lldrawpoolwlsky.cpp b/indra/newview/lldrawpoolwlsky.cpp index 864975460..83047aa81 100644 --- a/indra/newview/lldrawpoolwlsky.cpp +++ b/indra/newview/lldrawpoolwlsky.cpp @@ -154,7 +154,8 @@ void LLDrawPoolWLSky::renderDome(F32 camHeightLocal, LLGLSLShader * shader) cons // the 
windlight sky dome works most conveniently in a coordinate system // where Y is up, so permute our basis vectors accordingly. - gGL.rotatef(120.f, 1.f / F_SQRT3, 1.f / F_SQRT3, 1.f / F_SQRT3); + static const LLMatrix4a rot = gGL.genRot(120.f, 1.f / F_SQRT3, 1.f / F_SQRT3, 1.f / F_SQRT3); + gGL.rotatef(rot); gGL.scalef(0.333f, 0.333f, 0.333f); diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index d4718c2bf..86b840c03 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -206,7 +206,7 @@ void LLFace::destroy() if (mTextureMatrix) { - delete mTextureMatrix; + ll_aligned_free_16(mTextureMatrix); mTextureMatrix = NULL; if (mDrawablep.notNull()) @@ -493,7 +493,11 @@ void LLFace::updateCenterAgent() { if (mDrawablep->isActive()) { - mCenterAgent = mCenterLocal * getRenderMatrix(); + LLVector4a local_pos; + local_pos.load3(mCenterLocal.mV); + + getRenderMatrix().affineTransform(local_pos,local_pos); + mCenterAgent.set(local_pos.getF32ptr()); } else { @@ -521,15 +525,21 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color) gGL.getTexUnit(0)->bind(imagep); gGL.pushMatrix(); + + const LLMatrix4a* model_matrix = NULL; if (mDrawablep->isActive()) { - gGL.multMatrix((GLfloat*)mDrawablep->getRenderMatrix().mMatrix); + model_matrix = &(mDrawablep->getRenderMatrix()); } else { - gGL.multMatrix((GLfloat*)mDrawablep->getRegion()->mRenderMatrix.mMatrix); + model_matrix = &mDrawablep->getRegion()->mRenderMatrix; } - + if(model_matrix && !model_matrix->isIdentity()) + { + gGL.multMatrix(*model_matrix); + } + if (mDrawablep->isState(LLDrawable::RIGGED)) { LLVOVolume* volume = mDrawablep->getVOVolume(); @@ -540,7 +550,7 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color) { LLGLEnable offset(GL_POLYGON_OFFSET_FILL); glPolygonOffset(-1.f, -1.f); - gGL.multMatrix((F32*) volume->getRelativeXform().mMatrix); + gGL.multMatrix(volume->getRelativeXform()); const LLVolumeFace& vol_face = 
rigged->getVolumeFace(getTEOffset()); // Singu Note: Implementation changed to utilize a VBO, avoiding fixed functions unless required @@ -808,14 +818,14 @@ bool less_than_max_mag(const LLVector4a& vec) } BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, BOOL global_volume) + const LLMatrix4a& mat_vert_in, BOOL global_volume) { //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED)) { //VECTORIZE THIS - LLMatrix4a mat_vert; - mat_vert.loadu(mat_vert_in); + const LLMatrix4a& mat_vert = mat_vert_in; + //mat_vert.loadu(mat_vert_in); LLVector4a min,max; @@ -956,9 +966,9 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& po if (mTextureMatrix) // if we have a texture matrix, use it { - LLVector3 tc3(tc); - tc3 = tc3 * *mTextureMatrix; - tc = LLVector2(tc3); + LLVector4a tc4(tc.mV[VX],tc.mV[VY],0.f); + mTextureMatrix->affineTransform(tc4,tc4); + tc.set(tc4.getF32ptr()); } else // otherwise use the texture entry parameters @@ -975,7 +985,7 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& po // by planarProjection(). This is needed to match planar texgen parameters. void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_pos, F32* scale) const { - const LLMatrix4& vol_mat = getWorldMatrix(); + const LLMatrix4a& vol_mat = getWorldMatrix(); const LLVolumeFace& vf = getViewerObject()->getVolume()->getVolumeFace(mTEOffset); const LLVector4a& normal4a = vf.mNormals[0]; const LLVector4a& tangent = vf.mTangents[0]; @@ -994,13 +1004,20 @@ void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_po F32 ang = acos(projected_binormal.mV[VY]); ang = (projected_binormal.mV[VX] < 0.f) ? 
-ang : ang; - //VECTORIZE THIS - LLVector3 binormal(binormal4a.getF32ptr()); - LLVector3 normal(normal4a.getF32ptr()); - binormal.rotVec(ang, normal); - LLQuaternion local_rot( binormal % normal, binormal, normal ); - *face_rot = local_rot * vol_mat.quaternion(); - *face_pos = vol_mat.getTranslation(); + LLMatrix4a rot = gGL.genRot(ang, normal4a); + rot.rotate(binormal4a, binormal4a); + + LLVector4a x_axis; + x_axis.setCross3(binormal4a, normal4a); + + LLQuaternion2 local_rot(LLQuaternion( LLVector3(x_axis.getF32ptr()), LLVector3(binormal4a.getF32ptr()), LLVector3(normal4a.getF32ptr()) )); + + LLMatrix4 vol_mat2(vol_mat.getF32ptr()); + + local_rot.mul(LLQuaternion2(vol_mat2.quaternion())); + + *face_rot = LLQuaternion(local_rot.getVector4a().getF32ptr()); + face_pos->set(vol_mat.getRow().getF32ptr()); } // Returns the necessary texture transform to align this face's TE to align_to's TE @@ -1083,7 +1100,7 @@ bool LLFace::canRenderAsMask() static const LLCachedControl auto_mask_max_rmse("SHAutoMaskMaxRMSE",.09f); if ((te->getColor().mV[3] == 1.0f) && // can't treat as mask if we have face alpha (te->getGlow() == 0.f) && // glowing masks are hard to implement - don't mask - (!getViewerObject()->isAttachment() && getTexture()->getIsAlphaMask(use_rmse_auto_mask ? auto_mask_max_rmse : -1.f))) // texture actually qualifies for masking (lazily recalculated but expensive) + (getTexture()->getIsAlphaMask((!getViewerObject()->isAttachment() && use_rmse_auto_mask) ? 
auto_mask_max_rmse : -1.f))) // texture actually qualifies for masking (lazily recalculated but expensive) { if (LLPipeline::sRenderDeferred) { @@ -1202,7 +1219,7 @@ static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_PLANAR("Quick Planar"); BOOL LLFace::getGeometryVolume(const LLVolume& volume, const S32 &f, - const LLMatrix4& mat_vert_in, const LLMatrix3& mat_norm_in, + const LLMatrix4a& mat_vert_in, const LLMatrix4a& mat_norm_in, const U16 &index_offset, bool force_rebuild) { @@ -1349,8 +1366,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, } } - LLMatrix4a mat_normal; - mat_normal.loadu(mat_norm_in); + const LLMatrix4a& mat_normal = mat_norm_in; F32 r = 0, os = 0, ot = 0, ms = 0, mt = 0, cos_ang = 0, sin_ang = 0; bool do_xform = false; @@ -1409,7 +1425,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLGLSLShader* cur_shader = LLGLSLShader::sCurBoundShaderPtr; gGL.pushMatrix(); - gGL.loadMatrix((GLfloat*) mat_vert_in.mMatrix); + gGL.loadMatrix(mat_vert_in); if (rebuild_pos) { @@ -1550,7 +1566,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLQuaternion bump_quat; if (mDrawablep->isActive()) { - bump_quat = LLQuaternion(mDrawablep->getRenderMatrix()); + bump_quat = LLQuaternion(LLMatrix4(mDrawablep->getRenderMatrix().getF32ptr())); } if (bump_code) @@ -1712,16 +1728,12 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, else { //do tex mat, no texgen, no atlas, no bump for (S32 i = 0; i < num_vertices; i++) - { - LLVector2 tc(vf.mTexCoords[i]); + { //LLVector4a& norm = vf.mNormals[i]; //LLVector4a& center = *(vf.mCenter); - - LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); - tmp = tmp * *mTextureMatrix; - tc.mV[0] = tmp.mV[0]; - tc.mV[1] = tmp.mV[1]; - *tex_coords0++ = tc; + LLVector4a tc(vf.mTexCoords[i].mV[VX],vf.mTexCoords[i].mV[VY],0.f); + mTextureMatrix->affineTransform(tc,tc); + (tex_coords0++)->set(tc.getF32ptr()); } } } @@ -1739,12 +1751,9 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, vec.mul(scalea); 
planarProjection(tc, norm, center, vec); - LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); - tmp = tmp * *mTextureMatrix; - tc.mV[0] = tmp.mV[0]; - tc.mV[1] = tmp.mV[1]; - - *tex_coords0++ = tc; + LLVector4a tmp(tc.mV[VX],tc.mV[VY],0.f); + mTextureMatrix->affineTransform(tmp,tmp); + (tex_coords0++)->set(tmp.getF32ptr()); } } else @@ -1854,10 +1863,9 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (tex_mode && mTextureMatrix) { - LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); - tmp = tmp * *mTextureMatrix; - tc.mV[0] = tmp.mV[0]; - tc.mV[1] = tmp.mV[1]; + LLVector4a tmp(tc.mV[VX],tc.mV[VY],0.f); + mTextureMatrix->affineTransform(tmp,tmp); + tc.set(tmp.getF32ptr()); } else { @@ -1935,8 +1943,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range); - LLMatrix4a mat_vert; - mat_vert.loadu(mat_vert_in); + const LLMatrix4a& mat_vert = mat_vert_in; F32* dst = (F32*) vert.get(); F32* end_f32 = dst+mGeomCount*4; @@ -2053,10 +2060,6 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mVObjp->getVolume()->genTangents(f); - LLVector4Logical mask; - mask.clear(); - mask.setElement<3>(); - LLVector4a* src = vf.mTangents; LLVector4a* end = vf.mTangents+num_vertices; @@ -2065,7 +2068,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLVector4a tangent_out; mat_normal.rotate(*src, tangent_out); tangent_out.normalize3fast(); - tangent_out.setSelectWithMask(mask, *src, tangent_out); + tangent_out.copyComponent<3>(*src); tangent_out.store4a(tangents); src++; @@ -2529,7 +2532,7 @@ S32 LLFace::pushVertices(const U16* index_array) const return mIndicesCount; } -const LLMatrix4& LLFace::getRenderMatrix() const +const LLMatrix4a& LLFace::getRenderMatrix() const { return mDrawablep->getRenderMatrix(); } @@ -2545,7 +2548,7 @@ S32 LLFace::renderElements(const U16 *index_array) const else { gGL.pushMatrix(); - gGL.multMatrix((float*)getRenderMatrix().mMatrix); + gGL.multMatrix(getRenderMatrix()); ret = 
pushVertices(index_array); gGL.popMatrix(); } @@ -2605,7 +2608,10 @@ LLVector3 LLFace::getPositionAgent() const } else { - return mCenterLocal * getRenderMatrix(); + LLVector4a center_local; + center_local.load3(mCenterLocal.mV); + getRenderMatrix().affineTransform(center_local,center_local); + return LLVector3(center_local.getF32ptr()); } } diff --git a/indra/newview/llface.h b/indra/newview/llface.h index feae55853..946098bae 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -100,8 +100,8 @@ public: LLFace(LLDrawable* drawablep, LLViewerObject* objp) { init(drawablep, objp); } ~LLFace() { destroy(); } - const LLMatrix4& getWorldMatrix() const { return mVObjp->getWorldMatrix(mXform); } - const LLMatrix4& getRenderMatrix() const; + const LLMatrix4a& getWorldMatrix() const { return mVObjp->getWorldMatrix(mXform); } + const LLMatrix4a& getRenderMatrix() const; U32 getIndicesCount() const { return mIndicesCount; }; S32 getIndicesStart() const { return mIndicesIndex; }; U16 getGeomCount() const { return mGeomCount; } // vertex count for this face @@ -173,7 +173,7 @@ public: bool canRenderAsMask(); // logic helper BOOL getGeometryVolume(const LLVolume& volume, const S32 &f, - const LLMatrix4& mat_vert, const LLMatrix3& mat_normal, + const LLMatrix4a& mat_vert, const LLMatrix4a& mat_normal, const U16 &index_offset, bool force_rebuild = false); @@ -196,7 +196,7 @@ public: void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); - BOOL genVolumeBBoxes(const LLVolume &volume, S32 f,const LLMatrix4& mat, BOOL global_volume = FALSE); + BOOL genVolumeBBoxes(const LLVolume &volume, S32 f,const LLMatrix4a& mat, BOOL global_volume = FALSE); void init(LLDrawable* drawablep, LLViewerObject* objp); void destroy(); @@ -239,7 +239,7 @@ public: static U32 getRiggedDataMask(U32 type); public: //aligned members - LLVector4a mExtents[2]; + LL_ALIGN_16(LLVector4a mExtents[2]); private: F32 adjustPartialOverlapPixelArea(F32 cos_angle_to_view_dir, F32 radius 
); @@ -258,9 +258,7 @@ public: F32 mLastUpdateTime; F32 mLastSkinTime; F32 mLastMoveTime; - LLMatrix4* mTextureMatrix; - LLMatrix4* mSpecMapMatrix; - LLMatrix4* mNormalMapMatrix; + LLMatrix4a* mTextureMatrix; LLDrawInfo* mDrawInfo; bool mShinyInAlpha; diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index 3f34abc1d..e378c4458 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -80,17 +80,10 @@ LLVolumeImplFlexible::LLVolumeImplFlexible(LLViewerObject* vo, LLFlexibleObjectD LLVolumeImplFlexible::~LLVolumeImplFlexible() { - S32 end_idx = sInstanceList.size()-1; - - if (end_idx != mInstanceIndex) - { - sInstanceList[mInstanceIndex] = sInstanceList[end_idx]; - sInstanceList[mInstanceIndex]->mInstanceIndex = mInstanceIndex; - sUpdateDelay[mInstanceIndex] = sUpdateDelay[end_idx]; - } - - sInstanceList.pop_back(); - sUpdateDelay.pop_back(); + std::vector::iterator iter = vector_replace_with_last(sInstanceList, sInstanceList.begin() + mInstanceIndex); + if(iter != sInstanceList.end()) + (*iter)->mInstanceIndex = mInstanceIndex; + vector_replace_with_last(sUpdateDelay,sUpdateDelay.begin() + mInstanceIndex); } //static @@ -883,35 +876,38 @@ LLQuaternion LLVolumeImplFlexible::getEndRotation() void LLVolumeImplFlexible::updateRelativeXform(bool force_identity) { - LLQuaternion delta_rot; - LLVector3 delta_pos, delta_scale; + LLVOVolume* vo = (LLVOVolume*) mVO; bool use_identity = vo->mDrawable->isSpatialRoot() || force_identity; + vo->mRelativeXform.setIdentity(); + //matrix from local space to parent relative/global space - delta_rot = use_identity ? LLQuaternion() : vo->mDrawable->getRotation(); - delta_pos = use_identity ? 
LLVector3(0,0,0) : vo->mDrawable->getPosition(); - delta_scale = LLVector3(1,1,1); + LLVector4a delta_pos; + LLQuaternion2 delta_rot; + if(use_identity) + { + delta_pos.set(0,0,0,1.f); + delta_rot.getVector4aRw() = delta_pos; + } + else + { + delta_pos.load3(vo->mDrawable->getPosition().mV,1.f); + delta_rot.getVector4aRw().loadua(vo->mDrawable->getRotation().mQ); + vo->mRelativeXform.getRow<0>().setRotated(delta_rot,vo->mRelativeXform.getRow<0>()); + vo->mRelativeXform.getRow<1>().setRotated(delta_rot,vo->mRelativeXform.getRow<1>()); + vo->mRelativeXform.getRow<2>().setRotated(delta_rot,vo->mRelativeXform.getRow<2>()); + } - // Vertex transform (4x4) - LLVector3 x_axis = LLVector3(delta_scale.mV[VX], 0.f, 0.f) * delta_rot; - LLVector3 y_axis = LLVector3(0.f, delta_scale.mV[VY], 0.f) * delta_rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, delta_scale.mV[VZ]) * delta_rot; + vo->mRelativeXform.setRow<3>(delta_pos); - vo->mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(delta_pos, 1.f)); - - x_axis.normVec(); - y_axis.normVec(); - z_axis.normVec(); - - vo->mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); + vo->mRelativeXformInvTrans = vo->mRelativeXform; + vo->mRelativeXformInvTrans.invert(); + vo->mRelativeXformInvTrans.transpose(); } -const LLMatrix4& LLVolumeImplFlexible::getWorldMatrix(LLXformMatrix* xform) const +const LLMatrix4a& LLVolumeImplFlexible::getWorldMatrix(LLXformMatrix* xform) const { return xform->getWorldMatrix(); } diff --git a/indra/newview/llflexibleobject.h b/indra/newview/llflexibleobject.h index d8b322546..8768a13dc 100644 --- a/indra/newview/llflexibleobject.h +++ b/indra/newview/llflexibleobject.h @@ -98,7 +98,7 @@ private: bool isVolumeUnique() const { return true; } bool isVolumeGlobal() const { return true; } bool isActive() const { return true; } - const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const; + const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const; 
void updateRelativeXform(bool force_identity); void doFlexibleUpdate(); // Called to update the simulation void doFlexibleRebuild(); // Called to rebuild the geometry diff --git a/indra/newview/llfloaterbvhpreview.h b/indra/newview/llfloaterbvhpreview.h index deae420fb..c775f4135 100644 --- a/indra/newview/llfloaterbvhpreview.h +++ b/indra/newview/llfloaterbvhpreview.h @@ -40,13 +40,24 @@ class LLVOAvatar; class LLViewerJointMesh; +LL_ALIGN_PREFIX(16) class LLPreviewAnimation : public LLViewerDynamicTexture { public: virtual ~LLPreviewAnimation(); public: - LLPreviewAnimation(S32 width, S32 height); + LLPreviewAnimation(S32 width, S32 height); + + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } /*virtual*/ S8 getType() const ; @@ -69,7 +80,7 @@ protected: LLVector3 mCameraOffset; LLVector3 mCameraRelPos; LLPointer mDummyAvatar; -}; +} LL_ALIGN_POSTFIX(16); class LLFloaterBvhPreview : public LLFloaterNameDesc { diff --git a/indra/newview/llfloaterexploreanimations.h b/indra/newview/llfloaterexploreanimations.h index 95d97c050..bb3b91469 100644 --- a/indra/newview/llfloaterexploreanimations.h +++ b/indra/newview/llfloaterexploreanimations.h @@ -49,7 +49,7 @@ private: protected: void draw(); - LLPreviewAnimation mAnimPreview; + LL_ALIGN_16(LLPreviewAnimation mAnimPreview); LLRect mPreviewRect; S32 mLastMouseX; S32 mLastMouseY; diff --git a/indra/newview/llfloaterimagepreview.h b/indra/newview/llfloaterimagepreview.h index c2186450a..0acb80fd8 100644 --- a/indra/newview/llfloaterimagepreview.h +++ b/indra/newview/llfloaterimagepreview.h @@ -52,6 +52,16 @@ protected: public: LLImagePreviewSculpted(S32 width, S32 height); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + /*virtual*/ S8 getType() const ; void setPreviewTarget(LLImageRaw *imagep, F32 distance); @@ -85,6 
+95,16 @@ protected: public: LLImagePreviewAvatar(S32 width, S32 height); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + /*virtual*/ S8 getType() const ; void setPreviewTarget(const std::string& joint_name, const std::string& mesh_name, LLImageRaw* imagep, F32 distance, BOOL male); diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp index 159a25a21..26f4c136c 100644 --- a/indra/newview/llfloatermodelpreview.cpp +++ b/indra/newview/llfloatermodelpreview.cpp @@ -1594,9 +1594,11 @@ bool LLModelLoader::doLoadModel() mesh_scale *= normalized_transformation; normalized_transformation = mesh_scale; - glh::matrix4f inv_mat((F32*) normalized_transformation.mMatrix); - inv_mat = inv_mat.inverse(); - LLMatrix4 inverse_normalized_transformation(inv_mat.m); + LLMatrix4a inv_mat; + inv_mat.loadu(normalized_transformation); + inv_mat.invert(); + + LLMatrix4 inverse_normalized_transformation(inv_mat.getF32ptr()); domSkin::domBind_shape_matrix* bind_mat = skin->getBind_shape_matrix(); @@ -5135,9 +5137,10 @@ BOOL LLModelPreview::render() } gGL.pushMatrix(); - LLMatrix4 mat = instance.mTransform; + LLMatrix4a mat; + mat.loadu((F32*)instance.mTransform.mMatrix); - gGL.multMatrix((GLfloat*) mat.mMatrix); + gGL.multMatrix(mat); for (U32 i = 0; i < mVertexBuffer[mPreviewLOD][model].size(); ++i) { @@ -5218,9 +5221,10 @@ BOOL LLModelPreview::render() } gGL.pushMatrix(); - LLMatrix4 mat = instance.mTransform; + LLMatrix4a mat; + mat.loadu((F32*)instance.mTransform.mMatrix); - gGL.multMatrix((GLfloat*) mat.mMatrix); + gGL.multMatrix(mat); bool render_mesh = true; @@ -5325,9 +5329,10 @@ BOOL LLModelPreview::render() } gGL.pushMatrix(); - LLMatrix4 mat = instance.mTransform; + LLMatrix4a mat; + mat.loadu((F32*)instance.mTransform.mMatrix); - gGL.multMatrix((GLfloat*) mat.mMatrix); + gGL.multMatrix(mat); LLPhysicsDecomp* decomp = 
gMeshRepo.mDecompThread; diff --git a/indra/newview/llfloatermodelpreview.h b/indra/newview/llfloatermodelpreview.h index 8ac80d41a..46e290dcb 100644 --- a/indra/newview/llfloatermodelpreview.h +++ b/indra/newview/llfloatermodelpreview.h @@ -311,6 +311,16 @@ public: LLModelPreview(S32 width, S32 height, LLFloater* fmp); virtual ~LLModelPreview(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + void resetPreviewTarget(); void setPreviewTarget(F32 distance); void setTexture(U32 name) { mTextureName = name; } diff --git a/indra/newview/llhudrender.cpp b/indra/newview/llhudrender.cpp index e9b720d24..4c4173705 100644 --- a/indra/newview/llhudrender.cpp +++ b/indra/newview/llhudrender.cpp @@ -56,8 +56,6 @@ void hud_render_utf8text(const std::string &str, const LLVector3 &pos_agent, hud_render_text(wstr, pos_agent, font, style, shadow, x_offset, y_offset, color, orthographic); } -int glProjectf(const LLVector3& object, const F32* modelview, const F32* projection, const LLRect& viewport, LLVector3& windowCoordinate); - void hud_render_text(const LLWString &wstr, const LLVector3 &pos_agent, const LLFontGL &font, const U8 style, @@ -115,7 +113,7 @@ void hud_render_text(const LLWString &wstr, const LLVector3 &pos_agent, const LLRect& world_view_rect = gViewerWindow->getWorldViewRectRaw(); - glProjectf(render_pos, gGLModelView, gGLProjection, world_view_rect, window_coordinates); + gGL.projectf(render_pos, gGLModelView, gGLProjection, world_view_rect, window_coordinates); //fonts all render orthographically, set up projection`` gGL.matrixMode(LLRender::MM_PROJECTION); diff --git a/indra/newview/llmaniprotate.cpp b/indra/newview/llmaniprotate.cpp index 6f014f598..b04c0260e 100644 --- a/indra/newview/llmaniprotate.cpp +++ b/indra/newview/llmaniprotate.cpp @@ -183,9 +183,12 @@ void LLManipRotate::render() LLMatrix4 mat; mat.initRows(a, b, c, LLVector4(0.f, 0.f, 0.f, 1.f)); - 
gGL.multMatrix( &mat.mMatrix[0][0] ); + LLMatrix4a mata; + mata.loadu((F32*)mat.mMatrix); + gGL.multMatrix( mata ); - gGL.rotatef( -90, 0.f, 1.f, 0.f); + static const LLMatrix4a rot = gGL.genRot(-90, 0.f, 1.f, 0.f); + gGL.rotatef(rot); LLColor4 color; if (mManipPart == LL_ROT_ROLL || mHighlightedPart == LL_ROT_ROLL) { @@ -253,7 +256,8 @@ void LLManipRotate::render() mManipulatorScales = lerp(mManipulatorScales, LLVector4(1.f, SELECTED_MANIPULATOR_SCALE, 1.f, 1.f), LLCriticalDamp::getInterpolant(MANIPULATOR_SCALE_HALF_LIFE)); gGL.pushMatrix(); { - gGL.rotatef( 90.f, 1.f, 0.f, 0.f ); + static const LLMatrix4a rot = gGL.genRot( 90.f, 1.f, 0.f, 0.f ); + gGL.rotatef(rot); gGL.scalef(mManipulatorScales.mV[VY], mManipulatorScales.mV[VY], mManipulatorScales.mV[VY]); renderActiveRing( mRadiusMeters, width_meters, LLColor4( 0.f, 1.f, 0.f, 1.f), LLColor4( 0.f, 1.f, 0.f, 0.3f)); } @@ -264,7 +268,8 @@ void LLManipRotate::render() mManipulatorScales = lerp(mManipulatorScales, LLVector4(SELECTED_MANIPULATOR_SCALE, 1.f, 1.f, 1.f), LLCriticalDamp::getInterpolant(MANIPULATOR_SCALE_HALF_LIFE)); gGL.pushMatrix(); { - gGL.rotatef( 90.f, 0.f, 1.f, 0.f ); + static const LLMatrix4a rot = gGL.genRot( 90.f, 0.f, 1.f, 0.f ); + gGL.rotatef( rot ); gGL.scalef(mManipulatorScales.mV[VX], mManipulatorScales.mV[VX], mManipulatorScales.mV[VX]); renderActiveRing( mRadiusMeters, width_meters, LLColor4( 1.f, 0.f, 0.f, 1.f), LLColor4( 1.f, 0.f, 0.f, 0.3f)); } @@ -308,7 +313,8 @@ void LLManipRotate::render() gGL.pushMatrix(); { - gGL.rotatef( 90.f, 1.f, 0.f, 0.f ); + static const LLMatrix4a rot = gGL.genRot( 90.f, 1.f, 0.f, 0.f ); + gGL.rotatef( rot ); if (mHighlightedPart == LL_ROT_Y) { mManipulatorScales = lerp(mManipulatorScales, LLVector4(1.f, SELECTED_MANIPULATOR_SCALE, 1.f, 1.f), LLCriticalDamp::getInterpolant(MANIPULATOR_SCALE_HALF_LIFE)); @@ -326,7 +332,8 @@ void LLManipRotate::render() gGL.pushMatrix(); { - gGL.rotatef( 90.f, 0.f, 1.f, 0.f ); + static const LLMatrix4a rot = gGL.genRot( 90.f, 
0.f, 1.f, 0.f ); + gGL.rotatef( rot ); if (mHighlightedPart == LL_ROT_X) { mManipulatorScales = lerp(mManipulatorScales, LLVector4(SELECTED_MANIPULATOR_SCALE, 1.f, 1.f, 1.f), LLCriticalDamp::getInterpolant(MANIPULATOR_SCALE_HALF_LIFE)); diff --git a/indra/newview/llmanipscale.cpp b/indra/newview/llmanipscale.cpp index dbbe202a0..3ef134ee0 100644 --- a/indra/newview/llmanipscale.cpp +++ b/indra/newview/llmanipscale.cpp @@ -452,7 +452,7 @@ void LLManipScale::highlightManipulators(S32 x, S32 y) { LLVector4 translation(bbox.getPositionAgent()); transform.initRotTrans(bbox.getRotation(), translation); - LLMatrix4 cfr(OGL_TO_CFR_ROTATION); + LLMatrix4 cfr(OGL_TO_CFR_ROTATION.getF32ptr()); transform *= cfr; LLMatrix4 window_scale; F32 zoom_level = 2.f * gAgentCamera.mHUDCurZoom; @@ -463,8 +463,8 @@ void LLManipScale::highlightManipulators(S32 x, S32 y) } else { - LLMatrix4 projMatrix = LLViewerCamera::getInstance()->getProjection(); - LLMatrix4 modelView = LLViewerCamera::getInstance()->getModelview(); + LLMatrix4 projMatrix( LLViewerCamera::getInstance()->getProjection().getF32ptr() ); + LLMatrix4 modelView( LLViewerCamera::getInstance()->getModelview().getF32ptr() ); transform.initAll(LLVector3(1.f, 1.f, 1.f), bbox.getRotation(), bbox.getPositionAgent()); transform *= modelView; diff --git a/indra/newview/llmaniptranslate.cpp b/indra/newview/llmaniptranslate.cpp index 0dbd5c50d..b066e5275 100644 --- a/indra/newview/llmaniptranslate.cpp +++ b/indra/newview/llmaniptranslate.cpp @@ -807,8 +807,8 @@ void LLManipTranslate::highlightManipulators(S32 x, S32 y) } //LLBBox bbox = LLSelectMgr::getInstance()->getBBoxOfSelection(); - LLMatrix4 projMatrix = LLViewerCamera::getInstance()->getProjection(); - LLMatrix4 modelView = LLViewerCamera::getInstance()->getModelview(); + LLMatrix4 projMatrix( LLViewerCamera::getInstance()->getProjection().getF32ptr() ); + LLMatrix4 modelView( LLViewerCamera::getInstance()->getModelview().getF32ptr() ); LLVector3 object_position = 
getPivotPoint(); @@ -827,7 +827,7 @@ void LLManipTranslate::highlightManipulators(S32 x, S32 y) relative_camera_dir = LLVector3(1.f, 0.f, 0.f) * ~grid_rotation; LLVector4 translation(object_position); transform.initRotTrans(grid_rotation, translation); - LLMatrix4 cfr(OGL_TO_CFR_ROTATION); + LLMatrix4 cfr(OGL_TO_CFR_ROTATION.getF32ptr()); transform *= cfr; LLMatrix4 window_scale; F32 zoom_level = 2.f * gAgentCamera.mHUDCurZoom; @@ -1693,12 +1693,15 @@ void LLManipTranslate::highlightIntersection(LLVector3 normal, normal = -normal; } F32 d = -(selection_center * normal); - glh::vec4f plane(normal.mV[0], normal.mV[1], normal.mV[2], d ); + LLVector4a plane(normal.mV[0], normal.mV[1], normal.mV[2], d ); - gGL.getModelviewMatrix().inverse().mult_vec_matrix(plane); + LLMatrix4a inv_mat = gGL.getModelviewMatrix(); + inv_mat.invert(); + inv_mat.transpose(); + inv_mat.rotate4(plane,plane); static LLStaticHashedString sClipPlane("clip_plane"); - gClipProgram.uniform4fv(sClipPlane, 1, plane.v); + gClipProgram.uniform4fv(sClipPlane, 1, plane.getF32ptr()); BOOL particles = gPipeline.hasRenderType(LLPipeline::RENDER_TYPE_PARTICLES); #if ENABLE_CLASSIC_CLOUDS diff --git a/indra/newview/llnetmap.cpp b/indra/newview/llnetmap.cpp index e81c406a3..93b204759 100644 --- a/indra/newview/llnetmap.cpp +++ b/indra/newview/llnetmap.cpp @@ -726,24 +726,30 @@ void LLNetMap::draw() if (rotate_map) { - gGL.color4fv((map_frustum_color()).mV); + LLColor4 c = map_frustum_color(); gGL.begin( LLRender::TRIANGLES ); + gGL.color4fv(c.mV); gGL.vertex2f( ctr_x, ctr_y ); + c.mV[VW] *= .1f; + gGL.color4fv(c.mV); gGL.vertex2f( ctr_x - half_width_pixels, ctr_y + far_clip_pixels ); gGL.vertex2f( ctr_x + half_width_pixels, ctr_y + far_clip_pixels ); gGL.end(); } else { - gGL.color4fv((map_frustum_rotating_color()).mV); + LLColor4 c = map_frustum_rotating_color(); // If we don't rotate the map, we have to rotate the frustum. 
gGL.pushMatrix(); gGL.translatef( ctr_x, ctr_y, 0 ); gGL.rotatef( atan2( LLViewerCamera::getInstance()->getAtAxis().mV[VX], LLViewerCamera::getInstance()->getAtAxis().mV[VY] ) * RAD_TO_DEG, 0.f, 0.f, -1.f); gGL.begin( LLRender::TRIANGLES ); + gGL.color4fv(c.mV); gGL.vertex2f( 0.f, 0.f ); + c.mV[VW] *= .1f; + gGL.color4fv(c.mV); gGL.vertex2f( -half_width_pixels, far_clip_pixels ); gGL.vertex2f( half_width_pixels, far_clip_pixels ); gGL.end(); diff --git a/indra/newview/llpanelprimmediacontrols.cpp b/indra/newview/llpanelprimmediacontrols.cpp index 3ebc38710..5fa463a34 100644 --- a/indra/newview/llpanelprimmediacontrols.cpp +++ b/indra/newview/llpanelprimmediacontrols.cpp @@ -70,10 +70,10 @@ #include "lllayoutstack.h" // Functions pulled from pipeline.cpp -glh::matrix4f glh_get_current_modelview(); -glh::matrix4f glh_get_current_projection(); +const LLMatrix4a& glh_get_current_modelview(); +const LLMatrix4a& glh_get_current_projection(); // Functions pulled from llviewerdisplay.cpp -bool get_hud_matrices(glh::matrix4f &proj, glh::matrix4f &model); +bool get_hud_matrices(LLMatrix4a &proj, LLMatrix4a &model); // Warning: make sure these two match! 
const LLPanelPrimMediaControls::EZoomLevel LLPanelPrimMediaControls::kZoomLevels[] = { ZOOM_NONE, ZOOM_MEDIUM }; @@ -609,37 +609,45 @@ void LLPanelPrimMediaControls::updateShape() vert_it = vect_face.begin(); vert_end = vect_face.end(); - glh::matrix4f mat; + LLMatrix4a mat; if (!is_hud) { - mat = glh_get_current_projection() * glh_get_current_modelview(); + mat.setMul(glh_get_current_projection(),glh_get_current_modelview()); } else { - glh::matrix4f proj, modelview; + LLMatrix4a proj, modelview; if (get_hud_matrices(proj, modelview)) - mat = proj * modelview; + { + //mat = proj * modelview; + mat.setMul(proj,modelview); + } } - LLVector3 min = LLVector3(1,1,1); - LLVector3 max = LLVector3(-1,-1,-1); + LLVector4a min; + min.splat(1.f); + LLVector4a max; + max.splat(-1.f); for(; vert_it != vert_end; ++vert_it) { // project silhouette vertices into screen space - glh::vec3f screen_vert = glh::vec3f(vert_it->mV); - mat.mult_matrix_vec(screen_vert); - + LLVector4a screen_vert; + screen_vert.load3(vert_it->mV,1.f); + + mat.perspectiveTransform(screen_vert,screen_vert); + // add to screenspace bounding box - update_min_max(min, max, LLVector3(screen_vert.v)); + min.setMin(screen_vert,min); + max.setMax(screen_vert,max); } // convert screenspace bbox to pixels (in screen coords) LLRect window_rect = gViewerWindow->getWorldViewRectScaled(); LLCoordGL screen_min; - screen_min.mX = llround((F32)window_rect.mLeft + (F32)window_rect.getWidth() * (min.mV[VX] + 1.f) * 0.5f); - screen_min.mY = llround((F32)window_rect.mBottom + (F32)window_rect.getHeight() * (min.mV[VY] + 1.f) * 0.5f); + screen_min.mX = llround((F32)window_rect.mLeft + (F32)window_rect.getWidth() * (min.getF32ptr()[VX] + 1.f) * 0.5f); + screen_min.mY = llround((F32)window_rect.mBottom + (F32)window_rect.getHeight() * (min.getF32ptr()[VY] + 1.f) * 0.5f); LLCoordGL screen_max; - screen_max.mX = llround((F32)window_rect.mLeft + (F32)window_rect.getWidth() * (max.mV[VX] + 1.f) * 0.5f); - screen_max.mY = 
llround((F32)window_rect.mBottom + (F32)window_rect.getHeight() * (max.mV[VY] + 1.f) * 0.5f); + screen_max.mX = llround((F32)window_rect.mLeft + (F32)window_rect.getWidth() * (max.getF32ptr()[VX] + 1.f) * 0.5f); + screen_max.mY = llround((F32)window_rect.mBottom + (F32)window_rect.getHeight() * (max.getF32ptr()[VY] + 1.f) * 0.5f); // grow panel so that screenspace bounding box fits inside "media_region" element of panel LLRect media_panel_rect; diff --git a/indra/newview/llselectmgr.cpp b/indra/newview/llselectmgr.cpp index 1195fb834..c10570d7d 100644 --- a/indra/newview/llselectmgr.cpp +++ b/indra/newview/llselectmgr.cpp @@ -1262,12 +1262,12 @@ void LLSelectMgr::getGrid(LLVector3& origin, LLQuaternion &rotation, LLVector3 & size.setSub(max_extents, min_extents); size.mul(0.5f); - mGridOrigin.set(center.getF32ptr()); LLDrawable* drawable = first_grid_object->mDrawable; if (drawable && drawable->isActive()) { - mGridOrigin = mGridOrigin * first_grid_object->getRenderMatrix(); + first_grid_object->getRenderMatrix().affineTransform(center,center); } + mGridOrigin.set(center.getF32ptr()); mGridScale.set(size.getF32ptr()); } } @@ -6119,7 +6119,7 @@ void pushWireframe(LLDrawable* drawable) { LLVertexBuffer::unbind(); gGL.pushMatrix(); - gGL.multMatrix((F32*) vobj->getRelativeXform().mMatrix); + gGL.multMatrix(vobj->getRelativeXform()); LLVolume* volume = NULL; @@ -6176,7 +6176,7 @@ void LLSelectNode::renderOneWireframe(const LLColor4& color) if (drawable->isActive()) { gGL.loadMatrix(gGLModelView); - gGL.multMatrix((F32*) objectp->getRenderMatrix().mMatrix); + gGL.multMatrix(objectp->getRenderMatrix()); } else if (!is_hud_object) { @@ -6297,7 +6297,7 @@ void LLSelectNode::renderOneSilhouette(const LLColor4 &color) if (drawable->isActive()) { - gGL.multMatrix((F32*) objectp->getRenderMatrix().mMatrix); + gGL.multMatrix(objectp->getRenderMatrix()); } LLVolume *volume = objectp->getVolume(); diff --git a/indra/newview/llspatialpartition.cpp 
b/indra/newview/llspatialpartition.cpp index 1eef9a463..9c8182cef 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -2917,7 +2917,7 @@ void renderNormals(LLDrawable* drawablep) { LLVolume* volume = vol->getVolume(); gGL.pushMatrix(); - gGL.multMatrix((F32*) vol->getRelativeXform().mMatrix); + gGL.multMatrix(vol->getRelativeXform()); gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); @@ -3071,7 +3071,7 @@ void renderPhysicsShape(LLDrawable* drawable, LLVOVolume* volume) LLVector3 size(0.25f,0.25f,0.25f); gGL.pushMatrix(); - gGL.multMatrix((F32*) volume->getRelativeXform().mMatrix); + gGL.multMatrix(volume->getRelativeXform()); if (type == LLPhysicsShapeBuilderUtil::PhysicsShapeSpecification::USER_MESH) { @@ -3369,7 +3369,7 @@ void renderPhysicsShapes(LLSpatialGroup* group) if (object && object->getPCode() == LLViewerObject::LL_VO_SURFACE_PATCH) { gGL.pushMatrix(); - gGL.multMatrix((F32*) object->getRegion()->mRenderMatrix.mMatrix); + gGL.multMatrix(object->getRegion()->mRenderMatrix); //push face vertices for terrain for (S32 i = 0; i < drawable->getNumFaces(); ++i) { @@ -3576,6 +3576,7 @@ void renderLights(LLDrawable* drawablep) } } +LL_ALIGN_PREFIX(16) class LLRenderOctreeRaycast : public LLOctreeTriangleRayIntersect { public: @@ -3648,7 +3649,7 @@ public: } } } -}; +} LL_ALIGN_POSTFIX(16); void renderRaycast(LLDrawable* drawablep) { @@ -3683,7 +3684,7 @@ void renderRaycast(LLDrawable* drawablep) gGL.pushMatrix(); gGL.translatef(trans.mV[0], trans.mV[1], trans.mV[2]); - gGL.multMatrix((F32*) vobj->getRelativeXform().mMatrix); + gGL.multMatrix(vobj->getRelativeXform()); LLVector4a start, end; if (transform) @@ -3760,10 +3761,13 @@ void renderRaycast(LLDrawable* drawablep) LLVector3 normal(gDebugRaycastNormal.getF32ptr()); LLVector3 binormal(debug_binormal.getF32ptr()); + //LLCoordFrame isn't vectorized, for now. 
orient.lookDir(normal, binormal); LLMatrix4 rotation; orient.getRotMatrixToParent(rotation); - gGL.multMatrix((float*)rotation.mMatrix); + LLMatrix4a rotationa; + rotationa.loadu((F32*)rotation.mMatrix); + gGL.multMatrix(rotationa); gGL.diffuseColor4f(1,0,0,0.5f); drawBox(LLVector3(0, 0, 0), LLVector3(0.1f, 0.022f, 0.022f)); @@ -4330,14 +4334,11 @@ public: if (group->mSpatialPartition->isBridge()) { - LLMatrix4 local_matrix = group->mSpatialPartition->asBridge()->mDrawable->getRenderMatrix(); + LLMatrix4a local_matrix = group->mSpatialPartition->asBridge()->mDrawable->getRenderMatrix(); local_matrix.invert(); - LLMatrix4a local_matrix4a; - local_matrix4a.loadu(local_matrix); - - local_matrix4a.affineTransform(mStart, local_start); - local_matrix4a.affineTransform(mEnd, local_end); + local_matrix.affineTransform(mStart, local_start); + local_matrix.affineTransform(mEnd, local_end); } if (LLLineSegmentBoxIntersect(local_start, local_end, center, size)) diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index 8d2b5a47b..29f86656a 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -102,15 +102,15 @@ public: void validate(); - LLVector4a mExtents[2]; + LL_ALIGN_16(LLVector4a mExtents[2]); LLPointer mVertexBuffer; LLPointer mTexture; std::vector > mTextureList; S32 mDebugColor; - const LLMatrix4* mTextureMatrix; - const LLMatrix4* mModelMatrix; + const LLMatrix4a* mTextureMatrix; + const LLMatrix4a* mModelMatrix; U16 mStart; U16 mEnd; U32 mCount; diff --git a/indra/newview/lltoolmorph.h b/indra/newview/lltoolmorph.h index 950a3ab11..a3f557a46 100644 --- a/indra/newview/lltoolmorph.h +++ b/indra/newview/lltoolmorph.h @@ -65,6 +65,16 @@ public: LLWearable *wearable, F32 param_weight); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + /*virtual*/ S8 getType() const ; BOOL needsRender(); @@ -110,6 
+120,17 @@ protected: /*virtual */ ~LLVisualParamReset(){} public: LLVisualParamReset(); + + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + /*virtual */ BOOL render(); /*virtual*/ S8 getType() const ; diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp index b70183fd4..ba35d78d8 100644 --- a/indra/newview/llviewercamera.cpp +++ b/indra/newview/llviewercamera.cpp @@ -27,10 +27,9 @@ #include "llviewerprecompiledheaders.h" #include "llviewercamera.h" - #include "llagent.h" #include "llagentcamera.h" -#include "llmatrix4a.h" + #include "llviewercontrol.h" #include "llviewerobjectlist.h" #include "llviewerregion.h" @@ -56,53 +55,6 @@ U32 LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_WORLD; -//glu pick matrix implementation borrowed from Mesa3D -glh::matrix4f gl_pick_matrix(GLfloat x, GLfloat y, GLfloat width, GLfloat height, GLint* viewport) -{ - GLfloat m[16]; - GLfloat sx, sy; - GLfloat tx, ty; - - sx = viewport[2] / width; - sy = viewport[3] / height; - tx = (viewport[2] + 2.f * (viewport[0] - x)) / width; - ty = (viewport[3] + 2.f * (viewport[1] - y)) / height; - - #define M(row,col) m[col*4+row] - M(0,0) = sx; M(0,1) = 0.f; M(0,2) = 0.f; M(0,3) = tx; - M(1,0) = 0.f; M(1,1) = sy; M(1,2) = 0.f; M(1,3) = ty; - M(2,0) = 0.f; M(2,1) = 0.f; M(2,2) = 1.f; M(2,3) = 0.f; - M(3,0) = 0.f; M(3,1) = 0.f; M(3,2) = 0.f; M(3,3) = 1.f; - #undef M - - return glh::matrix4f(m); -} - -glh::matrix4f gl_perspective(GLfloat fovy, GLfloat aspect, GLfloat zNear, GLfloat zFar) -{ - GLfloat f = 1.f/tanf(DEG_TO_RAD*fovy/2.f); - - return glh::matrix4f(f/aspect, 0, 0, 0, - 0, f, 0, 0, - 0, 0, (zFar+zNear)/(zNear-zFar), (2.f*zFar*zNear)/(zNear-zFar), - 0, 0, -1.f, 0); -} - -glh::matrix4f gl_lookat(LLVector3 eye, LLVector3 center, LLVector3 up) -{ - LLVector3 f = center-eye; - f.normVec(); - up.normVec(); - LLVector3 s = f % up; - LLVector3 u = s % f; - - 
return glh::matrix4f(s[0], s[1], s[2], 0, - u[0], u[1], u[2], 0, - -f[0], -f[1], -f[2], 0, - 0, 0, 0, 1); - -} - LLViewerCamera::LLViewerCamera() : LLCamera() { calcProjection(getFar()); @@ -172,37 +124,26 @@ void LLViewerCamera::updateCameraLocation(const LLVector3 &center, mScreenPixelArea =(S32)((F32)getViewHeightInPixels() * ((F32)getViewHeightInPixels() * getAspect())); } -const LLMatrix4 &LLViewerCamera::getProjection() const +const LLMatrix4a &LLViewerCamera::getProjection() const { calcProjection(getFar()); return mProjectionMatrix; } -const LLMatrix4 &LLViewerCamera::getModelview() const +const LLMatrix4a &LLViewerCamera::getModelview() const { - LLMatrix4 cfr(OGL_TO_CFR_ROTATION); - getMatrixToLocal(mModelviewMatrix); - mModelviewMatrix *= cfr; + LLMatrix4 modelview; + getMatrixToLocal(modelview); + LLMatrix4a modelviewa; + modelviewa.loadu((F32*)modelview.mMatrix); + mModelviewMatrix.setMul(OGL_TO_CFR_ROTATION,modelviewa); return mModelviewMatrix; } void LLViewerCamera::calcProjection(const F32 far_distance) const { - F32 fov_y, z_far, z_near, aspect, f; - fov_y = getView(); - z_far = far_distance; - z_near = getNear(); - aspect = getAspect(); - - f = 1/tan(fov_y*0.5f); - - mProjectionMatrix.setZero(); - mProjectionMatrix.mMatrix[0][0] = f/aspect; - mProjectionMatrix.mMatrix[1][1] = f; - mProjectionMatrix.mMatrix[2][2] = (z_far + z_near)/(z_near - z_far); - mProjectionMatrix.mMatrix[3][2] = (2*z_far*z_near)/(z_near - z_far); - mProjectionMatrix.mMatrix[2][3] = -1; + mProjectionMatrix = gGL.genPersp( getView()*RAD_TO_DEG, getAspect(), getNear(), far_distance ); } // Sets up opengl state for 3D drawing. 
If for selection, also @@ -213,59 +154,33 @@ void LLViewerCamera::calcProjection(const F32 far_distance) const //static void LLViewerCamera::updateFrustumPlanes(LLCamera& camera, BOOL ortho, BOOL zflip, BOOL no_hacks) { - GLint* viewport = (GLint*) gGLViewport; - F64 model[16]; - F64 proj[16]; - - for (U32 i = 0; i < 16; i++) - { - model[i] = (F64) gGLModelView[i]; - proj[i] = (F64) gGLProjection[i]; - } - - GLdouble objX,objY,objZ; - LLVector3 frust[8]; + LLRect view_port(gGLViewport[0],gGLViewport[1]+gGLViewport[3],gGLViewport[0]+gGLViewport[2],gGLViewport[1]); + if (no_hacks) { - gluUnProject(viewport[0],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[0].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[1].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[2].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[3].setVec((F32)objX,(F32)objY,(F32)objZ); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[0]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[1]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[2]); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[3]); - gluUnProject(viewport[0],viewport[1],1,model,proj,viewport,&objX,&objY,&objZ); - frust[4].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1],1,model,proj,viewport,&objX,&objY,&objZ); - frust[5].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1]+viewport[3],1,model,proj,viewport,&objX,&objY,&objZ); - 
frust[6].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0],viewport[1]+viewport[3],1,model,proj,viewport,&objX,&objY,&objZ); - frust[7].setVec((F32)objX,(F32)objY,(F32)objZ); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mBottom,1.f),gGLModelView,gGLProjection,view_port,frust[4]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mBottom,1.f),gGLModelView,gGLProjection,view_port,frust[5]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mTop,1.f),gGLModelView,gGLProjection,view_port,frust[6]); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mTop,1.f),gGLModelView,gGLProjection,view_port,frust[7]); } else if (zflip) { - gluUnProject(viewport[0],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[0].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[1].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[2].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[3].setVec((F32)objX,(F32)objY,(F32)objZ); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[0]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[1]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[2]); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[3]); - gluUnProject(viewport[0],viewport[1]+viewport[3],1,model,proj,viewport,&objX,&objY,&objZ); - frust[4].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1]+viewport[3],1,model,proj,viewport,&objX,&objY,&objZ); - frust[5].setVec((F32)objX,(F32)objY,(F32)objZ); - 
gluUnProject(viewport[0]+viewport[2],viewport[1],1,model,proj,viewport,&objX,&objY,&objZ); - frust[6].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0],viewport[1],1,model,proj,viewport,&objX,&objY,&objZ); - frust[7].setVec((F32)objX,(F32)objY,(F32)objZ); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mTop,1.f),gGLModelView,gGLProjection,view_port,frust[4]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mTop,1.f),gGLModelView,gGLProjection,view_port,frust[5]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mBottom,1.f),gGLModelView,gGLProjection,view_port,frust[6]); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mBottom,1.f),gGLModelView,gGLProjection,view_port,frust[7]); for (U32 i = 0; i < 4; i++) { @@ -276,14 +191,10 @@ void LLViewerCamera::updateFrustumPlanes(LLCamera& camera, BOOL ortho, BOOL zfli } else { - gluUnProject(viewport[0],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[0].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1],0,model,proj,viewport,&objX,&objY,&objZ); - frust[1].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0]+viewport[2],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[2].setVec((F32)objX,(F32)objY,(F32)objZ); - gluUnProject(viewport[0],viewport[1]+viewport[3],0,model,proj,viewport,&objX,&objY,&objZ); - frust[3].setVec((F32)objX,(F32)objY,(F32)objZ); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[0]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mBottom,0.f),gGLModelView,gGLProjection,view_port,frust[1]); + gGL.unprojectf(LLVector3(view_port.mRight,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[2]); + gGL.unprojectf(LLVector3(view_port.mLeft,view_port.mTop,0.f),gGLModelView,gGLProjection,view_port,frust[3]); if (ortho) { @@ -330,20 +241,24 @@ void LLViewerCamera::setPerspective(BOOL for_selection, gGL.matrixMode( 
LLRender::MM_PROJECTION ); gGL.loadIdentity(); - glh::matrix4f proj_mat; + LLMatrix4a proj_mat; + proj_mat.setIdentity(); if (for_selection) { // make a tiny little viewport // anything drawn into this viewport will be "selected" - GLint viewport[4]; - viewport[0] = gViewerWindow->getWorldViewRectRaw().mLeft; - viewport[1] = gViewerWindow->getWorldViewRectRaw().mBottom; - viewport[2] = gViewerWindow->getWorldViewRectRaw().getWidth(); - viewport[3] = gViewerWindow->getWorldViewRectRaw().getHeight(); + const LLRect& rect = gViewerWindow->getWorldViewRectRaw(); - proj_mat = gl_pick_matrix(x+width/2.f, y_from_bot+height/2.f, (GLfloat) width, (GLfloat) height, viewport); + const F32 scale_x = rect.getWidth() / F32(width); + const F32 scale_y = rect.getHeight() / F32(height); + const F32 trans_x = scale_x + (2.f * (rect.mLeft - x)) / F32(width) - 1.f; + const F32 trans_y = scale_y + (2.f * (rect.mBottom - y_from_bot)) / F32(height) - 1.f; + + //Generate a pick matrix + proj_mat.applyScale_affine(scale_x, scale_y, 1.f); + proj_mat.setTranslate_affine(LLVector3(trans_x, trans_y, 0.f)); if (limit_select_distance) { @@ -377,37 +292,28 @@ void LLViewerCamera::setPerspective(BOOL for_selection, float offset = mZoomFactor - 1.f; int pos_y = mZoomSubregion / llceil(mZoomFactor); int pos_x = mZoomSubregion - (pos_y*llceil(mZoomFactor)); - glh::matrix4f translate; - translate.set_translate(glh::vec3f(offset - (F32)pos_x * 2.f, offset - (F32)pos_y * 2.f, 0.f)); - glh::matrix4f scale; - scale.set_scale(glh::vec3f(mZoomFactor, mZoomFactor, 1.f)); - proj_mat = scale*proj_mat; - proj_mat = translate*proj_mat; + proj_mat.applyScale_affine(mZoomFactor,mZoomFactor,1.f); + proj_mat.applyTranslation_affine(offset - (F32)pos_x * 2.f, offset - (F32)pos_y * 2.f, 0.f); } calcProjection(z_far); // Update the projection matrix cache - proj_mat *= gl_perspective(fov_y,aspect,z_near,z_far); - - gGL.loadMatrix(proj_mat.m); - - for (U32 i = 0; i < 16; i++) - { - gGLProjection[i] = proj_mat.m[i]; - } 
+ proj_mat.mul(gGL.genPersp(fov_y,aspect,z_near,z_far)); + + gGL.loadMatrix(proj_mat); + + gGLProjection = proj_mat; gGL.matrixMode(LLRender::MM_MODELVIEW ); - glh::matrix4f modelview((GLfloat*) OGL_TO_CFR_ROTATION); + LLMatrix4a ogl_matrix; + getOpenGLTransform(ogl_matrix.getF32ptr()); - GLfloat ogl_matrix[16]; - - getOpenGLTransform(ogl_matrix); - - modelview *= glh::matrix4f(ogl_matrix); + LLMatrix4a modelview; + modelview.setMul(OGL_TO_CFR_ROTATION, ogl_matrix); - gGL.loadMatrix(modelview.m); + gGL.loadMatrix(modelview); if (for_selection && (width > 1 || height > 1)) { @@ -426,10 +332,7 @@ void LLViewerCamera::setPerspective(BOOL for_selection, { // Save GL matrices for access elsewhere in code, especially project_world_to_screen //glGetDoublev(GL_MODELVIEW_MATRIX, gGLModelView); - for (U32 i = 0; i < 16; i++) - { - gGLModelView[i] = modelview.m[i]; - } + glh_set_current_modelview(modelview); } updateFrustumPlanes(*this); @@ -443,89 +346,14 @@ void LLViewerCamera::setPerspective(BOOL for_selection, }*/ } - // Uses the last GL matrices set in set_perspective to project a point from // screen coordinates to the agent's region. 
void LLViewerCamera::projectScreenToPosAgent(const S32 screen_x, const S32 screen_y, LLVector3* pos_agent) const { - GLdouble x, y, z; - - F64 mdlv[16]; - F64 proj[16]; - - for (U32 i = 0; i < 16; i++) - { - mdlv[i] = (F64) gGLModelView[i]; - proj[i] = (F64) gGLProjection[i]; - } - - gluUnProject( - GLdouble(screen_x), GLdouble(screen_y), 0.0, - mdlv, proj, (GLint*)gGLViewport, - &x, - &y, - &z ); - pos_agent->setVec( (F32)x, (F32)y, (F32)z ); -} - -//Based off of http://www.opengl.org/wiki/GluProject_and_gluUnProject_code -int glProjectf(const LLVector3& object, const F32* modelview, const F32* projection, const LLRect& viewport, LLVector3& windowCoordinate) -{ - const LLVector4a obj_vector(object.mV[VX],object.mV[VY],object.mV[VZ]); - LLVector4a temp_matrix; - - const LLMatrix4a &view_matrix=*(LLMatrix4a*)modelview; - const LLMatrix4a &proj_matrix=*(LLMatrix4a*)projection; - - view_matrix.affineTransform(obj_vector, temp_matrix); - - //Passing temp_matrix as v and res is safe. res not altered until after all other calculations - proj_matrix.rotate4(temp_matrix, temp_matrix); - - if(temp_matrix[VW]==0.0) - return 0; - - temp_matrix.div(temp_matrix[VW]); - - //Map x, y to range 0-1 - temp_matrix.mul(.5f); - temp_matrix.add(.5f); - - //Window coordinates - windowCoordinate[0]=temp_matrix[VX]*viewport.getWidth()+viewport.mLeft; - windowCoordinate[1]=temp_matrix[VY]*viewport.getHeight()+viewport.mBottom; - //This is only correct when glDepthRange(0.0, 1.0) - windowCoordinate[2]=temp_matrix[VZ]; - - return 1; -} - -void MultiplyMatrices4by4OpenGL_FLOAT(LLMatrix4a& dest_matrix, const LLMatrix4a& input_matrix1, const LLMatrix4a& input_matrix2) -{ - input_matrix1.rotate4(input_matrix2.mMatrix[VX],dest_matrix.mMatrix[VX]); - input_matrix1.rotate4(input_matrix2.mMatrix[VY],dest_matrix.mMatrix[VY]); - input_matrix1.rotate4(input_matrix2.mMatrix[VZ],dest_matrix.mMatrix[VZ]); - input_matrix1.rotate4(input_matrix2.mMatrix[VW],dest_matrix.mMatrix[VW]); - - //Those four lines do 
this: - /* - result[0]=matrix1[0]*matrix2[0]+matrix1[4]*matrix2[1]+matrix1[8]*matrix2[2]+matrix1[12]*matrix2[3]; - result[1]=matrix1[1]*matrix2[0]+matrix1[5]*matrix2[1]+matrix1[9]*matrix2[2]+matrix1[13]*matrix2[3]; - result[2]=matrix1[2]*matrix2[0]+matrix1[6]*matrix2[1]+matrix1[10]*matrix2[2]+matrix1[14]*matrix2[3]; - result[3]=matrix1[3]*matrix2[0]+matrix1[7]*matrix2[1]+matrix1[11]*matrix2[2]+matrix1[15]*matrix2[3]; - result[4]=matrix1[0]*matrix2[4]+matrix1[4]*matrix2[5]+matrix1[8]*matrix2[6]+matrix1[12]*matrix2[7]; - result[5]=matrix1[1]*matrix2[4]+matrix1[5]*matrix2[5]+matrix1[9]*matrix2[6]+matrix1[13]*matrix2[7]; - result[6]=matrix1[2]*matrix2[4]+matrix1[6]*matrix2[5]+matrix1[10]*matrix2[6]+matrix1[14]*matrix2[7]; - result[7]=matrix1[3]*matrix2[4]+matrix1[7]*matrix2[5]+matrix1[11]*matrix2[6]+matrix1[15]*matrix2[7]; - result[8]=matrix1[0]*matrix2[8]+matrix1[4]*matrix2[9]+matrix1[8]*matrix2[10]+matrix1[12]*matrix2[11]; - result[9]=matrix1[1]*matrix2[8]+matrix1[5]*matrix2[9]+matrix1[9]*matrix2[10]+matrix1[13]*matrix2[11]; - result[10]=matrix1[2]*matrix2[8]+matrix1[6]*matrix2[9]+matrix1[10]*matrix2[10]+matrix1[14]*matrix2[11]; - result[11]=matrix1[3]*matrix2[8]+matrix1[7]*matrix2[9]+matrix1[11]*matrix2[10]+matrix1[15]*matrix2[11]; - result[12]=matrix1[0]*matrix2[12]+matrix1[4]*matrix2[13]+matrix1[8]*matrix2[14]+matrix1[12]*matrix2[15]; - result[13]=matrix1[1]*matrix2[12]+matrix1[5]*matrix2[13]+matrix1[9]*matrix2[14]+matrix1[13]*matrix2[15]; - result[14]=matrix1[2]*matrix2[12]+matrix1[6]*matrix2[13]+matrix1[10]*matrix2[14]+matrix1[14]*matrix2[15]; - result[15]=matrix1[3]*matrix2[12]+ matrix1[7]*matrix2[13]+matrix1[11]*matrix2[14]+matrix1[15]*matrix2[15]; - */ + gGL.unprojectf( + LLVector3(screen_x,screen_y,0.f), + gGLModelView, gGLProjection, LLRect(gGLViewport[0],gGLViewport[1]+gGLViewport[3],gGLViewport[0]+gGLViewport[2],gGLViewport[1]), + *pos_agent ); } // Uses the last GL matrices set in set_perspective to project a point from @@ -553,7 +381,7 @@ BOOL 
LLViewerCamera::projectPosAgentToScreen(const LLVector3 &pos_agent, LLCoord const LLRect& world_view_rect = gViewerWindow->getWorldViewRectRaw(); - if (GL_TRUE == glProjectf(pos_agent, gGLModelView, gGLProjection, world_view_rect, window_coordinates)) + if (gGL.projectf(pos_agent, gGLModelView, gGLProjection, world_view_rect, window_coordinates)) { F32 &x = window_coordinates.mV[VX]; F32 &y = window_coordinates.mV[VY]; @@ -653,7 +481,7 @@ BOOL LLViewerCamera::projectPosAgentToScreenEdge(const LLVector3 &pos_agent, const LLRect& world_view_rect = gViewerWindow->getWorldViewRectRaw(); LLVector3 window_coordinates; - if (GL_TRUE == glProjectf(pos_agent, gGLModelView, gGLProjection, world_view_rect, window_coordinates)) + if (gGL.projectf(pos_agent, gGLModelView, gGLProjection, world_view_rect, window_coordinates)) { F32 &x = window_coordinates.mV[VX]; F32 &y = window_coordinates.mV[VY]; @@ -848,14 +676,12 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts) LLVOVolume* vo_volume = (LLVOVolume*) volumep; vo_volume->updateRelativeXform(); - LLMatrix4 mat = vo_volume->getRelativeXform(); LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition())); LLMatrix4a render_mata; render_mata.loadu(render_mat); - LLMatrix4a mata; - mata.loadu(mat); + const LLMatrix4a& mata = vo_volume->getRelativeXform();; num_faces = volume->getNumVolumeFaces(); for (i = 0; i < num_faces; i++) diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h index d9ac2af30..8d4e9ce97 100644 --- a/indra/newview/llviewercamera.h +++ b/indra/newview/llviewercamera.h @@ -32,16 +32,17 @@ #include "llstat.h" #include "lltimer.h" #include "m4math.h" +#include "llmatrix4a.h" #include "llcoord.h" class LLViewerObject; // This rotation matrix moves the default OpenGL reference frame // (-Z at, Y up) to Cory's favorite reference frame (X at, Z up) -const F32 OGL_TO_CFR_ROTATION[16] = { 0.f, 0.f, -1.f, 0.f, // -Z becomes X - -1.f, 
0.f, 0.f, 0.f, // -X becomes Y - 0.f, 1.f, 0.f, 0.f, // Y becomes Z - 0.f, 0.f, 0.f, 1.f }; +static LL_ALIGN_16(const LLMatrix4a OGL_TO_CFR_ROTATION(LLVector4a( 0.f, 0.f, -1.f, 0.f), // -Z becomes X + LLVector4a(-1.f, 0.f, 0.f, 0.f), // -X becomes Y + LLVector4a( 0.f, 1.f, 0.f, 0.f), // Y becomes Z + LLVector4a( 0.f, 0.f, 0.f, 1.f) )); const BOOL FOR_SELECTION = TRUE; const BOOL NOT_FOR_SELECTION = FALSE; @@ -88,8 +89,8 @@ public: static void updateCameraAngle(void* user_data, const LLSD& value); void setPerspective(BOOL for_selection, S32 x, S32 y_from_bot, S32 width, S32 height, BOOL limit_select_distance, F32 z_near = 0, F32 z_far = 0); - const LLMatrix4 &getProjection() const; - const LLMatrix4 &getModelview() const; + const LLMatrix4a &getProjection() const; + const LLMatrix4a &getModelview() const; // Warning! These assume the current global matrices are correct void projectScreenToPosAgent(const S32 screen_x, const S32 screen_y, LLVector3* pos_agent ) const; @@ -137,8 +138,8 @@ protected: F32 mAverageSpeed ; F32 mAverageAngularSpeed ; - mutable LLMatrix4 mProjectionMatrix; // Cache of perspective matrix - mutable LLMatrix4 mModelviewMatrix; + mutable LLMatrix4a mProjectionMatrix; // Cache of perspective matrix + mutable LLMatrix4a mModelviewMatrix; F32 mCameraFOVDefault; F32 mSavedFOVDefault; // F32 mCosHalfCameraFOV; diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index 093f61d4f..4994869ce 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -767,8 +767,8 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot, boo LLGLState::checkTextureChannels(); LLGLState::checkClientArrays(); - glh::matrix4f proj = glh_get_current_projection(); - glh::matrix4f mod = glh_get_current_modelview(); + const LLMatrix4a saved_proj = glh_get_current_projection(); + const LLMatrix4a saved_mod = glh_get_current_modelview(); glViewport(0,0,512,512); LLVOAvatar::updateFreezeCounter() ; 
@@ -777,12 +777,12 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot, boo LLVOAvatar::updateImpostors(); } - glh_set_current_projection(proj); - glh_set_current_modelview(mod); + glh_set_current_projection(saved_proj); + glh_set_current_modelview(saved_mod); gGL.matrixMode(LLRender::MM_PROJECTION); - gGL.loadMatrix(proj.m); + gGL.loadMatrix(saved_proj); gGL.matrixMode(LLRender::MM_MODELVIEW); - gGL.loadMatrix(mod.m); + gGL.loadMatrix(saved_mod); gViewerWindow->setup3DViewport(); LLGLState::checkStates(); @@ -1049,12 +1049,9 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot, boo //store this frame's modelview matrix for use //when rendering next frame's occlusion queries - for (U32 i = 0; i < 16; i++) - { - gGLPreviousModelView[i] = gGLLastModelView[i]; - gGLLastModelView[i] = gGLModelView[i]; - gGLLastProjection[i] = gGLProjection[i]; - } + gGLPreviousModelView = gGLLastModelView; + gGLLastModelView = gGLModelView; + gGLLastProjection = gGLProjection; stop_glerror(); } @@ -1146,8 +1143,8 @@ void render_hud_attachments() gGL.matrixMode(LLRender::MM_MODELVIEW); gGL.pushMatrix(); - glh::matrix4f current_proj = glh_get_current_projection(); - glh::matrix4f current_mod = glh_get_current_modelview(); + const LLMatrix4a saved_proj = glh_get_current_projection(); + const LLMatrix4a saved_mod = glh_get_current_modelview(); // clamp target zoom level to reasonable values // gAgentCamera.mHUDTargetZoom = llclamp(gAgentCamera.mHUDTargetZoom, 0.1f, 1.f); @@ -1243,8 +1240,8 @@ void render_hud_attachments() gGL.matrixMode(LLRender::MM_MODELVIEW); gGL.popMatrix(); - glh_set_current_projection(current_proj); - glh_set_current_modelview(current_mod); + glh_set_current_projection(saved_proj); + glh_set_current_modelview(saved_mod); } LLRect get_whole_screen_region() @@ -1267,7 +1264,7 @@ LLRect get_whole_screen_region() return whole_screen; } -bool get_hud_matrices(const LLRect& screen_region, glh::matrix4f &proj, 
glh::matrix4f &model) +bool get_hud_matrices(const LLRect& screen_region, LLMatrix4a &proj, LLMatrix4a &model) { if (isAgentAvatarValid() && gAgentAvatarp->hasHUDAttachment()) { @@ -1275,28 +1272,24 @@ bool get_hud_matrices(const LLRect& screen_region, glh::matrix4f &proj, glh::mat LLBBox hud_bbox = gAgentAvatarp->getHUDBBox(); F32 hud_depth = llmax(1.f, hud_bbox.getExtentLocal().mV[VX] * 1.1f); - proj = gl_ortho(-0.5f * LLViewerCamera::getInstance()->getAspect(), 0.5f * LLViewerCamera::getInstance()->getAspect(), -0.5f, 0.5f, 0.f, hud_depth); - proj.element(2,2) = -0.01f; - + proj = gGL.genOrtho(-0.5f * LLViewerCamera::getInstance()->getAspect(), 0.5f * LLViewerCamera::getInstance()->getAspect(), -0.5f, 0.5f, 0.f, hud_depth); + proj.getRow<2>().copyComponent<2>(LLVector4a(-0.01f)); + F32 aspect_ratio = LLViewerCamera::getInstance()->getAspect(); - glh::matrix4f mat; F32 scale_x = (F32)gViewerWindow->getWorldViewWidthScaled() / (F32)screen_region.getWidth(); F32 scale_y = (F32)gViewerWindow->getWorldViewHeightScaled() / (F32)screen_region.getHeight(); - mat.set_scale(glh::vec3f(scale_x, scale_y, 1.f)); - mat.set_translate( - glh::vec3f(clamp_rescale((F32)(screen_region.getCenterX() - screen_region.mLeft), 0.f, (F32)gViewerWindow->getWorldViewWidthScaled(), 0.5f * scale_x * aspect_ratio, -0.5f * scale_x * aspect_ratio), - clamp_rescale((F32)(screen_region.getCenterY() - screen_region.mBottom), 0.f, (F32)gViewerWindow->getWorldViewHeightScaled(), 0.5f * scale_y, -0.5f * scale_y), - 0.f)); - proj *= mat; - - glh::matrix4f tmp_model((GLfloat*) OGL_TO_CFR_ROTATION); - - mat.set_scale(glh::vec3f(zoom_level, zoom_level, zoom_level)); - mat.set_translate(glh::vec3f(-hud_bbox.getCenterLocal().mV[VX] + (hud_depth * 0.5f), 0.f, 0.f)); - - tmp_model *= mat; - model = tmp_model; + + proj.applyTranslation_affine( + clamp_rescale((F32)(screen_region.getCenterX() - screen_region.mLeft), 0.f, (F32)gViewerWindow->getWorldViewWidthScaled(), 0.5f * scale_x * aspect_ratio, -0.5f * 
scale_x * aspect_ratio), + clamp_rescale((F32)(screen_region.getCenterY() - screen_region.mBottom), 0.f, (F32)gViewerWindow->getWorldViewHeightScaled(), 0.5f * scale_y, -0.5f * scale_y), + 0.f); + proj.applyScale_affine(scale_x, scale_y, 1.f); + + model = OGL_TO_CFR_ROTATION; + model.applyTranslation_affine(LLVector3(-hud_bbox.getCenterLocal().mV[VX] + (hud_depth * 0.5f), 0.f, 0.f)); + model.applyScale_affine(zoom_level); + return TRUE; } else @@ -1305,7 +1298,7 @@ bool get_hud_matrices(const LLRect& screen_region, glh::matrix4f &proj, glh::mat } } -bool get_hud_matrices(glh::matrix4f &proj, glh::matrix4f &model) +bool get_hud_matrices(LLMatrix4a &proj, LLMatrix4a &model) { LLRect whole_screen = get_whole_screen_region(); return get_hud_matrices(whole_screen, proj, model); @@ -1319,17 +1312,17 @@ BOOL setup_hud_matrices() BOOL setup_hud_matrices(const LLRect& screen_region) { - glh::matrix4f proj, model; + LLMatrix4a proj, model; bool result = get_hud_matrices(screen_region, proj, model); if (!result) return result; - + // set up transform to keep HUD objects in front of camera gGL.matrixMode(LLRender::MM_PROJECTION); - gGL.loadMatrix(proj.m); + gGL.loadMatrix(proj); glh_set_current_projection(proj); gGL.matrixMode(LLRender::MM_MODELVIEW); - gGL.loadMatrix(model.m); + gGL.loadMatrix(model); glh_set_current_modelview(model); return TRUE; } @@ -1340,13 +1333,13 @@ void render_ui(F32 zoom_factor, int subfield, bool tiling) { LLGLState::checkStates(); - glh::matrix4f saved_view = glh_get_current_modelview(); + const LLMatrix4a saved_view = glh_get_current_modelview(); if (!gSnapshot) { gGL.pushMatrix(); gGL.loadMatrix(gGLLastModelView); - glh_set_current_modelview(glh_copy_matrix(gGLLastModelView)); + glh_set_current_modelview(gGLLastModelView); } { diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index c3bdfe5f8..208e0643f 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -118,14 +118,15 
@@ void LLViewerJointMesh::uploadJointMatrices() //calculate joint matrices for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) { - LLMatrix4 joint_mat = *reference_mesh->mJointRenderData[joint_num]->mWorldMatrix; + LLMatrix4a joint_mat = *reference_mesh->mJointRenderData[joint_num]->mWorldMatrix; if (hardware_skinning) { - joint_mat *= LLDrawPoolAvatar::getModelView(); + joint_mat.setMul(LLDrawPoolAvatar::getModelView(),joint_mat); + //joint_mat *= LLDrawPoolAvatar::getModelView(); } - gJointMatUnaligned[joint_num] = joint_mat; - gJointRotUnaligned[joint_num] = joint_mat.getMat3(); + gJointMatUnaligned[joint_num] = LLMatrix4(joint_mat.getF32ptr()); + gJointRotUnaligned[joint_num] = gJointMatUnaligned[joint_num].getMat3(); } BOOL last_pivot_uploaded = FALSE; @@ -334,8 +335,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) else { gGL.pushMatrix(); - LLMatrix4 jointToWorld = getWorldMatrix(); - gGL.multMatrix((GLfloat*)jointToWorld.mMatrix); + gGL.multMatrix(getWorldMatrix()); buff->setBuffer(mask); buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); gGL.popMatrix(); diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp index ee9bdf4ba..f9bc5190c 100644 --- a/indra/newview/llviewerobject.cpp +++ b/indra/newview/llviewerobject.cpp @@ -3719,18 +3719,18 @@ const LLQuaternion LLViewerObject::getRenderRotation() const { if (!mDrawable->isRoot()) { - ret = getRotation() * LLQuaternion(mDrawable->getParent()->getWorldMatrix()); + ret = getRotation() * LLQuaternion(LLMatrix4(mDrawable->getParent()->getWorldMatrix().getF32ptr())); } else { - ret = LLQuaternion(mDrawable->getWorldMatrix()); + ret = LLQuaternion(mDrawable->getWorldMatrix().getF32ptr()); } } return ret; } -const LLMatrix4 LLViewerObject::getRenderMatrix() const +const LLMatrix4a& LLViewerObject::getRenderMatrix() const { return mDrawable->getWorldMatrix(); } diff --git a/indra/newview/llviewerobject.h 
b/indra/newview/llviewerobject.h index 21003bcb1..40564be40 100644 --- a/indra/newview/llviewerobject.h +++ b/indra/newview/llviewerobject.h @@ -288,7 +288,7 @@ public: const LLQuaternion getRotationRegion() const; const LLQuaternion getRotationEdit() const; const LLQuaternion getRenderRotation() const; - virtual const LLMatrix4 getRenderMatrix() const; + virtual const LLMatrix4a& getRenderMatrix() const; void setPosition(const LLVector3 &pos, BOOL damped = FALSE); void setPositionGlobal(const LLVector3d &position, BOOL damped = FALSE); @@ -298,7 +298,7 @@ public: void setPositionParent(const LLVector3 &pos_parent, BOOL damped = FALSE); void setPositionAbsoluteGlobal( const LLVector3d &pos_global, BOOL damped = FALSE ); - virtual const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const { return xform->getWorldMatrix(); } + virtual const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const { return xform->getWorldMatrix(); } inline void setRotation(const F32 x, const F32 y, const F32 z, BOOL damped = FALSE); inline void setRotation(const LLQuaternion& quat, BOOL damped = FALSE); diff --git a/indra/newview/llviewerobjectlist.cpp b/indra/newview/llviewerobjectlist.cpp index c7e900238..4bb792f8b 100644 --- a/indra/newview/llviewerobjectlist.cpp +++ b/indra/newview/llviewerobjectlist.cpp @@ -1459,15 +1459,10 @@ void LLViewerObjectList::removeFromActiveList(LLViewerObject* objectp) objectp->setListIndex(-1); - S32 last_index = mActiveObjects.size()-1; + std::vector >::iterator iter = vector_replace_with_last(mActiveObjects,mActiveObjects.begin() + idx); + if(iter != mActiveObjects.end()) + (*iter)->setListIndex(idx); - if (idx != last_index) - { - mActiveObjects[idx] = mActiveObjects[last_index]; - mActiveObjects[idx]->setListIndex(idx); - } - - mActiveObjects.pop_back(); } } diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp index 8f07858cf..a27e1ec4e 100644 --- a/indra/newview/llviewerpartsim.cpp +++ 
b/indra/newview/llviewerpartsim.cpp @@ -406,8 +406,7 @@ void LLViewerPartGroup::updateParticles(const F32 lastdt) // Kill dead particles (either flagged dead, or too old) if ((part->mLastUpdateTime > part->mMaxAge) || (LLViewerPart::LL_PART_DEAD_MASK == part->mFlags)) { - mParticles[i] = mParticles.back() ; - mParticles.pop_back() ; + vector_replace_with_last(mParticles,mParticles.begin() + i); delete part ; } else @@ -417,8 +416,7 @@ void LLViewerPartGroup::updateParticles(const F32 lastdt) { // Transfer particles between groups LLViewerPartSim::getInstance()->put(part) ; - mParticles[i] = mParticles.back() ; - mParticles.pop_back() ; + vector_replace_with_last(mParticles,mParticles.begin() + i); } else { @@ -675,11 +673,9 @@ void LLViewerPartSim::updateSimulation() S32 count = (S32) mViewerPartSources.size(); S32 start = (S32)ll_frand((F32)count); S32 dir = 1; - S32 deldir = 0; if (ll_frand() > 0.5f) { dir = -1; - deldir = -1; } S32 num_updates = 0; @@ -725,11 +721,9 @@ void LLViewerPartSim::updateSimulation() if (mViewerPartSources[i]->isDead()) { - mViewerPartSources[i] = mViewerPartSources.back(); - mViewerPartSources.pop_back(); + vector_replace_with_last(mViewerPartSources,mViewerPartSources.begin() + i); //mViewerPartSources.erase(mViewerPartSources.begin() + i); count--; - i+=deldir; } else { @@ -764,8 +758,7 @@ void LLViewerPartSim::updateSimulation() if (!mViewerPartGroups[i]->getCount()) { delete mViewerPartGroups[i]; - mViewerPartGroups[i] = mViewerPartGroups.back(); - mViewerPartGroups.pop_back(); + vector_replace_with_last(mViewerPartGroups,mViewerPartGroups.begin() + i); //mViewerPartGroups.erase(mViewerPartGroups.begin() + i); i--; count--; @@ -849,15 +842,15 @@ void LLViewerPartSim::removeLastCreatedSource() void LLViewerPartSim::cleanupRegion(LLViewerRegion *regionp) { group_list_t& vec = mViewerPartGroups; - for (group_list_t::size_type i = 0;igetRegion() == regionp) + if ((*it)->getRegion() == regionp) { - delete vec[i]; - vec[i--] = 
vec.back(); - vec.pop_back(); + delete *it; + it = vector_replace_with_last(vec,it); //i = mViewerPartGroups.erase(iter); } + else ++it; } } diff --git a/indra/newview/llviewerprecompiledheaders.h b/indra/newview/llviewerprecompiledheaders.h index f98c6f10f..cce678081 100644 --- a/indra/newview/llviewerprecompiledheaders.h +++ b/indra/newview/llviewerprecompiledheaders.h @@ -187,7 +187,7 @@ //#include "lltextureentry.h" #include "lltreeparams.h" //#include "llvolume.h" -#include "llvolumemgr.h" +//#include "llvolumemgr.h" #include "material_codes.h" // Library includes from llxml diff --git a/indra/newview/llviewerregion.cpp b/indra/newview/llviewerregion.cpp index 80a18cef8..cbc64d533 100644 --- a/indra/newview/llviewerregion.cpp +++ b/indra/newview/llviewerregion.cpp @@ -321,6 +321,9 @@ LLViewerRegion::LLViewerRegion(const U64 &handle, { // Moved this up... -> mWidth = region_width_meters; // + + mRenderMatrix.setIdentity(); + mImpl->mOriginGlobal = from_region_handle(handle); updateRenderMatrix(); @@ -547,7 +550,7 @@ void LLViewerRegion::setOriginGlobal(const LLVector3d &origin_global) void LLViewerRegion::updateRenderMatrix() { - mRenderMatrix.setTranslation(getOriginAgent()); + mRenderMatrix.setTranslate_affine(getOriginAgent()); } void LLViewerRegion::setTimeDilation(F32 time_dilation) diff --git a/indra/newview/llviewerregion.h b/indra/newview/llviewerregion.h index b72abe0ca..20c72de3f 100644 --- a/indra/newview/llviewerregion.h +++ b/indra/newview/llviewerregion.h @@ -114,6 +114,16 @@ public: const F32 region_width_meters); ~LLViewerRegion(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + // Call this after you have the region name and handle. 
void loadObjectCache(); void saveObjectCache(); @@ -400,7 +410,7 @@ public: LLStat mPacketsStat; LLStat mPacketsLostStat; - LLMatrix4 mRenderMatrix; + LL_ALIGN_16(LLMatrix4a mRenderMatrix); // These arrays are maintained in parallel. Ideally they'd be combined into a // single array of an aggrigate data type but for compatibility with the old diff --git a/indra/newview/llviewertexlayer.h b/indra/newview/llviewertexlayer.h index 959c883da..d732a87bb 100644 --- a/indra/newview/llviewertexlayer.h +++ b/indra/newview/llviewertexlayer.h @@ -79,6 +79,16 @@ public: LLViewerTexLayerSetBuffer(LLTexLayerSet* const owner, S32 width, S32 height); virtual ~LLViewerTexLayerSetBuffer(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + public: /*virtual*/ S8 getType() const; BOOL isInitialized(void) const; diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp index 10c924225..c49be918a 100644 --- a/indra/newview/llviewertexture.cpp +++ b/indra/newview/llviewertexture.cpp @@ -914,6 +914,7 @@ LLViewerFetchedTexture::LLViewerFetchedTexture(const LLUUID& id, const LLHost& h { init(TRUE) ; generateGLTexture() ; + mGLTexturep->setNeedsAlphaAndPickMask(TRUE) ; } LLViewerFetchedTexture::LLViewerFetchedTexture(const LLImageRaw* raw, BOOL usemipmaps) @@ -928,6 +929,7 @@ LLViewerFetchedTexture::LLViewerFetchedTexture(const std::string& url, const LLU { init(TRUE) ; generateGLTexture() ; + mGLTexturep->setNeedsAlphaAndPickMask(TRUE) ; } void LLViewerFetchedTexture::init(bool firstinit) @@ -3172,8 +3174,6 @@ LLViewerMediaTexture::LLViewerMediaTexture(const LLUUID& id, BOOL usemipmaps, LL mGLTexturep->setAllowCompression(false); - mGLTexturep->setNeedsAlphaAndPickMask(FALSE) ; - mIsPlaying = FALSE ; setMediaImpl() ; @@ -3204,7 +3204,6 @@ void LLViewerMediaTexture::reinit(BOOL usemipmaps /* = TRUE */) mUseMipMaps = usemipmaps ; getLastReferencedTimer()->reset() ; 
mGLTexturep->setUseMipMaps(mUseMipMaps) ; - mGLTexturep->setNeedsAlphaAndPickMask(FALSE) ; } void LLViewerMediaTexture::setUseMipMaps(BOOL mipmap) diff --git a/indra/newview/llviewertextureanim.cpp b/indra/newview/llviewertextureanim.cpp index 2b364851a..525de0ed2 100644 --- a/indra/newview/llviewertextureanim.cpp +++ b/indra/newview/llviewertextureanim.cpp @@ -49,15 +49,9 @@ LLViewerTextureAnim::LLViewerTextureAnim(LLVOVolume* vobj) : LLTextureAnim() LLViewerTextureAnim::~LLViewerTextureAnim() { - S32 end_idx = sInstanceList.size()-1; - - if (end_idx != mInstanceIndex) - { - sInstanceList[mInstanceIndex] = sInstanceList[end_idx]; - sInstanceList[mInstanceIndex]->mInstanceIndex = mInstanceIndex; - } - - sInstanceList.pop_back(); + std::vector::iterator iter = vector_replace_with_last(sInstanceList, sInstanceList.begin() + mInstanceIndex); + if(iter != sInstanceList.end()) + (*iter)->mInstanceIndex = mInstanceIndex; } void LLViewerTextureAnim::reset() diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp index 48cc1cb0d..099927c6d 100644 --- a/indra/newview/llviewerwindow.cpp +++ b/indra/newview/llviewerwindow.cpp @@ -696,32 +696,35 @@ public: static const LLCachedControl debug_show_render_matrices("DebugShowRenderMatrices"); if (debug_show_render_matrices) { - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLProjection[12], gGLProjection[13], gGLProjection[14], gGLProjection[15])); + F32* m = gGLProjection.getF32ptr(); + + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[12], m[13], m[14], m[15])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLProjection[8], gGLProjection[9], gGLProjection[10], gGLProjection[11])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[8], m[9], m[10], m[11])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLProjection[4], gGLProjection[5], gGLProjection[6], gGLProjection[7])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[4], m[5], 
m[6], m[7])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLProjection[0], gGLProjection[1], gGLProjection[2], gGLProjection[3])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[0], m[1], m[2], m[3])); ypos += y_inc; addText(xpos, ypos, "Projection Matrix"); ypos += y_inc; + m = gGLModelView.getF32ptr(); - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLModelView[12], gGLModelView[13], gGLModelView[14], gGLModelView[15])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[12], m[13], m[14], m[15])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLModelView[8], gGLModelView[9], gGLModelView[10], gGLModelView[11])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[8], m[9], m[10], m[11])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLModelView[4], gGLModelView[5], gGLModelView[6], gGLModelView[7])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[4], m[5], m[6], m[7])); ypos += y_inc; - addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", gGLModelView[0], gGLModelView[1], gGLModelView[2], gGLModelView[3])); + addText(xpos, ypos, llformat("%.4f .%4f %.4f %.4f", m[0], m[1], m[2], m[3])); ypos += y_inc; addText(xpos, ypos, "View Matrix"); diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index 86e2c68b0..bd0000846 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -1651,7 +1651,10 @@ const LLVector3 LLVOAvatar::getRenderPosition() const } else { - return getPosition() * mDrawable->getParent()->getRenderMatrix(); + LLVector4a pos; + pos.load3(getPosition().mV); + mDrawable->getParent()->getRenderMatrix().affineTransform(pos,pos); + return LLVector3(pos.getF32ptr()); } } @@ -1709,9 +1712,7 @@ void LLVOAvatar::getSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) LLPolyMesh* mesh = i->second; for (S32 joint_num = 0; joint_num < mesh->mJointRenderData.count(); joint_num++) { - LLVector4a trans; - 
trans.load3( mesh->mJointRenderData[joint_num]->mWorldMatrix->getTranslation().mV); - update_min_max(newMin, newMax, trans); + update_min_max(newMin, newMax, mesh->mJointRenderData[joint_num]->mWorldMatrix->getRow()); } } @@ -1835,7 +1836,7 @@ void LLVOAvatar::renderJoints() jointp->updateWorldMatrix(); gGL.pushMatrix(); - gGL.multMatrix( &jointp->getXform()->getWorldMatrix().mMatrix[0][0] ); + gGL.multMatrix(jointp->getXform()->getWorldMatrix()); gGL.diffuseColor3f( 1.f, 0.f, 1.f ); @@ -1924,36 +1925,37 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& { mCollisionVolumes[i].updateWorldMatrix(); - glh::matrix4f mat((F32*) mCollisionVolumes[i].getXform()->getWorldMatrix().mMatrix); - glh::matrix4f inverse = mat.inverse(); - glh::matrix4f norm_mat = inverse.transpose(); + const LLMatrix4a& mat = mCollisionVolumes[i].getXform()->getWorldMatrix(); + LLMatrix4a inverse = mat; + inverse.invert(); + LLMatrix4a norm_mat = inverse; + norm_mat.transpose(); - glh::vec3f p1(start.getF32ptr()); - glh::vec3f p2(end.getF32ptr()); - inverse.mult_matrix_vec(p1); - inverse.mult_matrix_vec(p2); + LLVector4a p1, p2; + inverse.affineTransform(start,p1); //Might need to use perspectiveTransform here. 
+ inverse.affineTransform(end,p2); LLVector3 position; LLVector3 norm; - if (linesegment_sphere(LLVector3(p1.v), LLVector3(p2.v), LLVector3(0,0,0), 1.f, position, norm)) + if (linesegment_sphere(LLVector3(p1.getF32ptr()), LLVector3(p2.getF32ptr()), LLVector3(0,0,0), 1.f, position, norm)) { - glh::vec3f res_pos(position.mV); - mat.mult_matrix_vec(res_pos); - - norm.normalize(); - glh::vec3f res_norm(norm.mV); - norm_mat.mult_matrix_dir(res_norm); - if (intersection) { - intersection->load3(res_pos.v); + LLVector4a res_pos; + res_pos.load3(position.mV); + mat.affineTransform(res_pos,res_pos); + *intersection = res_pos; } if (normal) { - normal->load3(res_norm.v); + LLVector4a res_norm; + res_norm.load3(norm.mV); + res_norm.normalize3fast(); + norm_mat.perspectiveTransform(res_norm,res_norm); + *normal = res_norm; } return TRUE; @@ -4123,7 +4125,7 @@ BOOL LLVOAvatar::updateCharacter(LLAgent &agent) } - LLQuaternion root_rotation = mRoot->getWorldMatrix().quaternion(); + LLQuaternion root_rotation = LLMatrix4(mRoot->getWorldMatrix().getF32ptr()).quaternion(); F32 root_roll, root_pitch, root_yaw; root_rotation.getEulerAngles(&root_roll, &root_pitch, &root_yaw); @@ -4140,7 +4142,7 @@ BOOL LLVOAvatar::updateCharacter(LLAgent &agent) // and head turn. Once in motion, it must conform however. 
BOOL self_in_mouselook = isSelf() && gAgentCamera.cameraMouselook(); - LLVector3 pelvisDir( mRoot->getWorldMatrix().getFwdRow4().mV ); + LLVector3 pelvisDir( mRoot->getWorldMatrix().getRow().getF32ptr() ); static const LLCachedControl s_pelvis_rot_threshold_slow(gSavedSettings, "AvatarRotateThresholdSlow", 60.0); static const LLCachedControl s_pelvis_rot_threshold_fast(gSavedSettings, "AvatarRotateThresholdFast", 2.0); @@ -6618,12 +6620,7 @@ BOOL LLVOAvatar::detachObject(LLViewerObject *viewer_object) if (attachment->isObjectAttached(viewer_object)) { - std::vector >::iterator it = std::find(mAttachedObjectsVector.begin(),mAttachedObjectsVector.end(),std::make_pair(viewer_object,attachment)); - if(it != mAttachedObjectsVector.end()) - { - (*it) = mAttachedObjectsVector.back(); - mAttachedObjectsVector.pop_back(); - } + vector_replace_with_last(mAttachedObjectsVector,std::make_pair(viewer_object,attachment)); cleanupAttachedMesh( viewer_object ); attachment->removeObject(viewer_object); @@ -8843,7 +8840,6 @@ void LLVOAvatar::updateSoftwareSkinnedVertices(const LLMeshSkinInfo* skin, const //build matrix palette LLMatrix4a mp[JOINT_COUNT]; - LLMatrix4* mat = (LLMatrix4*) mp; U32 count = llmin((U32) skin->mJointNames.size(), (U32) JOINT_COUNT); @@ -8858,8 +8854,9 @@ void LLVOAvatar::updateSoftwareSkinnedVertices(const LLMeshSkinInfo* skin, const } if (joint) { - mat[j] = skin->mInvBindMatrix[j]; - mat[j] *= joint->getWorldMatrix(); + LLMatrix4a mat; + mat.loadu((F32*)skin->mInvBindMatrix[j].mMatrix); + mp[j].setMul(joint->getWorldMatrix(),mat); } } diff --git a/indra/newview/llvoclouds.cpp b/indra/newview/llvoclouds.cpp index 0947e6025..24f64a70e 100644 --- a/indra/newview/llvoclouds.cpp +++ b/indra/newview/llvoclouds.cpp @@ -142,7 +142,6 @@ BOOL LLVOClouds::updateGeometry(LLDrawable *drawable) group->setState(LLSpatialGroup::GEOM_DIRTY); } drawable->setNumFaces(0, NULL, getTEImage(0)); - LLPipeline::sCompiles++; return TRUE; } @@ -195,7 +194,6 @@ BOOL 
LLVOClouds::updateGeometry(LLDrawable *drawable) } mDrawable->movePartition(); - LLPipeline::sCompiles++; return TRUE; } diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp index 9d802740f..5988ece29 100644 --- a/indra/newview/llvograss.cpp +++ b/indra/newview/llvograss.cpp @@ -492,7 +492,6 @@ void LLVOGrass::plantBlades() mDepth = (face->mCenterLocal - LLViewerCamera::getInstance()->getOrigin())*LLViewerCamera::getInstance()->getAtAxis(); mDrawable->setPosition(face->mCenterLocal); mDrawable->movePartition(); - LLPipeline::sCompiles++; } void LLVOGrass::getGeometry(S32 idx, @@ -620,7 +619,6 @@ void LLVOGrass::getGeometry(S32 idx, index_offset += 8; } - LLPipeline::sCompiles++; } U32 LLVOGrass::getPartitionType() const diff --git a/indra/newview/llvoground.cpp b/indra/newview/llvoground.cpp index 97b7418b4..6ffb580cd 100644 --- a/indra/newview/llvoground.cpp +++ b/indra/newview/llvoground.cpp @@ -156,6 +156,5 @@ BOOL LLVOGround::updateGeometry(LLDrawable *drawable) *(texCoordsp++) = LLVector2(0.5f, 0.5f); face->getVertexBuffer()->flush(); - LLPipeline::sCompiles++; return TRUE; } diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp index 0e17c77ef..7b4077798 100644 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -339,7 +339,6 @@ BOOL LLVOPartGroup::updateGeometry(LLDrawable *drawable) group->setState(LLSpatialGroup::GEOM_DIRTY); } drawable->setNumFaces(0, NULL, getTEImage(0)); - LLPipeline::sCompiles++; return TRUE; } @@ -482,7 +481,6 @@ BOOL LLVOPartGroup::updateGeometry(LLDrawable *drawable) mScale.set(max_scale, max_scale, max_scale); mDrawable->movePartition(); - LLPipeline::sCompiles++; return TRUE; } diff --git a/indra/newview/llvosky.cpp b/indra/newview/llvosky.cpp index 2c3e6fbf9..83f8d9388 100644 --- a/indra/newview/llvosky.cpp +++ b/indra/newview/llvosky.cpp @@ -298,7 +298,6 @@ void LLSkyTex::create(const F32 brightness) void LLSkyTex::createGLImage(S32 which) { - 
mTexture[which]->setNeedsAlphaAndPickMask(false); //Needed, else analyzeAlpha is called every frame for each texture. mTexture[which]->createGLTexture(0, mImageRaw[which], 0, TRUE, LLGLTexture::LOCAL); mTexture[which]->setAddressMode(LLTexUnit::TAM_CLAMP); } @@ -1411,8 +1410,6 @@ BOOL LLVOSky::updateGeometry(LLDrawable *drawable) { setDrawRefl(-1); } - - LLPipeline::sCompiles++; return TRUE; } diff --git a/indra/newview/llvotree.cpp b/indra/newview/llvotree.cpp index ad77d930c..f33e24199 100644 --- a/indra/newview/llvotree.cpp +++ b/indra/newview/llvotree.cpp @@ -107,6 +107,10 @@ LLVOTree::~LLVOTree() delete[] mData; mData = NULL; } + for(std::vector >::iterator iter = mDrawList.begin(); iter != mDrawList.end(); iter++) + { + delete (*iter)->mModelMatrix; + } } //static @@ -397,6 +401,11 @@ void LLVOTree::idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time) mTrunkVel.normalize(); } } + else + { + mTrunkBend.clear(); + mTrunkVel.clear(); + } S32 trunk_LOD = sMAX_NUM_TREE_LOD_LEVELS; F32 app_angle = getAppAngle()*LLVOTree::sTreeFactor; @@ -446,6 +455,10 @@ void LLVOTree::idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time) } } } + else + { + gPipeline.markRebuild(mDrawable, LLDrawable::REBUILD_ALL, FALSE); + } mTrunkLOD = trunk_LOD; //return TRUE; @@ -541,6 +554,12 @@ BOOL LLVOTree::updateGeometry(LLDrawable *drawable) { LLFastTimer ftm(FTM_UPDATE_TREE); + for(std::vector >::iterator iter = mDrawList.begin(); iter != mDrawList.end(); iter++) + { + delete (*iter)->mModelMatrix; + } + mDrawList.clear(); + if(mTrunkLOD >= sMAX_NUM_TREE_LOD_LEVELS) //do not display the tree. { mReferenceBuffer = NULL ; @@ -582,8 +601,7 @@ BOOL LLVOTree::updateGeometry(LLDrawable *drawable) max_vertices += sLODVertexCount[lod]; } - static LLCachedControl sRenderAnimateTrees(gSavedSettings, "RenderAnimateTrees"); - mReferenceBuffer = new LLVertexBuffer(LLDrawPoolTree::VERTEX_DATA_MASK, sRenderAnimateTrees ? 
GL_STATIC_DRAW_ARB : 0); + mReferenceBuffer = new LLVertexBuffer(LLDrawPoolTree::VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); mReferenceBuffer->allocateBuffer(max_vertices, max_indices, TRUE); LLStrider vertices; @@ -886,31 +904,21 @@ BOOL LLVOTree::updateGeometry(LLDrawable *drawable) llassert(vertex_count == max_vertices); llassert(index_count == max_indices); } - - static LLCachedControl sRenderAnimateTrees(gSavedSettings, "RenderAnimateTrees"); - if (sRenderAnimateTrees) - { - mDrawable->getFace(0)->setVertexBuffer(mReferenceBuffer); - } - else - { - //generate tree mesh - updateMesh(); - } + + //generate tree mesh + updateMesh(); return TRUE; } void LLVOTree::updateMesh() { - LLMatrix4 matrix; - // Translate to tree base HACK - adjustment in Z plants tree underground const LLVector3 &pos_region = getPositionRegion(); //gGL.translatef(pos_agent.mV[VX], pos_agent.mV[VY], pos_agent.mV[VZ] - 0.1f); - LLMatrix4 trans_mat; - trans_mat.setTranslation(pos_region.mV[VX], pos_region.mV[VY], pos_region.mV[VZ] - 0.1f); - trans_mat *= matrix; + LLMatrix4a trans_mat; + trans_mat.setIdentity(); + trans_mat.setTranslate_affine(pos_region - LLVector3(0.f,0.f,0.1f)); // Rotate to tree position and bend for current trunk/wind // Note that trunk stiffness controls the amount of bend at the trunk as @@ -923,16 +931,12 @@ void LLVOTree::updateMesh() LLQuaternion(90.f*DEG_TO_RAD, LLVector4(0,0,1)) * getRotation(); - LLMatrix4 rot_mat(rot); - rot_mat *= trans_mat; + + LLMatrix4a rot_mat = trans_mat; + rot_mat.mul(LLQuaternion2(rot)); F32 radius = getScale().magVec()*0.05f; - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = - scale_mat.mMatrix[1][1] = - scale_mat.mMatrix[2][2] = radius; - - scale_mat *= rot_mat; + rot_mat.applyScale_affine(radius); // const F32 THRESH_ANGLE_FOR_BILLBOARD = 15.f; // const F32 BLEND_RANGE_FOR_BILLBOARD = 3.f; @@ -949,78 +953,102 @@ void LLVOTree::updateMesh() LLFace* facep = mDrawable->getFace(0); if (!facep) return; - LLVertexBuffer* buff = new 
LLVertexBuffer(LLDrawPoolTree::VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); - buff->allocateBuffer(vert_count, index_count, TRUE); - facep->setVertexBuffer(buff); - LLStrider vertices; - LLStrider normals; + LLStrider vertices; + LLStrider normals; LLStrider tex_coords; LLStrider indices; U16 idx_offset = 0; - buff->getVertexStrider(vertices); - buff->getNormalStrider(normals); - buff->getTexCoord0Strider(tex_coords); - buff->getIndexStrider(indices); + LLVertexBuffer* buff = NULL; - genBranchPipeline(vertices, normals, tex_coords, indices, idx_offset, scale_mat, mTrunkLOD, stop_depth, mDepth, mTrunkDepth, 1.0, mTwist, droop, mBranches, alpha); + static LLCachedControl sRenderAnimateTrees("RenderAnimateTrees", false); + if (sRenderAnimateTrees) + { + facep->setVertexBuffer(NULL); + } + else + { + buff = new LLVertexBuffer(LLDrawPoolTree::VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); + buff->allocateBuffer(vert_count, index_count, TRUE); + facep->setVertexBuffer(buff); + + buff->getVertexStrider(vertices); + buff->getNormalStrider(normals); + buff->getTexCoord0Strider(tex_coords); + buff->getIndexStrider(indices); + } + + genBranchPipeline(vertices, normals, tex_coords, indices, idx_offset, rot_mat, mTrunkLOD, stop_depth, mDepth, mTrunkDepth, 1.0, mTwist, droop, mBranches, alpha); - mReferenceBuffer->flush(); - buff->flush(); + if(buff) + { + mReferenceBuffer->flush(); + buff->flush(); + } } -void LLVOTree::appendMesh(LLStrider& vertices, - LLStrider& normals, +void LLVOTree::appendMesh(LLStrider& vertices, + LLStrider& normals, LLStrider& tex_coords, LLStrider& indices, U16& cur_idx, - LLMatrix4& matrix, - LLMatrix4& norm_mat, + LLMatrix4a& matrix, + LLMatrix4a& norm_mat, S32 vert_start, S32 vert_count, S32 index_count, S32 index_offset) { - LLStrider v; - LLStrider n; + LLStrider v; + LLStrider n; LLStrider t; LLStrider idx; - mReferenceBuffer->getVertexStrider(v); - mReferenceBuffer->getNormalStrider(n); - mReferenceBuffer->getTexCoord0Strider(t); - 
mReferenceBuffer->getIndexStrider(idx); - - //copy/transform vertices into mesh - check - for (S32 i = 0; i < vert_count; i++) - { - U16 index = vert_start + i; - *vertices++ = v[index] * matrix; - LLVector3 norm = n[index] * norm_mat; - norm.normalize(); - *normals++ = norm; - *tex_coords++ = t[index]; - } - - //copy offset indices into mesh - check - for (S32 i = 0; i < index_count; i++) + static LLCachedControl sRenderAnimateTrees(gSavedSettings, "RenderAnimateTrees"); + if(sRenderAnimateTrees) //Instead of manipulating the vbo, use the reference vbo and apply the transformation matrix to the matrix stack at draw-time. { - U16 index = index_offset + i; - *indices++ = idx[index]-vert_start+cur_idx; + LLDrawInfo* draw_info = new LLDrawInfo(vert_start,vert_start+vert_count-1,index_count,index_offset,NULL,mReferenceBuffer); + draw_info->mModelMatrix = new LLMatrix4a(matrix); //Make sure these are deleted before clearing/destructing mDrawList! + mDrawList.push_back(draw_info); } + else + { + mReferenceBuffer->getVertexStrider(v); + mReferenceBuffer->getNormalStrider(n); + mReferenceBuffer->getTexCoord0Strider(t); + mReferenceBuffer->getIndexStrider(idx); + + //copy/transform vertices into mesh - check + for (S32 i = 0; i < vert_count; i++) + { + U16 index = vert_start + i; + matrix.affineTransform(v[index],*vertices++); + LLVector4a& norm = *normals++; + norm_mat.perspectiveTransform(n[index],norm); + norm.normalize3fast(); + *tex_coords++ = t[index]; + } - //increment index offset - check - cur_idx += vert_count; + //copy offset indices into mesh - check + for (S32 i = 0; i < index_count; i++) + { + U16 index = index_offset + i; + *indices++ = idx[index]-vert_start+cur_idx; + } + + //increment index offset - check + cur_idx += vert_count; + } } -void LLVOTree::genBranchPipeline(LLStrider& vertices, - LLStrider& normals, +void LLVOTree::genBranchPipeline(LLStrider& vertices, + LLStrider& normals, LLStrider& tex_coords, LLStrider& indices, U16& index_offset, - 
LLMatrix4& matrix, + LLMatrix4a& matrix, S32 trunk_LOD, S32 stop_level, U16 depth, @@ -1049,46 +1077,44 @@ void LLVOTree::genBranchPipeline(LLStrider& vertices, { llassert(sLODIndexCount[trunk_LOD] > 0); width = scale * length * aspect; - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = width; - scale_mat.mMatrix[1][1] = width; - scale_mat.mMatrix[2][2] = scale*length; - scale_mat *= matrix; - glh::matrix4f norm((F32*) scale_mat.mMatrix); - LLMatrix4 norm_mat = LLMatrix4(norm.inverse().transpose().m); + LLMatrix4a scale_mat = matrix; + scale_mat.applyScale_affine(width,width,scale*length); + LLMatrix4a norm_mat = scale_mat; norm_mat.invert(); + norm_mat.transpose(); + appendMesh(vertices, normals, tex_coords, indices, index_offset, scale_mat, norm_mat, sLODVertexOffset[trunk_LOD], sLODVertexCount[trunk_LOD], sLODIndexCount[trunk_LOD], sLODIndexOffset[trunk_LOD]); } - + + LLMatrix4a trans_matrix = matrix; + trans_matrix.applyTranslation_affine(0.f,0.f,scale*length); + const LLMatrix4a& trans_mat = trans_matrix; + // Recurse to create more branches for (S32 i=0; i < (S32)branches; i++) { - LLMatrix4 trans_mat; - trans_mat.setTranslation(0,0,scale*length); - trans_mat *= matrix; LLQuaternion rot = LLQuaternion(20.f*DEG_TO_RAD, LLVector4(0.f, 0.f, 1.f)) * LLQuaternion(droop*DEG_TO_RAD, LLVector4(0.f, 1.f, 0.f)) * LLQuaternion(((constant_twist + ((i%2==0)?twist:-twist))*i)*DEG_TO_RAD, LLVector4(0.f, 0.f, 1.f)); - - LLMatrix4 rot_mat(rot); - rot_mat *= trans_mat; + + LLMatrix4a rot_mat = trans_mat; + rot_mat.mul(LLQuaternion2(rot)); genBranchPipeline(vertices, normals, tex_coords, indices, index_offset, rot_mat, trunk_LOD, stop_level, depth - 1, 0, scale*mScaleStep, twist, droop, branches, alpha); } // Recurse to continue trunk if (trunk_depth) { - LLMatrix4 trans_mat; - trans_mat.setTranslation(0,0,scale*length); - trans_mat *= matrix; - LLMatrix4 rot_mat(70.5f*DEG_TO_RAD, LLVector4(0,0,1)); - rot_mat *= trans_mat; // rotate a bit around Z when ascending + static const 
LLMatrix4a srot_mat = gGL.genRot(70.5f,0.f,0.f,1.f); + LLMatrix4a rot_mat; + rot_mat.setMul(trans_mat, srot_mat); // rotate a bit around Z when ascending + genBranchPipeline(vertices, normals, tex_coords, indices, index_offset, rot_mat, trunk_LOD, stop_level, depth, trunk_depth-1, scale*mScaleStep, twist, droop, branches, alpha); } } @@ -1098,15 +1124,12 @@ void LLVOTree::genBranchPipeline(LLStrider& vertices, // Append leaves as two 90 deg crossed quads with leaf textures // { - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = - scale_mat.mMatrix[1][1] = - scale_mat.mMatrix[2][2] = scale*mLeafScale; + LLMatrix4a scale_mat = matrix; + scale_mat.applyScale_affine(scale*mLeafScale); - scale_mat *= matrix; - - glh::matrix4f norm((F32*) scale_mat.mMatrix); - LLMatrix4 norm_mat = LLMatrix4(norm.inverse().transpose().m); + LLMatrix4a norm_mat = scale_mat; + norm_mat.invert(); + norm_mat.transpose(); appendMesh(vertices, normals, tex_coords, indices, index_offset, scale_mat, norm_mat, 0, LEAF_VERTICES, LEAF_INDICES, 0); } @@ -1150,132 +1173,6 @@ void LLVOTree::calcNumVerts(U32& vert_count, U32& index_count, S32 trunk_LOD, S3 } } -U32 LLVOTree::drawBranchPipeline(LLMatrix4& matrix, U16* indicesp, S32 trunk_LOD, S32 stop_level, U16 depth, U16 trunk_depth, F32 scale, F32 twist, F32 droop, F32 branches, F32 alpha) -{ - U32 ret = 0; - // - // Draws a tree by recursing, drawing branches and then a 'leaf' texture. - // If stop_level = -1, simply draws the whole tree as a billboarded texture - // - - static F32 constant_twist; - static F32 width = 0; - - //F32 length = ((scale == 1.f)? mTrunkLength:mBranchLength); - //F32 aspect = ((scale == 1.f)? mTrunkAspect:mBranchAspect); - F32 length = ((trunk_depth || (scale == 1.f))? mTrunkLength:mBranchLength); - F32 aspect = ((trunk_depth || (scale == 1.f))? 
mTrunkAspect:mBranchAspect); - - constant_twist = 360.f/branches; - - if (!LLPipeline::sReflectionRender && stop_level >= 0) - { - // - // Draw the tree using recursion - // - if (depth > stop_level) - { - { - llassert(sLODIndexCount[trunk_LOD] > 0); - width = scale * length * aspect; - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = width; - scale_mat.mMatrix[1][1] = width; - scale_mat.mMatrix[2][2] = scale*length; - scale_mat *= matrix; - - gGL.loadMatrix((F32*) scale_mat.mMatrix); - gGL.syncMatrices(); - glDrawElements(GL_TRIANGLES, sLODIndexCount[trunk_LOD], GL_UNSIGNED_SHORT, indicesp + sLODIndexOffset[trunk_LOD]); - gPipeline.addTrianglesDrawn(LEAF_INDICES); - stop_glerror(); - ret += sLODIndexCount[trunk_LOD]; - } - - // Recurse to create more branches - for (S32 i=0; i < (S32)branches; i++) - { - LLMatrix4 trans_mat; - trans_mat.setTranslation(0,0,scale*length); - trans_mat *= matrix; - - LLQuaternion rot = - LLQuaternion(20.f*DEG_TO_RAD, LLVector4(0.f, 0.f, 1.f)) * - LLQuaternion(droop*DEG_TO_RAD, LLVector4(0.f, 1.f, 0.f)) * - LLQuaternion(((constant_twist + ((i%2==0)?twist:-twist))*i)*DEG_TO_RAD, LLVector4(0.f, 0.f, 1.f)); - - LLMatrix4 rot_mat(rot); - rot_mat *= trans_mat; - - ret += drawBranchPipeline(rot_mat, indicesp, trunk_LOD, stop_level, depth - 1, 0, scale*mScaleStep, twist, droop, branches, alpha); - } - // Recurse to continue trunk - if (trunk_depth) - { - LLMatrix4 trans_mat; - trans_mat.setTranslation(0,0,scale*length); - trans_mat *= matrix; - - LLMatrix4 rot_mat(70.5f*DEG_TO_RAD, LLVector4(0,0,1)); - rot_mat *= trans_mat; // rotate a bit around Z when ascending - ret += drawBranchPipeline(rot_mat, indicesp, trunk_LOD, stop_level, depth, trunk_depth-1, scale*mScaleStep, twist, droop, branches, alpha); - } - } - else - { - // - // Draw leaves as two 90 deg crossed quads with leaf textures - // - { - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = - scale_mat.mMatrix[1][1] = - scale_mat.mMatrix[2][2] = scale*mLeafScale; - - scale_mat *= 
matrix; - - - gGL.loadMatrix((F32*) scale_mat.mMatrix); - gGL.syncMatrices(); - glDrawElements(GL_TRIANGLES, LEAF_INDICES, GL_UNSIGNED_SHORT, indicesp); - gPipeline.addTrianglesDrawn(LEAF_INDICES); - stop_glerror(); - ret += LEAF_INDICES; - } - } - } - else - { - // - // Draw the tree as a single billboard texture - // - - LLMatrix4 scale_mat; - scale_mat.mMatrix[0][0] = - scale_mat.mMatrix[1][1] = - scale_mat.mMatrix[2][2] = mBillboardScale*mBillboardRatio; - - scale_mat *= matrix; - - gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.translatef(0.0, -0.5, 0.0); - gGL.matrixMode(LLRender::MM_MODELVIEW); - - gGL.loadMatrix((F32*) scale_mat.mMatrix); - gGL.syncMatrices(); - glDrawElements(GL_TRIANGLES, LEAF_INDICES, GL_UNSIGNED_SHORT, indicesp); - gPipeline.addTrianglesDrawn(LEAF_INDICES); - stop_glerror(); - ret += LEAF_INDICES; - - gGL.matrixMode(LLRender::MM_TEXTURE); - gGL.loadIdentity(); - gGL.matrixMode(LLRender::MM_MODELVIEW); - } - - return ret; -} - void LLVOTree::updateRadius() { if (mDrawable.isNull()) @@ -1370,8 +1267,8 @@ LLTreePartition::LLTreePartition() void LLVOTree::generateSilhouetteVertices(std::vector &vertices, std::vector &normals, const LLVector3& obj_cam_vec, - const LLMatrix4& local_matrix, - const LLMatrix3& normal_matrix) + const LLMatrix4a& local_matrix_, + const LLMatrix4a& normal_matrix) { vertices.clear(); normals.clear(); @@ -1379,6 +1276,8 @@ void LLVOTree::generateSilhouetteVertices(std::vector &vertices, F32 height = mBillboardScale; // *mBillboardRatio * 0.5; F32 width = height * mTrunkAspect; + LLMatrix4 local_matrix(local_matrix_.getF32ptr()); + LLVector3 position1 = LLVector3(-width * 0.5, 0, 0) * local_matrix; LLVector3 position2 = LLVector3(-width * 0.5, 0, height) * local_matrix; LLVector3 position3 = LLVector3(width * 0.5, 0, height) * local_matrix; @@ -1468,9 +1367,13 @@ void LLVOTree::generateSilhouette(LLSelectNode* nodep, const LLVector3& view_poi // compose final matrix LLMatrix4 local_matrix; local_matrix.initAll(scale, 
rotation, position); + LLMatrix4a lmat; + lmat.loadu(local_matrix); + LLMatrix4a nmat; + nmat.setIdentity(); generateSilhouetteVertices(nodep->mSilhouetteVertices, nodep->mSilhouetteNormals, - LLVector3(0, 0, 0), local_matrix, LLMatrix3()); + LLVector3(0, 0, 0), lmat, nmat); nodep->mSilhouetteExists = TRUE; } diff --git a/indra/newview/llvotree.h b/indra/newview/llvotree.h index 4932c25d1..7fc46e0b9 100644 --- a/indra/newview/llvotree.h +++ b/indra/newview/llvotree.h @@ -85,24 +85,24 @@ public: void updateMesh(); - void appendMesh(LLStrider& vertices, - LLStrider& normals, + void appendMesh(LLStrider& vertices, + LLStrider& normals, LLStrider& tex_coords, LLStrider& indices, U16& idx_offset, - LLMatrix4& matrix, - LLMatrix4& norm_mat, + LLMatrix4a& matrix, + LLMatrix4a& norm_mat, S32 vertex_offset, S32 vertex_count, S32 index_count, S32 index_offset); - void genBranchPipeline(LLStrider& vertices, - LLStrider& normals, + void genBranchPipeline(LLStrider& vertices, + LLStrider& normals, LLStrider& tex_coords, LLStrider& indices, U16& index_offset, - LLMatrix4& matrix, + LLMatrix4a& matrix, S32 trunk_LOD, S32 stop_level, U16 depth, @@ -113,9 +113,6 @@ public: F32 branches, F32 alpha); - U32 drawBranchPipeline(LLMatrix4& matrix, U16* indicesp, S32 trunk_LOD, S32 stop_level, U16 depth, U16 trunk_depth, F32 scale, F32 twist, F32 droop, F32 branches, F32 alpha); - - /*virtual*/ BOOL lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, @@ -196,6 +193,8 @@ protected: U32 mFrameCount; + std::vector > mDrawList; + typedef std::map SpeciesMap; static SpeciesMap sSpeciesTable; @@ -210,8 +209,8 @@ private: void generateSilhouetteVertices(std::vector &vertices, std::vector &normals, const LLVector3& view_vec, - const LLMatrix4& mat, - const LLMatrix3& norm_mat); + const LLMatrix4a& mat, + const LLMatrix4a& norm_mat); }; #endif diff --git a/indra/newview/llvovolume.cpp 
b/indra/newview/llvovolume.cpp index 8277cb08a..7010d8c7f 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -564,29 +564,28 @@ void LLVOVolume::animateTextures() if (!facep->mTextureMatrix) { - facep->mTextureMatrix = new LLMatrix4(); + facep->mTextureMatrix = new LLMatrix4a(); } - LLMatrix4& tex_mat = *facep->mTextureMatrix; + LLMatrix4a& tex_mat = *facep->mTextureMatrix; tex_mat.setIdentity(); LLVector3 trans ; { - trans.set(LLVector3(off_s+0.5f, off_t+0.5f, 0.f)); - tex_mat.translate(LLVector3(-0.5f, -0.5f, 0.f)); + trans.set(LLVector3(off_s+0.5f, off_t+0.5f, 0.f)); + tex_mat.setTranslate_affine(LLVector3(-0.5f, -0.5f, 0.f)); } - LLVector3 scale(scale_s, scale_t, 1.f); - LLQuaternion quat; - quat.setQuat(rot, 0, 0, -1.f); + LLVector3 scale(scale_s, scale_t, 1.f); + + tex_mat.setMul(gGL.genRot(rot*RAD_TO_DEG,0.f,0.f,-1.f),tex_mat); //left mul - tex_mat.rotate(quat); + LLMatrix4a scale_mat; + scale_mat.setIdentity(); + scale_mat.applyScale_affine(scale); + tex_mat.setMul(scale_mat, tex_mat); //left mul - LLMatrix4 mat; - mat.initAll(scale, LLQuaternion(), LLVector3()); - tex_mat *= mat; - - tex_mat.translate(trans); - } + tex_mat.translate_affine(trans); + } } else { @@ -1510,93 +1509,53 @@ void LLVOVolume::updateRelativeXform(bool force_identity) { //rigged volume (which is in agent space) is used for generating bounding boxes etc //inverse of render matrix should go to partition space mRelativeXform = getRenderMatrix(); - - F32* dst = (F32*) mRelativeXformInvTrans.mMatrix; - F32* src = (F32*) mRelativeXform.mMatrix; - dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; - dst[3] = src[4]; dst[4] = src[5]; dst[5] = src[6]; - dst[6] = src[8]; dst[7] = src[9]; dst[8] = src[10]; - + mRelativeXformInvTrans = mRelativeXform; mRelativeXform.invert(); mRelativeXformInvTrans.transpose(); } else if (drawable->isActive() || force_identity) { // setup relative transforms - LLQuaternion delta_rot; - LLVector3 delta_pos, delta_scale; - - //matrix 
from local space to parent relative/global space + bool use_identity = force_identity || drawable->isSpatialRoot(); - delta_rot = use_identity ? LLQuaternion() : mDrawable->getRotation(); - delta_pos = use_identity ? LLVector3(0,0,0) : mDrawable->getPosition(); - delta_scale = mDrawable->getScale(); - // Vertex transform (4x4) - LLVector3 x_axis = LLVector3(delta_scale.mV[VX], 0.f, 0.f) * delta_rot; - LLVector3 y_axis = LLVector3(0.f, delta_scale.mV[VY], 0.f) * delta_rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, delta_scale.mV[VZ]) * delta_rot; - - mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(delta_pos, 1.f)); - - - // compute inverse transpose for normals - // mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); - // mRelativeXformInvTrans.invert(); - // mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); - // grumble - invert is NOT a matrix invert, so we do it by hand: - - LLMatrix3 rot_inverse = LLMatrix3(~delta_rot); - - LLMatrix3 scale_inverse; - scale_inverse.setRows(LLVector3(1.0, 0.0, 0.0) / delta_scale.mV[VX], - LLVector3(0.0, 1.0, 0.0) / delta_scale.mV[VY], - LLVector3(0.0, 0.0, 1.0) / delta_scale.mV[VZ]); - - - mRelativeXformInvTrans = rot_inverse * scale_inverse; + if(use_identity) + { + mRelativeXform.setIdentity(); + mRelativeXform.applyScale_affine(mDrawable->getScale()); + } + else + { + mRelativeXform = LLQuaternion2(mDrawable->getRotation()); + mRelativeXform.applyScale_affine(mDrawable->getScale()); + mRelativeXform.setTranslate_affine(mDrawable->getPosition()); + } + mRelativeXformInvTrans = mRelativeXform; + mRelativeXformInvTrans.invert(); mRelativeXformInvTrans.transpose(); } else { - LLVector3 pos = getPosition(); - LLVector3 scale = getScale(); - LLQuaternion rot = getRotation(); - + LLVector4a pos; + pos.load3(getPosition().mV); + LLQuaternion2 rot(getRotation()); if (mParent) { - pos *= mParent->getRotation(); - pos += mParent->getPosition(); - rot *= 
mParent->getRotation(); + LLMatrix4a lrot = LLQuaternion2(mParent->getRotation()); + lrot.rotate(pos,pos); + LLVector4a lpos; + lpos.load3(mParent->getPosition().mV); + pos.add(lpos); + rot.mul(LLQuaternion2(mParent->getRotation())); } - - //LLViewerRegion* region = getRegion(); - //pos += region->getOriginAgent(); - - LLVector3 x_axis = LLVector3(scale.mV[VX], 0.f, 0.f) * rot; - LLVector3 y_axis = LLVector3(0.f, scale.mV[VY], 0.f) * rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, scale.mV[VZ]) * rot; - mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(pos, 1.f)); - - // compute inverse transpose for normals - LLMatrix3 rot_inverse = LLMatrix3(~rot); - - LLMatrix3 scale_inverse; - scale_inverse.setRows(LLVector3(1.0, 0.0, 0.0) / scale.mV[VX], - LLVector3(0.0, 1.0, 0.0) / scale.mV[VY], - LLVector3(0.0, 0.0, 1.0) / scale.mV[VZ]); - - - mRelativeXformInvTrans = rot_inverse * scale_inverse; + mRelativeXform = rot; + mRelativeXform.applyScale_affine(getScale()); + mRelativeXform.setTranslate_affine(LLVector3(pos.getF32ptr())); + mRelativeXformInvTrans = mRelativeXform; + mRelativeXformInvTrans.invert(); mRelativeXformInvTrans.transpose(); } } @@ -1734,11 +1693,6 @@ BOOL LLVOVolume::updateGeometry(LLDrawable *drawable) // Update face flags updateFaceFlags(); - if(compiled) - { - LLPipeline::sCompiles++; - } - mVolumeChanged = FALSE; mLODChanged = FALSE; mSculptChanged = FALSE; @@ -3032,10 +2986,10 @@ void LLVOVolume::generateSilhouette(LLSelectNode* nodep, const LLVector3& view_p } updateRelativeXform(); - LLMatrix4 trans_mat = mRelativeXform; + LLMatrix4a trans_mat = mRelativeXform; if (mDrawable->isStatic()) { - trans_mat.translate(getRegion()->getOriginAgent()); + trans_mat.translate_affine(getRegion()->getOriginAgent()); } volume->generateSilhouetteVertices(nodep->mSilhouetteVertices, nodep->mSilhouetteNormals, view_vector, trans_mat, mRelativeXformInvTrans, nodep->getTESelectMask()); @@ -3082,7 +3036,7 @@ 
BOOL LLVOVolume::isHUDAttachment() const } -const LLMatrix4 LLVOVolume::getRenderMatrix() const +const LLMatrix4a& LLVOVolume::getRenderMatrix() const { if (mDrawable->isActive() && !mDrawable->isRoot()) { @@ -3564,7 +3518,7 @@ void LLVOVolume::onShift(const LLVector4a &shift_vector) updateRelativeXform(); } -const LLMatrix4& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const +const LLMatrix4a& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const { if (mVolumeImpl) { @@ -3924,7 +3878,6 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons //build matrix palette LLMatrix4a mp[JOINT_COUNT]; - LLMatrix4* mat = (LLMatrix4*) mp; U32 count = llmin((U32) skin->mJointNames.size(), (U32) JOINT_COUNT); @@ -3939,8 +3892,9 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons } if (joint) { - mat[j] = skin->mInvBindMatrix[j]; - mat[j] *= joint->getWorldMatrix(); + LLMatrix4a mat; + mat.loadu((F32*)skin->mInvBindMatrix[j].mMatrix); + mp[j].setMul(joint->getWorldMatrix(), mat); } } @@ -4183,13 +4137,13 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep, return; } - const LLMatrix4* tex_mat = NULL; + const LLMatrix4a* tex_mat = NULL; if (facep->isState(LLFace::TEXTURE_ANIM) && facep->getVirtualSize() > MIN_TEX_ANIM_SIZE) { tex_mat = facep->mTextureMatrix; } - const LLMatrix4* model_mat = NULL; + const LLMatrix4a* model_mat = NULL; LLDrawable* drawable = facep->getDrawable(); @@ -4206,6 +4160,11 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep, model_mat = &(drawable->getRegion()->mRenderMatrix); } + if(model_mat && model_mat->isIdentity()) + { + model_mat = NULL; + } + //drawable->getVObj()->setDebugText(llformat("%d", drawable->isState(LLDrawable::ANIMATED_CHILD))); LLMaterial* mat = facep->getTextureEntry()->getMaterialParams().get(); @@ -4558,8 +4517,6 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) vobj->isMesh() && 
gMeshRepo.getSkinInfo(vobj->getVolume()->getParams().getSculptID(), vobj); - //bool bake_sunlight = LLPipeline::sBakeSunlight && drawablep->isStatic(); - bool is_rigged = false; static const LLCachedControl alt_batching("SHAltBatching",true); @@ -5577,8 +5534,6 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFac LLViewerTexture* tex = facep->getTexture(); LLMaterialPtr mat = facep->getTextureEntry()->getMaterialParams(); - //bool bake_sunlight = LLPipeline::sBakeSunlight && facep->getDrawable()->isStatic(); - static const LLCachedControl alt_batching("SHAltBatching",true); if (!alt_batching && distance_sort) { diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h index 3b6955951..e70a6a64d 100644 --- a/indra/newview/llvovolume.h +++ b/indra/newview/llvovolume.h @@ -86,7 +86,7 @@ public: virtual bool isVolumeUnique() const = 0; // Do we need a unique LLVolume instance? virtual bool isVolumeGlobal() const = 0; // Are we in global space? virtual bool isActive() const = 0; // Is this object currently active? 
- virtual const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const = 0; + virtual const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const = 0; virtual void updateRelativeXform(bool force_identity = false) = 0; virtual U32 getID() const = 0; virtual void preRebuild() = 0; @@ -113,6 +113,16 @@ public: (1 << LLVertexBuffer::TYPE_COLOR) }; + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + public: LLVOVolume(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); /*virtual*/ void markDead(); // Override (and call through to parent) to clean up media references @@ -133,9 +143,9 @@ public: /*virtual*/ BOOL setParent(LLViewerObject* parent); S32 getLOD() const { return mLOD; } const LLVector3 getPivotPositionAgent() const; - const LLMatrix4& getRelativeXform() const { return mRelativeXform; } - const LLMatrix3& getRelativeXformInvTrans() const { return mRelativeXformInvTrans; } - /*virtual*/ const LLMatrix4 getRenderMatrix() const; + const LLMatrix4a& getRelativeXform() const { return mRelativeXform; } + const LLMatrix4a& getRelativeXformInvTrans() const { return mRelativeXformInvTrans; } + /*virtual*/ const LLMatrix4a& getRenderMatrix() const; typedef std::map texture_cost_t; U32 getRenderCost(texture_cost_t &textures) const; /*virtual*/ F32 getStreamingCost(S32* bytes = NULL, S32* visible_bytes = NULL, F32* unscaled_value = NULL) const; @@ -161,7 +171,7 @@ public: BOOL getVolumeChanged() const { return mVolumeChanged; } /*virtual*/ F32 getRadius() const { return mVObjRadius; }; - const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const; + const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const; void markForUpdate(BOOL priority) { LLViewerObject::markForUpdate(priority); mVolumeChanged = TRUE; } void faceMappingChanged() { mFaceMappingChanged=TRUE; }; @@ -365,8 +375,8 @@ private: BOOL mLODChanged; BOOL mSculptChanged; F32 mSpotLightPriority; - 
LLMatrix4 mRelativeXform; - LLMatrix3 mRelativeXformInvTrans; + LL_ALIGN_16(LLMatrix4a mRelativeXform); + LL_ALIGN_16(LLMatrix4a mRelativeXformInvTrans); BOOL mVolumeChanged; F32 mVObjRadius; LLVolumeInterface *mVolumeImpl; diff --git a/indra/newview/llvowater.cpp b/indra/newview/llvowater.cpp index b4ab34d36..516429f22 100644 --- a/indra/newview/llvowater.cpp +++ b/indra/newview/llvowater.cpp @@ -247,7 +247,6 @@ BOOL LLVOWater::updateGeometry(LLDrawable *drawable) buff->flush(); mDrawable->movePartition(); - LLPipeline::sCompiles++; return TRUE; } diff --git a/indra/newview/llvowlsky.cpp b/indra/newview/llvowlsky.cpp index a0c303bf9..96239a22b 100644 --- a/indra/newview/llvowlsky.cpp +++ b/indra/newview/llvowlsky.cpp @@ -484,8 +484,6 @@ BOOL LLVOWLSky::updateGeometry(LLDrawable * drawable) updateStarColors(); updateStarGeometry(drawable); - LLPipeline::sCompiles++; - return TRUE; } diff --git a/indra/newview/llwaterparammanager.cpp b/indra/newview/llwaterparammanager.cpp index 26303187d..6ba6a5ef9 100644 --- a/indra/newview/llwaterparammanager.cpp +++ b/indra/newview/llwaterparammanager.cpp @@ -390,24 +390,23 @@ void LLWaterParamManager::update(LLViewerCamera * cam) if(gPipeline.canUseVertexShaders()) { //transform water plane to eye space - glh::vec3f norm(0.f, 0.f, 1.f); - glh::vec3f p(0.f, 0.f, gAgent.getRegion()->getWaterHeight()+0.1f); + LLVector4a enorm(0.f, 0.f, 1.f); + LLVector4a ep(0.f, 0.f, gAgent.getRegion()->getWaterHeight()+0.1f); - F32 modelView[16]; - for (U32 i = 0; i < 16; i++) - { - modelView[i] = (F32) gGLModelView[i]; - } + const LLMatrix4a& mat = gGLModelView; + LLMatrix4a invtrans = mat; + invtrans.invert(); + invtrans.transpose(); - glh::matrix4f mat(modelView); - glh::matrix4f invtrans = mat.inverse().transpose(); - glh::vec3f enorm; - glh::vec3f ep; - invtrans.mult_matrix_vec(norm, enorm); - enorm.normalize(); - mat.mult_matrix_vec(p, ep); + invtrans.perspectiveTransform(enorm,enorm); + enorm.normalize3fast(); + mat.affineTransform(ep,ep); 
- mWaterPlane = LLVector4(enorm.v[0], enorm.v[1], enorm.v[2], -ep.dot(enorm)); + ep.setAllDot3(ep,enorm); + ep.negate(); + enorm.copyComponent<3>(ep); + + mWaterPlane.set(enorm.getF32ptr()); LLVector3 sunMoonDir; if (gSky.getSunDirection().mV[2] > LLSky::NIGHTTIME_ELEVATION_COS) diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 4e40e2a89..28950e8f3 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -153,7 +153,7 @@ const F32 BACKLIGHT_NIGHT_MAGNITUDE_OBJECT = 0.08f; const S32 MAX_ACTIVE_OBJECT_QUIET_FRAMES = 40; const S32 MAX_OFFSCREEN_GEOMETRY_CHANGES_PER_FRAME = 10; const U32 REFLECTION_MAP_RES = 128; -const U32 DEFERRED_VB_MASK = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_TEXCOORD1; +const U32 AUX_VB_MASK = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_TEXCOORD1; // Max number of occluders to search for. JC const S32 MAX_OCCLUDER_COUNT = 2; @@ -170,7 +170,7 @@ BOOL gAvatarBacklight = FALSE; BOOL gDebugPipeline = FALSE; LLPipeline gPipeline; -const LLMatrix4* gGLLastMatrix = NULL; +const LLMatrix4a* gGLLastMatrix = NULL; LLFastTimer::DeclareTimer FTM_RENDER_GEOMETRY("Geometry"); LLFastTimer::DeclareTimer FTM_RENDER_GRASS("Grass"); @@ -249,67 +249,49 @@ void drawBoxOutline(const LLVector3& pos, const LLVector3& size); U32 nhpo2(U32 v); LLVertexBuffer* ll_create_cube_vb(U32 type_mask, U32 usage); -glh::matrix4f glh_copy_matrix(F32* src) +inline const LLMatrix4a& glh_get_current_modelview() { - glh::matrix4f ret; - ret.set_value(src); - return ret; + return gGLModelView; } -glh::matrix4f glh_get_current_modelview() +inline const LLMatrix4a& glh_get_current_projection() { - return glh_copy_matrix(gGLModelView); + return gGLProjection; } -glh::matrix4f glh_get_current_projection() +inline const LLMatrix4a& glh_get_last_modelview() { - return glh_copy_matrix(gGLProjection); + return gGLLastModelView; } -glh::matrix4f glh_get_last_modelview() 
+inline const LLMatrix4a& glh_get_last_projection() { - return glh_copy_matrix(gGLLastModelView); + return gGLLastProjection; } -glh::matrix4f glh_get_last_projection() +inline void glh_set_current_modelview(const LLMatrix4a& mat) { - return glh_copy_matrix(gGLLastProjection); + gGLModelView = mat; } -void glh_copy_matrix(const glh::matrix4f& src, F32* dst) +inline void glh_set_current_projection(const LLMatrix4a& mat) { - for (U32 i = 0; i < 16; i++) - { - dst[i] = src.m[i]; - } + gGLProjection = mat; } -void glh_set_current_modelview(const glh::matrix4f& mat) +inline void glh_set_last_modelview(const LLMatrix4a& mat) { - glh_copy_matrix(mat, gGLModelView); + gGLLastModelView = mat; } -void glh_set_current_projection(glh::matrix4f& mat) +void glh_set_last_projection(const LLMatrix4a& mat) { - glh_copy_matrix(mat, gGLProjection); -} - -glh::matrix4f gl_ortho(GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat znear, GLfloat zfar) -{ - glh::matrix4f ret( - 2.f/(right-left), 0.f, 0.f, -(right+left)/(right-left), - 0.f, 2.f/(top-bottom), 0.f, -(top+bottom)/(top-bottom), - 0.f, 0.f, -2.f/(zfar-znear), -(zfar+znear)/(zfar-znear), - 0.f, 0.f, 0.f, 1.f); - - return ret; + gGLLastProjection = mat; } void display_update_camera(bool tiling=false); //---------------------------------------- -S32 LLPipeline::sCompiles = 0; - BOOL LLPipeline::sPickAvatar = TRUE; BOOL LLPipeline::sDynamicLOD = TRUE; BOOL LLPipeline::sShowHUDAttachments = TRUE; @@ -329,7 +311,6 @@ BOOL LLPipeline::sAutoMaskAlphaDeferred = TRUE; BOOL LLPipeline::sAutoMaskAlphaNonDeferred = FALSE; BOOL LLPipeline::sDisableShaders = FALSE; BOOL LLPipeline::sRenderBump = TRUE; -BOOL LLPipeline::sBakeSunlight = FALSE; BOOL LLPipeline::sNoAlpha = FALSE; BOOL LLPipeline::sUseFarClip = TRUE; BOOL LLPipeline::sShadowRender = FALSE; @@ -348,7 +329,7 @@ BOOL LLPipeline::sRenderDeferred = FALSE; BOOL LLPipeline::sMemAllocationThrottled = FALSE; S32 LLPipeline::sVisibleLightCount = 0; F32 
LLPipeline::sMinRenderSize = 0.f; -BOOL LLPipeline::sRenderingHUDs; +BOOL LLPipeline::sRenderingHUDs = FALSE; static LLCullResult* sCull = NULL; @@ -383,10 +364,6 @@ LLPipeline::LLPipeline() : mMeanBatchSize(0), mTrianglesDrawn(0), mNumVisibleNodes(0), - mVerticesRelit(0), - mLightingChanges(0), - mGeometryChanges(0), - mNumVisibleFaces(0), mInitialized(FALSE), mVertexShadersEnabled(FALSE), @@ -600,7 +577,7 @@ void LLPipeline::cleanup() mInitialized = FALSE; - mDeferredVB = NULL; + mAuxScreenRectVB = NULL; mCubeVB = NULL; } @@ -798,6 +775,8 @@ LLPipeline::eFBOStatus LLPipeline::doAllocateScreenBuffer(U32 resX, U32 resY) bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) { + mAuxScreenRectVB = NULL; + refreshCachedSettings(); U32 res_mod = gSavedSettings.getU32("RenderResolutionDivisor"); if (res_mod > 1 && res_mod < resX && res_mod < resY) @@ -1837,11 +1816,6 @@ void LLPipeline::resetFrameStats() mMeanBatchSize = gPipeline.mTrianglesDrawn/gPipeline.mBatchCount; } mTrianglesDrawn = 0; - sCompiles = 0; - mVerticesRelit = 0; - mLightingChanges = 0; - mGeometryChanges = 0; - mNumVisibleFaces = 0; if (mOldRenderDebugMask != mRenderDebugMask) { @@ -2339,11 +2313,11 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl gGL.matrixMode(LLRender::MM_PROJECTION); gGL.pushMatrix(); - gGL.loadMatrix(gGLLastProjection); + gGL.loadMatrix(glh_get_last_projection()); gGL.matrixMode(LLRender::MM_MODELVIEW); gGL.pushMatrix(); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLLastModelView); + gGL.loadMatrix(glh_get_last_modelview()); LLGLDisable blend(GL_BLEND); LLGLDisable test(GL_ALPHA_TEST); @@ -2378,8 +2352,8 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl } } - glh::matrix4f modelview = glh_get_last_modelview(); - glh::matrix4f proj = glh_get_last_projection(); + const LLMatrix4a& modelview = glh_get_last_modelview(); + const LLMatrix4a& proj = glh_get_last_projection(); LLGLUserClipPlane clip(plane, 
modelview, proj, water_clip != 0 && LLPipeline::sReflectionRender); LLGLDepthTest depth(GL_TRUE, GL_FALSE); @@ -2558,18 +2532,6 @@ void LLPipeline::downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& d dest.bindTarget(); dest.clear(GL_DEPTH_BUFFER_BIT); - - if(mDeferredVB.isNull()) - { - mDeferredVB = new LLVertexBuffer(DEFERRED_VB_MASK, 0); - mDeferredVB->allocateBuffer(8, 0, true); - LLStrider vert; - mDeferredVB->getVertexStrider(vert); - - vert[0].set(-1,1,0); - vert[1].set(-1,-3,0); - vert[2].set(3,1,0); - } if (source.getUsage() == LLTexUnit::TT_RECT_TEXTURE) { @@ -2590,8 +2552,7 @@ void LLPipeline::downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& d { LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } dest.flush(); @@ -2684,7 +2645,6 @@ BOOL LLPipeline::updateDrawableGeom(LLDrawable* drawablep, BOOL priority) if (update_complete && assertInitialized()) { drawablep->setState(LLDrawable::BUILT); - mGeometryChanges++; } return update_complete; } @@ -3474,9 +3434,6 @@ void LLPipeline::stateSort(LLDrawable* drawablep, LLCamera& camera) } } } - - - mNumVisibleFaces += drawablep->getNumFaces(); } @@ -4105,17 +4062,14 @@ void LLPipeline::renderGeom(LLCamera& camera, BOOL forceVBOUpdate) assertInitialized(); - F32 saved_modelview[16]; - F32 saved_projection[16]; + LLMatrix4a saved_modelview; + LLMatrix4a saved_projection; //HACK: preserve/restore matrices around HUD render if (gPipeline.hasRenderType(LLPipeline::RENDER_TYPE_HUD)) { - for (U32 i = 0; i < 16; i++) - { - saved_modelview[i] = gGLModelView[i]; - saved_projection[i] = gGLProjection[i]; - } + saved_modelview = glh_get_current_modelview(); + saved_projection = glh_get_current_projection(); } /////////////////////////////////////////// @@ -4219,7 +4173,7 @@ void LLPipeline::renderGeom(LLCamera& camera, BOOL forceVBOUpdate) { 
occlude = FALSE; gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); LLGLSLShader::bindNoShader(); doOcclusion(camera); } @@ -4230,7 +4184,7 @@ void LLPipeline::renderGeom(LLCamera& camera, BOOL forceVBOUpdate) LLFastTimer t(FTM_POOLRENDER); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); for( S32 i = 0; i < poolp->getNumPasses(); i++ ) { @@ -4279,13 +4233,13 @@ void LLPipeline::renderGeom(LLCamera& camera, BOOL forceVBOUpdate) LLVertexBuffer::unbind(); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); if (occlude) { occlude = FALSE; gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); LLGLSLShader::bindNoShader(); doOcclusion(camera); } @@ -4348,11 +4302,8 @@ void LLPipeline::renderGeom(LLCamera& camera, BOOL forceVBOUpdate) //HACK: preserve/restore matrices around HUD render if (gPipeline.hasRenderType(LLPipeline::RENDER_TYPE_HUD)) { - for (U32 i = 0; i < 16; i++) - { - gGLModelView[i] = saved_modelview[i]; - gGLProjection[i] = saved_projection[i]; - } + glh_set_current_modelview(saved_modelview); + glh_set_current_projection(saved_projection); } } @@ -4414,7 +4365,7 @@ void LLPipeline::renderGeomDeferred(LLCamera& camera) LLFastTimer t(FTM_DEFERRED_POOLRENDER); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); for( S32 i = 0; i < poolp->getNumDeferredPasses(); i++ ) { @@ -4458,7 +4409,7 @@ void LLPipeline::renderGeomDeferred(LLCamera& camera) } gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGL.setColorMask(true, false); } @@ -4491,7 +4442,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion) { occlude = FALSE; gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); 
LLGLSLShader::bindNoShader(); doOcclusion(camera/*, mScreen, mOcclusionDepth, &mDeferredDepth*/); gGL.setColorMask(true, false); @@ -4503,7 +4454,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion) LLFastTimer t(FTM_POST_DEFERRED_POOLRENDER); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); for( S32 i = 0; i < poolp->getNumPostDeferredPasses(); i++ ) { @@ -4545,17 +4496,17 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion) } gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); if (occlude) { occlude = FALSE; gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); LLGLSLShader::bindNoShader(); doOcclusion(camera); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); } } @@ -4581,7 +4532,7 @@ void LLPipeline::renderGeomShadow(LLCamera& camera) poolp->prerender() ; gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); for( S32 i = 0; i < poolp->getNumShadowPasses(); i++ ) { @@ -4620,7 +4571,7 @@ void LLPipeline::renderGeomShadow(LLCamera& camera) } gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); } @@ -4695,7 +4646,7 @@ void LLPipeline::renderPhysicsDisplay() if (!bridge->isDead() && hasRenderType(bridge->mDrawableType)) { gGL.pushMatrix(); - gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix); + gGL.multMatrix(bridge->mDrawable->getRenderMatrix()); bridge->renderPhysicsShapes(); gGL.popMatrix(); } @@ -4720,7 +4671,7 @@ void LLPipeline::renderDebug() gGL.color4f(1,1,1,1); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGL.setColorMask(true, false); bool hud_only = hasRenderType(LLPipeline::RENDER_TYPE_HUD); @@ -4797,7 +4748,7 @@ void 
LLPipeline::renderDebug() if (!bridge->isDead() && hasRenderType(bridge->mDrawableType)) { gGL.pushMatrix(); - gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix); + gGL.multMatrix(bridge->mDrawable->getRenderMatrix()); bridge->renderDebug(); gGL.popMatrix(); } @@ -4992,7 +4943,7 @@ void LLPipeline::renderDebug() gGL.getTexUnit(0)->bind(LLViewerFetchedTexture::sWhiteImagep); gGL.pushMatrix(); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGLLastMatrix = NULL; for (LLSpatialGroup::sg_vector_t::iterator iter = mGroupQ2.begin(); iter != mGroupQ2.end(); ++iter) @@ -5013,7 +4964,7 @@ void LLPipeline::renderDebug() if (bridge) { gGL.pushMatrix(); - gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix); + gGL.multMatrix(bridge->mDrawable->getRenderMatrix()); } F32 alpha = llclamp((F32) (size-count)/size, 0.f, 1.f); @@ -5409,13 +5360,15 @@ void LLPipeline::setupAvatarLights(BOOL for_edit) if (for_edit) { LLColor4 diffuse(1.f, 1.f, 1.f, 0.f); - LLVector4 light_pos_cam(-8.f, 0.25f, 10.f, 0.f); // w==0 => directional light - LLMatrix4 camera_mat = LLViewerCamera::getInstance()->getModelview(); - LLMatrix4 camera_rot(camera_mat.getMat3()); + LLVector4a light_pos_cam(-8.f, 0.25f, 10.f, 0.f); // w==0 => directional light + LLMatrix4a camera_rot = LLViewerCamera::getInstance()->getModelview(); + camera_rot.extractRotation_affine(); camera_rot.invert(); - LLVector4 light_pos = light_pos_cam * camera_rot; + LLVector4a light_pos; - light_pos.normalize(); + camera_rot.rotate(light_pos_cam,light_pos); + + light_pos.normalize3fast(); LLLightState* light = gGL.getLight(1); @@ -5424,7 +5377,7 @@ void LLPipeline::setupAvatarLights(BOOL for_edit) light->setDiffuse(diffuse); light->setAmbient(LLColor4::black); light->setSpecular(LLColor4::black); - light->setPosition(light_pos); + light->setPosition(LLVector4(light_pos.getF32ptr())); light->setConstantAttenuation(1.f); light->setLinearAttenuation(0.f); 
light->setQuadraticAttenuation(0.f); @@ -5528,11 +5481,11 @@ void LLPipeline::resetLocalLights() pLight->setConstantAttenuation(0.f); pLight->setDiffuse(LLColor4::black); pLight->setLinearAttenuation(0.f); - pLight->setPosition(LLVector4(0.f,0.f,0.f,0.f)); + pLight->setPosition(LLVector4(0.f,0.f,1.f,0.f)); pLight->setQuadraticAttenuation(0.f); pLight->setSpecular(LLColor4::black); pLight->setSpotCutoff(0.f); - pLight->setSpotDirection(LLVector3(0.f,0.f,0.f)); + pLight->setSpotDirection(LLVector3(0.f,0.f,-1.f)); pLight->setSpotExponent(0.f); pLight->disable(); } @@ -6801,7 +6754,7 @@ void LLPipeline::doResetVertexBuffers() mResetVertexBuffers = false; mCubeVB = NULL; - mDeferredVB = NULL; + mAuxScreenRectVB = NULL; for (LLWorld::region_list_t::const_iterator iter = LLWorld::getInstance()->getRegionList().begin(); iter != LLWorld::getInstance()->getRegionList().end(); ++iter) @@ -6846,7 +6799,6 @@ void LLPipeline::doResetVertexBuffers() LLVertexBuffer::sPreferStreamDraw = gSavedSettings.getBOOL("RenderPreferStreamDraw"); LLVertexBuffer::sEnableVBOs = gSavedSettings.getBOOL("RenderVBOEnable"); LLVertexBuffer::sDisableVBOMapping = LLVertexBuffer::sEnableVBOs;// && gSavedSettings.getBOOL("RenderVBOMappingDisable") ; //Temporary workaround for vbo mapping being straight up broken - sBakeSunlight = gSavedSettings.getBOOL("RenderBakeSunlight"); sNoAlpha = gSavedSettings.getBOOL("RenderNoAlpha"); LLPipeline::sTextureBindTest = gSavedSettings.getBOOL("RenderDebugTextureBind"); @@ -6858,48 +6810,58 @@ void LLPipeline::doResetVertexBuffers() void LLPipeline::renderObjects(U32 type, U32 mask, BOOL texture, BOOL batch_texture) { assertInitialized(); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGLLastMatrix = NULL; mSimplePool->pushBatches(type, mask, texture, batch_texture); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGLLastMatrix = NULL; } void LLPipeline::renderMaskedObjects(U32 type, U32 mask, BOOL 
texture, BOOL batch_texture) { assertInitialized(); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGLLastMatrix = NULL; mAlphaMaskPool->pushMaskBatches(type, mask, texture, batch_texture); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); gGLLastMatrix = NULL; } void apply_cube_face_rotation(U32 face) { + static const LLMatrix4a x_90 = gGL.genRot( 90.f, 1.f, 0.f, 0.f ); + static const LLMatrix4a y_90 = gGL.genRot( 90.f, 0.f, 1.f, 0.f ); + static const LLMatrix4a x_90_neg = gGL.genRot( -90.f, 1.f, 0.f, 0.f ); + static const LLMatrix4a y_90_neg = gGL.genRot( -90.f, 0.f, 1.f, 0.f ); + + static const LLMatrix4a x_180 = gGL.genRot( 180.f, 1.f, 0.f, 0.f ); + static const LLMatrix4a y_180 = gGL.genRot( 180.f, 0.f, 1.f, 0.f ); + static const LLMatrix4a z_180 = gGL.genRot( 180.f, 0.f, 0.f, 1.f ); + switch (face) { case 0: - gGL.rotatef(90.f, 0, 1, 0); - gGL.rotatef(180.f, 1, 0, 0); + + gGL.rotatef(y_90); + gGL.rotatef(x_180); break; case 2: - gGL.rotatef(-90.f, 1, 0, 0); + gGL.rotatef(x_90_neg); break; case 4: - gGL.rotatef(180.f, 0, 1, 0); - gGL.rotatef(180.f, 0, 0, 1); + gGL.rotatef(y_180); + gGL.rotatef(z_180); break; case 1: - gGL.rotatef(-90.f, 0, 1, 0); - gGL.rotatef(180.f, 1, 0, 0); + gGL.rotatef(y_90_neg); + gGL.rotatef(x_180); break; case 3: - gGL.rotatef(90, 1, 0, 0); + gGL.rotatef(x_90); break; case 5: - gGL.rotatef(180, 0, 0, 1); + gGL.rotatef(z_180); break; } } @@ -6981,10 +6943,6 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b //U32 res_mod = RenderResolutionDivisor;//.get(); - LLVector2 tc1(0,0); - LLVector2 tc2((F32) mScreen.getWidth()*2, - (F32) mScreen.getHeight()*2); - /*if (res_mod > 1) { tc2 /= (F32) res_mod; @@ -7029,29 +6987,36 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b llassert(zoom_factor > 0.0); // Non-zero, non-negative. 
const F32 tile_size = 1.0/zoom_factor; - tc1 = tile*tile_size; // Top left texture coordinates - tc2 = (tile+LLVector2(1,1))*tile_size; // Bottom right texture coordinates + LLVector2 tc1 = tile*tile_size; // Top left texture coordinates + LLVector2 tc2 = (tile+LLVector2(1,1))*tile_size; // Bottom right texture coordinates LLGLEnable blend(GL_BLEND); gGL.setSceneBlendType(LLRender::BT_ADD); - - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.color4f(1,1,1,1); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,1); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(1,-1); - - gGL.texCoord2f(tc2.mV[0], tc2.mV[1]); - gGL.vertex2f(1,1); + + LLPointer buff = new LLVertexBuffer(AUX_VB_MASK, 0); + buff->allocateBuffer(4, 0, true); + LLStrider vert; + LLStrider texcoord0, texcoord1; + buff->getVertexStrider(vert); + buff->getTexCoord0Strider(texcoord0); + buff->getTexCoord1Strider(texcoord1); - gGL.end(); + vert[0].set(-1.f,-1.f,0.f); + vert[1].set(-1.f,1.f,0.f); + vert[2].set(1.f,-1.f,0.f); + vert[3].set(1.f,1.f,0.f); + + //Texcoord 0 is actually for texture 1, which is unbound and thus all components = 0,0,0,0. Just zero out the texcoords. 
+ texcoord0[0] = texcoord0[1] = texcoord0[2] = texcoord0[3] = LLVector2::zero; + + texcoord1[0].set(tc1.mV[0], tc1.mV[1]); + texcoord1[1].set(tc1.mV[0], tc2.mV[1]); + texcoord1[2].set(tc2.mV[0], tc1.mV[1]); + texcoord1[3].set(tc2.mV[0], tc2.mV[1]); + + buff->setBuffer(AUX_VB_MASK); + buff->drawArrays(LLRender::TRIANGLE_STRIP, 0, 4); - gGL.flush(); gGL.setSceneBlendType(LLRender::BT_ALPHA); } @@ -7095,26 +7060,14 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b gGL.color4f(1,1,1,1); gPipeline.enableLightsFullbright(LLColor4(1,1,1,1)); - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); gGL.getTexUnit(0)->unbind(mScreen.getUsage()); mGlow[1].flush(); } - tc1.setVec(0,0); - tc2.setVec(2,2); - // power of two between 1 and 1024 U32 glowResPow = RenderGlowResolutionPow; const U32 glow_res = llmax(1, @@ -7152,17 +7105,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b gGlowProgram.uniform2f(LLShaderMgr::GLOW_DELTA, 0, delta); } - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD1); mGlow[i%2].flush(); } @@ -7181,9 +7124,6 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b gGLViewport[3] = gViewerWindow->getWorldViewRectRaw().getHeight(); glViewport(gGLViewport[0], gGLViewport[1], gGLViewport[2], gGLViewport[3]); - tc2.setVec((F32) mScreen.getWidth(), - (F32) mScreen.getHeight()); - gGL.flush(); LLVertexBuffer::unbind(); @@ -7325,17 
+7265,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b shader->uniform1f(LLShaderMgr::DOF_MAX_COF, CameraMaxCoF); shader->uniform1f(LLShaderMgr::DOF_RES_SCALE, CameraDoFResScale); - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); unbindDeferredShader(*shader); mDeferredLight.flush(); @@ -7360,17 +7290,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b shader->uniform1f(LLShaderMgr::DOF_MAX_COF, CameraMaxCoF); shader->uniform1f(LLShaderMgr::DOF_RES_SCALE, CameraDoFResScale); - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); unbindDeferredShader(*shader); mScreen.flush(); @@ -7414,17 +7334,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b shader->uniform1f(LLShaderMgr::DOF_WIDTH, dof_width-1); shader->uniform1f(LLShaderMgr::DOF_HEIGHT, dof_height-1); - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); unbindDeferredShader(*shader); @@ -7457,17 +7367,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b shader->uniform1f(LLShaderMgr::GLOBAL_GAMMA, 1.0); } - gGL.begin(LLRender::TRIANGLE_STRIP); - 
gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); unbindDeferredShader(*shader); @@ -7497,13 +7397,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b mDeferredLight.bindTexture(0, channel); } - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.vertex2f(-1,-1); - gGL.vertex2f(-1,3); - gGL.vertex2f(3,-1); - gGL.end(); - - gGL.flush(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); shader->disableTexture(LLShaderMgr::DEFERRED_DIFFUSE, mDeferredLight.getUsage()); shader->unbind(); @@ -7533,13 +7427,8 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b shader->uniform4f(LLShaderMgr::FXAA_RCP_FRAME_OPT, -0.5f/width*scale_x, -0.5f/height*scale_y, 0.5f/width*scale_x, 0.5f/height*scale_y); shader->uniform4f(LLShaderMgr::FXAA_RCP_FRAME_OPT2, -2.f/width*scale_x, -2.f/height*scale_y, 2.f/width*scale_x, 2.f/height*scale_y); - gGL.begin(LLRender::TRIANGLE_STRIP); - gGL.vertex2f(-1,-1); - gGL.vertex2f(-1,3); - gGL.vertex2f(3,-1); - gGL.end(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); - gGL.flush(); shader->unbind(); } } @@ -7550,32 +7439,6 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b tc2 /= (F32) res_mod; }*/ - U32 mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_TEXCOORD1; - LLPointer buff = new LLVertexBuffer(mask, 0); - buff->allocateBuffer(3,0,TRUE); - - LLStrider v; - LLStrider uv1; - LLStrider uv2; - - buff->getVertexStrider(v); - buff->getTexCoord0Strider(uv1); - buff->getTexCoord1Strider(uv2); - - uv1[0] = LLVector2(0, 0); - uv1[1] = LLVector2(0, 2); - uv1[2] = LLVector2(2, 0); - - uv2[0] = LLVector2(0, 0); - uv2[1] = LLVector2(0, tc2.mV[1]*2.f); - uv2[2] = LLVector2(tc2.mV[0]*2.f, 0); - - v[0] 
= LLVector3(-1,-1,0); - v[1] = LLVector3(-1,3,0); - v[2] = LLVector3(3,-1,0); - - buff->flush(); - LLGLDisable blend(GL_BLEND); if (LLGLSLShader::sNoFixedFunction) @@ -7595,8 +7458,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b LLGLEnable multisample(RenderFSAASamples > 0 ? GL_MULTISAMPLE_ARB : 0); - buff->setBuffer(mask); - buff->drawArrays(LLRender::TRIANGLE_STRIP, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 | LLVertexBuffer::MAP_TEXCOORD1); if (LLGLSLShader::sNoFixedFunction) { @@ -7624,27 +7486,12 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield, b gGL.setColorMask(true, false); - LLVector2 tc1(0,0); - LLVector2 tc2((F32) gViewerWindow->getWorldViewWidthRaw()*2, - (F32) gViewerWindow->getWorldViewHeightRaw()*2); - LLGLEnable blend(GL_BLEND); gGL.color4f(1,1,1,0.75f); gGL.getTexUnit(0)->bind(&mPhysicsDisplay); - gGL.begin(LLRender::TRIANGLES); - gGL.texCoord2f(tc1.mV[0], tc1.mV[1]); - gGL.vertex2f(-1,-1); - - gGL.texCoord2f(tc1.mV[0], tc2.mV[1]); - gGL.vertex2f(-1,3); - - gGL.texCoord2f(tc2.mV[0], tc1.mV[1]); - gGL.vertex2f(3,-1); - - gGL.end(); - gGL.flush(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0); if (LLGLSLShader::sNoFixedFunction) { @@ -7747,10 +7594,10 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n stop_glerror(); - glh::matrix4f projection = glh_get_current_projection(); - glh::matrix4f inv_proj = projection.inverse(); + LLMatrix4a inv_proj = glh_get_current_projection(); + inv_proj.invert(); - shader.uniformMatrix4fv(LLShaderMgr::INVERSE_PROJECTION_MATRIX, 1, FALSE, inv_proj.m); + shader.uniformMatrix4fv(LLShaderMgr::INVERSE_PROJECTION_MATRIX, 1, FALSE, inv_proj.getF32ptr()); shader.uniform4f(LLShaderMgr::VIEWPORT, (F32) gGLViewport[0], (F32) gGLViewport[1], (F32) gGLViewport[2], @@ -7834,18 +7681,7 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 
light_index, U32 n if(shader.getUniformLocation(LLShaderMgr::DEFERRED_SHADOW_MATRIX) >= 0) { - F32 mat[16*6]; - for (U32 i = 0; i < 16; i++) - { - mat[i] = mSunShadowMatrix[0].m[i]; - mat[i+16] = mSunShadowMatrix[1].m[i]; - mat[i+32] = mSunShadowMatrix[2].m[i]; - mat[i+48] = mSunShadowMatrix[3].m[i]; - mat[i+64] = mSunShadowMatrix[4].m[i]; - mat[i+80] = mSunShadowMatrix[5].m[i]; - } - - shader.uniformMatrix4fv(LLShaderMgr::DEFERRED_SHADOW_MATRIX, 6, FALSE, mat); + shader.uniformMatrix4fv(LLShaderMgr::DEFERRED_SHADOW_MATRIX, 6, FALSE, mSunShadowMatrix[0].getF32ptr()); stop_glerror(); } @@ -7858,7 +7694,7 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n { cube_map->enable(channel); cube_map->bind(); - F32* m = gGLModelView; + const F32* m = glh_get_current_modelview().getF32ptr(); F32 mat[] = { m[0], m[1], m[2], m[4], m[5], m[6], @@ -7893,7 +7729,7 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n shader.uniform1f(LLShaderMgr::DEFERRED_SPOT_SHADOW_OFFSET, RenderSpotShadowOffset); shader.uniform1f(LLShaderMgr::DEFERRED_SPOT_SHADOW_BIAS, RenderSpotShadowBias); - shader.uniform3fv(LLShaderMgr::DEFERRED_SUN_DIR, 1, mTransformedSunDir.mV); + shader.uniform3fv(LLShaderMgr::DEFERRED_SUN_DIR, 1, mTransformedSunDir.getF32ptr()); shader.uniform2f(LLShaderMgr::DEFERRED_SHADOW_RES, mShadow[0].getWidth(), mShadow[0].getHeight()); shader.uniform2f(LLShaderMgr::DEFERRED_PROJ_SHADOW_RES, mShadow[4].getWidth(), mShadow[4].getHeight()); shader.uniform1f(LLShaderMgr::DEFERRED_DEPTH_CUTOFF, RenderEdgeDepthCutoff); @@ -7902,8 +7738,10 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n if (shader.getUniformLocation(LLShaderMgr::DEFERRED_NORM_MATRIX) >= 0) { - glh::matrix4f norm_mat = glh_get_current_modelview().inverse().transpose(); - shader.uniformMatrix4fv(LLShaderMgr::DEFERRED_NORM_MATRIX, 1, FALSE, norm_mat.m); + LLMatrix4a norm_mat = glh_get_current_modelview(); + norm_mat.invert(); 
+ norm_mat.transpose(); + shader.uniformMatrix4fv(LLShaderMgr::DEFERRED_NORM_MATRIX, 1, FALSE, norm_mat.getF32ptr()); } shader.uniform1f(LLShaderMgr::DEFERRED_DOWNSAMPLED_DEPTH_SCALE, llclamp(RenderSSAOResolutionScale.get(),.01f,1.f)); @@ -7968,26 +7806,10 @@ void LLPipeline::renderDeferredLighting() LLGLEnable cull(GL_CULL_FACE); LLGLEnable blend(GL_BLEND); - glh::matrix4f mat = glh_copy_matrix(gGLModelView); - - if(mDeferredVB.isNull()) - { - mDeferredVB = new LLVertexBuffer(DEFERRED_VB_MASK, 0); - mDeferredVB->allocateBuffer(8, 0, true); - LLStrider vert; - mDeferredVB->getVertexStrider(vert); - - vert[0].set(-1,1,0); - vert[1].set(-1,-3,0); - vert[2].set(3,1,0); - } - { setupHWLights(NULL); //to set mSunDir; - LLVector4 dir(mSunDir, 0.f); - glh::vec4f tc(dir.mV); - mat.mult_matrix_vec(tc); - mTransformedSunDir.set(tc.v); + mTransformedSunDir.load3(mSunDir.mV); + glh_get_current_modelview().rotate(mTransformedSunDir,mTransformedSunDir); } gGL.pushMatrix(); @@ -8009,12 +7831,9 @@ void LLPipeline::renderDeferredLighting() mDeferredDownsampledDepth.clear(GL_DEPTH_BUFFER_BIT); bindDeferredShader(gDeferredDownsampleDepthNearestProgram, 0); gDeferredDownsampleDepthNearestProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredDownsampledDepth.getWidth()/ssao_scale, mDeferredDownsampledDepth.getHeight()/ssao_scale); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); { LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mDeferredDownsampledDepth.flush(); unbindDeferredShader(gDeferredDownsampleDepthNearestProgram); @@ -8032,12 +7851,9 @@ void LLPipeline::renderDeferredLighting() glViewport(0,0,mDeferredDownsampledDepth.getWidth(),mDeferredDownsampledDepth.getHeight()); gDeferredSSAOProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredDownsampledDepth.getWidth()/ssao_scale, 
mDeferredDownsampledDepth.getHeight()/ssao_scale); } - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); { LLGLDepthTest depth(GL_FALSE); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mScreen.flush(); unbindDeferredShader(gDeferredSSAOProgram); @@ -8050,47 +7866,10 @@ void LLPipeline::renderDeferredLighting() { //paint shadow/SSAO light map (direct lighting lightmap) LLFastTimer ftm(FTM_SUN_SHADOW); bindDeferredShader(gDeferredSunProgram, 0); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); glClearColor(1,1,1,1); mDeferredLight.clear(GL_COLOR_BUFFER_BIT); glClearColor(0,0,0,0); - /*glh::matrix4f inv_trans = glh_get_current_modelview().inverse().transpose(); - - const U32 slice = 32; - F32 offset[slice*3]; - for (U32 i = 0; i < 4; i++) - { - for (U32 j = 0; j < 8; j++) - { - glh::vec3f v; - v.set_value(sinf(6.284f/8*j), cosf(6.284f/8*j), -(F32) i); -#if 0 - // Singu note: the call to mult_matrix_vec can crash, because it attempts to divide by zero. - v.normalize(); - inv_trans.mult_matrix_vec(v); -#else - // However, because afterwards we normalize the vector anyway, there is an alternative - // way to calculate the same thing without the division (which happens to be faster, too). - glh::vec4f src(v, v.length()); // Make a copy of the source and extent it with its length. - glh::vec4f dst; - inv_trans.mult_matrix_vec(src, dst); // Do a normal 4D multiplication. - dst.get_value(v[0], v[1], v[2], dst[3]); // Copy the first 3 coordinates to v. - // At this point v is equal to what it used to be, except for a constant factor (v.length() * dst[3]), - // but that doesn't matter because the next step is normalizaton. The old computation would crash - // if v.length() is zero in the commented out v.normalize(), and in inv_trans.mult_matrix_vec(v) - // if dst[3] is zero (which some times happens). 
Now we will only crash if v.length() is zero - // and well in the next line (but this never happens). --Aleric -#endif - v.normalize(); - offset[(i*8+j)*3+0] = v.v[0]; - offset[(i*8+j)*3+1] = v.v[2]; - offset[(i*8+j)*3+2] = v.v[1]; - } - } - - gDeferredSunProgram.uniform3fv(sOffset, slice, offset);*/ - gDeferredSunProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredLight.getWidth(), mDeferredLight.getHeight()); //Enable bilinear filtering, as the screen tex resolution may not match current framebuffer resolution. Eg, half-res SSAO @@ -8105,9 +7884,7 @@ void LLPipeline::renderDeferredLighting() { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } if (channel > -1) @@ -8131,7 +7908,6 @@ void LLPipeline::renderDeferredLighting() glClearColor(0,0,0,0); bindDeferredShader(gDeferredBlurLightProgram); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); LLVector3 go = RenderShadowGaussian; const U32 kern_length = 4; F32 blur_size = RenderShadowBlurSize; @@ -8158,16 +7934,13 @@ void LLPipeline::renderDeferredLighting() { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mScreen.flush(); unbindDeferredShader(gDeferredBlurLightProgram); bindDeferredShader(gDeferredBlurLightProgram, 1); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); mDeferredLight.bindTarget(); gDeferredBlurLightProgram.uniform2f(sDelta, 0.f, 1.f); @@ -8175,9 +7948,7 @@ void LLPipeline::renderDeferredLighting() { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mDeferredLight.flush(); 
unbindDeferredShader(gDeferredBlurLightProgram); @@ -8212,9 +7983,7 @@ void LLPipeline::renderDeferredLighting() gGL.pushMatrix(); gGL.loadIdentity(); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); gGL.popMatrix(); gGL.matrixMode(LLRender::MM_MODELVIEW); @@ -8353,11 +8122,12 @@ void LLPipeline::renderDeferredLighting() fullscreen_spot_lights.push_back(drawablep); continue; } - - glh::vec3f tc(c); - mat.mult_matrix_vec(tc); - - fullscreen_lights.push_back(LLVector4(tc.v[0], tc.v[1], tc.v[2], s)); + + glh_get_current_modelview().affineTransform(center,center); + + LLVector4 tc(center.getF32ptr()); + tc.mV[VW] = s; + fullscreen_lights.push_back(tc); light_colors.push_back(LLVector4(col.mV[0], col.mV[1], col.mV[2], volume->getLightFalloff()*0.5f)); } } @@ -8446,8 +8216,7 @@ void LLPipeline::renderDeferredLighting() gDeferredMultiLightProgram[idx].uniform1f(LLShaderMgr::MULTI_LIGHT_FAR_Z, far_z); far_z = 0.f; count = 0; - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); unbindDeferredShader(gDeferredMultiLightProgram[idx]); } } @@ -8456,8 +8225,6 @@ void LLPipeline::renderDeferredLighting() gDeferredMultiSpotLightProgram.enableTexture(LLShaderMgr::DEFERRED_PROJECTION); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - for (LLDrawable::drawable_list_t::iterator iter = fullscreen_spot_lights.begin(); iter != fullscreen_spot_lights.end(); ++iter) { LLFastTimer ftm(FTM_PROJECTORS); @@ -8465,14 +8232,13 @@ void LLPipeline::renderDeferredLighting() LLVOVolume* volume = drawablep->getVOVolume(); - LLVector3 center = drawablep->getPositionAgent(); - F32* c = center.mV; + LLVector4a center; + center.load3(drawablep->getPositionAgent().mV); F32 s = volume->getLightRadius()*1.5f; sVisibleLightCount++; - glh::vec3f tc(c); - mat.mult_matrix_vec(tc); + 
glh_get_current_modelview().affineTransform(center,center); setupSpotLight(gDeferredMultiSpotLightProgram, drawablep); @@ -8482,11 +8248,11 @@ void LLPipeline::renderDeferredLighting() col.mV[1] = powf(col.mV[1], 2.2f); col.mV[2] = powf(col.mV[2], 2.2f);*/ - gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::LIGHT_CENTER, 1, tc.v); + gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::LIGHT_CENTER, 1, center.getF32ptr()); gDeferredMultiSpotLightProgram.uniform1f(LLShaderMgr::LIGHT_SIZE, s); gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::DIFFUSE_COLOR, 1, col.mV); gDeferredMultiSpotLightProgram.uniform1f(LLShaderMgr::LIGHT_FALLOFF, volume->getLightFalloff()*0.5f); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } gDeferredMultiSpotLightProgram.disableTexture(LLShaderMgr::DEFERRED_PROJECTION); @@ -8518,7 +8284,6 @@ void LLPipeline::renderDeferredLighting() mScreen.bindTarget(); // Apply gamma correction to the frame here. 
gDeferredPostGammaCorrectProgram.bind(); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); S32 channel = 0; channel = gDeferredPostGammaCorrectProgram.enableTexture(LLShaderMgr::DEFERRED_DIFFUSE, mScreen.getUsage()); if (channel > -1) @@ -8529,7 +8294,7 @@ void LLPipeline::renderDeferredLighting() gDeferredPostGammaCorrectProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mScreen.getWidth(), mScreen.getHeight()); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); gGL.getTexUnit(channel)->unbind(mScreen.getUsage()); gDeferredPostGammaCorrectProgram.unbind(); @@ -8643,21 +8408,10 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) LLGLEnable cull(GL_CULL_FACE); LLGLEnable blend(GL_BLEND); - glh::matrix4f mat = glh_copy_matrix(gGLModelView); - - LLStrider vert; - mDeferredVB->getVertexStrider(vert); - - vert[0].set(-1,1,0); - vert[1].set(-1,-3,0); - vert[2].set(3,1,0); - { setupHWLights(NULL); //to set mSunDir; - LLVector4 dir(mSunDir, 0.f); - glh::vec4f tc(dir.mV); - mat.mult_matrix_vec(tc); - mTransformedSunDir.set(tc.v); + mTransformedSunDir.load3(mSunDir.mV); + glh_get_current_modelview().rotate(mTransformedSunDir,mTransformedSunDir); } gGL.pushMatrix(); @@ -8679,12 +8433,9 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) mDeferredDownsampledDepth.clear(GL_DEPTH_BUFFER_BIT); bindDeferredShader(gDeferredDownsampleDepthNearestProgram, 0); gDeferredDownsampleDepthNearestProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredDownsampledDepth.getWidth()/ssao_scale, mDeferredDownsampledDepth.getHeight()/ssao_scale); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); { LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mDeferredDownsampledDepth.flush(); unbindDeferredShader(gDeferredDownsampleDepthNearestProgram); @@ -8702,12 
+8453,9 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) glViewport(0,0,mDeferredDownsampledDepth.getWidth(),mDeferredDownsampledDepth.getHeight()); gDeferredSSAOProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredDownsampledDepth.getWidth()/ssao_scale, mDeferredDownsampledDepth.getHeight()/ssao_scale); } - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); { LLGLDepthTest depth(GL_FALSE); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } mScreen.flush(); unbindDeferredShader(gDeferredSSAOProgram); @@ -8720,31 +8468,10 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) { //paint shadow/SSAO light map (direct lighting lightmap) LLFastTimer ftm(FTM_SUN_SHADOW); bindDeferredShader(gDeferredSunProgram); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); glClearColor(1,1,1,1); mDeferredLight.clear(GL_COLOR_BUFFER_BIT); glClearColor(0,0,0,0); - /*glh::matrix4f inv_trans = glh_get_current_modelview().inverse().transpose(); - - const U32 slice = 32; - F32 offset[slice*3]; - for (U32 i = 0; i < 4; i++) - { - for (U32 j = 0; j < 8; j++) - { - glh::vec3f v; - v.set_value(sinf(6.284f/8*j), cosf(6.284f/8*j), -(F32) i); - v.normalize(); - inv_trans.mult_matrix_vec(v); - v.normalize(); - offset[(i*8+j)*3+0] = v.v[0]; - offset[(i*8+j)*3+1] = v.v[2]; - offset[(i*8+j)*3+2] = v.v[1]; - } - } - - gDeferredSunProgram.uniform3fv(LLShaderMgr::DEFERRED_SHADOW_OFFSET, slice, offset);*/ gDeferredSunProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, mDeferredLight.getWidth(), mDeferredLight.getHeight()); //Enable bilinear filtering, as the screen tex resolution may not match current framebuffer resolution. 
Eg, half-res SSAO @@ -8759,9 +8486,7 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); - stop_glerror(); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); - stop_glerror(); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } if (channel > -1) @@ -8804,9 +8529,7 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) gGL.pushMatrix(); gGL.loadIdentity(); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); gGL.popMatrix(); gGL.matrixMode(LLRender::MM_MODELVIEW); @@ -8950,10 +8673,11 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) continue; } - glh::vec3f tc(c); - mat.mult_matrix_vec(tc); - - fullscreen_lights.push_back(LLVector4(tc.v[0], tc.v[1], tc.v[2], s)); + glh_get_current_modelview().affineTransform(center,center); + + LLVector4 tc(center.getF32ptr()); + tc.mV[VW] = s; + fullscreen_lights.push_back(tc); light_colors.push_back(LLVector4(col.mV[0], col.mV[1], col.mV[2], volume->getLightFalloff()*0.5f)); } } @@ -9002,12 +8726,6 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) unbindDeferredShader(gDeferredSpotLightProgram); } - //reset mDeferredVB to fullscreen triangle - mDeferredVB->getVertexStrider(vert); - vert[0].set(-1,1,0); - vert[1].set(-1,-3,0); - vert[2].set(3,1,0); - { LLGLDepthTest depth(GL_FALSE); @@ -9051,8 +8769,7 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) gDeferredMultiLightProgram[idx].uniform1f(LLShaderMgr::MULTI_LIGHT_FAR_Z, far_z); far_z = 0.f; count = 0; - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } } @@ -9062,8 +8779,6 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) 
gDeferredMultiSpotLightProgram.enableTexture(LLShaderMgr::DEFERRED_PROJECTION); - mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); - for (LLDrawable::drawable_list_t::iterator iter = fullscreen_spot_lights.begin(); iter != fullscreen_spot_lights.end(); ++iter) { LLFastTimer ftm(FTM_PROJECTORS); @@ -9071,14 +8786,13 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) LLVOVolume* volume = drawablep->getVOVolume(); - LLVector3 center = drawablep->getPositionAgent(); - F32* c = center.mV; + LLVector4a center; + center.load3(drawablep->getPositionAgent().mV); F32 s = volume->getLightRadius()*1.5f; sVisibleLightCount++; - glh::vec3f tc(c); - mat.mult_matrix_vec(tc); + glh_get_current_modelview().affineTransform(center,center); setupSpotLight(gDeferredMultiSpotLightProgram, drawablep); @@ -9088,11 +8802,11 @@ void LLPipeline::renderDeferredLightingToRT(LLRenderTarget* target) col.mV[1] = powf(col.mV[1], 2.2f); col.mV[2] = powf(col.mV[2], 2.2f);*/ - gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::LIGHT_CENTER, 1, tc.v); + gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::LIGHT_CENTER, 1, center.getF32ptr()); gDeferredMultiSpotLightProgram.uniform1f(LLShaderMgr::LIGHT_SIZE, s); gDeferredMultiSpotLightProgram.uniform3fv(LLShaderMgr::DIFFUSE_COLOR, 1, col.mV); gDeferredMultiSpotLightProgram.uniform1f(LLShaderMgr::LIGHT_FALLOFF, volume->getLightFalloff()*0.5f); - mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + drawFullScreenRect(LLVertexBuffer::MAP_VERTEX); } gDeferredMultiSpotLightProgram.disableTexture(LLShaderMgr::DEFERRED_PROJECTION); @@ -9227,12 +8941,14 @@ void LLPipeline::setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep) LLVector3 origin = np - at_axis*dist; //matrix from volume space to agent space - LLMatrix4 light_mat(quat, LLVector4(origin,1.f)); + LLMatrix4 light_mat_(quat, LLVector4(origin,1.f)); - glh::matrix4f light_to_agent((F32*) light_mat.mMatrix); - glh::matrix4f light_to_screen = glh_get_current_modelview() 
* light_to_agent; - - glh::matrix4f screen_to_light = light_to_screen.inverse(); + LLMatrix4a light_mat; + light_mat.loadu(light_mat_.mMatrix[0]); + LLMatrix4a light_to_screen; + light_to_screen.setMul(glh_get_current_modelview(),light_mat); + LLMatrix4a screen_to_light = light_to_screen; + screen_to_light.invert(); F32 s = volume->getLightRadius()*1.5f; F32 near_clip = dist; @@ -9243,31 +8959,29 @@ void LLPipeline::setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep) F32 fovy = fov * RAD_TO_DEG; F32 aspect = width/height; - glh::matrix4f trans(0.5f, 0.f, 0.f, 0.5f, - 0.f, 0.5f, 0.f, 0.5f, - 0.f, 0.f, 0.5f, 0.5f, - 0.f, 0.f, 0.f, 1.f); + LLVector4a p1(0, 0, -(near_clip+0.01f)); + LLVector4a p2(0, 0, -(near_clip+1.f)); - glh::vec3f p1(0, 0, -(near_clip+0.01f)); - glh::vec3f p2(0, 0, -(near_clip+1.f)); + LLVector4a screen_origin(LLVector4a::getZero()); - glh::vec3f screen_origin(0, 0, 0); + light_to_screen.affineTransform(p1,p1); + light_to_screen.affineTransform(p2,p2); + light_to_screen.affineTransform(screen_origin,screen_origin); - light_to_screen.mult_matrix_vec(p1); - light_to_screen.mult_matrix_vec(p2); - light_to_screen.mult_matrix_vec(screen_origin); + LLVector4a n; + n.setSub(p2,p1); + n.normalize3fast(); - glh::vec3f n = p2-p1; - n.normalize(); - F32 proj_range = far_clip - near_clip; - glh::matrix4f light_proj = gl_perspective(fovy, aspect, near_clip, far_clip); - screen_to_light = trans * light_proj * screen_to_light; - shader.uniformMatrix4fv(LLShaderMgr::PROJECTOR_MATRIX, 1, FALSE, screen_to_light.m); + LLMatrix4a light_proj = gGL.genPersp(fovy, aspect, near_clip, far_clip); + light_proj.setMul(gGL.genNDCtoWC(),light_proj); + screen_to_light.setMul(light_proj,screen_to_light); + + shader.uniformMatrix4fv(LLShaderMgr::PROJECTOR_MATRIX, 1, FALSE, screen_to_light.getF32ptr()); shader.uniform1f(LLShaderMgr::PROJECTOR_NEAR, near_clip); - shader.uniform3fv(LLShaderMgr::PROJECTOR_P, 1, p1.v); - shader.uniform3fv(LLShaderMgr::PROJECTOR_N, 1, n.v); - 
shader.uniform3fv(LLShaderMgr::PROJECTOR_ORIGIN, 1, screen_origin.v); + shader.uniform3fv(LLShaderMgr::PROJECTOR_P, 1, p1.getF32ptr()); + shader.uniform3fv(LLShaderMgr::PROJECTOR_N, 1, n.getF32ptr()); + shader.uniform3fv(LLShaderMgr::PROJECTOR_ORIGIN, 1, screen_origin.getF32ptr()); shader.uniform1f(LLShaderMgr::PROJECTOR_RANGE, proj_range); shader.uniform1f(LLShaderMgr::PROJECTOR_AMBIANCE, params.mV[2]); S32 s_idx = -1; @@ -9418,8 +9132,7 @@ void LLPipeline::generateWaterReflection(LLCamera& camera_in) gPipeline.pushRenderTypeMask(); - glh::matrix4f projection = glh_get_current_projection(); - glh::matrix4f mat; + const LLMatrix4a projection = glh_get_current_projection(); stop_glerror(); LLPlane plane; @@ -9474,24 +9187,27 @@ void LLPipeline::generateWaterReflection(LLCamera& camera_in) gGL.pushMatrix(); - mat.set_scale(glh::vec3f(1,1,-1)); - mat.set_translate(glh::vec3f(0,0,height*2.f)); + const LLMatrix4a saved_modelview = glh_get_current_modelview(); - glh::matrix4f current = glh_get_current_modelview(); - - mat = current * mat; + LLMatrix4a mat; + mat.setIdentity(); + mat.getRow<2>().negate(); + mat.setTranslate_affine(LLVector3(0.f,0.f,height*2.f)); + mat.setMul(saved_modelview,mat); glh_set_current_modelview(mat); - gGL.loadMatrix(mat.m); + gGL.loadMatrix(mat); LLViewerCamera::updateFrustumPlanes(camera, FALSE, TRUE); - glh::matrix4f inv_mat = mat.inverse(); + LLMatrix4a inv_mat = mat; + inv_mat.invert(); - glh::vec3f origin(0,0,0); - inv_mat.mult_matrix_vec(origin); + LLVector4a origin; + origin.clear(); + inv_mat.affineTransform(origin,origin); - camera.setOrigin(origin.v); + camera.setOrigin(origin.getF32ptr()); glCullFace(GL_FRONT); @@ -9598,7 +9314,7 @@ void LLPipeline::generateWaterReflection(LLCamera& camera_in) glCullFace(GL_BACK); gGL.popMatrix(); mWaterRef.flush(); - glh_set_current_modelview(current); + glh_set_current_modelview(saved_modelview); LLPipeline::sUseOcclusion = occlusion; } @@ -9639,10 +9355,9 @@ void 
LLPipeline::generateWaterReflection(LLCamera& camera_in) if (!LLPipeline::sUnderWaterRender || LLDrawPoolWater::sNeedsReflectionUpdate) { //clip out geometry on the same side of water as the camera - mat = glh_get_current_modelview(); LLPlane plane(-pnorm, -(pd+pad)); - LLGLUserClipPlane clip_plane(plane, mat, projection); + LLGLUserClipPlane clip_plane(plane, glh_get_current_modelview(), projection); static LLCullResult result; updateCull(camera, result, water_clip, &plane); stateSort(camera, result); @@ -9707,77 +9422,11 @@ void LLPipeline::generateWaterReflection(LLCamera& camera_in) } } -glh::matrix4f look(const LLVector3 pos, const LLVector3 dir, const LLVector3 up) -{ - glh::matrix4f ret; - - LLVector3 dirN; - LLVector3 upN; - LLVector3 lftN; - - lftN = dir % up; - lftN.normVec(); - - upN = lftN % dir; - upN.normVec(); - - dirN = dir; - dirN.normVec(); - - ret.m[ 0] = lftN[0]; - ret.m[ 1] = upN[0]; - ret.m[ 2] = -dirN[0]; - ret.m[ 3] = 0.f; - - ret.m[ 4] = lftN[1]; - ret.m[ 5] = upN[1]; - ret.m[ 6] = -dirN[1]; - ret.m[ 7] = 0.f; - - ret.m[ 8] = lftN[2]; - ret.m[ 9] = upN[2]; - ret.m[10] = -dirN[2]; - ret.m[11] = 0.f; - - ret.m[12] = -(lftN*pos); - ret.m[13] = -(upN*pos); - ret.m[14] = dirN*pos; - ret.m[15] = 1.f; - - return ret; -} - -glh::matrix4f scale_translate_to_fit(const LLVector3 min, const LLVector3 max) -{ - glh::matrix4f ret; - ret.m[ 0] = 2/(max[0]-min[0]); - ret.m[ 4] = 0; - ret.m[ 8] = 0; - ret.m[12] = -(max[0]+min[0])/(max[0]-min[0]); - - ret.m[ 1] = 0; - ret.m[ 5] = 2/(max[1]-min[1]); - ret.m[ 9] = 0; - ret.m[13] = -(max[1]+min[1])/(max[1]-min[1]); - - ret.m[ 2] = 0; - ret.m[ 6] = 0; - ret.m[10] = 2/(max[2]-min[2]); - ret.m[14] = -(max[2]+min[2])/(max[2]-min[2]); - - ret.m[ 3] = 0; - ret.m[ 7] = 0; - ret.m[11] = 0; - ret.m[15] = 1; - - return ret; -} - static LLFastTimer::DeclareTimer FTM_SHADOW_RENDER("Render Shadows"); static LLFastTimer::DeclareTimer FTM_SHADOW_ALPHA("Alpha Shadow"); static LLFastTimer::DeclareTimer FTM_SHADOW_SIMPLE("Simple 
Shadow"); -void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera& shadow_cam, LLCullResult &result, BOOL use_shader, BOOL use_occlusion, U32 target_width) +void LLPipeline::renderShadow(const LLMatrix4a& view, const LLMatrix4a& proj, LLCamera& shadow_cam, LLCullResult &result, BOOL use_shader, BOOL use_occlusion, U32 target_width) { LLFastTimer t(FTM_SHADOW_RENDER); @@ -9824,10 +9473,10 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera //generate shadow map gGL.matrixMode(LLRender::MM_PROJECTION); gGL.pushMatrix(); - gGL.loadMatrix(proj.m); + gGL.loadMatrix(proj); gGL.matrixMode(LLRender::MM_MODELVIEW); gGL.pushMatrix(); - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(view); //Why was glh_get_current_modelview() used instead of view? stop_glerror(); gGLLastMatrix = NULL; @@ -9906,7 +9555,7 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera gDeferredShadowCubeProgram.bind(); gGLLastMatrix = NULL; - gGL.loadMatrix(gGLModelView); + gGL.loadMatrix(glh_get_current_modelview()); //LLRenderTarget& occlusion_source = mShadow[LLViewerCamera::sCurCameraID-1]; @@ -10135,13 +9784,8 @@ void LLPipeline::generateSunShadow(LLCamera& camera) gAgentAvatarp->updateAttachmentVisibility(CAMERA_MODE_THIRD_PERSON); } - F64 last_modelview[16]; - F64 last_projection[16]; - for (U32 i = 0; i < 16; i++) - { //store last_modelview of world camera - last_modelview[i] = gGLLastModelView[i]; - last_projection[i] = gGLLastProjection[i]; - } + LLMatrix4a last_modelview = glh_get_last_modelview(); + LLMatrix4a last_projection = glh_get_last_projection(); pushRenderTypeMask(); andRenderTypeMask(LLPipeline::RENDER_TYPE_SIMPLE, @@ -10187,13 +9831,14 @@ void LLPipeline::generateSunShadow(LLCamera& camera) //get sun view matrix //store current projection/modelview matrix - glh::matrix4f saved_proj = glh_get_current_projection(); - glh::matrix4f saved_view = glh_get_current_modelview(); - glh::matrix4f inv_view 
= saved_view.inverse(); + const LLMatrix4a saved_proj = glh_get_current_projection(); + const LLMatrix4a saved_view = glh_get_current_modelview(); + LLMatrix4a inv_view(saved_view); + inv_view.invert(); + + LLMatrix4a view[6]; + LLMatrix4a proj[6]; - glh::matrix4f view[6]; - glh::matrix4f proj[6]; - //clip contains parallel split distances for 3 splits LLVector3 clip = RenderShadowClipPlanes; @@ -10202,9 +9847,6 @@ void LLPipeline::generateSunShadow(LLCamera& camera) //far clip on last split is minimum of camera view distance and 128 mSunClipPlanes = LLVector4(clip, clip.mV[2] * clip.mV[2]/clip.mV[1]); - clip = RenderShadowOrthoClipPlanes; - mSunOrthoClipPlanes = LLVector4(clip, clip.mV[2]*clip.mV[2]/clip.mV[1]); - //currently used for amount to extrude frusta corners for constructing shadow frusta //LLVector3 n = RenderShadowNearDist; //F32 nearDist[] = { n.mV[0], n.mV[1], n.mV[2], n.mV[2] }; @@ -10220,8 +9862,6 @@ void LLPipeline::generateSunShadow(LLCamera& camera) LLVector3 lightDir = -mSunDir; lightDir.normVec(); - glh::vec3f light_dir(lightDir.mV); - //create light space camera matrix LLVector3 at = lightDir; @@ -10278,9 +9918,10 @@ void LLPipeline::generateSunShadow(LLCamera& camera) //get good split distances for frustum for (U32 i = 0; i < fp.size(); ++i) { - glh::vec3f v(fp[i].mV); - saved_view.mult_matrix_vec(v); - fp[i].setVec(v.v); + LLVector4a v; + v.load3(fp[i].mV); + saved_view.affineTransform(v,v); + fp[i].setVec(v.getF32ptr()); } min = fp[0]; @@ -10402,9 +10043,6 @@ void LLPipeline::generateSunShadow(LLCamera& camera) } mShadow[j].flush(); - mShadowError.mV[j] = 0.f; - mShadowFOV.mV[j] = 0.f; - continue; } @@ -10420,15 +10058,16 @@ void LLPipeline::generateSunShadow(LLCamera& camera) LLVector3 origin; //get a temporary view projection - view[j] = look(camera.getOrigin(), lightDir, -up); + view[j] = gGL.genLook(camera.getOrigin(), lightDir, -up); std::vector wpf; for (U32 i = 0; i < fp.size(); i++) { - glh::vec3f p = glh::vec3f(fp[i].mV); - 
view[j].mult_matrix_vec(p); - wpf.push_back(LLVector3(p.v)); + LLVector4a p; + p.load3(fp[i].mV); + view[j].affineTransform(p,p); + wpf.push_back(LLVector3(p.getF32ptr())); } min = wpf[0]; @@ -10513,24 +10152,21 @@ void LLPipeline::generateSunShadow(LLCamera& camera) bfb = lp.mV[1]-bfm*lp.mV[0]; //calculate error - mShadowError.mV[j] = 0.f; + F32 shadow_error = 0.f; for (U32 i = 0; i < wpf.size(); ++i) { F32 lx = (wpf[i].mV[1]-bfb)/bfm; - mShadowError.mV[j] += fabsf(wpf[i].mV[0]-lx); + shadow_error += fabsf(wpf[i].mV[0]-lx); } - mShadowError.mV[j] /= wpf.size(); - mShadowError.mV[j] /= size.mV[0]; + shadow_error /= wpf.size(); + shadow_error /= size.mV[0]; - if (mShadowError.mV[j] > RenderShadowErrorCutoff) + if (shadow_error > RenderShadowErrorCutoff) { //just use ortho projection - mShadowFOV.mV[j] = -1.f; origin.clearVec(); - proj[j] = gl_ortho(min.mV[0], max.mV[0], - min.mV[1], max.mV[1], - -max.mV[2], -min.mV[2]); + proj[j] = gGL.genOrtho(min.mV[0], max.mV[0], min.mV[1], max.mV[1], -max.mV[2], -min.mV[2]); } else { @@ -10569,8 +10205,6 @@ void LLPipeline::generateSunShadow(LLCamera& camera) F32 cutoff = llmin((F32) RenderShadowFOVCutoff, 1.4f); - mShadowFOV.mV[j] = fovx; - if (fovx < cutoff && fovz > cutoff) { //x is a good fit, but z is too big, move away from zp enough so that fovz matches cutoff @@ -10598,7 +10232,6 @@ void LLPipeline::generateSunShadow(LLCamera& camera) fovx = acos(fovx); fovz = acos(fovz); - mShadowFOV.mV[j] = cutoff; } @@ -10618,37 +10251,29 @@ void LLPipeline::generateSunShadow(LLCamera& camera) if (fovx > cutoff) { //just use ortho projection origin.clearVec(); - mShadowError.mV[j] = -1.f; - proj[j] = gl_ortho(min.mV[0], max.mV[0], - min.mV[1], max.mV[1], - -max.mV[2], -min.mV[2]); + proj[j] = gGL.genOrtho(min.mV[0], max.mV[0], min.mV[1], max.mV[1], -max.mV[2], -min.mV[2]); } else { //get perspective projection - view[j] = view[j].inverse(); + view[j].invert(); + LLVector4a origin_agent; + origin_agent.load3(origin.mV); - glh::vec3f 
origin_agent(origin.mV); - //translate view to origin - view[j].mult_matrix_vec(origin_agent); + view[j].affineTransform(origin_agent,origin_agent); - eye = LLVector3(origin_agent.v); + eye = LLVector3(origin_agent.getF32ptr()); - if (!hasRenderDebugMask(LLPipeline::RENDER_DEBUG_SHADOW_FRUSTA)) - { - mShadowFrustOrigin[j] = eye; - } - - view[j] = look(LLVector3(origin_agent.v), lightDir, -up); + view[j] = gGL.genLook(LLVector3(origin_agent.getF32ptr()), lightDir, -up); F32 fx = 1.f/tanf(fovx); F32 fz = 1.f/tanf(fovz); - proj[j] = glh::matrix4f(-fx, 0, 0, 0, - 0, (yfar+ynear)/(ynear-yfar), 0, (2.f*yfar*ynear)/(ynear-yfar), - 0, 0, -fz, 0, - 0, -1.f, 0, 0); + proj[j].setRow<0>(LLVector4a( -fx, 0.f, 0.f)); + proj[j].setRow<1>(LLVector4a( 0.f, (yfar+ynear)/(ynear-yfar), 0.f, -1.f)); + proj[j].setRow<2>(LLVector4a( 0.f, 0.f, -fz)); + proj[j].setRow<3>(LLVector4a( 0.f, (2.f*yfar*ynear)/(ynear-yfar), 0.f)); } } } @@ -10666,26 +10291,19 @@ void LLPipeline::generateSunShadow(LLCamera& camera) //shadow_cam.ignoreAgentFrustumPlane(LLCamera::AGENT_PLANE_NEAR); shadow_cam.getAgentPlane(LLCamera::AGENT_PLANE_NEAR).set(shadow_near_clip); - //translate and scale to from [-1, 1] to [0, 1] - glh::matrix4f trans(0.5f, 0.f, 0.f, 0.5f, - 0.f, 0.5f, 0.f, 0.5f, - 0.f, 0.f, 0.5f, 0.5f, - 0.f, 0.f, 0.f, 1.f); - glh_set_current_modelview(view[j]); glh_set_current_projection(proj[j]); - for (U32 i = 0; i < 16; i++) - { - gGLLastModelView[i] = mShadowModelview[j].m[i]; - gGLLastProjection[i] = mShadowProjection[j].m[i]; - } + glh_set_last_modelview(mShadowModelview[j]); + glh_set_last_projection(mShadowProjection[j]); mShadowModelview[j] = view[j]; mShadowProjection[j] = proj[j]; - - mSunShadowMatrix[j] = trans*proj[j]*view[j]*inv_view; + + mSunShadowMatrix[j].setMul(gGL.genNDCtoWC(),proj[j]); + mSunShadowMatrix[j].mul_affine(view[j]); + mSunShadowMatrix[j].mul_affine(inv_view); stop_glerror(); @@ -10791,9 +10409,9 @@ void LLPipeline::generateSunShadow(LLCamera& camera) LLMatrix4 mat(quat, 
LLVector4(origin, 1.f)); - view[i+4] = glh::matrix4f((F32*) mat.mMatrix); + view[i+4].loadu(mat.mMatrix[0]); - view[i+4] = view[i+4].inverse(); + view[i+4].invert(); //get perspective matrix F32 near_clip = dist+0.01f; @@ -10803,29 +10421,22 @@ void LLPipeline::generateSunShadow(LLCamera& camera) F32 fovy = fov * RAD_TO_DEG; F32 aspect = width/height; - - proj[i+4] = gl_perspective(fovy, aspect, near_clip, far_clip); - //translate and scale to from [-1, 1] to [0, 1] - glh::matrix4f trans(0.5f, 0.f, 0.f, 0.5f, - 0.f, 0.5f, 0.f, 0.5f, - 0.f, 0.f, 0.5f, 0.5f, - 0.f, 0.f, 0.f, 1.f); + proj[i+4] = gGL.genPersp(fovy, aspect, near_clip, far_clip); glh_set_current_modelview(view[i+4]); glh_set_current_projection(proj[i+4]); - mSunShadowMatrix[i+4] = trans*proj[i+4]*view[i+4]*inv_view; - - for (U32 j = 0; j < 16; j++) - { - gGLLastModelView[j] = mShadowModelview[i+4].m[j]; - gGLLastProjection[j] = mShadowProjection[i+4].m[j]; - } + glh_set_last_modelview(mShadowModelview[i+4]); + glh_set_last_projection(mShadowProjection[i+4]); mShadowModelview[i+4] = view[i+4]; mShadowProjection[i+4] = proj[i+4]; + mSunShadowMatrix[i+4].setMul(gGL.genNDCtoWC(),proj[i+4]); + mSunShadowMatrix[i+4].mul_affine(view[i+4]); + mSunShadowMatrix[i+4].mul_affine(inv_view); + LLCamera shadow_cam = camera; shadow_cam.setFar(far_clip); shadow_cam.setOrigin(origin); @@ -10864,18 +10475,15 @@ void LLPipeline::generateSunShadow(LLCamera& camera) { glh_set_current_modelview(view[1]); glh_set_current_projection(proj[1]); - gGL.loadMatrix(view[1].m); + gGL.loadMatrix(view[1]); gGL.matrixMode(LLRender::MM_PROJECTION); - gGL.loadMatrix(proj[1].m); + gGL.loadMatrix(proj[1]); gGL.matrixMode(LLRender::MM_MODELVIEW); } gGL.setColorMask(true, false); - for (U32 i = 0; i < 16; i++) - { - gGLLastModelView[i] = last_modelview[i]; - gGLLastProjection[i] = last_projection[i]; - } + glh_set_last_modelview(last_modelview); + glh_set_last_projection(last_projection); popRenderTypeMask(); @@ -11034,18 +10642,19 @@ void 
LLPipeline::generateImpostor(LLVOAvatar* avatar) F32 distance = (pos-camera.getOrigin()).length(); F32 fov = atanf(tdim.mV[1]/distance)*2.f*RAD_TO_DEG; F32 aspect = tdim.mV[0]/tdim.mV[1]; - glh::matrix4f persp = gl_perspective(fov, aspect, 1.f, 256.f); + LLMatrix4a persp = gGL.genPersp(fov, aspect, 1.f, 256.f); glh_set_current_projection(persp); - gGL.loadMatrix(persp.m); + gGL.loadMatrix(persp); gGL.matrixMode(LLRender::MM_MODELVIEW); gGL.pushMatrix(); - glh::matrix4f mat; - camera.getOpenGLTransform(mat.m); + LLMatrix4a mat; + camera.getOpenGLTransform(mat.getF32ptr()); - mat = glh::matrix4f((GLfloat*) OGL_TO_CFR_ROTATION) * mat; + mat.setMul(OGL_TO_CFR_ROTATION, mat); + + gGL.loadMatrix(mat); - gGL.loadMatrix(mat.m); glh_set_current_modelview(mat); glClearColor(0.0f,0.0f,0.0f,0.0f); @@ -11458,3 +11067,31 @@ void LLPipeline::restoreHiddenObject( const LLUUID& id ) } */ +void LLPipeline::drawFullScreenRect(U32 data_mask) +{ + if(mAuxScreenRectVB.isNull()) + { + mAuxScreenRectVB = new LLVertexBuffer(AUX_VB_MASK, 0); + mAuxScreenRectVB->allocateBuffer(3, 0, true); + LLStrider vert; + LLStrider tc0, tc1; + mAuxScreenRectVB->getVertexStrider(vert); + mAuxScreenRectVB->getTexCoord0Strider(tc0); + mAuxScreenRectVB->getTexCoord1Strider(tc1); + + vert[0].set(-1.f,-1.f,0.f); + vert[1].set(3.f,-1.f,0.f); + vert[2].set(-1.f,3.f,0.f); + + tc0[0].set(0.f, 0.f); + tc0[1].set(mScreen.getWidth()*2.f, 0.f); + tc0[2].set(0.f, mScreen.getHeight()*2.f); + + tc1[0].set(0.f, 0.f); + tc1[1].set(2.f, 0.f); + tc1[2].set(0.f, 2.f); + } + mAuxScreenRectVB->setBuffer(data_mask); + mAuxScreenRectVB->drawArrays(LLRender::TRIANGLES, 0, 3); +} + diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index da5e6a6b3..589c2a3e4 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -81,14 +81,10 @@ BOOL compute_min_max(LLMatrix4& box, LLVector2& min, LLVector2& max); // Shouldn bool LLRayAABB(const LLVector3 ¢er, const LLVector3 &size, const LLVector3& origin, const 
LLVector3& dir, LLVector3 &coord, F32 epsilon = 0); BOOL setup_hud_matrices(); // use whole screen to render hud BOOL setup_hud_matrices(const LLRect& screen_region); // specify portion of screen (in pixels) to render hud attachments from (for picking) -glh::matrix4f glh_copy_matrix(F32* src); -glh::matrix4f glh_get_current_modelview(); -void glh_set_current_modelview(const glh::matrix4f& mat); -glh::matrix4f glh_get_current_projection(); -void glh_set_current_projection(glh::matrix4f& mat); -glh::matrix4f gl_ortho(GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat znear, GLfloat zfar); -glh::matrix4f gl_perspective(GLfloat fovy, GLfloat aspect, GLfloat zNear, GLfloat zFar); -glh::matrix4f gl_lookat(LLVector3 eye, LLVector3 center, LLVector3 up); +const LLMatrix4a& glh_get_current_modelview(); +void glh_set_current_modelview(const LLMatrix4a& mat); +const LLMatrix4a& glh_get_current_projection(); +void glh_set_current_projection(const LLMatrix4a& mat); extern LLFastTimer::DeclareTimer FTM_RENDER_GEOMETRY; extern LLFastTimer::DeclareTimer FTM_RENDER_GRASS; @@ -112,6 +108,7 @@ extern LLFastTimer::DeclareTimer FTM_PIPELINE; extern LLFastTimer::DeclareTimer FTM_CLIENT_COPY; +LL_ALIGN_PREFIX(16) class LLPipeline { public: @@ -306,7 +303,7 @@ public: void generateSunShadow(LLCamera& camera); - void renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera& camera, LLCullResult& result, BOOL use_shader, BOOL use_occlusion, U32 target_width); + void renderShadow(const LLMatrix4a& view, const LLMatrix4a& proj, LLCamera& camera, LLCullResult& result, BOOL use_shader, BOOL use_occlusion, U32 target_width); void renderHighlights(); void renderDebug(); void renderPhysicsDisplay(); @@ -434,6 +431,8 @@ private: bool assertInitialized() { const bool is_init = isInit(); if (!is_init) assertInitializedDoError(); return is_init; }; void hideDrawable( LLDrawable *pDrawable ); void unhideDrawable( LLDrawable *pDrawable ); + + void drawFullScreenRect( U32 data_mask 
); public: enum {GPU_CLASS_MAX = 3 }; @@ -552,11 +551,6 @@ public: LLSpatialPartition* getSpatialPartition(LLViewerObject* vobj); - void updateCamera(BOOL reset = FALSE); - - LLVector3 mFlyCamPosition; - LLQuaternion mFlyCamRotation; - BOOL mBackfaceCull; S32 mBatchCount; S32 mMatrixOpCount; @@ -566,18 +560,6 @@ public: S32 mMeanBatchSize; S32 mTrianglesDrawn; S32 mNumVisibleNodes; - S32 mVerticesRelit; - - S32 mDebugTextureUploadCost; - S32 mDebugSculptUploadCost; - S32 mDebugMeshUploadCost; - - S32 mLightingChanges; - S32 mGeometryChanges; - - S32 mNumVisibleFaces; - - static S32 sCompiles; static BOOL sShowHUDAttachments; static BOOL sForceOldBakedUpload; // If true will not use capabilities to upload baked textures. @@ -587,7 +569,6 @@ public: static BOOL sAutoMaskAlphaNonDeferred; static BOOL sDisableShaders; // if TRUE, rendering will be done without shaders static BOOL sRenderBump; - static BOOL sBakeSunlight; static BOOL sNoAlpha; static BOOL sUseFarClip; static BOOL sShadowRender; @@ -610,61 +591,57 @@ public: static F32 sMinRenderSize; static BOOL sRenderingHUDs; - //screen texture +public: + //screen texture LLRenderTarget mScreen; LLRenderTarget mDeferredScreen; +private: LLRenderTarget mFXAABuffer; - LLRenderTarget mEdgeMap; +public: LLRenderTarget mDeferredDepth; +private: LLRenderTarget mDeferredDownsampledDepth; LLRenderTarget mOcclusionDepth; LLRenderTarget mDeferredLight; +public: LLMultisampleBuffer mSampleBuffer; +private: LLRenderTarget mPhysicsDisplay; //utility buffer for rendering post effects, gets abused by renderDeferredLighting - LLPointer mDeferredVB; + LLPointer mAuxScreenRectVB; +public: //utility buffer for rendering cubes, 8 vertices are corners of a cube [-1, 1] LLPointer mCubeVB; +private: //sun shadow map LLRenderTarget mShadow[6]; LLRenderTarget mShadowOcclusion[6]; std::vector mShadowFrustPoints[4]; - LLVector4 mShadowError; - LLVector4 mShadowFOV; - LLVector3 mShadowFrustOrigin[4]; +public: LLCamera mShadowCamera[8]; +private: 
LLVector3 mShadowExtents[4][2]; - glh::matrix4f mSunShadowMatrix[6]; - glh::matrix4f mShadowModelview[6]; - glh::matrix4f mShadowProjection[6]; - glh::matrix4f mGIMatrix; - glh::matrix4f mGIMatrixProj; - glh::matrix4f mGIModelview; - glh::matrix4f mGIProjection; - glh::matrix4f mGINormalMatrix; - glh::matrix4f mGIInvProj; - LLVector2 mGIRange; - F32 mGILightRadius; + LLMatrix4a mSunShadowMatrix[6]; + LLMatrix4a mShadowModelview[6]; + LLMatrix4a mShadowProjection[6]; LLPointer mShadowSpotLight[2]; F32 mSpotLightFade[2]; LLPointer mTargetShadowSpotLight[2]; LLVector4 mSunClipPlanes; - LLVector4 mSunOrthoClipPlanes; - - LLVector2 mScreenScale; - +public: //water reflection texture LLRenderTarget mWaterRef; //water distortion texture (refraction) LLRenderTarget mWaterDis; +private: //texture for making the glow LLRenderTarget mGlow[2]; @@ -675,14 +652,15 @@ public: LLColor4 mSunDiffuse; LLVector3 mSunDir; - LLVector3 mTransformedSunDir; + LL_ALIGN_16(LLVector4a mTransformedSunDir); +public: BOOL mInitialized; BOOL mVertexShadersEnabled; S32 mVertexShadersLoaded; // 0 = no, 1 = yes, -1 = failed U32 mTransformFeedbackPrimitives; //number of primitives expected to be generated by transform feedback -protected: +private: BOOL mRenderTypeEnabled[NUM_RENDER_TYPES]; std::stack mRenderTypeEnableStack; @@ -847,13 +825,13 @@ public: //debug use static U32 sCurRenderPoolType ; -}; +} LL_ALIGN_POSTFIX(16); void render_bbox(const LLVector3 &min, const LLVector3 &max); void render_hud_elements(); extern LLPipeline gPipeline; extern BOOL gDebugPipeline; -extern const LLMatrix4* gGLLastMatrix; +extern const LLMatrix4a* gGLLastMatrix; #endif diff --git a/indra/newview/qtoolalign.cpp b/indra/newview/qtoolalign.cpp index 2b09c7cf2..240bbffcb 100644 --- a/indra/newview/qtoolalign.cpp +++ b/indra/newview/qtoolalign.cpp @@ -123,7 +123,7 @@ BOOL QToolAlign::findSelectedManipulator(S32 x, S32 y) { LLVector4 translation(mBBox.getCenterAgent()); transform.initRotTrans(mBBox.getRotation(), 
translation); - LLMatrix4 cfr(OGL_TO_CFR_ROTATION); + LLMatrix4 cfr(OGL_TO_CFR_ROTATION.getF32ptr()); transform *= cfr; LLMatrix4 window_scale; F32 zoom_level = 2.f * gAgentCamera.mHUDCurZoom; @@ -136,8 +136,8 @@ BOOL QToolAlign::findSelectedManipulator(S32 x, S32 y) { transform.initAll(LLVector3(1.f, 1.f, 1.f), mBBox.getRotation(), mBBox.getCenterAgent()); - LLMatrix4 projection_matrix = LLViewerCamera::getInstance()->getProjection(); - LLMatrix4 model_matrix = LLViewerCamera::getInstance()->getModelview(); + LLMatrix4 projection_matrix( LLViewerCamera::getInstance()->getProjection().getF32ptr() ); + LLMatrix4 model_matrix( LLViewerCamera::getInstance()->getModelview().getF32ptr() ); transform *= model_matrix; transform *= projection_matrix; diff --git a/install.xml b/install.xml index 3a8e5ee06..a703de3fc 100644 --- a/install.xml +++ b/install.xml @@ -473,32 +473,6 @@ - freeglut - - copyright - Copyright (c) 1999-2000 Pawel W. Olszta. - description - OpenSource alternative to OpenGL Utility Toolkit - license - freeglut - packages - - windows - - md5sum - 74758efd7fc6246f704ea702c4b3e310 - url - https://bitbucket.org/SingularityViewer/libraries/downloads/freeglut-2.6.0-windows-20110214.tar.bz2 - - windows64 - - md5sum - 74758efd7fc6246f704ea702c4b3e310 - url - https://bitbucket.org/SingularityViewer/libraries/downloads/freeglut-2.6.0-windows-20110214.tar.bz2 - - - freetype copyright @@ -1735,37 +1709,6 @@ your work. url http://www.fmod.org/ifmodlicense.html - freeglut - - text - Freeglut Copyright ------------------- - -Freeglut code without an explicit copyright is covered by the following -copyright: - -Copyright (c) 1999-2000 Pawel W. Olszta. All Rights Reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies or substantial portions of the Software. - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Except as contained in this notice, the name of Pawel W. Olszta shall not be -used in advertising or otherwise to promote the sale, use or other dealings -in this Software without prior written authorization from Pawel W. Olszta. - - freetype url @@ -1796,51 +1739,6 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - glh_linear - - text - glh - is a platform-indepenedent C++ OpenGL helper library - -Copyright (c) 2000 Cass Everitt -Copyright (c) 2000 NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - -Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. 
-Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. -The names of contributors to this software may not be used -to endorse or promote products derived from this software -without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -Cass Everitt - cass@r3.nu - - - - glut - - url - http://www.xmission.com/~nate/glut/README-win32.txt - gpl url