A little vectorization. gluProject replaced with vectorized glProjectf. Added LLMatrix4a::rotate4. Tweaked LLMatrix4a::rotate. Removed extra _mm_movehl_ps call in LLMatrix3a::setTranspose
This commit is contained in:
@@ -60,7 +60,7 @@ inline void LLMatrix3a::setTranspose(const LLMatrix3a& src)
|
||||
const LLQuad srcCol1 = src.mColumns[1];
|
||||
const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 );
|
||||
mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] );
|
||||
mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) );
|
||||
mColumns[1] = _mm_shuffle_ps( unpacklo, src.mColumns[2], _MM_SHUFFLE(0, 1, 3, 2) );
|
||||
mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) );
|
||||
}
|
||||
|
||||
|
||||
@@ -105,24 +105,42 @@ public:
|
||||
mMatrix[3].setAdd(a.mMatrix[3],d3);
|
||||
}
|
||||
|
||||
inline void rotate(const LLVector4a& v, LLVector4a& res)
|
||||
inline void rotate(const LLVector4a& v, LLVector4a& res) const
|
||||
{
|
||||
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
res.mul(mMatrix[0]);
|
||||
|
||||
LLVector4a y;
|
||||
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
y.mul(mMatrix[1]);
|
||||
LLVector4a x,y,z;
|
||||
|
||||
LLVector4a z;
|
||||
x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
x.mul(mMatrix[0]);
|
||||
y.mul(mMatrix[1]);
|
||||
z.mul(mMatrix[2]);
|
||||
|
||||
res.add(y);
|
||||
res.add(z);
|
||||
x.add(y);
|
||||
res.setAdd(x,z);
|
||||
}
|
||||
|
||||
inline void rotate4(const LLVector4a& v, LLVector4a& res) const
|
||||
{
|
||||
LLVector4a x,y,z,w;
|
||||
|
||||
x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
w = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
x.mul(mMatrix[0]);
|
||||
y.mul(mMatrix[1]);
|
||||
z.mul(mMatrix[2]);
|
||||
w.mul(mMatrix[3]);
|
||||
|
||||
x.add(y);
|
||||
z.add(w);
|
||||
res.setAdd(x,z);
|
||||
}
|
||||
|
||||
inline void affineTransform(const LLVector4a& v, LLVector4a& res)
|
||||
inline void affineTransform(const LLVector4a& v, LLVector4a& res) const
|
||||
{
|
||||
LLVector4a x,y,z;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user