A little vectorization. gluProject replaced with vectorized glProjectf. Added LLMatrix4a::rotate4. Tweaked LLMatrix4a::rotate. Removed extra _mm_movehl_ps call in LLMatrix3a::setTranspose

This commit is contained in:
Shyotl
2011-11-19 20:02:31 -06:00
parent 70909f86c8
commit 1cf367aae5
4 changed files with 108 additions and 66 deletions

View File

@@ -60,7 +60,7 @@ inline void LLMatrix3a::setTranspose(const LLMatrix3a& src)
const LLQuad srcCol1 = src.mColumns[1];
const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 );
mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] );
mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) );
mColumns[1] = _mm_shuffle_ps( unpacklo, src.mColumns[2], _MM_SHUFFLE(0, 1, 3, 2) );
mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) );
}

View File

@@ -105,24 +105,42 @@ public:
mMatrix[3].setAdd(a.mMatrix[3],d3);
}
inline void rotate(const LLVector4a& v, LLVector4a& res)
inline void rotate(const LLVector4a& v, LLVector4a& res) const
{
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
res.mul(mMatrix[0]);
LLVector4a y;
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
y.mul(mMatrix[1]);
LLVector4a x,y,z;
LLVector4a z;
x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
x.mul(mMatrix[0]);
y.mul(mMatrix[1]);
z.mul(mMatrix[2]);
res.add(y);
res.add(z);
x.add(y);
res.setAdd(x,z);
}
inline void rotate4(const LLVector4a& v, LLVector4a& res) const
{
LLVector4a x,y,z,w;
x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
w = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
x.mul(mMatrix[0]);
y.mul(mMatrix[1]);
z.mul(mMatrix[2]);
w.mul(mMatrix[3]);
x.add(y);
z.add(w);
res.setAdd(x,z);
}
inline void affineTransform(const LLVector4a& v, LLVector4a& res)
inline void affineTransform(const LLVector4a& v, LLVector4a& res) const
{
LLVector4a x,y,z;