/** * @file llmatrix4a.h * @brief LLMatrix4a class header file - memory aligned and vectorized 4x4 matrix * * $LicenseInfo:firstyear=2007&license=viewerlgpl$ * Second Life Viewer Source Code * Copyright (C) 2010, Linden Research, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License only. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ #ifndef LL_LLMATRIX4A_H #define LL_LLMATRIX4A_H #include "llvector4a.h" #include "m4math.h" #include "m3math.h" LL_ALIGN_PREFIX(16) class LLMatrix4a { private: LL_ALIGN_16(LLVector4a mMatrix[4]); public: enum { ROW_FWD = 0, ROW_LEFT, ROW_UP, ROW_TRANS }; void* operator new(size_t size) { return ll_aligned_malloc_16(size); } void* operator new[](size_t size) { return ll_aligned_malloc_16(size); } void operator delete(void* ptr) { ll_aligned_free_16(ptr); } void operator delete[](void* ptr) { ll_aligned_free_16(ptr); } LLMatrix4a() = default; LLMatrix4a(const LLQuad& q1,const LLQuad& q2,const LLQuad& q3,const LLQuad& q4) { mMatrix[0] = q1; mMatrix[1] = q2; mMatrix[2] = q3; mMatrix[3] = q4; } LLMatrix4a(const LLQuaternion2& quat) { const LLVector4a& xyzw = quat.getVector4a(); LLVector4a nyxwz = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(2,3,0,1)); nyxwz.negate(); const LLVector4a xnyynx = _mm_unpacklo_ps(xyzw,nyxwz); const LLVector4a znwwnz = _mm_unpackhi_ps(xyzw,nyxwz); LLMatrix4a mata; mata.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, _MM_SHUFFLE(0,1,2,3))); mata.setRow<1>(_mm_shuffle_ps(znwwnz, xyzw, _MM_SHUFFLE(1,0,2,3))); mata.setRow<2>(_mm_shuffle_ps(xnyynx, xyzw, _MM_SHUFFLE(2,3,3,2))); mata.setRow<3>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(2,3,1,3))); LLMatrix4a matb; matb.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, _MM_SHUFFLE(3,1,2,3))); matb.setRow<1>(_mm_shuffle_ps(znwwnz, xnyynx, _MM_SHUFFLE(1,0,2,3))); matb.setRow<2>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(3,2,3,2))); matb.setRow<3>(xyzw); setMul(matb,mata); } inline F32* getF32ptr() { return mMatrix[0].getF32ptr(); } inline const F32* getF32ptr() const { return mMatrix[0].getF32ptr(); } inline void clear() { mMatrix[0].clear(); mMatrix[1].clear(); mMatrix[2].clear(); mMatrix[3].clear(); } inline void setIdentity() { static __m128 ones = _mm_set_ps(1.f,0.f,0.f,1.f); mMatrix[0] = _mm_movelh_ps(ones,_mm_setzero_ps()); mMatrix[1] = _mm_movehl_ps(_mm_setzero_ps(),ones); mMatrix[2] = _mm_movelh_ps(_mm_setzero_ps(),ones); mMatrix[3] = _mm_movehl_ps(ones,_mm_setzero_ps()); } inline void loadu(const LLMatrix4& src) { mMatrix[0].loadua(src.mMatrix[0]); mMatrix[1].loadua(src.mMatrix[1]); mMatrix[2].loadua(src.mMatrix[2]); mMatrix[3].loadua(src.mMatrix[3]); } inline void loadu(const LLMatrix3& src) { mMatrix[0].load3(src.mMatrix[0]); mMatrix[1].load3(src.mMatrix[1]); mMatrix[2].load3(src.mMatrix[2]); mMatrix[3].set(0,0,0,1.f); } inline void loadu(const F32* src) { mMatrix[0].loadua(src+0); mMatrix[1].loadua(src+4); mMatrix[2].loadua(src+8); mMatrix[3].loadua(src+12); } inline void add(const LLMatrix4a& rhs) { mMatrix[0].add(rhs.mMatrix[0]); mMatrix[1].add(rhs.mMatrix[1]); mMatrix[2].add(rhs.mMatrix[2]); mMatrix[3].add(rhs.mMatrix[3]); } inline void mul(const LLMatrix4a& rhs) { //Not using rotate4 to avoid extra copy of *this. LLVector4a x0,y0,z0,w0; LLVector4a x1,y1,z1,w1; LLVector4a x2,y2,z2,w2; LLVector4a x3,y3,z3,w3; //16 shuffles x0.splat<0>(rhs.mMatrix[0]); x1.splat<0>(rhs.mMatrix[1]); x2.splat<0>(rhs.mMatrix[2]); x3.splat<0>(rhs.mMatrix[3]); y0.splat<1>(rhs.mMatrix[0]); y1.splat<1>(rhs.mMatrix[1]); y2.splat<1>(rhs.mMatrix[2]); y3.splat<1>(rhs.mMatrix[3]); z0.splat<2>(rhs.mMatrix[0]); z1.splat<2>(rhs.mMatrix[1]); z2.splat<2>(rhs.mMatrix[2]); z3.splat<2>(rhs.mMatrix[3]); w0.splat<3>(rhs.mMatrix[0]); w1.splat<3>(rhs.mMatrix[1]); w2.splat<3>(rhs.mMatrix[2]); w3.splat<3>(rhs.mMatrix[3]); //16 muls x0.mul(mMatrix[0]); x1.mul(mMatrix[0]); x2.mul(mMatrix[0]); x3.mul(mMatrix[0]); y0.mul(mMatrix[1]); y1.mul(mMatrix[1]); y2.mul(mMatrix[1]); y3.mul(mMatrix[1]); z0.mul(mMatrix[2]); z1.mul(mMatrix[2]); z2.mul(mMatrix[2]); z3.mul(mMatrix[2]); w0.mul(mMatrix[3]); w1.mul(mMatrix[3]); w2.mul(mMatrix[3]); w3.mul(mMatrix[3]); //12 adds x0.add(y0); z0.add(w0); x1.add(y1); z1.add(w1); x2.add(y2); z2.add(w2); x3.add(y3); z3.add(w3); mMatrix[0].setAdd(x0,z0); mMatrix[1].setAdd(x1,z1); mMatrix[2].setAdd(x2,z2); mMatrix[3].setAdd(x3,z3); } inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) { mMatrix[0] = r0; mMatrix[1] = r1; mMatrix[2] = r2; } template inline void setRow(const LLVector4a& row) { mMatrix[N] = row; } template inline const LLVector4a& getRow() const { return mMatrix[N]; } template inline LLVector4a& getRow() { return mMatrix[N]; } template inline void setColumn(const LLVector4a& col) { mMatrix[0].copyComponent(col.getScalarAt<0>()); mMatrix[1].copyComponent(col.getScalarAt<1>()); mMatrix[2].copyComponent(col.getScalarAt<2>()); mMatrix[3].copyComponent(col.getScalarAt<3>()); } template inline LLVector4a getColumn() { LLVector4a v; v.copyComponent<0>(mMatrix[0].getScalarAt()); v.copyComponent<1>(mMatrix[1].getScalarAt()); v.copyComponent<2>(mMatrix[2].getScalarAt()); v.copyComponent<3>(mMatrix[3].getScalarAt()); return v; } inline void setMul(const LLMatrix4a& m, const F32 s) { const LLVector4a ssss(s); mMatrix[0].setMul(m.mMatrix[0], ssss); mMatrix[1].setMul(m.mMatrix[1], ssss); mMatrix[2].setMul(m.mMatrix[2], ssss); mMatrix[3].setMul(m.mMatrix[3], ssss); } inline void setMul(const LLMatrix4a& m0, const LLMatrix4a& m1) { m0.rotate4(m1.mMatrix[0],mMatrix[0]); m0.rotate4(m1.mMatrix[1],mMatrix[1]); m0.rotate4(m1.mMatrix[2],mMatrix[2]); m0.rotate4(m1.mMatrix[3],mMatrix[3]); } inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w) { LLVector4a d0,d1,d2,d3; d0.setSub(b.mMatrix[0], a.mMatrix[0]); d1.setSub(b.mMatrix[1], a.mMatrix[1]); d2.setSub(b.mMatrix[2], a.mMatrix[2]); d3.setSub(b.mMatrix[3], a.mMatrix[3]); // this = a + d*w const LLVector4a wwww(w); d0.mul(wwww); d1.mul(wwww); d2.mul(wwww); d3.mul(wwww); mMatrix[0].setAdd(a.mMatrix[0],d0); mMatrix[1].setAdd(a.mMatrix[1],d1); mMatrix[2].setAdd(a.mMatrix[2],d2); mMatrix[3].setAdd(a.mMatrix[3],d3); } //Singu Note: Don't mess with this. It's intentionally different from LL's. // Note how res isn't manipulated until the very end. //Fast(er). Treats v[VW] as 0.f inline void rotate(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z; x.splat<0>(v); y.splat<1>(v); z.splat<2>(v); x.mul(mMatrix[0]); y.mul(mMatrix[1]); z.mul(mMatrix[2]); x.add(y); res.setAdd(x,z); } //Proper. v[VW] as v[VW] inline void rotate4(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z,w; x.splat<0>(v); y.splat<1>(v); z.splat<2>(v); w.splat<3>(v); x.mul(mMatrix[0]); y.mul(mMatrix[1]); z.mul(mMatrix[2]); w.mul(mMatrix[3]); x.add(y); z.add(w); res.setAdd(x,z); } //Fast(er). Treats v[VW] as 1.f inline void affineTransform(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z; x.splat<0>(v); y.splat<1>(v); z.splat<2>(v); x.mul(mMatrix[0]); y.mul(mMatrix[1]); z.mul(mMatrix[2]); x.add(y); z.add(mMatrix[3]); res.setAdd(x,z); } inline void perspectiveTransform(const LLVector4a& v, LLVector4a& res) const { LLVector4a x,y,z,s,t,p,q; x.splat<0>(v); y.splat<1>(v); z.splat<2>(v); s.splat<3>(mMatrix[0]); t.splat<3>(mMatrix[1]); p.splat<3>(mMatrix[2]); q.splat<3>(mMatrix[3]); s.mul(x); t.mul(y); p.mul(z); q.add(s); t.add(p); q.add(t); x.mul(mMatrix[0]); y.mul(mMatrix[1]); z.mul(mMatrix[2]); x.add(y); z.add(mMatrix[3]); res.setAdd(x,z); res.div(q); } inline void transpose() { __m128 q1 = _mm_unpackhi_ps(mMatrix[0],mMatrix[1]); __m128 q2 = _mm_unpacklo_ps(mMatrix[0],mMatrix[1]); __m128 q3 = _mm_unpacklo_ps(mMatrix[2],mMatrix[3]); __m128 q4 = _mm_unpackhi_ps(mMatrix[2],mMatrix[3]); mMatrix[0] = _mm_movelh_ps(q2,q3); mMatrix[1] = _mm_movehl_ps(q3,q2); mMatrix[2] = _mm_movelh_ps(q1,q4); mMatrix[3] = _mm_movehl_ps(q4,q1); } // Following procedure adapted from: // http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/ // // License/Copyright Statement: // // Copyright (c) 2001 Intel Corporation. // // Permition is granted to use, copy, distribute and prepare derivative works // of this library for any purpose and without fee, provided, that the above // copyright notice and this statement appear in all copies. // Intel makes no representations about the suitability of this library for // any purpose, and specifically disclaims all warranties. // See LEGAL-intel_matrixlib.TXT for all the legal information. inline float invert() { LL_ALIGN_16(const unsigned int Sign_PNNP[4]) = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each // register holds four matrix element, the smaller matrices are // represented as a registers. Hence we get a better locality of the // calculations. LLVector4a A = _mm_movelh_ps(mMatrix[0], mMatrix[1]), // the four sub-matrices B = _mm_movehl_ps(mMatrix[1], mMatrix[0]), C = _mm_movelh_ps(mMatrix[2], mMatrix[3]), D = _mm_movehl_ps(mMatrix[3], mMatrix[2]); LLVector4a iA, iB, iC, iD, // partial inverse of the sub-matrices DC, AB; LLSimdScalar dA, dB, dC, dD; // determinant of the sub-matrices LLSimdScalar det, d, d1, d2; LLVector4a rd; // AB = A# * B AB.setMul(_mm_shuffle_ps(A,A,0x0F), B); AB.sub(_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E))); // DC = D# * C DC.setMul(_mm_shuffle_ps(D,D,0x0F), C); DC.sub(_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E))); // dA = |A| dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A); dA -= _mm_movehl_ps(dA,dA); // dB = |B| dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B); dB -= _mm_movehl_ps(dB,dB); // dC = |C| dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C); dC -= _mm_movehl_ps(dC,dC); // dD = |D| dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D); dD -= _mm_movehl_ps(dD,dD); // d = trace(AB*DC) = trace(A#*B*D#*C) d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB); // iD = C*A#*B iD.setMul(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB)); iD.add(_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB))); // iA = B*D#*C iA.setMul(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC)); iA.add(_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC))); // d = trace(AB*DC) = trace(A#*B*D#*C) [continue] d = _mm_add_ps(d, _mm_movehl_ps(d, d)); d += _mm_shuffle_ps(d, d, 1); d1 = dA*dD; d2 = dB*dC; // iD = D*|A| - C*A#*B iD.setSub(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD); // iA = A*|D| - B*D#*C; iA.setSub(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA); // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C) det = d1+d2-d; __m128 is_zero_mask = _mm_cmpeq_ps(det,_mm_setzero_ps()); rd = _mm_div_ss(_mm_set_ss(1.f),_mm_or_ps(_mm_andnot_ps(is_zero_mask, det), _mm_and_ps(is_zero_mask, _mm_set_ss(1.f)))); #ifdef ZERO_SINGULAR rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd); #endif // iB = D * (A#B)# = D*B#*A iB.setMul(D, _mm_shuffle_ps(AB,AB,0x33)); iB.sub(_mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66))); // iC = A * (D#C)# = A*C#*D iC.setMul(A, _mm_shuffle_ps(DC,DC,0x33)); iC.sub(_mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66))); rd = _mm_shuffle_ps(rd,rd,0); rd = _mm_xor_ps(rd, _mm_load_ps((const float*)Sign_PNNP)); // iB = C*|B| - D*B#*A iB.setSub(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB); // iC = B*|C| - A*C#*D; iC.setSub(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC); // iX = iX / det iA.mul(rd); iB.mul(rd); iC.mul(rd); iD.mul(rd); mMatrix[0] = _mm_shuffle_ps(iA,iB,0x77); mMatrix[1] = _mm_shuffle_ps(iA,iB,0x22); mMatrix[2] = _mm_shuffle_ps(iC,iD,0x77); mMatrix[3] = _mm_shuffle_ps(iC,iD,0x22); F32 ret; _mm_store_ss(&ret,det); return ret; } //=============Affine transformation matrix only========================= //Multiply matrix with a pure translation matrix. inline void applyTranslation_affine(const F32& x, const F32& y, const F32& z) { const LLVector4a xyz0(x,y,z,0); //load LLVector4a xxxx; xxxx.splat<0>(xyz0); LLVector4a yyyy; yyyy.splat<1>(xyz0); LLVector4a zzzz; zzzz.splat<2>(xyz0); LLVector4a sum1; LLVector4a sum2; LLVector4a sum3; sum1.setMul(xxxx,mMatrix[0]); sum2.setMul(yyyy,mMatrix[1]); sum3.setMul(zzzz,mMatrix[2]); mMatrix[3].add(sum1); mMatrix[3].add(sum2); mMatrix[3].add(sum3); } //Multiply matrix with a pure translation matrix. inline void applyTranslation_affine(const LLVector3& trans) { applyTranslation_affine(trans.mV[VX],trans.mV[VY],trans.mV[VZ]); } //Multiply matrix with a pure scale matrix. inline void applyScale_affine(const F32& x, const F32& y, const F32& z) { const LLVector4a xyz0(x,y,z,0); //load LLVector4a xxxx; xxxx.splat<0>(xyz0); LLVector4a yyyy; yyyy.splat<1>(xyz0); LLVector4a zzzz; zzzz.splat<2>(xyz0); mMatrix[0].mul(xxxx); mMatrix[1].mul(yyyy); mMatrix[2].mul(zzzz); } //Multiply matrix with a pure scale matrix. inline void applyScale_affine(const LLVector3& scale) { applyScale_affine(scale.mV[VX],scale.mV[VY],scale.mV[VZ]); } //Multiply matrix with a pure scale matrix. inline void applyScale_affine(const F32& s) { const LLVector4a scale(s); //load mMatrix[0].mul(scale); mMatrix[1].mul(scale); mMatrix[2].mul(scale); } //Direct addition to row3. inline void translate_affine(const LLVector3& trans) { LLVector4a translation; translation.load3(trans.mV); mMatrix[3].add(translation); } //Direct assignment of row3. inline void setTranslate_affine(const LLVector3& trans) { static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); LLVector4a translation; translation.load3(trans.mV); mMatrix[3].setSelectWithMask(mask,mMatrix[3],translation); } inline void mul_affine(const LLMatrix4a& rhs) { LLVector4a x0,y0,z0; LLVector4a x1,y1,z1; LLVector4a x2,y2,z2; LLVector4a x3,y3,z3; //12 shuffles x0.splat<0>(rhs.mMatrix[0]); x1.splat<0>(rhs.mMatrix[1]); x2.splat<0>(rhs.mMatrix[2]); x3.splat<0>(rhs.mMatrix[3]); y0.splat<1>(rhs.mMatrix[0]); y1.splat<1>(rhs.mMatrix[1]); y2.splat<1>(rhs.mMatrix[2]); y3.splat<1>(rhs.mMatrix[3]); z0.splat<2>(rhs.mMatrix[0]); z1.splat<2>(rhs.mMatrix[1]); z2.splat<2>(rhs.mMatrix[2]); z3.splat<2>(rhs.mMatrix[3]); //12 muls x0.mul(mMatrix[0]); x1.mul(mMatrix[0]); x2.mul(mMatrix[0]); x3.mul(mMatrix[0]); y0.mul(mMatrix[1]); y1.mul(mMatrix[1]); y2.mul(mMatrix[1]); y3.mul(mMatrix[1]); z0.mul(mMatrix[2]); z1.mul(mMatrix[2]); z2.mul(mMatrix[2]); z3.mul(mMatrix[2]); //9 adds x0.add(y0); x1.add(y1); x2.add(y2); x3.add(y3); z3.add(mMatrix[3]); mMatrix[0].setAdd(x0,z0); mMatrix[1].setAdd(x1,z1); mMatrix[2].setAdd(x2,z2); mMatrix[3].setAdd(x3,z3); } inline void extractRotation_affine() { static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); mMatrix[0].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[0]); mMatrix[1].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[1]); mMatrix[2].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[2]); mMatrix[3].setSelectWithMask(mask,LLVector4a(1.f),_mm_setzero_ps()); } //======================Logic==================== private: template inline void init_foos(LLMatrix4a& foos) const { static bool done(false); if (done) return; const LLVector4a delta(0.0001f); foos.setIdentity(); foos.getRow<0>().sub(delta); foos.getRow<1>().sub(delta); foos.getRow<2>().sub(delta); foos.getRow<3>().sub(delta); done = true; } public: inline bool isIdentity() const { static LLMatrix4a mins; static LLMatrix4a maxs; init_foos(mins); init_foos(maxs); LLVector4a mask1 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[0],mins.getRow<0>()), _mm_cmplt_ps(mMatrix[0],maxs.getRow<0>())); LLVector4a mask2 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[1],mins.getRow<1>()), _mm_cmplt_ps(mMatrix[1],maxs.getRow<1>())); LLVector4a mask3 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[2],mins.getRow<2>()), _mm_cmplt_ps(mMatrix[2],maxs.getRow<2>())); LLVector4a mask4 = _mm_and_ps(_mm_cmpgt_ps(mMatrix[3],mins.getRow<3>()), _mm_cmplt_ps(mMatrix[3],maxs.getRow<3>())); mask1 = _mm_and_ps(mask1,mask2); mask2 = _mm_and_ps(mask3,mask4); return _mm_movemask_epi8(_mm_castps_si128(_mm_and_ps(mask1, mask2))) == 0xFFFF; } } LL_ALIGN_POSTFIX(16); inline std::ostream& operator<<(std::ostream& s, const LLMatrix4a& m) { s << "[" << m.getF32ptr()[0] << ", " << m.getF32ptr()[1] << ", " << m.getF32ptr()[2] << ", " << m.getF32ptr()[3] << "]"; return s; } void matMulBoundBox(const LLMatrix4a &a, const LLVector4a *in_extents, LLVector4a *out_extents); #if !defined(LL_DEBUG) static_assert(std::is_trivial::value, "LLMatrix4a must be a trivial type"); static_assert(std::is_standard_layout::value, "LLMatrix4a must be a standard layout type"); #endif #endif