A few more alignment bits and bobs.
This commit is contained in:
@@ -1689,6 +1689,12 @@ static void avg4_colors2(const U8* a, const U8* b, const U8* c, const U8* d, U8*
|
||||
dst[1] = (U8)(((U32)(a[1]) + b[1] + c[1] + d[1])>>2);
|
||||
}
|
||||
|
||||
// Stores the given buffer pointer and byte count directly into mData and
// mDataSize. No allocation, copy, or release of any buffer happens here.
void LLImageBase::setDataAndSize(U8 *data, S32 size)
|
||||
{
|
||||
// Alignment check on the incoming pointer against a 16-byte boundary.
// NOTE(review): ll_assert_aligned is defined elsewhere; presumably a
// debug-only assertion -- confirm before relying on it in release builds.
ll_assert_aligned(data, 16);
|
||||
mData = data; mDataSize = size;
|
||||
}
|
||||
|
||||
//static
|
||||
void LLImageBase::generateMip(const U8* indata, U8* mipdata, S32 width, S32 height, S32 nchannels)
|
||||
{
|
||||
|
||||
@@ -134,7 +134,7 @@ public:
|
||||
|
||||
protected:
|
||||
// special accessor to allow direct setting of mData and mDataSize by LLImageFormatted
|
||||
void setDataAndSize(U8 *data, S32 size) { mData = data; mDataSize = size; }
|
||||
void setDataAndSize(U8 *data, S32 size);
|
||||
|
||||
public:
|
||||
static void generateMip(const U8 *indata, U8* mipdata, int width, int height, S32 nchannels);
|
||||
|
||||
@@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)
|
||||
// Unary minus for LLSimdScalar: XORs the operand with a mask that has only
// the top bit (0x80000000) set in each of four 32-bit words, which toggles
// the sign bit of every 32-bit float lane.
inline LLSimdScalar operator-(const LLSimdScalar& a)
|
||||
{
|
||||
// Four copies of the sign-bit mask, declared with LL_ALIGN_16 so the array
// can be read through an LLQuad pointer below.
static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
// Check that the static mask actually landed on a 16-byte boundary.
ll_assert_aligned(signMask,16);
|
||||
// NOTE(review): assumes LLQuad is the 128-bit SSE vector type expected by
// _mm_xor_ps -- its definition is not visible here.
return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a);
|
||||
}
|
||||
|
||||
@@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)
|
||||
// Returns mQ ANDed with a mask of 0x7FFFFFFF in each of four 32-bit words,
// i.e. the same bits with the top (sign) bit of every 32-bit lane cleared.
inline LLSimdScalar LLSimdScalar::getAbs() const
|
||||
{
|
||||
// Four copies of the "everything but the sign bit" mask, declared with
// LL_ALIGN_16 so the array can be read through an LLQuad pointer below.
static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
|
||||
// Check that the static mask actually landed on a 16-byte boundary.
ll_assert_aligned(F_ABS_MASK_4A,16);
|
||||
// NOTE(review): assumes LLQuad is the 128-bit SSE vector type expected by
// _mm_and_ps -- its definition is not visible here.
return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A));
|
||||
}
|
||||
|
||||
|
||||
@@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
|
||||
|
||||
extern BOOL gDebugGL;
|
||||
|
||||
// Alignment check helper taking a pointer and the required alignment.
// As written, the entire body is excluded by "#if 0", so calling this
// function does nothing.
void assert_aligned(void* ptr, uintptr_t alignment)
|
||||
{
|
||||
// Everything down to the matching #endif is compiled out.
#if 0
|
||||
// Disabled logic: treat the address as an integer and report through llerrs
// when it is not an exact multiple of 'alignment'.
uintptr_t t = (uintptr_t) ptr;
|
||||
if (t%alignment != 0)
|
||||
{
|
||||
llerrs << "Alignment check failed." << llendl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
|
||||
{
|
||||
LLVector3 test = (pt2-pt1)%(pt3-pt2);
|
||||
@@ -6967,14 +6956,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
|
||||
if (num_verts)
|
||||
{
|
||||
mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
|
||||
assert_aligned(mPositions, 16);
|
||||
ll_assert_aligned(mPositions, 16);
|
||||
mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
|
||||
assert_aligned(mNormals, 16);
|
||||
ll_assert_aligned(mNormals, 16);
|
||||
|
||||
//pad texture coordinate block end to allow for QWORD reads
|
||||
S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
|
||||
mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);
|
||||
assert_aligned(mTexCoords, 16);
|
||||
ll_assert_aligned(mTexCoords, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -7062,7 +7051,8 @@ void LLVolumeFace::pushIndex(const U16& idx)
|
||||
S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
|
||||
if (new_size != old_size)
|
||||
{
|
||||
mIndices = (U16*) realloc(mIndices, new_size);
|
||||
mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size);
|
||||
ll_assert_aligned(mIndices,16);
|
||||
}
|
||||
|
||||
mIndices[mNumIndices++] = idx;
|
||||
|
||||
@@ -1639,7 +1639,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
|
||||
if (!do_xform)
|
||||
{
|
||||
LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
|
||||
LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
|
||||
S32 tc_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF;
|
||||
LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, tc_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -736,8 +736,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
|
||||
F32* vw = (F32*) vertex_weightsp.get();
|
||||
F32* cw = (F32*) clothing_weightsp.get();
|
||||
|
||||
LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
|
||||
LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));
|
||||
S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
|
||||
LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
|
||||
S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;
|
||||
LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);
|
||||
LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user