A few more alignment bits and bobs.

This commit is contained in:
Shyotl
2012-10-16 20:07:04 -05:00
parent 7533b475be
commit 43271f290e
6 changed files with 20 additions and 19 deletions

View File

@@ -1689,6 +1689,12 @@ static void avg4_colors2(const U8* a, const U8* b, const U8* c, const U8* d, U8*
dst[1] = (U8)(((U32)(a[1]) + b[1] + c[1] + d[1])>>2);
}
// Points this image at the caller-supplied buffer by recording `data` in
// mData and `size` in mDataSize. Nothing is allocated or copied here, and the
// previous mData value is simply overwritten.
// The pointer is first handed to ll_assert_aligned with an alignment of 16
// (presumably a 16-byte alignment check; its definition is outside this view).
void LLImageBase::setDataAndSize(U8 *data, S32 size)
{
	ll_assert_aligned(data, 16);

	mData = data;
	mDataSize = size;
}
//static
void LLImageBase::generateMip(const U8* indata, U8* mipdata, S32 width, S32 height, S32 nchannels)
{

View File

@@ -134,7 +134,7 @@ public:
protected:
// Special accessor that lets LLImageFormatted set mData and mDataSize directly.
// It only stores the pointer and the size it is given; it does not allocate or copy.
void setDataAndSize(U8 *data, S32 size) { mData = data; mDataSize = size; }
void setDataAndSize(U8 *data, S32 size);
public:
static void generateMip(const U8 *indata, U8* mipdata, int width, int height, S32 nchannels);

View File

@@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)
// Unary minus for LLSimdScalar.
// XORs `a` with a mask that has only the top bit (0x80000000, the sign bit of
// a 32-bit float) set in each of its four 32-bit words, which flips the sign
// and leaves every other bit unchanged.
inline LLSimdScalar operator-(const LLSimdScalar& a)
{
	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 };
	ll_assert_aligned(signMask,16);

	// The mask is read through an LLQuad pointer, hence the alignment check above.
	const LLQuad* sign_bits = reinterpret_cast<const LLQuad*>(signMask);
	return _mm_xor_ps(*sign_bits, a);
}
@@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)
// Returns the absolute value of this scalar.
// ANDs mQ with a mask of 0x7FFFFFFF in each of its four 32-bit words, which
// clears the top (sign) bit and keeps all remaining bits as they are.
inline LLSimdScalar LLSimdScalar::getAbs() const
{
	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
	ll_assert_aligned(F_ABS_MASK_4A,16);

	// The mask is read through an LLQuad pointer, hence the alignment check above.
	const LLQuad* abs_mask = reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A);
	return _mm_and_ps( mQ, *abs_mask);
}

View File

@@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
extern BOOL gDebugGL;
// Alignment sanity check for `ptr` against `alignment` (in bytes).
// The whole body sits inside `#if 0`, so as written this function does
// nothing at all. The disabled code would report an error through llerrs
// whenever the address is not a multiple of `alignment`.
void assert_aligned(void* ptr, uintptr_t alignment)
{
#if 0
	const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
	const bool misaligned = (address % alignment) != 0;
	if (misaligned)
	{
		llerrs << "Alignment check failed." << llendl;
	}
#endif
}
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -6967,14 +6956,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
if (num_verts)
{
mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
assert_aligned(mPositions, 16);
ll_assert_aligned(mPositions, 16);
mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
assert_aligned(mNormals, 16);
ll_assert_aligned(mNormals, 16);
//pad texture coordinate block end to allow for QWORD reads
S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);
assert_aligned(mTexCoords, 16);
ll_assert_aligned(mTexCoords, 16);
}
else
{
@@ -7062,7 +7051,8 @@ void LLVolumeFace::pushIndex(const U16& idx)
S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
if (new_size != old_size)
{
mIndices = (U16*) realloc(mIndices, new_size);
mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size);
ll_assert_aligned(mIndices,16);
}
mIndices[mNumIndices++] = idx;

View File

@@ -1639,7 +1639,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (!do_xform)
{
LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
S32 tc_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF;
LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, tc_size);
}
else
{

View File

@@ -736,8 +736,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
F32* vw = (F32*) vertex_weightsp.get();
F32* cw = (F32*) clothing_weightsp.get();
LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));
S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;
LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);
LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));
}