Shyotl
2011-08-05 01:18:27 -05:00
parent 2c489d7741
commit b75a28ec15
7 changed files with 92 additions and 40 deletions

View File

@@ -42,13 +42,15 @@ template <class Object> class LLStrider
U8* mBytep;
};
U32 mSkip;
U32 mTypeSize;
public:
LLStrider() { mObjectp = NULL; mSkip = sizeof(Object); }
// Default ctor: null element pointer; stride and true element size both start as the packed sizeof(Object).
LLStrider() { mObjectp = NULL; mTypeSize = mSkip = sizeof(Object); }
~LLStrider() { }
// Point the strider at the first element of an array (stride/type size are left unchanged).
const LLStrider<Object>& operator = (Object *first) { mObjectp = first; return *this;}
// Distance in bytes between consecutive elements; 0 restores the packed sizeof(Object) stride.
void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));}
// True size in bytes of one element in the underlying buffer; 0 restores sizeof(Object).
// May differ from sizeof(Object), e.g. an LLVector3 strider over LLVector4-sized vbo data.
void setTypeSize (S32 typeBytes){ mTypeSize = (typeBytes ? typeBytes : sizeof(Object)); }
// Advance the strider by index elements (index * stride bytes).
void skip(const U32 index) { mBytep += mSkip*index;}
// Current stride in bytes.
U32 getSkip() const { return mSkip; }
@@ -58,18 +60,68 @@ public:
// Post-increment: advance the byte pointer by one stride; returns the element pointer from before the step.
Object* operator ++(int) { Object* old = mObjectp; mBytep += mSkip; return old; }
// Advance the byte pointer by i strides; returns the current element pointer.
Object* operator +=(int i) { mBytep += mSkip*i; return mObjectp; }
// Random access by element index, honoring the configured stride.
Object& operator[](U32 index) { return *(Object*)(mBytep + (mSkip * index)); }
void assignArray(U8* buff, size_t elem_size, size_t elem_count)
void assignArray(U8* __restrict source, const size_t elem_size, const size_t elem_count)
{
llassert_always(sizeof(Object) <= elem_size);
if(sizeof(Object) == mSkip && sizeof(Object) == elem_size) //No stride. No difference in element size.
LLVector4a::memcpyNonAliased16((F32*) mBytep, (F32*) buff, elem_size * elem_count);
U8* __restrict dest = mBytep; //refer to dest instead of mBytep to benefit from __restrict hint
const U32 bytes = elem_size * elem_count; //total bytes to copy from source to dest
//stride == sizeof(element) implies entire buffer is unstrided and thus memcpy-able, provided source buffer elements match in size.
//Because LLStrider is often passed an LLVector3 even if the reprensentation is LLVector4 in the vertex buffer, mTypeSize is set to
//the TRUE vbo datatype size via VertexBufferStrider::get
if(mTypeSize == mSkip && mTypeSize == elem_size)
{
if(bytes >= sizeof(LLVector4) * 4) //Should be able to pull at least 3 16byte blocks from this. Smaller isn't really beneficial.
{
U8* __restrict aligned_source = LL_NEXT_ALIGNED_ADDRESS(source);
U8* __restrict aligned_dest = LL_NEXT_ALIGNED_ADDRESS(dest);
const U32 source_offset = aligned_source - source; //Offset to first aligned location in source buffer.
const U32 dest_offset = aligned_dest - dest; //Offset to first aligned location in dest buffer.
llassert_always(source_offset < 16);
llassert_always(dest_offset < 16);
if(source_offset == dest_offset) //delta to aligned location matches between source and destination! _mm_*_ps should be viable.
{
const U32 end_offset = (bytes - source_offset) % sizeof(LLVector4); //buffers may not neatly end on a 16byte alignment boundary.
const U32 aligned_bytes = bytes - source_offset - end_offset; //how many bytes to copy from aligned start to aligned end.
llassert_always(aligned_bytes > 0);
if(source_offset) //memcpy up to the aligned location if needed
memcpy(dest,source,source_offset);
LLVector4a::memcpyNonAliased16((F32*) aligned_dest, (F32*) aligned_source, aligned_bytes);
if(end_offset) //memcpy to the very end if needed.
memcpy(aligned_dest+aligned_bytes,aligned_source+aligned_bytes,end_offset);
}
else //buffers non-uniformly offset from aligned location. Using _mm_*u_ps.
{
U32 end = bytes/sizeof(LLVector4); //sizeof(LLVector4) = 16 bytes = 128 bits
llassert_always(end > 0);
__m128* dst = (__m128*) dest;
__m128* src = (__m128*) source;
for (U32 i = 0; i < end; i++) //copy 128bit chunks
{
__m128 res = _mm_loadu_ps((F32*)&src[i]);
_mm_storeu_ps((F32*)&dst[i], res);
}
end*=16;//Convert to real byte offset
if(end < bytes) //just memcopy the rest
memcpy(dest+end,source+end,bytes-end);
}
}
else //Too small. just do a simple memcpy.
memcpy(dest,source,bytes);
}
else
{
for(U32 i=0;i<elem_count;i++)
{
memcpy(mBytep,buff,sizeof(Object));
mBytep+=mSkip;
buff+=elem_size;
memcpy(dest,source,sizeof(Object));
dest+=mSkip;
source+=elem_size;
}
}
}

View File

@@ -1695,6 +1695,19 @@ LLSD LLModel::writeModelToStream(std::ostream& ostr, LLSD& mdl, BOOL nowrite, BO
LLModel::weight_list& LLModel::getJointInfluences(const LLVector3& pos)
{
//1. If a vertex has been weighted then we'll find it via pos and return it's weight list
weight_map::iterator iterPos = mSkinWeights.begin();
weight_map::iterator iterEnd = mSkinWeights.end();
for ( ; iterPos!=iterEnd; ++iterPos )
{
if ( jointPositionalLookup( iterPos->first, pos ) )
{
return iterPos->second;
}
}
//2. Otherwise we'll use the older implementation
weight_map::iterator iter = mSkinWeights.find(pos);
if (iter != mSkinWeights.end())

View File

@@ -223,20 +223,17 @@ public:
};
// Returns true when two doubles are equal to within a small absolute tolerance.
// @param a first value
// @param b second value
// @return true if |a - b| < 1e-5
bool areEqual( double a, double b )
{
	// Epsilon is a double: the operands are doubles and a float literal
	// would be silently widened in the comparison anyway.
	const double epsilon = 1e-5;
	return fabs(a - b) < epsilon;
}
// Returns true when every component of a and b matches within the areEqual()
// epsilon tolerance, i.e. the two positions coincide for lookup purposes.
// (The previous comment claimed the opposite polarity; the code returns true
// on a within-tolerance match.)
bool jointPositionalLookup( const LLVector3& a, const LLVector3& b )
{
	return areEqual(a[0], b[0]) && areEqual(a[1], b[1]) && areEqual(a[2], b[2]);
}
//copy of position array for this model -- mPosition[idx].mV[X,Y,Z]
std::vector<LLVector3> mPosition;

View File

@@ -1192,12 +1192,14 @@ template <class T,S32 type> struct VertexBufferStrider
strider = (T*)(ptr + index*sizeof(T));
strider.setStride(0);
strider.setTypeSize(0);
return TRUE;
}
else if (vbo.hasDataType(type))
{
S32 stride = vbo.getStride();
volatile U8* ptr = vbo.mapVertexBuffer(type);
S32 size = LLVertexBuffer::sTypeSize[type];
if (ptr == NULL)
{
@@ -1207,6 +1209,7 @@ template <class T,S32 type> struct VertexBufferStrider
strider = (T*)(ptr + vbo.getOffset(type) + index*stride);
strider.setStride(stride);
strider.setTypeSize(size);
return TRUE;
}
else
@@ -1575,11 +1578,3 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const
llglassertok();
}
// Stub: per-range dirty marking of vertex/index data is not implemented;
// all parameters are currently ignored (see TODO below).
void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count)
{
	// TODO: use GL_APPLE_flush_buffer_range here
	/*if (useVBOs() && !mFilthy)
	{
	}*/
}

View File

@@ -213,8 +213,6 @@ public:
void setStride(S32 type, S32 new_stride);
void markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count);
void draw(U32 mode, U32 count, U32 indices_offset) const;
void drawArrays(U32 mode, U32 offset, U32 count) const;
void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;

View File

@@ -3180,7 +3180,8 @@ void LLPhysicsDecomp::doDecompositionSingleHull()
return;
#endif //!MESH_IMPORT
#if MESH_IMPORT
LLConvexDecomposition* decomp = LLConvexDecomposition::getInstance();
if (decomp == NULL)
{
//stub. do nothing.

View File

@@ -3678,12 +3678,8 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
U32 te_idx = facep->getTEOffset();
if (facep->getGeometryVolume(*volume, te_idx,
vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset))
{
buffer->markDirty(facep->getGeomIndex(), facep->getGeomCount(),
facep->getIndicesStart(), facep->getIndicesCount());
}
facep->getGeometryVolume(*volume, te_idx,
vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset);
}
}
@@ -3709,9 +3705,9 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
// can we safely treat this as an alpha mask?
if (facep->canRenderAsMask())
{
const LLDrawable* drawablep = facep->getDrawable();
const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL;
if (te->getFullbright() || (vobj && vobj->isHUDAttachment()))
//const LLDrawable* drawablep = facep->getDrawable();
//const LLVOVolume* vobj = drawablep ? drawablep->getVOVolume() : NULL;
if (te->getFullbright() /*|| (vobj && vobj->isHUDAttachment())*/)
{
registerFace(group, facep, LLRenderPass::PASS_FULLBRIGHT_ALPHA_MASK);
}