Skinned shaders were exceeding the maximum number of vertex uniforms on AMD hardware.
- Changed the transform matrix uniform to a 3x4 matrix and packed the translation into it to free up uniforms. (A 3x3 matrix is converted to 3x4 internally, so we were needlessly eating 3*52 extra uniform slots. translationPalette might also have been treated as a vec4 internally, wasting 52 more slots.)
- mat3x4 uniforms require OpenGL 2.1 or newer, so added a new featuretable mask for older versions.
- Also added a featuretable mask to disable hardware skinning and deferred shading on hardware with fewer than 1024 vertex uniform components.
NOTE: On very old AMD hardware, a 3x4 matrix might evidently be promoted to 4x4. I'm unsure, but I doubt such hardware has 1024+ uniform components available to begin with. 4x3 supposedly doesn't do this, but OpenGL is column-major, so this makes little sense.
This commit is contained in:
@@ -304,6 +304,7 @@ PFNGLUNIFORM3IVARBPROC glUniform3ivARB = NULL;
|
||||
PFNGLUNIFORM4IVARBPROC glUniform4ivARB = NULL;
|
||||
PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB = NULL;
|
||||
PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB = NULL;
|
||||
PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB = NULL;
|
||||
PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB = NULL;
|
||||
PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB = NULL;
|
||||
PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB = NULL;
|
||||
@@ -469,7 +470,8 @@ LLGLManager::LLGLManager() :
|
||||
mGLSLVersionMinor(0),
|
||||
mVRAM(0),
|
||||
mGLMaxVertexRange(0),
|
||||
mGLMaxIndexRange(0)
|
||||
mGLMaxIndexRange(0),
|
||||
mGLMaxVertexUniformComponents(0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -715,6 +717,12 @@ bool LLGLManager::initGL()
|
||||
mNumTextureImageUnits = llmin(num_tex_image_units, 32);
|
||||
}
|
||||
|
||||
if (mHasVertexShader)
|
||||
{
|
||||
//According to the spec, the resulting value should never be less than 512. We need at least 1024 to use skinned shaders.
|
||||
glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS_ARB, &mGLMaxVertexUniformComponents);
|
||||
}
|
||||
|
||||
if (LLRender::sGLCoreProfile)
|
||||
{
|
||||
mNumTextureUnits = llmin(mNumTextureImageUnits, MAX_GL_TEXTURE_UNITS);
|
||||
@@ -1269,6 +1277,7 @@ void LLGLManager::initExtensions()
|
||||
glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4ivARB");
|
||||
glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix2fvARB");
|
||||
glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3fvARB");
|
||||
glUniformMatrix3x4fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3x4fv");
|
||||
glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix4fvARB");
|
||||
glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterfvARB");
|
||||
glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterivARB");
|
||||
|
||||
@@ -151,6 +151,8 @@ public:
|
||||
S32 mVRAM; // VRAM in MB
|
||||
S32 mGLMaxVertexRange;
|
||||
S32 mGLMaxIndexRange;
|
||||
|
||||
S32 mGLMaxVertexUniformComponents;
|
||||
|
||||
void getPixelFormat(); // Get the best pixel format
|
||||
|
||||
|
||||
@@ -152,6 +152,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
|
||||
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
|
||||
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
|
||||
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;
|
||||
@@ -415,6 +416,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
|
||||
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
|
||||
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
|
||||
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;
|
||||
@@ -656,6 +658,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
|
||||
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
|
||||
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
|
||||
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
|
||||
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
|
||||
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;
|
||||
|
||||
@@ -933,6 +933,23 @@ void LLGLSLShader::uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, c
|
||||
}
|
||||
}
|
||||
|
||||
void LLGLSLShader::uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
|
||||
{
|
||||
if (mProgramObject > 0)
|
||||
{
|
||||
if (mUniform.size() <= index)
|
||||
{
|
||||
UNIFORM_ERRS << "Uniform index out of bounds." << LL_ENDL;
|
||||
return;
|
||||
}
|
||||
|
||||
if (mUniform[index] >= 0)
|
||||
{
|
||||
glUniformMatrix3x4fvARB(mUniform[index], count, transpose, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LLGLSLShader::uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
|
||||
{
|
||||
if (mProgramObject > 0)
|
||||
|
||||
@@ -100,6 +100,7 @@ public:
|
||||
void uniform2i(const LLStaticHashedString& uniform, GLint i, GLint j);
|
||||
void uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
|
||||
void uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
|
||||
void uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
|
||||
void uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
|
||||
void uniform1i(const LLStaticHashedString& uniform, GLint i);
|
||||
void uniform1f(const LLStaticHashedString& uniform, GLfloat v);
|
||||
|
||||
@@ -24,12 +24,9 @@
|
||||
|
||||
ATTRIBUTE vec4 weight4;
|
||||
|
||||
uniform mat3 matrixPalette[52];
|
||||
uniform vec3 translationPalette[52];
|
||||
uniform mat3x4 matrixPalette[52];
|
||||
uniform float maxWeight;
|
||||
|
||||
|
||||
|
||||
mat4 getObjectSkinnedTransform()
|
||||
{
|
||||
int i;
|
||||
@@ -47,22 +44,21 @@ mat4 getObjectSkinnedTransform()
|
||||
int i3 = int(index.z);
|
||||
int i4 = int(index.w);
|
||||
|
||||
mat3 mat = mat3(matrixPalette[i1])*w.x;
|
||||
mat += mat3(matrixPalette[i2])*w.y;
|
||||
mat += mat3(matrixPalette[i3])*w.z;
|
||||
mat += mat3(matrixPalette[i4])*w.w;
|
||||
|
||||
mat3 mat = matrixPalette[i1]*w.x;
|
||||
mat += matrixPalette[i2]*w.y;
|
||||
mat += matrixPalette[i3]*w.z;
|
||||
mat += matrixPalette[i4]*w.w;
|
||||
|
||||
vec3 trans = translationPalette[i1]*w.x;
|
||||
trans += translationPalette[i2]*w.y;
|
||||
trans += translationPalette[i3]*w.z;
|
||||
trans += translationPalette[i4]*w.w;
|
||||
vec3 trans = vec3(matrixPalette[i1][0].w,matrixPalette[i1][1].w,matrixPalette[i1][2].w)*w.x;
|
||||
trans += vec3(matrixPalette[i2][0].w,matrixPalette[i2][1].w,matrixPalette[i2][2].w)*w.y;
|
||||
trans += vec3(matrixPalette[i3][0].w,matrixPalette[i3][1].w,matrixPalette[i3][2].w)*w.z;
|
||||
trans += vec3(matrixPalette[i4][0].w,matrixPalette[i4][1].w,matrixPalette[i4][2].w)*w.w;
|
||||
|
||||
mat4 ret;
|
||||
|
||||
ret[0] = vec4(mat[0].xyz, 0);
|
||||
ret[1] = vec4(mat[1].xyz, 0);
|
||||
ret[2] = vec4(mat[2].xyz, 0);
|
||||
ret[0] = vec4(mat[0], 0);
|
||||
ret[1] = vec4(mat[1], 0);
|
||||
ret[2] = vec4(mat[2], 0);
|
||||
ret[3] = vec4(trans, sum);
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -301,6 +301,10 @@ RenderObjectBump 0 0
|
||||
list OpenGLPre15
|
||||
RenderVBOEnable 1 0
|
||||
|
||||
list OpenGLPre21
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
|
||||
list OpenGLPre30
|
||||
RenderDeferred 0 0
|
||||
RenderMaxTextureIndex 1 1
|
||||
@@ -486,6 +490,7 @@ Disregard128DefaultDrawDistance 1 0
|
||||
list ATIOldDriver
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
|
||||
// ATI cards generally perform better when not using VBOs for streaming data
|
||||
|
||||
@@ -596,3 +601,7 @@ RenderShaderLightingMaxLevel 1 2
|
||||
list NVIDIA_GeForce_Go_7900
|
||||
RenderShaderLightingMaxLevel 1 2
|
||||
|
||||
list VertexUniformsLT1024
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
|
||||
@@ -301,6 +301,11 @@ RenderObjectBump 0 0
|
||||
list OpenGLPre15
|
||||
RenderVBOEnable 1 0
|
||||
|
||||
list OpenGLPre21
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
|
||||
list IntelPre30
|
||||
RenderAnisotropic 1 0
|
||||
// Avoid some Intel crashes on Linux
|
||||
@@ -458,6 +463,7 @@ Disregard128DefaultDrawDistance 1 0
|
||||
list ATIOldDriver
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
// Avoid driver crashes with some features on Linux with old ATI drivers
|
||||
UseOcclusion 0 0
|
||||
WindLightUseAtmosShaders 0 0
|
||||
@@ -563,3 +569,7 @@ RenderShaderLightingMaxLevel 1 2
|
||||
list NVIDIA_GeForce_Go_7900
|
||||
RenderShaderLightingMaxLevel 1 2
|
||||
|
||||
list VertexUniformsLT1024
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
@@ -296,6 +296,11 @@ RenderObjectBump 0 0
|
||||
list OpenGLPre15
|
||||
RenderVBOEnable 1 0
|
||||
|
||||
list OpenGLPre21
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
|
||||
list TexUnit8orLess
|
||||
RenderDeferredSSAO 0 0
|
||||
|
||||
@@ -521,3 +526,7 @@ Disregard128DefaultDrawDistance 1 0
|
||||
list NVIDIA_GeForce_Go_7400
|
||||
Disregard128DefaultDrawDistance 1 0
|
||||
|
||||
list VertexUniformsLT1024
|
||||
RenderAvatarVP 0 0
|
||||
RenderAvatarCloth 0 0
|
||||
RenderDeferred 0 0
|
||||
@@ -1547,41 +1547,35 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
|
||||
|
||||
stop_glerror();
|
||||
|
||||
F32 mp[JOINT_COUNT*9];
|
||||
|
||||
F32 transp[JOINT_COUNT*3];
|
||||
F32 mp[JOINT_COUNT*12];
|
||||
|
||||
for (U32 i = 0; i < count; ++i)
|
||||
{
|
||||
F32* m = (F32*) mat[i].mMatrix;
|
||||
|
||||
U32 idx = i*9;
|
||||
U32 idx = i*12;
|
||||
|
||||
mp[idx+0] = m[0];
|
||||
mp[idx+1] = m[1];
|
||||
mp[idx+2] = m[2];
|
||||
mp[idx+3] = m[12];
|
||||
|
||||
mp[idx+3] = m[4];
|
||||
mp[idx+4] = m[5];
|
||||
mp[idx+5] = m[6];
|
||||
mp[idx+4] = m[4];
|
||||
mp[idx+5] = m[5];
|
||||
mp[idx+6] = m[6];
|
||||
mp[idx+7] = m[13];
|
||||
|
||||
mp[idx+6] = m[8];
|
||||
mp[idx+7] = m[9];
|
||||
mp[idx+8] = m[10];
|
||||
|
||||
idx = i*3;
|
||||
|
||||
transp[idx+0] = m[12];
|
||||
transp[idx+1] = m[13];
|
||||
transp[idx+2] = m[14];
|
||||
mp[idx+8] = m[8];
|
||||
mp[idx+9] = m[9];
|
||||
mp[idx+10] = m[10];
|
||||
mp[idx+11] = m[14];
|
||||
}
|
||||
|
||||
LLDrawPoolAvatar::sVertexProgram->uniformMatrix3fv(LLViewerShaderMgr::AVATAR_MATRIX,
|
||||
LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
|
||||
count,
|
||||
FALSE,
|
||||
(GLfloat*) mp);
|
||||
|
||||
LLDrawPoolAvatar::sVertexProgram->uniform3fv(LLShaderMgr::AVATAR_TRANSLATION, count, transp);
|
||||
LLDrawPoolAvatar::sVertexProgram->uniform1f(LLShaderMgr::AVATAR_MAX_WEIGHT, F32(count-1));
|
||||
|
||||
stop_glerror();
|
||||
|
||||
@@ -634,13 +634,17 @@ void LLFeatureManager::applyBaseMasks()
|
||||
{
|
||||
maskFeatures("Intel");
|
||||
}
|
||||
if (gGLManager.mGLVersion < 1.5f)
|
||||
{
|
||||
maskFeatures("OpenGLPre15");
|
||||
}
|
||||
if (gGLManager.mGLVersion < 3.f)
|
||||
{
|
||||
maskFeatures("OpenGLPre30");
|
||||
if(gGLManager.mGLVersion < 2.1f || glUniformMatrix3x4fvARB == NULL) //glUniformMatrix3x4fv is part of glsl 1.20 spec.
|
||||
{
|
||||
maskFeatures("OpenGLPre21");
|
||||
if (gGLManager.mGLVersion < 1.5f)
|
||||
{
|
||||
maskFeatures("OpenGLPre15");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (gGLManager.mNumTextureImageUnits <= 8)
|
||||
{
|
||||
@@ -650,6 +654,10 @@ void LLFeatureManager::applyBaseMasks()
|
||||
{
|
||||
maskFeatures("MapBufferRange");
|
||||
}
|
||||
if (gGLManager.mGLMaxVertexUniformComponents < 1024)
|
||||
{
|
||||
maskFeatures("VertexUniformsLT1024");
|
||||
}
|
||||
|
||||
// now mask by gpu string
|
||||
// Replaces ' ' with '_' in mGPUString to deal with inability for parser to handle spaces
|
||||
|
||||
@@ -436,7 +436,7 @@ void LLViewerShaderMgr::setShaders()
|
||||
mVertexShaderLevel[SHADER_AVATAR] = 3;
|
||||
mMaxAvatarShaderLevel = 3;
|
||||
|
||||
if (gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject())
|
||||
if (LLFeatureManager::getInstance()->isFeatureAvailable("RenderAvatarVP") && gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject())
|
||||
{ //hardware skinning is enabled and rigged attachment shaders loaded correctly
|
||||
BOOL avatar_cloth = gSavedSettings.getBOOL("RenderAvatarCloth");
|
||||
S32 avatar_class = 1;
|
||||
|
||||
Reference in New Issue
Block a user