Skinned shaders were exceeding maximum amount of vertex uniforms on amd hardware.

-Changed the transform matrix uniform to a 3x4 matrix and packed translation into it to free up uniforms. (3x3 is converted to 3x4 internally, so we were needlessly eating 3*52 extra uniform slots. translationPalette might also have been treated as a vec4 internally too, wasting 52 more slots.)
-matrix3x4 requires opengl2.1 and newer, so added a new featuretable mask.
-Also added a featuretable mask to disable hardware skinning and deferred shading on hardware with less than 1024 vertex uniforms.
NOTE: On old old old amd hardware, evidently a 3x4 matrix might be upgraded to 4x4. I'm unsure, but I doubt such hardware has 1024+ uniform components available to begin with. 4x3 supposedly doesn't do this, but opengl is column-major, so this makes little sense.
This commit is contained in:
Shyotl
2014-05-17 03:31:45 -05:00
parent fdcf2eda5a
commit 67c8ac2b04
12 changed files with 98 additions and 40 deletions

View File

@@ -304,6 +304,7 @@ PFNGLUNIFORM3IVARBPROC glUniform3ivARB = NULL;
PFNGLUNIFORM4IVARBPROC glUniform4ivARB = NULL;
PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB = NULL;
PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB = NULL;
PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB = NULL;
PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB = NULL;
PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB = NULL;
PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB = NULL;
@@ -469,7 +470,8 @@ LLGLManager::LLGLManager() :
mGLSLVersionMinor(0),
mVRAM(0),
mGLMaxVertexRange(0),
mGLMaxIndexRange(0)
mGLMaxIndexRange(0),
mGLMaxVertexUniformComponents(0)
{
}
@@ -715,6 +717,12 @@ bool LLGLManager::initGL()
mNumTextureImageUnits = llmin(num_tex_image_units, 32);
}
if (mHasVertexShader)
{
//According to the spec, the resulting value should never be less than 512. We need at least 1024 to use skinned shaders.
glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS_ARB, &mGLMaxVertexUniformComponents);
}
if (LLRender::sGLCoreProfile)
{
mNumTextureUnits = llmin(mNumTextureImageUnits, MAX_GL_TEXTURE_UNITS);
@@ -1269,6 +1277,7 @@ void LLGLManager::initExtensions()
glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4ivARB");
glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix2fvARB");
glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3fvARB");
glUniformMatrix3x4fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3x4fv");
glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix4fvARB");
glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterfvARB");
glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterivARB");

View File

@@ -151,6 +151,8 @@ public:
S32 mVRAM; // VRAM in MB
S32 mGLMaxVertexRange;
S32 mGLMaxIndexRange;
S32 mGLMaxVertexUniformComponents;
void getPixelFormat(); // Get the best pixel format

View File

@@ -152,6 +152,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;
@@ -415,6 +416,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;
@@ -656,6 +658,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB;
extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB;
extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB;
extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB;
extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB;
extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB;
extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB;

View File

@@ -933,6 +933,23 @@ void LLGLSLShader::uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, c
}
}
void LLGLSLShader::uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
{
if (mProgramObject > 0)
{
if (mUniform.size() <= index)
{
UNIFORM_ERRS << "Uniform index out of bounds." << LL_ENDL;
return;
}
if (mUniform[index] >= 0)
{
glUniformMatrix3x4fvARB(mUniform[index], count, transpose, v);
}
}
}
void LLGLSLShader::uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
{
if (mProgramObject > 0)

View File

@@ -100,6 +100,7 @@ public:
void uniform2i(const LLStaticHashedString& uniform, GLint i, GLint j);
void uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
void uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
void uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
void uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v);
void uniform1i(const LLStaticHashedString& uniform, GLint i);
void uniform1f(const LLStaticHashedString& uniform, GLfloat v);

View File

@@ -24,12 +24,9 @@
ATTRIBUTE vec4 weight4;
uniform mat3 matrixPalette[52];
uniform vec3 translationPalette[52];
uniform mat3x4 matrixPalette[52];
uniform float maxWeight;
mat4 getObjectSkinnedTransform()
{
int i;
@@ -47,22 +44,21 @@ mat4 getObjectSkinnedTransform()
int i3 = int(index.z);
int i4 = int(index.w);
mat3 mat = mat3(matrixPalette[i1])*w.x;
mat += mat3(matrixPalette[i2])*w.y;
mat += mat3(matrixPalette[i3])*w.z;
mat += mat3(matrixPalette[i4])*w.w;
mat3 mat = matrixPalette[i1]*w.x;
mat += matrixPalette[i2]*w.y;
mat += matrixPalette[i3]*w.z;
mat += matrixPalette[i4]*w.w;
vec3 trans = translationPalette[i1]*w.x;
trans += translationPalette[i2]*w.y;
trans += translationPalette[i3]*w.z;
trans += translationPalette[i4]*w.w;
vec3 trans = vec3(matrixPalette[i1][0].w,matrixPalette[i1][1].w,matrixPalette[i1][2].w)*w.x;
trans += vec3(matrixPalette[i2][0].w,matrixPalette[i2][1].w,matrixPalette[i2][2].w)*w.y;
trans += vec3(matrixPalette[i3][0].w,matrixPalette[i3][1].w,matrixPalette[i3][2].w)*w.z;
trans += vec3(matrixPalette[i4][0].w,matrixPalette[i4][1].w,matrixPalette[i4][2].w)*w.w;
mat4 ret;
ret[0] = vec4(mat[0].xyz, 0);
ret[1] = vec4(mat[1].xyz, 0);
ret[2] = vec4(mat[2].xyz, 0);
ret[0] = vec4(mat[0], 0);
ret[1] = vec4(mat[1], 0);
ret[2] = vec4(mat[2], 0);
ret[3] = vec4(trans, sum);
return ret;

View File

@@ -301,6 +301,10 @@ RenderObjectBump 0 0
list OpenGLPre15
RenderVBOEnable 1 0
list OpenGLPre21
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
list OpenGLPre30
RenderDeferred 0 0
RenderMaxTextureIndex 1 1
@@ -486,6 +490,7 @@ Disregard128DefaultDrawDistance 1 0
list ATIOldDriver
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0
// ATI cards generally perform better when not using VBOs for streaming data
@@ -596,3 +601,7 @@ RenderShaderLightingMaxLevel 1 2
list NVIDIA_GeForce_Go_7900
RenderShaderLightingMaxLevel 1 2
list VertexUniformsLT1024
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0

View File

@@ -301,6 +301,11 @@ RenderObjectBump 0 0
list OpenGLPre15
RenderVBOEnable 1 0
list OpenGLPre21
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0
list IntelPre30
RenderAnisotropic 1 0
// Avoid some Intel crashes on Linux
@@ -458,6 +463,7 @@ Disregard128DefaultDrawDistance 1 0
list ATIOldDriver
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0
// Avoid driver crashes with some features on Linux with old ATI drivers
UseOcclusion 0 0
WindLightUseAtmosShaders 0 0
@@ -563,3 +569,7 @@ RenderShaderLightingMaxLevel 1 2
list NVIDIA_GeForce_Go_7900
RenderShaderLightingMaxLevel 1 2
list VertexUniformsLT1024
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0

View File

@@ -296,6 +296,11 @@ RenderObjectBump 0 0
list OpenGLPre15
RenderVBOEnable 1 0
list OpenGLPre21
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0
list TexUnit8orLess
RenderDeferredSSAO 0 0
@@ -521,3 +526,7 @@ Disregard128DefaultDrawDistance 1 0
list NVIDIA_GeForce_Go_7400
Disregard128DefaultDrawDistance 1 0
list VertexUniformsLT1024
RenderAvatarVP 0 0
RenderAvatarCloth 0 0
RenderDeferred 0 0

View File

@@ -1547,41 +1547,35 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
stop_glerror();
F32 mp[JOINT_COUNT*9];
F32 transp[JOINT_COUNT*3];
F32 mp[JOINT_COUNT*12];
for (U32 i = 0; i < count; ++i)
{
F32* m = (F32*) mat[i].mMatrix;
U32 idx = i*9;
U32 idx = i*12;
mp[idx+0] = m[0];
mp[idx+1] = m[1];
mp[idx+2] = m[2];
mp[idx+3] = m[12];
mp[idx+3] = m[4];
mp[idx+4] = m[5];
mp[idx+5] = m[6];
mp[idx+4] = m[4];
mp[idx+5] = m[5];
mp[idx+6] = m[6];
mp[idx+7] = m[13];
mp[idx+6] = m[8];
mp[idx+7] = m[9];
mp[idx+8] = m[10];
idx = i*3;
transp[idx+0] = m[12];
transp[idx+1] = m[13];
transp[idx+2] = m[14];
mp[idx+8] = m[8];
mp[idx+9] = m[9];
mp[idx+10] = m[10];
mp[idx+11] = m[14];
}
LLDrawPoolAvatar::sVertexProgram->uniformMatrix3fv(LLViewerShaderMgr::AVATAR_MATRIX,
LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
count,
FALSE,
(GLfloat*) mp);
LLDrawPoolAvatar::sVertexProgram->uniform3fv(LLShaderMgr::AVATAR_TRANSLATION, count, transp);
LLDrawPoolAvatar::sVertexProgram->uniform1f(LLShaderMgr::AVATAR_MAX_WEIGHT, F32(count-1));
stop_glerror();

View File

@@ -634,13 +634,17 @@ void LLFeatureManager::applyBaseMasks()
{
maskFeatures("Intel");
}
if (gGLManager.mGLVersion < 1.5f)
{
maskFeatures("OpenGLPre15");
}
if (gGLManager.mGLVersion < 3.f)
{
maskFeatures("OpenGLPre30");
if(gGLManager.mGLVersion < 2.1f || glUniformMatrix3x4fvARB == NULL) //glUniformMatrix3x4fv is part of glsl 1.20 spec.
{
maskFeatures("OpenGLPre21");
if (gGLManager.mGLVersion < 1.5f)
{
maskFeatures("OpenGLPre15");
}
}
}
if (gGLManager.mNumTextureImageUnits <= 8)
{
@@ -650,6 +654,10 @@ void LLFeatureManager::applyBaseMasks()
{
maskFeatures("MapBufferRange");
}
if (gGLManager.mGLMaxVertexUniformComponents < 1024)
{
maskFeatures("VertexUniformsLT1024");
}
// now mask by gpu string
// Replaces ' ' with '_' in mGPUString to deal with inability for parser to handle spaces

View File

@@ -436,7 +436,7 @@ void LLViewerShaderMgr::setShaders()
mVertexShaderLevel[SHADER_AVATAR] = 3;
mMaxAvatarShaderLevel = 3;
if (gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject())
if (LLFeatureManager::getInstance()->isFeatureAvailable("RenderAvatarVP") && gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject())
{ //hardware skinning is enabled and rigged attachment shaders loaded correctly
BOOL avatar_cloth = gSavedSettings.getBOOL("RenderAvatarCloth");
S32 avatar_class = 1;