From 67c8ac2b048c0c49a16b1dadcdc9e486143381d0 Mon Sep 17 00:00:00 2001 From: Shyotl Date: Sat, 17 May 2014 03:31:45 -0500 Subject: [PATCH] Skinned shaders were exceeding maximum amount of vertex uniforms on amd hardware. -Changed the transform matrix uniform to a 3x4 matrix and packed translation into it to free up uniforms. (3x3 is converted to 3x4 internally, so we were needlessly eating 3*52 extra uniform slots. translationPalette might also have been treated as a vec4 internally too, wasting 52 more slots.) -matrix3x4 requires opengl2.1 and newer, so added a new featuretable mask. -Also added a featuretable mask to disable hardware skinning and deferred shading on hardware with less than 1024 vertex uniforms. NOTE: On old old old amd hardware, evidently a 3x4 matrix might be upgraded to 4x4. I'm unsure, but I doubt such hardware has 1024+ uniform components available to begin with. 4x3 supposedly doesn't do this, but opengl is column-major, so this makes little sense. --- indra/llrender/llgl.cpp | 11 ++++++- indra/llrender/llgl.h | 2 ++ indra/llrender/llglheaders.h | 3 ++ indra/llrender/llglslshader.cpp | 17 +++++++++++ indra/llrender/llglslshader.h | 1 + .../shaders/class1/avatar/objectSkinV.glsl | 28 ++++++++--------- indra/newview/featuretable.txt | 9 ++++++ indra/newview/featuretable_linux.txt | 10 +++++++ indra/newview/featuretable_mac.txt | 9 ++++++ indra/newview/lldrawpoolavatar.cpp | 30 ++++++++----------- indra/newview/llfeaturemanager.cpp | 16 +++++++--- indra/newview/llviewershadermgr.cpp | 2 +- 12 files changed, 98 insertions(+), 40 deletions(-) diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 064183d49..1886a4d48 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -304,6 +304,7 @@ PFNGLUNIFORM3IVARBPROC glUniform3ivARB = NULL; PFNGLUNIFORM4IVARBPROC glUniform4ivARB = NULL; PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB = NULL; PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB = NULL; +PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB = NULL; PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB = NULL; PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB = NULL; PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB = NULL; @@ -469,7 +470,8 @@ LLGLManager::LLGLManager() : mGLSLVersionMinor(0), mVRAM(0), mGLMaxVertexRange(0), - mGLMaxIndexRange(0) + mGLMaxIndexRange(0), + mGLMaxVertexUniformComponents(0) { } @@ -715,6 +717,12 @@ bool LLGLManager::initGL() mNumTextureImageUnits = llmin(num_tex_image_units, 32); } + if (mHasVertexShader) + { + //According to the spec, the resulting value should never be less than 512. We need at least 1024 to use skinned shaders. + glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS_ARB, &mGLMaxVertexUniformComponents); + } + if (LLRender::sGLCoreProfile) { mNumTextureUnits = llmin(mNumTextureImageUnits, MAX_GL_TEXTURE_UNITS); @@ -1269,6 +1277,7 @@ void LLGLManager::initExtensions() glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4ivARB"); glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix2fvARB"); glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3fvARB"); + glUniformMatrix3x4fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3x4fv"); glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix4fvARB"); glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterfvARB"); glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterivARB"); diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 9b277c70a..7a11d1b2e 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -151,6 +151,8 @@ public: S32 mVRAM; // VRAM in MB S32 mGLMaxVertexRange; S32 mGLMaxIndexRange; + + S32 mGLMaxVertexUniformComponents; void getPixelFormat(); // Get the best pixel format diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h index 11074a7fd..341c3adc9 100644 --- a/indra/llrender/llglheaders.h +++ b/indra/llrender/llglheaders.h @@ -152,6 +152,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB; extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB; extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB; extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB; +extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB; extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB; extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB; extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB; @@ -415,6 +416,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB; extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB; extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB; extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB; +extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB; extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB; extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB; extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB; @@ -656,6 +658,7 @@ extern PFNGLUNIFORM3IVARBPROC glUniform3ivARB; extern PFNGLUNIFORM4IVARBPROC glUniform4ivARB; extern PFNGLUNIFORMMATRIX2FVARBPROC glUniformMatrix2fvARB; extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3fvARB; +extern PFNGLUNIFORMMATRIX3FVARBPROC glUniformMatrix3x4fvARB; extern PFNGLUNIFORMMATRIX4FVARBPROC glUniformMatrix4fvARB; extern PFNGLGETOBJECTPARAMETERFVARBPROC glGetObjectParameterfvARB; extern PFNGLGETOBJECTPARAMETERIVARBPROC glGetObjectParameterivARB; diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp index 974bbb3b5..7f09a3be8 100644 --- a/indra/llrender/llglslshader.cpp +++ b/indra/llrender/llglslshader.cpp @@ -933,6 +933,23 @@ void LLGLSLShader::uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, c } } +void LLGLSLShader::uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v) +{ + if (mProgramObject > 0) + { + if (mUniform.size() <= index) + { + UNIFORM_ERRS << "Uniform index out of bounds." << LL_ENDL; + return; + } + + if (mUniform[index] >= 0) + { + glUniformMatrix3x4fvARB(mUniform[index], count, transpose, v); + } + } +} + void LLGLSLShader::uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v) { if (mProgramObject > 0) diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h index 1f134a881..a530d2658 100644 --- a/indra/llrender/llglslshader.h +++ b/indra/llrender/llglslshader.h @@ -100,6 +100,7 @@ public: void uniform2i(const LLStaticHashedString& uniform, GLint i, GLint j); void uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v); void uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v); + void uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v); void uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v); void uniform1i(const LLStaticHashedString& uniform, GLint i); void uniform1f(const LLStaticHashedString& uniform, GLfloat v); diff --git a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl index 7df4f96ce..6acbf0aaf 100644 --- a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl +++ b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl @@ -24,12 +24,9 @@ ATTRIBUTE vec4 weight4; -uniform mat3 matrixPalette[52]; -uniform vec3 translationPalette[52]; +uniform mat3x4 matrixPalette[52]; uniform float maxWeight; - - mat4 getObjectSkinnedTransform() { int i; @@ -47,22 +44,21 @@ mat4 getObjectSkinnedTransform() int i3 = int(index.z); int i4 = int(index.w); + mat3 mat = mat3(matrixPalette[i1])*w.x; + mat += mat3(matrixPalette[i2])*w.y; + mat += mat3(matrixPalette[i3])*w.z; + mat += mat3(matrixPalette[i4])*w.w; - mat3 mat = matrixPalette[i1]*w.x; - mat += matrixPalette[i2]*w.y; - mat += matrixPalette[i3]*w.z; - mat += matrixPalette[i4]*w.w; - - vec3 trans = translationPalette[i1]*w.x; - trans += translationPalette[i2]*w.y; - trans += translationPalette[i3]*w.z; - trans += translationPalette[i4]*w.w; + vec3 trans = vec3(matrixPalette[i1][0].w,matrixPalette[i1][1].w,matrixPalette[i1][2].w)*w.x; + trans += vec3(matrixPalette[i2][0].w,matrixPalette[i2][1].w,matrixPalette[i2][2].w)*w.y; + trans += vec3(matrixPalette[i3][0].w,matrixPalette[i3][1].w,matrixPalette[i3][2].w)*w.z; + trans += vec3(matrixPalette[i4][0].w,matrixPalette[i4][1].w,matrixPalette[i4][2].w)*w.w; mat4 ret; - ret[0] = vec4(mat[0].xyz, 0); - ret[1] = vec4(mat[1].xyz, 0); - ret[2] = vec4(mat[2].xyz, 0); + ret[0] = vec4(mat[0], 0); + ret[1] = vec4(mat[1], 0); + ret[2] = vec4(mat[2], 0); ret[3] = vec4(trans, sum); return ret; diff --git a/indra/newview/featuretable.txt b/indra/newview/featuretable.txt index 111c7a431..90dd09387 100644 --- a/indra/newview/featuretable.txt +++ b/indra/newview/featuretable.txt @@ -301,6 +301,10 @@ RenderObjectBump 0 0 list OpenGLPre15 RenderVBOEnable 1 0 +list OpenGLPre21 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 + list OpenGLPre30 RenderDeferred 0 0 RenderMaxTextureIndex 1 1 @@ -486,6 +490,7 @@ Disregard128DefaultDrawDistance 1 0 list ATIOldDriver RenderAvatarVP 0 0 RenderAvatarCloth 0 0 +RenderDeferred 0 0 // ATI cards generally perform better when not using VBOs for streaming data @@ -596,3 +601,7 @@ RenderShaderLightingMaxLevel 1 2 list NVIDIA_GeForce_Go_7900 RenderShaderLightingMaxLevel 1 2 +list VertexUniformsLT1024 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 +RenderDeferred 0 0 diff --git a/indra/newview/featuretable_linux.txt b/indra/newview/featuretable_linux.txt index 90b788f4f..043d44866 100644 --- a/indra/newview/featuretable_linux.txt +++ b/indra/newview/featuretable_linux.txt @@ -301,6 +301,11 @@ RenderObjectBump 0 0 list OpenGLPre15 RenderVBOEnable 1 0 +list OpenGLPre21 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 +RenderDeferred 0 0 + list IntelPre30 RenderAnisotropic 1 0 // Avoid some Intel crashes on Linux @@ -458,6 +463,7 @@ Disregard128DefaultDrawDistance 1 0 list ATIOldDriver RenderAvatarVP 0 0 RenderAvatarCloth 0 0 +RenderDeferred 0 0 // Avoid driver crashes with some features on Linux with old ATI drivers UseOcclusion 0 0 WindLightUseAtmosShaders 0 0 @@ -563,3 +569,7 @@ RenderShaderLightingMaxLevel 1 2 list NVIDIA_GeForce_Go_7900 RenderShaderLightingMaxLevel 1 2 +list VertexUniformsLT1024 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 +RenderDeferred 0 0 \ No newline at end of file diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt index 874254439..96384d700 100644 --- a/indra/newview/featuretable_mac.txt +++ b/indra/newview/featuretable_mac.txt @@ -296,6 +296,11 @@ RenderObjectBump 0 0 list OpenGLPre15 RenderVBOEnable 1 0 +list OpenGLPre21 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 +RenderDeferred 0 0 + list TexUnit8orLess RenderDeferredSSAO 0 0 @@ -521,3 +526,7 @@ Disregard128DefaultDrawDistance 1 0 list NVIDIA_GeForce_Go_7400 Disregard128DefaultDrawDistance 1 0 +list VertexUniformsLT1024 +RenderAvatarVP 0 0 +RenderAvatarCloth 0 0 +RenderDeferred 0 0 \ No newline at end of file diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index ae09282b4..dacc26422 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -1547,41 +1547,35 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) stop_glerror(); - F32 mp[JOINT_COUNT*9]; - - F32 transp[JOINT_COUNT*3]; + F32 mp[JOINT_COUNT*12]; for (U32 i = 0; i < count; ++i) { F32* m = (F32*) mat[i].mMatrix; - U32 idx = i*9; + U32 idx = i*12; mp[idx+0] = m[0]; mp[idx+1] = m[1]; mp[idx+2] = m[2]; + mp[idx+3] = m[12]; - mp[idx+3] = m[4]; - mp[idx+4] = m[5]; - mp[idx+5] = m[6]; + mp[idx+4] = m[4]; + mp[idx+5] = m[5]; + mp[idx+6] = m[6]; + mp[idx+7] = m[13]; - mp[idx+6] = m[8]; - mp[idx+7] = m[9]; - mp[idx+8] = m[10]; - - idx = i*3; - - transp[idx+0] = m[12]; - transp[idx+1] = m[13]; - transp[idx+2] = m[14]; + mp[idx+8] = m[8]; + mp[idx+9] = m[9]; + mp[idx+10] = m[10]; + mp[idx+11] = m[14]; } - LLDrawPoolAvatar::sVertexProgram->uniformMatrix3fv(LLViewerShaderMgr::AVATAR_MATRIX, + LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX, count, FALSE, (GLfloat*) mp); - LLDrawPoolAvatar::sVertexProgram->uniform3fv(LLShaderMgr::AVATAR_TRANSLATION, count, transp); LLDrawPoolAvatar::sVertexProgram->uniform1f(LLShaderMgr::AVATAR_MAX_WEIGHT, F32(count-1)); stop_glerror(); diff --git a/indra/newview/llfeaturemanager.cpp b/indra/newview/llfeaturemanager.cpp index c3d0bc8a6..f58f1f577 100644 --- a/indra/newview/llfeaturemanager.cpp +++ b/indra/newview/llfeaturemanager.cpp @@ -634,13 +634,17 @@ void LLFeatureManager::applyBaseMasks() { maskFeatures("Intel"); } - if (gGLManager.mGLVersion < 1.5f) - { - maskFeatures("OpenGLPre15"); - } if (gGLManager.mGLVersion < 3.f) { maskFeatures("OpenGLPre30"); + if(gGLManager.mGLVersion < 2.1f || glUniformMatrix3x4fvARB == NULL) //glUniformMatrix3x4fv is part of glsl 1.20 spec. + { + maskFeatures("OpenGLPre21"); + if (gGLManager.mGLVersion < 1.5f) + { + maskFeatures("OpenGLPre15"); + } + } } if (gGLManager.mNumTextureImageUnits <= 8) { @@ -650,6 +654,10 @@ void LLFeatureManager::applyBaseMasks() { maskFeatures("MapBufferRange"); } + if (gGLManager.mGLMaxVertexUniformComponents < 1024) + { + maskFeatures("VertexUniformsLT1024"); + } // now mask by gpu string // Replaces ' ' with '_' in mGPUString to deal with inability for parser to handle spaces diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index 67ec117a7..4c1a50296 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -436,7 +436,7 @@ void LLViewerShaderMgr::setShaders() mVertexShaderLevel[SHADER_AVATAR] = 3; mMaxAvatarShaderLevel = 3; - if (gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject()) + if (LLFeatureManager::getInstance()->isFeatureAvailable("RenderAvatarVP") && gSavedSettings.getBOOL("RenderAvatarVP") && loadShadersObject()) { //hardware skinning is enabled and rigged attachment shaders loaded correctly BOOL avatar_cloth = gSavedSettings.getBOOL("RenderAvatarCloth"); S32 avatar_class = 1;