Applied Tofu Linden's SSAO improvements. e189d55a7b

2012-12-08 16:51:38 -06:00
parent 08a3828cc9
commit 6b05022530
9 changed files with 75 additions and 102 deletions
--- a/indra/llrender/llshadermgr.cpp
+++ b/indra/llrender/llshadermgr.cpp
@@ -1120,7 +1120,7 @@ void LLShaderMgr::initAttribsAndUniforms()
 	mReservedUniforms.push_back("ssao_max_radius");
 	mReservedUniforms.push_back("ssao_factor");
 	mReservedUniforms.push_back("ssao_factor_inv");
-	mReservedUniforms.push_back("ssao_effect_mat");
+	mReservedUniforms.push_back("ssao_effect");
 	mReservedUniforms.push_back("screen_res");
 	mReservedUniforms.push_back("near_clip");
 	mReservedUniforms.push_back("shadow_offset");
--- a/indra/llrender/llshadermgr.h
+++ b/indra/llrender/llshadermgr.h
@@ -120,7 +120,7 @@ public:
 		DEFERRED_SSAO_MAX_RADIUS,
 		DEFERRED_SSAO_FACTOR,
 		DEFERRED_SSAO_FACTOR_INV,
-		DEFERRED_SSAO_EFFECT_MAT,
+		DEFERRED_SSAO_EFFECT,
 		DEFERRED_SCREEN_RES,
 		DEFERRED_NEAR_CLIP,
 		DEFERRED_SHADOW_OFFSET,
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -10536,7 +10536,7 @@ Found in Advanced->Rendering->Info Displays</string>
    <key>Type</key>
    <string>F32</string>
    <key>Value</key>
-    <real>500.0</real>
+    <real>1500.0</real>
  </map>
  <key>RenderSSAOMaxScale</key>
  <map>
@@ -10547,7 +10547,7 @@ Found in Advanced->Rendering->Info Displays</string>
    <key>Type</key>
    <string>U32</string>
    <key>Value</key>
-    <integer>200</integer>
+    <integer>250</integer>
  </map>
  <key>RenderSSAOFactor</key>
  <map>
@@ -10558,19 +10558,19 @@ Found in Advanced->Rendering->Info Displays</string>
    <key>Type</key>
    <string>F32</string>
    <key>Value</key>
-    <real>0.30</real>
+    <real>5.0</real>
  </map>
  <key>RenderSSAOEffect</key>
  <map>
    <key>Comment</key>
-    <string>Multiplier for (1) value and (2) saturation (HSV definition), for areas which are totally occluded.  Blends with original color for partly-occluded areas.  (Third component is unused.)</string>
+    <string>Multiplier for value (HSV definition) for areas which are totally occluded.  Blends with original color for partly-occluded areas.  (Only the first component is used; the second and third components are unused.)</string>
    <key>Persist</key>
    <integer>1</integer>
    <key>Type</key>
    <string>Vector3</string>
    <key>Value</key>
    <array>
-      <real>0.80</real>
+      <real>0.50</real>
      <real>1.00</real>
      <real>0.00</real>
    </array>
@@ -11157,7 +11157,7 @@ Found in Advanced->Rendering->Info Displays</string>
    <string>Vector3</string>
    <key>Value</key>
    <array>
-      <real>3.0</real>
+      <real>1.0</real>
      <real>2.0</real>
      <real>0.0</real>
    </array>
@@ -11172,7 +11172,7 @@ Found in Advanced->Rendering->Info Displays</string>
    <key>Type</key>
    <string>F32</string>
    <key>Value</key>
-    <real>1.4</real>
+    <real>3.0</real>
  </map>
  <key>RenderShadowBlurSamples</key>
  <map>
--- a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl
@@ -86,24 +86,23 @@ void main()
 	vec3 pos = getPosition(tc).xyz;
 	vec4 ccol = texture2DRect(lightMap, tc).rgba;
 	
-	vec2 dlt = kern_scale * delta / (1.0+norm.xy*norm.xy);
+	vec2 dlt = kern_scale * delta / (vec2(1.0)+norm.xy*norm.xy);
 	dlt /= max(-pos.z*dist_factor, 1.0);
 	
 	vec2 defined_weight = getKern(0).xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free'
 	vec4 col = defined_weight.xyxx * ccol;

 	// relax tolerance according to distance to avoid speckling artifacts, as angles and distances are a lot more abrupt within a small screen area at larger distances
-	float pointplanedist_tolerance_pow2 = pos.z*pos.z*0.00005;
+	float pointplanedist_tolerance_pow2 = pos.z*-0.001;

 	// perturb sampling origin slightly in screen-space to hide edge-ghosting artifacts where smoothing radius is quite large
-	float tc_mod = 0.5*(tc.x + tc.y); // mod(tc.x+tc.y,2)
-	tc_mod -= floor(tc_mod);
-	tc_mod *= 2.0;
+	vec2 tc_v = fract(0.5 * tc.xy); // we now have floor(mod(tc,2.0))*0.5
+	float tc_mod = 2.0 * abs(tc_v.x - tc_v.y); // diff of x,y makes checkerboard
 	tc += ( (tc_mod - 0.5) * getKern(1).z * dlt * 0.5 );

 	for (int i = 1; i < 4; i++)
 	{
-		vec2 samptc = tc + getKern(i).z*dlt;
+		vec2 samptc = (tc + getKern(i).z * dlt);
 	        vec3 samppos = getPosition(samptc).xyz; 
 		float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane
 		if (d*d <= pointplanedist_tolerance_pow2)
@@ -114,7 +113,7 @@ void main()
 	}
 	for (int i = 1; i < 4; i++)
 	{
-		vec2 samptc = tc - getKern(i).z*dlt;
+		vec2 samptc = (tc - getKern(i).z * dlt);
 	        vec3 samppos = getPosition(samptc).xyz; 
 		float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane
 		if (d*d <= pointplanedist_tolerance_pow2)
@@ -125,7 +124,7 @@ void main()
 	}

 	col /= defined_weight.xyxx;
-	col.y *= col.y;
+	col.y *= col.y; // delinearize SSAO effect post-blur
 	
 	frag_color = col;
 }
--- a/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl
@@ -62,7 +62,7 @@ uniform float max_y;
 uniform vec4 glow;
 uniform float scene_light_strength;
 uniform mat3 env_mat;
-uniform mat3 ssao_effect_mat;
+uniform float ssao_effect;

 uniform vec3 sun_dir;
 VARYING vec2 vary_fragcoord;
@@ -203,22 +203,15 @@ void calcAtmospherics(vec3 inPositionEye, float ambFactor) {
 	//increase ambient when there are more clouds
 	vec4 tmpAmbient = ambient + (vec4(1.) - ambient) * cloud_shadow * 0.5;
 	
-	/*  decrease value and saturation (that in HSV, not HSL) for occluded areas
-	 * // for HSV color/geometry used here, see http://gimp-savvy.com/BOOK/index.html?node52.html
-	 * // The following line of code performs the equivalent of:
-	 * float ambAlpha = tmpAmbient.a;
-	 * float ambValue = dot(vec3(tmpAmbient), vec3(0.577)); // projection onto <1/rt(3), 1/rt(3), 1/rt(3)>, the neutral white-black axis
-	 * vec3 ambHueSat = vec3(tmpAmbient) - vec3(ambValue);
-	 * tmpAmbient = vec4(RenderSSAOEffect.valueFactor * vec3(ambValue) + RenderSSAOEffect.saturationFactor *(1.0 - ambFactor) * ambHueSat, ambAlpha);
-	 */
-	tmpAmbient = vec4(mix(ssao_effect_mat * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a);
-
 	//haze color
 	setAdditiveColor(
 		vec3(blue_horizon * blue_weight * (sunlight*(1.-cloud_shadow) + tmpAmbient)
 	  + (haze_horizon * haze_weight) * (sunlight*(1.-cloud_shadow) * temp2.x
 		  + tmpAmbient)));

+	// decrease value for occluded areas
+	tmpAmbient = vec4(mix(ssao_effect * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a);
+
 	//brightness of surface both sunlight and ambient
 	setSunlitColor(vec3(sunlight * .5));
 	setAmblitColor(vec3(tmpAmbient * .25));
--- a/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl
@@ -65,8 +65,6 @@ vec4 getPosition(vec2 pos_screen)
 //calculate decreases in ambient lighting when crowded out (SSAO)
 float calcAmbientOcclusion(vec4 pos, vec3 norm)
 {
-	float ret = 1.0;
-	
 	vec2 kern[8];
 	// exponentially (^2) distant occlusion samples spread around origin
 	kern[0] = vec2(-1.0, 0.0) * 0.125*0.125;
@@ -82,36 +80,39 @@ float calcAmbientOcclusion(vec4 pos, vec3 norm)
 	vec3 pos_world = pos.xyz;
 	vec2 noise_reflect = texture2D(noiseMap, vary_fragcoord.xy/128.0).xy;
 		
+	// We treat the first sample as the origin, which definitely doesn't obscure itself thanks to being visible for sampling in the first place.
+	float points = 1.0;
 	float angle_hidden = 0.0;
-	int points = 0;
-		
-	float scale = min(ssao_radius / -pos_world.z, ssao_max_radius);
-		
-	// it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations unrolling?)
+			
+	// use a kernel scale that diminishes with distance.
+	// a scale of less than 32 is just wasting good samples, though.
+	float scale = max(32.0, min(ssao_radius / -pos.z, ssao_max_radius));
+	
+	// it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations (unrolling?)
 	for (int i = 0; i < 8; i++)
 	{
 		vec2 samppos_screen = pos_screen + scale * reflect(kern[i], noise_reflect);
-		vec3 samppos_world = getPosition(samppos_screen).xyz; 
-			
-		vec3 diff = pos_world - samppos_world;
-		float dist2 = dot(diff, diff);
-			
-		// assume each sample corresponds to an occluding sphere with constant radius, constant x-sectional area
-		// --> solid angle shrinking by the square of distance
-		//radius is somewhat arbitrary, can approx with just some constant k * 1 / dist^2
-		//(k should vary inversely with # of samples, but this is taken care of later)
-			
-		angle_hidden = angle_hidden + float(dot((samppos_world - 0.05*norm - pos_world), norm) > 0.0) * min(1.0/dist2, ssao_factor_inv);
-			
-		// 'blocked' samples (significantly closer to camera relative to pos_world) are "no data", not "no occlusion" 
-		points = points + int(diff.z > -1.0);
+		vec3 samppos_world = getPosition(samppos_screen).xyz; 	
+	
+		vec3 diff = samppos_world - pos.xyz;
+	
+		if (diff.z < ssao_factor && diff.z != 0.0)
+		{
+			float dist = length(diff);
+			float angrel = max(0.0, dot(norm.xyz, diff/dist));
+			float distrel = 1.0/(1.0+dist*dist);
+			float samplehidden = min(angrel, distrel);
+	
+			angle_hidden += (samplehidden);
+			points += 1.0;
+	    }
 	}
 		
-	angle_hidden = min(ssao_factor*angle_hidden/float(points), 1.0);
-		
-	ret = (1.0 - (float(points != 0) * angle_hidden));
-	
-	return min(ret, 1.0);
+	angle_hidden /= points;
+
+	float rtn = (1.0 - angle_hidden);
+
+	return (rtn * rtn);
 }

 vec3 unpack(vec2 tc)
--- a/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl
+++ b/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl
@@ -63,7 +63,7 @@ uniform vec4 glow;
 uniform float scene_light_strength;
 uniform mat3 env_mat;
 uniform vec4 shadow_clip;
-uniform mat3 ssao_effect_mat;
+uniform float ssao_effect;

 uniform mat4 inv_proj;
 uniform vec2 screen_res;
@@ -205,22 +205,15 @@ void calcAtmospherics(vec3 inPositionEye, float ambFactor) {
 	//increase ambient when there are more clouds
 	vec4 tmpAmbient = ambient + (vec4(1.) - ambient) * cloud_shadow * 0.5;
 	
-	/*  decrease value and saturation (that in HSV, not HSL) for occluded areas
-	 * // for HSV color/geometry used here, see http://gimp-savvy.com/BOOK/index.html?node52.html
-	 * // The following line of code performs the equivalent of:
-	 * float ambAlpha = tmpAmbient.a;
-	 * float ambValue = dot(vec3(tmpAmbient), vec3(0.577)); // projection onto <1/rt(3), 1/rt(3), 1/rt(3)>, the neutral white-black axis
-	 * vec3 ambHueSat = vec3(tmpAmbient) - vec3(ambValue);
-	 * tmpAmbient = vec4(RenderSSAOEffect.valueFactor * vec3(ambValue) + RenderSSAOEffect.saturationFactor *(1.0 - ambFactor) * ambHueSat, ambAlpha);
-	 */
-	tmpAmbient = vec4(mix(ssao_effect_mat * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a);
-
 	//haze color
 	setAdditiveColor(
 		vec3(blue_horizon * blue_weight * (sunlight*(1.-cloud_shadow) + tmpAmbient)
 	  + (haze_horizon * haze_weight) * (sunlight*(1.-cloud_shadow) * temp2.x
 		  + tmpAmbient)));

+	// decrease ambient value for occluded areas
+	tmpAmbient *= mix(ssao_effect, 1.0, ambFactor);
+
 	//brightness of surface both sunlight and ambient
 	setSunlitColor(vec3(sunlight * .5));
 	setAmblitColor(vec3(tmpAmbient * .25));
--- a/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl
+++ b/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl
@@ -97,46 +97,40 @@ vec2 getKern(int i)
 //calculate decreases in ambient lighting when crowded out (SSAO)
 float calcAmbientOcclusion(vec4 pos, vec3 norm)
 {
-	float ret = 1.0;
-
 	vec2 pos_screen = vary_fragcoord.xy;
-	vec3 pos_world = pos.xyz;
 	vec2 noise_reflect = texture2D(noiseMap, vary_fragcoord.xy/128.0).xy;
-		
+	
+	 // We treat the first sample as the origin, which definitely doesn't obscure itself thanks to being visible for sampling in the first place.
+	float points = 1.0;
 	float angle_hidden = 0.0;
-	float points = 0;
 		
-	float scale = min(ssao_radius / -pos_world.z, ssao_max_radius);
+	// use a kernel scale that diminishes with distance.
+	// a scale of less than 32 is just wasting good samples, though.
+	float scale = max(32.0, min(ssao_radius / -pos.z, ssao_max_radius));
 	
 	// it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations (unrolling?)
 	for (int i = 0; i < 8; i++)
 	{
 		vec2 samppos_screen = pos_screen + scale * reflect(getKern(i), noise_reflect);
 		vec3 samppos_world = getPosition(samppos_screen).xyz; 
-		
-		vec3 diff = pos_world - samppos_world;
-		float dist2 = dot(diff, diff);
-			
-		// assume each sample corresponds to an occluding sphere with constant radius, constant x-sectional area
-		// --> solid angle shrinking by the square of distance
-		//radius is somewhat arbitrary, can approx with just some constant k * 1 / dist^2
-		//(k should vary inversely with # of samples, but this is taken care of later)
-		
-		float funky_val = (dot((samppos_world - 0.05*norm - pos_world), norm) > 0.0) ? 1.0 : 0.0;
-		angle_hidden = angle_hidden + funky_val * min(1.0/dist2, ssao_factor_inv);
-			
-		// 'blocked' samples (significantly closer to camera relative to pos_world) are "no data", not "no occlusion" 
-		float diffz_val = (diff.z > -1.0) ? 1.0 : 0.0;
-		points = points + diffz_val;
-	}
-		
-	angle_hidden = min(ssao_factor*angle_hidden/points, 1.0);
-	
-	float points_val = (points > 0.0) ? 1.0 : 0.0;
-	ret = (1.0 - (points_val * angle_hidden));

-	ret = max(ret, 0.0);
-	return min(ret, 1.0);
+		vec3 diff = samppos_world - pos.xyz;
+
+		if (diff.z < ssao_factor && diff.z != 0.0)
+		{
+			float dist = length(diff);
+			float angrel = max(0.0, dot(norm.xyz, diff/dist));
+			float distrel = 1.0/(1.0+dist*dist);
+			float samplehidden = min(angrel, distrel);
+	
+			angle_hidden += (samplehidden);
+			points += 1.0;
+		}
+	}
+	
+	angle_hidden /= points;
+	float rtn = (1.0 - angle_hidden);
+	return (rtn * rtn);
 }

 float pcfShadow(sampler2DRectShadow shadowMap, vec4 stc, float scl, vec2 pos_screen)
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -7302,14 +7302,7 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n
 	shader.uniform1f(LLShaderMgr::DEFERRED_SSAO_FACTOR_INV, 1.0/ssao_factor);

 	LLVector3 ssao_effect = RenderSSAOEffect;
-	F32 matrix_diag = (ssao_effect[0] + 2.0*ssao_effect[1])/3.0;
-	F32 matrix_nondiag = (ssao_effect[0] - ssao_effect[1])/3.0;
-	// This matrix scales (proj of color onto <1/rt(3),1/rt(3),1/rt(3)>) by
-	// value factor, and scales remainder by saturation factor
-	F32 ssao_effect_mat[] = {	matrix_diag, matrix_nondiag, matrix_nondiag,
-								matrix_nondiag, matrix_diag, matrix_nondiag,
-								matrix_nondiag, matrix_nondiag, matrix_diag};
-	shader.uniformMatrix3fv(LLShaderMgr::DEFERRED_SSAO_EFFECT_MAT, 1, GL_FALSE, ssao_effect_mat);
+	shader.uniform1f(LLShaderMgr::DEFERRED_SSAO_EFFECT, ssao_effect[0]);

 	F32 shadow_offset_error = 1.f + RenderShadowOffsetError * fabsf(LLViewerCamera::getInstance()->getOrigin().mV[2]);
 	F32 shadow_bias_error = 1.f + RenderShadowBiasError * fabsf(LLViewerCamera::getInstance()->getOrigin().mV[2]);