I replaced the Sobol sampling with simple spiral sampling, with a per-pixel rotation matrix premultiplied into PCFUVMatrix. It doesn't look as good yet, but it saves 300 assembly lines. This makes me wonder whether Sobol random sampling is worth it. With an unrolled loop it's easy to precalculate the sampling points. The same optimization can be applied to the blocker search too. This needs more testing.
Code:
// PCF filtering pass using a fixed spiral sample pattern in place of Sobol points.
// Reads: FilterRadius, Settings, View, ShadowPosition, DepthBiasDotFactors, ScaledAndBiasedDepth.
// Updates: PCFUVMatrix (scaled, then rotated) and VisibleLightAccumulation (summed per sample).

// Apply a uniform scale of FilterRadius to the kernel matrix.
PCFUVMatrix = mul(float2x2(FilterRadius, 0, 0, FilterRadius), PCFUVMatrix);

// Rotation angle in [0, 2*PI), derived from the pixel position and
// View.StateFrameIndexMod8 (presumably the frame index modulo 8 -- confirm against View).
float RandAngle = 2.0f * PI * frac(7.1721f * Settings.SvPosition.x + 11.131f * Settings.SvPosition.y + View.StateFrameIndexMod8 * 0.125f);

// Premultiply the 2D rotation so the whole sample pattern is rotated by RandAngle.
const float PixelRotationSin = sin(RandAngle);
const float PixelRotationCos = cos(RandAngle);
const float2x2 PixelRotation = float2x2(
	PixelRotationCos, -PixelRotationSin,
	PixelRotationSin,  PixelRotationCos);
PCFUVMatrix = mul(PixelRotation, PCFUVMatrix);

UNROLL
for (int SampleIndex = 0; SampleIndex < PCSS_SAMPLES; SampleIndex++)
{
	// Spiral point: the angle advances by PI * 4.71 per sample and the radius grows as
	// sqrt((SampleIndex + 1) / PCSS_SAMPLES), so the last sample has radius 1.
	// Both depend only on the loop index.
	const float SpiralAngle = SampleIndex * PI * 4.71f;
	const float SpiralRadius = sqrt(float(SampleIndex + 1) * (1.0f / float(PCSS_SAMPLES)));
	const float2 SpiralPoint = float2(sin(SpiralAngle), cos(SpiralAngle)) * SpiralRadius;
	// Previous Sobol-based sample generation, kept for reference:
	//float2 PCFSample = RandToCircle(SobolIndex(SobolRandom, j << 3, PCSS_SAMPLE_BITS + 3));

	// Transform the spiral point by the scaled and rotated kernel matrix, then offset
	// the shadow lookup position by it, scaled by ShadowTileOffsetAndSize.zw.
	const float2 KernelOffset = mul(PCFUVMatrix, SpiralPoint);
	const float2 TapUV = ShadowPosition + KernelOffset * Settings.ShadowTileOffsetAndSize.zw;

	// Per-tap depth bias, clamped so it is never negative.
	const float TapDepthBias = max(dot(DepthBiasDotFactors, KernelOffset), 0);

	// Term added to every scaled depth before the saturate below.
	const float CompareOffset = Settings.TransitionScale * TapDepthBias + ScaledAndBiasedDepth;

	#if FEATURE_GATHER4
		// Gather fetches four depths in one instruction; each comparison result is weighted by 0.25.
		const float4 TapDepths = Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, TapUV);
		VisibleLightAccumulation += dot(0.25, saturate(TapDepths * Settings.TransitionScale + CompareOffset));
	#else
		// Single depth fetch from mip 0.
		const float TapDepth = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, TapUV, 0).r;
		VisibleLightAccumulation += saturate(TapDepth * Settings.TransitionScale + CompareOffset);
	#endif
}
Comment