From b0a4acdfdb0cd99d2c326b9e1c0aa149babc5b8a Mon Sep 17 00:00:00 2001 From: Rezmason Date: Sun, 14 Nov 2021 22:01:56 -0800 Subject: [PATCH] Rewrote the WebGPU bloom pass based on the classic Unreal solution of blurring and combining the levels of an image pyramid. Fixed the regl bloom pass to use the downscaled blurred mipmap levels to build the first pyramid. --- TODO.txt | 15 ++---- js/regl/bloomPass.js | 2 +- js/webgpu/bloomPass.js | 89 ++++++++++++++++++++++++---------- js/webgpu/imagePass.js | 2 +- js/webgpu/utils.js | 27 +++++++++-- shaders/wgsl/bloomBlur.wgsl | 45 +++++++++++++++++ shaders/wgsl/bloomCombine.wgsl | 31 ++++++++++++ shaders/wgsl/blur1D.wgsl | 37 -------------- 8 files changed, 170 insertions(+), 78 deletions(-) create mode 100644 shaders/wgsl/bloomBlur.wgsl create mode 100644 shaders/wgsl/bloomCombine.wgsl delete mode 100644 shaders/wgsl/blur1D.wgsl diff --git a/TODO.txt b/TODO.txt index cceab9c..59b4aa4 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,18 +1,13 @@ TODO: WebGPU - - Figure out texture pyramid stuff - Idea: use mip levels to store stuff - Multiple draw calls, if level N must downsample level N - 1 - Does every level HAVE to be downsampled from the previous level? Or can they all be downsampled from a single source? - What about in my case in particular? I'm blurring everything anyway. - - blur pass - Switch to rgba32float somehow? - Why isn't this straightforward? + Rename setSize to rebuild — it is the function that receives inputs as well as screen size + Create and store the bloom bind groups on resize + Make sure everything is properly commented Update links in issues Get rid of end pass once it's possible to copy a bgra8unorm to a canvas texture + Switch to rgba32float somehow? + Why isn't this straightforward? 
Support looping diff --git a/js/regl/bloomPass.js b/js/regl/bloomPass.js index d8f1c00..c796039 100644 --- a/js/regl/bloomPass.js +++ b/js/regl/bloomPass.js @@ -103,7 +103,7 @@ export default ({ regl, config }, inputs) => { const highPassFBO = highPassPyramid[i]; const hBlurFBO = hBlurPyramid[i]; const vBlurFBO = vBlurPyramid[i]; - highPass({ fbo: highPassFBO, frag: highPassFrag.text(), tex: inputs.primary }); + highPass({ fbo: highPassFBO, frag: highPassFrag.text(), tex: i === 0 ? inputs.primary : highPassPyramid[i - 1] }); blur({ fbo: hBlurFBO, frag: blurFrag.text(), tex: highPassFBO, direction: [1, 0] }); blur({ fbo: vBlurFBO, frag: blurFrag.text(), tex: hBlurFBO, direction: [0, 1] }); } diff --git a/js/webgpu/bloomPass.js b/js/webgpu/bloomPass.js index cc33148..683959e 100644 --- a/js/webgpu/bloomPass.js +++ b/js/webgpu/bloomPass.js @@ -1,13 +1,15 @@ import { structs } from "/lib/gpu-buffer.js"; -import { makeComputeTarget, loadShader, makeUniformBuffer, makeBindGroup, makePass } from "./utils.js"; +import { makeComputeTarget, makePyramidView, loadShader, makeUniformBuffer, makeBindGroup, makePass } from "./utils.js"; export default (context, getInputs) => { const { config, device } = context; + const pyramidHeight = 4; const bloomSize = config.bloomSize; - const bloomStrength = config.newBloomStrength; + const bloomStrength = config.bloomStrength; + const bloomRadius = 2; // Looks better with more, but is more costly - const enabled = bloomSize > 0 && bloomStrength > 0; + const enabled = true; // If there's no bloom to apply, return a no-op pass with an empty bloom texture if (!enabled) { @@ -16,17 +18,22 @@ export default (context, getInputs) => { return makePass(getOutputs); } - const assets = [loadShader(device, "shaders/wgsl/blur1D.wgsl")]; + const assets = [loadShader(device, "shaders/wgsl/bloomBlur.wgsl"), loadShader(device, "shaders/wgsl/bloomCombine.wgsl")]; - const nearestSampler = device.createSampler({}); + const linearSampler = 
device.createSampler({ + magFilter: "linear", + minFilter: "linear", + }); - let computePipeline; - let configUniforms; - let horizontalConfigBuffer; - let verticalConfigBuffer; - let intermediate; + let blurPipeline; + let combinePipeline; + let hBlurPyramid; + let vBlurPyramid; + let hBlurBuffer; + let vBlurBuffer; + let combineBuffer; let output; - let screenSize; + let scaledScreenSize; const getOutputs = () => ({ primary: getInputs().primary, @@ -34,39 +41,69 @@ export default (context, getInputs) => { }); const ready = (async () => { - const [blurShader] = await Promise.all(assets); + const [blurShader, combineShader] = await Promise.all(assets); - computePipeline = device.createComputePipeline({ + blurPipeline = device.createComputePipeline({ compute: { module: blurShader.module, entryPoint: "computeMain", }, }); - configUniforms = structs.from(blurShader.code).Config; + combinePipeline = device.createComputePipeline({ + compute: { + module: combineShader.module, + entryPoint: "computeMain", + }, + }); + + const blurUniforms = structs.from(blurShader.code).Config; + hBlurBuffer = makeUniformBuffer(device, blurUniforms, { bloomRadius, direction: [1, 0] }); + vBlurBuffer = makeUniformBuffer(device, blurUniforms, { bloomRadius, direction: [0, 1] }); + + const combineUniforms = structs.from(combineShader.code).Config; + combineBuffer = makeUniformBuffer(device, combineUniforms, { bloomStrength, pyramidHeight }); })(); const setSize = (width, height) => { - intermediate?.destroy(); - intermediate = makeComputeTarget(device, Math.floor(width * bloomSize), height); + hBlurPyramid?.destroy(); + hBlurPyramid = makeComputeTarget(device, Math.floor(width * bloomSize), Math.floor(height * bloomSize), pyramidHeight); + + vBlurPyramid?.destroy(); + vBlurPyramid = makeComputeTarget(device, Math.floor(width * bloomSize), Math.floor(height * bloomSize), pyramidHeight); + output?.destroy(); output = makeComputeTarget(device, Math.floor(width * bloomSize), Math.floor(height * 
bloomSize)); - screenSize = [width, height]; - - horizontalConfigBuffer = makeUniformBuffer(device, configUniforms, { bloomStrength, direction: [0, bloomSize] }); - verticalConfigBuffer = makeUniformBuffer(device, configUniforms, { bloomStrength, direction: [1, 0] }); + scaledScreenSize = [Math.floor(width * bloomSize), Math.floor(height * bloomSize)]; }; const execute = (encoder) => { const inputs = getInputs(); const tex = inputs.primary; - const intermediateView = intermediate.createView(); + const computePass = encoder.beginComputePass(); - computePass.setPipeline(computePipeline); - computePass.setBindGroup(0, makeBindGroup(device, computePipeline, 0, [horizontalConfigBuffer, nearestSampler, tex.createView(), intermediateView])); - computePass.dispatch(Math.ceil(Math.floor(screenSize[0] * bloomSize) / 32), screenSize[1], 1); - computePass.setBindGroup(0, makeBindGroup(device, computePipeline, 0, [verticalConfigBuffer, nearestSampler, intermediateView, output.createView()])); - computePass.dispatch(Math.ceil(Math.floor(screenSize[0] * bloomSize) / 32), Math.floor(screenSize[1] * bloomSize), 1); + + computePass.setPipeline(blurPipeline); + const hBlurPyramidViews = Array(pyramidHeight) + .fill() + .map((_, level) => makePyramidView(hBlurPyramid, level)); + const vBlurPyramidViews = Array(pyramidHeight) + .fill() + .map((_, level) => makePyramidView(vBlurPyramid, level)); + for (let i = 0; i < pyramidHeight; i++) { + const downsample = 2 ** -i; + const size = [Math.ceil(Math.floor(scaledScreenSize[0] * downsample) / 32), Math.floor(Math.floor(scaledScreenSize[1] * downsample)), 1]; + const srcView = i === 0 ? 
tex.createView() : hBlurPyramidViews[i - 1]; + computePass.setBindGroup(0, makeBindGroup(device, blurPipeline, 0, [hBlurBuffer, linearSampler, srcView, hBlurPyramidViews[i]])); + computePass.dispatch(...size); + computePass.setBindGroup(0, makeBindGroup(device, blurPipeline, 0, [vBlurBuffer, linearSampler, hBlurPyramidViews[i], vBlurPyramidViews[i]])); + computePass.dispatch(...size); + } + + computePass.setPipeline(combinePipeline); + computePass.setBindGroup(0, makeBindGroup(device, combinePipeline, 0, [combineBuffer, linearSampler, vBlurPyramid.createView(), output.createView()])); + computePass.dispatch(Math.ceil(scaledScreenSize[0] / 32), scaledScreenSize[1], 1); + computePass.endPass(); }; diff --git a/js/webgpu/imagePass.js b/js/webgpu/imagePass.js index 4a599d5..7ea908d 100644 --- a/js/webgpu/imagePass.js +++ b/js/webgpu/imagePass.js @@ -1,4 +1,4 @@ -import { makeComputeTarget, loadTexture, loadShader, makeUniformBuffer, makeBindGroup, makePass } from "./utils.js"; +import { makeComputeTarget, loadTexture, loadShader, makeBindGroup, makePass } from "./utils.js"; // Multiplies the rendered rain and bloom by a loaded in image diff --git a/js/webgpu/utils.js b/js/webgpu/utils.js index cb2dfff..d72e263 100644 --- a/js/webgpu/utils.js +++ b/js/webgpu/utils.js @@ -27,16 +27,18 @@ const loadTexture = async (device, url) => { return texture; }; -const makeRenderTarget = (device, width, height, format) => +const makeRenderTarget = (device, width, height, format, mipLevelCount = 1) => device.createTexture({ size: [width, height, 1], + mipLevelCount, format, usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_SRC | GPUTextureUsage.COPY_DST | GPUTextureUsage.RENDER_ATTACHMENT, }); -const makeComputeTarget = (device, width, height) => +const makeComputeTarget = (device, width, height, mipLevelCount = 1) => device.createTexture({ size: [width, height, 1], + mipLevelCount, format: "rgba8unorm", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_SRC | 
GPUTextureUsage.COPY_DST | GPUTextureUsage.STORAGE_BINDING, }); @@ -76,6 +78,13 @@ const make1DTexture = (device, rgbas) => { return texture; }; +const makePyramidView = (texture, level) => + texture.createView({ + baseMipLevel: level, + mipLevelCount: 1, + dimension: "2d", + }); + const makeBindGroup = (device, pipeline, index, entries) => device.createBindGroup({ layout: pipeline.getBindGroupLayout(index), @@ -97,4 +106,16 @@ const makePass = (getOutputs, ready, setSize, execute) => ({ const makePipeline = (context, steps) => steps.filter((f) => f != null).reduce((pipeline, f, i) => [...pipeline, f(context, i == 0 ? null : pipeline[i - 1].getOutputs)], []); -export { getCanvasSize, makeRenderTarget, makeComputeTarget, make1DTexture, loadTexture, loadShader, makeUniformBuffer, makePass, makePipeline, makeBindGroup }; +export { + getCanvasSize, + makeRenderTarget, + makeComputeTarget, + make1DTexture, + makePyramidView, + loadTexture, + loadShader, + makeUniformBuffer, + makePass, + makePipeline, + makeBindGroup, +}; diff --git a/shaders/wgsl/bloomBlur.wgsl b/shaders/wgsl/bloomBlur.wgsl new file mode 100644 index 0000000..fb778c2 --- /dev/null +++ b/shaders/wgsl/bloomBlur.wgsl @@ -0,0 +1,45 @@ +let ONE_OVER_SQRT_2PI = 0.39894; + +[[block]] struct Config { + bloomRadius : f32; + direction : vec2<f32>; +}; + +[[group(0), binding(0)]] var<uniform> config : Config; +[[group(0), binding(1)]] var linearSampler : sampler; +[[group(0), binding(2)]] var tex : texture_2d<f32>; +[[group(0), binding(3)]] var outputTex : texture_storage_2d<rgba8unorm, write>; + +struct ComputeInput { + [[builtin(global_invocation_id)]] id : vec3<u32>; +}; + +fn gaussianPDF(x : f32) -> f32 { + return ONE_OVER_SQRT_2PI * exp( -0.5 * + ( x * x ) / ( config.bloomRadius * config.bloomRadius ) + ) / config.bloomRadius; +} + +[[stage(compute), workgroup_size(32, 1, 1)]] fn computeMain(input : ComputeInput) { + + var coord = vec2<i32>(input.id.xy); + var outputSize = textureDimensions(outputTex); + + if (coord.x >= outputSize.x) { + return; + } + + 
var uv = (vec2<f32>(coord) + 0.5) / vec2<f32>(outputSize); + var uvOffset = config.direction / vec2<f32>(outputSize); + + var weightSum = gaussianPDF(0.0); + var sum = textureSampleLevel( tex, linearSampler, uv, 0.0) * weightSum; + for (var x : f32 = 1.0; x < config.bloomRadius; x = x + 1.0) { + var weight = gaussianPDF(x); + sum = sum + textureSampleLevel( tex, linearSampler, uv + uvOffset * x, 0.0) * weight; + sum = sum + textureSampleLevel( tex, linearSampler, uv - uvOffset * x, 0.0) * weight; + weightSum = weightSum + weight * 2.0; + } + + textureStore(outputTex, coord, sum / weightSum); +} diff --git a/shaders/wgsl/bloomCombine.wgsl b/shaders/wgsl/bloomCombine.wgsl new file mode 100644 index 0000000..8c553d4 --- /dev/null +++ b/shaders/wgsl/bloomCombine.wgsl @@ -0,0 +1,31 @@ +[[block]] struct Config { + bloomStrength : f32; + pyramidHeight : f32; +}; + +[[group(0), binding(0)]] var<uniform> config : Config; +[[group(0), binding(1)]] var linearSampler : sampler; +[[group(0), binding(2)]] var tex : texture_2d<f32>; +[[group(0), binding(3)]] var outputTex : texture_storage_2d<rgba8unorm, write>; + +struct ComputeInput { + [[builtin(global_invocation_id)]] id : vec3<u32>; +}; + +[[stage(compute), workgroup_size(32, 1, 1)]] fn computeMain(input : ComputeInput) { + + var coord = vec2<i32>(input.id.xy); + var outputSize = textureDimensions(outputTex); + + if (coord.x >= outputSize.x) { + return; + } + + var uv = (vec2<f32>(coord) + 0.5) / vec2<f32>(outputSize); + var sum = vec4<f32>(0.0); + for (var i = 0.0; i < config.pyramidHeight; i = i + 1.0) { + sum = sum + (1.0 - i / config.pyramidHeight) * textureSampleLevel( tex, linearSampler, uv, i + 1.0 ); + } + + textureStore(outputTex, coord, sum * config.bloomStrength); +} diff --git a/shaders/wgsl/blur1D.wgsl b/shaders/wgsl/blur1D.wgsl deleted file mode 100644 index bc189ec..0000000 --- a/shaders/wgsl/blur1D.wgsl +++ /dev/null @@ -1,37 +0,0 @@ -[[block]] struct Config { - bloomStrength : f32; - direction : vec2<f32>; -}; - -[[group(0), binding(0)]] var<uniform> config : Config; -[[group(0), binding(1)]] 
var nearestSampler : sampler; -[[group(0), binding(2)]] var tex : texture_2d<f32>; -[[group(0), binding(3)]] var outputTex : texture_storage_2d<rgba8unorm, write>; - -struct ComputeInput { - [[builtin(global_invocation_id)]] id : vec3<u32>; -}; - -[[stage(compute), workgroup_size(32, 1, 1)]] fn computeMain(input : ComputeInput) { - - var coord = vec2<i32>(input.id.xy); - var outputSize = textureDimensions(outputTex); - - if (coord.x >= outputSize.x) { - return; - } - - var uv = (vec2<f32>(coord) + 0.5) / vec2<f32>(outputSize); - var offset = config.direction / vec2<f32>(outputSize); - var sum = vec4<f32>(0.0); - - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * 3.0, 0.0 ) * 0.006; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * 2.0, 0.0 ) * 0.061; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * 1.0, 0.0 ) * 0.242; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * 0.0, 0.0 ) * 0.383; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * -1.0, 0.0 ) * 0.242; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * -2.0, 0.0 ) * 0.061; - sum = sum + textureSampleLevel( tex, nearestSampler, uv + offset * -3.0, 0.0 ) * 0.006; - - textureStore(outputTex, coord, sum * config.bloomStrength); -}