From 58832ee9360bd13a10b5175d4af0637f16215400 Mon Sep 17 00:00:00 2001 From: Mikulas Florek Date: Mon, 16 Sep 2024 23:26:50 +0200 Subject: [PATCH] hlsl shaders --- data/models/demolevel/demolevel.mat | 2 +- data/models/pistol/darkermetal.mat | 2 +- data/models/pistol/magazine.mat | 2 +- data/models/pistol/material_003.mat | 2 +- data/models/pistol/metal.mat | 2 +- data/models/pistol/muzzle.mat | 2 +- data/models/pistol/wood.mat | 2 +- data/models/shapes/cone.mat | 2 +- data/models/shapes/cube.mat | 2 +- data/models/shapes/cylinder.mat | 2 +- data/models/shapes/material.mat | 2 +- data/models/shapes/monkey.mat | 2 +- data/models/shapes/plane.mat | 2 +- data/models/shapes/sphere.mat | 2 +- data/models/shapes/sphere_metal.mat | 2 +- data/models/shapes/torus.mat | 2 +- data/models/shapes/x.mat | 2 +- data/models/shapes/y.mat | 2 +- data/models/shapes/z.mat | 2 +- data/models/snake/darkgreen.mat | 2 +- data/models/snake/darkred.mat | 2 +- data/models/snake/lightgreen.mat | 2 +- data/models/snake/purple.mat | 2 +- data/models/snake/red.mat | 2 +- data/models/snake/teeth.mat | 2 +- data/models/snake/yellow.mat | 2 +- data/models/ybot/alpha_body_mat.mat | 2 +- data/models/ybot/alpha_joints_mat.mat | 2 +- data/models/ybot/material.mat | 2 +- data/models/ybot/material_001.mat | 2 +- data/particles/demo/demo.mat | 2 +- data/pipelines/atmo.hlsl | 28 +- data/pipelines/blit.hlsl | 25 + data/pipelines/bloom_blur.hlsl | 14 +- data/pipelines/blur.hlsl | 14 +- data/pipelines/common.hlsli | 243 +++------ data/pipelines/cubemap_sky.hlsl | 2 +- data/pipelines/curve_decal.hlsl | 123 +++++ data/pipelines/curve_decal.shd | 134 ----- data/pipelines/debug_shape.hlsl | 12 +- data/pipelines/decal.hlsl | 37 +- data/pipelines/draw2d.hlsl | 13 +- data/pipelines/film_grain.hlsl | 20 +- data/pipelines/ibl_filter.hlsl | 121 +++++ data/pipelines/ibl_filter.shd | 127 ----- data/pipelines/impostor.hlsl | 253 +++++++++ data/pipelines/impostor.shd | 279 ---------- data/pipelines/lighting.hlsl | 46 ++ 
data/pipelines/lighting.shd | 58 -- data/pipelines/particles.hlsl | 71 +++ data/pipelines/particles.shd | 88 --- data/pipelines/ssao.hlsl | 15 +- .../pipelines/{standard.shd => standard.hlsl} | 39 +- data/pipelines/surface_base.hlsli | 244 +++++++++ data/pipelines/surface_base.inc | 286 ---------- data/pipelines/tdao.hlsl | 41 +- data/pipelines/textured_quad.hlsl | 41 -- data/pipelines/water.hlsl | 30 +- .../{world_grid.shd => world_grid.hlsl} | 21 +- src/renderer/draw_stream.cpp | 87 ++- src/renderer/draw_stream.h | 2 +- src/renderer/editor/fbx_importer.cpp | 4 +- src/renderer/editor/render_plugins.cpp | 131 ++--- src/renderer/gpu/gpu.h | 3 +- src/renderer/gpu/gpu_dx12.cpp | 305 +++-------- src/renderer/pipeline.cpp | 85 +-- src/renderer/pipeline.h | 5 +- src/renderer/postprocess.h | 61 +-- src/renderer/shader.cpp | 514 ++++-------------- src/renderer/shader.h | 34 +- 70 files changed, 1502 insertions(+), 2216 deletions(-) create mode 100644 data/pipelines/blit.hlsl create mode 100644 data/pipelines/curve_decal.hlsl delete mode 100644 data/pipelines/curve_decal.shd create mode 100644 data/pipelines/ibl_filter.hlsl delete mode 100644 data/pipelines/ibl_filter.shd create mode 100644 data/pipelines/impostor.hlsl delete mode 100644 data/pipelines/impostor.shd create mode 100644 data/pipelines/lighting.hlsl delete mode 100644 data/pipelines/lighting.shd create mode 100644 data/pipelines/particles.hlsl delete mode 100644 data/pipelines/particles.shd rename data/pipelines/{standard.shd => standard.hlsl} (67%) create mode 100644 data/pipelines/surface_base.hlsli delete mode 100644 data/pipelines/surface_base.inc delete mode 100644 data/pipelines/textured_quad.hlsl rename data/pipelines/{world_grid.shd => world_grid.hlsl} (54%) diff --git a/data/models/demolevel/demolevel.mat b/data/models/demolevel/demolevel.mat index 108ab26b5e..c4721c40b0 100644 --- a/data/models/demolevel/demolevel.mat +++ b/data/models/demolevel/demolevel.mat @@ -1,4 +1,4 @@ -shader 
"/pipelines/world_grid.shd" +shader "/pipelines/world_grid.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/darkermetal.mat b/data/models/pistol/darkermetal.mat index 955c8aafe6..d1b62f21e3 100644 --- a/data/models/pistol/darkermetal.mat +++ b/data/models/pistol/darkermetal.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/magazine.mat b/data/models/pistol/magazine.mat index 955c8aafe6..d1b62f21e3 100644 --- a/data/models/pistol/magazine.mat +++ b/data/models/pistol/magazine.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/material_003.mat b/data/models/pistol/material_003.mat index 955c8aafe6..d1b62f21e3 100644 --- a/data/models/pistol/material_003.mat +++ b/data/models/pistol/material_003.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/metal.mat b/data/models/pistol/metal.mat index 17acf27210..a519daacd0 100644 --- a/data/models/pistol/metal.mat +++ b/data/models/pistol/metal.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/muzzle.mat b/data/models/pistol/muzzle.mat index 955c8aafe6..d1b62f21e3 100644 --- a/data/models/pistol/muzzle.mat +++ b/data/models/pistol/muzzle.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/pistol/wood.mat b/data/models/pistol/wood.mat index 17acf27210..a519daacd0 100644 --- a/data/models/pistol/wood.mat +++ b/data/models/pistol/wood.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git 
a/data/models/shapes/cone.mat b/data/models/shapes/cone.mat index 13ad0855b3..5a63436610 100644 --- a/data/models/shapes/cone.mat +++ b/data/models/shapes/cone.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/shapes/cube.mat b/data/models/shapes/cube.mat index 17acf27210..a519daacd0 100644 --- a/data/models/shapes/cube.mat +++ b/data/models/shapes/cube.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/shapes/cylinder.mat b/data/models/shapes/cylinder.mat index 17acf27210..a519daacd0 100644 --- a/data/models/shapes/cylinder.mat +++ b/data/models/shapes/cylinder.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/shapes/material.mat b/data/models/shapes/material.mat index 0a08f835a4..91cceefc71 100644 --- a/data/models/shapes/material.mat +++ b/data/models/shapes/material.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/shapes/monkey.mat b/data/models/shapes/monkey.mat index 0a08f835a4..91cceefc71 100644 --- a/data/models/shapes/monkey.mat +++ b/data/models/shapes/monkey.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/shapes/plane.mat b/data/models/shapes/plane.mat index 7c9f4cd099..eb3c4b6e15 100644 --- a/data/models/shapes/plane.mat +++ b/data/models/shapes/plane.mat @@ -1,4 +1,4 @@ -shader "/pipelines/world_grid.shd" +shader "/pipelines/world_grid.hlsl" backface_culling false layer "default" diff --git a/data/models/shapes/sphere.mat b/data/models/shapes/sphere.mat index 17acf27210..a519daacd0 100644 --- a/data/models/shapes/sphere.mat +++ b/data/models/shapes/sphere.mat 
@@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/shapes/sphere_metal.mat b/data/models/shapes/sphere_metal.mat index c7147ab7f3..52f8010af4 100644 --- a/data/models/shapes/sphere_metal.mat +++ b/data/models/shapes/sphere_metal.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/shapes/torus.mat b/data/models/shapes/torus.mat index d5497b9871..8e08cab959 100644 --- a/data/models/shapes/torus.mat +++ b/data/models/shapes/torus.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" texture "" diff --git a/data/models/shapes/x.mat b/data/models/shapes/x.mat index 87b36686d9..469b66515f 100644 --- a/data/models/shapes/x.mat +++ b/data/models/shapes/x.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/shapes/y.mat b/data/models/shapes/y.mat index e6e8ecfcdf..f4e9e4b7c5 100644 --- a/data/models/shapes/y.mat +++ b/data/models/shapes/y.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/shapes/z.mat b/data/models/shapes/z.mat index fe14fefbab..a2d0ac2fbb 100644 --- a/data/models/shapes/z.mat +++ b/data/models/shapes/z.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/darkgreen.mat b/data/models/snake/darkgreen.mat index 1ffe22d07d..668b088327 100644 --- a/data/models/snake/darkgreen.mat +++ b/data/models/snake/darkgreen.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/darkred.mat b/data/models/snake/darkred.mat index 
903709f446..de43f1816f 100644 --- a/data/models/snake/darkred.mat +++ b/data/models/snake/darkred.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/lightgreen.mat b/data/models/snake/lightgreen.mat index 0b7d1170a1..b28f356506 100644 --- a/data/models/snake/lightgreen.mat +++ b/data/models/snake/lightgreen.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/purple.mat b/data/models/snake/purple.mat index d44ad240a4..84a97a3c51 100644 --- a/data/models/snake/purple.mat +++ b/data/models/snake/purple.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/red.mat b/data/models/snake/red.mat index 1b7dadc3ef..91a57342d2 100644 --- a/data/models/snake/red.mat +++ b/data/models/snake/red.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/teeth.mat b/data/models/snake/teeth.mat index 8e1320a2f8..1d4d84dedc 100644 --- a/data/models/snake/teeth.mat +++ b/data/models/snake/teeth.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/snake/yellow.mat b/data/models/snake/yellow.mat index 73ce2fc995..85e8d91730 100644 --- a/data/models/snake/yellow.mat +++ b/data/models/snake/yellow.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/ybot/alpha_body_mat.mat b/data/models/ybot/alpha_body_mat.mat index dd24533f44..025c2259d9 100644 --- a/data/models/ybot/alpha_body_mat.mat +++ b/data/models/ybot/alpha_body_mat.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" 
backface_culling true layer "default" diff --git a/data/models/ybot/alpha_joints_mat.mat b/data/models/ybot/alpha_joints_mat.mat index 39018fdc97..8b8d7ea36b 100644 --- a/data/models/ybot/alpha_joints_mat.mat +++ b/data/models/ybot/alpha_joints_mat.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" backface_culling true layer "default" diff --git a/data/models/ybot/material.mat b/data/models/ybot/material.mat index 3760f8ced0..29cc4be4f1 100644 --- a/data/models/ybot/material.mat +++ b/data/models/ybot/material.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/models/ybot/material_001.mat b/data/models/ybot/material_001.mat index d6185f2de7..7d0dfce371 100644 --- a/data/models/ybot/material_001.mat +++ b/data/models/ybot/material_001.mat @@ -1,4 +1,4 @@ -shader "/pipelines/standard.shd" +shader "/pipelines/standard.hlsl" texture "" texture "" texture "" diff --git a/data/particles/demo/demo.mat b/data/particles/demo/demo.mat index 52a81b0222..20ddc34038 100644 --- a/data/particles/demo/demo.mat +++ b/data/particles/demo/demo.mat @@ -1,4 +1,4 @@ -shader "/pipelines/particles.shd" +shader "/pipelines/particles.hlsl" backface_culling true layer "transparent" diff --git a/data/pipelines/atmo.hlsl b/data/pipelines/atmo.hlsl index 42d2847e40..ed359e2cff 100644 --- a/data/pipelines/atmo.hlsl +++ b/data/pipelines/atmo.hlsl @@ -13,7 +13,7 @@ cbuffer Data : register (b4) { float4 u_fog_scattering; float u_fog_top; float u_fog_enabled; - float u_godarys_enabled; + float u_godrays_enabled; uint u_output; uint u_optical_depth; uint u_depth_buffer; @@ -52,10 +52,11 @@ void main(uint3 thread_id : SV_DispatchThreadID) { float3 sunlight = u_sunlight.rgb * u_sunlight.a; float ndc_depth = bindless_textures[u_depth_buffer][thread_id.xy].r; float2 uv = thread_id.xy / (float2)Global_framebuffer_size.xy; - float3 eyedir = getWorldNormal(uv); + float3 eyedir = 
getViewDirection(uv); const float cos_theta = dot(eyedir, Global_light_dir.xyz); if (ndc_depth > 0) { + // sky is hidden by some object float linear_depth = toLinearDepth(Global_inv_projection, ndc_depth); float2 v = float2( saturate(linear_depth / 50e3), @@ -71,7 +72,8 @@ void main(uint3 thread_id : SV_DispatchThreadID) { ; } else { - float spot = smoothstep(0.0, 1000.0, phase(cos_theta, 0.9995)) * 200; + // sky is visible + float sun_spot = smoothstep(0.0, 1000.0, phase(cos_theta, 0.9995)) * 200; float2 v = 1; v.y = max(0, eyedir.y); @@ -87,43 +89,43 @@ void main(uint3 thread_id : SV_DispatchThreadID) { atmo.rgb = insc.aaa * miePhase(0.75, -cos_theta) * sunlight * u_scatter_mie.rgb + insc.rgb * rayleighPhase(-cos_theta) * sunlight * u_scatter_rayleigh.rgb - + spot * exp(-opt_depth.x * extinction_rayleigh - opt_depth.y * extinction_mie) + + sun_spot * exp(-opt_depth.x * extinction_rayleigh - opt_depth.y * extinction_mie) ; } if (u_fog_enabled > 0) { - const float3 fog_extinction = u_fog_scattering.rgb; float linear_depth = ndc_depth > 0 ? toLinearDepth(Global_inv_projection, ndc_depth) : 1e5; float dist = (linear_depth / dot(eyedir, Global_view_dir.xyz)); float3 p0 = Global_camera_world_pos.xyz; float3 p1 = Global_camera_world_pos.xyz + eyedir * dist; makeAscending(p0, p1); - if (p0.y < u_fog_top) { - if (p1.y > u_fog_top) { + const bool is_in_fog = p0.y < u_fog_top; + if (is_in_fog) { + const bool is_partially_in_fog = p1.y > u_fog_top; + if (is_partially_in_fog) { + // clip to top of fog float3 dir = p1 - p0; float3 diry1 = dir / (abs(dir.y) < 1e-5 ? 1e-5 : dir.y); p1 -= diry1 * (p1.y - u_fog_top); } - float3 fog_transmittance = exp(-distanceInFog(p0, p1) * fog_extinction); + float3 fog_transmittance = exp(-distanceInFog(p0, p1) * u_fog_scattering.rgb); float3 inscatter = 0; { - const int STEP_COUNT = u_godarys_enabled > 0 ? 32 : 8; + const int STEP_COUNT = u_godrays_enabled > 0 ? 
32 : 8; float step_len = length(p1 - p0) / (STEP_COUNT + 1); float offset = hash((float2)thread_id.xy * 0.05); for (float f = (offset ) / STEP_COUNT; f < 1; f += 1.0 / (STEP_COUNT + 1)) { float3 p = lerp(p0, p1, f); float od = distanceInFog(p, p + Global_light_dir.xyz * 1e5); // TODO 1e5 od += distanceInFog(p, Global_camera_world_pos.xyz); - float shadow = u_godarys_enabled > 0 ? getShadowSimple(Global_shadowmap, p - Global_camera_world_pos.xyz) : 1; - inscatter += getFogDensity(p) * step_len * exp(-od * fog_extinction) * shadow; + float shadow = u_godrays_enabled > 0 ? getShadowSimple(Global_shadowmap, p - Global_camera_world_pos.xyz) : 1; + inscatter += getFogDensity(p) * step_len * exp(-od * u_fog_scattering.rgb) * shadow; } } - const float cos_theta = dot(eyedir, Global_light_dir.xyz); - scene.rgb = fog_transmittance; atmo.rgb += inscatter * u_fog_scattering.rgb * sunlight * miePhase(0.25, -cos_theta); } diff --git a/data/pipelines/blit.hlsl b/data/pipelines/blit.hlsl new file mode 100644 index 0000000000..55548d7e4b --- /dev/null +++ b/data/pipelines/blit.hlsl @@ -0,0 +1,25 @@ +//@include "pipelines/common.hlsli" + +cbuffer Drawcall : register(b4) { + float4 u_r_mask; + float4 u_g_mask; + float4 u_b_mask; + float4 u_a_mask; + float4 u_offsets; + uint2 u_position; + int2 u_scale; + uint u_input; + uint u_output; +}; + +[numthreads(16, 16, 1)] +void main(uint3 thread_id : SV_DispatchThreadID) { + float4 value = bindless_textures[u_input][thread_id.xy]; + + bindless_rw_textures[u_output][thread_id.xy * u_scale + u_position] = float4( + dot(value, u_r_mask) + u_offsets.r, + dot(value, u_g_mask) + u_offsets.g, + dot(value, u_b_mask) + u_offsets.b, + dot(value, u_a_mask) + u_offsets.a + ); +} diff --git a/data/pipelines/bloom_blur.hlsl b/data/pipelines/bloom_blur.hlsl index 59c865ac06..3811d198f9 100644 --- a/data/pipelines/bloom_blur.hlsl +++ b/data/pipelines/bloom_blur.hlsl @@ -7,15 +7,15 @@ cbuffer Drawcall : register(b4) { uint u_small; }; -struct Output { 
+struct VSOutput { float2 tc0 : TEXCOORD0; float4 tc1 : TEXCOORD1; float4 tc2 : TEXCOORD2; float4 position : SV_POSITION; }; -Output mainVS(uint vertex_id : SV_VertexID) { - Output output; +VSOutput mainVS(uint vertex_id : SV_VertexID) { + VSOutput output; float2 uv; output.position = fullscreenQuad(vertex_id, uv); output.tc0 = uv; @@ -41,14 +41,8 @@ Output mainVS(uint vertex_id : SV_VertexID) { return output; } -struct Input { - float2 tc0 : TEXCOORD0; - float4 tc1 : TEXCOORD1; - float4 tc2 : TEXCOORD2; -}; - // blur bigger in one axis and merge with smaller (already blurred from previous step) -float4 mainPS(Input input) : SV_Target { +float4 mainPS(VSOutput input) : SV_Target { return sampleBindless(LinearSamplerClamp, u_small, input.tc0.xy) * 0.5 + ((sampleBindless(LinearSamplerClamp, u_big, input.tc0.xy)) * 0.2270270270 + (sampleBindless(LinearSamplerClamp, u_big, input.tc1.xy)) * 0.3162162162 diff --git a/data/pipelines/blur.hlsl b/data/pipelines/blur.hlsl index 7b9f354914..163b3b954c 100644 --- a/data/pipelines/blur.hlsl +++ b/data/pipelines/blur.hlsl @@ -6,15 +6,15 @@ cbuffer Drawcall : register(b4) { uint u_input; }; -struct Output { +struct VSOutput { float2 tc0 : TEXCOORD0; float4 tc1 : TEXCOORD1; float4 tc2 : TEXCOORD2; float4 position : SV_POSITION; }; -Output mainVS(uint vertex_id : SV_VertexID) { - Output output; +VSOutput mainVS(uint vertex_id : SV_VertexID) { + VSOutput output; float2 uv; output.position = fullscreenQuad(vertex_id, uv); output.tc0 = uv; @@ -40,13 +40,7 @@ Output mainVS(uint vertex_id : SV_VertexID) { return output; } -struct Input { - float2 tc0 : TEXCOORD0; - float4 tc1 : TEXCOORD1; - float4 tc2 : TEXCOORD2; -}; - -float4 mainPS(Input input) : SV_Target { +float4 mainPS(VSOutput input) : SV_Target { float2 uv0 = saturate(input.tc0.xy); float2 uv1 = saturate(input.tc1.xy); float2 uv2 = saturate(input.tc1.zw); diff --git a/data/pipelines/common.hlsli b/data/pipelines/common.hlsli index ddb8c4f9c5..a69fefc2ef 100644 --- 
a/data/pipelines/common.hlsli +++ b/data/pipelines/common.hlsli @@ -1,5 +1,3 @@ -#line 2 "common.hlsli" - #define M_PI 3.14159265359 #define ONE_BY_PI (1 / 3.14159265359) @@ -10,10 +8,6 @@ static const float2 POISSON_DISK_4[4] = { float2( 0.34495938, 0.29387760 ) }; -cbuffer ShadowAtlas : register(b3) { - float4x4 u_shadow_atlas_matrices[128]; -}; - static const float2 POISSON_DISK_16[16] = { float2(0.3568125,-0.5825516), float2(-0.2828444,-0.1149732), @@ -121,6 +115,7 @@ cbuffer GlobalState : register(b0) { float4 Global_to_prev_frame_camera_translation; float4 Global_light_dir; float4 Global_light_color; + uint2 Global_random_uint2; int2 Global_framebuffer_size; float2 Global_pixel_jitter; float2 Global_prev_pixel_jitter; @@ -152,6 +147,10 @@ cbuffer PassState : register(b1) { float4 Pass_shadow_to_camera : packoffset(c32); }; +cbuffer ShadowAtlas : register(b3) { + float4x4 u_shadow_atlas_matrices[128]; +}; + float3 rotateByQuat(float4 rot, float3 pos) { float3 uv = cross(rot.xyz, pos); float3 uuv = cross(rot.xyz, uv); @@ -206,16 +205,14 @@ uint2 textureSize(TextureCube Tex, uint Level) { return ret; } -float3 getWorldNormal(float2 frag_coord) { - float z = 1; - float4 posProj = float4(toScreenUV(frag_coord) * 2 - 1, z, 1.0); +// returns view vector, i.e. 
normalized vector in world-space pointing from camera to pixel +float3 getViewDirection(float2 screen_uv) { + float4 posProj = float4(toScreenUV(screen_uv) * 2 - 1, 1, 1.0); float4 wpos = mul(posProj, Global_inv_view_projection); - wpos /= wpos.w; - float3 view = (mul(float4(0.0, 0.0, 0.0, 1.0), Global_inv_view)).xyz - wpos.xyz; - - return -normalize(view); + return normalize(wpos.xyz); } +// get view-space position of pixel at `tex_coord` float3 getViewPosition(uint depth_buffer, float4x4 inv_view_proj, float2 tex_coord) { float z = sampleBindlessLod(LinearSamplerClamp, depth_buffer, tex_coord, 0).r; float4 pos_proj = float4(toScreenUV(tex_coord) * 2 - 1, z, 1.0); @@ -223,6 +220,7 @@ float3 getViewPosition(uint depth_buffer, float4x4 inv_view_proj, float2 tex_coo return view_pos.xyz / view_pos.w; } +// get view-space position of pixel at `tex_coord` and its NDC depth float3 getViewPosition(uint depth_buffer, float4x4 inv_view_proj, float2 tex_coord, out float ndc_depth) { float z = sampleBindlessLod(LinearSamplerClamp, depth_buffer, tex_coord, 0).r; float4 pos_proj = float4(toScreenUV(tex_coord) * 2 - 1, z, 1.0); @@ -276,9 +274,10 @@ float2 computeStaticObjectMotionVector(float3 wpos) { return pos_projected.xy / pos_projected.w - p.xy / p.w; } -float4 fullscreenQuad(int vertexID, out float2 uv) { - uv = float2((vertexID & 1) * 2, vertexID & 2); - return float4(toScreenUV(uv) * 2 - 1, 0, 1); +// can be used in VS to draw a triangle covering whole screen +float4 fullscreenQuad(int vertexID, out float2 screen_uv) { + screen_uv = float2((vertexID & 1) * 2, vertexID & 2); + return float4(toScreenUV(screen_uv) * 2 - 1, 0, 1); } // TODO optimize @@ -294,8 +293,6 @@ StructuredBuffer b_cluster_map : register(t2); StructuredBuffer b_env_probes : register(t3); StructuredBuffer b_refl_probes : register(t4); -#ifdef LUMIX_FRAGMENT_SHADER - Cluster getClusterLinearDepth(float linear_depth, float2 frag_coord) { int3 cluster; int2 fragcoord = int2(frag_coord.xy); @@ -328,8 +325,6 
@@ Cluster getCluster(float ndc_depth, float2 frag_coord) { return b_clusters[idx]; } -#endif - float hash(float3 seed) { float dot_product = dot(seed, float3(12.9898,78.233,45.164)); return frac(sin(dot_product) * 43758.5453); @@ -356,33 +351,31 @@ float getShadowSimple(uint shadowmap, float3 wpos) { } float getShadow(uint shadowmap, float3 wpos, float3 N, float2 frag_coord) { - #ifdef LUMIX_FRAGMENT_SHADER - float NdL = saturate(dot(N, Global_light_dir.xyz)); - float4 pos = float4(wpos, 1); + float NdL = saturate(dot(N, Global_light_dir.xyz)); + float4 pos = float4(wpos, 1); + + for (int slice = 0; slice < 4; ++slice) { + float3 sc = mul(Global_sm_slices[slice].world_to_slice, pos); - for (int slice = 0; slice < 4; ++slice) { - float3 sc = mul(Global_sm_slices[slice].world_to_slice, pos); + if (all(sc.xyz < 0.99) && all(sc.xyz > 0.01)) { + float c = hash(frag_coord) * 2 - 1; + float s = sqrt(1 - c * c); + float2x2 rot = float2x2(c, s, -s, c); + float2 sm_uv = float2(sc.x * 0.25 + slice * 0.25, sc.y); + float shadow = 0; + float receiver = sc.z; - if (all(sc.xyz < 0.99) && all(sc.xyz > 0.01)) { - float c = hash(frag_coord) * 2 - 1; - float s = sqrt(1 - c * c); - float2x2 rot = float2x2(c, s, -s, c); - float2 sm_uv = float2(sc.x * 0.25 + slice * 0.25, sc.y); - float shadow = 0; - float receiver = sc.z; - - float bias = (0.01 + Global_sm_slices[slice].texel_world / max(NdL, 0.1)) * Global_shadow_rcp_depth_range; - for (int j = 0; j < 16; ++j) { - float2 pcf_offset = mul(rot, POISSON_DISK_16[j]); - float2 uv = sm_uv + pcf_offset * float2(0.25, 1) * Global_sm_slices[slice].rcp_size * 3; + float bias = (0.01 + Global_sm_slices[slice].texel_world / max(NdL, 0.1)) * Global_shadow_rcp_depth_range; + for (int j = 0; j < 16; ++j) { + float2 pcf_offset = mul(rot, POISSON_DISK_16[j]); + float2 uv = sm_uv + pcf_offset * float2(0.25, 1) * Global_sm_slices[slice].rcp_size * 3; - float occluder = sampleBindlessLod(LinearSamplerClamp, shadowmap, uv, 0).r; - shadow += receiver > 
occluder - length(pcf_offset) * bias * 3 ? 1 : 0; - } - return shadow / 16.0; + float occluder = sampleBindlessLod(LinearSamplerClamp, shadowmap, uv, 0).r; + shadow += receiver > occluder - length(pcf_offset) * bias * 3 ? 1 : 0; } + return shadow / 16.0; } - #endif + } return 1; } @@ -432,56 +425,52 @@ float getShadowAtlasResolution(int idx) { } float3 pointLightsLighting(Cluster cluster, Surface surface, uint shadow_atlas, float2 frag_coord) { - #ifdef LUMIX_FRAGMENT_SHADER - float3 res = 0; - for (int i = cluster.offset; i < cluster.offset + cluster.lights_count; ++i) { - Light light = b_lights[b_cluster_map[i]]; - float3 lpos = surface.wpos.xyz - light.pos_radius.xyz; - float dist = length(lpos); - float attn = pow(max(0, 1 - dist / light.pos_radius.w), light.color_attn.w); - float3 L = -lpos / dist; - if (attn > 1e-5) { - float3 direct_light = computeDirectLight(surface, L, light.color_attn.rgb); - int atlas_idx = light.atlas_idx; - if (atlas_idx >= 0) { - float4 proj_pos = mul(float4(lpos, 1), u_shadow_atlas_matrices[atlas_idx]); - proj_pos /= proj_pos.w; - - float2 shadow_uv = proj_pos.xy; - - float c = hash(frag_coord) * 2 - 1; - float s = sqrt(1 - c * c); - float2x2 rot = float2x2(c, s, -s, c); - float shadow = 0; - float receiver = proj_pos.z; - for (int j = 0; j < 16; ++j) { - float2 pcf_offset = mul(rot, POISSON_DISK_16[j]); - float2 uv = shadow_uv + pcf_offset * float2(0.25, 1) / getShadowAtlasResolution(atlas_idx) * 3; - - float occluder = sampleBindlessLod(LinearSamplerClamp, shadow_atlas, uv, 0).r; - shadow += receiver * 1.02 > occluder ? 
1 : 0; - } - attn *= shadow / 16.0; - } + float3 res = 0; + for (int i = cluster.offset; i < cluster.offset + cluster.lights_count; ++i) { + Light light = b_lights[b_cluster_map[i]]; + float3 lpos = surface.wpos.xyz - light.pos_radius.xyz; + float dist = length(lpos); + float attn = pow(max(0, 1 - dist / light.pos_radius.w), light.color_attn.w); + float3 L = -lpos / dist; + if (attn > 1e-5) { + float3 direct_light = computeDirectLight(surface, L, light.color_attn.rgb); + int atlas_idx = light.atlas_idx; + if (atlas_idx >= 0) { + float4 proj_pos = mul(float4(lpos, 1), u_shadow_atlas_matrices[atlas_idx]); + proj_pos /= proj_pos.w; + + float2 shadow_uv = proj_pos.xy; - float fov = light.fov; - if (fov < M_PI) { - // TODO replace rot with dir - float3 dir = rotateByQuat(light.rot, float3(0, 0, -1)); - float3 L = lpos / max(dist, 1e-5); - float cosDir = dot(normalize(dir), L); - float cosCone = cos(fov * 0.5); + float c = hash(frag_coord) * 2 - 1; + float s = sqrt(1 - c * c); + float2x2 rot = float2x2(c, s, -s, c); + float shadow = 0; + float receiver = proj_pos.z; + for (int j = 0; j < 16; ++j) { + float2 pcf_offset = mul(rot, POISSON_DISK_16[j]); + float2 uv = shadow_uv + pcf_offset * float2(0.25, 1) / getShadowAtlasResolution(atlas_idx) * 3; - attn *= cosDir < cosCone ? 0 : (cosDir - cosCone) / (1 - cosCone); + float occluder = sampleBindlessLod(LinearSamplerClamp, shadow_atlas, uv, 0).r; + shadow += receiver * 1.02 > occluder ? 1 : 0; } + attn *= shadow / 16.0; + } - res += direct_light * attn; + float fov = light.fov; + if (fov < M_PI) { + // TODO replace rot with dir + float3 dir = rotateByQuat(light.rot, float3(0, 0, -1)); + float3 L = lpos / max(dist, 1e-5); + float cosDir = dot(normalize(dir), L); + float cosCone = cos(fov * 0.5); + + attn *= cosDir < cosCone ? 
0 : (cosDir - cosCone) / (1 - cosCone); } + + res += direct_light * attn; } - return res; - #else - return 0; - #endif + } + return res; } float3 computeIndirectDiffuse(float3 irradiance, Surface surface) { @@ -617,8 +606,8 @@ float3 computeLighting(Cluster cluster, Surface surface, float3 light_direction, return res; } -float2 cameraReproject(float2 uv, float depth) { - float4 v = mul(float4(toScreenUV(uv) * 2 - 1, depth, 1), Global_reprojection); +float2 cameraReproject(float2 uv, float ndc_depth) { + float4 v = mul(float4(toScreenUV(uv) * 2 - 1, ndc_depth, 1), Global_reprojection); float2 res = (v.xy / v.w) * 0.5 + 0.5; return toScreenUV(res); } @@ -630,77 +619,9 @@ float D_GGX(float ndoth, float roughness) { return a2 / (f * f * M_PI); } -#ifdef LUMIX_FRAGMENT_SHADER - bool ditherLOD(float lod, float2 frag_coord){ - // interleaved gradient noise by Jorge Jimenez - float s = frac(52.9829189 * frac(0.06711056 * frag_coord.x + 0.00583715 * frag_coord.y)); - float ret = lod < 0.0 ? step(s, lod + 1.0) : step(lod, s); - return ret < 1e-3; - } -#endif - -/* - -layout (std140, binding = 3) uniform ShadowAtlas { - mat4 u_shadow_atlas_matrices[128]; -}; - -float3 getViewPosition(sampler2D depth_buffer, mat4 inv_view_proj, float2 tex_coord, out float ndc_depth) -{ - float z = texture(depth_buffer, tex_coord).r; - float4 pos_proj = float4(toScreenUV(tex_coord) * 2 - 1, z, 1.0); - float4 view_pos = inv_view_proj * pos_proj; - ndc_depth = z; - return view_pos.xyz / view_pos.w; -} - -float3 getViewPosition(sampler2D depth_buffer, mat4 inv_view_proj, float2 tex_coord) -{ - float z = texture(depth_buffer, tex_coord).r; - float4 pos_proj = float4(toScreenUV(tex_coord) * 2 - 1, z, 1.0); - float4 view_pos = inv_view_proj * pos_proj; - return view_pos.xyz / view_pos.w; -} - - - -float G_SmithSchlickGGX(float ndotl, float ndotv, float roughness) -{ - float r = roughness + 1.0; - float k = (r * r) / 8.0; - float l = ndotl / (ndotl * (1.0 - k) + k); - float v = ndotv / (ndotv * (1.0 - 
k) + k); - return l * v; -} - -float3 env_brdf_approx(float3 F0, float roughness, float NoV) { - float4 c0 = float4(-1, -0.0275, -0.572, 0.022); - float4 c1 = float4(1, 0.0425, 1.0, -0.04); - float4 r = roughness * c0 + c1; - float a004 = min(r.x * r.x, exp2(-9.28 * NoV)) * r.x + r.y; - float2 AB = float2(-1.04, 1.04) * a004 + r.zw; - return F0 * AB.x + AB.y; -} - -float3 computeIndirectDiffuse(float3 irradiance, Surface surface) { - float ndotv = abs(dot(surface.N , surface.V)) + 1e-5f; - float3 F0 = mix(float3(0.04), surface.albedo, surface.metallic); - float3 F = F_Schlick(ndotv, F0); - float3 kd = mix(float3(1.0) - F, float3(0.0), surface.metallic); - return surface.albedo * irradiance; +bool ditherLOD(float lod, float2 frag_coord){ + // interleaved gradient noise by Jorge Jimenez + float s = frac(52.9829189 * frac(0.06711056 * frag_coord.x + 0.00583715 * frag_coord.y)); + float ret = lod < 0.0 ? step(s, lod + 1.0) : step(lod, s); + return ret < 1e-3; } - -float3 transformByDualQuat(mat2x4 dq, float3 pos) { - return pos - + 2 * cross(dq[0].xyz, cross(dq[0].xyz, pos) + dq[0].w * pos) - + 2 * (dq[0].w * dq[1].xyz - dq[1].w * dq[0].xyz + cross(dq[0].xyz, dq[1].xyz)); -} - -float2 computeStaticObjectMotionVector(float3 wpos) { - float4 p = Global.view_projection_no_jitter * float4(wpos, 1); - float4 pos_projected = Global.prev_view_projection_no_jitter * float4(wpos + Global.to_prev_frame_camera_translation.xyz, 1); - return pos_projected.xy / pos_projected.w - p.xy / p.w; -} - - -*/ diff --git a/data/pipelines/cubemap_sky.hlsl b/data/pipelines/cubemap_sky.hlsl index d7d2c2cb10..64a3e60812 100644 --- a/data/pipelines/cubemap_sky.hlsl +++ b/data/pipelines/cubemap_sky.hlsl @@ -29,6 +29,6 @@ VSOutput mainVS(uint vertex_id : SV_VertexID) { } float4 mainPS(VSOutput input) : SV_TARGET { - float3 eye_dir = getWorldNormal(input.uv); + float3 eye_dir = getViewDirection(input.uv); return float4(sampleCubeBindless(LinearSampler, u_sky, eye_dir).rgb * u_intensity, 1); } diff 
--git a/data/pipelines/curve_decal.hlsl b/data/pipelines/curve_decal.hlsl new file mode 100644 index 0000000000..a7f8625c8b --- /dev/null +++ b/data/pipelines/curve_decal.hlsl @@ -0,0 +1,123 @@ +//@surface +//@include "pipelines/common.hlsli" +//@texture_slot "Texture", "textures/common/white.tga" +//@uniform "Material color", "color", {1, 1, 1, 1} + +struct VSOutput { + float3 half_extents : TEXCOORD0; + float3 pos : TEXCOORD1; + float4 rot : TEXCOORD2; + float2 uv_scale : TEXCOORD3; + float4 bezier : TEXCOORD4; + float4 position : SV_POSITION; +}; + +struct VSInput { + float3 position : TEXCOORD0; + float3 i_pos : TEXCOORD1; + float4 i_rot : TEXCOORD2; + float3 i_half_extents : TEXCOORD3; + float2 i_uv_scale : TEXCOORD4; + float4 i_bezier : TEXCOORD5; +}; + +VSOutput mainVS(VSInput input) { + VSOutput output; + output.pos = input.i_pos; + output.rot = input.i_rot; + output.half_extents = input.i_half_extents; + float3 pos = rotateByQuat(input.i_rot, input.position * input.i_half_extents); + pos += input.i_pos; + output.uv_scale = input.i_uv_scale; + output.bezier = input.i_bezier; + output.position = mul(float4(pos, 1), mul(Global_view, Global_projection)); + return output; +} + +float cross2(float2 a, float2 b) { return a.x * b.y - a.y * b.x; } + +// from shadertoy by iq +float2 sdBezier(float2 pos, float2 A, float2 B, float2 C) { + float2 a = B - A; + float2 b = A - 2.0*B + C; + float2 c = a * 2.0; + float2 d = A - pos; + + float kk = 1.0 / dot(b, b); + float kx = kk * dot(a, b); + float ky = kk * (2.0 * dot(a, a) + dot(d, b)) / 3.0; + float kz = kk * dot(d, a); + + float res = 0.0; + float sgn = 0.0; + + float p = ky - kx * kx; + float p3 = p * p * p; + float q = kx * (2.0 * kx * kx - 3.0 * ky) + kz; + float h = q * q + 4.0 * p3; + float res_t; + + if (h >= 0.0) { // 1 root + h = sqrt(h); + float2 x = (float2(h, -h) - q) / 2.0; + float2 uv = sign(x) * pow(abs(x), 1.0 / 3.0); + float t = saturate(uv.x + uv.y - kx); + float2 q = d + (c + b * t) * t; + res = 
dot(q, q); + sgn = cross2(c + 2.0 * b * t, q); + res_t = t; + } + else { // 3 roots + float z = sqrt(-p); + float v = acos(q / (p * z * 2.0)) / 3.0; + float m = cos(v); + float n = sin(v) * 1.732050808; + float3 t = saturate(float3(m + m, -n - m, n - m) * z - kx); + float2 qx = d + (c + b * t.x) * t.x; + float dx = dot(qx, qx), sx = cross2(c + 2.0 * b * t.x, qx); + float2 qy = d + (c + b * t.y) * t.y; + float dy = dot(qy, qy), sy = cross2(c + 2.0 * b * t.y, qy); + if (dx < dy) { + res = dx; + sgn = sx; + res_t = t.x; + } else { + res = dy; + sgn = sy; + res_t = t.y; + } + } + + return float2(sqrt(res) * sign(sgn), res_t); +} + +cbuffer Dc : register(b4) { + uint u_gbuffer_depth; +}; + +GBufferOutput mainPS(VSOutput input) { + float2 screen_uv = input.position.xy / Global_framebuffer_size; + float3 wpos = getViewPosition(u_gbuffer_depth, Global_inv_view_projection, screen_uv); + + float4 r = input.rot; + r.w = -r.w; + float3 lpos = rotateByQuat(r, wpos - input.pos); + if (any(abs(lpos) > input.half_extents)) discard; + + float2 bezier_dist = sdBezier(lpos.xz, input.bezier.xy, 0, input.bezier.zw); + if (abs(bezier_dist.x) > 0.5 * input.uv_scale.x) discard; + if (abs(bezier_dist.y - 0.5) > 0.499) discard; + bezier_dist.x += 0.5 * input.uv_scale.x; + bezier_dist.x /= input.uv_scale.x; + bezier_dist.y *= input.uv_scale.y; + float4 color = sampleBindless(LinearSampler, t_texture, bezier_dist.yx); + if (color.a < 0.5) discard; + color.rgb *= u_material_color.rgb; + + GBufferOutput o; + o.gbuffer0 = float4(color.rgb, 0.9); + o.gbuffer1 = float4(0, 0, 0, 0); + o.gbuffer2 = float4(0, 0, 0, 0); + o.gbuffer3 = float4(0, 0, 0, 0); + return o; +} \ No newline at end of file diff --git a/data/pipelines/curve_decal.shd b/data/pipelines/curve_decal.shd deleted file mode 100644 index 797557e67e..0000000000 --- a/data/pipelines/curve_decal.shd +++ /dev/null @@ -1,134 +0,0 @@ -include "pipelines/common.hlsli" - -texture_slot { - name = "Texture", - uniform = "u_texture", - 
default_texture = "textures/common/white.tga" -} - -uniform("Material color", "color", {1, 1, 1, 1}) - -common [[ - struct VSOutput { - float3 half_extents : TEXCOORD0; - float3 pos : TEXCOORD1; - float4 rot : TEXCOORD2; - float2 uv_scale : TEXCOORD3; - float4 bezier : TEXCOORD4; - float4 position : SV_POSITION; - }; -]] - -vertex_shader [[ - struct Input { - float3 position : TEXCOORD0; - float3 i_pos : TEXCOORD1; - float4 i_rot : TEXCOORD2; - float3 i_half_extents : TEXCOORD3; - float2 i_uv_scale : TEXCOORD4; - float4 i_bezier : TEXCOORD5; - }; - - VSOutput main(Input input) { - VSOutput output; - output.pos = input.i_pos; - output.rot = input.i_rot; - output.half_extents = input.i_half_extents; - float3 pos = rotateByQuat(input.i_rot, input.position * input.i_half_extents); - pos += input.i_pos; - output.uv_scale = input.i_uv_scale; - output.bezier = input.i_bezier; - output.position = mul(float4(pos, 1), mul(Global_view, Global_projection)); - return output; - } -]] - -fragment_shader [[ - float cross2(float2 a, float2 b) { return a.x * b.y - a.y * b.x; } - - // from shadertoy by iq - float2 sdBezier(float2 pos, float2 A, float2 B, float2 C) { - float2 a = B - A; - float2 b = A - 2.0*B + C; - float2 c = a * 2.0; - float2 d = A - pos; - - float kk = 1.0 / dot(b, b); - float kx = kk * dot(a, b); - float ky = kk * (2.0 * dot(a, a) + dot(d, b)) / 3.0; - float kz = kk * dot(d, a); - - float res = 0.0; - float sgn = 0.0; - - float p = ky - kx * kx; - float p3 = p * p * p; - float q = kx * (2.0 * kx * kx - 3.0 * ky) + kz; - float h = q * q + 4.0 * p3; - float res_t; - - if (h >= 0.0) { // 1 root - h = sqrt(h); - float2 x = (float2(h, -h) - q) / 2.0; - float2 uv = sign(x) * pow(abs(x), 1.0 / 3.0); - float t = saturate(uv.x + uv.y - kx); - float2 q = d + (c + b * t) * t; - res = dot(q, q); - sgn = cross2(c + 2.0 * b * t, q); - res_t = t; - } - else { // 3 roots - float z = sqrt(-p); - float v = acos(q / (p * z * 2.0)) / 3.0; - float m = cos(v); - float n = sin(v) * 
1.732050808; - float3 t = saturate(float3(m + m, -n - m, n - m) * z - kx); - float2 qx = d + (c + b * t.x) * t.x; - float dx = dot(qx, qx), sx = cross2(c + 2.0 * b * t.x, qx); - float2 qy = d + (c + b * t.y) * t.y; - float dy = dot(qy, qy), sy = cross2(c + 2.0 * b * t.y, qy); - if (dx < dy) { - res = dx; - sgn = sx; - res_t = t.x; - } else { - res = dy; - sgn = sy; - res_t = t.y; - } - } - - return float2(sqrt(res) * sign(sgn), res_t); - } - - cbuffer Dc : register(b4) { - uint u_gbuffer_depth; - }; - - GBufferOutput main(VSOutput input) { - float2 screen_uv = input.position.xy / Global_framebuffer_size; - float3 wpos = getViewPosition(u_gbuffer_depth, Global_inv_view_projection, screen_uv); - - float4 r = input.rot; - r.w = -r.w; - float3 lpos = rotateByQuat(r, wpos - input.pos); - if (any(abs(lpos) > input.half_extents)) discard; - - float2 bezier_dist = sdBezier(lpos.xz, input.bezier.xy, 0, input.bezier.zw); - if (abs(bezier_dist.x) > 0.5 * input.uv_scale.x) discard; - if (abs(bezier_dist.y - 0.5) > 0.499) discard; - bezier_dist.x += 0.5 * input.uv_scale.x; - bezier_dist.x /= input.uv_scale.x; - bezier_dist.y *= input.uv_scale.y; - float4 color = sampleBindless(LinearSampler, t_texture, bezier_dist.yx); - if (color.a < 0.5) discard; - color.rgb *= u_material_color.rgb; - - GBufferOutput o; - o.gbuffer0 = float4(color.rgb, 0.9); - o.gbuffer1 = float4(0, 0, 0, 0); - o.gbuffer2 = float4(0, 0, 0, 0); - o.gbuffer3 = float4(0, 0, 0, 0); - return o; - } -]] \ No newline at end of file diff --git a/data/pipelines/debug_shape.hlsl b/data/pipelines/debug_shape.hlsl index 576bb8c958..9ad1c3666e 100644 --- a/data/pipelines/debug_shape.hlsl +++ b/data/pipelines/debug_shape.hlsl @@ -5,7 +5,7 @@ cbuffer Model : register(b4) { float4x4 u_model; }; -struct Output { +struct VSOutput { float4 color : TEXCOORD0; float4 position : SV_POSITION; }; @@ -15,17 +15,13 @@ struct VSInput { float4 color : TEXCOORD1; }; -Output mainVS(VSInput input) { - Output output; +VSOutput 
mainVS(VSInput input) { + VSOutput output; output.color = float4(pow(abs(input.color.rgb), 2.2f.xxx), input.color.a); output.position = mul(float4(input.position, 1), mul(u_model, Pass_view_projection)); return output; } -struct Input { - float4 color : TEXCOORD0; -}; - -float4 mainPS(Input input) : SV_TARGET { +float4 mainPS(VSOutput input) : SV_TARGET { return input.color; } diff --git a/data/pipelines/decal.hlsl b/data/pipelines/decal.hlsl index ee0bb4bd2b..7f293d34fa 100644 --- a/data/pipelines/decal.hlsl +++ b/data/pipelines/decal.hlsl @@ -19,20 +19,6 @@ struct VSOutput { float4 position : SV_POSITION; }; -struct Input { - float3 half_extents : TEXCOORD0; - float3 pos : TEXCOORD1; - float4 rot : TEXCOORD2; - float2 uv_scale : TEXCOORD3; - float4 frag_coord : SV_POSITION; -}; - -struct Output { - float4 o0 : SV_TARGET0; - float4 o1 : SV_TARGET1; - float4 o2 : SV_TARGET2; -}; - cbuffer DC : register(b4) { uint u_gbuffer_depth; }; @@ -41,6 +27,7 @@ VSOutput mainVS(VSInput input) { VSOutput output; output.pos = input.i_pos; output.rot = input.i_rot; + output.rot.w = -output.rot.w; output.half_extents = input.i_half_extents; float3 pos = rotateByQuat(input.i_rot, input.position * input.i_half_extents); pos += input.i_pos; @@ -49,22 +36,22 @@ VSOutput mainVS(VSInput input) { return output; } -Output mainPS(Input input) { - float2 screen_uv = input.frag_coord.xy / Global_framebuffer_size; +GBufferOutput mainPS(VSOutput input) { + float2 screen_uv = input.position.xy / Global_framebuffer_size; float3 wpos = getViewPosition(u_gbuffer_depth, Global_inv_view_projection, screen_uv); - float4 r = input.rot; - r.w = -r.w; - float3 lpos = rotateByQuat(r, wpos - input.pos); + float3 lpos = rotateByQuat(input.rot, wpos - input.pos); if (any(abs(lpos) > input.half_extents)) discard; - - float4 color = sampleBindless(LinearSampler, t_texture, (lpos.xz / input.half_extents.xz * 0.5 + 0.5) * input.uv_scale); + + float2 uv = (lpos.xz / input.half_extents.xz * 0.5 + 0.5) * 
input.uv_scale; + float4 color = sampleBindless(LinearSampler, t_texture, uv); //if (color.a < 0.01) discard; color.rgb *= u_material_color.rgb; - Output output; - output.o0 = float4(color.rgb, color.a); - output.o1 = float4(0, 0, 0, 0); - output.o2 = float4(0, 0, 0, 0); + GBufferOutput output; + output.gbuffer0 = float4(color.rgb, color.a); + output.gbuffer1 = float4(0, 0, 0, 0); + output.gbuffer2 = float4(0, 0, 0, 0); + output.gbuffer3 = float4(0, 0, 0, 0); return output; } diff --git a/data/pipelines/draw2d.hlsl b/data/pipelines/draw2d.hlsl index 973992f8b9..b66d8342db 100644 --- a/data/pipelines/draw2d.hlsl +++ b/data/pipelines/draw2d.hlsl @@ -12,25 +12,20 @@ struct VSInput { float4 color : TEXCOORD2; }; -struct Output { +struct VSOutput { float4 color : TEXCOORD0; float2 uv : TEXCOORD1; float4 position : SV_POSITION; }; -Output mainVS(VSInput input) { - Output output; +VSOutput mainVS(VSInput input) { + VSOutput output; output.color = input.color; output.uv = input.uv; output.position = mul(float4(input.position, 0, 1), u_matrix); return output; } -struct Input { - float4 color : TEXCOORD0; - float2 uv : TEXCOORD1; -}; - -float4 mainPS(Input input) :SV_TARGET { +float4 mainPS(VSOutput input) :SV_TARGET { return input.color * sampleBindlessLod(LinearSampler, u_texture, input.uv, 0); } \ No newline at end of file diff --git a/data/pipelines/film_grain.hlsl b/data/pipelines/film_grain.hlsl index 39f359927a..c2d323fc9c 100644 --- a/data/pipelines/film_grain.hlsl +++ b/data/pipelines/film_grain.hlsl @@ -3,25 +3,21 @@ cbuffer Drawcall : register(b4) { float u_intensity; float u_lumamount; - uint u_source; + uint u_in_out; uint u_noise; - uint u_output; }; -float3 filmGrain(float3 in_color, uint2 frag_coord) { +[numthreads(16, 16, 1)] +void main(uint3 thread_id : SV_DispatchThreadID) { + float4 in_color = bindless_rw_textures[u_in_out][thread_id.xy]; + int2 texture_size = int2(textureSize(bindless_textures[u_noise], 0)); - uint2 ij = (frag_coord + Global_time * 1e4) 
% texture_size; + uint2 ij = (thread_id.xy + Global_random_uint2) % texture_size; float3 noise = bindless_textures[u_noise][ij].xyz; - float _luminance = lerp(0.0, luminance(in_color), u_lumamount); + float _luminance = lerp(0.0, luminance(in_color.rgb), u_lumamount); float lum = smoothstep(0.2, 0.0, _luminance) + _luminance; lum += _luminance; - noise = lerp(0, pow(lum, 4.0), noise); - return in_color + noise * u_intensity; -} -[numthreads(16, 16, 1)] -void main(uint3 thread_id : SV_DispatchThreadID) { - float3 c = bindless_textures[u_source][thread_id.xy].rgb; - bindless_rw_textures[u_output][thread_id.xy] = float4(filmGrain(c, thread_id.xy), 1); + bindless_rw_textures[u_in_out][thread_id.xy] = float4(in_color.rgb + noise * u_intensity, in_color.a); } diff --git a/data/pipelines/ibl_filter.hlsl b/data/pipelines/ibl_filter.hlsl new file mode 100644 index 0000000000..f302ae41a1 --- /dev/null +++ b/data/pipelines/ibl_filter.hlsl @@ -0,0 +1,121 @@ +//@surface +//@include "pipelines/common.hlsli" + +cbuffer Drawcall : register(b4) { + float u_filter_roughness; + int u_face; + int u_mip; + uint u_texture; +}; + +struct Output { + float2 uv : TEXCOORD0; + float4 position : SV_POSITION; +}; + +Output mainVS(uint vertex_id : SV_VertexID) { + Output output; + float4 pos = fullscreenQuad(vertex_id, output.uv); + pos.xy = pos.xy; + pos.y = -pos.y; + output.position = pos; + return output; +} + +static const uint SAMPLE_COUNT = 128u; + +// https://github.com/google/filament/blob/master/shaders/src/light_indirect.fs +float prefilteredImportanceSampling(float ipdf) { + const float numSamples = float(SAMPLE_COUNT); + const float invNumSamples = 1.0 / float(SAMPLE_COUNT); + float dim = 128; + const float omegaP = (4.0 * M_PI) / (6.0 * dim * dim); + const float invOmegaP = 1.0 / omegaP; + const float K = 4.0; + const float iblRoughnessOneLevel = 4; + float omegaS = invNumSamples * ipdf; + float mipLevel = clamp(log2(K * omegaS * invOmegaP) * 0.5, 0.0, iblRoughnessOneLevel); + 
return mipLevel; +} + +// https://github.com/JoeyDeVries/LearnOpenGL/blob/master/src/6.pbr/2.2.1.ibl_specular/2.2.1.prefilter.fs +float RadicalInverse_VdC(uint bits) { + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10; // / 0x100000000 +} + +float2 Hammersley(uint i, uint N) { + return float2(float(i)/float(N), RadicalInverse_VdC(i)); +} + +float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness) { + float a = roughness*roughness; + + float phi = 2.0 * M_PI * Xi.x; + float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y)); + float sinTheta = sqrt(1.0 - cosTheta*cosTheta); + + // from spherical coordinates to cartesian coordinates + float3 H; + H.x = cos(phi) * sinTheta; + H.y = sin(phi) * sinTheta; + H.z = cosTheta; + + // from tangent-space vector to world-space sample vector + float3 up = abs(N.z) < 0.999 ? 
float3(0.0, 0.0, 1.0) : float3(1.0, 0.0, 0.0); + float3 tangent = normalize(cross(up, N)); + float3 bitangent = cross(N, tangent); + + float3 sampleVec = tangent * H.x + bitangent * H.y + N * H.z; + return normalize(sampleVec); +} + +float4 mainPS(float2 in_uv : TEXCOORD0) : SV_TARGET { + float2 uv = in_uv * 2 - 1; + uv.y *= -1; + float3 N = 0; + + switch (u_face) { + case 0: N = float3(1, -uv.y, -uv.x); break; + case 1: N = float3(-1, -uv.y, uv.x); break; + case 2: N = float3(uv.x, 1, uv.y); break; + case 3: N = float3(uv.x, -1, -uv.y); break; + case 4: N = float3(uv.x, -uv.y, 1); break; + case 5: N = float3(-uv.x, -uv.y, -1); break; + } + + if (u_mip == 0) { + return float4(sampleCubeBindless(LinearSampler, u_texture, N).rgb, 1); + } + + N = normalize(N); + float3 R = N; + float3 V = R; + + float totalWeight = 0.0; + float3 prefilteredColor = 0; + for(uint i = 0u; i < SAMPLE_COUNT; ++i) { + float2 Xi = Hammersley(i, SAMPLE_COUNT); + float3 H = ImportanceSampleGGX(Xi, N, u_filter_roughness); + float3 L = normalize(2.0 * dot(V, H) * H - V); + + float NdotL = dot(N, L); + if(NdotL > 0.0) { + float LdotH = dot(L, H); + float NdotH = dot(N, H); + float ipdf = (4.0 * LdotH) / (D_GGX(NdotH, u_filter_roughness) * NdotH); + float mipLevel = prefilteredImportanceSampling(ipdf); + + float3 c = sampleCubeBindlessLod(LinearSampler, u_texture, L, mipLevel).rgb; + prefilteredColor += c * NdotL; + totalWeight += NdotL; + } + } + prefilteredColor = prefilteredColor / totalWeight; + + return float4(prefilteredColor, 1.0); +} diff --git a/data/pipelines/ibl_filter.shd b/data/pipelines/ibl_filter.shd deleted file mode 100644 index f0ab0bc647..0000000000 --- a/data/pipelines/ibl_filter.shd +++ /dev/null @@ -1,127 +0,0 @@ -include "pipelines/common.hlsli" - -common [[ - - cbuffer Drawcall : register(b4) { - float u_filter_roughness; - int u_face; - int u_mip; - uint u_texture; - }; -]] - -vertex_shader [[ - struct Output { - float2 uv : TEXCOORD0; - float4 position : SV_POSITION; - }; 
- - Output main(uint vertex_id : SV_VertexID) { - Output output; - float4 pos = fullscreenQuad(vertex_id, output.uv); - pos.xy = pos.xy; - pos.y = -pos.y; - output.position = pos; - return output; - } -]] - -fragment_shader [[ - static const uint SAMPLE_COUNT = 128u; - - // https://github.com/google/filament/blob/master/shaders/src/light_indirect.fs - float prefilteredImportanceSampling(float ipdf) { - const float numSamples = float(SAMPLE_COUNT); - const float invNumSamples = 1.0 / float(SAMPLE_COUNT); - float dim = 128; - const float omegaP = (4.0 * M_PI) / (6.0 * dim * dim); - const float invOmegaP = 1.0 / omegaP; - const float K = 4.0; - const float iblRoughnessOneLevel = 4; - float omegaS = invNumSamples * ipdf; - float mipLevel = clamp(log2(K * omegaS * invOmegaP) * 0.5, 0.0, iblRoughnessOneLevel); - return mipLevel; - } - - // https://github.com/JoeyDeVries/LearnOpenGL/blob/master/src/6.pbr/2.2.1.ibl_specular/2.2.1.prefilter.fs - float RadicalInverse_VdC(uint bits) { - bits = (bits << 16u) | (bits >> 16u); - bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); - bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); - bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); - bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); - return float(bits) * 2.3283064365386963e-10; // / 0x100000000 - } - - float2 Hammersley(uint i, uint N) { - return float2(float(i)/float(N), RadicalInverse_VdC(i)); - } - - float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness) { - float a = roughness*roughness; - - float phi = 2.0 * M_PI * Xi.x; - float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y)); - float sinTheta = sqrt(1.0 - cosTheta*cosTheta); - - // from spherical coordinates to cartesian coordinates - float3 H; - H.x = cos(phi) * sinTheta; - H.y = sin(phi) * sinTheta; - H.z = cosTheta; - - // from tangent-space vector to world-space sample vector - float3 up = abs(N.z) < 0.999 ? 
float3(0.0, 0.0, 1.0) : float3(1.0, 0.0, 0.0); - float3 tangent = normalize(cross(up, N)); - float3 bitangent = cross(N, tangent); - - float3 sampleVec = tangent * H.x + bitangent * H.y + N * H.z; - return normalize(sampleVec); - } - - float4 main(float2 in_uv : TEXCOORD0) : SV_TARGET { - float2 uv = in_uv * 2 - 1; - uv.y *= -1; - float3 N = 0; - - switch (u_face) { - case 0: N = float3(1, -uv.y, -uv.x); break; - case 1: N = float3(-1, -uv.y, uv.x); break; - case 2: N = float3(uv.x, 1, uv.y); break; - case 3: N = float3(uv.x, -1, -uv.y); break; - case 4: N = float3(uv.x, -uv.y, 1); break; - case 5: N = float3(-uv.x, -uv.y, -1); break; - } - - if (u_mip == 0) { - return float4(sampleCubeBindless(LinearSampler, u_texture, N).rgb, 1); - } - - N = normalize(N); - float3 R = N; - float3 V = R; - - float totalWeight = 0.0; - float3 prefilteredColor = 0; - for(uint i = 0u; i < SAMPLE_COUNT; ++i) { - float2 Xi = Hammersley(i, SAMPLE_COUNT); - float3 H = ImportanceSampleGGX(Xi, N, u_filter_roughness); - float3 L = normalize(2.0 * dot(V, H) * H - V); - - float NdotL = dot(N, L); - if(NdotL > 0.0) { - float LdotH = dot(L, H); - float NdotH = dot(N, H); - float ipdf = (4.0 * LdotH) / (D_GGX(NdotH, u_filter_roughness) * NdotH); - float mipLevel = prefilteredImportanceSampling(ipdf); - - float3 c = sampleCubeBindlessLod(LinearSampler, u_texture, L, mipLevel).rgb; - prefilteredColor += c * NdotL; - totalWeight += NdotL; - } - } - prefilteredColor = prefilteredColor / totalWeight; - - return float4(prefilteredColor, 1.0); - } -]] \ No newline at end of file diff --git a/data/pipelines/impostor.hlsl b/data/pipelines/impostor.hlsl new file mode 100644 index 0000000000..85de4b2cb8 --- /dev/null +++ b/data/pipelines/impostor.hlsl @@ -0,0 +1,253 @@ +//@surface +//@include "pipelines/common.hlsli" + +//@texture_slot "Albedo", "textures/common/white.tga" +//@texture_slot "Normal", "", "HAS_NORMAL" +//@texture_slot "Self shadow", "", "HAS_SELFSHADOW" +//@texture_slot "Depth", "", 
"HAS_DEPTHMAP" + +//@define "ALPHA_CUTOUT" + +//@uniform "Material color", "color", {1, 1, 1, 1} +//@uniform "Roughness", "normalized_float", 1 +//@uniform "Metallic", "normalized_float", 0 +//@uniform "Emission", "float", 0 +//@uniform "Translucency", "normalized_float", 0 +//@uniform "Center", "float3", {0, 0, 0} +//@uniform "Radius", "float", 1 + +struct VSOutput { + float2 uv : TEXCOORD0; + float3 normal : TEXCOORD1; + float3 tangent : TEXCOORD2; + float4 wpos : TEXCOORD3; + #if !defined DEPTH && defined HAS_SELFSHADOW + float4 shadow_coefs : TEXCOORD4; + #endif + float lod : TEXCOORD5; + float4 position : SV_POSITION; +}; +struct Input { + float3 position : TEXCOORD0; + float2 uv : TEXCOORD1; + #define ATTR(X) TEXCOORD##X + #ifdef INSTANCED + float4 i_rot_lod : ATTR(INSTANCE0_ATTR); + float4 i_pos_scale : ATTR(INSTANCE1_ATTR); + #elif defined AUTOINSTANCED + float4 i_rot : ATTR(INSTANCE0_ATTR); + float4 i_pos_lod : ATTR(INSTANCE1_ATTR); + float4 i_scale : ATTR(INSTANCE2_ATTR); + #endif +}; + +#ifdef INSTANCED +#elif defined AUTOINSTANCED +#else + #define USE_MATRIX + cbuffer ModelState : register(b4) { + row_major float4x4 model_mtx; + }; +#endif + +float2 dirToGrid(float3 vec) { + vec.y = min(vec.y, -0.001); + vec = normalize(vec); + vec.xz /= dot(1.0, abs(vec) ); + return float2(vec.x + vec.z, vec.x - vec.z) * 0.5 + 0.5; +} + + +#ifdef USE_MATRIX + VSOutput mainVS(Input input) { + VSOutput output; + float3x3 to_model_space = (float3x3)model_mtx; + #ifdef DEPTH + float3 N = normalize(Global_light_dir.xyz); + #else + float3 instance_pos = mul(float4(0, 0, 0, 1), model_mtx).xyz; + float3 N = normalize(instance_pos); + #endif + + float3x3 tangent_space; + tangent_space[0] = normalize(cross(N, float3(0, 1, 0))); + tangent_space[1] = normalize(cross(tangent_space[0], N)); + tangent_space[2] = cross(tangent_space[0], tangent_space[1]); + + float3 vd = float3(N.x, N.y, N.z); + vd = mul(vd, to_model_space); + #if !defined DEPTH && defined HAS_SELFSHADOW + float3 ld = 
mul(-Global_light_dir.xyz, to_model_space); + output.shadow_coefs = max(float4(ld.x, -ld.z, -ld.x, ld.z), 0); + output.shadow_coefs /= dot(output.shadow_coefs, 1); + #endif + float2 grid = dirToGrid(normalize(vd)); + output.uv = input.uv / 9 + int2(grid * 9) / 9.0; + + #ifdef DEPTH + // move to avoid selfshadow + float3 p = u_center.xyz + mul(input.position + float3(0, 0, u_center.y) - u_center.xyz, tangent_space); + #else + float3 p = u_center.xyz + mul(input.position - u_center.xyz, tangent_space); + #endif + p = mul(float4(p, 1), model_mtx).xyz; + + output.lod = 1; + output.tangent = tangent_space[0]; + output.normal = tangent_space[2]; + output.wpos = float4(p, 1); + + output.position = mul(output.wpos, Pass_view_projection); + return output; + } +#else + VSOutput mainVS(Input input) { + VSOutput output; + float3x3 tangent_space; + + #if defined INSTANCED + float3 instance_pos = input.i_pos_scale.xyz; + float scale = input.i_pos_scale.w; + float4 to_model_space = float4(input.i_rot_lod.xyz, sqrt(1 - dot(input.i_rot_lod.xyz, input.i_rot_lod.xyz))); + output.lod = input.i_rot_lod.w; + #else + float3 instance_pos = input.i_pos_lod.xyz; + float3 scale = input.i_scale.xyz; + float4 to_model_space = input.i_rot * float4(1, 1, 1, -1); + output.lod = input.i_pos_lod.w; + #endif + + #ifdef DEPTH + float3 N = normalize(Global_light_dir.xyz); + #else + float3 N = normalize(instance_pos); + #endif + tangent_space[0] = normalize(cross(N, float3(0, 1, 0))); + tangent_space[1] = normalize(cross(tangent_space[0], N)); + tangent_space[2] = cross(tangent_space[0], tangent_space[1]); + + float3 vd = float3(N.x, N.y, N.z); + vd = rotateByQuat(to_model_space, vd); + #if !defined DEPTH && defined HAS_SELFSHADOW + float3 ld = rotateByQuat(to_model_space, -Global_light_dir.xyz); + output.shadow_coefs = max(float4(ld.x, -ld.z, -ld.x, ld.z), 0); + output.shadow_coefs /= dot(output.shadow_coefs, 1); + #endif + float2 grid = dirToGrid(normalize(vd)); + output.uv = input.uv / 9 + int2(grid 
* 9) / 9.0; + + #ifdef DEPTH + // move to avoid selfshadow + float3 p = u_center.xyz + mul(input.position + float3(0, 0, u_center.y) - u_center.xyz, tangent_space); + #else + float3 p = u_center.xyz + mul(input.position - u_center.xyz, tangent_space); + #endif + p *= scale; + output.tangent = tangent_space[0]; + output.normal = tangent_space[2]; + output.wpos = float4(instance_pos + p, 1); + + output.position = mul(output.wpos, Pass_view_projection); + return output; + } +#endif + +Surface getSurface(VSOutput input) { + Surface data; + float4 c = sampleBindless(LinearSampler, t_albedo, input.uv) * u_material_color; + data.albedo = c.rgb; + data.alpha = c.a; + #ifdef ALPHA_CUTOUT + if(data.alpha < 0.5) discard; + #endif + float3x3 tbn = float3x3( + normalize(input.tangent), + normalize(input.normal), + normalize(cross(input.normal, input.tangent)) + ); + + data.wpos = input.wpos.xyz; + data.V = normalize(-data.wpos); + data.roughness = u_roughness; + data.metallic = u_metallic; + #ifdef HAS_NORMAL + data.N.xz = sampleBindless(LinearSampler, t_normal, input.uv).xy * 2 - 1; + data.N.y = sqrt(saturate(1 - dot(data.N.xz, data.N.xz))); + data.N = mul(data.N, tbn); + #else + data.N = Global_light_dir.xyz; + #endif + data.emission = u_emission; + data.translucency = u_translucency; + data.ao = 1; + data.motion = computeStaticObjectMotionVector(input.wpos.xyz); + + #if !defined DEPTH && defined HAS_SELFSHADOW + float4 self_shadow = sampleBindless(LinearSampler, t_self_shadow, input.uv); + data.shadow = saturate(dot(self_shadow, input.shadow_coefs)); + data.shadow *= data.shadow; + //data.shadow = 1; + data.ao = dot(self_shadow, 1) * 0.25; + #else + data.shadow = 1; + #endif + + return data; +} + +#ifdef DEPTH + void mainPS(VSOutput input) { + if (ditherLOD(input.lod, input.position.xy)) discard; + #ifdef ALPHA_CUTOUT + float4 c = sampleBindless(LinearSampler, t_albedo, input.uv); + if(c.a < 0.5) discard; + #endif + } +#elif defined DEFERRED + struct Output { + float4 
gbuffer0 : SV_Target0; + float4 gbuffer1 : SV_Target1; + float4 gbuffer2 : SV_Target2; + float4 gbuffer3 : SV_Target3; + #ifdef HAS_DEPTHMAP + float depth : SV_Depth; + #endif + }; + + Output mainPS(VSOutput input) { + Surface data = getSurface(input); + GBufferOutput gb = packSurface(data); + Output output; + output.gbuffer0 = gb.gbuffer0; + output.gbuffer1 = gb.gbuffer1; + output.gbuffer2 = gb.gbuffer2; + output.gbuffer3 = gb.gbuffer3; + #ifdef HAS_DEPTHMAP + float depth = sampleBindless(LinearSampler, t_depth, input.uv).x; + float linear_z = toLinearDepth(Pass_inv_projection, input.position.z); + output.depth = 0.1 / (linear_z + (depth - 0.5) * u_radius); // TODO remove hardcoded near plane 0.1 + output.depth = input.position.z; + #endif + + return output; + } +#else + float mainPS(VSOutput input) : SV_TARGET + { + if (ditherLOD(v_lod)) discard; + + Surface surface = getSurface(input); + float3 res = computeDirectLight(surface + , Global_light_dir.xyz + , Global_light_color.rgb * Global_light_intensity * surface.shadow); + res += surface.emission * surface.albedo; + + float linear_depth = dot(surface.wpos.xyz, Pass_view_dir.xyz); + Cluster cluster = getClusterLinearDepth(linear_depth); + //res += pointLightsLighting(cluster, surface, shadow_atlas); + res += envProbesLighting(cluster, surface); + res += reflProbesLighting(cluster, surface, u_reflection_probes); + + return float4(res, surface.alpha); + } +#endif diff --git a/data/pipelines/impostor.shd b/data/pipelines/impostor.shd deleted file mode 100644 index 1dbe742fab..0000000000 --- a/data/pipelines/impostor.shd +++ /dev/null @@ -1,279 +0,0 @@ -texture_slot { - name = "Albedo", - default_texture = "textures/common/white.tga" -} - -texture_slot { - name = "Normal", - define = "HAS_NORMAL" -} - -texture_slot { - name = "Self shadow", - define = "HAS_SELFSHADOW" -} - -texture_slot { - name = "Depth", - define = "HAS_DEPTHMAP" -} - -include "pipelines/common.hlsli" - -define "ALPHA_CUTOUT" -uniform("Material 
color", "color", {1, 1, 1, 1}) -uniform("Roughness", "normalized_float", 1) -uniform("Metallic", "normalized_float", 0) -uniform("Emission", "float", 0) -uniform("Translucency", "normalized_float", 0) -uniform("Center", "vec3") -uniform("Radius", "float") - ------------------- - -common [[ - struct VSOutput { - float2 uv : TEXCOORD0; - float3 normal : TEXCOORD1; - float3 tangent : TEXCOORD2; - float4 wpos : TEXCOORD3; - #if !defined DEPTH && defined HAS_SELFSHADOW - float4 shadow_coefs : TEXCOORD4; - #endif - float lod : TEXCOORD5; - float4 position : SV_POSITION; - }; -]] - -vertex_shader [[ - struct Input { - float3 position : TEXCOORD0; - float2 uv : TEXCOORD1; - #define ATTR(X) TEXCOORD##X - #ifdef INSTANCED - float4 i_rot_lod : ATTR(INSTANCE0_ATTR); - float4 i_pos_scale : ATTR(INSTANCE1_ATTR); - #elif defined AUTOINSTANCED - float4 i_rot : ATTR(INSTANCE0_ATTR); - float4 i_pos_lod : ATTR(INSTANCE1_ATTR); - float4 i_scale : ATTR(INSTANCE2_ATTR); - #endif - }; - - #ifdef INSTANCED - #elif defined AUTOINSTANCED - #else - #define USE_MATRIX - cbuffer ModelState : register(b4) { - row_major float4x4 model_mtx; - }; - #endif - - float2 dirToGrid(float3 vec) { - vec.y = min(vec.y, -0.001); - vec = normalize(vec); - vec.xz /= dot(1.0, abs(vec) ); - return float2(vec.x + vec.z, vec.x - vec.z) * 0.5 + 0.5; - } - - - #ifdef USE_MATRIX - VSOutput main(Input input) { - VSOutput output; - float3x3 to_model_space = (float3x3)model_mtx; - #ifdef DEPTH - float3 N = normalize(Global_light_dir.xyz); - #else - float3 instance_pos = mul(float4(0, 0, 0, 1), model_mtx).xyz; - float3 N = normalize(instance_pos); - #endif - - float3x3 tangent_space; - tangent_space[0] = normalize(cross(N, float3(0, 1, 0))); - tangent_space[1] = normalize(cross(tangent_space[0], N)); - tangent_space[2] = cross(tangent_space[0], tangent_space[1]); - - float3 vd = float3(N.x, N.y, N.z); - vd = mul(vd, to_model_space); - #if !defined DEPTH && defined HAS_SELFSHADOW - float3 ld = mul(-Global_light_dir.xyz, 
to_model_space); - output.shadow_coefs = max(float4(ld.x, -ld.z, -ld.x, ld.z), 0); - output.shadow_coefs /= dot(output.shadow_coefs, 1); - #endif - float2 grid = dirToGrid(normalize(vd)); - output.uv = input.uv / 9 + int2(grid * 9) / 9.0; - - #ifdef DEPTH - // move to avoid selfshadow - float3 p = u_center.xyz + mul(input.position + float3(0, 0, u_center.y) - u_center.xyz, tangent_space); - #else - float3 p = u_center.xyz + mul(input.position - u_center.xyz, tangent_space); - #endif - p = mul(float4(p, 1), model_mtx).xyz; - - output.lod = 1; - output.tangent = tangent_space[0]; - output.normal = tangent_space[2]; - output.wpos = float4(p, 1); - - output.position = mul(output.wpos, Pass_view_projection); - return output; - } - #else - VSOutput main(Input input) { - VSOutput output; - float3x3 tangent_space; - - #if defined INSTANCED - float3 instance_pos = input.i_pos_scale.xyz; - float scale = input.i_pos_scale.w; - float4 to_model_space = float4(input.i_rot_lod.xyz, sqrt(1 - dot(input.i_rot_lod.xyz, input.i_rot_lod.xyz))); - output.lod = input.i_rot_lod.w; - #else - float3 instance_pos = input.i_pos_lod.xyz; - float3 scale = input.i_scale.xyz; - float4 to_model_space = input.i_rot * float4(1, 1, 1, -1); - output.lod = input.i_pos_lod.w; - #endif - - #ifdef DEPTH - float3 N = normalize(Global_light_dir.xyz); - #else - float3 N = normalize(instance_pos); - #endif - tangent_space[0] = normalize(cross(N, float3(0, 1, 0))); - tangent_space[1] = normalize(cross(tangent_space[0], N)); - tangent_space[2] = cross(tangent_space[0], tangent_space[1]); - - float3 vd = float3(N.x, N.y, N.z); - vd = rotateByQuat(to_model_space, vd); - #if !defined DEPTH && defined HAS_SELFSHADOW - float3 ld = rotateByQuat(to_model_space, -Global_light_dir.xyz); - output.shadow_coefs = max(float4(ld.x, -ld.z, -ld.x, ld.z), 0); - output.shadow_coefs /= dot(output.shadow_coefs, 1); - #endif - float2 grid = dirToGrid(normalize(vd)); - output.uv = input.uv / 9 + int2(grid * 9) / 9.0; - - #ifdef 
DEPTH - // move to avoid selfshadow - float3 p = u_center.xyz + mul(input.position + float3(0, 0, u_center.y) - u_center.xyz, tangent_space); - #else - float3 p = u_center.xyz + mul(input.position - u_center.xyz, tangent_space); - #endif - p *= scale; - output.tangent = tangent_space[0]; - output.normal = tangent_space[2]; - output.wpos = float4(instance_pos + p, 1); - - output.position = mul(output.wpos, Pass_view_projection); - return output; - } - #endif -]] - -fragment_shader [[ - cbuffer DC : register(b4) { - - } - - Surface getSurface(VSOutput input) { - Surface data; - float4 c = sampleBindless(LinearSampler, t_albedo, input.uv) * u_material_color; - data.albedo = c.rgb; - data.alpha = c.a; - #ifdef ALPHA_CUTOUT - if(data.alpha < 0.5) discard; - #endif - float3x3 tbn = float3x3( - normalize(input.tangent), - normalize(input.normal), - normalize(cross(input.normal, input.tangent)) - ); - - data.wpos = input.wpos.xyz; - data.V = normalize(-data.wpos); - data.roughness = u_roughness; - data.metallic = u_metallic; - #ifdef HAS_NORMAL - data.N.xz = sampleBindless(LinearSampler, t_normal, input.uv).xy * 2 - 1; - data.N.y = sqrt(saturate(1 - dot(data.N.xz, data.N.xz))); - data.N = mul(data.N, tbn); - #else - data.N = Global_light_dir.xyz; - #endif - data.emission = u_emission; - data.translucency = u_translucency; - data.ao = 1; - data.motion = computeStaticObjectMotionVector(input.wpos.xyz); - - #if !defined DEPTH && defined HAS_SELFSHADOW - float4 self_shadow = sampleBindless(LinearSampler, t_self_shadow, input.uv); - data.shadow = saturate(dot(self_shadow, input.shadow_coefs)); - data.shadow *= data.shadow; - //data.shadow = 1; - data.ao = dot(self_shadow, 1) * 0.25; - #else - data.shadow = 1; - #endif - - return data; - } - - #ifdef DEPTH - void main(VSOutput input) { - if (ditherLOD(input.lod, input.position.xy)) discard; - #ifdef ALPHA_CUTOUT - float4 c = sampleBindless(LinearSampler, t_albedo, input.uv); - if(c.a < 0.5) discard; - #endif - } - #elif defined 
DEFERRED - struct Output { - float4 gbuffer0 : SV_Target0; - float4 gbuffer1 : SV_Target1; - float4 gbuffer2 : SV_Target2; - float4 gbuffer3 : SV_Target3; - #ifdef HAS_DEPTHMAP - float depth : SV_Depth; - #endif - }; - - Output main(VSOutput input) { - Surface data = getSurface(input); - GBufferOutput gb = packSurface(data); - Output output; - output.gbuffer0 = gb.gbuffer0; - output.gbuffer1 = gb.gbuffer1; - output.gbuffer2 = gb.gbuffer2; - output.gbuffer3 = gb.gbuffer3; - #ifdef HAS_DEPTHMAP - float depth = sampleBindless(LinearSampler, t_depth, input.uv).x; - float linear_z = toLinearDepth(Pass_inv_projection, input.position.z); - output.depth = 0.1 / (linear_z + (depth - 0.5) * u_radius); // TODO remove hardcoded near plane 0.1 - output.depth = input.position.z; - #endif - - return output; - } - #else - float main(VSOutput input) : SV_TARGET - { - if (ditherLOD(v_lod)) discard; - - Surface surface = getSurface(input); - float3 res = computeDirectLight(surface - , Global_light_dir.xyz - , Global_light_color.rgb * Global_light_intensity * surface.shadow); - res += surface.emission * surface.albedo; - - float linear_depth = dot(surface.wpos.xyz, Pass_view_dir.xyz); - Cluster cluster = getClusterLinearDepth(linear_depth); - //res += pointLightsLighting(cluster, surface, shadow_atlas); - res += envProbesLighting(cluster, surface); - res += reflProbesLighting(cluster, surface, u_reflection_probes); - - return float4(res, surface.alpha); - } - #endif -]] diff --git a/data/pipelines/lighting.hlsl b/data/pipelines/lighting.hlsl new file mode 100644 index 0000000000..ec4ad0384e --- /dev/null +++ b/data/pipelines/lighting.hlsl @@ -0,0 +1,46 @@ +//@surface +//@include "pipelines/common.hlsli" + +struct VSOutput { + float2 uv : TEXCOORD0; + float4 position : SV_POSITION; +}; + +struct VSInput { + uint vertexID : SV_VertexID; +}; + +cbuffer Textures : register(b4) { + uint u_gbuffer0; + uint u_gbuffer1; + uint u_gbuffer2; + uint u_gbuffer3; + uint u_gbuffer_depth; + uint 
u_shadowmap; + uint u_shadow_atlas; + uint u_reflection_probes; +}; + +VSOutput mainVS(VSInput input) { + VSOutput output; + output.position = fullscreenQuad(input.vertexID, output.uv); + return output; +} + +float4 mainPS(VSOutput input) : SV_Target { + float ndc_depth; + Surface surface = unpackSurface(input.uv, u_gbuffer0, u_gbuffer1, u_gbuffer2, u_gbuffer3, u_gbuffer_depth, ndc_depth); + Cluster cluster = getCluster(ndc_depth, input.position.xy); + + float4 res; + res.rgb = computeLighting(cluster + , surface + , Global_light_dir.xyz + , Global_light_color.rgb * Global_light_intensity + , u_shadowmap + , u_shadow_atlas + , u_reflection_probes + , input.position.xy); + res.a = 1; + return res; +} \ No newline at end of file diff --git a/data/pipelines/lighting.shd b/data/pipelines/lighting.shd deleted file mode 100644 index e34040e3c9..0000000000 --- a/data/pipelines/lighting.shd +++ /dev/null @@ -1,58 +0,0 @@ -include "pipelines/common.hlsli" - ------------------- - -vertex_shader [[ - struct Output { - float2 uv : TEXCOORD0; - float4 position : SV_POSITION; - }; - - struct Input { - uint vertexID : SV_VertexID; - }; - - Output main(Input input) { - Output output; - output.position = fullscreenQuad(input.vertexID, output.uv); - return output; - } -]] - ---------------------- - -fragment_shader [[ - cbuffer Textures : register(b4) { - uint u_gbuffer0; - uint u_gbuffer1; - uint u_gbuffer2; - uint u_gbuffer3; - uint u_gbuffer_depth; - uint u_shadowmap; - uint u_shadow_atlas; - uint u_reflection_probes; - }; - - struct Input { - float2 uv : TEXCOORD0; - float4 position : SV_POSITION; - }; - - float4 main(Input input) : SV_Target { - float ndc_depth; - Surface surface = unpackSurface(input.uv, u_gbuffer0, u_gbuffer1, u_gbuffer2, u_gbuffer3, u_gbuffer_depth, ndc_depth); - Cluster cluster = getCluster(ndc_depth, input.position.xy); - - float4 res; - res.rgb = computeLighting(cluster - , surface - , Global_light_dir.xyz - , Global_light_color.rgb * 
Global_light_intensity - , u_shadowmap - , u_shadow_atlas - , u_reflection_probes - , input.position.xy); - res.a = 1; - return res; - } -]] \ No newline at end of file diff --git a/data/pipelines/particles.hlsl b/data/pipelines/particles.hlsl new file mode 100644 index 0000000000..29e7058ae8 --- /dev/null +++ b/data/pipelines/particles.hlsl @@ -0,0 +1,71 @@ +//@surface +//@include "pipelines/common.hlsli" +//@uniform "Frames cols", "int", 1 +//@uniform "Frames rows", "int", 1 +//@texture_slot "Texture", "textures/common/white.tga" + +struct VSInput { + float3 i_position : TEXCOORD0; + float i_scale : TEXCOORD1; + float4 i_color : TEXCOORD2; + float i_rot : TEXCOORD3; + float i_frame : TEXCOORD4; + float i_emission : TEXCOORD5; + uint vertex_id : SV_VertexID; +}; + +struct VSOutput { + float2 uv : TEXCOORD0; + float4 color : TEXCOORD1; + float emission : TEXCOORD2; + float4 position : SV_POSITION; +}; + +cbuffer Model : register(b4) { float4x4 u_model; } + +VSOutput mainVS(VSInput input) { /* quad corner comes from vertex_id bits; the rotated, scaled corner offset is added after the model*view transform and before projection */ + float2 pos = float2(input.vertex_id & 1, (input.vertex_id & 2) * 0.5); + uint frame = uint(input.i_frame); + VSOutput output; + output.uv = (pos + float2(frame % u_frames_cols, frame / u_frames_cols)) / float2(u_frames_cols, u_frames_rows); + + float3 dir = normalize(input.i_position); + + float c = cos(input.i_rot); + float s = sin(input.i_rot); + float2x2 rotm = float2x2(c, s, -s, c); + pos = mul(rotm, pos * 2 - 1); + pos *= input.i_scale; + + output.color = input.i_color; + output.emission = input.i_emission; + output.position = mul((mul(float4(input.i_position.xyz, 1), mul(u_model, Pass_view)) + float4(pos.xy, 0, 0)), Pass_projection); + return output; +} + +float4 mainPS(VSOutput input) : SV_TARGET { /* fills a Surface from the sampled texture, vertex color and emission (remaining fields are constants) and returns computeLighting() rgb with the surface alpha */ + Surface data; + float4 c = sampleBindless(LinearSampler, t_texture, input.uv) * saturate(input.color); + data.N = 0; + data.V = 0; + data.wpos = 0; + data.albedo = c.rgb; + data.alpha = c.a; + data.emission = input.emission; + data.shadow = 1; + data.ao = 1; + data.roughness 
= 1; + data.metallic = 0; + data.translucency = 0; + + float linear_depth = dot(data.wpos.xyz, Pass_view_dir.xyz); + Cluster cluster = getClusterLinearDepth(linear_depth, input.position.xy); /* SV_POSITION is exposed as VSOutput.position; VSOutput has no frag_coord member */ + float4 o_color; + o_color.rgb = computeLighting(cluster, data, Global_light_dir.xyz, Global_light_color.rgb * Global_light_intensity, Global_shadowmap, Global_shadow_atlas, Global_reflection_probes, input.position.xy); + + #if defined ALPHA_CUTOUT + if(data.alpha < 0.5) discard; + #endif + o_color.a = data.alpha; + return o_color; +} diff --git a/data/pipelines/particles.shd b/data/pipelines/particles.shd deleted file mode 100644 index 2e47b695d1..0000000000 --- a/data/pipelines/particles.shd +++ /dev/null @@ -1,88 +0,0 @@ -include "pipelines/common.hlsli" - -uniform("Frames cols", "int") -uniform("Frames rows", "int") - -common("#define PARTICLES\n") - -texture_slot { - name = "Texture", - default_texture = "textures/common/white.tga" -} - -vertex_shader [[ - struct Input { - float3 i_position : TEXCOORD0; - float i_scale : TEXCOORD1; - float4 i_color : TEXCOORD2; - float i_rot : TEXCOORD3; - float i_frame : TEXCOORD4; - float i_emission : TEXCOORD5; - uint vertex_id : SV_VertexID; - }; - - struct Output { - float2 uv : TEXCOORD0; - float4 color : TEXCOORD1; - float emission : TEXCOORD2; - float4 position : SV_POSITION; - }; - - cbuffer Model : register(b4) { float4x4 u_model; } - - Output main(Input input) { - float2 pos = float2(input.vertex_id & 1, (input.vertex_id & 2) * 0.5); - uint frame = uint(input.i_frame); - Output output; - output.uv = (pos + float2(frame % u_frames_cols, frame / u_frames_cols)) / float2(u_frames_cols, u_frames_rows); - - float3 dir = normalize(input.i_position); - - float c = cos(input.i_rot); - float s = sin(input.i_rot); - float2x2 rotm = float2x2(c, s, -s, c); - pos = mul(rotm, pos * 2 - 1); - pos *= input.i_scale; - - output.color = input.i_color; - output.emission = input.i_emission; - output.position = 
mul((mul(float4(input.i_position.xyz, 1), mul(u_model, Pass_view)) + float4(pos.xy, 0, 0)), Pass_projection); - return output; - } -]] - -fragment_shader [[ - struct Input { - float2 uv : TEXCOORD0; - float4 color : TEXCOORD1; - float emission : TEXCOORD2; - float4 frag_coord : SV_POSITION; - }; - - float4 main(Input input) : SV_TARGET { - Surface data; - float4 c = sampleBindless(LinearSampler, t_texture, input.uv) * saturate(input.color); - data.N = 0; - data.V = 0; - data.wpos = 0; - data.albedo = c.rgb; - data.alpha = c.a; - data.emission = input.emission; - data.shadow = 1; - data.ao = 1; - data.roughness = 1; - data.metallic = 0; - data.translucency = 0; - - float linear_depth = dot(data.wpos.xyz, Pass_view_dir.xyz); - Cluster cluster = getClusterLinearDepth(linear_depth, input.frag_coord.xy); - float4 o_color; - o_color.rgb = computeLighting(cluster, data, Global_light_dir.xyz, Global_light_color.rgb * Global_light_intensity, Global_shadowmap, Global_shadow_atlas, Global_reflection_probes, input.frag_coord.xy); - - #if defined ALPHA_CUTOUT - if(data.alpha < 0.5) discard; - #endif - o_color.a = data.alpha; - return o_color; - } -]] diff --git a/data/pipelines/ssao.hlsl b/data/pipelines/ssao.hlsl index 62fdb02f23..90ba0fc032 100644 --- a/data/pipelines/ssao.hlsl +++ b/data/pipelines/ssao.hlsl @@ -13,6 +13,7 @@ cbuffer UB : register(b4) { uint u_output; }; +// get normal in view space float3 getViewNormal(float2 tex_coord) { float3 wnormal = sampleBindlessLod(LinearSamplerClamp, u_normal_buffer, tex_coord, 0).xyz * 2 - 1; float4 vnormal = mul(float4(wnormal, 0), Global_view); @@ -29,18 +30,16 @@ void main(uint3 thread_id : SV_DispatchThreadID) { float occlusion = 0; float occlusion_count = 0; - float rand = hash(view_pos.xyz + frac(Global_time) * 0.001); - float random_angle = rand * 6.283285; - float3 rot; - float depth_scale = u_radius / view_pos.z * (rand * 2 + 0.1); - rot.x = sin(random_angle); - rot.y = cos(random_angle); - rot.z = -rot.x; + + float c = 
hash(view_pos.xyz + frac(Global_time) * 0.001) * 2 - 1; + float depth_scale = u_radius / view_pos.z * (c + 1.1); /* same as the previous rand * 2 + 0.1 with rand = (c + 1) / 2; assumes hash() returns [0, 1] so the factor stays in [0.1, 2.1] and never reaches zero */ + float s = sqrt(1 - c * c); + float2x2 rot = float2x2(c, s, -s, c); rot *= depth_scale; for (int i = 0; i < 4; ++i) { float2 poisson = POISSON_DISK_4[i]; - float2 s = float2(dot(poisson, rot.yx), dot(poisson, rot.zy)); + float2 s = mul(poisson, rot); float3 vpos_a = getViewPosition(u_depth_buffer, Global_inv_projection, uv + s) - view_pos; float3 vpos_b = getViewPosition(u_depth_buffer, Global_inv_projection, uv - s) - view_pos; diff --git a/data/pipelines/standard.shd b/data/pipelines/standard.hlsl similarity index 67% rename from data/pipelines/standard.shd rename to data/pipelines/standard.hlsl index fc41476a4a..d2b0003a9c 100644 --- a/data/pipelines/standard.shd +++ b/data/pipelines/standard.hlsl @@ -1,13 +1,21 @@ -import "pipelines/surface_base.inc" +//@surface +//@include "pipelines/common.hlsli" +//@include "pipelines/surface_base.hlsli" -uniform("Material color", "color", {1, 1, 1, 1}) -uniform("Roughness", "normalized_float", 1) -uniform("Metallic", "normalized_float", 0) -uniform("Emission", "float", 0) -uniform("Translucency", "normalized_float", 0) +//@uniform "Material color", "color", {1, 1, 1, 1} +//@uniform "Roughness", "normalized_float", 1 +//@uniform "Metallic", "normalized_float", 0 +//@uniform "Emission", "float", 0 +//@uniform "Translucency", "normalized_float", 0 -surface_shader [[ - #line 11 "standard.shd" +//@texture_slot "Albedo", "textures/common/white.tga" +//@texture_slot "Normal", "textures/common/default_normal.tga" +//@texture_slot "Roughness", "textures/common/white.tga" +//@texture_slot "Metallic", "", "HAS_METALLICMAP" +//@texture_slot "Ambient occlusion", "", "HAS_AMBIENT_OCCLUSION_TEX" + +Surface getSurface(VSOutput input) { + Surface data; // TODO mip offset #ifdef UV0_ATTR float2 uv = input.uv; @@ -15,6 +23,12 @@ surface_shader [[ float2 uv = 0; #endif + #ifdef AO_ATTR + data.ao = input.ao; + #else + data.ao = 
1; + #endif + float4 c = sampleBindless(LinearSampler, t_albedo, uv/*, -1*/) * u_material_color; data.albedo = c.rgb; data.alpha = c.a; @@ -22,12 +36,6 @@ surface_shader [[ data.albedo.rgb *= input.color.rgb; #endif - #ifdef AO_ATTR - data.ao = input.ao; - #else - data.ao = 1; - #endif - #ifdef HAS_AMBIENT_OCCLUSION_TEX data.ao *= sampleBindless(LinearSampler, t_ambient_occlusion, uv).r; #endif @@ -67,4 +75,5 @@ surface_shader [[ float ndotv = abs(dot(data.N , data.V)) + 1e-5f; data.alpha = lerp(data.alpha, 1, pow(saturate(1 - ndotv), 5)); #endif -]] \ No newline at end of file + return data; +} \ No newline at end of file diff --git a/data/pipelines/surface_base.hlsli b/data/pipelines/surface_base.hlsli new file mode 100644 index 0000000000..5bdbb1e6a4 --- /dev/null +++ b/data/pipelines/surface_base.hlsli @@ -0,0 +1,244 @@ +//include "pipelines/common.hlsli" + +// you can include this file and implement getSurface function for custom surface shaders + +#if !defined GRASS + #define HAS_LOD +#endif + +struct VSOutput { + float4 wpos : TEXCOORD0; + float3 normal : TEXCOORD1; + #ifdef UV0_ATTR + float2 uv : TEXCOORD2; + #endif + #ifdef TANGENT_ATTR + float3 tangent : TEXCOORD3; + #endif + #if defined DYNAMIC || defined SKINNED + float4 prev_ndcpos_no_jitter : TEXCOORD4; + #endif + #ifdef HAS_LOD + float lod : TEXCOORD5; + #endif + #ifdef GRASS + #ifdef COLOR0_ATTR + //float4 color : TEXCOORD6; + #endif + float pos_y : TEXCOORD7; + #endif + float4 position : SV_POSITION; +}; + +#define ATTR(X) TEXCOORD##X +struct VSInput { + float3 position : ATTR(0); + float3 normal : ATTR(NORMAL_ATTR); + + #ifdef UV0_ATTR + float2 uv : ATTR(UV0_ATTR); + #endif + + #ifdef TANGENT_ATTR + float3 tangent : ATTR(TANGENT_ATTR); + #endif + + #ifdef INDICES_ATTR + int4 indices : ATTR(INDICES_ATTR); + float4 weights : ATTR(WEIGHTS_ATTR); + #endif + + #ifdef AUTOINSTANCED + float4 i_rot : ATTR(INSTANCE0_ATTR); + float4 i_pos_lod : ATTR(INSTANCE1_ATTR); + float4 i_scale : ATTR(INSTANCE2_ATTR); 
+ #elif defined INSTANCED + float4 i_rot_lod : ATTR(INSTANCE0_ATTR); + float4 i_pos_scale : ATTR(INSTANCE1_ATTR); + #elif defined DYNAMIC + float4 i_rot : ATTR(INSTANCE0_ATTR); + float4 i_pos_lod : ATTR(INSTANCE1_ATTR); + float4 i_scale : ATTR(INSTANCE2_ATTR); + float4 i_prev_rot : ATTR(INSTANCE3_ATTR); + float4 i_prev_pos_lod : ATTR(INSTANCE4_ATTR); + float4 i_prev_scale : ATTR(INSTANCE5_ATTR); + #elif defined SKINNED + #elif defined GRASS + float4 i_pos_scale : ATTR(INSTANCE0_ATTR); + float4 i_rot : ATTR(INSTANCE1_ATTR); + #ifdef COLOR0_ATTR + //float4 color : ATTR(COLOR0_ATTR) + #endif + #else + #endif +}; + +#ifdef SKINNED + cbuffer ModelState : register(b4) { + float fur_scale; + float fur_gravity; + float layers; + float padding; + row_major float4x4 mtx; + row_major float4x4 prev_matrix; + row_major float2x4 bones[255]; + } +#elif defined INSTANCED +#elif defined AUTOINSTANCED +#elif defined GRASS + cbuffer ModelState : register(b4) { + float3 u_grass_origin; + }; +#elif defined DYNAMIC +#else + cbuffer ModelState : register(b4) { + row_major float4x4 model_mtx; + }; +#endif + +Surface getSurface(VSOutput input); + +VSOutput mainVS(VSInput input) { + VSOutput output; + #ifdef HAS_LOD + output.lod = 0; + #endif + #ifdef TANGENT_ATTR + output.tangent = input.tangent; + #endif + #ifdef UV0_ATTR + output.uv = input.uv; + #endif + #ifdef AUTOINSTANCED + float3 p = input.position.xyz * input.i_scale.xyz; + output.wpos = float4(input.i_pos_lod.xyz + rotateByQuat(input.i_rot, p), 1); + output.position = mul(output.wpos, Pass_view_projection); + output.normal = rotateByQuat(input.i_rot, input.normal); + #ifdef HAS_LOD + output.lod = input.i_pos_lod.w; + #endif + #elif defined INSTANCED + float4 rot_quat = float4(input.i_rot_lod.xyz, 0); + rot_quat.w = sqrt(saturate(1 - dot(rot_quat.xyz, rot_quat.xyz))); + output.normal = rotateByQuat(rot_quat, input.normal); + #ifdef TANGENT_ATTR + output.tangent = rotateByQuat(rot_quat, input.tangent); + #endif + float3 p = 
input.position * input.i_pos_scale.w; + output.wpos = float4(input.i_pos_scale.xyz + rotateByQuat(rot_quat, p), 1); + output.position = mul(output.wpos, Pass_view_projection); + #elif defined GRASS + output.normal = rotateByQuat(input.i_rot, input.normal); + #ifdef TANGENT_ATTR + output.tangent = rotateByQuat(input.i_rot, input.tangent); + #endif + float3 p = input.position; + output.pos_y = p.y; + output.wpos = float4(input.i_pos_scale.xyz + rotateByQuat(input.i_rot, input.position * input.i_pos_scale.w), 1); + output.wpos.xyz += u_grass_origin; + #ifdef COLOR0_ATTR + //output.color = input.color; + #endif + output.position = mul(output.wpos, Pass_view_projection); + #elif defined DYNAMIC + output.normal = rotateByQuat(input.i_rot, input.normal); + #ifdef TANGENT_ATTR + output.tangent = rotateByQuat(input.i_rot, input.tangent); + #endif + output.wpos = float4(input.i_pos_lod.xyz + rotateByQuat(input.i_rot, input.position * input.i_scale.xyz), 1); + output.position = mul(output.wpos, Pass_view_projection); + output.prev_ndcpos_no_jitter = float4(input.i_prev_pos_lod.xyz + rotateByQuat(input.i_prev_rot, input.position * input.i_prev_scale.xyz), 1); + output.prev_ndcpos_no_jitter = mul(output.prev_ndcpos_no_jitter, mul(Global_view_projection_no_jitter, Global_reprojection)); + #elif defined SKINNED + float2x4 dq = mul(bones[input.indices.x], input.weights.x); + float w = dot(bones[input.indices.y][0], bones[input.indices.x][0]) < 0 ? -input.weights.y : input.weights.y; + dq += mul(bones[input.indices.y], w); + w = dot(bones[input.indices.z][0], bones[input.indices.x][0]) < 0 ? -input.weights.z : input.weights.z; + dq += mul(bones[input.indices.z], w); + w = dot(bones[input.indices.w][0], bones[input.indices.x][0]) < 0 ? 
-input.weights.w : input.weights.w; + dq += mul(bones[input.indices.w], w); + + dq *= 1 / length(dq[0]); + + float3x3 m = (float3x3)mtx; + output.normal = mul(rotateByQuat(dq[0], input.normal), m); + #ifdef TANGENT_ATTR + output.tangent = mul(rotateByQuat(dq[0], input.tangent), m); + #endif + float3 mpos; + #ifdef FUR + v_fur_layer = gl_InstanceID / layers; + mpos = input.position + (input.normal + float3(0, -fur_gravity * input.fur_layer, 0)) * input.fur_layer * fur_scale; + #else + mpos = input.position; + #endif + output.wpos = mul(float4(transformByDualQuat(dq, mpos), 1), mtx); + output.position = mul(output.wpos, Pass_view_projection); + // TODO previous frame bone positions + output.prev_ndcpos_no_jitter = mul(float4(transformByDualQuat(dq, mpos), 1), prev_matrix); + output.prev_ndcpos_no_jitter = mul(output.prev_ndcpos_no_jitter, mul(Global_view_projection_no_jitter, Global_reprojection)); + #else + float3x3 rot_mtx = (float3x3)model_mtx; + output.normal = mul(input.normal, rot_mtx); + #ifdef TANGENT_ATTR + output.tangent = mul(input.tangent, rot_mtx); + #endif + output.wpos = mul(float4(input.position, 1), model_mtx); + output.position = mul(output.wpos, Pass_view_projection); + #endif + return output; +} + +Surface getSurfaceEx(VSOutput input) { + Surface data = getSurface(input); + float4 p = mul(input.wpos, Global_view_projection_no_jitter); + #if defined DYNAMIC || defined SKINNED + float2 prev_pos_projected = input.prev_ndcpos_no_jitter.xy / input.prev_ndcpos_no_jitter.w; + data.motion = prev_pos_projected.xy - p.xy / p.w; + #else + data.motion = computeStaticObjectMotionVector(input.wpos.xyz); + #endif + data.V = normalize(-data.wpos); + return data; +} + +#ifdef DEPTH + void mainPS(VSOutput input) { + #ifdef HAS_LOD + if (ditherLOD(input.lod, input.position.xy)) discard; + #endif + } +#elif defined DEFERRED || defined GRASS + GBufferOutput mainPS(VSOutput input) { + #ifdef HAS_LOD + if (ditherLOD(input.lod, input.position.xy)) discard; + #endif + + 
return packSurface(getSurfaceEx(input)); + } +#else + cbuffer Drawcall2 : register(b5) { + uint u_shadowmap; + uint u_shadow_atlas; + uint u_reflection_probes; + }; + + float4 mainPS(VSOutput input, float4 frag_coord : SV_POSITION) : SV_TARGET{ /* forward path: optional dither-LOD discard, then computeLighting() on the surface from getSurfaceEx(). NOTE(review): VSOutput already declares SV_POSITION as `position`, so frag_coord repeats that semantic - confirm the compiler accepts the duplicate */ + #ifdef HAS_LOD + if (ditherLOD(input.lod, input.position.xy)) discard; + #endif + + Surface data = getSurfaceEx(input); + + float linear_depth = dot(data.wpos.xyz, Pass_view_dir.xyz); + Cluster cluster = getClusterLinearDepth(linear_depth, frag_coord.xy); + float4 result; + result.rgb = computeLighting(cluster, data, Global_light_dir.xyz, Global_light_color.rgb * Global_light_intensity, u_shadowmap, u_shadow_atlas, u_reflection_probes, frag_coord.xy); /* .xy matches the other computeLighting callers and avoids implicit float4 truncation */ + + #if defined ALPHA_CUTOUT + if(data.alpha < 0.5) discard; + #endif + result.a = data.alpha; + return result; + } +#endif diff --git a/data/pipelines/surface_base.inc b/data/pipelines/surface_base.inc deleted file mode 100644 index 1a5913e67e..0000000000 --- a/data/pipelines/surface_base.inc +++ /dev/null @@ -1,286 +0,0 @@ -include "pipelines/common.hlsli" - -function surface_shader_ex(args) - args.vertex_preface = args.vertex_preface or "" - if args.texture_slots == nil then - args.texture_slots = { - { - name = "Albedo", - default_texture = "textures/common/white.tga" - }, - { - name = "Normal", - default_texture = "textures/common/default_normal.tga" - }, - { - name = "Roughness", - default_texture = "textures/common/white.tga" - }, - { - name = "Metallic", - define = "HAS_METALLICMAP" - }, - { - name = "Ambient occlusion", - define = "HAS_AMBIENT_OCCLUSION_TEX" - } - } - end - for _, slot in ipairs(args.texture_slots) do - texture_slot(slot) - end - - common [[ - #if !defined GRASS && !defined PARTICLES - #define HAS_LOD - #endif - - struct VSOutput { - float4 wpos : TEXCOORD0; - float3 normal : TEXCOORD1; - #ifdef UV0_ATTR - float2 uv : TEXCOORD2; - #endif - #ifdef TANGENT_ATTR - float3 tangent : TEXCOORD3; - #endif - #if defined DYNAMIC || defined SKINNED - float4 
prev_ndcpos_no_jitter : TEXCOORD4; - #endif - #ifdef HAS_LOD - float lod : TEXCOORD5; - #endif - #ifdef GRASS - #ifdef COLOR0_ATTR - //float4 color : TEXCOORD6; - #endif - float pos_y : TEXCOORD7; - #endif - float4 position : SV_POSITION; - }; - ]] - - vertex_shader(args.vertex_preface .. [[ - #line 65 "surface_base.inc" - #define ATTR(X) TEXCOORD##X - struct Input { - float3 position : ATTR(0); - float3 normal : ATTR(NORMAL_ATTR); - #ifdef INDICES_ATTR - int4 indices : ATTR(INDICES_ATTR); - float4 weights : ATTR(WEIGHTS_ATTR); - #endif - #ifdef AUTOINSTANCED - float4 i_rot : ATTR(INSTANCE0_ATTR); - float4 i_pos_lod : ATTR(INSTANCE1_ATTR); - float4 i_scale : ATTR(INSTANCE2_ATTR); - #elif defined INSTANCED - float4 i_rot_lod : ATTR(INSTANCE0_ATTR); - float4 i_pos_scale : ATTR(INSTANCE1_ATTR); - #elif defined DYNAMIC - float4 i_rot : ATTR(INSTANCE0_ATTR); - float4 i_pos_lod : ATTR(INSTANCE1_ATTR); - float4 i_scale : ATTR(INSTANCE2_ATTR); - float4 i_prev_rot : ATTR(INSTANCE3_ATTR); - float4 i_prev_pos_lod : ATTR(INSTANCE4_ATTR); - float4 i_prev_scale : ATTR(INSTANCE5_ATTR); - #elif defined SKINNED - #elif defined GRASS - float4 i_pos_scale : ATTR(INSTANCE0_ATTR); - float4 i_rot : ATTR(INSTANCE1_ATTR); - #ifdef COLOR0_ATTR - //float4 color : ATTR(COLOR0_ATTR) - #endif - #else - #endif - - #ifdef UV0_ATTR - float2 uv : ATTR(UV0_ATTR); - #endif - #ifdef TANGENT_ATTR - float3 tangent : ATTR(TANGENT_ATTR); - #endif - }; - - #ifdef SKINNED - cbuffer ModelState : register(b4) { - float fur_scale; - float fur_gravity; - float layers; - float padding; - row_major float4x4 mtx; - row_major float4x4 prev_matrix; - row_major float2x4 bones[255]; - } - #elif defined INSTANCED - #elif defined AUTOINSTANCED - #elif defined GRASS - cbuffer ModelState : register(b4) { - float3 u_grass_origin; - }; - #elif defined DYNAMIC - #else - cbuffer ModelState : register(b4) { - row_major float4x4 model_mtx; - }; - #endif - - VSOutput main(Input input) { - VSOutput output; - #ifdef HAS_LOD - 
output.lod = 0; - #endif - #ifdef TANGENT_ATTR - output.tangent = input.tangent; - #endif - #ifdef UV0_ATTR - output.uv = input.uv; - #endif - #ifdef AUTOINSTANCED - float3 p = input.position.xyz * input.i_scale.xyz; - output.wpos = float4(input.i_pos_lod.xyz + rotateByQuat(input.i_rot, p), 1); - output.position = mul(output.wpos, Pass_view_projection); - output.normal = rotateByQuat(input.i_rot, input.normal); - #ifdef HAS_LOD - output.lod = input.i_pos_lod.w; - #endif - #elif defined INSTANCED - float4 rot_quat = float4(input.i_rot_lod.xyz, 0); - rot_quat.w = sqrt(saturate(1 - dot(rot_quat.xyz, rot_quat.xyz))); - output.normal = rotateByQuat(rot_quat, input.normal); - #ifdef TANGENT_ATTR - output.tangent = rotateByQuat(rot_quat, input.tangent); - #endif - float3 p = input.position * input.i_pos_scale.w; - output.wpos = float4(input.i_pos_scale.xyz + rotateByQuat(rot_quat, p), 1); - output.position = mul(output.wpos, Pass_view_projection); - #elif defined GRASS - output.normal = rotateByQuat(input.i_rot, input.normal); - #ifdef TANGENT_ATTR - output.tangent = rotateByQuat(input.i_rot, input.tangent); - #endif - float3 p = input.position; - output.pos_y = p.y; - output.wpos = float4(input.i_pos_scale.xyz + rotateByQuat(input.i_rot, input.position * input.i_pos_scale.w), 1); - output.wpos.xyz += u_grass_origin; - #ifdef COLOR0_ATTR - //output.color = input.color; - #endif - output.position = mul(output.wpos, Pass_view_projection); - #elif defined DYNAMIC - output.normal = rotateByQuat(input.i_rot, input.normal); - #ifdef TANGENT_ATTR - output.tangent = rotateByQuat(input.i_rot, input.tangent); - #endif - output.wpos = float4(input.i_pos_lod.xyz + rotateByQuat(input.i_rot, input.position * input.i_scale.xyz), 1); - output.position = mul(output.wpos, Pass_view_projection); - output.prev_ndcpos_no_jitter = float4(input.i_prev_pos_lod.xyz + rotateByQuat(input.i_prev_rot, input.position * input.i_prev_scale.xyz), 1); - output.prev_ndcpos_no_jitter = 
mul(output.prev_ndcpos_no_jitter, mul(Global_view_projection_no_jitter, Global_reprojection)); - #elif defined SKINNED - float2x4 dq = mul(bones[input.indices.x], input.weights.x); - float w = dot(bones[input.indices.y][0], bones[input.indices.x][0]) < 0 ? -input.weights.y : input.weights.y; - dq += mul(bones[input.indices.y], w); - w = dot(bones[input.indices.z][0], bones[input.indices.x][0]) < 0 ? -input.weights.z : input.weights.z; - dq += mul(bones[input.indices.z], w); - w = dot(bones[input.indices.w][0], bones[input.indices.x][0]) < 0 ? -input.weights.w : input.weights.w; - dq += mul(bones[input.indices.w], w); - - dq *= 1 / length(dq[0]); - - float3x3 m = (float3x3)mtx; - output.normal = mul(rotateByQuat(dq[0], input.normal), m); - #ifdef TANGENT_ATTR - output.tangent = mul(rotateByQuat(dq[0], input.tangent), m); - #endif - float3 mpos; - #ifdef FUR - v_fur_layer = gl_InstanceID / layers; - mpos = input.position + (input.normal + float3(0, -fur_gravity * input.fur_layer, 0)) * input.fur_layer * fur_scale; - #else - mpos = input.position; - #endif - output.wpos = mul(float4(transformByDualQuat(dq, mpos), 1), mtx); - output.position = mul(output.wpos, Pass_view_projection); - // TODO previous frame bone positions - output.prev_ndcpos_no_jitter = mul(float4(transformByDualQuat(dq, mpos), 1), prev_matrix); - output.prev_ndcpos_no_jitter = mul(output.prev_ndcpos_no_jitter, mul(Global_view_projection_no_jitter, Global_reprojection)); - #else - float3x3 rot_mtx = (float3x3)model_mtx; - output.normal = mul(input.normal, rot_mtx); - #ifdef TANGENT_ATTR - output.tangent = mul(input.tangent, rot_mtx); - #endif - output.wpos = mul(float4(input.position, 1), model_mtx); - output.position = mul(output.wpos, Pass_view_projection); - #endif - return output; - } - ]]) - - fragment_shader([[ - typedef VSOutput Input; - - Surface getSurface(Input input) { - Surface data; - data.motion = 0.0f.xx; - float4 p = mul(input.wpos, Global_view_projection_no_jitter); - #if defined 
DYNAMIC || defined SKINNED - float2 prev_pos_projected = input.prev_ndcpos_no_jitter.xy / input.prev_ndcpos_no_jitter.w; - data.motion = prev_pos_projected.xy - p.xy / p.w; - #else - data.motion = computeStaticObjectMotionVector(input.wpos.xyz); - #endif - ]] .. args.fragment .. [[ - #line 234 "surface_base.inc" - data.V = normalize(-data.wpos); - return data; - } - - #ifdef DEPTH - void main(Input input) { - #ifdef HAS_LOD - if (ditherLOD(input.lod, input.position.xy)) discard; - #endif - } - #elif defined DEFERRED || defined GRASS - GBufferOutput main(Input input) { - #ifdef HAS_LOD - if (ditherLOD(input.lod, input.position.xy)) discard; - #endif - - return packSurface(getSurface(input)); - } - #else - cbuffer Drawcall2 : register(b5) { - uint u_shadowmap; - uint u_shadow_atlas; - uint u_reflection_probes; - }; - - float4 main(Input input, float4 frag_coord : SV_POSITION) : SV_TARGET{ - #ifdef HAS_LOD - if (ditherLOD(input.lod, input.position.xy)) discard; - #endif - - Surface data = getSurface(input); - - float linear_depth = dot(data.wpos.xyz, Pass_view_dir.xyz); - Cluster cluster = getClusterLinearDepth(linear_depth, frag_coord.xy); - float4 result; - result.rgb = computeLighting(cluster, data, Global_light_dir.xyz, Global_light_color.rgb * Global_light_intensity, u_shadowmap, u_shadow_atlas, u_reflection_probes, frag_coord); - - #if defined ALPHA_CUTOUT - if(data.alpha < 0.5) discard; - #endif - result.a = data.alpha; - return result; - } - #endif - ]]) -end - -function surface_shader(code) - local args = { fragment = code } - surface_shader_ex(args) -end - diff --git a/data/pipelines/tdao.hlsl b/data/pipelines/tdao.hlsl index 275a84eead..172360d329 100644 --- a/data/pipelines/tdao.hlsl +++ b/data/pipelines/tdao.hlsl @@ -1,12 +1,11 @@ //@include "pipelines/common.hlsli" +// top-down ambient occlusion + cbuffer Drawcall : register(b4) { + float4 u_offset; + float2 u_size; float u_intensity; - float u_width; - float u_height; - float u_offset0; - float 
u_offset1; - float u_offset2; float u_range; float u_half_depth_range; float u_scale; @@ -18,12 +17,14 @@ cbuffer Drawcall : register(b4) { [numthreads(16, 16, 1)] void main(uint3 thread_id : SV_DispatchThreadID) { - if (any(thread_id.xy > uint2(u_width, u_height))) return; - - float2 screen_uv = thread_id.xy / float2(u_width, u_height); - float3 wpos = getViewPosition(u_depth_buffer, Global_inv_view_projection, screen_uv); + // compute td-space position + float2 screen_uv = thread_id.xy / u_size; + float3 pos_td = getViewPosition(u_depth_buffer, Global_inv_view_projection, screen_uv); + pos_td += u_offset.xyz; + pos_td.y += u_depth_offset; - float2 uv = (wpos.xz + float2(u_offset0, u_offset2)) / u_range; + // compute uv in tdao texture space + float2 uv = pos_td.xz / u_range; #ifdef _ORIGIN_BOTTOM_LEFT uv = uv * float2(1, -1); #endif @@ -31,20 +32,24 @@ void main(uint3 thread_id : SV_DispatchThreadID) { if (any(uv < -1)) return; uv = saturate(uv * 0.5 + 0.5); - float4 v = bindless_rw_textures[u_gbufferB][thread_id.xy]; + // create random rotation matrix float c = hash(float2(thread_id.xy)) * 2 - 1; float s = sqrt(1 - c * c); - + float2x2 rot = mul(u_scale, float2x2(c, s, -s, c)); + + // compute tdao float ao = 0; - float2x2 rot = mul(u_scale, float2x2(c, s, -s, c)); for (int i = 0; i < 16; ++i) { - float td_depth = sampleBindlessLod(LinearSamplerClamp, u_topdown_depthmap, uv + mul(POISSON_DISK_16[i], rot), 0).r; - td_depth = (td_depth * 2 - 1) * u_half_depth_range; - ao += saturate((-(wpos.y + u_offset1) - u_depth_offset + td_depth)); + float2 uv_iter = uv + mul(POISSON_DISK_16[i], rot); + float td_depth_ndc = sampleBindlessLod(LinearSamplerClamp, u_topdown_depthmap, uv_iter, 0).r; + float td_depth = (td_depth_ndc * 2 - 1) * u_half_depth_range; + ao += saturate(td_depth - pos_td.y); } ao *= u_intensity / 16; - v.w = v.w * (1 - ao); - bindless_rw_textures[u_gbufferB][thread_id.xy] = v; + // add tdao to ao + float4 gbufferB_value = 
bindless_rw_textures[u_gbufferB][thread_id.xy]; + gbufferB_value.w = gbufferB_value.w * (1 - ao); + bindless_rw_textures[u_gbufferB][thread_id.xy] = gbufferB_value; } diff --git a/data/pipelines/textured_quad.hlsl b/data/pipelines/textured_quad.hlsl deleted file mode 100644 index e77dc319f5..0000000000 --- a/data/pipelines/textured_quad.hlsl +++ /dev/null @@ -1,41 +0,0 @@ -//@surface -//@include "pipelines/common.hlsli" - -cbuffer Drawcall : register(b4) { - float4 u_offset_scale; - float4 u_r_mask; - float4 u_g_mask; - float4 u_b_mask; - float4 u_a_mask; - float4 u_offsets; - uint u_texture; -}; - -struct Output { - float2 uv : TEXCOORD0; - float4 position : SV_POSITION; -}; - -Output mainVS(uint vertex_id : SV_VertexID) { - Output output; - float4 pos = fullscreenQuad(vertex_id, output.uv); - pos.xy = pos.xy * u_offset_scale.zw + u_offset_scale.xy; - output.position = pos; - return output; -} - -struct Input { - float2 uv : TEXCOORD0; -}; - -// TODO is this pixel perfect? -float4 mainPS(Input input) : SV_TARGET { - float2 uv = input.uv; - float4 t = sampleBindlessLod(LinearSamplerClamp, u_texture, uv, 0); - return float4( - dot(t, u_r_mask) + u_offsets.r, - dot(t, u_g_mask) + u_offsets.g, - dot(t, u_b_mask) + u_offsets.b, - dot(t, u_a_mask) + u_offsets.a - ); -} diff --git a/data/pipelines/water.hlsl b/data/pipelines/water.hlsl index de71e2a0e5..9392f94565 100644 --- a/data/pipelines/water.hlsl +++ b/data/pipelines/water.hlsl @@ -35,13 +35,7 @@ struct VSInput { #endif }; -#ifndef AUTOINSTANCED - cbuffer Model : register(b4) { - float4x4 u_model; - }; -#endif - -struct Output { +struct VSOutput { float2 uv : TEXCOORD0; float3 normal : TEXCOORD1; float3 tangent : TEXCOORD2; @@ -52,11 +46,17 @@ struct Output { float4 position : SV_POSITION; }; -Output mainVS(VSInput input) { +#ifndef AUTOINSTANCED + cbuffer Model : register(b4) { + float4x4 u_model; + }; +#endif + +VSOutput mainVS(VSInput input) { static const float3 normal = float3(0, 1, 0); static const float3 
tangent = float3(1, 0, 0); - Output output; + VSOutput output; output.uv = input.uv; #if defined AUTOINSTANCED output.normal = rotateByQuat(input.i_rot, normal); @@ -86,16 +86,6 @@ cbuffer Textures : register(b5) { uint u_bg; }; -struct Input { - float2 uv : TEXCOORD0; - float3 normal : TEXCOORD1; - float3 tangent : TEXCOORD2; - float4 wpos : TEXCOORD3; - #ifdef _HAS_ATTR2 - //float2 masks : TEXCOORD4; - #endif -}; - float2 raycast(float3 csOrig, float3 csDir, float stride, float jitter) { float3 csEndPoint = csOrig + abs(csOrig.z * 0.1) * csDir; @@ -209,7 +199,7 @@ float3 getSurfaceNormal(float2 uv, float normal_strength, out float h00) return N; } -float4 mainPS(Input input) : SV_TARGET +float4 mainPS(VSOutput input) : SV_TARGET { float3 V = normalize(-input.wpos.xyz); float3 L = Global_light_dir.xyz; diff --git a/data/pipelines/world_grid.shd b/data/pipelines/world_grid.hlsl similarity index 54% rename from data/pipelines/world_grid.shd rename to data/pipelines/world_grid.hlsl index 5fa25c581d..9d2fca9e37 100644 --- a/data/pipelines/world_grid.shd +++ b/data/pipelines/world_grid.hlsl @@ -1,13 +1,15 @@ -import "pipelines/surface_base.inc" +//@surface +//@include "pipelines/common.hlsli" +//@include "pipelines/surface_base.hlsli" -uniform("Material color", "color", {1, 1, 1, 1}) -uniform("Roughness", "normalized_float", 1) -uniform("Metallic", "normalized_float", 0) -uniform("Emission", "float", 0) -uniform("Translucency", "normalized_float", 0) +//@uniform "Material color", "color", {1, 1, 1, 1} +//@uniform "Roughness", "normalized_float", 1 +//@uniform "Metallic", "normalized_float", 0 +//@uniform "Emission", "float", 0 +//@uniform "Translucency", "normalized_float", 0 -surface_shader [[ - #line 11 "world_grid.shd" +Surface getSurface(VSOutput input) { + Surface data; float3 t = fmod(abs(input.wpos.xyz + Global_camera_world_pos.xyz + 0.5), float3(2.0f.xxx)); float ff = dot(floor(t), float3(1.0f.xxx)); ff = fmod(ff, 2); @@ -21,4 +23,5 @@ surface_shader [[ 
data.emission = u_emission; data.translucency = u_translucency; data.shadow = 1; -]] \ No newline at end of file + return data; +} \ No newline at end of file diff --git a/src/renderer/draw_stream.cpp b/src/renderer/draw_stream.cpp index 3a51515073..fd5dc8f685 100644 --- a/src/renderer/draw_stream.cpp +++ b/src/renderer/draw_stream.cpp @@ -206,11 +206,7 @@ struct BinderShaderBufferData { struct CreateProgramData { CreateProgramData(IAllocator& allocator) - : sources(allocator) - , prefixes(allocator) - , srcs(allocator) - , prfxs(allocator) - , types(allocator) + : source(allocator) , name(allocator) , decl(gpu::PrimitiveType::NONE) {} @@ -218,11 +214,9 @@ struct CreateProgramData { gpu::ProgramHandle program; gpu::StateFlags state; gpu::VertexDecl decl; - Array sources; - Array srcs; - Array prefixes; - Array prfxs; - Array types; + String source; + const char* src; + gpu::ShaderType type; String name; }; @@ -379,12 +373,30 @@ DrawStream& DrawStream::createSubstream() { return *new (NewPlaceholder(), data) DrawStream(renderer); } +static const char* getAttrDefine(u32 idx) { + switch (idx) { + case 0 : return "#define _HAS_ATTR0\n"; + case 1 : return "#define _HAS_ATTR1\n"; + case 2 : return "#define _HAS_ATTR2\n"; + case 3 : return "#define _HAS_ATTR3\n"; + case 4 : return "#define _HAS_ATTR4\n"; + case 5 : return "#define _HAS_ATTR5\n"; + case 6 : return "#define _HAS_ATTR6\n"; + case 7 : return "#define _HAS_ATTR7\n"; + case 8 : return "#define _HAS_ATTR8\n"; + case 9 : return "#define _HAS_ATTR9\n"; + case 10 : return "#define _HAS_ATTR10\n"; + case 11 : return "#define _HAS_ATTR11\n"; + case 12 : return "#define _HAS_ATTR12\n"; + default: ASSERT(false); return ""; + } +} + void DrawStream::createProgram(gpu::ProgramHandle prog , gpu::StateFlags state , const gpu::VertexDecl& decl - , const char** srcs - , const gpu::ShaderType* types - , u32 num + , const char* src + , gpu::ShaderType type , const char** prefixes , u32 prefixes_count , const char* name @@ 
-393,21 +405,39 @@ void DrawStream::createProgram(gpu::ProgramHandle prog data->program = prog; data->state = state; data->decl = decl; - data->sources.reserve(num); - data->srcs.resize(num); - data->types.resize(num); - for (u32 i = 0; i < num; ++i) { - data->sources.emplace(srcs[i], gpu::getAllocator()); - data->srcs[i] = data->sources[i].c_str(); - data->types[i] = types[i]; + data->type = type; + data->source = String(gpu::getAllocator()); + for (u32 i = 0; i < decl.attributes_count; ++i) { + data->source.append(getAttrDefine(i)); } - - data->prefixes.reserve(prefixes_count); - data->prfxs.resize(prefixes_count); + data->source = R"#( + #define TextureHandle int + #define TextureCubeArrayHandle int + + Texture2D bindless_textures[] : register(t0, space1); + TextureCubeArray bindless_cube_arrays[] : register(t0, space2); + Texture2DArray bindless_2D_arrays[] : register(t0, space3); + TextureCube bindless_cubemaps[] : register(t0, space4); + ByteAddressBuffer bindless_buffers[] : register(t0, space5); + RWTexture2D bindless_rw_textures[] : register(u0, space0); + RWByteAddressBuffer bindless_rw_buffers[] : register(u0, space1); + + SamplerState LinearSamplerClamp : register(s0); + SamplerState LinearSampler : register(s1); + + #define sampleCubeBindlessLod(sampler, index, uv, lod) bindless_cubemaps[index].Sample((sampler), (uv), (lod)) + #define sampleCubeBindless(sampler, index, uv) bindless_cubemaps[index].Sample((sampler), (uv)) + #define sampleBindless(sampler, index, uv) bindless_textures[index].Sample((sampler), (uv)) + #define sampleBindlessLod(sampler, index, uv, lod) bindless_textures[index].SampleLevel((sampler), (uv), (lod)) + #define sampleBindlessOffset(sampler, index, uv, offset) bindless_textures[index].Sample((sampler), (uv), (offset)) + #define sampleBindlessLodOffset(sampler, index, uv, lod, offset) bindless_textures[index].SampleLevel((sampler), (uv), (lod), (offset)) + #define sampleCubeArrayBindlessLod(sampler, index, uv, lod) 
bindless_cube_arrays[index].SampleLevel((sampler), (uv), (lod)) + )#"; for (u32 i = 0; i < prefixes_count; ++i) { - data->prefixes.emplace(prefixes[i], gpu::getAllocator()); - data->prfxs[i] = data->prefixes[i].c_str(); + data->source.append(prefixes[i], "\n"); } + data->source.append(src); + data->src = data->source.c_str(); data->name = name; write(Instruction::CREATE_PROGRAM, data); } @@ -762,11 +792,8 @@ void DrawStream::run() { gpu::createProgram(data->program , data->state , data->decl - , data->srcs.begin() - , data->types.begin() - , data->sources.size() - , data->prfxs.begin() - , data->prfxs.size() + , data->src + , data->type , data->name.c_str() ); LUMIX_DELETE(gpu::getAllocator(), data); diff --git a/src/renderer/draw_stream.h b/src/renderer/draw_stream.h index 76f248c3e5..cc7f90903f 100644 --- a/src/renderer/draw_stream.h +++ b/src/renderer/draw_stream.h @@ -14,7 +14,7 @@ struct DrawStream { DrawStream(DrawStream&& rhs); ~DrawStream(); - void createProgram(gpu::ProgramHandle prog, gpu::StateFlags state, const gpu::VertexDecl& decl, const char** srcs, const gpu::ShaderType* types, u32 num, const char** prefixes, u32 prefixes_count, const char* name); + void createProgram(gpu::ProgramHandle prog, gpu::StateFlags state, const gpu::VertexDecl& decl, const char* srcs, gpu::ShaderType type, const char** prefixes, u32 prefixes_count, const char* name); void createBuffer(gpu::BufferHandle buffer, gpu::BufferFlags flags, size_t size, const void* data, const char* debug_name); void createTexture(gpu::TextureHandle handle, u32 w, u32 h, u32 depth, gpu::TextureFormat format, gpu::TextureFlags flags, const char* debug_name); void createTextureView(gpu::TextureHandle view, gpu::TextureHandle texture, u32 layer, u32 mip); diff --git a/src/renderer/editor/fbx_importer.cpp b/src/renderer/editor/fbx_importer.cpp index 421bbb1937..07f3e1b74f 100644 --- a/src/renderer/editor/fbx_importer.cpp +++ b/src/renderer/editor/fbx_importer.cpp @@ -1330,7 +1330,7 @@ void 
FBXImporter::createImpostorTextures(Model* model, ImpostorTexturesContext& else { const AABB& aabb = model->getAABB(); const Vec3 center = (aabb.max + aabb.min) * 0.5f; - f << "shader \"/pipelines/impostor.shd\"\n"; + f << "shader \"/pipelines/impostor.hlsl\"\n"; f << "texture \"" << src_info.basename << "_impostor0.tga\"\n"; f << "texture \"" << src_info.basename << "_impostor1.tga\"\n"; f << "texture \"" << src_info.basename << "_impostor2.tga\"\n"; @@ -1377,7 +1377,7 @@ bool FBXImporter::writeMaterials(const Path& src, const ImportConfig& cfg, bool } m_out_file.clear(); - writeString("shader \"/pipelines/standard.shd\"\n"); + writeString("shader \"/pipelines/standard.hlsl\"\n"); if (material.alpha_cutout) writeString("define \"ALPHA_CUTOUT\"\n"); if (material.textures[2].is_valid) writeString("uniform \"Metallic\", 1.000000"); diff --git a/src/renderer/editor/render_plugins.cpp b/src/renderer/editor/render_plugins.cpp index 25938ab458..c76d4cb37b 100644 --- a/src/renderer/editor/render_plugins.cpp +++ b/src/renderer/editor/render_plugins.cpp @@ -970,7 +970,7 @@ struct MaterialPlugin final : AssetBrowser::IPlugin, AssetCompiler::IPlugin { bool canCreateResource() const override { return true; } const char* getDefaultExtension() const override { return "mat"; } - void createResource(OutputMemoryStream& blob) override { blob << "shader \"/pipelines/standard.shd\""; } + void createResource(OutputMemoryStream& blob) override { blob << "shader \"/pipelines/standard.hlsl\""; } bool compile(const Path& src) override { return m_app.getAssetCompiler().copyCompile(src); } const char* getLabel() const override { return "Material"; } @@ -2988,9 +2988,7 @@ struct ModelPlugin final : AssetBrowser::IPlugin, AssetCompiler::IPlugin { )#"; m_downscale_program = gpu::allocProgramHandle(); - const gpu::ShaderType type = gpu::ShaderType::COMPUTE; - const char* srcs[] = { downscale_src }; - stream.createProgram(m_downscale_program, gpu::StateFlags::NONE, 
gpu::VertexDecl(gpu::PrimitiveType::NONE), srcs, &type, 1, nullptr, 0, "downscale"); + stream.createProgram(m_downscale_program, gpu::StateFlags::NONE, gpu::VertexDecl(gpu::PrimitiveType::NONE), downscale_src, gpu::ShaderType::COMPUTE, nullptr, 0, "downscale"); } ASSERT(src_w % dst_w == 0); @@ -3285,7 +3283,6 @@ struct ShaderPlugin final : AssetBrowser::IPlugin, AssetCompiler::IPlugin { explicit ShaderPlugin(StudioApp& app) : m_app(app) { - app.getAssetCompiler().registerExtension("shd", Shader::TYPE); app.getAssetCompiler().registerExtension("hlsl", Shader::TYPE); } @@ -3317,61 +3314,19 @@ struct ShaderPlugin final : AssetBrowser::IPlugin, AssetCompiler::IPlugin { } file.close(); - if (Path::hasExtension(path, "hlsl")) { - findHLSLIncludes(StringView((const char*)content.data(), (u32)content.size()), path); - return; - } - - lua_State* L = luaL_newstate(); - luaL_openlibs(L); - - struct Context { - const Path& path; - ShaderPlugin* plugin; - u8* content; - u32 content_len; - int idx; - } ctx = { path, this, content.getMutableData(), (u32)content.size(), 0 }; - - lua_pushlightuserdata(L, &ctx); - lua_setfield(L, LUA_GLOBALSINDEX, "this"); - - auto reg_dep = [](lua_State* L) -> int { - lua_getfield(L, LUA_GLOBALSINDEX, "this"); - Context* that = LuaWrapper::toType(L, -1); - lua_pop(L, 1); - const char* path = LuaWrapper::checkArg(L, 1); - that->plugin->m_app.getAssetCompiler().registerDependency(that->path, Path(path)); - return 0; - }; - - lua_pushcclosure(L, reg_dep, "include", 0); - lua_setfield(L, LUA_GLOBALSINDEX, "include"); - lua_pushcclosure(L, reg_dep, "import", 0); - lua_setfield(L, LUA_GLOBALSINDEX, "import"); - - static const char* preface = - "local new_g = setmetatable({include = include, import = import}, {__index = function() return function() end end })\n" - "setfenv(1, new_g)\n"; - - OutputMemoryStream tmp(m_app.getAllocator()); - tmp.write(preface, stringLength(preface)); - tmp.write(content.data(), content.size()); - - if 
(LuaWrapper::luaL_loadbuffer(L, (const char*)tmp.data(), tmp.size(), path.c_str()) != 0) { - logError(path, ": ", lua_tostring(L, -1)); - lua_pop(L, 2); - lua_close(L); - return; - } - - if (lua_pcall(L, 0, 0, 0) != 0) { - logError(lua_tostring(L, -1)); - lua_pop(L, 2); - lua_close(L); - return; + const StringView needle = "//@include \""; + StringView view((const char*)content.data(), (u32)content.size()); + for (;;) { + const char* inc = find(view, needle); + if (!inc) return; + + StringView dep_path; + dep_path.begin = inc + needle.size(); + dep_path.end = dep_path.begin + 1; + while (dep_path.end < view.end && *dep_path.end != '"') ++dep_path.end; + m_app.getAssetCompiler().registerDependency(path, Path(dep_path)); + view.begin = dep_path.end; } - lua_close(L); } void addSubresources(AssetCompiler& compiler, const Path& path) override { @@ -3396,7 +3351,6 @@ struct ShaderIncludePlugin final : AssetBrowser::IPlugin, AssetCompiler::IPlugin explicit ShaderIncludePlugin(StudioApp& app) : m_app(app) { - app.getAssetCompiler().registerExtension("inc", SHADER_INCLUDE_TYPE); app.getAssetCompiler().registerExtension("hlsli", SHADER_INCLUDE_TYPE); } @@ -3434,7 +3388,7 @@ struct EnvironmentProbePlugin final : PropertyGrid::IPlugin { Renderer* renderer = static_cast(system_manager.getSystem("renderer")); ResourceManagerHub& rm = engine.getResourceManager(); m_pipeline = Pipeline::create(*renderer, PipelineType::PROBE); - m_ibl_filter_shader = rm.load(Path("pipelines/ibl_filter.shd")); + m_ibl_filter_shader = rm.load(Path("pipelines/ibl_filter.hlsl")); } bool saveCubemap(u64 probe_guid, const Vec4* data, u32 texture_size, u32 num_src_mips, u32 num_saved_mips) { @@ -3581,9 +3535,12 @@ struct EnvironmentProbePlugin final : PropertyGrid::IPlugin { m_pipeline->setViewport(viewport); m_pipeline->render(false); - stream.setFramebufferCube(cubemap, i, 0); + gpu::TextureHandle view = gpu::allocTextureHandle(); + stream.createTextureView(view, cubemap, i, 0); const 
gpu::BindlessHandle side_tex = gpu::getBindlessHandle(m_pipeline->getOutput()); - m_pipeline->renderTexturedQuad(side_tex, i != 2 && i != 3, i == 2 || i == 3); + gpu::RWBindlessHandle dst = gpu::getRWBindlessHandle(view); + m_pipeline->blit(side_tex, dst, {(i32)texture_size, (i32)texture_size}, i != 2 && i != 3, i == 2 || i == 3); + stream.destroy(view); } if (job.is_reflection) { @@ -4913,10 +4870,11 @@ struct EditorUIRenderPlugin final : StudioApp::GUIPlugin const Vec2 offset(-1 + (float)-draw_data->DisplayPos.x * 2.f / w, 1 + (float)draw_data->DisplayPos.y * 2.f / h); if (new_program) { - const char* fs = - R"#(struct Input { - float4 color : TEXCOORD0; + const char* src = + R"#(struct VSInput { + float2 pos : TEXCOORD0; float2 uv : TEXCOORD1; + float4 color : TEXCOORD2; }; cbuffer ImGuiState : register(b4) { @@ -4925,34 +4883,14 @@ struct EditorUIRenderPlugin final : StudioApp::GUIPlugin uint c_texture; }; - float4 main(Input input) : SV_Target { - float4 tc = sampleBindlessLod(LinearSamplerClamp, c_texture, input.uv, 0); - return float4( - pow(abs(tc.rgb)/*to silence warning*/, (1/2.2).xxx) * input.color.rgb, - input.color.a * tc.a - ); - })#"; - const char* vs = - R"#(cbuffer ImGuiState : register(b4) { - float2 c_scale; - float2 c_offset; - uint c_texture; - }; - - struct Input { - float2 pos : TEXCOORD0; - float2 uv : TEXCOORD1; - float4 color : TEXCOORD2; - }; - - struct Output { + struct VSOutput { float4 color : TEXCOORD0; float2 uv : TEXCOORD1; float4 position : SV_POSITION; }; - Output main(Input input) { - Output output; + VSOutput mainVS(VSInput input) { + VSOutput output; output.color = input.color; output.uv = input.uv; float2 p = input.pos * c_scale + c_offset; @@ -4960,16 +4898,21 @@ struct EditorUIRenderPlugin final : StudioApp::GUIPlugin return output; } + float4 mainPS(VSOutput input) : SV_Target { + float4 tc = sampleBindlessLod(LinearSamplerClamp, c_texture, input.uv, 0); + return float4( + pow(abs(tc.rgb)/*to silence warning*/, (1/2.2).xxx) 
* input.color.rgb, + input.color.a * tc.a + ); + } )#"; - const char* srcs[] = {vs, fs}; - gpu::ShaderType types[] = {gpu::ShaderType::VERTEX, gpu::ShaderType::FRAGMENT}; gpu::VertexDecl decl(gpu::PrimitiveType::TRIANGLES); decl.addAttribute(0, 2, gpu::AttributeType::FLOAT, 0); decl.addAttribute(8, 2, gpu::AttributeType::FLOAT, 0); decl.addAttribute(16, 4, gpu::AttributeType::U8, gpu::Attribute::NORMALIZED); const gpu::StateFlags blend_state = gpu::getBlendStateBits(gpu::BlendFactors::SRC_ALPHA, gpu::BlendFactors::ONE_MINUS_SRC_ALPHA, gpu::BlendFactors::SRC_ALPHA, gpu::BlendFactors::ONE_MINUS_SRC_ALPHA); const gpu::StateFlags state = gpu::StateFlags::SCISSOR_TEST | blend_state; - stream.createProgram(program, state, decl, srcs, types, 2, nullptr, 0, "imgui shader"); + stream.createProgram(program, state, decl, src, gpu::ShaderType::SURFACE, nullptr, 0, "imgui shader"); } stream.setCurrentWindow(vp->PlatformHandle); @@ -5213,10 +5156,10 @@ struct StudioAppPlugin : StudioApp::IPlugin AssetCompiler& asset_compiler = m_app.getAssetCompiler(); - const char* shader_exts[] = {"shd", "hlsl"}; + const char* shader_exts[] = {"hlsl"}; asset_compiler.addPlugin(m_shader_plugin, Span(shader_exts)); - const char* inc_exts[] = {"inc", "hlsli"}; + const char* inc_exts[] = {"hlsli"}; asset_compiler.addPlugin(m_shader_include_plugin, Span(inc_exts)); const char* texture_exts[] = {"png", "jpg", "jpeg", "tga", "raw", "ltc"}; diff --git a/src/renderer/gpu/gpu.h b/src/renderer/gpu/gpu.h index 859d7b838a..b85133951f 100644 --- a/src/renderer/gpu/gpu.h +++ b/src/renderer/gpu/gpu.h @@ -79,7 +79,6 @@ enum class PrimitiveType : u8 { enum class ShaderType : u32 { VERTEX, FRAGMENT, - GEOMETRY, COMPUTE, SURFACE }; @@ -262,7 +261,7 @@ ProgramHandle allocProgramHandle(); QueryHandle createQuery(QueryType type); -void createProgram(ProgramHandle prog, StateFlags state, const VertexDecl& decl, const char** srcs, const ShaderType* types, u32 num, const char** prefixes, u32 prefixes_count, const char* 
name); +void createProgram(ProgramHandle prog, StateFlags state, const VertexDecl& decl, const char* src, ShaderType type, const char* name); void createBuffer(BufferHandle handle, BufferFlags flags, size_t size, const void* data, const char* debug_name); void createTexture(TextureHandle handle, u32 w, u32 h, u32 depth, TextureFormat format, TextureFlags flags, const char* debug_name); void createTextureView(TextureHandle view, TextureHandle texture, u32 layer, u32 mip); diff --git a/src/renderer/gpu/gpu_dx12.cpp b/src/renderer/gpu/gpu_dx12.cpp index d7ef23154e..238d6043bb 100644 --- a/src/renderer/gpu/gpu_dx12.cpp +++ b/src/renderer/gpu/gpu_dx12.cpp @@ -253,23 +253,19 @@ struct Query { }; struct Program { - Program(IAllocator& allocator) - : vs(allocator) - , ps(allocator) - , gs(allocator) - , cs(allocator) - {} + Program(IAllocator& allocator) {} - OutputMemoryStream vs; - OutputMemoryStream ps; - OutputMemoryStream gs; - OutputMemoryStream cs; + ID3DBlob* vs = nullptr; + ID3DBlob* ps = nullptr; + ID3DBlob* cs = nullptr; D3D12_INPUT_ELEMENT_DESC attributes[16]; u32 attribute_count = 0; StateFlags state; D3D12_PRIMITIVE_TOPOLOGY primitive_topology; D3D12_PRIMITIVE_TOPOLOGY_TYPE primitive_topology_type; - RuntimeHash32 content_hash; + // for CS, there's 1:1 mapping from `shader_hash` to PSO + // for VS/PS, there's 1:1 mapping from `shader_hash` and RT formats to PSO + StableHash32 shader_hash; #ifdef LUMIX_DEBUG StaticString<64> name; #endif @@ -326,29 +322,13 @@ struct FrameBuffer { }; struct ShaderCompiler { - struct Input { - const VertexDecl& decl; - Span srcs; - Span types; - Span prefixes; - }; - - static void set(ShaderType type, const void* data, u64 size, Program& program) { - ASSERT(size > 0); - OutputMemoryStream* str; - switch(type) { - case ShaderType::COMPUTE: str = &program.cs; break; - case ShaderType::VERTEX: str = &program.vs; break; - case ShaderType::FRAGMENT: str = &program.ps; break; - case ShaderType::GEOMETRY: str = &program.gs; break; - 
case ShaderType::SURFACE: ASSERT(false); return; - } - str->resize(size); - memcpy(str->getMutableData(), data, size); - } + ShaderCompiler(IAllocator& allocator) + : m_allocator(allocator, "shader compiler") + , m_cache(m_allocator) {} bool compile(const VertexDecl& decl - , const Input& input + , const char* src + , ShaderType type , const char* name , Program& program) { @@ -365,133 +345,47 @@ struct ShaderCompiler { program.attributes[i].InstanceDataStepRate = instanced ? 1 : 0; } - auto compile_stage = [&](ShaderType type, OutputMemoryStream& out, OutputMemoryStream* secondary = nullptr) -> bool { - const char* tmp[128]; - const u32 c = filter(input, type, tmp); - if (c == 0) { - out.clear(); - return true; - } - if (c > (u32)input.prefixes.length() + decl.attributes_count) { - const StableHash32 hash = computeHash(tmp, c); - // TODO surface shader cache - if (m_use_cache && type != ShaderType::SURFACE) { - auto iter = m_cache.find(hash); - if (iter.isValid()) { - set(type, iter.value().data.data(), iter.value().data.size(), program); - return true; - } - } + const StableHash32 hash(src, stringLength(src)); + if (type == ShaderType::SURFACE) { + // TODO surface shader cache + program.vs = compileStage(hash, src, "vs_5_1", name, "mainVS"); + if (!program.vs) return false; - String hlsl(m_allocator); - for (u32 i = 0; i < c; ++i) hlsl.append(tmp[i]); - - if (type == ShaderType::SURFACE) { - ID3DBlob* blob = compile(hash, hlsl.c_str(), ShaderType::VERTEX, name, "mainVS"); - if (!blob) return false; - set(ShaderType::VERTEX, blob->GetBufferPointer(), blob->GetBufferSize(), program); - ASSERT(blob->GetBufferSize() > 0); - blob->Release(); - - blob = compile(hash, hlsl.c_str(), ShaderType::FRAGMENT, name, "mainPS"); - if (!blob) return false; - set(ShaderType::FRAGMENT, blob->GetBufferPointer(), blob->GetBufferSize(), program); - ASSERT(blob->GetBufferSize() > 0); - blob->Release(); - } - else { - ID3DBlob* blob = compile(hash, hlsl.c_str(), type, name); - if 
(!blob) return false; - set(type, blob->GetBufferPointer(), blob->GetBufferSize(), program); - ASSERT(blob->GetBufferSize() > 0); - blob->Release(); - } + program.ps = compileStage(hash, src, "ps_5_1", name, "mainPS"); + if (!program.ps) return false; + } + else { + ASSERT(type == ShaderType::COMPUTE); + auto iter = m_cache.find(hash); + if (iter.isValid()) { + program.cs = iter.value(); return true; } - return false; - }; - bool compiled = compile_stage(ShaderType::VERTEX, program.vs); - compiled = compiled && compile_stage(ShaderType::FRAGMENT, program.ps); - if (program.vs.size() == 0) { - compiled = compiled && compile_stage(ShaderType::SURFACE, program.vs, &program.ps); + program.cs = compileStage(hash, src, "cs_5_1", name, "main"); + if (!program.cs) return false; + if (program.cs->GetBufferSize() == 0) { + program.cs->Release(); + program.cs = nullptr; + return false; + } } - compiled = compiled && compile_stage(ShaderType::COMPUTE, program.cs); - compiled = compiled && compile_stage(ShaderType::GEOMETRY, program.gs); - RollingHasher hasher; + RollingStableHasher hasher; hasher.begin(); hasher.update(program.attributes, sizeof(program.attributes[0]) * program.attribute_count); hasher.update(&program.state, sizeof(program.state)); hasher.update(&program.primitive_topology, sizeof(program.primitive_topology)); hasher.update(&program.primitive_topology_type, sizeof(program.primitive_topology_type)); - hasher.update(program.vs.data(), (u32)program.vs.size()); - hasher.update(program.ps.data(), (u32)program.ps.size()); - hasher.update(program.cs.data(), (u32)program.cs.size()); - hasher.update(program.gs.data(), (u32)program.gs.size()); - program.content_hash = hasher.end(); + if (program.vs) hasher.update(program.vs->GetBufferPointer(), (u32)program.vs->GetBufferSize()); + if (program.ps) hasher.update(program.ps->GetBufferPointer(), (u32)program.ps->GetBufferSize()); + if (program.cs) hasher.update(program.cs->GetBufferPointer(), 
(u32)program.cs->GetBufferSize()); + program.shader_hash = hasher.end(); - return compiled; - } - - ShaderCompiler(IAllocator& allocator) - : m_allocator(allocator, "shader compiler") - , m_cache(m_allocator) {} - - static u32 filter(const Input& input, ShaderType type, const char* (&out)[128]) - { - ASSERT(input.srcs.length() == input.types.length()); - out[0] = getTypeDefine(type); - out[1] = "#define LUMIX_DX_SHADER\n"; - out[2] = R"#( - #define TextureHandle int - #define TextureCubeArrayHandle int - - Texture2D bindless_textures[] : register(t0, space1); - TextureCubeArray bindless_cube_arrays[] : register(t0, space2); - Texture2DArray bindless_2D_arrays[] : register(t0, space3); - TextureCube bindless_cubemaps[] : register(t0, space4); - ByteAddressBuffer bindless_buffers[] : register(t0, space5); - RWTexture2D bindless_rw_textures[] : register(u0, space0); - RWByteAddressBuffer bindless_rw_buffers[] : register(u0, space1); - - SamplerState LinearSamplerClamp : register(s0); - SamplerState LinearSampler : register(s1); - - #define sampleCubeBindlessLod(sampler, index, uv, lod) bindless_cubemaps[index].Sample((sampler), (uv), (lod)) - #define sampleCubeBindless(sampler, index, uv) bindless_cubemaps[index].Sample((sampler), (uv)) - #define sampleBindless(sampler, index, uv) bindless_textures[index].Sample((sampler), (uv)) - #define sampleBindlessLod(sampler, index, uv, lod) bindless_textures[index].SampleLevel((sampler), (uv), (lod)) - #define sampleBindlessOffset(sampler, index, uv, offset) bindless_textures[index].Sample((sampler), (uv), (offset)) - #define sampleBindlessLodOffset(sampler, index, uv, lod, offset) bindless_textures[index].SampleLevel((sampler), (uv), (lod), (offset)) - #define sampleCubeArrayBindlessLod(sampler, index, uv, lod) bindless_cube_arrays[index].SampleLevel((sampler), (uv), (lod)) - )#"; - for (u32 i = 0; i < input.decl.attributes_count; ++i) { - out[i + 3] = getAttrDefine(i); - } - for(u32 i = 0; i < input.prefixes.length(); ++i) { 
- out[i + 3 + input.decl.attributes_count] = input.prefixes[i]; - } - - u32 sc = 0; - for(u32 i = 0; i < input.srcs.length(); ++i) { - if(input.types[i] != type) continue; - out[input.prefixes.length() + input.decl.attributes_count + sc + 3] = input.srcs[i]; - ++sc; - } - return sc ? sc + input.prefixes.length() + input.decl.attributes_count + 3 : 0; - }; - - static StableHash32 computeHash(const char** srcs, u32 count) { - RollingStableHasher hasher; - hasher.begin(); - for (u32 i = 0; i < count; ++i) { - hasher.update(srcs[i], (u32)strlen(srcs[i])); - } - return hasher.end(); + return true; } - ID3DBlob* compile(StableHash32 hash, const char* src, ShaderType type, const char* name, const char* entry_point = "main") { + ID3DBlob* compileStage(StableHash32 hash, const char* src, const char* target, const char* name, const char* entry_point) { ID3DBlob* output = NULL; ID3DBlob* errors = NULL; HRESULT hr = D3DCompile(src, @@ -500,7 +394,7 @@ struct ShaderCompiler { NULL, NULL, entry_point, - type == ShaderType::VERTEX ? "vs_5_1" : (type == ShaderType::COMPUTE ? 
"cs_5_1" : "ps_5_1"), + target, D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION, 0, &output, @@ -515,28 +409,26 @@ struct ShaderCompiler { if (FAILED(hr)) return nullptr; } ASSERT(output); - if (m_use_cache) { - CachedShader cached(m_allocator); - cached.data.write(output->GetBufferPointer(), output->GetBufferSize()); - m_cache.insert(hash, static_cast(cached)); + if (output->GetBufferSize() == 0) { + output->Release(); + return nullptr; } + m_cache.insert(hash, output); return output; }; - bool m_use_cache = true; - - void save(const char* filename) { + void saveCache(const char* filename) { os::OutputFile file; if (file.open(filename)) { u32 version = 0; bool success = file.write(&version, sizeof(version)); for (auto iter = m_cache.begin(), end = m_cache.end(); iter != end; ++iter) { const StableHash32 hash = iter.key(); - const CachedShader& s = iter.value(); - const u32 size = (u32)s.data.size(); + ID3DBlob* blob = iter.value(); + const u32 size = (u32)blob->GetBufferSize(); success = file.write(&hash, sizeof(hash)) && success; success = file.write(&size, sizeof(size)) && success; - success = file.write(s.data.data(), size) && success; + success = file.write(blob->GetBufferPointer(), size) && success; } if (!success) { logError("Could not write ", filename); @@ -545,7 +437,7 @@ struct ShaderCompiler { } } - void load(const char* filename) { + void loadCache(const char* filename) { PROFILE_FUNCTION(); os::InputFile file; if (file.open(filename)) { @@ -558,10 +450,14 @@ struct ShaderCompiler { while (file.read(&hash, sizeof(hash))) { u32 size; if (file.read(&size, sizeof(size))) { - CachedShader value(m_allocator); - value.data.resize(size); - if (!file.read(value.data.getMutableData(), size)) break; - m_cache.insert(hash, value); + ID3DBlob* blob; + HRESULT res = D3DCreateBlob(size, &blob); + if (FAILED(res)) { + logError("Failed to create blob"); + break; + } + if 
(!file.read(blob->GetBufferPointer(), size)) break; + m_cache.insert(hash, blob); } else { break; } @@ -571,42 +467,13 @@ struct ShaderCompiler { } static const char* getTypeDefine(gpu::ShaderType type) { - switch (type) { - case ShaderType::COMPUTE: return "#define LUMIX_COMPUTE_SHADER\n"; - case ShaderType::GEOMETRY: return "#define LUMIX_GEOMETRY_SHADER\n"; - case ShaderType::FRAGMENT: return "#define LUMIX_FRAGMENT_SHADER\n"; - case ShaderType::VERTEX: return "#define LUMIX_VERTEX_SHADER\n"; - case ShaderType::SURFACE: return ""; - } - ASSERT(false); return ""; } - static const char* getAttrDefine(u32 idx) { - switch (idx) { - case 0 : return "#define _HAS_ATTR0\n"; - case 1 : return "#define _HAS_ATTR1\n"; - case 2 : return "#define _HAS_ATTR2\n"; - case 3 : return "#define _HAS_ATTR3\n"; - case 4 : return "#define _HAS_ATTR4\n"; - case 5 : return "#define _HAS_ATTR5\n"; - case 6 : return "#define _HAS_ATTR6\n"; - case 7 : return "#define _HAS_ATTR7\n"; - case 8 : return "#define _HAS_ATTR8\n"; - case 9 : return "#define _HAS_ATTR9\n"; - case 10 : return "#define _HAS_ATTR10\n"; - case 11 : return "#define _HAS_ATTR11\n"; - case 12 : return "#define _HAS_ATTR12\n"; - default: ASSERT(false); return ""; - } - } - TagAllocator m_allocator; - struct CachedShader { - CachedShader(IAllocator& allocator) : data(allocator) {} - OutputMemoryStream data; - }; - HashMap m_cache; + + // cache source code -> binary blob + HashMap m_cache; }; struct PSOCache { @@ -615,14 +482,14 @@ struct PSOCache { {} ID3D12PipelineState* getPipelineStateCompute(ID3D12Device* device, ID3D12RootSignature* root_signature, ProgramHandle program) { - auto iter = cache.find(program->content_hash); + auto iter = cache.find(program->shader_hash); if (iter.isValid()) return iter.value(); - if (program->cs.size() == 0) return nullptr; + if (!program->cs) return nullptr; Program& p = *program; D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {}; - desc.CS = {p.cs.data(), p.cs.size()}; + desc.CS = 
{p.cs->GetBufferPointer(), p.cs->GetBufferSize()}; desc.NodeMask = 1; desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; desc.pRootSignature = root_signature; @@ -630,7 +497,7 @@ struct PSOCache { ID3D12PipelineState* pso; HRESULT hr = device->CreateComputePipelineState(&desc, IID_PPV_ARGS(&pso)); ASSERT(hr == S_OK); - cache.insert(program->content_hash, pso); + cache.insert(program->shader_hash, pso); return pso; } @@ -642,12 +509,12 @@ struct PSOCache { ASSERT(program); Program& p = *program; - RollingHasher hasher; + RollingStableHasher hasher; hasher.begin(); - hasher.update(&p.content_hash, sizeof(p.content_hash)); + hasher.update(&p.shader_hash, sizeof(p.shader_hash)); hasher.update(&fb.ds_format, sizeof(fb.ds_format)); hasher.update(&fb.formats[0], sizeof(fb.formats[0]) * fb.count); - const RuntimeHash32 hash = hasher.end(); + const StableHash32 hash = hasher.end(); auto iter = cache.find(hash); if (iter.isValid()) { @@ -655,12 +522,11 @@ struct PSOCache { return iter.value(); } - if (program->vs.size() + program->ps.size() + program->cs.size() + program->gs.size() == 0) return nullptr; + if (!program->vs && !program->ps && !program->cs) return nullptr; D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; - if (p.vs.size() > 0) desc.VS = {p.vs.data(), p.vs.size()}; - if (p.ps.size() > 0) desc.PS = {p.ps.data(), p.ps.size()}; - if (p.gs.size() > 0) desc.GS = {p.gs.data(), p.gs.size()}; + if (p.vs) desc.VS = {p.vs->GetBufferPointer(), p.vs->GetBufferSize()}; + if (p.ps) desc.PS = {p.ps->GetBufferPointer(), p.ps->GetBufferSize()}; desc.PrimitiveTopologyType = program->primitive_topology_type; @@ -802,7 +668,9 @@ struct PSOCache { return pso; } - HashMap cache; + // TODO separate compute and graphics cache + // TODO graphics cache should be [framebuffer][shader_hash] -> PSO, and [framebuffer] can be computed once in setFramebuffer + HashMap cache; ID3D12PipelineState* last = nullptr; }; @@ -1686,7 +1554,7 @@ void preinit(IAllocator& allocator, bool load_renderdoc) { } void 
shutdown() { - d3d->shader_compiler.save(".shader_cache_dx"); + d3d->shader_compiler.saveCache(".lumix/shader_cache_dx"); for (Frame& frame : d3d->frames) { frame.clear(); @@ -1947,7 +1815,7 @@ bool init(void* hwnd, InitFlags flags) { for (TextureHandle& h : d3d->current_framebuffer.attachments) h = INVALID_TEXTURE; - d3d->shader_compiler.load(".shader_cache_dx"); + d3d->shader_compiler.loadCache(".lumix/shader_cache_dx"); { D3D12_QUERY_HEAP_DESC queryHeapDesc = {}; @@ -2296,9 +2164,9 @@ void createBuffer(BufferHandle buffer, BufferFlags flags, size_t size, const voi desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; desc.Flags = shader_buffer ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; - HRESULT hr = d3d->device->CreateCommittedResource(&props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, NULL, IID_PPV_ARGS(&buffer->resource)); + buffer->state = mappable ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COMMON; + HRESULT hr = d3d->device->CreateCommittedResource(&props, D3D12_HEAP_FLAG_NONE, &desc, buffer->state, NULL, IID_PPV_ARGS(&buffer->resource)); ASSERT(hr == S_OK); - buffer->state = D3D12_RESOURCE_STATE_COMMON; D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; srv_desc = {}; @@ -2367,11 +2235,12 @@ void createTextureView(TextureHandle view_handle, TextureHandle texture_handle, view.resource = texture.resource; view.state = texture.state; view.is_view = true; - + const bool is_srgb = u32(texture.flags & TextureFlags::SRGB); const bool no_mips = u32(texture.flags & TextureFlags::NO_MIPS); const bool is_3d = u32(texture.flags & TextureFlags::IS_3D); const bool is_cubemap = u32(texture.flags & TextureFlags::IS_CUBE); + const bool compute_write = u32(texture.flags & TextureFlags::COMPUTE_WRITE); const u32 mip_count = no_mips ? 
1 : 1 + log2(maximum(view.w, view.h)) - mip; if (no_mips) mip = 0; ASSERT(!is_3d); @@ -2396,7 +2265,7 @@ void createTextureView(TextureHandle view_handle, TextureHandle texture_handle, uav_desc.Texture2DArray.PlaneSlice = 0; uav_desc.Texture2DArray.FirstArraySlice = layer; - d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, &uav_desc); + d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, compute_write ? &uav_desc : nullptr); return; } @@ -2420,7 +2289,7 @@ void createTextureView(TextureHandle view_handle, TextureHandle texture_handle, uav_desc.Texture2DArray.PlaneSlice = 0; uav_desc.Texture2DArray.FirstArraySlice = layer; - d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, &uav_desc); + d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, compute_write ? &uav_desc : nullptr); return; } @@ -2439,7 +2308,7 @@ void createTextureView(TextureHandle view_handle, TextureHandle texture_handle, uav_desc.Texture2D.MipSlice = mip; uav_desc.Texture2D.PlaneSlice = 0; - d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, &uav_desc); + d3d->srv_heap.alloc(d3d->device, view.heap_id, texture.resource, srv_desc, compute_write ? 
&uav_desc : nullptr); } void createTexture(TextureHandle handle, u32 w, u32 h, u32 depth, TextureFormat format, TextureFlags flags, const char* debug_name) { @@ -2950,11 +2819,8 @@ void update(BufferHandle buffer, const void* data, size_t size) { void createProgram(ProgramHandle program , StateFlags state , const VertexDecl& decl - , const char** srcs - , const ShaderType* types - , u32 num - , const char** prefixes - , u32 prefixes_count + , const char* src + , ShaderType type , const char* name) { ASSERT(program); @@ -2984,8 +2850,7 @@ void createProgram(ProgramHandle program default: ASSERT(0); break; } - ShaderCompiler::Input args { decl, Span(srcs, num), Span(types, num), Span(prefixes, prefixes_count) }; - d3d->shader_compiler.compile(decl, args, name, *program); + d3d->shader_compiler.compile(decl, src, type, name, *program); } } // namespace diff --git a/src/renderer/pipeline.cpp b/src/renderer/pipeline.cpp index 39be80b054..a0309b6183 100644 --- a/src/renderer/pipeline.cpp +++ b/src/renderer/pipeline.cpp @@ -36,24 +36,20 @@ #include // TODO crashes: - // TODO crash - open texture array // TODO crash when context menu is outside of main window + // TODO env probe lighting is off // TODO nice to have: // TODO semaphore in job system to wake workers? // TODO 3d ui in scene view // TODO property groups in property grid - // TODO move shader cache in .lumix ? // TODO render graph // TODO vertex pulling // TODO 200 MB in memory profiler - // TODO rewrite shaders to compute // TODO temporal upsample // TODO temporal SSAO // TODO shader cleanup - // TODO get rid of #line - // TODO fuzzy search - // TODO remove lua from shaders, meta, ... + // TODO remove meta, ... // TODO static samplers // TODO icons over some debugs, e.g. 
TDAO // TODO switch plugins to use new genie stuff like plugin() function @@ -120,6 +116,7 @@ struct GlobalState { Vec4 to_prev_frame_camera_translation; Vec4 light_direction; Vec4 light_color; + IVec2 random_uint2; IVec2 framebuffer_size; Vec2 pixel_jitter; Vec2 prev_pixel_jitter; @@ -513,8 +510,8 @@ struct PipelineImpl final : Pipeline { m_viewport.w = m_viewport.h = 800; ResourceManagerHub& rm = renderer.getEngine().getResourceManager(); m_tonemap_shader = rm.load(Path("pipelines/tonemap.hlsl")); - m_textured_quad_shader = rm.load(Path("pipelines/textured_quad.hlsl")); - m_lighting_shader = rm.load(Path("pipelines/lighting.shd")); + m_blit_shader = rm.load(Path("pipelines/blit.hlsl")); + m_lighting_shader = rm.load(Path("pipelines/lighting.hlsl")); m_draw2d_shader = rm.load(Path("pipelines/draw2d.hlsl")); m_debug_shape_shader = rm.load(Path("pipelines/debug_shape.hlsl")); m_instancing_shader = rm.load(Path("pipelines/instancing.hlsl")); @@ -596,7 +593,7 @@ struct PipelineImpl final : Pipeline { for (gpu::BufferHandle b : m_buffers) stream.destroy(b); m_tonemap_shader->decRefCount(); - m_textured_quad_shader->decRefCount(); + m_blit_shader->decRefCount(); m_lighting_shader->decRefCount(); m_draw2d_shader->decRefCount(); m_debug_shape_shader->decRefCount(); @@ -1000,21 +997,30 @@ struct PipelineImpl final : Pipeline { stream.drawArrays(indices_offset, indices_count); } - void renderTexturedQuad(gpu::BindlessHandle texture_bindless, bool flip_x, bool flip_y) override { + void blit(gpu::BindlessHandle src, gpu::RWBindlessHandle dst, IVec2 size, bool flip_x, bool flip_y) override { struct { - Vec4 offset_scale = Vec4(0, 0, 1, 1); Vec4 r_mask = Vec4(1, 0, 0, 0); Vec4 g_mask = Vec4(0, 1, 0, 0); Vec4 b_mask = Vec4(0, 0, 1, 0); Vec4 a_mask = Vec4(0, 0, 0, 1);; Vec4 offsets = Vec4(0, 0, 0, 1); - gpu::BindlessHandle texture; + IVec2 position = IVec2(0, 0); + IVec2 scale = IVec2(1, 1); + gpu::BindlessHandle src; + gpu::RWBindlessHandle dst; } udata; - udata.texture = 
texture_bindless; - if (flip_x) udata.offset_scale.z = -1; - if (flip_y) udata.offset_scale.w = -1; + udata.src = src; + udata.dst= dst; + if (flip_x) { + udata.position.x = size.x - 1; + udata.scale.x = -1; + } + if (flip_y) { + udata.position.y = size.y - 1; + udata.scale.y = -1; + } setUniform(udata); - drawArray(0, 3, *m_textured_quad_shader, 0, gpu::StateFlags::NONE); + dispatch(*m_blit_shader, (size.x + 15) / 16, (size.y + 15) / 16, 1); } void setUniformRaw(Span mem, UniformBuffer::Enum bind_point = UniformBuffer::DRAWCALL) override { @@ -1234,13 +1240,14 @@ struct PipelineImpl final : Pipeline { .type = RenderbufferDesc::RELATIVE, .rel_size = {1, 1}, .format = gpu::TextureFormat::R11G11B10F, + .flags = gpu::TextureFlags::RENDER_TARGET | gpu::TextureFlags::NO_MIPS | gpu::TextureFlags::COMPUTE_WRITE, .debug_name = "hdr_copy" }); DrawStream& stream = m_renderer.getDrawStream(); pass(getMainCamera()); - setRenderTargets(Span(&color_copy, 1)); - renderTexturedQuad(toBindless(hdr_rb, stream), false, false); + const IVec2 size = {m_viewport.w, m_viewport.h}; + blit(toBindless(hdr_rb, stream), toRWBindless(color_copy, stream), size, false, false); setRenderTargets(Span(&hdr_rb, 1), gbuffer.DS, true); @@ -1302,55 +1309,58 @@ struct PipelineImpl final : Pipeline { }; setUniform(ubdata); gpu::StateFlags stencil_state = gpu::getStencilStateBits(0, gpu::StencilFuncs::NOT_EQUAL, 0, 0xff, gpu::StencilOps::KEEP, gpu::StencilOps::KEEP, gpu::StencilOps::REPLACE); - gpu::StateFlags blend_state = gpu::getBlendStateBits(gpu::BlendFactors::ONE, gpu::BlendFactors::ONE, gpu::BlendFactors::ONE, gpu::BlendFactors::ONE); drawArray(0, 3, *m_lighting_shader, 0, stencil_state); endBlock(); return hdr_rb; } - void copy(RenderBufferHandle dst, RenderBufferHandle src, Vec4 r = Vec4(1, 0, 0, 0), Vec4 g = Vec4(0, 1, 0, 0), Vec4 b = Vec4(0, 0, 1, 0)) override { - setRenderTargets(Span(&dst, 1), INVALID_RENDERBUFFER); + void copy(RenderBufferHandle dst, RenderBufferHandle src, IVec2 size, Vec4 
r = Vec4(1, 0, 0, 0), Vec4 g = Vec4(0, 1, 0, 0), Vec4 b = Vec4(0, 0, 1, 0)) override { struct { - Vec4 offset_scale; Vec4 r_mask; Vec4 g_mask; Vec4 b_mask; Vec4 a_mask; Vec4 offsets; - gpu::BindlessHandle texture; + IVec2 position; + IVec2 scale; + gpu::BindlessHandle src; + gpu::RWBindlessHandle dst; } copy_ub = { - Vec4(0, 0, 1, 1), - r, - g, - b, - Vec4(0, 0, 0, 1), - Vec4(0, 0, 0, 1) + r, + g, + b, + Vec4(0, 0, 0, 1), + Vec4(0, 0, 0, 1), + IVec2(0, 0), + IVec2(1, 1) }; DrawStream& stream = m_renderer.getDrawStream(); - copy_ub.texture = toBindless(src, stream); + copy_ub.src = toBindless(src, stream); + copy_ub.dst = toRWBindless(dst, stream); setUniform(copy_ub); - drawArray(0, 3, *m_textured_quad_shader, 0, gpu::StateFlags::NONE); + dispatch(*m_blit_shader, (size.x + 15) / 16, (size.y + 15) / 16, 1); } bool debugOutput(GBuffer gbuffer, RenderBufferHandle result) { + const IVec2 size = {m_viewport.w, m_viewport.h}; if (m_debug_show == DebugShow::ALBEDO) { - copy(result, gbuffer.A); + copy(result, gbuffer.A, size); return true; } if (m_debug_show == DebugShow::NORMAL) { - copy(result, gbuffer.B, {1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 0, 0}); + copy(result, gbuffer.B, size, {1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 0, 0}); return true; } if (m_debug_show == DebugShow::ROUGHNESS) { - copy(result, gbuffer.A, { 0, 0, 0, 1 }, { 0, 0, 0, 1 }, { 0, 0, 0, 1 }); + copy(result, gbuffer.A, size, { 0, 0, 0, 1 }, { 0, 0, 0, 1 }, { 0, 0, 0, 1 }); return true; } if (m_debug_show == DebugShow::METALLIC) { - copy(result, gbuffer.C, { 0, 0, 1, 0 }, { 0, 0, 1, 0 }, { 0, 0, 1, 0 }); + copy(result, gbuffer.C, size, { 0, 0, 1, 0 }, { 0, 0, 1, 0 }, { 0, 0, 1, 0 }); return true; } if (m_debug_show == DebugShow::AO) { - copy(result, gbuffer.B, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}); + copy(result, gbuffer.B, size, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}); return true; } @@ -1488,6 +1498,7 @@ struct PipelineImpl final : Pipeline { global_state.reflection_probes_bindless = 
gpu::getBindlessHandle(m_module->getReflectionProbesTexture()); global_state.shadow_atlas_bindless = m_shadow_atlas.texture ? gpu::getBindlessHandle(m_shadow_atlas.texture) : gpu::INVALID_BINDLESS_HANDLE; global_state.frame_idx = m_renderer.frameNumber(); + global_state.random_uint2 = IVec2((i32)rand(), (i32)rand()); global_state.framebuffer_size.x = m_viewport.w; global_state.framebuffer_size.y = m_viewport.h; global_state.cam_world_pos = Vec4(Vec3(m_viewport.pos), 1); @@ -3635,7 +3646,7 @@ struct PipelineImpl final : Pipeline { RenderModule* m_module; Draw2D m_draw2d; Shader* m_tonemap_shader = nullptr; - Shader* m_textured_quad_shader = nullptr; + Shader* m_blit_shader = nullptr; Shader* m_lighting_shader = nullptr; Shader* m_draw2d_shader = nullptr; Array> m_views; diff --git a/src/renderer/pipeline.h b/src/renderer/pipeline.h index 066da42c15..f4b35c25fa 100644 --- a/src/renderer/pipeline.h +++ b/src/renderer/pipeline.h @@ -1,6 +1,5 @@ #pragma once - #include "core/delegate.h" #include "core/geometry.h" #include "core/hash.h" @@ -126,7 +125,7 @@ struct LUMIX_RENDERER_API Pipeline { virtual gpu::TextureHandle getOutput() = 0; virtual PipelineType getType() const = 0; - virtual void copy(RenderBufferHandle dst, RenderBufferHandle src, Vec4 r = Vec4(1, 0, 0, 0), Vec4 g = Vec4(0, 1, 0, 0), Vec4 b = Vec4(0, 0, 1, 0)) = 0; + virtual void copy(RenderBufferHandle dst, RenderBufferHandle src, IVec2 size, Vec4 r = Vec4(1, 0, 0, 0), Vec4 g = Vec4(0, 1, 0, 0), Vec4 b = Vec4(0, 0, 1, 0)) = 0; virtual void beginBlock(const char* name) = 0; virtual void endBlock() = 0; virtual void drawArray(u32 indices_offset, u32 indices_count, Shader& shader, u32 define_mask = 0, gpu::StateFlags state = gpu::StateFlags::DEPTH_WRITE | gpu::StateFlags::DEPTH_FN_GREATER) = 0; @@ -140,7 +139,7 @@ struct LUMIX_RENDERER_API Pipeline { virtual gpu::RWBindlessHandle toRWBindless(RenderBufferHandle rb_idx, DrawStream& stream) = 0; virtual void setUniformRaw(Span mem, UniformBuffer::Enum bind_point 
= UniformBuffer::DRAWCALL) = 0; - virtual void renderTexturedQuad(gpu::BindlessHandle texture, bool flip_x = false, bool flip_y = false) = 0; + virtual void blit(gpu::BindlessHandle src, gpu::RWBindlessHandle dst, IVec2 size, bool flip_x = false, bool flip_y = false) = 0; virtual void viewport(i32 x, i32 y, i32 w, i32 h) = 0; virtual void pass(const CameraParams& cp) const = 0; virtual u32 cull(const CameraParams& cp, Span buckets) = 0; diff --git a/src/renderer/postprocess.h b/src/renderer/postprocess.h index ca5a427a45..2c76a273ca 100644 --- a/src/renderer/postprocess.h +++ b/src/renderer/postprocess.h @@ -161,31 +161,24 @@ struct FilmGrain : public RenderPlugin { const RenderBufferHandle ldr_buffer = INVALID_RENDERBUFFER; pipeline.beginBlock("film_grain"); - const RenderBufferHandle res = pipeline.createRenderbuffer({ - .format = gpu::TextureFormat::RGBA8, - .flags = gpu::TextureFlags::COMPUTE_WRITE | gpu::TextureFlags::NO_MIPS | gpu::TextureFlags::RENDER_TARGET, - .debug_name = "film_grain" - }); DrawStream& stream = pipeline.getRenderer().getDrawStream(); struct { float intensity; float lumamount; - gpu::BindlessHandle source; + gpu::RWBindlessHandle source; gpu::BindlessHandle noise; - gpu::RWBindlessHandle output; } ubdata = { camera.film_grain_intensity, 0.1f, - pipeline.toBindless(input, stream), + pipeline.toRWBindless(input, stream), gpu::getBindlessHandle(m_noise->handle), - pipeline.toRWBindless(res, stream) }; const Viewport& vp = pipeline.getViewport(); pipeline.setUniform(ubdata); pipeline.dispatch(*m_shader, (vp.w + 15) / 16, (vp.h + 15) / 16, 1); pipeline.endBlock(); - return res; + return input; } }; @@ -245,8 +238,7 @@ struct DOF : public RenderPlugin { pipeline.setUniform(ub); pipeline.dispatch(*m_shader, (vp.w + 15) / 16, (vp.h + 15) / 16, 1); - pipeline.setRenderTargets(Span(&input, 1)); - pipeline.renderTexturedQuad(pipeline.toBindless(dof_rb, stream), false, false); + pipeline.blit(pipeline.toBindless(dof_rb, stream), 
pipeline.toRWBindless(input, stream), {(i32)vp.w, (i32)vp.h}); pipeline.endBlock(); return input; @@ -339,7 +331,8 @@ struct Bloom : public RenderPlugin { bool debugOutput(RenderBufferHandle input, Pipeline& pipeline) override { if (pipeline.m_debug_show_plugin != this) return false; - pipeline.copy(input, m_extracted_rt); + const Viewport& vp = pipeline.getViewport(); + pipeline.copy(input, m_extracted_rt, {(i32)vp.w, (i32)vp.h}); pipeline.keepRenderbufferAlive(m_extracted_rt); return true; } @@ -633,7 +626,8 @@ struct SSS : public RenderPlugin { bool debugOutput(RenderBufferHandle input, Pipeline& pipeline) override { if (pipeline.m_debug_show_plugin != this) return false; RenderBufferHandle rb = pipeline.getData()->history; - if (rb != INVALID_RENDERBUFFER) pipeline.copy(input, rb); + const Viewport& vp = pipeline.getViewport(); + if (rb != INVALID_RENDERBUFFER) pipeline.copy(input, rb, {(i32)vp.w, (i32)vp.h}); return true; } @@ -821,12 +815,12 @@ struct TDAO : public RenderPlugin { auto* data = pipeline.getData(); if (data->rb != INVALID_RENDERBUFFER) { - pipeline.copy(input, data->rb, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}); + const Viewport& vp = pipeline.getViewport(); + pipeline.copy(input, data->rb, {(i32)vp.w, (i32)vp.h}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}); } return true; } - void renderBeforeLightPass(const GBuffer& gbuffer, Pipeline& pipeline) override { if (pipeline.getType() == PipelineType::PREVIEW) return; PROFILE_FUNCTION(); @@ -891,27 +885,22 @@ struct TDAO : public RenderPlugin { pipeline.renderBucket(view_id, 1); } + const Viewport& vp = pipeline.getViewport(); struct { - float u_intensity; - float u_width; - float u_height; - float u_offset0; - float u_offset1; - float u_offset2; - float u_range; - float u_half_depth_range; - float u_scale; - float u_depth_offset; + Vec4 offset; + Vec2 size; + float intensity; + float range; + float half_depth_range; + float scale; + float depth_offset; gpu::BindlessHandle u_depth_buffer; 
gpu::RWBindlessHandle u_gbufferB; gpu::BindlessHandle u_topdown_depthmap; } ubdata = { + Vec4(0, 0, 0, 0), + Vec2((float)vp.w, (float)vp.h), m_intensity, - (float)pipeline.getViewport().w, - (float)pipeline.getViewport().h, - 0, - 0, - 0, m_xz_range, m_y_range * 0.5f, 0.01f, @@ -922,7 +911,7 @@ struct TDAO : public RenderPlugin { }; pipeline.setUniform(ubdata); - pipeline.dispatch(*m_shader, (pipeline.getViewport().w + 15) / 16, (pipeline.getViewport().h + 15) / 16, 1); + pipeline.dispatch(*m_shader, (vp.w + 15) / 16, (vp.h + 15) / 16, 1); pipeline.endBlock(); } @@ -1007,10 +996,10 @@ struct TAA : public RenderPlugin { .debug_name = "taa_output" }); stream.memoryBarrier(pipeline.toTexture(taa_tmp)); - pipeline.setRenderTargets(Span(&taa_output, 1)); - gpu::BindlessHandle t = pipeline.toBindless(taa_tmp, stream); - // TODO textured_quad_shader does unnecessary computations - pipeline.renderTexturedQuad(t, false, false); + // TODO blit does unnecessary computations + gpu::BindlessHandle src = pipeline.toBindless(taa_tmp, stream); + gpu::RWBindlessHandle dst = pipeline.toRWBindless(taa_output, stream); + pipeline.blit(src, dst, display_size); data->history_rb = taa_tmp; pipeline.keepRenderbufferAlive(data->history_rb); diff --git a/src/renderer/shader.cpp b/src/renderer/shader.cpp index 17c1211ed3..5ce50ba221 100644 --- a/src/renderer/shader.cpp +++ b/src/renderer/shader.cpp @@ -12,14 +12,10 @@ #include "renderer/renderer.h" #include "renderer/texture.h" - -namespace Lumix -{ - +namespace Lumix { const ResourceType Shader::TYPE("shader"); - u32 Shader::Uniform::size() const { switch (type) { case INT: return 4; @@ -34,7 +30,6 @@ u32 Shader::Uniform::size() const { return 0; } - Shader::Shader(const Path& path, ResourceManager& resource_manager, Renderer& renderer, IAllocator& allocator) : Resource(path, resource_manager, allocator) , m_allocator(allocator, m_path.c_str()) @@ -44,11 +39,8 @@ Shader::Shader(const Path& path, ResourceManager& resource_manager, Renderer& 
re , m_all_defines_mask(0) , m_defines(m_allocator) , m_programs(m_allocator) - , m_sources(m_allocator) -{ - m_sources.path = path; -} - + , m_code(m_allocator) +{} bool Shader::hasDefine(u8 define) const { return m_defines.indexOf(define) >= 0; @@ -60,14 +52,6 @@ bool ShaderKey::operator==(const ShaderKey& rhs) const { void Shader::compile(gpu::ProgramHandle program, const ShaderKey& key, gpu::VertexDecl decl, DrawStream& stream) { PROFILE_BLOCK("compile_shader"); - - const char* codes[64]; - gpu::ShaderType types[64]; - ASSERT((int)lengthOf(types) >= m_sources.stages.size()); - for (int i = 0; i < m_sources.stages.size(); ++i) { - codes[i] = &m_sources.stages[i].code[0]; - types[i] = m_sources.stages[i].type; - } const char* prefixes[36]; StaticString<128> defines_code[32]; int defines_count = 0; @@ -80,9 +64,8 @@ void Shader::compile(gpu::ProgramHandle program, const ShaderKey& key, gpu::Vert ++defines_count; } } - prefixes[defines_count + 1] = m_sources.common.length() == 0 ? "" : m_sources.common.c_str(); - stream.createProgram(program, key.state, decl, codes, types, m_sources.stages.size(), prefixes, 2 + defines_count, m_sources.path.c_str()); + stream.createProgram(program, key.state, decl, m_code.c_str(), m_type, prefixes, 1 + defines_count, getPath().c_str()); } gpu::ProgramHandle Shader::getProgram(gpu::StateFlags state, const gpu::VertexDecl& decl, u32 defines, const char* semantic_defines) { @@ -124,7 +107,7 @@ gpu::ProgramHandle Shader::getProgram(gpu::StateFlags state, const gpu::VertexDe } gpu::ProgramHandle Shader::getProgram(u32 defines) { - ASSERT(m_sources.stages.empty() || m_sources.stages[0].type == gpu::ShaderType::COMPUTE); + ASSERT(m_type == gpu::ShaderType::COMPUTE); const gpu::VertexDecl dummy_decl(gpu::PrimitiveType::NONE); ShaderKey key; static const char* no_def = ""; @@ -147,229 +130,6 @@ static Shader* getShader(lua_State* L) return shader; } - -namespace LuaAPI -{ - -int uniform(lua_State* L) -{ - const char* name = 
LuaWrapper::checkArg(L, 1); - const char* type = LuaWrapper::checkArg(L, 2); - Shader* shader = getShader(L); - ASSERT(shader); - - Shader::Uniform& u = shader->m_uniforms.emplace(); - copyString(u.name, name); - u.name_hash = RuntimeHash(name); - memset(&u.default_value, 0, sizeof(u.default_value)); - const struct { - const char* str; - Shader::Uniform::Type type; - } types[] = { - { "normalized_float", Shader::Uniform::NORMALIZED_FLOAT }, - { "float", Shader::Uniform::FLOAT }, - { "color", Shader::Uniform::COLOR }, - { "int", Shader::Uniform::INT }, - { "vec2", Shader::Uniform::FLOAT2 }, - { "vec3", Shader::Uniform::FLOAT3 }, - { "vec4", Shader::Uniform::FLOAT4 }, - }; - - bool valid = false; - for (auto& t : types) { - if (equalStrings(type, t.str)) { - valid = true; - u.type = t.type; - break; - } - } - - if (!valid) { - logError("Unknown uniform type ", type, " in ", shader->getPath()); - shader->m_uniforms.pop(); - return 0; - } - - if (lua_gettop(L) > 2) { - switch (lua_type(L, 3)) { - case LUA_TNUMBER: u.default_value.float_value = LuaWrapper::toType(L, 3); break; - case LUA_TTABLE: { - const size_t len = lua_objlen(L, 3); - switch (len) { - case 2: *(Vec2*)u.default_value.vec2 = LuaWrapper::toType(L, 3); break; - case 3: *(Vec3*)u.default_value.vec3 = LuaWrapper::toType(L, 3); break; - case 4: *(Vec4*)u.default_value.vec4 = LuaWrapper::toType(L, 3); break; - case 16: *(Matrix*)u.default_value.vec4 = LuaWrapper::toType(L, 3); break; - default: luaL_error(L, "Uniform %s has unsupported type", name); break; - } - break; - } - default: luaL_error(L, "Uniform %s has unsupported type", name); break; - } - } - - if(shader->m_uniforms.size() == 1) { - u.offset = 0; - } - else { - const Shader::Uniform& prev = shader->m_uniforms[shader->m_uniforms.size() - 2]; - u.offset = prev.offset + prev.size(); - const u32 align = u.size(); - u.offset += (align - u.offset % align) % align; - } - return 0; -} - - -int define(lua_State* L) -{ - Shader* shader = getShader(L); - 
const char* def = LuaWrapper::checkArg(L, 1); - - const u8 def_idx = shader->m_renderer.getShaderDefineIdx(def); - shader->m_defines.push(def_idx); - - return 0; -} - - -int texture_slot(lua_State* L) -{ - LuaWrapper::checkTableArg(L, 1); - Shader* shader = getShader(L); - - if(shader->m_texture_slot_count >= lengthOf(shader->m_texture_slots)) { - logError("Too many texture slots in ", shader->getPath()); - return 0; - } - - Shader::TextureSlot& slot = shader->m_texture_slots[shader->m_texture_slot_count]; - LuaWrapper::getOptionalStringField(L, -1, "name", Span(slot.name)); - char define[64]; - if (LuaWrapper::getOptionalStringField(L, -1, "define", Span(define))) { - slot.define_idx = shader->m_renderer.getShaderDefineIdx(define); - } - - Path tmp; - if(LuaWrapper::getOptionalStringField(L, -1, "default_texture", Span(tmp.beginUpdate(), tmp.capacity()))) { - tmp.endUpdate(); - ResourceManagerHub& manager = shader->getResourceManager().getOwner(); - slot.default_texture = manager.load(tmp); - } - - ++shader->m_texture_slot_count; - - return 0; -} - - -static void source(lua_State* L, gpu::ShaderType shader_type) -{ - const char* src = LuaWrapper::checkArg(L, 1); - - Shader* shader = getShader(L); - Shader::Stage& stage = shader->m_sources.stages.emplace(shader->m_allocator); - stage.type = shader_type; - - lua_Debug ar; - lua_getinfo(L, 1, "nsl", &ar); - const int line = ar.currentline; - ASSERT(line >= 0); - - const StaticString<32 + MAX_PATH> line_str("#line ", line, "\"", shader->getPath(), "\"", "\n"); - const int line_str_len = stringLength(line_str); - const int src_len = stringLength(src); - - stage.code.resize(line_str_len + src_len + 1); - memcpy(&stage.code[0], line_str, line_str_len); - memcpy(&stage.code[line_str_len], src, src_len); - stage.code.back() = '\0'; -} - - -static int common(lua_State* L) -{ - const char* src = LuaWrapper::checkArg(L, 1); - - Shader* shader = getShader(L); - - lua_Debug ar; - lua_getinfo(L, 1, "nsl", &ar); - const int line 
= ar.currentline; - ASSERT(line >= 0); - - const StaticString<32> line_str("#line ", line, "\n"); - - shader->m_sources.common.append(line_str, src); - return 0; -} - - -int vertex_shader(lua_State* L) -{ - source(L, gpu::ShaderType::VERTEX); - return 0; -} - - -int fragment_shader(lua_State* L) -{ - source(L, gpu::ShaderType::FRAGMENT); - return 0; -} - - -int geometry_shader(lua_State* L) -{ - source(L, gpu::ShaderType::GEOMETRY); - return 0; -} - -int import(lua_State* L) -{ - const char* path = LuaWrapper::checkArg(L, 1); - Shader* shader = getShader(L); - - OutputMemoryStream content(shader->m_allocator); - ResourceManagerHub& rm = shader->getResourceManager().getOwner(); - - if (!rm.loadRaw(shader->getPath(), Path(path), content)) { - logError("Failed to open/read import ", path, " imported from ", shader->getPath()); - return 0; - } - - if (!content.empty()) { - LuaWrapper::execute(L, StringView((const char*)content.data(), (u32)content.size()), path, 0); - } - - return 0; -} - -int include(lua_State* L) -{ - const char* path = LuaWrapper::checkArg(L, 1); - - Shader* shader = getShader(L); - - ResourceManagerHub& rm = shader->getResourceManager().getOwner(); - - OutputMemoryStream content(shader->m_allocator); - if (!rm.loadRaw(shader->getPath(), Path(path), content)) { - logError("Failed to open/read include ", path, " included from ", shader->getPath()); - return 0; - } - - if (!content.empty()) { - content << "\n"; - shader->m_sources.common.append(StringView((const char*)content.data(), (u32)content.size())); - } - - return 0; -} - - -} // namespace LuaAPI - static StringView getLine(StringView& src) { const char* b = src.begin; while (b < src.end && *b != '\n') ++b; @@ -405,163 +165,124 @@ static bool assign(Shader::Uniform& u, Tokenizer::Variant v) { } bool Shader::load(Span mem) { - StringView content((const char*)mem.begin(), (u32)mem.length()); - if (Path::hasExtension(getPath(), "hlsl")) { - StringView preprocess = content; - bool is_surface = 
false; - for (;;) { - StringView line = getLine(preprocess); - if (line.begin == preprocess.end) break; - if (startsWith(line, "//@")) { - line.removePrefix(3); - if (startsWith(line, "surface")) { - is_surface = true; + StringView preprocess((const char*)mem.begin(), (u32)mem.length()); + bool is_surface = false; + // TODO move this to asset compiler + for (;;) { + StringView line = getLine(preprocess); + if (line.begin == preprocess.end) break; + if (startsWith(line, "//@")) { + line.removePrefix(3); + if (startsWith(line, "surface")) { + is_surface = true; + } + else if (startsWith(line, "define \"")) { + line.removePrefix(8); + line.end = line.begin + 1; + while (line.end < preprocess.end && *line.end != '"') ++line.end; + + char tmp[64]; + copyString(tmp, line); + const u8 def_idx = m_renderer.getShaderDefineIdx(tmp); + m_defines.push(def_idx); + } + else if (startsWith(line, "uniform")) { + line.removePrefix(7); + Tokenizer t(preprocess, getPath().c_str()); + t.cursor = line.begin; + StringView name; + StringView type; + if (!t.consume(name, ",", type, ",")) return false; + + Shader::Uniform& u = m_uniforms.emplace(); + copyString(Span(u.name), name); + u.name_hash = RuntimeHash(name.begin, name.size()); + + if (equalStrings(type, "normalized_float")) u.type = Shader::Uniform::FLOAT; + else if (equalStrings(type, "float")) u.type = Shader::Uniform::FLOAT; + else if (equalStrings(type, "int")) u.type = Shader::Uniform::INT; + else if (equalStrings(type, "color")) u.type = Shader::Uniform::COLOR; + else if (equalStrings(type, "float2")) u.type = Shader::Uniform::FLOAT2; + else if (equalStrings(type, "float3")) u.type = Shader::Uniform::FLOAT3; + else if (equalStrings(type, "float4")) u.type = Shader::Uniform::FLOAT4; + else { + logError(getPath(), "(", getLine(type), "): Unknown uniform type ", type, " in ", getPath()); + t.logErrorPosition(type.begin); + return false; + } + + Tokenizer::Variant v = t.consumeVariant(); + if (v.type == Tokenizer::Variant::NONE) 
return false; + if (!assign(u, v)) { + logError(getPath(), "(", getLine(type), "): Uniform ", name, " has incompatible type ", type); + t.logErrorPosition(type.begin); + return false; + } + + if (m_uniforms.size() == 1) { + u.offset = 0; } - else if (startsWith(line, "define \"")) { - line.removePrefix(8); - line.end = line.begin + 1; - while (line.end < preprocess.end && *line.end != '"') ++line.end; - - char tmp[64]; - copyString(tmp, line); - const u8 def_idx = m_renderer.getShaderDefineIdx(tmp); - m_defines.push(def_idx); + else { + const Shader::Uniform& prev = m_uniforms[m_uniforms.size() - 2]; + u.offset = prev.offset + prev.size(); + const u32 align = u.size(); + u.offset += (align - u.offset % align) % align; } - else if (startsWith(line, "uniform")) { - line.removePrefix(7); - Tokenizer t(preprocess, getPath().c_str()); - t.cursor = line.begin; - StringView name; - StringView type; - if (!t.consume(name, ",", type, ",")) return false; - - Shader::Uniform& u = m_uniforms.emplace(); - copyString(Span(u.name), name); - u.name_hash = RuntimeHash(name.begin, name.size()); - - if (equalStrings(type, "normalized_float")) u.type = Shader::Uniform::FLOAT; - else if (equalStrings(type, "float")) u.type = Shader::Uniform::FLOAT; - else if (equalStrings(type, "int")) u.type = Shader::Uniform::INT; - else if (equalStrings(type, "color")) u.type = Shader::Uniform::COLOR; - else if (equalStrings(type, "float2")) u.type = Shader::Uniform::FLOAT2; - else if (equalStrings(type, "float3")) u.type = Shader::Uniform::FLOAT3; - else if (equalStrings(type, "float4")) u.type = Shader::Uniform::FLOAT4; - else { - logError(getPath(), "(", getLine(type), "): Unknown uniform type ", type, " in ", getPath()); - t.logErrorPosition(type.begin); - return false; - } - - Tokenizer::Variant v = t.consumeVariant(); - if (v.type == Tokenizer::Variant::NONE) return false; - if (!assign(u, v)) { - logError(getPath(), "(", getLine(type), "): Uniform ", name, " has incompatible type ", type); - 
t.logErrorPosition(type.begin); - return false; - } - - if (m_uniforms.size() == 1) { - u.offset = 0; - } - else { - const Shader::Uniform& prev = m_uniforms[m_uniforms.size() - 2]; - u.offset = prev.offset + prev.size(); - const u32 align = u.size(); - u.offset += (align - u.offset % align) % align; - } + } + else if (startsWith(line, "texture_slot")) { + line.removePrefix(12); + Tokenizer t(preprocess, getPath().c_str()); + t.content.end = line.end; + t.cursor = line.begin; + StringView name; + StringView default_texture; + if (!t.consume(name, ",", default_texture)) return false; + + Shader::TextureSlot& slot = m_texture_slots[m_texture_slot_count]; + ++m_texture_slot_count; + copyString(slot.name, name); + + ResourceManagerHub& manager = getResourceManager().getOwner(); + slot.default_texture = default_texture.empty() ? nullptr : manager.load(Path(default_texture)); + + Tokenizer::Token n = t.tryNextToken(); + if (n && n.value[0] == ',') { + StringView def; + if (!t.consume(def)) return false; + StaticString<64> tmp(def); + slot.define_idx = m_renderer.getShaderDefineIdx(tmp); } - else if (startsWith(line, "texture_slot")) { - line.removePrefix(12); - Tokenizer t(preprocess, getPath().c_str()); - t.content.end = line.end; - t.cursor = line.begin; - StringView name; - StringView default_texture; - if (!t.consume(name, ",", default_texture)) return false; - - Shader::TextureSlot& slot = m_texture_slots[m_texture_slot_count]; - ++m_texture_slot_count; - copyString(slot.name, name); - - ResourceManagerHub& manager = getResourceManager().getOwner(); - slot.default_texture = default_texture.empty() ? 
nullptr : manager.load(Path(default_texture)); - - Tokenizer::Token n = t.tryNextToken(); - if (n && n.value[0] == ',') { - StringView def; - if (!t.consume(def)) return false; - StaticString<64> tmp(def); - slot.define_idx = m_renderer.getShaderDefineIdx(tmp); - } + } + else if (startsWith(line, "include \"")) { + StringView path; + path.begin = line.begin + 9; + path.end = path.begin + 1; + while (path.end < preprocess.end && *path.end != '"') ++path.end; + + ResourceManagerHub& rm = getResourceManager().getOwner(); + OutputMemoryStream include_content(m_allocator); + if (!rm.loadRaw(getPath(), Path(path), include_content)) { + logError("Failed to open/read include ", path, " included from ", getPath()); + return false; } - else if (startsWith(line, "include \"")) { - StringView path; - path.begin = line.begin + 9; - path.end = path.begin + 1; - while (path.end < preprocess.end && *path.end != '"') ++path.end; - - ResourceManagerHub& rm = getResourceManager().getOwner(); - OutputMemoryStream include_content(m_allocator); - if (!rm.loadRaw(getPath(), Path(path), include_content)) { - logError("Failed to open/read include ", path, " included from ", getPath()); - return false; - } - - if (!include_content.empty()) { - include_content << "\n"; - m_sources.common.append(StringView((const char*)include_content.data(), (u32)include_content.size())); - } + + if (!include_content.empty()) { + include_content << "\n"; + m_code.append("#line 1 \"", path, "\"\n"); + m_code.append(StringView((const char*)include_content.data(), (u32)include_content.size())); } } } - - Shader::Stage& stage = m_sources.stages.emplace(m_allocator); - stage.type = is_surface ? 
gpu::ShaderType::SURFACE : gpu::ShaderType::COMPUTE; - stage.code.resize(mem.length() + 1); - memcpy(&stage.code[0], mem.begin(), mem.length()); - stage.code.back() = '\0'; - } - else { - lua_State* root_state = m_renderer.getEngine().getState(); - lua_State* L = lua_newthread(root_state); - const int state_ref = LuaWrapper::createRef(root_state); - lua_pop(root_state, 1); - - lua_pushlightuserdata(L, this); - lua_setfield(L, LUA_GLOBALSINDEX, "this"); - lua_pushcfunction(L, LuaAPI::common, "common"); - lua_setfield(L, LUA_GLOBALSINDEX, "common"); - lua_pushcfunction(L, LuaAPI::vertex_shader, "vertex_shader"); - lua_setfield(L, LUA_GLOBALSINDEX, "vertex_shader"); - lua_pushcfunction(L, LuaAPI::fragment_shader, "fragment_shader"); - lua_setfield(L, LUA_GLOBALSINDEX, "fragment_shader"); - lua_pushcfunction(L, LuaAPI::geometry_shader, "geometry_shader"); - lua_setfield(L, LUA_GLOBALSINDEX, "geometry_shader"); - lua_pushcfunction(L, LuaAPI::include, "include"); - lua_setfield(L, LUA_GLOBALSINDEX, "include"); - lua_pushcfunction(L, LuaAPI::import, "import"); - lua_setfield(L, LUA_GLOBALSINDEX, "import"); - lua_pushcfunction(L, LuaAPI::texture_slot, "texture_slot"); - lua_setfield(L, LUA_GLOBALSINDEX, "texture_slot"); - lua_pushcfunction(L, LuaAPI::define, "define"); - lua_setfield(L, LUA_GLOBALSINDEX, "define"); - lua_pushcfunction(L, LuaAPI::uniform, "uniform"); - lua_setfield(L, LUA_GLOBALSINDEX, "uniform"); - - if (!LuaWrapper::execute(L, content, getPath().c_str(), 0)) { - LuaWrapper::releaseRef(root_state, state_ref); - return false; - } - LuaWrapper::releaseRef(root_state, state_ref); } + + m_type = is_surface ? 
gpu::ShaderType::SURFACE : gpu::ShaderType::COMPUTE; + m_code.append("#line 1 \"", getPath(), "\"\n"); + m_code.append(StringView((const char*)mem.begin(), (u32)mem.length())); RollingHasher hasher; hasher.begin(); - for (auto& stage : m_sources.stages) { - hasher.update(stage.code.data(), stage.code.size()); - } - hasher.update(m_sources.common.c_str(), m_sources.common.length()); + hasher.update(m_code.c_str(), m_code.length()); m_content_hash = hasher.end(); return true; @@ -573,8 +294,7 @@ void Shader::unload() for (const ProgramPair& p : m_programs) { m_renderer.getEndFrameDrawStream().destroy(p.program); } - m_sources.common = ""; - m_sources.stages.clear(); + m_code = ""; m_programs.clear(); m_uniforms.clear(); for (u32 i = 0; i < m_texture_slot_count; ++i) { @@ -637,21 +357,23 @@ void Shader::toTextureVarName(Span out, const char* in) { void Shader::onBeforeReady() { if (m_uniforms.empty() && m_texture_slot_count == 0) return; - m_sources.common.append("cbuffer MaterialState : register(b2) {"); + String tmp(m_allocator); + tmp.append("cbuffer MaterialState : register(b2) {"); for (const Uniform& u : m_uniforms) { char var_name[64]; toUniformVarName(Span(var_name), u.name); - m_sources.common.append(toString(u.type), " ", var_name, ";\n"); + tmp.append(toString(u.type), " ", var_name, ";\n"); } for (u32 i = 0; i < m_texture_slot_count; ++i) { char var_name[64]; toTextureVarName(Span(var_name), m_texture_slots[i].name); - m_sources.common.append("uint ", var_name, ";\n"); + tmp.append("uint ", var_name, ";\n"); } - m_sources.common.append("};\n"); + tmp.append("};\n"); + m_code.insert(0, tmp); } diff --git a/src/renderer/shader.h b/src/renderer/shader.h index 76b204c8ee..38503deebd 100644 --- a/src/renderer/shader.h +++ b/src/renderer/shader.h @@ -67,37 +67,6 @@ struct LUMIX_RENDERER_API Shader final : Resource { u32 size() const; }; - struct Stage { - Stage(IAllocator& allocator) : code(allocator) {} - Stage(const Stage& rhs) - : type(rhs.type) - , 
code(rhs.code.getAllocator()) - { - rhs.code.copyTo(code); - } - - gpu::ShaderType type; - Array code; - }; - - struct Sources { - Sources(IAllocator& allocator) - : stages(allocator) - , common(allocator) - {} - Sources(const Sources& rhs) - : stages(rhs.stages.getAllocator()) - , common(rhs.common) - , path(rhs.path) - { - rhs.stages.copyTo(stages); - } - - Path path; - Array stages; - String common; - }; - Shader(const Path& path, ResourceManager& resource_manager, Renderer& renderer, IAllocator& allocator); ResourceType getType() const override { return TYPE; } @@ -122,7 +91,8 @@ struct LUMIX_RENDERER_API Shader final : Resource { gpu::ProgramHandle program; }; Array m_programs; - Sources m_sources; + gpu::ShaderType m_type; + String m_code; static const ResourceType TYPE; RuntimeHash32 m_content_hash;