From 9dec7ce6703e5acf26713d3db2e2d97f012697de Mon Sep 17 00:00:00 2001 From: Ali Chraghi Date: Thu, 1 Feb 2024 15:48:51 +0330 Subject: [PATCH 1/4] spirv: basic shader support --- lib/std/Target.zig | 4 +- lib/std/builtin.zig | 6 + src/Sema.zig | 5 + src/codegen/llvm.zig | 1 + src/codegen/spirv.zig | 215 ++++++++++++++++++++--------------- src/codegen/spirv/Module.zig | 53 ++++++--- src/codegen/spirv/spec.zig | 11 +- src/link/SpirV.zig | 29 +++-- tools/gen_spirv_spec.zig | 11 +- 9 files changed, 220 insertions(+), 115 deletions(-) diff --git a/lib/std/Target.zig b/lib/std/Target.zig index a3d165b568f8..4e150f796202 100644 --- a/lib/std/Target.zig +++ b/lib/std/Target.zig @@ -1221,6 +1221,7 @@ pub const Cpu = struct { .fs, .gs, .ss => arch == .x86_64 or arch == .x86, .global, .constant, .local, .shared => is_gpu, .param => is_nvptx, + .input, .output, .uniform => is_spirv, // TODO this should also check how many flash banks the cpu has .flash, .flash1, .flash2, .flash3, .flash4, .flash5 => arch == .avr, }; @@ -2353,7 +2354,7 @@ pub fn c_type_bit_size(target: Target, c_type: CType) u16 { .longdouble => return 128, }, - .opencl => switch (c_type) { + .opencl, .vulkan => switch (c_type) { .char => return 8, .short, .ushort => return 16, .int, .uint, .float => return 32, @@ -2386,7 +2387,6 @@ pub fn c_type_bit_size(target: Target, c_type: CType) u16 { .hermit, .hurd, .glsl450, - .vulkan, .driverkit, .shadermodel, .liteos, diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 56ee990c5fff..a0fbaea7de0c 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -205,6 +205,9 @@ pub const CallingConvention = enum(u8) { Win64, /// AMD GPU, NVPTX, or SPIR-V kernel Kernel, + // Vulkan-only + Fragment, + Vertex, }; /// This data structure is used by the Zig language code generation and @@ -222,6 +225,9 @@ pub const AddressSpace = enum(u5) { param, shared, local, + input, + output, + uniform, // AVR address spaces. 
flash, diff --git a/src/Sema.zig b/src/Sema.zig index e7f2677c8eec..9d651446d624 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -9741,6 +9741,10 @@ fn finishFunc( .nvptx, .nvptx64, .amdgcn, .spirv32, .spirv64 => null, else => "nvptx, amdgcn and SPIR-V", }, + .Fragment, .Vertex => switch (arch) { + .spirv32, .spirv64 => null, + else => "SPIR-V", + }, })) |allowed_platform| { return sema.fail(block, cc_src, "callconv '{s}' is only available on {s}, not {s}", .{ @tagName(cc_resolved), @@ -37916,6 +37920,7 @@ pub fn analyzeAddressSpace( .gs, .fs, .ss => (arch == .x86 or arch == .x86_64) and ctx == .pointer, // TODO: check that .shared and .local are left uninitialized .param => is_nv, + .input, .output, .uniform => is_spirv, .global, .shared, .local => is_gpu, .constant => is_gpu and (ctx == .constant), // TODO this should also check how many flash banks the cpu has diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 1e63361048ff..a703b900131c 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -10848,6 +10848,7 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) Builder .amdgcn => .amdgpu_kernel, else => unreachable, }, + .Vertex, .Fragment => unreachable, }; } diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index a499f3d8ed28..e8446622f7cc 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -451,12 +451,12 @@ const DeclGen = struct { const spv_decl_index = blk: { const entry = try self.object.anon_decl_link.getOrPut(self.object.gpa, .{ val, storage_class }); if (entry.found_existing) { - try self.func.decl_deps.put(self.spv.gpa, entry.value_ptr.*, {}); + try self.addFunctionDep(entry.value_ptr.*, storage_class); return self.spv.declPtr(entry.value_ptr.*).result_id; } const spv_decl_index = try self.spv.allocDecl(.global); - try self.func.decl_deps.put(self.spv.gpa, spv_decl_index, {}); + try self.addFunctionDep(spv_decl_index, storage_class); entry.value_ptr.* = spv_decl_index; break :blk 
spv_decl_index; }; @@ -529,6 +529,37 @@ const DeclGen = struct { return var_id; } + fn addFunctionDep(self: *DeclGen, decl_index: SpvModule.Decl.Index, storage_class: StorageClass) !void { + const target = self.getTarget(); + if (target.os.tag == .vulkan) { + // Shader entry point dependencies must be variables with Input or Output storage class + switch (storage_class) { + .Input, .Output => { + try self.func.decl_deps.put(self.spv.gpa, decl_index, {}); + }, + else => {}, + } + } else { + try self.func.decl_deps.put(self.spv.gpa, decl_index, {}); + } + } + + fn castToGeneric(self: *DeclGen, type_id: IdRef, ptr_id: IdRef) !IdRef { + const target = self.getTarget(); + + if (target.os.tag == .vulkan) { + return ptr_id; + } else { + const result_id = self.spv.allocId(); + try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{ + .id_result_type = type_id, + .id_result = result_id, + .pointer = ptr_id, + }); + return result_id; + } + } + /// Start a new SPIR-V block, Emits the label of the new block, and stores which /// block we are currently generating. /// Note that there is no such thing as nested blocks like in ZIR or AIR, so we don't need to @@ -1019,7 +1050,7 @@ const DeclGen = struct { // TODO: Can we consolidate this in ptrElemPtr? const elem_ty = parent_ptr_ty.elemType2(mod); // use elemType() so that we get T for *[N]T. 
- const elem_ptr_ty_ref = try self.ptrType(elem_ty, spvStorageClass(parent_ptr_ty.ptrAddressSpace(mod))); + const elem_ptr_ty_ref = try self.ptrType(elem_ty, self.spvStorageClass(parent_ptr_ty.ptrAddressSpace(mod))); if (elem_ptr_ty_ref == result_ty_ref) { return elem_ptr_id; @@ -1074,7 +1105,7 @@ const DeclGen = struct { unreachable; // TODO } - const final_storage_class = spvStorageClass(ty.ptrAddressSpace(mod)); + const final_storage_class = self.spvStorageClass(ty.ptrAddressSpace(mod)); const actual_storage_class = switch (final_storage_class) { .Generic => .CrossWorkgroup, else => |other| other, @@ -1084,15 +1115,7 @@ const DeclGen = struct { const decl_ptr_ty_ref = try self.ptrType(decl_ty, final_storage_class); const ptr_id = switch (final_storage_class) { - .Generic => blk: { - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{ - .id_result_type = self.typeId(decl_ptr_ty_ref), - .id_result = result_id, - .pointer = decl_id, - }); - break :blk result_id; - }, + .Generic => try self.castToGeneric(self.typeId(decl_ptr_ty_ref), decl_id), else => decl_id, }; @@ -1115,6 +1138,7 @@ const DeclGen = struct { const ty_ref = try self.resolveType(ty, .direct); const ty_id = self.typeId(ty_ref); const decl = mod.declPtr(decl_index); + switch (mod.intern_pool.indexToKey(decl.val.ip_index)) { .func => { // TODO: Properly lower function pointers. 
For now we are going to hack around it and @@ -1133,23 +1157,13 @@ const DeclGen = struct { const spv_decl_index = try self.object.resolveDecl(mod, decl_index); const decl_id = self.spv.declPtr(spv_decl_index).result_id; - try self.func.decl_deps.put(self.spv.gpa, spv_decl_index, {}); - - const final_storage_class = spvStorageClass(decl.@"addrspace"); + const final_storage_class = self.spvStorageClass(decl.@"addrspace"); + try self.addFunctionDep(spv_decl_index, final_storage_class); const decl_ptr_ty_ref = try self.ptrType(decl.ty, final_storage_class); const ptr_id = switch (final_storage_class) { - .Generic => blk: { - // Pointer should be Generic, but is actually placed in CrossWorkgroup. - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{ - .id_result_type = self.typeId(decl_ptr_ty_ref), - .id_result = result_id, - .pointer = decl_id, - }); - break :blk result_id; - }, + .Generic => try self.castToGeneric(self.typeId(decl_ptr_ty_ref), decl_id), else => decl_id, }; @@ -1195,8 +1209,12 @@ const DeclGen = struct { // An array of largestSupportedIntBits. return self.todo("Implement {s} composite int type of {} bits", .{ @tagName(signedness), bits }); }; + // Kernel only supports unsigned ints. - // TODO: Only do this with Kernels + if (self.getTarget().os.tag == .vulkan) { + return self.spv.intType(signedness, backing_bits); + } + return self.spv.intType(.unsigned, backing_bits); } @@ -1453,7 +1471,7 @@ const DeclGen = struct { // Note: Don't cache this pointer type, it would mess up the recursive pointer functionality // in ptrType()! 
- const storage_class = spvStorageClass(ptr_info.flags.address_space); + const storage_class = self.spvStorageClass(ptr_info.flags.address_space); const ptr_ty_ref = try self.ptrType(Type.fromInterned(ptr_info.child), storage_class); if (ptr_info.flags.size != .Slice) { @@ -1634,13 +1652,20 @@ const DeclGen = struct { } } - fn spvStorageClass(as: std.builtin.AddressSpace) StorageClass { + fn spvStorageClass(self: *DeclGen, as: std.builtin.AddressSpace) StorageClass { + const target = self.getTarget(); return switch (as) { - .generic => .Generic, + .generic => switch (target.os.tag) { + .vulkan => .Private, + else => .Generic, + }, .shared => .Workgroup, .local => .Private, .global => .CrossWorkgroup, .constant => .UniformConstant, + .input => .Input, + .output => .Output, + .uniform => .Uniform, .gs, .fs, .ss, @@ -1920,7 +1945,7 @@ const DeclGen = struct { // point name is the same as a different OpName. const test_name = try std.fmt.allocPrint(self.gpa, "test {s}", .{name}); defer self.gpa.free(test_name); - try self.spv.declareEntryPoint(spv_decl_index, test_name); + try self.spv.declareEntryPoint(spv_decl_index, test_name, .Kernel); } fn genDecl(self: *DeclGen) !void { @@ -1928,6 +1953,7 @@ const DeclGen = struct { const ip = &mod.intern_pool; const decl = mod.declPtr(self.decl_index); const spv_decl_index = try self.object.resolveDecl(mod, self.decl_index); + const target = self.getTarget(); const decl_id = self.spv.declPtr(spv_decl_index).result_id; @@ -1994,30 +2020,24 @@ const DeclGen = struct { try self.generateTestEntryPoint(fqn, spv_decl_index); } } else { - const init_val = if (decl.val.getVariable(mod)) |payload| - Value.fromInterned(payload.init) - else - decl.val; - - if (init_val.ip_index == .unreachable_value) { - return self.todo("importing extern variables", .{}); - } - - // Currently, initializers for CrossWorkgroup variables is not implemented - // in Mesa. Therefore we generate an initialization kernel instead. 
- - const void_ty_ref = try self.resolveType(Type.void, .direct); - - const initializer_proto_ty_ref = try self.spv.resolve(.{ .function_type = .{ - .return_type = void_ty_ref, - .parameters = &.{}, - } }); + const opt_init_val: ?Value = blk: { + if (decl.val.getVariable(mod)) |payload| { + if (payload.is_extern) break :blk null; + break :blk Value.fromInterned(payload.init); + } + break :blk decl.val; + }; // Generate the actual variable for the global... - const final_storage_class = spvStorageClass(decl.@"addrspace"); - const actual_storage_class = switch (final_storage_class) { - .Generic => .CrossWorkgroup, - else => final_storage_class, + const final_storage_class = self.spvStorageClass(decl.@"addrspace"); + const actual_storage_class = blk: { + if (target.os.tag != .vulkan) { + break :blk switch (final_storage_class) { + .Generic => .CrossWorkgroup, + else => final_storage_class, + }; + } + break :blk final_storage_class; }; const ptr_ty_ref = try self.ptrType(decl.ty, actual_storage_class); @@ -2028,37 +2048,51 @@ const DeclGen = struct { .id_result = decl_id, .storage_class = actual_storage_class, }); + const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module)); + try self.spv.debugName(decl_id, fqn); - // Now emit the instructions that initialize the variable. - const initializer_id = self.spv.allocId(); - try self.func.prologue.emit(self.spv.gpa, .OpFunction, .{ - .id_result_type = self.typeId(void_ty_ref), - .id_result = initializer_id, - .function_control = .{}, - .function_type = self.typeId(initializer_proto_ty_ref), - }); - const root_block_id = self.spv.allocId(); - try self.func.prologue.emit(self.spv.gpa, .OpLabel, .{ - .id_result = root_block_id, - }); - self.current_block_label = root_block_id; + if (opt_init_val) |init_val| { + // Currently, initializers for CrossWorkgroup variables is not implemented + // in Mesa. Therefore we generate an initialization kernel instead. 
+ const void_ty_ref = try self.resolveType(Type.void, .direct); - const val_id = try self.constant(decl.ty, init_val, .indirect); - try self.func.body.emit(self.spv.gpa, .OpStore, .{ - .pointer = decl_id, - .object = val_id, - }); + const initializer_proto_ty_ref = try self.spv.resolve(.{ .function_type = .{ + .return_type = void_ty_ref, + .parameters = &.{}, + } }); - // TODO: We should be able to get rid of this by now... - self.spv.endGlobal(spv_decl_index, begin, decl_id, initializer_id); + // Now emit the instructions that initialize the variable. + const initializer_id = self.spv.allocId(); + try self.func.prologue.emit(self.spv.gpa, .OpFunction, .{ + .id_result_type = self.typeId(void_ty_ref), + .id_result = initializer_id, + .function_control = .{}, + .function_type = self.typeId(initializer_proto_ty_ref), + }); + const root_block_id = self.spv.allocId(); + try self.func.prologue.emit(self.spv.gpa, .OpLabel, .{ + .id_result = root_block_id, + }); + self.current_block_label = root_block_id; - try self.func.body.emit(self.spv.gpa, .OpReturn, {}); - try self.func.body.emit(self.spv.gpa, .OpFunctionEnd, {}); - try self.spv.addFunction(spv_decl_index, self.func); + const val_id = try self.constant(decl.ty, init_val, .indirect); + try self.func.body.emit(self.spv.gpa, .OpStore, .{ + .pointer = decl_id, + .object = val_id, + }); - const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module)); - try self.spv.debugName(decl_id, fqn); - try self.spv.debugNameFmt(initializer_id, "initializer of {s}", .{fqn}); + // TODO: We should be able to get rid of this by now... 
+ self.spv.endGlobal(spv_decl_index, begin, decl_id, initializer_id); + + try self.func.body.emit(self.spv.gpa, .OpReturn, {}); + try self.func.body.emit(self.spv.gpa, .OpFunctionEnd, {}); + try self.spv.addFunction(spv_decl_index, self.func); + + try self.spv.debugNameFmt(initializer_id, "initializer of {s}", .{fqn}); + } else { + self.spv.endGlobal(spv_decl_index, begin, decl_id, null); + try self.spv.declareDeclDeps(spv_decl_index, &.{}); + } } } @@ -3761,7 +3795,7 @@ const DeclGen = struct { const mod = self.module; // Construct new pointer type for the resulting pointer const elem_ty = ptr_ty.elemType2(mod); // use elemType() so that we get T for *[N]T. - const elem_ptr_ty_ref = try self.ptrType(elem_ty, spvStorageClass(ptr_ty.ptrAddressSpace(mod))); + const elem_ptr_ty_ref = try self.ptrType(elem_ty, self.spvStorageClass(ptr_ty.ptrAddressSpace(mod))); if (ptr_ty.isSinglePointer(mod)) { // Pointer-to-array. In this case, the resulting pointer is not of the same type // as the ptr_ty (we want a *T, not a *[N]T), and hence we need to use accessChain. 
@@ -3835,7 +3869,7 @@ const DeclGen = struct { const vector_ty = vector_ptr_ty.childType(mod); const scalar_ty = vector_ty.scalarType(mod); - const storage_class = spvStorageClass(vector_ptr_ty.ptrAddressSpace(mod)); + const storage_class = self.spvStorageClass(vector_ptr_ty.ptrAddressSpace(mod)); const scalar_ptr_ty_ref = try self.ptrType(scalar_ty, storage_class); const vector_ptr = try self.resolve(data.vector_ptr); @@ -3858,7 +3892,7 @@ const DeclGen = struct { if (layout.tag_size == 0) return; const tag_ty = un_ty.unionTagTypeSafety(mod).?; - const tag_ptr_ty_ref = try self.ptrType(tag_ty, spvStorageClass(un_ptr_ty.ptrAddressSpace(mod))); + const tag_ptr_ty_ref = try self.ptrType(tag_ty, self.spvStorageClass(un_ptr_ty.ptrAddressSpace(mod))); const union_ptr_id = try self.resolve(bin_op.lhs); const new_tag_id = try self.resolve(bin_op.rhs); @@ -4079,7 +4113,7 @@ const DeclGen = struct { return try self.spv.constUndef(result_ty_ref); } - const storage_class = spvStorageClass(object_ptr_ty.ptrAddressSpace(mod)); + const storage_class = self.spvStorageClass(object_ptr_ty.ptrAddressSpace(mod)); const pl_ptr_ty_ref = try self.ptrType(layout.payload_ty, storage_class); const pl_ptr_id = try self.accessChain(pl_ptr_ty_ref, object_ptr, &.{layout.payload_index}); @@ -4134,17 +4168,16 @@ const DeclGen = struct { .initializer = options.initializer, }); + const target = self.getTarget(); + if (target.os.tag == .vulkan) { + return var_id; + } + switch (options.storage_class) { .Generic => { const ptr_gn_ty_ref = try self.ptrType(ty, .Generic); // Convert to a generic pointer - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{ - .id_result_type = self.typeId(ptr_gn_ty_ref), - .id_result = result_id, - .pointer = var_id, - }); - return result_id; + return self.castToGeneric(self.typeId(ptr_gn_ty_ref), var_id); }, .Function => return var_id, else => unreachable, @@ -4880,7 +4913,7 @@ const DeclGen = struct { const 
is_non_null_id = blk: { if (is_pointer) { if (payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { - const storage_class = spvStorageClass(operand_ty.ptrAddressSpace(mod)); + const storage_class = self.spvStorageClass(operand_ty.ptrAddressSpace(mod)); const bool_ptr_ty = try self.ptrType(Type.bool, storage_class); const tag_ptr_id = try self.accessChain(bool_ptr_ty, operand_id, &.{1}); break :blk try self.load(Type.bool, tag_ptr_id, .{}); diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig index 056792ab9ad0..98a7c67beeab 100644 --- a/src/codegen/spirv/Module.zig +++ b/src/codegen/spirv/Module.zig @@ -92,7 +92,7 @@ pub const Global = struct { /// The past-end offset into `self.flobals.section`. end_inst: u32, /// The result-id of the function that initializes this value. - initializer_id: IdRef, + initializer_id: ?IdRef, }; /// This models a kernel entry point. @@ -101,6 +101,8 @@ pub const EntryPoint = struct { decl_index: Decl.Index, /// The name of the kernel to be exported. 
name: CacheString, + /// Calling Convention + execution_model: spec.ExecutionModel, }; /// A general-purpose allocator which may be used to allocate resources for this module @@ -313,7 +315,7 @@ fn entryPoints(self: *Module) !Section { const entry_point_id = self.declPtr(entry_point.decl_index).result_id; try entry_points.emit(self.gpa, .OpEntryPoint, .{ - .execution_model = .Kernel, + .execution_model = entry_point.execution_model, .entry_point = entry_point_id, .name = self.cache.getString(entry_point.name).?, .interface = interface.items, @@ -362,11 +364,13 @@ fn initializer(self: *Module, entry_points: *Section) !Section { for (self.globals.globals.keys(), self.globals.globals.values()) |decl_index, global| { try self.addEntryPointDeps(decl_index, &seen, &interface); - try section.emit(self.gpa, .OpFunctionCall, .{ - .id_result_type = void_ty_id, - .id_result = self.allocId(), - .function = global.initializer_id, - }); + if (global.initializer_id) |initializer_id| { + try section.emit(self.gpa, .OpFunctionCall, .{ + .id_result_type = void_ty_id, + .id_result = self.allocId(), + .function = initializer_id, + }); + } } try section.emit(self.gpa, .OpReturn, {}); @@ -390,7 +394,7 @@ fn initializer(self: *Module, entry_points: *Section) !Section { } /// Emit this module as a spir-v binary. -pub fn flush(self: *Module, file: std.fs.File) !void { +pub fn flush(self: *Module, file: std.fs.File, target: std.Target) !void { // See SPIR-V Spec section 2.3, "Physical Layout of a SPIR-V Module and Instruction" // TODO: Perform topological sort on the globals. 
@@ -403,14 +407,25 @@ pub fn flush(self: *Module, file: std.fs.File) !void { var types_constants = try self.cache.materialize(self); defer types_constants.deinit(self.gpa); - var init_func = try self.initializer(&entry_points); + // TODO: Vulkan doesn't support initializer kernel + var init_func = if (target.os.tag != .vulkan) + try self.initializer(&entry_points) + else + Section{}; defer init_func.deinit(self.gpa); const header = [_]Word{ spec.magic_number, // TODO: From cpu features - // Emit SPIR-V 1.4 for now. This is the highest version that Intel's CPU OpenCL supports. - (1 << 16) | (4 << 8), + spec.Version.toWord(.{ + .major = 1, + .minor = switch (target.os.tag) { + // Emit SPIR-V 1.3 for now. This is the highest version that Vulkan 1.1 supports. + .vulkan => 3, + // Emit SPIR-V 1.4 for now. This is the highest version that Intel's CPU OpenCL supports. + else => 4, + }, + }), 0, // TODO: Register Zig compiler magic number. self.idBound(), 0, // Schema (currently reserved for future use) @@ -617,7 +632,13 @@ pub fn beginGlobal(self: *Module) u32 { return @as(u32, @intCast(self.globals.section.instructions.items.len)); } -pub fn endGlobal(self: *Module, global_index: Decl.Index, begin_inst: u32, result_id: IdRef, initializer_id: IdRef) void { +pub fn endGlobal( + self: *Module, + global_index: Decl.Index, + begin_inst: u32, + result_id: IdRef, + initializer_id: ?IdRef, +) void { const global = self.globalPtr(global_index).?; global.* = .{ .result_id = result_id, @@ -627,10 +648,16 @@ pub fn endGlobal(self: *Module, global_index: Decl.Index, begin_inst: u32, resul }; } -pub fn declareEntryPoint(self: *Module, decl_index: Decl.Index, name: []const u8) !void { +pub fn declareEntryPoint( + self: *Module, + decl_index: Decl.Index, + name: []const u8, + execution_model: spec.ExecutionModel, +) !void { try self.entry_points.append(self.gpa, .{ .decl_index = decl_index, .name = try self.resolveString(name), + .execution_model = execution_model, }); } diff --git 
a/src/codegen/spirv/spec.zig b/src/codegen/spirv/spec.zig index f73487f41f1d..7cd4bbbd1bbb 100644 --- a/src/codegen/spirv/spec.zig +++ b/src/codegen/spirv/spec.zig @@ -1,6 +1,15 @@ //! This file is auto-generated by tools/gen_spirv_spec.zig. -const Version = @import("std").SemanticVersion; +pub const Version = packed struct(Word) { + padding: u8 = 0, + minor: u8, + major: u8, + padding0: u8 = 0, + + pub fn toWord(self: @This()) Word { + return @bitCast(self); + } +}; pub const Word = u32; pub const IdResult = struct { diff --git a/src/link/SpirV.zig b/src/link/SpirV.zig index 7b66d914bf1b..e59af26ab7d2 100644 --- a/src/link/SpirV.zig +++ b/src/link/SpirV.zig @@ -86,8 +86,6 @@ pub fn createEmpty( else => unreachable, // Caught by Compilation.Config.resolve. } - assert(target.abi != .none); // Caught by Compilation.Config.resolve. - return self; } @@ -158,10 +156,27 @@ pub fn updateExports( }, }; const decl = mod.declPtr(decl_index); - if (decl.val.isFuncBody(mod) and decl.ty.fnCallingConvention(mod) == .Kernel) { + if (decl.val.isFuncBody(mod)) { + const target = mod.getTarget(); const spv_decl_index = try self.object.resolveDecl(mod, decl_index); - for (exports) |exp| { - try self.object.spv.declareEntryPoint(spv_decl_index, mod.intern_pool.stringToSlice(exp.opts.name)); + const execution_model = switch (decl.ty.fnCallingConvention(mod)) { + .Vertex => spec.ExecutionModel.Vertex, + .Fragment => spec.ExecutionModel.Fragment, + .Kernel => spec.ExecutionModel.Kernel, + else => unreachable, + }; + const is_vulkan = target.os.tag == .vulkan; + + if ((!is_vulkan and execution_model == .Kernel) or + (is_vulkan and (execution_model == .Fragment or execution_model == .Vertex))) + { + for (exports) |exp| { + try self.object.spv.declareEntryPoint( + spv_decl_index, + mod.intern_pool.stringToSlice(exp.opts.name), + execution_model, + ); + } } } @@ -224,7 +239,7 @@ pub fn flushModule(self: *SpirV, arena: Allocator, prog_node: *std.Progress.Node .extension = error_info.items, 
}); - try spv.flush(self.base.file.?); + try spv.flush(self.base.file.?, target); } fn writeCapabilities(spv: *SpvModule, target: std.Target) !void { @@ -233,7 +248,7 @@ fn writeCapabilities(spv: *SpvModule, target: std.Target) !void { const caps: []const spec.Capability = switch (target.os.tag) { .opencl => &.{ .Kernel, .Addresses, .Int8, .Int16, .Int64, .Float64, .Float16, .GenericPointer }, .glsl450 => &.{.Shader}, - .vulkan => &.{.Shader}, + .vulkan => &.{ .Shader, .VariablePointersStorageBuffer, .Int8, .Int16, .Int64, .Float64, .Float16 }, else => unreachable, // TODO }; diff --git a/tools/gen_spirv_spec.zig b/tools/gen_spirv_spec.zig index 4e163b3522f6..1f57ee426202 100644 --- a/tools/gen_spirv_spec.zig +++ b/tools/gen_spirv_spec.zig @@ -77,7 +77,16 @@ fn render(writer: anytype, allocator: Allocator, registry: g.CoreRegistry) !void try writer.writeAll( \\//! This file is auto-generated by tools/gen_spirv_spec.zig. \\ - \\const Version = @import("std").SemanticVersion; + \\pub const Version = packed struct(Word) { + \\ padding: u8 = 0, + \\ minor: u8, + \\ major: u8, + \\ padding0: u8 = 0, + \\ + \\ pub fn toWord(self: @This()) Word { + \\ return @bitCast(self); + \\ } + \\}; \\ \\pub const Word = u32; \\pub const IdResult = struct{ From fdf668b8ea9b69ba76a46ac2bb765231d54b88d5 Mon Sep 17 00:00:00 2001 From: Ali Chraghi Date: Thu, 1 Feb 2024 19:38:23 +0330 Subject: [PATCH 2/4] spirv: emit vectors whenever we can --- src/codegen/spirv.zig | 59 +++++++++++++++++++++++++++++++----- src/codegen/spirv/Module.zig | 7 +++++ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index e8446622f7cc..8bcc907436cd 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -744,6 +744,30 @@ const DeclGen = struct { return try self.load(ty, ptr_composite_id, .{}); } + /// Construct a vector at runtime. + /// ty must be an vector type. 
+ /// Constituents should be in `indirect` representation (as the elements of an vector should be). + /// Result is in `direct` representation. + fn constructVector(self: *DeclGen, ty: Type, constituents: []const IdRef) !IdRef { + // The Khronos LLVM-SPIRV translator crashes because it cannot construct structs which' + // operands are not constant. + // See https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1349 + // For now, just initialize the struct by setting the fields manually... + // TODO: Make this OpCompositeConstruct when we can + const mod = self.module; + const ptr_composite_id = try self.alloc(ty, .{ .storage_class = .Function }); + const ptr_elem_ty_ref = try self.ptrType(ty.elemType2(mod), .Function); + for (constituents, 0..) |constitent_id, index| { + const ptr_id = try self.accessChain(ptr_elem_ty_ref, ptr_composite_id, &.{@as(u32, @intCast(index))}); + try self.func.body.emit(self.spv.gpa, .OpStore, .{ + .pointer = ptr_id, + .object = constitent_id, + }); + } + + return try self.load(ty, ptr_composite_id, .{}); + } + /// Construct an array at runtime. /// ty must be an array type. /// Constituents should be in `indirect` representation (as the elements of an array should be). 
@@ -963,13 +987,16 @@ const DeclGen = struct { } switch (tag) { - inline .array_type => if (array_type.sentinel != .none) { - constituents[constituents.len - 1] = try self.constant(elem_ty, Value.fromInterned(array_type.sentinel), .indirect); + inline .array_type => { + if (array_type.sentinel != .none) { + const sentinel = Value.fromInterned(array_type.sentinel); + constituents[constituents.len - 1] = try self.constant(elem_ty, sentinel, .indirect); + } + return self.constructArray(ty, constituents); }, - else => {}, + inline .vector_type => return self.constructVector(ty, constituents), + else => unreachable, } - - return try self.constructArray(ty, constituents); }, .struct_type => { const struct_type = mod.typeToStruct(ty).?; @@ -1492,8 +1519,14 @@ const DeclGen = struct { const elem_ty = ty.childType(mod); const elem_ty_ref = try self.resolveType(elem_ty, .indirect); + const len = ty.vectorLen(mod); + const is_scalar = elem_ty.isNumeric(mod) or elem_ty.toIntern() == .bool_type; + + const ty_ref = if (is_scalar and len > 1 and len <= 4) + try self.spv.vectorType(ty.vectorLen(mod), elem_ty_ref) + else + try self.spv.arrayType(ty.vectorLen(mod), elem_ty_ref); - const ty_ref = try self.spv.arrayType(ty.vectorLen(mod), elem_ty_ref); try self.type_map.put(self.gpa, ty.toIntern(), .{ .ty_ref = ty_ref }); return ty_ref; }, @@ -3688,7 +3721,19 @@ const DeclGen = struct { constituents[0..index], ); }, - .Vector, .Array => { + .Vector => { + const n_elems = result_ty.vectorLen(mod); + const elem_ids = try self.gpa.alloc(IdRef, n_elems); + defer self.gpa.free(elem_ids); + + for (elements, 0..) 
|element, i| { + const id = try self.resolve(element); + elem_ids[i] = try self.convertToIndirect(result_ty.childType(mod), id); + } + + return try self.constructVector(result_ty, elem_ids); + }, + .Array => { const array_info = result_ty.arrayInfo(mod); const n_elems: usize = @intCast(result_ty.arrayLenIncludingSentinel(mod)); const elem_ids = try self.gpa.alloc(IdRef, n_elems); diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig index 98a7c67beeab..2c411b4590b1 100644 --- a/src/codegen/spirv/Module.zig +++ b/src/codegen/spirv/Module.zig @@ -508,6 +508,13 @@ pub fn intType(self: *Module, signedness: std.builtin.Signedness, bits: u16) !Ca } }); } +pub fn vectorType(self: *Module, len: u32, elem_ty_ref: CacheRef) !CacheRef { + return try self.resolve(.{ .vector_type = .{ + .component_type = elem_ty_ref, + .component_count = len, + } }); +} + pub fn arrayType(self: *Module, len: u32, elem_ty_ref: CacheRef) !CacheRef { const len_ty_ref = try self.resolve(.{ .int_type = .{ .signedness = .unsigned, From b1537976f72ccaf43c12f1f15ee3d9ae151239f7 Mon Sep 17 00:00:00 2001 From: Ali Chraghi Date: Fri, 2 Feb 2024 17:48:16 +0330 Subject: [PATCH 3/4] spirv: support enum integer values in Assembler --- src/codegen/spirv/Assembler.zig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/codegen/spirv/Assembler.zig b/src/codegen/spirv/Assembler.zig index 87e18d4bd856..7cecd7ee38f2 100644 --- a/src/codegen/spirv/Assembler.zig +++ b/src/codegen/spirv/Assembler.zig @@ -591,9 +591,13 @@ fn parseValueEnum(self: *Assembler, kind: spec.OperandKind) !void { try self.expectToken(.value); const text = self.tokenText(tok); + const int_value = std.fmt.parseInt(u32, text, 0) catch null; const enumerant = for (kind.enumerants()) |enumerant| { - if (std.mem.eql(u8, enumerant.name, text)) - break enumerant; + if (int_value) |v| { + if (v == enumerant.value) break enumerant; + } else { + if (std.mem.eql(u8, enumerant.name, text)) break enumerant; + } } 
else { return self.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) }); }; From 0a6b69065ecf309805c0a84f2d93e75ea52eafa4 Mon Sep 17 00:00:00 2001 From: Ali Chraghi Date: Fri, 2 Feb 2024 17:48:40 +0330 Subject: [PATCH 4/4] std: add gpu namespace --- lib/std/gpu.zig | 166 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/std/std.zig | 3 + 2 files changed, 169 insertions(+) create mode 100644 lib/std/gpu.zig diff --git a/lib/std/gpu.zig b/lib/std/gpu.zig new file mode 100644 index 000000000000..f3b37eff94a8 --- /dev/null +++ b/lib/std/gpu.zig @@ -0,0 +1,166 @@ +const std = @import("std.zig"); +const comptimePrint = std.fmt.comptimePrint; + +/// Will make `ptr` contain the location of the current invocation within the +/// global workgroup. Each component is equal to the index of the local workgroup +/// multiplied by the size of the local workgroup plus `localInvocationId`. +/// `ptr` must be a reference to variable or struct field. +pub fn globalInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void { + asm volatile ( + \\OpDecorate %ptr BuiltIn GlobalInvocationId + : + : [ptr] "" (ptr), + ); +} + +/// Will make that variable contain the location of the current cluster +/// culling, task, mesh, or compute shader invocation within the local +/// workgroup. Each component ranges from zero through to the size of the +/// workgroup in that dimension minus one. +/// `ptr` must be a reference to variable or struct field. +pub fn localInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void { + asm volatile ( + \\OpDecorate %ptr BuiltIn LocalInvocationId + : + : [ptr] "" (ptr), + ); +} + +/// Output vertex position from a `Vertex` entrypoint +/// `ptr` must be a reference to variable or struct field. 
+pub fn position(comptime ptr: *addrspace(.output) @Vector(4, f32)) void {
+    asm volatile (
+        \\OpDecorate %ptr BuiltIn Position
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+/// Will make `ptr` contain the index of the vertex that is
+/// being processed by the current vertex shader invocation.
+/// `ptr` must be a reference to variable or struct field.
+pub fn vertexIndex(comptime ptr: *addrspace(.input) u32) void {
+    asm volatile (
+        \\OpDecorate %ptr BuiltIn VertexIndex
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+/// Will make `ptr` contain the framebuffer coordinates of the fragment being processed.
+/// `ptr` must be a reference to variable or struct field.
+pub fn fragmentCoord(comptime ptr: *addrspace(.input) @Vector(4, f32)) void {
+    asm volatile (
+        \\OpDecorate %ptr BuiltIn FragCoord
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+/// Output fragment depth from a `Fragment` entrypoint
+/// `ptr` must be a reference to variable or struct field.
+pub fn fragmentDepth(comptime ptr: *addrspace(.output) f32) void {
+    asm volatile (
+        \\OpDecorate %ptr BuiltIn FragDepth
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+/// Forms the main linkage for `input` and `output` address spaces.
+/// `ptr` must be a reference to variable or struct field.
+pub fn location(comptime ptr: anytype, comptime loc: u32) void {
+    const code = comptimePrint("OpDecorate %ptr Location {}", .{loc});
+    asm volatile (code
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+/// Binds `ptr` to a resource through the given descriptor set and binding number.
+/// `ptr` must be a reference to variable or struct field.
+pub fn binding(comptime ptr: anytype, comptime group: u32, comptime bind: u32) void {
+    const code = comptimePrint(
+        \\OpDecorate %ptr DescriptorSet {}
+        \\OpDecorate %ptr Binding {}
+    , .{ group, bind });
+    asm volatile (code
+        :
+        : [ptr] "" (ptr),
+    );
+}
+
+pub const Origin = enum(u32) {
+    /// Increase toward the right and downward
+    upper_left = 7,
+    /// Increase toward the right and upward
+    lower_left = 8,
+};
+
+/// The coordinates appear to originate in the specified `origin`.
+/// Only valid with the `Fragment` calling convention.
+pub fn fragmentOrigin(comptime entry_point: anytype, comptime origin: Origin) void {
+    const origin_enum = switch (origin) {
+        .upper_left => .OriginUpperLeft,
+        .lower_left => .OriginLowerLeft,
+    };
+    asm volatile ("OpExecutionMode %entry_point " ++ @tagName(origin_enum)
+        :
+        : [entry_point] "" (entry_point),
+    );
+}
+
+pub const DepthMode = enum(u32) {
+    /// Declares that this entry point dynamically writes the
+    /// `fragmentDepth` built-in-decorated variable.
+    replacing = 12,
+    /// Indicates that per-fragment tests may assume that
+    /// any `fragmentDepth` built-in-decorated value written by the shader is
+    /// greater-than-or-equal to the fragment’s interpolated depth value
+    greater = 14,
+    /// Indicates that per-fragment tests may assume that
+    /// any `fragmentDepth` built-in-decorated value written by the shader is
+    /// less-than-or-equal to the fragment’s interpolated depth value
+    less = 15,
+    /// Indicates that per-fragment tests may assume that
+    /// any `fragmentDepth` built-in-decorated value written by the shader is
+    /// the same as the fragment’s interpolated depth value
+    unchanged = 16,
+};
+
+/// Only valid with the `Fragment` calling convention.
+pub fn depthMode(comptime entry_point: anytype, comptime mode: DepthMode) void {
+    const code = comptimePrint("OpExecutionMode %entry_point {}", .{@intFromEnum(mode)});
+    asm volatile (code
+        :
+        : [entry_point] "" (entry_point),
+    );
+}
+
+/// Indicates the workgroup size in the `x`, `y`, and `z` dimensions.
+/// Only valid with the `GLCompute` or `Kernel` calling conventions.
+pub fn workgroupSize(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void { + const code = comptimePrint("OpExecutionMode %entry_point LocalSize {} {} {}", .{ + size[0], + size[1], + size[2], + }); + asm volatile (code + : + : [entry_point] "" (entry_point), + ); +} + +/// A hint to the client, which indicates the workgroup size in the `x`, `y`, and `z` dimensions. +/// Only valid with the `GLCompute` or `Kernel` calling conventions. +pub fn workgroupSizeHint(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void { + const code = comptimePrint("OpExecutionMode %entry_point LocalSizeHint {} {} {}", .{ + size[0], + size[1], + size[2], + }); + asm volatile (code + : + : [entry_point] "" (entry_point), + ); +} diff --git a/lib/std/std.zig b/lib/std/std.zig index 4500a281b74f..047da005c36d 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -104,6 +104,9 @@ pub const fmt = @import("fmt.zig"); /// File system-related functionality. pub const fs = @import("fs.zig"); +/// GPU programming helpers. +pub const gpu = @import("gpu.zig"); + /// Fast hashing functions (i.e. not cryptographically secure). pub const hash = @import("hash.zig"); pub const hash_map = @import("hash_map.zig");