Skip to content

Commit

Permalink
Replace Attr::SpvDebugLine with a better basic debug source locatio…
Browse files Browse the repository at this point in the history
…n attribute. (#9)

Partially modeled on a subset of the
[`NonSemantic.Shader.DebugInfo.100`](https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/main/nonsemantic/NonSemantic.Shader.DebugInfo.100.html)
SPIR-V extended instruction set, but more with Rust-GPU's needs in mind,
this extends the existing `file:line:col` debuginfo to support both:
- ranges (within a single file - comparable to rustc's internal `Span`)
- "inlined at" debuginfo (call site debug location + callee name)
- more expressive than `NonSemantic.Shader.DebugInfo.100` in one key
way: for some reason, that extension only seems to allow distinguishing
call sites *by line number* which seems borderline incorrect (wrt DWARF)

There is nothing in the `spirt` library itself that can create such
extended debuginfo (even though in theory e.g.
`NonSemantic.Shader.DebugInfo.100` support could do that), and instead
Rust-GPU is expected to produce it by converting its own custom
instructions.

The Rust-GPU side of this already exists since it's part of the
polaris/lodestar demo branches (IIRC this specifically happened for
recursion emulation, where `FuncCall` being outside `Block`s meant).

---

Example from a relatively messy inlining stack:

![image](https://github.com/user-attachments/assets/a69718d6-65a1-4749-bea6-27842417daa5)

Ideally large amounts of deduplication could be done by having a "debug
context tracker" in `print` that only needs to show *changes* (i.e.
entering/leaving a group of inlined instructions, and individual
location debuginfo that could still differ between instructions), but
that's not done in this PR.

The status quo on the Rust-GPU side was already quite verbose, and
arguably worse than this at times (or at least more "syntaxful"), so
this will do for the time being.
  • Loading branch information
eddyb authored Nov 6, 2024
2 parents 3cabf18 + f956ecc commit 000ec78
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 86 deletions.
108 changes: 88 additions & 20 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ pub use context::AttrSet;
/// Definition for an [`AttrSet`]: a set of [`Attr`]s.
#[derive(Default, PartialEq, Eq, Hash)]
pub struct AttrSetDef {
// FIXME(eddyb) consider "persistent datastructures" (e.g. the `im` crate).
// FIXME(eddyb) use `BTreeMap<Attr, AttrValue>` and split some of the params
// between the `Attr` and `AttrValue` based on specified uniquness.
// FIXME(eddyb) don't put debuginfo in here, but rather at use sites
Expand All @@ -298,7 +299,34 @@ pub struct AttrSetDef {
}

impl AttrSetDef {
pub fn push_diag(&mut self, diag: Diag) {
pub fn dbg_src_loc(&self) -> Option<DbgSrcLoc> {
// FIXME(eddyb) seriously consider moving to `BTreeMap` (see above).
// HACK(eddyb) this assumes `Attr::DbgSrcLoc` is the first of `Attr`!
match self.attrs.first() {
Some(&Attr::DbgSrcLoc(OrdAssertEq(dbg_src_loc))) => Some(dbg_src_loc),
_ => None,
}
}

pub fn set_dbg_src_loc(&mut self, dbg_src_loc: DbgSrcLoc) {
// FIXME(eddyb) seriously consider moving to `BTreeMap` (see above).
// HACK(eddyb) this assumes `Attr::DbgSrcLoc` is the first of `Attr`!
if let Some(Attr::DbgSrcLoc(_)) = self.attrs.first() {
self.attrs.pop_first().unwrap();
}
self.attrs.insert(Attr::DbgSrcLoc(OrdAssertEq(dbg_src_loc)));
}

pub fn diags(&self) -> &[Diag] {
// FIXME(eddyb) seriously consider moving to `BTreeMap` (see above).
// HACK(eddyb) this assumes `Attr::Diagnostics` is the last of `Attr`!
match self.attrs.last() {
Some(Attr::Diagnostics(OrdAssertEq(diags))) => diags,
_ => &[],
}
}

pub fn mutate_diags(&mut self, f: impl FnOnce(&mut Vec<Diag>)) {
// FIXME(eddyb) seriously consider moving to `BTreeMap` (see above).
// HACK(eddyb) this assumes `Attr::Diagnostics` is the last of `Attr`!
let mut attr = if let Some(Attr::Diagnostics(_)) = self.attrs.last() {
Expand All @@ -307,30 +335,54 @@ impl AttrSetDef {
Attr::Diagnostics(OrdAssertEq(vec![]))
};
match &mut attr {
Attr::Diagnostics(OrdAssertEq(diags)) => diags.push(diag),
Attr::Diagnostics(OrdAssertEq(diags)) => f(diags),
_ => unreachable!(),
}
self.attrs.insert(attr);
}

// FIXME(eddyb) should this be hidden in favor of `AttrSet::append_diag`?
pub fn append_diag(&self, diag: Diag) -> Self {
let mut new_attrs = Self { attrs: self.attrs.clone() };
new_attrs.push_diag(diag);
new_attrs
// HACK(eddyb) these only exist to avoid changing code working with `AttrSetDef`s.
pub fn push_diags(&mut self, new_diags: impl IntoIterator<Item = Diag>) {
self.mutate_diags(|diags| diags.extend(new_diags));
}
pub fn push_diag(&mut self, diag: Diag) {
self.push_diags([diag]);
}
}

// FIXME(eddyb) should these methods be elsewhere?
impl AttrSet {
// FIXME(eddyb) should this be hidden in favor of `push_diag`?
// FIXME(eddyb) should these methods always take multiple values?
pub fn append_diag(self, cx: &Context, diag: Diag) -> Self {
cx.intern(cx[self].append_diag(diag))
// FIXME(eddyb) could these two methods have a better name?
pub fn reintern_with(self, cx: &Context, f: impl FnOnce(&mut AttrSetDef)) -> Self {
let mut new_attrs = AttrSetDef { attrs: cx[self].attrs.clone() };
f(&mut new_attrs);
cx.intern(new_attrs)
}
pub fn mutate(&mut self, cx: &Context, f: impl FnOnce(&mut AttrSetDef)) {
*self = self.reintern_with(cx, f);
}

pub fn dbg_src_loc(self, cx: &Context) -> Option<DbgSrcLoc> {
if self == AttrSet::default() {
return None;
}
cx[self].dbg_src_loc()
}

pub fn set_dbg_src_loc(&mut self, cx: &Context, dbg_src_loc: DbgSrcLoc) {
self.mutate(cx, |attrs| attrs.set_dbg_src_loc(dbg_src_loc));
}

pub fn diags(self, cx: &Context) -> &[Diag] {
cx[self].diags()
}

pub fn push_diags(&mut self, cx: &Context, diags: impl IntoIterator<Item = Diag>) {
self.mutate(cx, |attrs| attrs.push_diags(diags));
}

pub fn push_diag(&mut self, cx: &Context, diag: Diag) {
*self = self.append_diag(cx, diag);
self.push_diags(cx, [diag]);
}
}

Expand All @@ -342,18 +394,16 @@ impl AttrSet {
// FIXME(eddyb) consider interning individual attrs, not just `AttrSet`s.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, derive_more::From)]
pub enum Attr {
// HACK(eddyb) this must be the first variant of `Attr` for the correctness
// of `AttrSetDef::{dbg_src_loc,set_dbg_src_loc}`.
DbgSrcLoc(OrdAssertEq<DbgSrcLoc>),

/// `QPtr`-specific attributes (see [`qptr::QPtrAttr`]).
#[from]
QPtr(qptr::QPtrAttr),

SpvAnnotation(spv::Inst),

SpvDebugLine {
file_path: OrdAssertEq<InternedStr>,
line: u32,
col: u32,
},

/// Some SPIR-V instructions, like `OpFunction`, take a bitflags operand
/// that is effectively an optimization over using `OpDecorate`.
//
Expand All @@ -363,11 +413,29 @@ pub enum Attr {
/// Can be used anywhere to record [`Diag`]nostics produced during a pass,
/// while allowing the pass to continue (and its output to be pretty-printed).
//
// HACK(eddyb) this is the last variant to control printing order, but also
// to make `push_diag`/`append_diag` above work correctly!
// HACK(eddyb) this must be the last variant of `Attr` for the correctness
// of`AttrSetDef::{diags,mutate_diags}` (this also helps with printing order).
Diagnostics(OrdAssertEq<Vec<Diag>>),
}

/// Simple `file:line:column`-style debuginfo, similar to SPIR-V `OpLine`,
/// but also supporting `(line, column)` ranges, and inlined locations.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct DbgSrcLoc {
pub file_path: InternedStr,

// FIXME(eddyb) `Range` might make sense here but these are inclusive,
// and `range::RangeInclusive` (the non-`Iterator` version of `a..=b`)
// isn't stable (nor the type of `a..=b` expressions), yet.
pub start_line_col: (u32, u32),
pub end_line_col: (u32, u32),

/// To describe locations originally in the callee of a call that was inlined,
/// the name of the callee and attributes describing the callsite are used,
/// where callsite attributes are expected to contain an [`Attr::DbgSrcLoc`].
pub inlined_callee_name_and_call_site: Option<(InternedStr, AttrSet)>,
}

/// Diagnostics produced by SPIR-T passes, and recorded in [`Attr::Diagnostics`].
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Diag {
Expand Down
113 changes: 94 additions & 19 deletions src/print/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUs
use crate::visit::{InnerVisit, Visit, Visitor};
use crate::{
AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst,
DataInstDef, DataInstForm, DataInstFormDef, DataInstKind, DeclDef, Diag, DiagLevel,
DataInstDef, DataInstForm, DataInstFormDef, DataInstKind, DbgSrcLoc, DeclDef, Diag, DiagLevel,
DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncParam, FxIndexMap,
FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo,
ModuleDialect, Node, NodeDef, NodeKind, NodeOutputDecl, OrdAssertEq, Region, RegionDef,
Expand Down Expand Up @@ -2111,6 +2111,99 @@ impl Print for Attr {
type Output = pretty::Fragment;
fn print(&self, printer: &Printer<'_>) -> pretty::Fragment {
let non_comment_attr = match self {
// FIXME(eddyb) move from repeating the same backtrace-like comments
// (potentially many times in a row) to a more "stateful" approach,
// which only mentions every inlined callsite once per sequence of
// e.g. `DataInst`s that are all nested in it.
&Attr::DbgSrcLoc(OrdAssertEq(dbg_src_loc)) => {
let mut comments = SmallVec::<[_; 8]>::new();
let mut next_dbg_src_loc = Some(dbg_src_loc);
while let Some(dbg_src_loc) = next_dbg_src_loc {
let DbgSrcLoc {
file_path,
start_line_col: (start_line, mut start_col),
end_line_col: (end_line, mut end_col),
inlined_callee_name_and_call_site,
} = dbg_src_loc;

// HACK(eddyb) Rust-GPU's column numbers seem
// off-by-one wrt what e.g. VSCode expects
// for `:line:col` syntax, but it's hard to
// tell from the spec and `glslang` doesn't
// even emit column numbers at all!
start_col += 1;
end_col += 1;

let mut s = String::new();
if comments.is_empty() {
s += "// at ";
} else {
s += "// … at ";
}

// HACK(eddyb) only skip string-quoting and escaping for
// well-behaved file paths.
let file_path = &printer.cx[file_path];
if file_path.chars().all(|c| c.is_ascii_graphic() && c != ':') {
s += file_path;
} else {
write!(s, "{file_path:?}").unwrap();
}

// HACK(eddyb) the syntaxes used are taken from VSCode, i.e.:
// https://github.com/microsoft/vscode/blob/6b924c5/src/vs/workbench/contrib/terminalContrib/links/browser/terminalLinkParsing.ts#L75-L91
// (using the most boring syntax possible for every situation).
let is_quoted = s.ends_with('"');
let is_range = (start_line, start_col) != (end_line, end_col);
write!(
s,
"{}{start_line}{}{start_col}",
if is_quoted { ',' } else { ':' },
if is_quoted && is_range { '.' } else { ':' }
)
.unwrap();
if is_range {
s += "-";
if start_line != end_line {
write!(s, "{end_line}.").unwrap();
}
write!(s, "{end_col}").unwrap();
}

// Chain inlined locations by putting the most important
// details (`file:line:col`) at the start of each comment,
// and the less important ones (callee name) at the end.
next_dbg_src_loc =
inlined_callee_name_and_call_site.map(|(callee_name, call_site_attrs)| {
s += ", in `";

// HACK(eddyb) not trusting non-trivial strings to behave.
let callee_name = &printer.cx[callee_name];
if callee_name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
s += callee_name;
} else {
s.extend(callee_name.escape_debug().flat_map(|c| {
(c == '`').then_some('\\').into_iter().chain([c])
}));
}

s += "`, inlined …";

call_site_attrs.dbg_src_loc(printer.cx).unwrap_or_else(|| DbgSrcLoc {
file_path: printer.cx.intern("<unknown callee location>"),
start_line_col: (0, 0),
end_line_col: (0, 0),
inlined_callee_name_and_call_site: None,
})
});

if !comments.is_empty() {
comments.push(pretty::Node::ForceLineSeparation);
}
comments.push(printer.comment_style().apply(s));
}
return pretty::Fragment::new(comments);
}
Attr::Diagnostics(diags) => {
return pretty::Fragment::new(
diags
Expand Down Expand Up @@ -2250,24 +2343,6 @@ impl Print for Attr {
printer.pretty_spv_inst(printer.attr_style(), *opcode, imms, [None])
}
}
&Attr::SpvDebugLine { file_path, line, col } => {
// HACK(eddyb) Rust-GPU's column numbers seem
// off-by-one wrt what e.g. VSCode expects
// for `:line:col` syntax, but it's hard to
// tell from the spec and `glslang` doesn't
// even emit column numbers at all!
let col = col + 1;

// HACK(eddyb) only use skip string quoting
// and escaping for well-behaved file paths.
let file_path = &printer.cx[file_path.0];
let comment = if file_path.chars().all(|c| c.is_ascii_graphic() && c != ':') {
format!("// at {file_path}:{line}:{col}")
} else {
format!("// at {file_path:?}:{line}:{col}")
};
return printer.comment_style().apply(comment).into();
}
&Attr::SpvBitflagsOperand(imm) => printer.pretty_spv_operand_from_imms([imm]),
};
pretty::Fragment::new([
Expand Down
25 changes: 11 additions & 14 deletions src/spv/lift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use crate::spv::{self, spec};
use crate::visit::{InnerVisit, Visitor};
use crate::{
AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef,
DataInstForm, DataInstFormDef, DataInstKind, DeclDef, EntityList, ExportKey, Exportee, Func,
FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody, Import, Module,
ModuleDebugInfo, ModuleDialect, Node, NodeKind, NodeOutputDecl, Region, RegionInputDecl,
SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg,
DataInstForm, DataInstFormDef, DataInstKind, DbgSrcLoc, DeclDef, EntityList, ExportKey,
Exportee, Func, FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody,
Import, Module, ModuleDebugInfo, ModuleDialect, Node, NodeKind, NodeOutputDecl, OrdAssertEq,
Region, RegionInputDecl, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg,
};
use rustc_hash::FxHashMap;
use smallvec::SmallVec;
Expand Down Expand Up @@ -207,8 +207,8 @@ impl Visitor<'_> for NeedsIdsCollector<'_> {
| Attr::QPtr(_)
| Attr::SpvAnnotation { .. }
| Attr::SpvBitflagsOperand(_) => {}
Attr::SpvDebugLine { file_path, .. } => {
self.debug_strings.insert(&self.cx[file_path.0]);
Attr::DbgSrcLoc(OrdAssertEq(DbgSrcLoc { file_path, .. })) => {
self.debug_strings.insert(&self.cx[file_path]);
}
}
attr.inner_visit_with(self);
Expand Down Expand Up @@ -1508,9 +1508,9 @@ impl Module {

for attr in cx[attrs].attrs.iter() {
match attr {
Attr::Diagnostics(_)
Attr::DbgSrcLoc(_)
| Attr::Diagnostics(_)
| Attr::QPtr(_)
| Attr::SpvDebugLine { .. }
| Attr::SpvBitflagsOperand(_) => {}
Attr::SpvAnnotation(inst @ spv::Inst { opcode, .. }) => {
let target_id = result_id.expect(
Expand Down Expand Up @@ -1718,15 +1718,12 @@ impl Module {
// in order to end up with the expected line debuginfo.
// FIXME(eddyb) make this less of a search and more of a
// lookup by splitting attrs into key and value parts.
let new_debug_line = cx[attrs].attrs.iter().find_map(|attr| match *attr {
Attr::SpvDebugLine { file_path, line, col } => {
Some((ids.debug_strings[&cx[file_path.0]], line, col))
}
_ => None,
let new_debug_line = attrs.dbg_src_loc(cx).map(|dbg_src_loc| {
(ids.debug_strings[&cx[dbg_src_loc.file_path]], dbg_src_loc.start_line_col)
});
if current_debug_line != new_debug_line {
let (opcode, imms, ids) = match new_debug_line {
Some((file_path_id, line, col)) => (
Some((file_path_id, (line, col))) => (
wk.OpLine,
[
spv::Imm::Short(wk.LiteralInteger, line),
Expand Down
Loading

0 comments on commit 000ec78

Please sign in to comment.