Skip to content

Commit

Permalink
[red-knot] more efficient UnionBuilder::add (#13411)
Browse files Browse the repository at this point in the history
Avoid quadratic time in subsumed elements when adding a super-type of
existing union elements.

Reserve space in advance when adding multiple elements (from another
union) to a union.

Make union elements a `Box<[Type]>` instead of an `FxOrderSet`; the set
doesn't buy much since the rules of union uniqueness are defined in
terms of supertype/subtype, not in terms of simple type identity.

Move sealed-boolean handling out of a separate `UnionBuilder::simplify`
method and into `UnionBuilder::add`; now that `add` is iterating
existing elements anyway, this is more efficient.

Remove `UnionType::contains`, since it's now `O(n)` and we shouldn't
really need it, generally we care about subtype/supertype, not type
identity. (Right now it's used for `Type::Unbound`, which shouldn't even
be a type.)

Add support for `is_subtype_of` for the `object` type.

Addresses comments on #13401
  • Loading branch information
carljm authored Sep 20, 2024
1 parent 40c65dc commit 149fb20
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 75 deletions.
22 changes: 15 additions & 7 deletions crates/red_knot_python_semantic/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ impl<'db> Type<'db> {
pub fn may_be_unbound(&self, db: &'db dyn Db) -> bool {
match self {
Type::Unbound => true,
Type::Union(union) => union.contains(db, Type::Unbound),
Type::Union(union) => union.elements(db).contains(&Type::Unbound),
// Unbound can't appear in an intersection, because an intersection with Unbound
// simplifies to just Unbound.
_ => false,
Expand Down Expand Up @@ -422,6 +422,8 @@ impl<'db> Type<'db> {
.elements(db)
.iter()
.any(|&elem_ty| ty.is_subtype_of(db, elem_ty)),
(_, Type::Instance(class)) if class.is_stdlib_symbol(db, "builtins", "object") => true,
(Type::Instance(class), _) if class.is_stdlib_symbol(db, "builtins", "object") => false,
// TODO
_ => false,
}
Expand Down Expand Up @@ -1002,16 +1004,16 @@ impl<'db> ClassType<'db> {
pub struct UnionType<'db> {
/// The union type includes values in any of these types.
#[return_ref]
elements: FxOrderSet<Type<'db>>,
elements_boxed: Box<[Type<'db>]>,
}

impl<'db> UnionType<'db> {
pub fn contains(&self, db: &'db dyn Db, ty: Type<'db>) -> bool {
self.elements(db).contains(&ty)
fn elements(self, db: &'db dyn Db) -> &'db [Type<'db>] {
self.elements_boxed(db)
}

/// Create a union from a list of elements
/// (which may be eagerly simplified into a different variant of [`Type`] altogether)
/// (which may be eagerly simplified into a different variant of [`Type`] altogether).
pub fn from_elements<T: Into<Type<'db>>>(
db: &'db dyn Db,
elements: impl IntoIterator<Item = T>,
Expand All @@ -1025,13 +1027,13 @@ impl<'db> UnionType<'db> {
}

/// Apply a transformation function to all elements of the union,
/// and create a new union from the resulting set of types
/// and create a new union from the resulting set of types.
pub fn map(
&self,
db: &'db dyn Db,
transform_fn: impl Fn(&Type<'db>) -> Type<'db>,
) -> Type<'db> {
Self::from_elements(db, self.elements(db).into_iter().map(transform_fn))
Self::from_elements(db, self.elements(db).iter().map(transform_fn))
}
}

Expand Down Expand Up @@ -1135,6 +1137,8 @@ mod tests {
}
}

#[test_case(Ty::BuiltinInstance("str"), Ty::BuiltinInstance("object"))]
#[test_case(Ty::BuiltinInstance("int"), Ty::BuiltinInstance("object"))]
#[test_case(Ty::Unknown, Ty::IntLiteral(1))]
#[test_case(Ty::Any, Ty::IntLiteral(1))]
#[test_case(Ty::Never, Ty::IntLiteral(1))]
Expand All @@ -1152,6 +1156,7 @@ mod tests {
assert!(from.into_type(&db).is_assignable_to(&db, to.into_type(&db)));
}

#[test_case(Ty::BuiltinInstance("object"), Ty::BuiltinInstance("int"))]
#[test_case(Ty::IntLiteral(1), Ty::BuiltinInstance("str"))]
#[test_case(Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str"))]
#[test_case(Ty::BuiltinInstance("int"), Ty::IntLiteral(1))]
Expand All @@ -1160,6 +1165,8 @@ mod tests {
assert!(!from.into_type(&db).is_assignable_to(&db, to.into_type(&db)));
}

#[test_case(Ty::BuiltinInstance("str"), Ty::BuiltinInstance("object"))]
#[test_case(Ty::BuiltinInstance("int"), Ty::BuiltinInstance("object"))]
#[test_case(Ty::Never, Ty::IntLiteral(1))]
#[test_case(Ty::IntLiteral(1), Ty::BuiltinInstance("int"))]
#[test_case(Ty::StringLiteral("foo"), Ty::BuiltinInstance("str"))]
Expand All @@ -1172,6 +1179,7 @@ mod tests {
assert!(from.into_type(&db).is_subtype_of(&db, to.into_type(&db)));
}

#[test_case(Ty::BuiltinInstance("object"), Ty::BuiltinInstance("int"))]
#[test_case(Ty::Unknown, Ty::IntLiteral(1))]
#[test_case(Ty::Any, Ty::IntLiteral(1))]
#[test_case(Ty::IntLiteral(1), Ty::Unknown)]
Expand Down
127 changes: 75 additions & 52 deletions crates/red_knot_python_semantic/src/types/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,79 +27,89 @@
//! * An intersection containing two non-overlapping types should simplify to [`Type::Never`].
use crate::types::{builtins_symbol_ty, IntersectionType, Type, UnionType};
use crate::{Db, FxOrderSet};
use ordermap::set::MutableValues;
use smallvec::SmallVec;

pub(crate) struct UnionBuilder<'db> {
elements: FxOrderSet<Type<'db>>,
elements: Vec<Type<'db>>,
db: &'db dyn Db,
}

impl<'db> UnionBuilder<'db> {
pub(crate) fn new(db: &'db dyn Db) -> Self {
Self {
db,
elements: FxOrderSet::default(),
elements: vec![],
}
}

/// Adds a type to this union.
pub(crate) fn add(mut self, ty: Type<'db>) -> Self {
match ty {
Type::Union(union) => {
for element in union.elements(self.db) {
let new_elements = union.elements(self.db);
self.elements.reserve(new_elements.len());
for element in new_elements {
self = self.add(*element);
}
}
Type::Never => {}
_ => {
let mut remove = vec![];
for element in &self.elements {
let bool_pair = if let Type::BooleanLiteral(b) = ty {
Some(Type::BooleanLiteral(!b))
} else {
None
};

let mut to_add = ty;
let mut to_remove = SmallVec::<[usize; 2]>::new();
for (index, element) in self.elements.iter().enumerate() {
if Some(*element) == bool_pair {
to_add = builtins_symbol_ty(self.db, "bool");
to_remove.push(index);
// The type we are adding is a BooleanLiteral, which doesn't have any
// subtypes. And we just found that the union already contained our
// mirror-image BooleanLiteral, so it can't also contain bool or any
// supertype of bool. Therefore, we are done.
break;
}
if ty.is_subtype_of(self.db, *element) {
return self;
} else if element.is_subtype_of(self.db, ty) {
remove.push(*element);
to_remove.push(index);
}
}
for element in remove {
self.elements.remove(&element);

match to_remove[..] {
[] => self.elements.push(to_add),
[index] => self.elements[index] = to_add,
_ => {
let mut current_index = 0;
let mut to_remove = to_remove.into_iter();
let mut next_to_remove_index = to_remove.next();
self.elements.retain(|_| {
let retain = if Some(current_index) == next_to_remove_index {
next_to_remove_index = to_remove.next();
false
} else {
true
};
current_index += 1;
retain
});
self.elements.push(to_add);
}
}
self.elements.insert(ty);
}
}

self
}

/// Performs the following normalizations:
/// - Replaces `Literal[True,False]` with `bool`.
/// - TODO For enums `E` with members `X1`,...,`Xn`, replaces
/// `Literal[E.X1,...,E.Xn]` with `E`.
fn simplify(&mut self) {
if let Some(true_index) = self.elements.get_index_of(&Type::BooleanLiteral(true)) {
if self.elements.contains(&Type::BooleanLiteral(false)) {
*self.elements.get_index_mut2(true_index).unwrap() =
builtins_symbol_ty(self.db, "bool");
self.elements.remove(&Type::BooleanLiteral(false));
}
}
}

pub(crate) fn build(mut self) -> Type<'db> {
pub(crate) fn build(self) -> Type<'db> {
match self.elements.len() {
0 => Type::Never,
1 => self.elements[0],
_ => {
self.simplify();

match self.elements.len() {
0 => Type::Never,
1 => self.elements[0],
_ => {
self.elements.shrink_to_fit();
Type::Union(UnionType::new(self.db, self.elements))
}
}
}
_ => Type::Union(UnionType::new(self.db, self.elements.into())),
}
}
}
Expand Down Expand Up @@ -293,12 +303,6 @@ mod tests {
use crate::ProgramSettings;
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};

impl<'db> UnionType<'db> {
fn elements_vec(self, db: &'db TestDb) -> Vec<Type<'db>> {
self.elements(db).into_iter().copied().collect()
}
}

fn setup_db() -> TestDb {
let db = TestDb::new();

Expand Down Expand Up @@ -326,7 +330,7 @@ mod tests {
let t1 = Type::IntLiteral(1);
let union = UnionType::from_elements(&db, [t0, t1]).expect_union();

assert_eq!(union.elements_vec(&db), &[t0, t1]);
assert_eq!(union.elements(&db), &[t0, t1]);
}

#[test]
Expand Down Expand Up @@ -363,10 +367,10 @@ mod tests {
let t3 = Type::IntLiteral(17);

let union = UnionType::from_elements(&db, [t0, t1, t3]).expect_union();
assert_eq!(union.elements_vec(&db), &[t0, t3]);
assert_eq!(union.elements(&db), &[t0, t3]);

let union = UnionType::from_elements(&db, [t0, t1, t2, t3]).expect_union();
assert_eq!(union.elements_vec(&db), &[bool_ty, t3]);
assert_eq!(union.elements(&db), &[bool_ty, t3]);
}

#[test]
Expand All @@ -378,26 +382,45 @@ mod tests {
let u1 = UnionType::from_elements(&db, [t0, t1]);
let union = UnionType::from_elements(&db, [u1, t2]).expect_union();

assert_eq!(union.elements_vec(&db), &[t0, t1, t2]);
assert_eq!(union.elements(&db), &[t0, t1, t2]);
}

#[test]
fn build_union_simplify_subtype() {
let db = setup_db();
let t0 = builtins_symbol_ty(&db, "str").to_instance(&db);
let t1 = Type::LiteralString;
let t2 = Type::Unknown;
let u0 = UnionType::from_elements(&db, [t0, t1]);
let u1 = UnionType::from_elements(&db, [t1, t0]);
let u2 = UnionType::from_elements(&db, [t0, t1, t2]);

assert_eq!(u0, t0);
assert_eq!(u1, t0);
assert_eq!(u2.expect_union().elements_vec(&db), &[t0, t2]);
}

#[test]
fn build_union_no_simplify_any() {}
fn build_union_no_simplify_unknown() {
let db = setup_db();
let t0 = builtins_symbol_ty(&db, "str").to_instance(&db);
let t1 = Type::Unknown;
let u0 = UnionType::from_elements(&db, [t0, t1]);
let u1 = UnionType::from_elements(&db, [t1, t0]);

assert_eq!(u0.expect_union().elements(&db), &[t0, t1]);
assert_eq!(u1.expect_union().elements(&db), &[t1, t0]);
}

#[test]
fn build_union_subsume_multiple() {
let db = setup_db();
let str_ty = builtins_symbol_ty(&db, "str").to_instance(&db);
let int_ty = builtins_symbol_ty(&db, "int").to_instance(&db);
let object_ty = builtins_symbol_ty(&db, "object").to_instance(&db);
let unknown_ty = Type::Unknown;

let u0 = UnionType::from_elements(&db, [str_ty, unknown_ty, int_ty, object_ty]);

assert_eq!(u0.expect_union().elements(&db), &[unknown_ty, object_ty]);
}

impl<'db> IntersectionType<'db> {
fn pos_vec(self, db: &'db TestDb) -> Vec<Type<'db>> {
Expand Down Expand Up @@ -477,7 +500,7 @@ mod tests {
.add_positive(u0)
.build()
.expect_union();
let [Type::Intersection(i0), Type::Intersection(i1)] = union.elements_vec(&db)[..] else {
let [Type::Intersection(i0), Type::Intersection(i1)] = union.elements(&db)[..] else {
panic!("expected a union of two intersections");
};
assert_eq!(i0.pos_vec(&db), &[ta, t0]);
Expand Down
29 changes: 13 additions & 16 deletions crates/red_knot_workspace/src/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,22 +114,19 @@ fn lint_maybe_undefined(context: &SemanticLintContext, name: &ast::ExprName) {
return;
}
let semantic = &context.semantic;
match name.ty(semantic) {
Type::Unbound => {
context.push_diagnostic(format_diagnostic(
context,
&format!("Name '{}' used when not defined.", &name.id),
name.start(),
));
}
Type::Union(union) if union.contains(semantic.db(), Type::Unbound) => {
context.push_diagnostic(format_diagnostic(
context,
&format!("Name '{}' used when possibly not defined.", &name.id),
name.start(),
));
}
_ => {}
let ty = name.ty(semantic);
if ty.is_unbound() {
context.push_diagnostic(format_diagnostic(
context,
&format!("Name '{}' used when not defined.", &name.id),
name.start(),
));
} else if ty.may_be_unbound(semantic.db()) {
context.push_diagnostic(format_diagnostic(
context,
&format!("Name '{}' used when possibly not defined.", &name.id),
name.start(),
));
}
}

Expand Down

0 comments on commit 149fb20

Please sign in to comment.