From b6ce52d58d32f5b1051cafc5957d5d2d0c52fb6c Mon Sep 17 00:00:00 2001
From: Adam Harvey
Date: Mon, 20 Nov 2023 15:56:21 -0800
Subject: [PATCH] typosquat: add suffix checks

This extends our new typosquatting checks (see #7206) to detect an
attack vector we've seen more recently where a bad actor tries to squat
an existing, popular crate by adding or removing a common suffix (such
as `-rs` or `-sys`).

The suffix list in the configuration has been taken _approximately_ from
the most popular suffixes in the existing set of crates, with a small
amount of human judgement involved on which ones are more likely to be
abused based on recent incidents.
---
 src/typosquat/cache.rs  |   6 +-
 src/typosquat/checks.rs | 174 ++++++++++++++++++++++++++++++++++++++++
 src/typosquat/config.rs |   6 ++
 src/typosquat/mod.rs    |   1 +
 4 files changed, 186 insertions(+), 1 deletion(-)
 create mode 100644 src/typosquat/checks.rs

diff --git a/src/typosquat/cache.rs b/src/typosquat/cache.rs
index e3cde8bb4f..0cf6ee092b 100644
--- a/src/typosquat/cache.rs
+++ b/src/typosquat/cache.rs
@@ -7,7 +7,7 @@ use typomania::{
     Harness,
 };
 
-use super::{config, database::TopCrates};
+use super::{checks::Suffixes, config, database::TopCrates};
 
 static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS";
 
@@ -72,6 +72,10 @@ impl Cache {
                     .with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| {
                         (*c, typos.iter().map(|ss| ss.to_string()).collect())
                     })))
+                    .with_check(Suffixes::new(
+                        config::SUFFIX_SEPARATORS.iter(),
+                        config::SUFFIXES.iter(),
+                    ))
                     .build(top),
             ),
         })
diff --git a/src/typosquat/checks.rs b/src/typosquat/checks.rs
new file mode 100644
index 0000000000..a69b9f4949
--- /dev/null
+++ b/src/typosquat/checks.rs
@@ -0,0 +1,174 @@
+use typomania::{
+    checks::{Check, Squat},
+    Corpus, Package,
+};
+
+/// A typomania check that checks if commonly used suffixes have been added or removed.
+pub struct Suffixes {
+    separators: Vec<String>,
+    suffixes: Vec<String>,
+}
+
+impl Suffixes {
+    pub fn new<Sep, Suf>(separators: Sep, suffixes: Suf) -> Self
+    where
+        Sep: Iterator,
+        Sep::Item: ToString,
+        Suf: Iterator,
+        Suf::Item: ToString,
+    {
+        Self {
+            separators: separators.map(|s| s.to_string()).collect(),
+            suffixes: suffixes.map(|s| s.to_string()).collect(),
+        }
+    }
+}
+
+impl Check for Suffixes {
+    fn check(
+        &self,
+        corpus: &dyn Corpus,
+        name: &str,
+        package: &dyn Package,
+    ) -> typomania::Result<Vec<Squat>> {
+        let mut squats = Vec::new();
+
+        for separator in self.separators.iter() {
+            for suffix in self.suffixes.iter() {
+                let combo = format!("{separator}{suffix}");
+
+                // If the package being examined ends in this separator and suffix combo, then we
+                // should see if it exists in the popular crate corpus.
+                if let Some(stem) = name.strip_suffix(&combo) {
+                    if corpus.possible_squat(stem, name, package)? {
+                        squats.push(Squat::Custom {
+                            message: format!("adds the {combo} suffix"),
+                            package: stem.to_string(),
+                        })
+                    }
+                }
+
+                // Alternatively, let's see if adding the separator and suffix combo to the package
+                // results in something popular; eg somebody trying to squat `foo` with `foo-rs`.
+                let suffixed = format!("{name}{combo}");
+                if corpus.possible_squat(&suffixed, name, package)? {
+                    squats.push(Squat::Custom {
+                        message: format!("removes the {combo} suffix"),
+                        package: suffixed,
+                    });
+                }
+            }
+        }
+
+        Ok(squats)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::{HashMap, HashSet};
+
+    use googletest::prelude::*;
+    use typomania::{AuthorSet, Harness};
+
+    use super::*;
+
+    #[test]
+    fn test_suffixes() -> anyhow::Result<()> {
+        let popular = TestCorpus::default()
+            .with_package(TestPackage::new("foo", "foo", ["Alice", "Bob"]))
+            .with_package(TestPackage::new("bar-rs", "Rust bar", ["Charlie"]))
+            .with_package(TestPackage::new("quux_sys", "libquux", ["Alice"]));
+
+        let harness = Harness::empty_builder()
+            .with_check(Suffixes::new(
+                ["-", "_"].iter(),
+                ["core", "rs", "sys"].iter(),
+            ))
+            .build(popular);
+
+        // Try some packages that shouldn't be squatting anything.
+        for package in [
+            TestPackage::new("bar", "shared author", ["Charlie"]),
+            TestPackage::new("baz", "unrelated package", ["Bob"]),
+            TestPackage::new("foo-rs", "shared author", ["Alice"]),
+        ]
+        .into_iter()
+        {
+            let name = package.name.clone();
+            let squats = harness.check_package(&name, Box::new(package))?;
+            assert_that!(squats, empty());
+        }
+
+        // Now try some packages that should be.
+        for package in [
+            TestPackage::new("foo-rs", "no shared author", ["Charlie"]),
+            TestPackage::new("quux", "libquux", ["Charlie"]),
+            TestPackage::new("quux_sys_rs", "libquux... for Rust?", ["Charlie"]),
+        ]
+        .into_iter()
+        {
+            let name = package.name.clone();
+            let squats = harness.check_package(&name, Box::new(package))?;
+            assert_that!(squats, not(empty()));
+        }
+
+        Ok(())
+    }
+
+    struct TestPackage {
+        name: String,
+        description: String,
+        authors: HashSet<String>,
+    }
+
+    impl TestPackage {
+        fn new(name: &str, description: &str, authors: impl AsRef<[&'static str]>) -> Self {
+            Self {
+                name: name.to_string(),
+                description: description.to_string(),
+                authors: authors.as_ref().iter().map(|a| a.to_string()).collect(),
+            }
+        }
+    }
+
+    impl Package for TestPackage {
+        fn authors(&self) -> &dyn AuthorSet {
+            self
+        }
+
+        fn description(&self) -> Option<&str> {
+            Some(&self.description)
+        }
+
+        fn shared_authors(&self, other: &dyn AuthorSet) -> bool {
+            self.authors.iter().any(|author| other.contains(author))
+        }
+    }
+
+    impl AuthorSet for TestPackage {
+        fn contains(&self, author: &str) -> bool {
+            self.authors.contains(author)
+        }
+    }
+
+    #[derive(Default)]
+    struct TestCorpus(HashMap<String, TestPackage>);
+
+    impl TestCorpus {
+        fn with_package(mut self, package: TestPackage) -> Self {
+            self.0.insert(package.name.clone(), package);
+            self
+        }
+    }
+
+    impl Corpus for TestCorpus {
+        fn contains_name(&self, name: &str) -> typomania::Result<bool> {
+            Ok(self.0.contains_key(name))
+        }
+
+        fn get(&self, name: &str) -> typomania::Result<Option<&dyn Package>> {
+            Ok(self.0.get(name).map(|tp| tp as &dyn Package))
+        }
+    }
+}
diff --git a/src/typosquat/config.rs b/src/typosquat/config.rs
index 35fc12d071..6df9541ce0 100644
--- a/src/typosquat/config.rs
+++ b/src/typosquat/config.rs
@@ -5,6 +5,12 @@
 pub(super) static CRATE_NAME_ALPHABET: &str =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890-_";
 
+/// Commonly used separators when building crate names.
+pub(super) static SUFFIX_SEPARATORS: &[&str] = &["-", "_"];
+
+/// Commonly used suffixes when building crate names.
+pub(super) static SUFFIXES: &[&str] = &["api", "cli", "core", "lib", "rs", "rust", "sys"];
+
 /// The number of crates to consider in the "top crates" corpus.
 pub(super) static TOP_CRATES: i64 = 3000;
 
diff --git a/src/typosquat/mod.rs b/src/typosquat/mod.rs
index 350983baa2..83d26c3d1a 100644
--- a/src/typosquat/mod.rs
+++ b/src/typosquat/mod.rs
@@ -1,4 +1,5 @@
 mod cache;
+mod checks;
 mod config;
 mod database;
 