Skip to content

Commit

Permalink
typosquat: check for prefixes being manipulated like suffixes
Browse files Browse the repository at this point in the history
In rust-lang#7571, we added checks for crate names that added or removed suffixes
from popular crates. This has turned out to be a useful check! (Spoiler
alert for the blog post I'm publishing next week.)

@Turbo87 pointed out that this can also apply to prefixes, especially
`cargo-`. This generalises the suffix check to also check prefixes, and
adjusts the typomania configuration to add `cargo` to the list of
interesting affixes. For now, the same set of affixes will be used for
both, but depending on what we see, a future tweak would be to separate
the prefix and suffix lists. Let's see how that pans out.

In terms of implementation, I briefly toyed with making this generic
over the prefix/suffix combination to remove the copy/paste code, then
was reminded by rust-analyzer that `std::str::pattern::Pattern` isn't
stable. I'd rather duplicate 20 lines than deal with that, so here we
are.
  • Loading branch information
LawnGnome committed Jan 11, 2024
1 parent a7bc30c commit 997d98d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 20 deletions.
6 changes: 3 additions & 3 deletions src/typosquat/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use typomania::{
Harness,
};

use super::{checks::Suffixes, config, database::TopCrates};
use super::{checks::Affixes, config, database::TopCrates};

static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS";

Expand Down Expand Up @@ -72,9 +72,9 @@ impl Cache {
.with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| {
(*c, typos.iter().map(|ss| ss.to_string()).collect())
})))
.with_check(Suffixes::new(
config::SUFFIX_SEPARATORS.iter(),
.with_check(Affixes::new(
config::SUFFIXES.iter(),
config::SUFFIX_SEPARATORS.iter(),
))
.build(top),
),
Expand Down
57 changes: 41 additions & 16 deletions src/typosquat/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,29 @@ use typomania::{
Corpus, Package,
};

/// A typomania check that checks if commonly used suffixes have been added or removed.
pub struct Suffixes {
/// A typomania check that checks if commonly used prefixes or suffixes have been added to or
/// removed from a package name.
pub struct Affixes {
affixes: Vec<String>,
separators: Vec<String>,
suffixes: Vec<String>,
}

impl Suffixes {
pub fn new<Sep, Suf>(separators: Sep, suffixes: Suf) -> Self
impl Affixes {
pub fn new<Aff, Sep>(affixes: Aff, separators: Sep) -> Self
where
Aff: Iterator,
Aff::Item: ToString,
Sep: Iterator,
Sep::Item: ToString,
Suf: Iterator,
Suf::Item: ToString,
{
Self {
affixes: affixes.map(|s| s.to_string()).collect(),
separators: separators.map(|s| s.to_string()).collect(),
suffixes: suffixes.map(|s| s.to_string()).collect(),
}
}
}

impl Check for Suffixes {
impl Check for Affixes {
fn check(
&self,
corpus: &dyn Corpus,
Expand All @@ -34,11 +35,32 @@ impl Check for Suffixes {
let mut squats = Vec::new();

for separator in self.separators.iter() {
for suffix in self.suffixes.iter() {
let combo = format!("{separator}{suffix}");
for affix in self.affixes.iter() {
// If the package being examined starts with this prefix and separator combo, then
// we should see if it exists without that prefix in the popular crate corpus.
let combo = format!("{affix}{separator}");
if let Some(stem) = name.strip_prefix(&combo) {
if corpus.possible_squat(stem, name, package)? {
squats.push(Squat::Custom {
message: format!("adds the {combo} prefix"),
package: stem.to_string(),
})
}
}

// Alternatively, let's see if adding the prefix and separator combo to the package
// results in something popular; eg somebody trying to squat `rs-foo` with `foo`.
let prefixed = format!("{combo}{name}");
if corpus.possible_squat(&prefixed, name, package)? {
squats.push(Squat::Custom {
message: format!("removes the {combo} prefix"),
package: prefixed,
});
}

// If the package being examined ends in this separator and suffix combo, then we
// should see if it exists in the popular crate corpus.
// should see if it exists without that suffix in the popular crate corpus.
let combo = format!("{separator}{affix}");
if let Some(stem) = name.strip_suffix(&combo) {
if corpus.possible_squat(stem, name, package)? {
squats.push(Squat::Custom {
Expand Down Expand Up @@ -74,16 +96,17 @@ mod tests {
use super::*;

#[test]
fn test_suffixes() -> anyhow::Result<()> {
fn test_affixes() -> anyhow::Result<()> {
let popular = TestCorpus::default()
.with_package(TestPackage::new("foo", "foo", ["Alice", "Bob"]))
.with_package(TestPackage::new("bar-rs", "Rust bar", ["Charlie"]))
.with_package(TestPackage::new("quux_sys", "libquux", ["Alice"]));
.with_package(TestPackage::new("quux_sys", "libquux", ["Alice"]))
.with_package(TestPackage::new("core-xyz", "Core xyz", ["Alice"]));

let harness = Harness::empty_builder()
.with_check(Suffixes::new(
["-", "_"].iter(),
.with_check(Affixes::new(
["core", "rs", "sys"].iter(),
["-", "_"].iter(),
))
.build(popular);

Expand All @@ -103,8 +126,10 @@ mod tests {
// Now try some packages that should be.
for package in [
TestPackage::new("foo-rs", "no shared author", ["Charlie"]),
TestPackage::new("rs-foo", "no shared author", ["Charlie"]),
TestPackage::new("quux", "libquux", ["Charlie"]),
TestPackage::new("quux_sys_rs", "libquux... for Rust?", ["Charlie"]),
TestPackage::new("xyz", "unprefixed core-xyz", ["Charlie"]),
]
.into_iter()
{
Expand Down
2 changes: 1 addition & 1 deletion src/typosquat/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub(super) static CRATE_NAME_ALPHABET: &str =
pub(super) static SUFFIX_SEPARATORS: &[&str] = &["-", "_"];

/// Commonly used prefixes and suffixes (affixes) when building crate names.
pub(super) static SUFFIXES: &[&str] = &["api", "cli", "core", "lib", "rs", "rust", "sys"];
pub(super) static SUFFIXES: &[&str] = &["api", "cargo", "cli", "core", "lib", "rs", "rust", "sys"];

/// The number of crates to consider in the "top crates" corpus.
pub(super) static TOP_CRATES: i64 = 3000;
Expand Down

0 comments on commit 997d98d

Please sign in to comment.