Skip to content

Commit

Permalink
Merge pull request #7206 from LawnGnome/typomania
Browse files Browse the repository at this point in the history
Wire up typosquatting checks when new packages are published
  • Loading branch information
LawnGnome committed Nov 14, 2023
2 parents c46f914 + 1705535 commit 7608eab
Show file tree
Hide file tree
Showing 17 changed files with 760 additions and 11 deletions.
19 changes: 19 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ tower = "=0.4.13"
tower-http = { version = "=0.4.4", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }
tracing = "=0.1.40"
tracing-subscriber = { version = "=0.3.18", features = ["env-filter"] }
typomania = { version = "0.1.2", default-features = false }
url = "=2.4.1"

[dev-dependencies]
Expand Down
21 changes: 20 additions & 1 deletion src/admin/enqueue_job.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::db;
use crate::schema::background_jobs;
use crate::schema::{background_jobs, crates};
use crate::worker::jobs;
use crate::worker::swirl::BackgroundJob;
use anyhow::Result;
Expand All @@ -26,6 +26,10 @@ pub enum Command {
#[arg(long = "dry-run")]
dry_run: bool,
},
CheckTyposquat {
#[arg()]
name: String,
},
}

pub fn run(command: Command) -> Result<()> {
Expand Down Expand Up @@ -60,6 +64,21 @@ pub fn run(command: Command) -> Result<()> {
Command::NormalizeIndex { dry_run } => {
jobs::NormalizeIndex::new(dry_run).enqueue(conn)?;
}
Command::CheckTyposquat { name } => {
// The job will fail if the crate doesn't actually exist, so let's check that up front.
if crates::table
.filter(crates::name.eq(&name))
.count()
.get_result::<i64>(conn)?
== 0
{
anyhow::bail!(
"cannot enqueue a typosquat check for a crate that doesn't exist: {name}"
);
}

jobs::CheckTyposquat::new(&name).enqueue(conn)?;
}
};

Ok(())
Expand Down
4 changes: 3 additions & 1 deletion src/bin/background-worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
extern crate tracing;

use crates_io::cloudfront::CloudFront;
use crates_io::config;
use crates_io::db::DieselPool;
use crates_io::fastly::Fastly;
use crates_io::storage::Storage;
use crates_io::worker::swirl::Runner;
use crates_io::worker::{Environment, RunnerExt};
use crates_io::{config, Emails};
use crates_io::{db, ssh};
use crates_io_env_vars::{var, var_parsed};
use crates_io_index::RepositoryConfig;
Expand Down Expand Up @@ -73,6 +73,7 @@ fn main() -> anyhow::Result<()> {
.build()
.expect("Couldn't build client");

let emails = Emails::from_environment(&config);
let fastly = Fastly::from_environment(client);

let connection_pool = r2d2::Pool::builder()
Expand All @@ -88,6 +89,7 @@ fn main() -> anyhow::Result<()> {
fastly,
storage,
connection_pool.clone(),
emails,
);

let environment = Arc::new(environment);
Expand Down
11 changes: 8 additions & 3 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Functionality related to publishing a new crate or version of a crate.

use crate::auth::AuthCheck;
use crate::worker::jobs;
use crate::worker::jobs::{self, CheckTyposquat};
use crate::worker::swirl::BackgroundJob;
use axum::body::Bytes;
use cargo_manifest::{Dependency, DepsSet, TargetDepsSet};
Expand Down Expand Up @@ -85,7 +85,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
// this query should only be used for the endpoint scope calculation
// since a race condition there would only cause `publish-new` instead of
// `publish-update` to be used.
let existing_crate = Crate::by_name(&metadata.name)
let existing_crate: Option<Crate> = Crate::by_name(&metadata.name)
.first::<Crate>(conn)
.optional()?;

Expand Down Expand Up @@ -222,7 +222,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
return Err(cargo_err("expected at most 5 categories per crate"));
}

let max_features = existing_crate
let max_features = existing_crate.as_ref()
.and_then(|c| c.max_features.map(|mf| mf as usize))
.unwrap_or(app.config.max_features);

Expand Down Expand Up @@ -393,6 +393,11 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra

jobs::enqueue_sync_to_index(&krate.name, conn)?;

// Experiment: check new crates for potential typosquatting.
if existing_crate.is_none() {
CheckTyposquat::new(&krate.name).enqueue(conn)?;
}

// The `other` field on `PublishWarnings` was introduced to handle a temporary warning
// that is no longer needed. As such, crates.io currently does not return any `other`
// warnings at this time, but if we need to, the field is available.
Expand Down
38 changes: 34 additions & 4 deletions src/email.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::path::PathBuf;
use std::sync::Mutex;
use std::sync::{Arc, Mutex};

use crate::util::errors::{server_error, AppResult};

Expand All @@ -12,7 +12,7 @@ use lettre::transport::smtp::SmtpTransport;
use lettre::{Message, Transport};
use rand::distributions::{Alphanumeric, DistString};

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Emails {
backend: EmailBackend,
}
Expand Down Expand Up @@ -48,7 +48,7 @@ impl Emails {
pub fn new_in_memory() -> Self {
Self {
backend: EmailBackend::Memory {
mails: Mutex::new(Vec::new()),
mails: Arc::new(Mutex::new(Vec::new())),
},
}
}
Expand Down Expand Up @@ -91,6 +91,35 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh
self.send(email, subject, &body)
}

/// Attempts to send a notification that a new crate may be typosquatting another crate.
///
/// `email` is the recipient, `crate_name` is the newly published crate, and `squats` holds the
/// checks that fired. The message body links to the new crate and then lists every squat
/// together with a link to the crate named by that squat's `package()`.
pub fn send_possible_typosquat_notification(
    &self,
    email: &str,
    crate_name: &str,
    squats: &[typomania::checks::Squat],
) -> AppResult<()> {
    let domain = crate::config::domain_name();
    let subject = "Possible typosquatting in new crate";

    // One bullet line per triggered check; inside the closure `crate_name` is deliberately
    // rebound to the package the squat refers to, not the newly published crate.
    let squat_lines: String = squats
        .iter()
        .map(|squat| {
            format!(
                "- {squat} (https://{domain}/crates/{crate_name})\n",
                crate_name = squat.package()
            )
        })
        .collect();

    let body = format!(
        "New crate {crate_name} may be typosquatting one or more other crates.\n
Visit https://{domain}/crates/{crate_name} to see the offending crate.\n
\n
Specific squat checks that triggered:\n
\n
{squats}",
        squats = squat_lines,
    );

    self.send(email, subject, &body)
}

/// Attempts to send an API token exposure notification email
pub fn send_token_exposed_notification(
&self,
Expand Down Expand Up @@ -204,6 +233,7 @@ Source type: {source}\n",
}
}

#[derive(Clone)]
enum EmailBackend {
/// Backend used in production to send mails using SMTP.
Smtp {
Expand All @@ -214,7 +244,7 @@ enum EmailBackend {
/// Backend used locally during development, will store the emails in the provided directory.
FileSystem { path: PathBuf },
/// Backend used during tests, will keep messages in memory to allow tests to retrieve them.
Memory { mails: Mutex<Vec<StoredEmail>> },
Memory { mails: Arc<Mutex<Vec<StoredEmail>>> },
}

// Custom Debug implementation to avoid showing the SMTP password.
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pub mod sql;
pub mod ssh;
pub mod storage;
mod test_util;
pub mod typosquat;
pub mod util;
pub mod views;
pub mod worker;
Expand Down
1 change: 1 addition & 0 deletions src/tests/util/test_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ impl TestAppBuilder {
None,
app.storage.clone(),
app.primary_database.clone(),
app.emails.clone(),
);

let runner = Runner::new(app.primary_database.clone(), Arc::new(environment))
Expand Down
110 changes: 110 additions & 0 deletions src/typosquat/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
use std::sync::Arc;

use diesel::PgConnection;
use thiserror::Error;
use typomania::{
checks::{Bitflips, Omitted, SwappedWords, Typos},
Harness,
};

use super::{config, database::TopCrates};

/// Name of the environment variable that holds the comma-separated list of e-mail addresses to
/// notify about potential typosquatting.
static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS";

/// A cache containing everything we need to run typosquatting checks.
///
/// Specifically, this includes a corpus of popular crates attached to a typomania harness, and a
/// list of e-mail addresses that we'll send notifications to if potential typosquatting is
/// discovered.
pub struct Cache {
/// E-mail addresses to notify when potential typosquatting is discovered.
emails: Vec<String>,
/// Typomania harness built over the popular crates; `None` when the cache was created from the
/// environment without any notification addresses configured.
harness: Option<Harness<TopCrates>>,
}

impl Cache {
    /// Builds a [`Cache`] from the process environment.
    ///
    /// The environment variable named by `NOTIFICATION_EMAILS_ENV` is read as a comma-separated
    /// list of e-mail addresses; each entry is trimmed and empty entries are discarded. If no
    /// addresses remain, a warning is logged and a cache without a harness is returned.
    /// Otherwise construction is delegated to [`Cache::new`], which reads popular crates from
    /// the database.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Environment`] if the variable cannot be read, and forwards any error
    /// produced by [`Cache::new`].
    #[instrument(skip_all, err)]
    pub fn from_env(conn: &mut PgConnection) -> Result<Self, Error> {
        let raw = crates_io_env_vars::var(NOTIFICATION_EMAILS_ENV)
            .map_err(|e| Error::Environment {
                name: NOTIFICATION_EMAILS_ENV.into(),
                source: Arc::new(e),
            })?
            .unwrap_or_default();

        let emails: Vec<String> = raw
            .split(',')
            .map(str::trim)
            .filter(|addr| !addr.is_empty())
            .map(str::to_owned)
            .collect();

        // With at least one recipient, go get the top crates and build a corpus.
        if !emails.is_empty() {
            return Self::new(emails, conn);
        }

        // If we're not notifying anyone, then there's really not much to do here.
        warn!("$TYPOSQUAT_NOTIFICATION_EMAILS is not set; no typosquatting notifications will be sent");
        Ok(Self {
            emails,
            harness: None,
        })
    }

    /// Builds a cache by querying popular crates and wiring them into a typomania harness.
    ///
    /// The number of crates queried, the crate name alphabet and the typo table all come from
    /// the `super::config` module.
    ///
    /// # Errors
    ///
    /// Returns an error if the top crates cannot be loaded.
    pub fn new(emails: Vec<String>, conn: &mut PgConnection) -> Result<Self, Error> {
        let top = TopCrates::new(conn, config::TOP_CRATES)?;

        // Built before `top` is handed over to the harness below.
        let bitflips = Bitflips::new(
            config::CRATE_NAME_ALPHABET,
            top.crates.keys().map(String::as_str),
        );
        let typos = Typos::new(config::TYPOS.iter().map(|(c, typos)| {
            (*c, typos.iter().map(|ss| ss.to_string()).collect())
        }));

        let harness = Harness::builder()
            .with_check(bitflips)
            .with_check(Omitted::new(config::CRATE_NAME_ALPHABET))
            .with_check(SwappedWords::new("-_"))
            .with_check(typos)
            .build(top);

        Ok(Self {
            emails,
            harness: Some(harness),
        })
    }

    /// Returns the typomania harness, or `None` if this cache was built without one.
    pub fn get_harness(&self) -> Option<&Harness<TopCrates>> {
        self.harness.as_ref()
    }

    /// Iterates over the notification e-mail addresses as string slices.
    pub fn iter_emails(&self) -> impl Iterator<Item = &str> {
        self.emails.iter().map(|addr| addr.as_str())
    }
}

// Because the error returned from Cache::new() gets memoised in the environment, we either need to
// return it by reference from Environment::typosquat_cache() or we need to be able to clone it.
// We'll do some Arc wrapping in the variants below to ensure that everything is clonable while not
// destroying the source metadata.
/// Errors that can occur while building a typosquatting `Cache`.
#[derive(Error, Debug, Clone)]
pub enum Error {
/// Reading an environment variable failed.
#[error("error reading environment variable {name}: {source:?}")]
Environment {
/// Name of the environment variable that was being read.
name: String,
/// The underlying error, held in an `Arc` so that this type can derive `Clone`.
#[source]
source: Arc<anyhow::Error>,
},

/// A Diesel error occurred while getting the top crates; held in an `Arc` so that this type can
/// derive `Clone`.
#[error("error getting top crates: {0:?}")]
TopCrates(#[source] Arc<diesel::result::Error>),
}

impl From<diesel::result::Error> for Error {
fn from(value: diesel::result::Error) -> Self {
Self::TopCrates(Arc::new(value))
}
}
Loading

0 comments on commit 7608eab

Please sign in to comment.