-
Notifications
You must be signed in to change notification settings - Fork 601
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
worker: add a job to check for typosquats
- Loading branch information
Showing
11 changed files
with
700 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
use std::sync::Arc; | ||
|
||
use diesel::PgConnection; | ||
use typomania::Package; | ||
|
||
use crate::{ | ||
worker::{ | ||
swirl::{BackgroundJob, PerformState}, | ||
typosquat::{Cache, Crate}, | ||
Environment, | ||
}, | ||
Emails, | ||
}; | ||
|
||
/// A job to check the name of a newly published crate against the most popular crates to see if | ||
/// the new crate might be typosquatting an existing, popular crate. | ||
#[derive(Serialize, Deserialize, Debug)] | ||
pub struct CheckTyposquat { | ||
name: String, | ||
} | ||
|
||
impl CheckTyposquat { | ||
pub fn new(name: &str) -> Self { | ||
Self { name: name.into() } | ||
} | ||
} | ||
|
||
impl BackgroundJob for CheckTyposquat { | ||
const JOB_NAME: &'static str = "check_typosquat"; | ||
|
||
type Context = Arc<Environment>; | ||
|
||
#[instrument(skip(state, env), err)] | ||
fn run(&self, state: PerformState<'_>, env: &Self::Context) -> anyhow::Result<()> { | ||
let cache = env.typosquat_cache(state.conn)?; | ||
check(&env.emails, cache, state.conn, &self.name) | ||
} | ||
} | ||
|
||
fn check( | ||
emails: &Emails, | ||
cache: &Cache, | ||
conn: &mut PgConnection, | ||
name: &str, | ||
) -> anyhow::Result<()> { | ||
if let Some(harness) = cache.get_harness() { | ||
info!(name, "Checking new crate for potential typosquatting"); | ||
|
||
let krate: Box<dyn Package> = Box::new(Crate::from_name(conn, name)?); | ||
let squats = harness.check_package(name, krate)?; | ||
if !squats.is_empty() { | ||
// Well, well, well. For now, the only action we'll take is to e-mail people who | ||
// hopefully care to check into things more closely. | ||
info!(?squats, "Found potential typosquatting"); | ||
|
||
for email in cache.iter_emails() { | ||
if let Err(e) = emails.send_possible_typosquat_notification(email, name, &squats) { | ||
error!(?e, ?email, "Failed to send possible typosquat notification"); | ||
} | ||
} | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::{test_util::pg_connection, worker::typosquat::test_util::Faker};

    use super::*;

    /// Address that typosquat notifications are sent to in this test.
    ///
    /// The same constant is used to prime the cache and to verify the recipient of the sent mail,
    /// so the two can never drift apart.
    const NOTIFICATION_EMAIL: &str = "admin@example.com";

    /// End-to-end check: an unrelated crate name triggers no mail, while a name that is a
    /// near-miss of a popular crate triggers a notification to the configured address.
    #[test]
    fn integration() -> anyhow::Result<()> {
        let emails = Emails::new_in_memory();
        let mut faker = Faker::new(pg_connection());

        // Set up a user and a popular crate to match against.
        let user = faker.user("a")?;
        faker.crate_and_version("my-crate", "It's awesome", &user, 100)?;

        // Prime the cache so it only includes the crate we just created.
        let cache = Cache::new(vec![NOTIFICATION_EMAIL.to_string()], faker.borrow_conn())?;

        // Now we'll create new crates: one problematic, one not so.
        let other_user = faker.user("b")?;
        let (angel, _version) = faker.crate_and_version(
            "innocent-crate",
            "I'm just a simple, innocent crate",
            &other_user,
            0,
        )?;
        let (demon, _version) = faker.crate_and_version(
            "mycrate",
            "I'm even more innocent, obviously",
            &other_user,
            0,
        )?;

        // OK, we're done faking stuff.
        let mut conn = faker.into_conn();

        // Run the check with a crate that shouldn't cause problems.
        check(&emails, &cache, &mut conn, &angel.name)?;
        assert!(emails.mails_in_memory().unwrap().is_empty());

        // Now run the check with a less innocent crate.
        check(&emails, &cache, &mut conn, &demon.name)?;
        let sent_mail = emails.mails_in_memory().unwrap();
        assert!(!sent_mail.is_empty());
        let sent = sent_mail.into_iter().next().unwrap();
        assert_eq!(&sent.to, NOTIFICATION_EMAIL);

        Ok(())
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
use std::sync::Arc; | ||
|
||
use diesel::PgConnection; | ||
use thiserror::Error; | ||
use typomania::{ | ||
checks::{Bitflips, Omitted, SwappedWords, Typos}, | ||
Harness, | ||
}; | ||
|
||
use super::{config, database::TopCrates}; | ||
|
||
static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS"; | ||
|
||
/// A cache containing everything we need to run typosquatting checks. | ||
/// | ||
/// Specifically, this includes a corpus of popular crates attached to a typomania harness, and a | ||
/// list of e-mail addresses that we'll send notifications to if potential typosquatting is | ||
/// discovered. | ||
pub struct Cache { | ||
emails: Vec<String>, | ||
harness: Option<Harness<TopCrates>>, | ||
} | ||
|
||
impl Cache { | ||
/// Instantiates a new [`Cache`] from the environment. | ||
/// | ||
/// This reads the [`NOTIFICATION_EMAILS_ENV`] environment variable to get the list of e-mail | ||
/// addresses to send notifications to, then invokes [`Cache::new`] to read popular crates from | ||
/// the database. | ||
#[instrument(skip_all, err)] | ||
pub fn from_env(conn: &mut PgConnection) -> Result<Self, Error> { | ||
let emails: Vec<String> = crates_io_env_vars::var(NOTIFICATION_EMAILS_ENV) | ||
.map_err(|e| Error::Environment { | ||
name: NOTIFICATION_EMAILS_ENV.into(), | ||
source: Arc::new(e), | ||
})? | ||
.unwrap_or_default() | ||
.split(',') | ||
.map(|s| s.trim().to_owned()) | ||
.filter(|s| !s.is_empty()) | ||
.collect(); | ||
|
||
if emails.is_empty() { | ||
// If we're not notifying anyone, then there's really not much to do here. | ||
warn!("$TYPOSQUAT_NOTIFICATION_EMAILS is not set; no typosquatting notifications will be sent"); | ||
Ok(Self { | ||
emails, | ||
harness: None, | ||
}) | ||
} else { | ||
// Otherwise, let's go get the top crates and build a corpus. | ||
Self::new(emails, conn) | ||
} | ||
} | ||
|
||
/// Instantiates a cache by querying popular crates and building them into a typomania harness. | ||
/// | ||
/// This relies on configuration in the [`super::config`] module. | ||
pub fn new(emails: Vec<String>, conn: &mut PgConnection) -> Result<Self, Error> { | ||
let top = TopCrates::new(conn, config::TOP_CRATES)?; | ||
|
||
Ok(Self { | ||
emails, | ||
harness: Some( | ||
Harness::builder() | ||
.with_check(Bitflips::new( | ||
config::CRATE_NAME_ALPHABET, | ||
top.crates.keys().map(String::as_str), | ||
)) | ||
.with_check(Omitted::new(config::CRATE_NAME_ALPHABET)) | ||
.with_check(SwappedWords::new("-_")) | ||
.with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| { | ||
(*c, typos.iter().map(|ss| ss.to_string()).collect()) | ||
}))) | ||
.build(top), | ||
), | ||
}) | ||
} | ||
|
||
pub fn get_harness(&self) -> Option<&Harness<TopCrates>> { | ||
self.harness.as_ref() | ||
} | ||
|
||
pub fn iter_emails(&self) -> impl Iterator<Item = &str> { | ||
self.emails.iter().map(String::as_str) | ||
} | ||
} | ||
|
||
// Because the error returned from Cache::new() gets memoised in the environment, we either need to | ||
// return it by reference from Environment::typosquat_cache() or we need to be able to clone it. | ||
// We'll do some Arc wrapping in the variants below to ensure that everything is clonable while not | ||
// destroying the source metadata. | ||
#[derive(Error, Debug, Clone)] | ||
pub enum Error { | ||
#[error("error reading environment variable {name}: {source:?}")] | ||
Environment { | ||
name: String, | ||
#[source] | ||
source: Arc<anyhow::Error>, | ||
}, | ||
|
||
#[error("error getting top crates: {0:?}")] | ||
TopCrates(#[source] Arc<diesel::result::Error>), | ||
} | ||
|
||
impl From<diesel::result::Error> for Error { | ||
fn from(value: diesel::result::Error) -> Self { | ||
Self::TopCrates(Arc::new(value)) | ||
} | ||
} |
Oops, something went wrong.