Skip to content

Commit

Permalink
Merge pull request #35 from junnlikestea/exclude-sources
Browse files Browse the repository at this point in the history
add ability to exclude source
  • Loading branch information
junnlikestea authored Dec 16, 2020
2 parents 1375bdc + f47c794 commit 67021f5
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 44 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ name = "vita"

[dependencies]
reqwest = {version = "0.10.7", features = ["json"]}
strum = "0.20"
strum_macros = "0.20"
matches = "0.1.8"
async-trait = "0.1.41"
crobat = {path = "./crobat" }
Expand Down
22 changes: 17 additions & 5 deletions src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl ParsedArgs {
let matches = app.get_matches();
// make it a hashset incase user provided duplicate domains
let mut hosts: HashSet<String> = HashSet::new();
let mut excluded: Vec<&str> = Vec::new();
let max_concurrent: usize = matches.value_of("concurrency").unwrap().parse()?;
let timeout: u64 = matches.value_of("timeout").unwrap().parse()?;

Expand All @@ -68,6 +69,10 @@ impl ParsedArgs {
hosts = read_input(None)?;
}

if matches.is_present("exclude") {
excluded = matches.values_of("exclude").unwrap().collect();
}

let mut cleaner = PostProcessor::default();
if matches.is_present("subs-only") {
cleaner.any_subdomain(hosts.clone());
Expand All @@ -78,17 +83,16 @@ impl ParsedArgs {
let mut runner = Runner::default()
.concurrency(max_concurrent)
.timeout(timeout)
.free_sources();
.free_sources()
.exclude(&excluded);
if matches.is_present("all_sources") {
runner = runner.all_sources();
runner = runner.all_sources().exclude(&excluded);
}

let flush = matches.is_present("flush");

Ok(Self {
runner,
cleaner,
flush,
flush: matches.is_present("flush"),
hosts,
})
}
Expand Down Expand Up @@ -137,6 +141,14 @@ fn create_clap_app() -> clap::App<'static, 'static> {
.short("a")
.long("all"),
)
.arg(
Arg::with_name("exclude")
.help("Excludes sources from data collection")
.short("e")
.long("exclude")
.multiple(true)
.takes_value(true),
)
.arg(
Arg::with_name("subs-only")
.help("filter the results to only those which have the same subdomain")
Expand Down
185 changes: 146 additions & 39 deletions src/vita.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,40 @@ use crate::{client, error::Result, DataSource};
use futures::stream::{FuturesUnordered, StreamExt};
use futures_core::stream::Stream;
use reqwest::Client;
use std::collections::HashMap;
use std::collections::HashSet;
use std::str::FromStr;
use std::sync::Arc;
use strum_macros::EnumString;
use tokio::sync::mpsc;
use tracing::warn;
use tracing::{info, warn};

const CHAN_SIZE: usize = 255;

#[derive(Debug, Eq, PartialEq, Hash, EnumString)]
enum Source {
AlienVault,
AnubisDB,
BinaryEdge,
C99,
CertSpotter,
Chaos,
Crtsh,
Facebook,
HackerTarget,
Intelx,
PassiveTotal,
SecurityTrails,
SonarSearch,
Spyse,
Sublister,
ThreatCrowd,
ThreatMiner,
UrlScan,
VirusTotal,
Wayback,
}

// Configuration options for the `Runner`
struct Config {
timeout: u64,
Expand All @@ -37,7 +64,7 @@ impl Default for Config {
// The `Runner` is responsible for collecting data from all the sources.
pub struct Runner {
client: Client,
sources: Vec<Arc<dyn DataSource>>,
sources: HashMap<Source, Arc<dyn DataSource>>,
config: Config,
}

Expand All @@ -46,7 +73,7 @@ impl Default for Runner {
let config = Config::default();
Self {
client: client!(config.timeout, config.timeout),
sources: Vec::new(),
sources: HashMap::new(),
config,
}
}
Expand All @@ -65,22 +92,63 @@ impl Runner {
self
}

/// Excludes a collection sources from data collection
pub fn exclude(mut self, excluded: &[&str]) -> Self {
if !excluded.is_empty() {
excluded.iter().for_each(|s| {
if let Ok(source) = Source::from_str(s) {
info!("excluding {:?}", source);
self.sources.remove(&source);
};
});
}

self
}

/// Sets the sources to be all those which do not require an api key to use.
pub fn free_sources(mut self) -> Self {
// Client uses Arc internally so we're just cloning pointers
let free: Vec<Arc<dyn DataSource>> = vec![
Arc::new(AnubisDB::new(self.client.clone())),
Arc::new(AlienVault::new(self.client.clone())),
Arc::new(CertSpotter::new(self.client.clone())),
Arc::new(Crtsh::new(self.client.clone())),
Arc::new(ThreatCrowd::new(self.client.clone())),
Arc::new(UrlScan::new(self.client.clone())),
Arc::new(VirusTotal::new(self.client.clone())),
Arc::new(ThreatMiner::new(self.client.clone())),
Arc::new(Sublister::new(self.client.clone())),
Arc::new(Wayback::new(self.client.clone())),
Arc::new(HackerTarget::new(self.client.clone())),
Arc::new(SonarSearch::new(self.client.clone())),
// Client uses Arc internally
let free: Vec<(Source, Arc<dyn DataSource>)> = vec![
(
Source::AnubisDB,
Arc::new(AnubisDB::new(self.client.clone())),
),
(
Source::AlienVault,
Arc::new(AlienVault::new(self.client.clone())),
),
(
Source::CertSpotter,
Arc::new(CertSpotter::new(self.client.clone())),
),
(
Source::ThreatCrowd,
Arc::new(ThreatCrowd::new(self.client.clone())),
),
(
Source::VirusTotal,
Arc::new(VirusTotal::new(self.client.clone())),
),
(
Source::ThreatMiner,
Arc::new(ThreatMiner::new(self.client.clone())),
),
(
Source::Sublister,
Arc::new(Sublister::new(self.client.clone())),
),
(
Source::HackerTarget,
Arc::new(HackerTarget::new(self.client.clone())),
),
(
Source::SonarSearch,
Arc::new(SonarSearch::new(self.client.clone())),
),
(Source::Wayback, Arc::new(Wayback::new(self.client.clone()))),
(Source::UrlScan, Arc::new(UrlScan::new(self.client.clone()))),
(Source::Crtsh, Arc::new(Crtsh::new(self.client.clone()))),
];

self.sources.extend(free.into_iter());
Expand All @@ -89,27 +157,66 @@ impl Runner {

/// Sets the sources to include api keys in addition to the free sources.
pub fn all_sources(mut self) -> Self {
let all: Vec<Arc<dyn DataSource>> = vec![
Arc::new(AnubisDB::new(self.client.clone())),
Arc::new(AlienVault::new(self.client.clone())),
Arc::new(CertSpotter::new(self.client.clone())),
Arc::new(Crtsh::new(self.client.clone())),
Arc::new(ThreatCrowd::new(self.client.clone())),
Arc::new(UrlScan::new(self.client.clone())),
Arc::new(VirusTotal::new(self.client.clone())),
Arc::new(ThreatMiner::new(self.client.clone())),
Arc::new(Sublister::new(self.client.clone())),
Arc::new(SecurityTrails::new(self.client.clone())),
Arc::new(Wayback::new(self.client.clone())),
Arc::new(HackerTarget::new(self.client.clone())),
Arc::new(SonarSearch::new(self.client.clone())),
Arc::new(BinaryEdge::new(self.client.clone())),
Arc::new(PassiveTotal::new(self.client.clone())),
Arc::new(Facebook::new(self.client.clone())),
Arc::new(Spyse::new(self.client.clone())),
Arc::new(C99::new(self.client.clone())),
Arc::new(Intelx::new(self.client.clone())),
Arc::new(Chaos::new(self.client.clone())),
let all: Vec<(Source, Arc<dyn DataSource>)> = vec![
(
Source::AnubisDB,
Arc::new(AnubisDB::new(self.client.clone())),
),
(
Source::AlienVault,
Arc::new(AlienVault::new(self.client.clone())),
),
(
Source::CertSpotter,
Arc::new(CertSpotter::new(self.client.clone())),
),
(
Source::ThreatCrowd,
Arc::new(ThreatCrowd::new(self.client.clone())),
),
(
Source::VirusTotal,
Arc::new(VirusTotal::new(self.client.clone())),
),
(
Source::ThreatMiner,
Arc::new(ThreatMiner::new(self.client.clone())),
),
(
Source::Sublister,
Arc::new(Sublister::new(self.client.clone())),
),
(
Source::SecurityTrails,
Arc::new(SecurityTrails::new(self.client.clone())),
),
(
Source::HackerTarget,
Arc::new(HackerTarget::new(self.client.clone())),
),
(
Source::SonarSearch,
Arc::new(SonarSearch::new(self.client.clone())),
),
(
Source::BinaryEdge,
Arc::new(BinaryEdge::new(self.client.clone())),
),
(
Source::PassiveTotal,
Arc::new(PassiveTotal::new(self.client.clone())),
),
(
Source::Facebook,
Arc::new(Facebook::new(self.client.clone())),
),
(Source::Spyse, Arc::new(Spyse::new(self.client.clone()))),
(Source::C99, Arc::new(C99::new(self.client.clone()))),
(Source::Intelx, Arc::new(Intelx::new(self.client.clone()))),
(Source::Wayback, Arc::new(Wayback::new(self.client.clone()))),
(Source::UrlScan, Arc::new(UrlScan::new(self.client.clone()))),
(Source::Crtsh, Arc::new(Crtsh::new(self.client.clone()))),
(Source::Chaos, Arc::new(Chaos::new(self.client.clone()))),
];

self.sources.extend(all.into_iter());
Expand All @@ -132,7 +239,7 @@ impl Runner {
futures.next().await;
}

for source in sources.iter() {
for source in sources.values() {
let source = Arc::clone(source);
let host = Arc::clone(&host);
let tx = tx2.clone();
Expand Down

0 comments on commit 67021f5

Please sign in to comment.