Skip to content

Commit

Permalink
Add parsing for path keys in rust
Browse files Browse the repository at this point in the history
  • Loading branch information
AHarmlessPyro committed Mar 29, 2023
1 parent 04d6c4a commit 4bee3c2
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 24 deletions.
64 changes: 45 additions & 19 deletions ingestors/rust-common/src/metlo_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub struct MetloSensitiveData {
class_name: String,
severity: String,
regex: Option<String>,
key_regex: Option<String>,
}

#[derive(Debug, Default, Clone, Deserialize, Serialize)]
Expand Down Expand Up @@ -173,29 +174,54 @@ pub async fn pull_metlo_config() -> Result<(), Box<dyn std::error::Error>> {
.await?
.json::<MetloConfig>()
.await?;

let new_sensitive_data: Vec<SensitiveData> = resp
.sensitive_data_list
.iter()
.map(|e| match &e.regex {
Some(unwrapped_regex) => {
let regex = Regex::new(unwrapped_regex);
match regex {
Ok(r) => Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: r,
}),
Err(err) => {
log::debug!(
"Failed to Compile Regex \"{}\" - {}\n",
e.class_name,
err.to_string()
);
None
}
}
.map(|e| match (&e.regex, &e.key_regex) {
(Some(regex), Some(key_regex)) => {
let _regex = Regex::new(regex);
let _key_regex = Regex::new(key_regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: match _regex {
Ok(r) => Some(r),
Err(_) => None,
},
key_regex: match _key_regex {
Ok(r) => Some(r),
Err(_) => None,
},
})
}
(Some(regex), None) => {
let _regex = Regex::new(regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: match _regex {
Ok(r) => Some(r),
Err(_) => None,
},
key_regex: None,
})
}
(None, Some(key_regex)) => {
let _key_regex = Regex::new(key_regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: None,
key_regex: match _key_regex {
Ok(r) => Some(r),
Err(_) => None,
},
})
}
(None, None) => {
log::debug!(
"Missing both regex and key_regex fields in \"{}\"\n",
e.class_name,
);
None
}
None => None,
})
.flatten()
.collect();
Expand Down
27 changes: 26 additions & 1 deletion ingestors/rust-common/src/process_trace.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
open_api::{find_open_api_diff, get_split_path, EndpointInfo},
process_graphql::{process_graphql_body, process_graphql_query},
sensitive_data::detect_sensitive_data,
sensitive_data::{detect_sensitive_data, detect_sensitive_in_path_data},
trace::{ApiResponse, ApiTrace, GraphQlData, KeyVal, ProcessTraceRes, ProcessTraceResInner},
METLO_CONFIG,
};
Expand Down Expand Up @@ -120,14 +120,17 @@ pub fn process_json_val(
serde_json::Value::Null => {
let resolved_path = fix_path(path, response_alias_map);
insert_data_type(data_types, resolved_path.as_str(), "null".to_string());
process_path(path, resolved_path, sensitive_data_detected)
}
serde_json::Value::Bool(_) => {
let resolved_path = fix_path(path, response_alias_map);
insert_data_type(data_types, resolved_path.as_str(), "boolean".to_string());
process_path(path, resolved_path, sensitive_data_detected)
}
serde_json::Value::Number(_) => {
let resolved_path = fix_path(path, response_alias_map);
insert_data_type(data_types, resolved_path.as_str(), "number".to_string());
process_path(path, resolved_path, sensitive_data_detected)
}
serde_json::Value::String(e) => {
let resolved_path = fix_path(path, response_alias_map);
Expand Down Expand Up @@ -156,6 +159,7 @@ pub fn process_json_val(
}
}
}
process_path(path, resolved_path, sensitive_data_detected)
}
serde_json::Value::Array(ls) => {
let limit = std::cmp::min(ls.len(), 10);
Expand Down Expand Up @@ -200,6 +204,27 @@ pub fn process_json_val(
}
}

/// Records the sensitive-data classes detected from a JSON key path.
///
/// `path` is passed to `detect_sensitive_in_path_data`; any classes it returns
/// are merged into the set stored under `resolved_path` in
/// `sensitive_data_detected`. When nothing is detected the map is left
/// untouched, so no empty set is ever created for a path.
///
/// * `path` - the key path to inspect (only read here; the `&mut` is kept so
///   existing call sites stay unchanged).
/// * `resolved_path` - the key under which detections are stored.
/// * `sensitive_data_detected` - accumulator mapping a resolved path to the
///   set of sensitive-data class names found for it.
fn process_path(
    path: &mut String,
    resolved_path: String,
    sensitive_data_detected: &mut HashMap<String, HashSet<String>>,
) {
    let sensitive_data_path = detect_sensitive_in_path_data(path.as_str());
    // Guard: do not create an empty entry when nothing matched.
    if sensitive_data_path.is_empty() {
        return;
    }
    // `resolved_path` is owned, so it can be moved straight into the map;
    // the entry API performs a single lookup for both the insert and merge cases.
    sensitive_data_detected
        .entry(resolved_path)
        .or_default()
        .extend(sensitive_data_path);
}

fn process_json(
prefix: String,
value: Value,
Expand Down
41 changes: 37 additions & 4 deletions ingestors/rust-common/src/sensitive_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@ use crate::METLO_CONFIG;
#[derive(Debug, Clone)]
pub struct SensitiveData {
pub sensitive_data_type: String,
pub regex: Regex,
pub regex: Option<Regex>,
pub key_regex: Option<Regex>,
}

lazy_static! {
pub static ref DEFAULT_SENSITIVE_DATA_LS: Vec<SensitiveData> = vec![
SensitiveData {
sensitive_data_type: "email".to_string(),
regex: Regex::new(r#"(^|\s)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])(\s|$)"#).unwrap(),
regex: Some(Regex::new(r#"(^|\s)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])(\s|$)"#).unwrap()),
key_regex: None
},
SensitiveData {
sensitive_data_type: "ipv4".to_string(),
regex: Regex::new(r#"(^|\s)(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\s|$)"#).unwrap(),
regex: Some(Regex::new(r#"(^|\s)(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\s|$)"#).unwrap()),
key_regex: None
},
];
static ref AADHAR_MULT: Vec<Vec<u8>> = vec![
Expand Down Expand Up @@ -116,7 +119,10 @@ fn validate(sensitive_data_type: String, text: &str) -> bool {
/// Collects the `sensitive_data_type` of every entry in `sensitive_data` whose
/// value regex matches `txt` and for which `validate` also accepts `txt`.
///
/// Entries whose `regex` is `None` never match.
// NOTE(review): this listing comes from a diff view; the single-line `.filter(...)` below
// appears to be the pre-change form and the `match`-based `.filter(...)` its replacement —
// confirm against the repository before reusing this text as code.
fn detect_sensitive_data_inner(txt: &str, sensitive_data: &[SensitiveData]) -> HashSet<String> {
sensitive_data
.iter()
.filter(|e| e.regex.is_match(txt) && validate(e.sensitive_data_type.clone(), txt))
.filter(|e| match &e.regex {
Some(regex) => regex.is_match(txt) && validate(e.sensitive_data_type.clone(), txt),
None => false,
})
.map(|e| e.sensitive_data_type.clone())
.collect()
}
Expand All @@ -131,3 +137,30 @@ pub fn detect_sensitive_data(txt: &str) -> HashSet<String> {
Err(_err) => detect_sensitive_data_inner(txt, &DEFAULT_SENSITIVE_DATA_LS),
}
}

/// Returns the `sensitive_data_type` of each entry in `sensitive_data` whose
/// `key_regex` matches `txt` and for which `validate` also accepts `txt`.
///
/// Entries with no `key_regex` are ignored.
fn detect_sensitive_data_in_path_inner(
    txt: &str,
    sensitive_data: &[SensitiveData],
) -> HashSet<String> {
    let mut matched_types = HashSet::new();
    for entry in sensitive_data {
        if let Some(key_pattern) = &entry.key_regex {
            // `validate` takes its first argument by value, hence the clone.
            if key_pattern.is_match(txt) && validate(entry.sensitive_data_type.clone(), txt) {
                matched_types.insert(entry.sensitive_data_type.clone());
            }
        }
    }
    matched_types
}

/// Detects sensitive-data classes for a key path using the configured list in
/// `METLO_CONFIG`.
///
/// Returns an empty set when `METLO_CONFIG.try_read()` fails or when the
/// configuration has no `sensitive_data` list; unlike the value-based
/// detector there is no fallback to a default list here.
pub fn detect_sensitive_in_path_data(txt: &str) -> HashSet<String> {
    if let Ok(config) = METLO_CONFIG.try_read() {
        if let Some(configured) = &config.sensitive_data {
            return detect_sensitive_data_in_path_inner(txt, configured);
        }
    }
    HashSet::new()
}

0 comments on commit 4bee3c2

Please sign in to comment.