Skip to content

Commit

Permalink
Add support for key based patterns analysis (#462)
Browse files Browse the repository at this point in the history
Co-authored-by: Nikhil Shahi <[email protected]>
  • Loading branch information
AHarmlessPyro and NikhilShahi authored Mar 30, 2023
1 parent b1458cf commit 41fe1a3
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 55 deletions.
25 changes: 22 additions & 3 deletions backend/src/services/data-classes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,22 @@ export const getCombinedDataClasses = async (ctx: MetloContext) => {
})
.filter(v => v !== undefined)
roughMap.forEach(v => {
const [key, { severity, patterns: regexList, ...rest1 }, ...rest] =
Object.entries(v)[0]
const [
key,
{ severity, patterns: regexList, keyPatterns: keyRegexList, ...rest1 },
...rest
] = Object.entries(v)[0]
userDefinedClassMap.push({
className: key,
severity: RiskScore[severity] as RiskScore,
regex: new RegExp(regexList.map(regex => `(${regex})`).join("|")),
regex:
regexList && regexList.length > 0
? new RegExp(regexList.map(regex => `(${regex})`).join("|"))
: null,
keyRegex:
keyRegexList && keyRegexList.length > 0
? new RegExp(keyRegexList.map(regex => `(${regex})`).join("|"))
: null,
})
})
}
Expand All @@ -98,7 +108,16 @@ export const getCombinedDataClasses = async (ctx: MetloContext) => {
})
return [...metloDefinedClassMap, ...userDefinedClassMap].map(cls => {
if (cls.regex) {
if (cls.keyRegex) {
return {
...cls,
regex: cls.regex.source,
keyRegex: cls.keyRegex.source,
}
}
return { ...cls, regex: cls.regex.source }
} else if (cls.keyRegex) {
return { ...cls, keyRegex: cls.keyRegex.source }
} else {
return {
className: cls.className,
Expand Down
20 changes: 17 additions & 3 deletions backend/src/services/data-classes/utils.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import { RiskScore } from "@common/enums"
import Zod from "zod"

export interface rawDataClass {
interface rawDataClassRegex {
className: string
severity: RiskScore
regex: RegExp
regex?: RegExp
shortName?: string
}
/**
 * Shape of a data class that may carry a compiled key regex.
 * Only `className` and `severity` are required.
 */
interface rawDataClassKeyRegex {
className: string
severity: RiskScore
// Optional compiled pattern.
// NOTE(review): presumably matched against field keys/paths rather than field values; confirm against the scanner.
keyRegex?: RegExp
shortName?: string
}

/**
 * Combined data class shape: inherits every member of both
 * rawDataClassRegex and rawDataClassKeyRegex and adds none of its own.
 */
export interface rawDataClass extends rawDataClassRegex, rawDataClassKeyRegex {}

const scoreArray = Object.keys(RiskScore)

Expand All @@ -18,5 +26,11 @@ const SCORE_VALUES: [string, ...string[]] = [

export const customDataClass = Zod.object({
severity: Zod.enum(SCORE_VALUES),
patterns: Zod.string().array(),
patterns: Zod.string().array().optional(),
keyPatterns: Zod.string().array().optional(),
})
.partial()
.refine(
data => data.severity && (data.patterns || data.keyPatterns),
"Severity must be provided along with either of patterns or keyPatterns",
)
21 changes: 20 additions & 1 deletion backend/src/services/metlo-config/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,27 @@ export const METLO_CONFIG_SCHEMA = {
format: "regex",
},
},
keyPatterns: {
type: "array",
minItems: 1,
uniqueItems: true,
items: {
type: "string",
format: "regex",
},
},
},
required: ["severity", "patterns"],
anyOf: [
{
required: ["severity", "patterns"],
},
{
required: ["severity", "keyPatterns"],
},
{
required: ["severity", "keyPatterns", "patterns"],
},
],
},
},
additionalProperties: false,
Expand Down
52 changes: 41 additions & 11 deletions backend/src/services/scanner/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export const VALIDATION_FUNC_MAP: Record<any, (e: string) => boolean> = {
[__DataClass_INTERNAL__.BRAZIL_CPF]: validateBrazilCPF,
}

export const scan = (text: any, dataClasses: DataClass[]): string[] => {
export const scanValue = (text: any, dataClasses: DataClass[]): string[] => {
const res: string[] = []
let convertedText: string
try {
Expand All @@ -52,20 +52,50 @@ export const scan = (text: any, dataClasses: DataClass[]): string[] => {
if (STRING_ONLY_DATA_CLASSES.has(className) && typeof text !== "string") {
return
}
const r = new RegExp(exp)
const match = r.test(convertedText)
if (match) {
const validationFunc = VALIDATION_FUNC_MAP[className]
if (validationFunc) {
const matchArr = convertedText.match(r)
if (matchArr && validationFunc(matchArr[0])) {
res.push(className)
}
} else {
if (exp) {
const r = new RegExp(exp)
const matchedValue = r.test(convertedText)
const matchRes = returnMatch(matchedValue, className, convertedText, r)
if (matchRes) {
res.push(className)
}
}
}
})
return res
}

/**
 * Collects the names of all data classes whose key regex matches `text`.
 *
 * Data classes without a `keyRegex` are ignored. For the rest, the pattern
 * source is compiled, tested against `text`, and the outcome is handed to
 * `returnMatch`, which decides whether the class counts as detected.
 *
 * @param text - The string to test each key regex against.
 * @param dataClasses - Data classes to check, in order.
 * @returns The `className` of every detected data class, in input order.
 */
export const scanKey = (text: string, dataClasses: DataClass[]): string[] =>
  dataClasses
    .filter(({ className, keyRegex }) => {
      if (!keyRegex) {
        return false
      }
      const keyMatcher = new RegExp(keyRegex)
      const isHit = keyMatcher.test(text)
      return returnMatch(isHit, className, text, keyMatcher)
    })
    .map(({ className }) => className)

/**
 * Decides whether a regex hit should count as a detection for a data class.
 *
 * @param match - Whether the caller's regex test already succeeded.
 * @param className - Data class name, used to look up an optional validator
 *   in `VALIDATION_FUNC_MAP`.
 * @param convertedText - The text that was tested.
 * @param matcher - The regex used to extract the matched substring.
 * @returns `false` when `match` is false; `true` when there is no validator
 *   for the class; otherwise whether the validator accepts the first match
 *   found in `convertedText`.
 */
const returnMatch = (
  match: boolean,
  className: string,
  convertedText: string,
  matcher: RegExp,
): boolean => {
  if (!match) {
    return false
  }
  const validate = VALIDATION_FUNC_MAP[className]
  if (!validate) {
    // No extra validation registered for this class: the regex hit suffices.
    return true
  }
  const found = convertedText.match(matcher)
  if (!found) {
    return false
  }
  // Only the first element of the match result is passed to the validator.
  return validate(found[0]) ? true : false
}
12 changes: 9 additions & 3 deletions backend/src/services/scanner/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { DataClass, PairObject } from "@common/types"
import { DataSection, DataType } from "@common/enums"
import { isParameter, parsedJson, parsedJsonNonNull } from "utils"
import { getPathTokens } from "@common/utils"
import { scan } from "./scan"
import { scanKey, scanValue } from "./scan"
import { getMapDataFields } from "services/data-field/utils"

const handleDataField = (
Expand All @@ -17,9 +17,15 @@ const handleDataField = (
const key = `${statusCode}_${contentType}_${dataSection}${
dataPath ? `.${dataPath}` : ""
}`
const detectedData = scan(dataValue, dataClasses)
const detectedDataInValue = scanValue(dataValue, dataClasses)
const detectedDataInPath = scanKey(dataPath, dataClasses)
let newDataClasses = sensitiveDataMap[key] || []
detectedData.forEach(e => {
detectedDataInValue.forEach(e => {
if (!newDataClasses.includes(e)) {
newDataClasses.push(e)
}
})
detectedDataInPath.forEach(e => {
if (!newDataClasses.includes(e)) {
newDataClasses.push(e)
}
Expand Down
1 change: 1 addition & 0 deletions common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ export interface DataClass {
className: string
severity: RiskScore
regex?: string
keyRegex?: string
shortName?: string
}

Expand Down
64 changes: 45 additions & 19 deletions ingestors/rust-common/src/metlo_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub struct MetloSensitiveData {
class_name: String,
severity: String,
regex: Option<String>,
key_regex: Option<String>,
}

#[derive(Debug, Default, Clone, Deserialize, Serialize)]
Expand Down Expand Up @@ -173,29 +174,54 @@ pub async fn pull_metlo_config() -> Result<(), Box<dyn std::error::Error>> {
.await?
.json::<MetloConfig>()
.await?;

let new_sensitive_data: Vec<SensitiveData> = resp
.sensitive_data_list
.iter()
.map(|e| match &e.regex {
Some(unwrapped_regex) => {
let regex = Regex::new(unwrapped_regex);
match regex {
Ok(r) => Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: r,
}),
Err(err) => {
log::debug!(
"Failed to Compile Regex \"{}\" - {}\n",
e.class_name,
err.to_string()
);
None
}
}
.map(|e| match (&e.regex, &e.key_regex) {
(Some(regex), Some(key_regex)) => {
let _regex = Regex::new(regex);
let _key_regex = Regex::new(key_regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: match _regex {
Ok(r) => Some(r),
Err(_) => None,
},
key_regex: match _key_regex {
Ok(r) => Some(r),
Err(_) => None,
},
})
}
(Some(regex), None) => {
let _regex = Regex::new(regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: match _regex {
Ok(r) => Some(r),
Err(_) => None,
},
key_regex: None,
})
}
(None, Some(key_regex)) => {
let _key_regex = Regex::new(key_regex);
Some(SensitiveData {
sensitive_data_type: e.class_name.clone(),
regex: None,
key_regex: match _key_regex {
Ok(r) => Some(r),
Err(_) => None,
},
})
}
(None, None) => {
log::debug!(
"Missing both regex and key_regex fields in \"{}\"\n",
e.class_name,
);
None
}
None => None,
})
.flatten()
.collect();
Expand Down
24 changes: 18 additions & 6 deletions ingestors/rust-common/src/process_graphql.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
process_trace::{insert_data_type, process_json_val},
process_trace::{insert_data_type, process_json_val, process_path},
sensitive_data::detect_sensitive_data,
trace::{
GraphQlData, GraphQlRes, KeyVal, Operation, OperationItem, ProcessTraceResInner, Variable,
Expand Down Expand Up @@ -47,11 +47,21 @@ fn process_graphql_argument<'a>(
}
}
schema::Value::Boolean(_) => {
insert_data_type(data_types, path.as_str(), "boolean".to_owned())
insert_data_type(data_types, path.as_str(), "boolean".to_owned());
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::Float(_) => {
insert_data_type(data_types, path.as_str(), "number".to_owned());
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::Int(_) => {
insert_data_type(data_types, path.as_str(), "number".to_owned());
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::Null => {
insert_data_type(data_types, path.as_str(), "null".to_owned());
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::Float(_) => insert_data_type(data_types, path.as_str(), "number".to_owned()),
schema::Value::Int(_) => insert_data_type(data_types, path.as_str(), "number".to_owned()),
schema::Value::Null => insert_data_type(data_types, path.as_str(), "null".to_owned()),
schema::Value::String(s) => {
insert_data_type(data_types, path.as_str(), "string".to_owned());
let text = s.as_str();
Expand All @@ -72,6 +82,7 @@ fn process_graphql_argument<'a>(
Some(old) => old.extend(sensitive_data),
}
}
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::Enum(e) => {
let s = &e.to_owned().to_owned();
Expand All @@ -94,6 +105,7 @@ fn process_graphql_argument<'a>(
Some(old) => old.extend(sensitive_data),
}
}
process_path(&path, path.clone(), sensitive_data_detected);
}
schema::Value::List(ls) => {
let limit = std::cmp::min(ls.len(), 10);
Expand Down Expand Up @@ -439,7 +451,7 @@ fn process_graphql_obj(m: &Map<String, Value>) -> Option<GraphQlRes> {
let query = m.get("query");
let default_map = Map::new();
let variables_map: &Map<String, Value> = match m.get("variables") {
Some(v) => v.as_object().unwrap(),
Some(v) => v.as_object().unwrap_or(&default_map),
None => &default_map,
};
let operation_name = match m.get("operationName") {
Expand Down
Loading

0 comments on commit 41fe1a3

Please sign in to comment.