From 79019dec0197119f3d39e24f1cb2337759d044d0 Mon Sep 17 00:00:00 2001 From: sravan Date: Sun, 30 Jul 2023 22:15:02 +0900 Subject: [PATCH 1/5] feat: Add validation to timestamp and tag fields Timestamp and tag fields shouldnt start or end with dot Todo: Implement tests --- .../src/default_doc_mapper/default_mapper.rs | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index b6b2add821d..70bb7d28a08 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -127,6 +127,12 @@ fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, ) -> anyhow::Result<()> { + if timestamp_field_path.starts_with(".") { + bail!("Timestamp field `{timestamp_field_path}` should not start with a `.`."); + } + if timestamp_field_path.ends_with(".") { + bail!("Timestamp field `{timestamp_field_path}` should not end with a `.`."); + } let Some(timestamp_field_type) = mapping_root_node.find_field_mapping_type(timestamp_field_path) else { @@ -269,6 +275,12 @@ impl TryFrom for DefaultDocMapper { /// - if str, the field must use the `raw` tokenizer for indexing. /// - the field must be indexed. fn validate_tag(tag_field_name: &str, schema: &Schema) -> Result<(), anyhow::Error> { + if tag_field_name.starts_with(".") { + bail!("Tag field `{tag_field_name}` should not start with a `.`."); + } + if tag_field_name.ends_with(".") { + bail!("Tag field `{tag_field_name}` should not end with a `.`."); + } let field = schema .get_field(tag_field_name) .with_context(|| format!("Unknown tag field: `{tag_field_name}`"))?; @@ -720,6 +732,18 @@ mod tests { .unwrap(); } + #[test] + fn test_timestamp_field_that_start_with_dot_is_invalid() {} + + #[test] + fn test_timestamp_field_that_ends_with_dot_is_invalid() {} + + #[test] + fn test_tag_field_name_that_starts_with_dot_is_invalid() {} + + #[test] + fn test_tag_field_name_that_ends_with_dot_is_invalid() {} + #[test] fn test_fail_to_build_doc_mapper_with_timestamp_field_with_multivalues_cardinality() { let doc_mapper = r#"{ From e5e9b9549a51d505f97a1170c36b4d9ee92a36fe Mon Sep 17 00:00:00 2001 From: Marc hurabielle Date: Wed, 2 Aug 2023 20:27:57 +0900 Subject: [PATCH 2/5] ensure we validate escaped string --- .../src/default_doc_mapper/default_mapper.rs | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 70bb7d28a08..597488016ad 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -127,10 +127,10 @@ fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, ) -> anyhow::Result<()> { - if timestamp_field_path.starts_with(".") { + if timestamp_field_path.starts_with(".") || timestamp_field_path.starts_with(['\\', '.']) { bail!("Timestamp field `{timestamp_field_path}` should not start with a `.`."); } - if timestamp_field_path.ends_with(".") { + if timestamp_field_path.ends_with(".") || timestamp_field_path.ends_with(['\\', '.']) { bail!("Timestamp field `{timestamp_field_path}` should not end with a `.`."); } let Some(timestamp_field_type) = @@ -275,10 +275,10 @@ impl TryFrom for DefaultDocMapper { /// - if str, the field must use the `raw` tokenizer for indexing. /// - the field must be indexed. fn validate_tag(tag_field_name: &str, schema: &Schema) -> Result<(), anyhow::Error> { - if tag_field_name.starts_with(".") { + if tag_field_name.starts_with(".") || tag_field_name.starts_with(['\\', '.']) { bail!("Tag field `{tag_field_name}` should not start with a `.`."); } - if tag_field_name.ends_with(".") { + if tag_field_name.ends_with(".") || tag_field_name.ends_with(['\\', '.']) { bail!("Tag field `{tag_field_name}` should not end with a `.`."); } let field = schema @@ -733,7 +733,43 @@ mod tests { } #[test] - fn test_timestamp_field_that_start_with_dot_is_invalid() {} + fn test_timestamp_field_that_start_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "field_mappings": [ + { + "name": "my.timestamp", + "type": "datetime", + "fast": true + } + ], + "timestamp_field": ".my.timestamp" + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `.my.timestamp` should not start with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "field_mappings": [ + { + "name": "my.timestamp", + "type": "datetime", + "fast": true + } + ], + "timestamp_field": "\\.my\\.timestamp" + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `\\.my\\.timestamp` should not start with a `.`.", + ) + } #[test] fn test_timestamp_field_that_ends_with_dot_is_invalid() {} From 7e60b9e4af9e1ea2e9c93192064b4d4c5ee9532a Mon Sep 17 00:00:00 2001 From: Marc hurabielle Date: Wed, 2 Aug 2023 20:41:46 +0900 Subject: [PATCH 3/5] add test for tag validation --- .../src/default_doc_mapper/default_mapper.rs | 72 ++++++++++++++++++- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 597488016ad..e3250cee66d 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -772,13 +772,79 @@ mod tests { } #[test] - fn test_timestamp_field_that_ends_with_dot_is_invalid() {} + fn test_timestamp_field_that_ends_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "timestamp_field": "my.timestamp." + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `my.timestamp.` should not end with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "timestamp_field": "my\\.timestamp\\." + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `my\\.timestamp\\.` should not end with a `.`.", + ) + } #[test] - fn test_tag_field_name_that_starts_with_dot_is_invalid() {} + fn test_tag_field_name_that_starts_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": [".my.tag"] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `.my.tag` should not start with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["\\.my\\.tag"] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `\\.my\\.tag` should not start with a `.`.", + ) + } #[test] - fn test_tag_field_name_that_ends_with_dot_is_invalid() {} + fn test_tag_field_name_that_ends_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["my.tag."] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `my.tag.` should not end with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["my\\.tag\\."] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `my\\.tag\\.` should not end with a `.`.", + ) + } #[test] fn test_fail_to_build_doc_mapper_with_timestamp_field_with_multivalues_cardinality() { From 1e91e81b86532a03b7026e5d1936bcb0bf83e58c Mon Sep 17 00:00:00 2001 From: Marc hurabielle Date: Thu, 3 Aug 2023 09:20:27 +0900 Subject: [PATCH 4/5] fix linter --- .../src/default_doc_mapper/default_mapper.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index e3250cee66d..6281b7d2462 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -127,10 +127,10 @@ fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, ) -> anyhow::Result<()> { - if timestamp_field_path.starts_with(".") || timestamp_field_path.starts_with(['\\', '.']) { + if timestamp_field_path.starts_with('.') || timestamp_field_path.starts_with(['\\', '.']) { bail!("Timestamp field `{timestamp_field_path}` should not start with a `.`."); } - if timestamp_field_path.ends_with(".") || timestamp_field_path.ends_with(['\\', '.']) { + if timestamp_field_path.ends_with('.') || timestamp_field_path.ends_with(['\\', '.']) { bail!("Timestamp field `{timestamp_field_path}` should not end with a `.`."); } let Some(timestamp_field_type) = @@ -275,10 +275,10 @@ impl TryFrom for DefaultDocMapper { /// - if str, the field must use the `raw` tokenizer for indexing. /// - the field must be indexed. fn validate_tag(tag_field_name: &str, schema: &Schema) -> Result<(), anyhow::Error> { - if tag_field_name.starts_with(".") || tag_field_name.starts_with(['\\', '.']) { + if tag_field_name.starts_with('.') || tag_field_name.starts_with(['\\', '.']) { bail!("Tag field `{tag_field_name}` should not start with a `.`."); } - if tag_field_name.ends_with(".") || tag_field_name.ends_with(['\\', '.']) { + if tag_field_name.ends_with('.') || tag_field_name.ends_with(['\\', '.']) { bail!("Tag field `{tag_field_name}` should not end with a `.`."); } let field = schema From a5a12f0cfa8271ee53f85d4b9366eea038aa6629 Mon Sep 17 00:00:00 2001 From: sravan Date: Thu, 3 Aug 2023 22:29:56 +0900 Subject: [PATCH 5/5] Endwith dont need to be checked with string literal `//.` and `.` should end in the same way Also address review comment about `['//.']` -> `"//."` --- .../src/default_doc_mapper/default_mapper.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 6281b7d2462..55724325b63 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -127,10 +127,10 @@ fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, ) -> anyhow::Result<()> { - if timestamp_field_path.starts_with('.') || timestamp_field_path.starts_with(['\\', '.']) { + if timestamp_field_path.starts_with('.') || timestamp_field_path.starts_with("\\.") { bail!("Timestamp field `{timestamp_field_path}` should not start with a `.`."); } - if timestamp_field_path.ends_with('.') || timestamp_field_path.ends_with(['\\', '.']) { + if timestamp_field_path.ends_with('.') { bail!("Timestamp field `{timestamp_field_path}` should not end with a `.`."); } let Some(timestamp_field_type) = @@ -275,10 +275,10 @@ impl TryFrom for DefaultDocMapper { /// - if str, the field must use the `raw` tokenizer for indexing. /// - the field must be indexed. fn validate_tag(tag_field_name: &str, schema: &Schema) -> Result<(), anyhow::Error> { - if tag_field_name.starts_with('.') || tag_field_name.starts_with(['\\', '.']) { + if tag_field_name.starts_with('.') || tag_field_name.starts_with("\\.") { bail!("Tag field `{tag_field_name}` should not start with a `.`."); } - if tag_field_name.ends_with('.') || tag_field_name.ends_with(['\\', '.']) { + if tag_field_name.ends_with('.') { bail!("Tag field `{tag_field_name}` should not end with a `.`."); } let field = schema