Skip to content

Commit

Permalink
use serde_multikey in DefaultDocMapper
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a committed Jul 26, 2023
1 parent 07c9064 commit 492369b
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 116 deletions.
33 changes: 3 additions & 30 deletions quickwit/quickwit-config/src/index_config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ pub struct DocMapping {
#[serde(default)]
pub timestamp_field: Option<String>,
#[serde_multikey(
deserializer = parse_dynamic_mapping,
serializer = serialize_dynamic_mapping,
deserializer = Mode::from_parts,
serializer = Mode::into_parts,
fields = (
#[serde(default)]
mode: ModeType,
Expand All @@ -88,30 +88,6 @@ pub struct DocMapping {
pub tokenizers: Vec<TokenizerEntry>,
}

/// Combines the `mode` and `dynamic_mapping` keys into a single [`Mode`].
///
/// # Errors
///
/// Returns an error when `dynamic_mapping` is provided together with a mode
/// other than `ModeType::Dynamic`.
fn parse_dynamic_mapping(
    mode: ModeType,
    dynamic_mapping: Option<QuickwitJsonOptions>,
) -> anyhow::Result<Mode> {
    match (mode, dynamic_mapping) {
        (ModeType::Dynamic, Some(json_options)) => Ok(Mode::Dynamic(json_options)),
        // Dynamic without explicit options falls back to the default `Mode`.
        (ModeType::Dynamic, None) => Ok(Mode::default()),
        (ModeType::Strict, None) => Ok(Mode::Strict),
        (ModeType::Lenient, None) => Ok(Mode::Lenient),
        (_, Some(_)) => anyhow::bail!(
            "`dynamic_mapping` is only allowed with mode=dynamic. (Here mode=`{:?}`)",
            mode
        ),
    }
}

/// Splits a [`Mode`] into the `(mode, dynamic_mapping)` pair used for serialization.
///
/// Only `Mode::Dynamic` yields `Some` options; the other variants yield `None`.
fn serialize_dynamic_mapping(mode: Mode) -> (ModeType, Option<QuickwitJsonOptions>) {
    let mode_type = match &mode {
        Mode::Dynamic(_) => ModeType::Dynamic,
        Mode::Strict => ModeType::Strict,
        Mode::Lenient => ModeType::Lenient,
    };
    let dynamic_mapping = if let Mode::Dynamic(json_options) = mode {
        Some(json_options)
    } else {
        None
    };
    (mode_type, dynamic_mapping)
}

#[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct IndexingResources {
Expand Down Expand Up @@ -539,16 +515,13 @@ pub fn build_doc_mapper(
doc_mapping: &DocMapping,
search_settings: &SearchSettings,
) -> anyhow::Result<Arc<dyn DocMapper>> {
// TODO actually updating DefaultDocMapperBuilder would be better
let (mode, dynamic_mapping) = serialize_dynamic_mapping(doc_mapping.mode.clone());
let builder = DefaultDocMapperBuilder {
store_source: doc_mapping.store_source,
default_search_fields: search_settings.default_search_fields.clone(),
timestamp_field: doc_mapping.timestamp_field.clone(),
field_mappings: doc_mapping.field_mappings.clone(),
tag_fields: doc_mapping.tag_fields.iter().cloned().collect(),
mode,
dynamic_mapping,
mode: doc_mapping.mode.clone(),
partition_key: doc_mapping.partition_key.clone(),
max_num_partitions: doc_mapping.max_num_partitions,
tokenizers: doc_mapping.tokenizers.clone(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,39 +39,10 @@ use crate::doc_mapper::{JsonObject, Partition};
use crate::query_builder::build_query;
use crate::routing_expression::RoutingExpr;
use crate::{
Cardinality, DocMapper, DocParsingError, ModeType, QueryParserError, TokenizerEntry,
WarmupInfo, DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME,
Cardinality, DocMapper, DocParsingError, Mode, QueryParserError, TokenizerEntry, WarmupInfo,
DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME,
};

/// Defines how an unmapped field should be handled.
///
/// `Lenient` and `Strict` carry no data. `Dynamic` carries the
/// [`QuickwitJsonOptions`] used to configure the dynamic JSON field.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub enum Mode {
/// Lenient mode: unmapped fields are just ignored.
Lenient,
/// Strict mode: when parsing a document with an unmapped field, an error is yielded.
Strict,
/// Dynamic mode: unmapped fields are captured and handled according to the provided
/// configuration.
Dynamic(QuickwitJsonOptions),
}

impl Default for Mode {
/// Returns `Mode::Dynamic` configured with `QuickwitJsonOptions::default_dynamic()`.
fn default() -> Self {
Mode::Dynamic(QuickwitJsonOptions::default_dynamic())
}
}

impl Mode {
/// Extracts the `ModeType` of this `Mode`.
///
/// For `Mode::Dynamic`, the attached options are ignored.
pub fn mode_type(&self) -> ModeType {
match self {
Mode::Lenient => ModeType::Lenient,
Mode::Strict => ModeType::Strict,
Mode::Dynamic(_) => ModeType::Dynamic,
}
}
}

/// Default [`DocMapper`] implementation
/// which defines a set of rules to map json fields
/// to tantivy index fields.
Expand Down Expand Up @@ -159,7 +130,6 @@ impl TryFrom<DefaultDocMapperBuilder> for DefaultDocMapper {
type Error = anyhow::Error;

fn try_from(builder: DefaultDocMapperBuilder) -> anyhow::Result<DefaultDocMapper> {
let mode = builder.mode()?;
let mut schema_builder = Schema::builder();
let field_mappings = build_mapping_tree(&builder.field_mappings, &mut schema_builder)?;
let source_field = if builder.store_source {
Expand All @@ -172,7 +142,7 @@ impl TryFrom<DefaultDocMapperBuilder> for DefaultDocMapper {
validate_timestamp_field(timestamp_field_path, &field_mappings)?;
};

let dynamic_field = if let Mode::Dynamic(json_options) = &mode {
let dynamic_field = if let Mode::Dynamic(json_options) = &builder.mode {
Some(schema_builder.add_json_field(DYNAMIC_FIELD_NAME, json_options.clone()))
} else {
None
Expand Down Expand Up @@ -265,7 +235,7 @@ impl TryFrom<DefaultDocMapperBuilder> for DefaultDocMapper {
required_fields,
partition_key,
max_num_partitions: builder.max_num_partitions,
mode,
mode: builder.mode,
tokenizer_entries: builder.tokenizers,
tokenizer_manager,
})
Expand Down Expand Up @@ -348,11 +318,6 @@ fn validate_fields_tokenizers(

impl From<DefaultDocMapper> for DefaultDocMapperBuilder {
fn from(default_doc_mapper: DefaultDocMapper) -> Self {
let mode = default_doc_mapper.mode.mode_type();
let dynamic_mapping: Option<QuickwitJsonOptions> = match &default_doc_mapper.mode {
Mode::Dynamic(mapping_options) => Some(mapping_options.clone()),
_ => None,
};
let partition_key_str = default_doc_mapper.partition_key.to_string();
let partition_key_opt: Option<String> = if partition_key_str.is_empty() {
None
Expand All @@ -367,8 +332,7 @@ impl From<DefaultDocMapper> for DefaultDocMapperBuilder {
field_mappings: default_doc_mapper.field_mappings.into(),
tag_fields: default_doc_mapper.tag_field_names.into_iter().collect(),
default_search_fields: default_doc_mapper.default_search_field_names,
mode,
dynamic_mapping,
mode: default_doc_mapper.mode,
partition_key: partition_key_opt,
max_num_partitions: default_doc_mapper.max_num_partitions,
tokenizers: default_doc_mapper.tokenizer_entries,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@

use std::num::NonZeroU32;

use anyhow::bail;
use serde::{Deserialize, Serialize};

use super::tokenizer_entry::TokenizerEntry;
use super::FieldMappingEntry;
use crate::default_doc_mapper::default_mapper::Mode;
use crate::default_doc_mapper::QuickwitJsonOptions;
use crate::DefaultDocMapper;

Expand All @@ -34,6 +32,7 @@ use crate::DefaultDocMapper;
/// It is also used to serialize/deserialize a DocMapper.
/// Note that this is not the way the DocMapping is deserialized
/// from the configuration.
#[quickwit_macros::serde_multikey]
#[derive(Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct DefaultDocMapperBuilder {
Expand All @@ -60,18 +59,81 @@ pub struct DefaultDocMapperBuilder {
/// Maximum number of partitions.
#[serde(default = "DefaultDocMapper::default_max_num_partitions")]
pub max_num_partitions: NonZeroU32,
/// Defines the indexing mode.
#[serde(default)]
pub mode: ModeType,
/// If mode is set to dynamic, `dynamic_mapping` defines
/// how the unmapped fields should be handled.
#[serde(default)]
pub dynamic_mapping: Option<QuickwitJsonOptions>,
#[serde_multikey(
deserializer = Mode::from_parts,
serializer = Mode::into_parts,
fields = (
/// Defines the indexing mode.
#[serde(default)]
mode: ModeType,
/// If mode is set to dynamic, `dynamic_mapping` defines
/// how the unmapped fields should be handled.
#[serde(default)]
dynamic_mapping: Option<QuickwitJsonOptions>,
),
)]
/// Defines how the unmapped fields should be handled.
pub mode: Mode,
/// User-defined tokenizers.
#[serde(default)]
pub tokenizers: Vec<TokenizerEntry>,
}

/// Defines how an unmapped field should be handled.
///
/// `Lenient` and `Strict` carry no data. `Dynamic` carries the
/// [`QuickwitJsonOptions`] describing how the captured fields are handled.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub enum Mode {
/// Lenient mode: unmapped fields are just ignored.
Lenient,
/// Strict mode: when parsing a document with an unmapped field, an error is yielded.
Strict,
/// Dynamic mode: unmapped fields are captured and handled according to the provided
/// configuration.
Dynamic(QuickwitJsonOptions),
}

impl Mode {
    /// Returns the [`ModeType`] matching this `Mode`, ignoring any dynamic options.
    pub fn mode_type(&self) -> ModeType {
        match *self {
            Mode::Dynamic(_) => ModeType::Dynamic,
            Mode::Strict => ModeType::Strict,
            Mode::Lenient => ModeType::Lenient,
        }
    }

    /// Builds a `Mode` from its type and the optional dynamic mapping options.
    ///
    /// When `mode` is `ModeType::Dynamic` and no options are given, `Mode::default()`
    /// is returned.
    ///
    /// # Errors
    ///
    /// Returns an error when `dynamic_mapping` is provided with a mode other than
    /// `ModeType::Dynamic`.
    pub fn from_parts(
        mode: ModeType,
        dynamic_mapping: Option<QuickwitJsonOptions>,
    ) -> anyhow::Result<Mode> {
        match (mode, dynamic_mapping) {
            (ModeType::Dynamic, Some(json_options)) => Ok(Mode::Dynamic(json_options)),
            // Dynamic with default options.
            (ModeType::Dynamic, None) => Ok(Mode::default()),
            (ModeType::Strict, None) => Ok(Mode::Strict),
            (ModeType::Lenient, None) => Ok(Mode::Lenient),
            (_, Some(_)) => anyhow::bail!(
                "`dynamic_mapping` is only allowed with mode=dynamic. (Here mode=`{:?}`)",
                mode
            ),
        }
    }

    /// Consumes the `Mode` and returns its type along with the dynamic options, if any.
    ///
    /// Only `Mode::Dynamic` yields `Some` options.
    pub fn into_parts(self) -> (ModeType, Option<QuickwitJsonOptions>) {
        let mode_type = self.mode_type();
        let dynamic_mapping = match self {
            Mode::Dynamic(json_options) => Some(json_options),
            Mode::Lenient | Mode::Strict => None,
        };
        (mode_type, dynamic_mapping)
    }
}

impl Default for Mode {
/// Returns `Mode::Dynamic` configured with `QuickwitJsonOptions::default_dynamic()`.
fn default() -> Self {
Mode::Dynamic(QuickwitJsonOptions::default_dynamic())
}
}

/// `Mode` describing how the unmapped field should be handled.
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, Serialize, Deserialize, utoipa::ToSchema)]
#[serde(rename_all = "lowercase")]
Expand All @@ -93,36 +155,7 @@ impl Default for DefaultDocMapperBuilder {
}
}

// TODO remove this after refactor, it should be handled in QuickwitJsonOptions or in Mode
/// Returns the JSON options used in dynamic mode when no `dynamic_mapping` is given.
///
/// By default, in dynamic mode, all fields are fast fields (with the raw normalizer);
/// every other option keeps its `Default` value.
fn default_dynamic_mapping() -> QuickwitJsonOptions {
    let fast = super::FastFieldOptions::EnabledWithNormalizer {
        normalizer: super::QuickwitTextNormalizer::Raw,
    };
    QuickwitJsonOptions {
        fast,
        ..Default::default()
    }
}

impl DefaultDocMapperBuilder {
pub(crate) fn mode(&self) -> anyhow::Result<Mode> {
if self.mode != ModeType::Dynamic && self.dynamic_mapping.is_some() {
bail!(
"`dynamic_mapping` is only allowed with mode=dynamic. (Here mode=`{:?}`)",
self.mode
);
}
Ok(match self.mode {
ModeType::Lenient => Mode::Lenient,
ModeType::Strict => Mode::Strict,
ModeType::Dynamic => Mode::Dynamic(
self.dynamic_mapping
.clone()
.unwrap_or_else(default_dynamic_mapping),
),
})
}

/// Build a valid `DefaultDocMapper`.
/// This will consume your `DefaultDocMapperBuilder`.
pub fn try_build(self) -> anyhow::Result<DefaultDocMapper> {
Expand All @@ -141,8 +174,7 @@ mod tests {
assert!(default_mapper_builder.default_search_fields.is_empty());
assert!(default_mapper_builder.field_mappings.is_empty());
assert!(default_mapper_builder.tag_fields.is_empty());
assert_eq!(default_mapper_builder.mode, ModeType::Dynamic);
assert!(default_mapper_builder.dynamic_mapping.is_none());
assert_eq!(default_mapper_builder.mode.mode_type(), ModeType::Dynamic);
assert_eq!(default_mapper_builder.store_source, false);
assert!(default_mapper_builder.timestamp_field.is_none());
}
Expand Down
4 changes: 2 additions & 2 deletions quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ use anyhow::bail;
use once_cell::sync::Lazy;
use regex::Regex;

pub use self::default_mapper::{DefaultDocMapper, Mode};
pub use self::default_mapper_builder::{DefaultDocMapperBuilder, ModeType};
pub use self::default_mapper::DefaultDocMapper;
pub use self::default_mapper_builder::{DefaultDocMapperBuilder, Mode, ModeType};
pub use self::field_mapping_entry::{
FastFieldOptions, FieldMappingEntry, QuickwitBytesOptions, QuickwitJsonOptions,
QuickwitNumericOptions, QuickwitTextNormalizer, QuickwitTextOptions, TextIndexingOptions,
Expand Down
6 changes: 3 additions & 3 deletions quickwit/quickwit-doc-mapper/src/doc_mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ mod tests {
use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions, TextIndexingOptions};
use crate::{
Cardinality, DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, DocParsingError,
FieldMappingEntry, ModeType, TermRange, WarmupInfo, DYNAMIC_FIELD_NAME,
FieldMappingEntry, Mode, TermRange, WarmupInfo, DYNAMIC_FIELD_NAME,
};

const JSON_DEFAULT_DOC_MAPPER: &str = r#"
Expand Down Expand Up @@ -366,7 +366,7 @@ mod tests {
#[test]
fn test_doc_mapper_query_with_json_field_default_search_fields() {
let doc_mapper: DefaultDocMapper = DefaultDocMapperBuilder {
mode: ModeType::Dynamic,
mode: Mode::default(),
..Default::default()
}
.try_build()
Expand All @@ -385,7 +385,7 @@ mod tests {
#[test]
fn test_doc_mapper_query_with_json_field_ambiguous_term() {
let doc_mapper: DefaultDocMapper = DefaultDocMapperBuilder {
mode: ModeType::Dynamic,
mode: Mode::default(),
..Default::default()
}
.try_build()
Expand Down

0 comments on commit 492369b

Please sign in to comment.