Skip to content

Commit

Permalink
Add support for custom tokenizers (ngram and regex).
Browse files (browse the repository at this point in the history)
  • Loading branch information
fmassot committed Jun 25, 2023
1 parent 3b093f1 commit ce91eb0
Show file tree
Hide file tree
Showing 30 changed files with 903 additions and 188 deletions.
22 changes: 11 additions & 11 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ quickwit-serve = { version = "0.6.1", path = "./quickwit-serve" }
quickwit-storage = { version = "0.6.1", path = "./quickwit-storage" }
quickwit-telemetry = { version = "0.6.1", path = "./quickwit-telemetry" }

- tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "924fc70", default-features = false, features = [
+ tantivy = { git = "https://github.com/quickwit-oss/tantivy/", branch = "fmassot/add-box-token-filter-and-refactor", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down
6 changes: 5 additions & 1 deletion quickwit/quickwit-config/src/index_config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use humantime::parse_duration;
use quickwit_common::uri::Uri;
use quickwit_doc_mapper::{
DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, FieldMappingEntry, ModeType,
- QuickwitJsonOptions,
+ QuickwitJsonOptions, TokenizerEntry,
};
use serde::{Deserialize, Serialize};
pub use serialize::load_index_config_from_user_config;
Expand Down Expand Up @@ -76,6 +76,8 @@ pub struct DocMapping {
#[schema(value_type = u32)]
#[serde(default = "DefaultDocMapper::default_max_num_partitions")]
pub max_num_partitions: NonZeroU32,
#[serde(default)]
pub tokenizers: Vec<TokenizerEntry>,
}

#[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)]
Expand Down Expand Up @@ -431,6 +433,7 @@ impl TestableForRegression for IndexConfig {
partition_key: Some("tenant_id".to_string()),
max_num_partitions: NonZeroU32::new(100).unwrap(),
timestamp_field: Some("timestamp".to_string()),
tokenizers: vec![],
};
let retention_policy = Some(RetentionPolicy::new(
"90 days".to_string(),
Expand Down Expand Up @@ -507,6 +510,7 @@ pub fn build_doc_mapper(
dynamic_mapping: doc_mapping.dynamic_mapping.clone(),
partition_key: doc_mapping.partition_key.clone(),
max_num_partitions: doc_mapping.max_num_partitions,
tokenizers: doc_mapping.tokenizers.clone(),
};
Ok(Arc::new(builder.try_build()?))
}
Expand Down
Loading

0 comments on commit ce91eb0

Please sign in to comment.