diff --git a/crates/zspell/Cargo.toml b/crates/zspell/Cargo.toml
index 6d2fb95..5913bca 100644
--- a/crates/zspell/Cargo.toml
+++ b/crates/zspell/Cargo.toml
@@ -27,25 +27,33 @@ name = "zspell"
 path = "src/bin/bin.rs"
 
 [dependencies]
+# Base dependencies
+atty = "0.2"
+cfg-if = "1.0"
 futures = "0.3"
+hashbrown = { version = "0.12", features = ["rayon"] }
+home = "0.5"
 lazy_static = "1.4"
+rayon = "1.5"
 regex = "1"
+stringmetrics = "2"
 strum = { version = "0.24", features = ["derive"] }
 strum_macros = "0.24"
+sys-locale = "0.2"
+thiserror = "1.0"
 unicode-segmentation = "1.9.0"
+
+# CLI dependencies
+# At some point, we may wish to split these crates off
 clap = { version = "3.2", features = ["derive"] }
-stringmetrics = "2"
-thiserror = "1.0"
-rayon = "1.5"
-hashbrown = { version = "0.12", features = ["rayon"] }
-home = "0.5"
-cfg-if = "1.0"
+termcolor = "1.1"
 
 [dev-dependencies]
 criterion = "0.3"
 assert_cmd = "2.0"
 predicates = "2.1"
 tempfile = "3.3"
+util = { path = "util" }
 
 [[bench]]
 name = "datastructure"
diff --git a/crates/zspell/src/dictionary.rs b/crates/zspell/src/dictionary.rs
index 59ad475..e4a3309 100644
--- a/crates/zspell/src/dictionary.rs
+++ b/crates/zspell/src/dictionary.rs
@@ -251,7 +251,8 @@ impl Dictionary {
             || self.wordlist_nosuggest.contains(lower))
     }
 
-    /// Create a sorted vector of all items in the word list
+    /// Create an iterator over all items in the dictionary's wordlist. That is,
+    /// words that will always be accepted and suggested.
     ///
     /// Note that this is relatively slow. Prefer [`Dictionary::check`] for
     /// validating a word exists.
@@ -267,6 +268,41 @@ impl Dictionary {
         Ok(self.wordlist.iter())
     }
 
+    /// Create an iterator over all items in the dictionary's nonsuggesting
+    /// wordlist. These are words that will always be accepted but never be
+    /// suggested.
+    ///
+    /// Note that this is relatively slow. Prefer [`Dictionary::check`] for
+    /// validating a word exists.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DictError::NotCompiled`] if the dictionary has not yet been
+    /// compiled
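+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// // Hedged usage sketch; assumes `dic` is a `Dictionary` that has
+    /// // already been compiled.
+    /// for word in dic.iter_wordlist_nosuggest_items()? {
+    ///     println!("{word}");
+    /// }
+    /// ```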
+    #[inline]
+    pub fn iter_wordlist_nosuggest_items(&self) -> Result<hash_set::Iter<String>, DictError> {
+        self.break_if_not_compiled()?;
+
+        Ok(self.wordlist_nosuggest.iter())
+    }
+
+    /// Create an iterator over all items in the dictionary's forbidden wordlist.
+    /// These are words that are never accepted as correct.
+    ///
+    /// Note that this is relatively slow. Prefer [`Dictionary::check`] for
+    /// validating a word exists.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DictError::NotCompiled`] if the dictionary has not yet been
+    /// compiled
+    #[inline]
+    pub fn iter_wordlist_forbidden_items(&self) -> Result<hash_set::Iter<String>, DictError> {
+        self.break_if_not_compiled()?;
+
+        Ok(self.wordlist_forbidden.iter())
+    }
+
     /// Helper function to error if we haven't compiled when we needed to
     #[inline]
     const fn break_if_not_compiled(&self) -> Result<(), DictError> {
diff --git a/crates/zspell/src/system.rs b/crates/zspell/src/system.rs
index 5ccbde8..ebd3366 100644
--- a/crates/zspell/src/system.rs
+++ b/crates/zspell/src/system.rs
@@ -8,6 +8,7 @@ use std::{
     env,
     path::{Component, Path, PathBuf},
 };
+// use sys_locale::get_locale;
 
 use crate::errors::DictError;
 // use crate::errors::FileError;
@@ -230,6 +231,10 @@ pub fn expand_dir_wildcards(paths: &mut Vec<PathBuf>) -> HashSet<PathBuf> {
     ret
 }
 
+// pub fn find_dict_from_path() {
+//     let locale = get_locale().unwrap_or_else(|| String::from("en-US"));
+// }
+
 /// Take in a path and load the dictionary
 ///
 /// # Errors
@@ -272,9 +277,6 @@ pub fn create_dict_from_path(basepath: &str) -> Result<Dictionary, DictError> {
     Ok(dic)
 }
 
-// Need function to expand wildcard paths. Will need to look through the parent
-// directory and see if anything is a RE match
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/zspell/tests/affix_integration.rs b/crates/zspell/tests/affix_integration.rs
index 7c26c7a..94b79b0 100644
--- a/crates/zspell/tests/affix_integration.rs
+++ b/crates/zspell/tests/affix_integration.rs
@@ -1,11 +1,11 @@
-use std::fs;
+use util::TestCollection;
 use zspell::Config;
 
 #[test]
 fn affix_create_words() {
     let mut afx = Config::new();
 
-    let content = fs::read_to_string("tests/files/1_pfxsfx.aff").unwrap();
+    let content = TestCollection::load("1_pfxsfx.test").afx_str;
 
     afx.load_from_str(content.as_str()).unwrap();
 
diff --git a/crates/zspell/tests/dictionary_integration.rs b/crates/zspell/tests/dictionary_integration.rs
index 3020a61..16545bf 100644
--- a/crates/zspell/tests/dictionary_integration.rs
+++ b/crates/zspell/tests/dictionary_integration.rs
@@ -1,4 +1,6 @@
 use std::fs;
+
+use util::TestCollection;
 use zspell::Dictionary;
 
 /// Run integration tests on a file located in tests/files
@@ -22,40 +24,22 @@ fn create_dic_from_file(fname: &str) -> Dictionary {
     dic
 }
 
-/// Validate a dictionary's wordlist is correct
-fn test_dic_wordlist(dic: &Dictionary, fname: &str) {
-    let out_name = format!("tests/files/{}.words", fname);
-
-    let out_content = fs::read_to_string(out_name.clone())
-        .expect(format!("error reading file {}", out_name).as_str());
-    let mut correct: Vec<_> = out_content.lines().collect();
-    correct.sort_unstable();
-    let mut result: Vec<_> = dic
-        .iter_wordlist_items()
-        .expect("Error getting wordlist")
-        .collect();
-    result.sort_unstable();
-
-    assert_eq!(result, correct);
-}
-
-/// Test compiling the dictionary from our short test file
-#[test]
-fn test_short_compile() {
-    let dic = create_dic_from_file("1_pfxsfx");
-    test_dic_wordlist(&dic, "1_pfxsfx");
-}
-
-/// Test check functionality on our short file
+/// Test check functionality on a real file
 #[test]
 fn test_short_check() {
-    let dic = create_dic_from_file("1_pfxsfx");
+    let dic = create_dic_from_file("w1_eng_short");
 
     // Test all ownership methods
-    assert_eq!(dic.check("xxx"), Ok(true));
-    assert_eq!(dic.check("yybb".to_string()), Ok(true));
-    assert_eq!(dic.check("aazzzcc".to_owned()), Ok(true));
-    assert_eq!(dic.check(&"zzz".to_string()), Ok(true));
+    assert_eq!(dic.check("bananas"), Ok(true));
assert_eq!(dic.check("pines".to_string()), Ok(true)); + assert_eq!(dic.check("pillowing".to_owned()), Ok(true)); + assert_eq!(dic.check(&"pined".to_string()), Ok(true)); assert_eq!(dic.check("not contained"), Ok(false)); } + +#[test] +fn test_prefixes() { + let coll = TestCollection::load("1_pfxsfx.test"); + coll.validate(); +} diff --git a/crates/zspell/tests/files/1_pfxsfx.aff b/crates/zspell/tests/files/1_pfxsfx.aff deleted file mode 100644 index 2d85462..0000000 --- a/crates/zspell/tests/files/1_pfxsfx.aff +++ /dev/null @@ -1,8 +0,0 @@ -SET UTF-8 - -PFX A Y 1 -PFX A 0 aa . - -SFX B Y 2 -SFX B y bb y -SFX B 0 cc [^y] diff --git a/crates/zspell/tests/files/1_pfxsfx.dic b/crates/zspell/tests/files/1_pfxsfx.dic deleted file mode 100644 index a719014..0000000 --- a/crates/zspell/tests/files/1_pfxsfx.dic +++ /dev/null @@ -1,4 +0,0 @@ -3 -xxx/A -yyy/B -zzz/AB diff --git a/crates/zspell/tests/files/1_pfxsfx.test b/crates/zspell/tests/files/1_pfxsfx.test new file mode 100644 index 0000000..539b576 --- /dev/null +++ b/crates/zspell/tests/files/1_pfxsfx.test @@ -0,0 +1,31 @@ +==== afx_str ==== +SET UTF-8 + +PFX A Y 1 +PFX A 0 aa . + +SFX B Y 2 +SFX B y bb y +SFX B 0 cc [^y] + + +==== dic_str ===== +3 +xxx/A +yyy/B +zzz/AB + + +==== check_valid ==== +xxx +yyy +zzz +aaxxx +yybb +aazzz +zzzcc +aazzzcc + + +==== check_invalid ==== +not contained diff --git a/crates/zspell/tests/files/1_pfxsfx.words b/crates/zspell/tests/files/1_pfxsfx.words deleted file mode 100644 index 4f2687a..0000000 --- a/crates/zspell/tests/files/1_pfxsfx.words +++ /dev/null @@ -1,8 +0,0 @@ -xxx -yyy -zzz -aaxxx -yybb -aazzz -zzzcc -aazzzcc diff --git a/crates/zspell/tests/files/example.test b/crates/zspell/tests/files/example.test new file mode 100644 index 0000000..c0a9aff --- /dev/null +++ b/crates/zspell/tests/files/example.test @@ -0,0 +1,29 @@ +# Example test file + +==== afx_str ==== +# This section contains contents of the .aff file + +==== dic_str ==== +# This section contains contents of the .dic file + +==== check_valid ==== +# Words that should come up as "valid" when checked + +==== check_valid ==== +# These words should appear as valid + +==== check_invalid ==== +# These words should not be valid + +==== wordlist ==== +# The dictionary's internal word list contents + +==== wordlist_nosuggest ==== +# Contents of the never suggested wordlist + +==== wordlist_forbidden ==== +# Contents of the non-accepted wordlist + +==== suggestions ==== +# Something like the following +appl > apple | Apfel | app diff --git a/crates/zspell/util/Cargo.toml b/crates/zspell/util/Cargo.toml new file mode 100644 index 0000000..b43cc03 --- /dev/null +++ b/crates/zspell/util/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "util" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +zspell = { path = "../" } diff --git a/crates/zspell/util/src/lib.rs b/crates/zspell/util/src/lib.rs new file mode 100644 index 0000000..65645d2 --- /dev/null +++ b/crates/zspell/util/src/lib.rs @@ -0,0 +1,211 @@ +use std::fs; + +use zspell::Dictionary; + +/// A collection from a .test file that we can easily validate +#[derive(Debug)] +pub struct TestCollection { + /// The affix file as a string + pub afx_str: String, + /// The dictionary file as a string + pub dic_str: String, + /// These words will be checked with the check algorithm + pub check_valid: Option>, + pub check_invalid: Option>, + pub wordlist: Option>, + pub wordlist_nosuggest: Option>, + pub wordlist_forbidden: Option>, + pub suggestions: Option)>>, +} + +impl TestCollection { + 
+    pub fn load(fname: &str) -> Self {
+        let mut ret = Self {
+            afx_str: String::new(),
+            dic_str: String::new(),
+            check_valid: None,
+            check_invalid: None,
+            wordlist: None,
+            wordlist_nosuggest: None,
+            wordlist_forbidden: None,
+            suggestions: None,
+        };
+
+        let fname_new = format!("tests/files/{fname}");
+
+        let f_content = fs::read_to_string(fname_new.clone())
+            .unwrap_or_else(|_| panic!("error reading file '{fname_new}'"));
+
+        let mut content_iter = f_content.trim().split("====").filter(|&x| !x.is_empty());
+
+        while let Some(s_title) = content_iter.next() {
+            let sec_title = s_title.trim();
+            let sec_content = match content_iter.next() {
+                Some(s) => s,
+                None => panic!("Section title with no content"),
+            };
+
+            match sec_title {
+                "afx_str" => ret.afx_str = sec_content.to_owned(),
+                "dic_str" => ret.dic_str = sec_content.to_owned(),
+                "check_valid" => {
+                    ret.check_valid = Some(
+                        sec_content
+                            .trim()
+                            .split('\n')
+                            .map(|s| s.to_owned())
+                            .collect::<Vec<String>>(),
+                    )
+                }
+                "check_invalid" => {
+                    ret.check_invalid = Some(
+                        sec_content
+                            .trim()
+                            .split('\n')
+                            .map(|s| s.to_owned())
+                            .collect::<Vec<String>>(),
+                    )
+                }
+                "wordlist" => {
+                    ret.wordlist = Some(
+                        sec_content
+                            .trim()
+                            .split('\n')
+                            .map(|s| s.to_owned())
+                            .collect::<Vec<String>>(),
+                    )
+                }
+                "wordlist_nosuggest" => {
+                    ret.wordlist_nosuggest = Some(
+                        sec_content
+                            .trim()
+                            .split('\n')
+                            .map(|s| s.to_owned())
+                            .collect::<Vec<String>>(),
+                    )
+                }
+                "wordlist_forbidden" => {
+                    ret.wordlist_forbidden = Some(
+                        sec_content
+                            .trim()
+                            .split('\n')
+                            .map(|s| s.to_owned())
+                            .collect::<Vec<String>>(),
+                    )
+                }
+                "suggestions" => {
+                    // Suggestions look like "appl > apple | Apfel | app"
+                    // Turn into ("appl", ["apple", "Apfel", "app"])
+                    let mut tmp_ret: Vec<_> = Vec::new();
+
+                    let sug_split = sec_content.split_terminator('\n');
+                    for suggestion in sug_split {
+                        let tmp = suggestion.split_once('>').expect("Bad suggestion");
+                        tmp_ret.push((
+                            tmp.0.to_owned(),
+                            tmp.1.split('|').map(|s| s.trim().to_owned()).collect(),
+                        ))
+                    }
+                    ret.suggestions = Some(tmp_ret)
+                }
+                "end" => break,
+                other => panic!("Bad section heading '{}'. Collection:\n{:#?}\n", other, ret),
Collection:\n{:#?}\n", other, ret), + }; + } + + ret + } + + /// Validate all expected checks are correct + fn run_check_valid_invalid(&self, dic: &Dictionary) { + match &self.check_valid { + Some(v) => { + for item in v { + let res = dic.check(item).expect("Dictionary error"); + assert!(res, "{} failed check (expected true)", item); + } + println!("Validated {} items as true", v.len()); + } + None => println!("Skipped check_valid testing"), + }; + match &self.check_invalid { + Some(v) => { + for item in v { + let res = dic.check(item).expect("Dictionary error"); + assert!(!res, "{} failed check (expected false)", item) + } + println!("Validated {} items as false", v.len()); + } + None => println!("Skipped check_invalid testing"), + }; + } + + /// Validate all our word lists are equal + fn check_wordlists(&self, dic: &Dictionary) { + let mut wordlist_v: Vec<_> = dic + .iter_wordlist_items() + .expect("Error getting wordlist") + .map(|s| s.to_owned()) + .collect(); + wordlist_v.sort_unstable(); + + let mut wordlist_ns_v: Vec<_> = dic + .iter_wordlist_items() + .expect("Error getting nosuggest wordlist") + .map(|s| s.to_owned()) + .collect(); + wordlist_ns_v.sort_unstable(); + + let mut wordlist_f_v: Vec<_> = dic + .iter_wordlist_items() + .expect("Error getting forbidden wordlist") + .map(|s| s.to_owned()) + .collect(); + wordlist_f_v.sort_unstable(); + + match &self.wordlist { + Some(v) => { + assert_eq!(*v, wordlist_v); + println!("Validated wordlist against {} items", v.len()); + } + None => println!("Skipped wordlist testing"), + }; + + match &self.wordlist_nosuggest { + Some(v) => { + assert_eq!(*v, wordlist_ns_v); + println!("Validated wordlist_nosuggest against {} items", v.len()); + } + None => println!("Skipped wordlist_nosuggest testing"), + }; + + match &self.wordlist_forbidden { + Some(v) => { + assert_eq!(*v, wordlist_f_v); + println!("Validated wordlist_forbidden against {} items", v.len()); + } + None => println!("Skipped wordlist_forbidden testing"), + }; + } + + fn check_suggestions(&self, _dic: &Dictionary) { + println!("Skpped suggestion testing"); + } + + pub fn validate(&self) { + let mut dic = Dictionary::new(); + + // Validate we can load the dictionary + dic.config + .load_from_str(self.afx_str.as_str()) + .expect("config loading failure"); + dic.load_dict_from_str(self.dic_str.as_str()) + .expect("loading failure"); + dic.compile().expect("compiling failure"); + + // Now check everything we can + self.run_check_valid_invalid(&dic); + self.check_wordlists(&dic); + self.check_suggestions(&dic); + } +}