Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(config): Custom ignores #695

Merged
merged 2 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/typos-cli/src/bin/typos-cli/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ impl FileArgs {
locale: self.locale,
..Default::default()
}),
extend_ignore_re: Default::default(),
}
}

Expand Down
28 changes: 27 additions & 1 deletion crates/typos-cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ impl GlobEngineConfig {
}
}

#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
//#[serde(deny_unknown_fields)] // Doesn't work with `flatten`
#[serde(default)]
#[serde(rename_all = "kebab-case")]
Expand All @@ -283,6 +283,8 @@ pub struct EngineConfig {
pub tokenizer: Option<TokenizerConfig>,
#[serde(flatten)]
pub dict: Option<DictConfig>,
#[serde(with = "serde_regex")]
pub extend_ignore_re: Vec<regex::Regex>,
}

impl EngineConfig {
Expand All @@ -298,6 +300,7 @@ impl EngineConfig {
.unwrap_or_else(TokenizerConfig::from_defaults),
),
dict: Some(empty.dict.unwrap_or_else(DictConfig::from_defaults)),
extend_ignore_re: Default::default(),
}
}

Expand Down Expand Up @@ -327,6 +330,8 @@ impl EngineConfig {
let mut dict = Some(dict);
std::mem::swap(&mut dict, &mut self.dict);
}
self.extend_ignore_re
.extend(source.extend_ignore_re.iter().cloned());
}

pub fn binary(&self) -> bool {
Expand All @@ -340,8 +345,29 @@ impl EngineConfig {
/// Whether file contents should be checked.
///
/// An unset option counts as enabled; only an explicit `false` turns checking off.
pub fn check_file(&self) -> bool {
    self.check_file != Some(false)
}

/// Iterates over the stored ignore patterns in the order they are held.
///
/// The iterator is boxed, so the concrete iterator type is not part of the signature.
pub fn extend_ignore_re(&self) -> Box<dyn Iterator<Item = &regex::Regex> + '_> {
    let patterns = self.extend_ignore_re.iter();
    Box::new(patterns)
}
}

// Hand-written equality: every field is compared directly except the ignore
// patterns, which are compared through their pattern text (`as_str`).
impl PartialEq for EngineConfig {
    fn eq(&self, rhs: &Self) -> bool {
        let same_settings = self.binary == rhs.binary
            && self.check_filename == rhs.check_filename
            && self.check_file == rhs.check_file
            && self.tokenizer == rhs.tokenizer
            && self.dict == rhs.dict;
        if !same_settings {
            return false;
        }

        // Equal only when both lists have the same length and the same
        // pattern strings in the same order.
        let ours = self.extend_ignore_re.iter().map(|re| re.as_str());
        let theirs = rhs.extend_ignore_re.iter().map(|re| re.as_str());
        ours.eq(theirs)
    }
}

// Marker impl with no methods; it accompanies the hand-written `PartialEq`
// for `EngineConfig` above.
impl Eq for EngineConfig {}

#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(default)]
Expand Down
62 changes: 62 additions & 0 deletions crates/typos-cli/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,14 @@ impl FileChecker for Typos {
reporter.report(msg.into())?;
} else {
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
let msg = report::Typo {
Expand Down Expand Up @@ -86,7 +93,14 @@ impl FileChecker for FixTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
Expand Down Expand Up @@ -163,7 +177,14 @@ impl FileChecker for DiffTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
Expand Down Expand Up @@ -276,7 +297,14 @@ impl FileChecker for Identifiers {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut ignores: Option<Ignores> = None;
for word in policy.tokenizer.parse_bytes(&buffer) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(word.span())
{
continue;
}
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
Expand Down Expand Up @@ -329,11 +357,18 @@ impl FileChecker for Words {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut ignores: Option<Ignores> = None;
for word in policy
.tokenizer
.parse_bytes(&buffer)
.flat_map(|i| i.split())
{
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(word.span())
{
continue;
}
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
Expand Down Expand Up @@ -644,6 +679,33 @@ fn walk_entry(
Ok(())
}

/// Ranges of a buffer that matched one of the configured ignore patterns.
#[derive(Clone, Debug)]
struct Ignores {
    blocks: Vec<std::ops::Range<usize>>,
}

impl Ignores {
    /// Collects the range of every match of every pattern in `ignores`
    /// against `content`, pattern by pattern.
    ///
    /// Content that is not valid UTF-8 produces no ignored ranges.
    fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
        let blocks = match std::str::from_utf8(content) {
            Ok(text) => ignores
                .iter()
                .flat_map(|pattern| pattern.find_iter(text))
                .map(|found| found.range())
                .collect(),
            Err(_) => Vec::new(),
        };
        Self { blocks }
    }

    /// Returns `true` when either `span.start` or `span.end - 1` (saturating
    /// at zero) falls inside any recorded range.
    fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
        let first = span.start;
        let last = span.end.saturating_sub(1);
        for block in &self.blocks {
            if block.contains(&first) || block.contains(&last) {
                return true;
            }
        }
        false
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
27 changes: 22 additions & 5 deletions crates/typos-cli/src/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub struct ConfigEngine<'s> {
walk: Intern<crate::config::Walk>,
tokenizer: Intern<typos::tokens::Tokenizer>,
dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
ignore: Intern<Vec<regex::Regex>>,
}

impl<'s> ConfigEngine<'s> {
Expand All @@ -54,6 +55,7 @@ impl<'s> ConfigEngine<'s> {
walk: Default::default(),
tokenizer: Default::default(),
dict: Default::default(),
ignore: Default::default(),
}
}

Expand Down Expand Up @@ -88,7 +90,7 @@ impl<'s> ConfigEngine<'s> {
dir.type_matcher.definitions()
}

pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_, '_> {
debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
let dir = self.get_dir(path).expect("`walk()` should be called first");
let (file_type, file_config) = dir.get_file_config(path);
Expand All @@ -99,6 +101,7 @@ impl<'s> ConfigEngine<'s> {
binary: file_config.binary,
tokenizer: self.get_tokenizer(&file_config),
dict: self.get_dict(&file_config),
ignore: self.get_ignore(&file_config),
}
}

Expand All @@ -114,6 +117,10 @@ impl<'s> ConfigEngine<'s> {
self.dict.get(file.dict)
}

/// Looks up the ignore patterns stored under the id recorded in `file.ignore`.
fn get_ignore(&self, file: &FileConfig) -> &[regex::Regex] {
    let id = file.ignore;
    self.ignore.get(id)
}

fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
for path in path.ancestors() {
if let Some(dir) = self.configs.get(path) {
Expand Down Expand Up @@ -220,7 +227,10 @@ impl<'s> ConfigEngine<'s> {
let check_filename = engine.check_filename();
let check_file = engine.check_file();
let crate::config::EngineConfig {
tokenizer, dict, ..
tokenizer,
dict,
extend_ignore_re,
..
} = engine;
let tokenizer_config =
tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults);
Expand Down Expand Up @@ -254,12 +264,15 @@ impl<'s> ConfigEngine<'s> {
let dict = self.dict.intern(dict);
let tokenizer = self.tokenizer.intern(tokenizer);

let ignore = self.ignore.intern(extend_ignore_re);

FileConfig {
check_filenames: check_filename,
check_files: check_file,
binary,
tokenizer,
dict,
ignore,
}
}
}
Expand Down Expand Up @@ -328,20 +341,22 @@ struct FileConfig {
check_filenames: bool,
check_files: bool,
binary: bool,
ignore: usize,
}

#[non_exhaustive]
#[derive(derive_setters::Setters)]
pub struct Policy<'t, 'd> {
pub struct Policy<'t, 'd, 'i> {
pub check_filenames: bool,
pub check_files: bool,
pub file_type: Option<&'d str>,
pub binary: bool,
pub tokenizer: &'t typos::tokens::Tokenizer,
pub dict: &'d dyn typos::Dictionary,
pub ignore: &'i [regex::Regex],
}

impl<'t, 'd> Policy<'t, 'd> {
impl<'t, 'd, 'i> Policy<'t, 'd, 'i> {
pub fn new() -> Self {
Default::default()
}
Expand All @@ -350,8 +365,9 @@ impl<'t, 'd> Policy<'t, 'd> {
/// Tokenizer referenced by `Policy::default()`; created through `Tokenizer::new` inside a `Lazy` cell.
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
/// Built-in dictionary for the `En` locale, referenced by `Policy::default()`.
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
/// Empty pattern list: the default policy ignores nothing.
static DEFAULT_IGNORE: &[regex::Regex] = &[];

impl<'t, 'd> Default for Policy<'t, 'd> {
impl<'t, 'd, 'i> Default for Policy<'t, 'd, 'i> {
fn default() -> Self {
Self {
check_filenames: true,
Expand All @@ -360,6 +376,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
binary: false,
tokenizer: &DEFAULT_TOKENIZER,
dict: &DEFAULT_DICT,
ignore: DEFAULT_IGNORE,
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.in/_typos.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[files]
extend-exclude = ["_typos.toml"]

[default]
extend-ignore-re = ["`.*`"]

[default.extend-identifiers]
hello = "goodbye"
1 change: 1 addition & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.in/file.ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello `hello`
12 changes: 12 additions & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
bin.name = "typos"
stdin = ""
stdout = """
error: `hello` should be `goodbye`
--> ./file.ignore:1:1
|
1 | hello `hello`
| ^^^^^
|
"""
stderr = ""
status.code = 2
6 changes: 6 additions & 0 deletions crates/typos/src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ impl<'m> Typo<'m> {
corrections: self.corrections.borrow(),
}
}

/// Half-open range that starts at `byte_offset` and is `typo.len()` long.
pub fn span(&self) -> std::ops::Range<usize> {
    let len = self.typo.len();
    self.byte_offset..self.byte_offset + len
}
}

impl<'m> Default for Typo<'m> {
Expand Down
14 changes: 14 additions & 0 deletions crates/typos/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,13 @@ impl<'t> Identifier<'t> {
self.offset
}

/// Half-open range that starts at the identifier's offset and is `token.len()` long.
#[inline]
pub fn span(&self) -> std::ops::Range<usize> {
    std::ops::Range {
        start: self.offset,
        end: self.offset + self.token.len(),
    }
}

/// Split into individual Words.
#[inline]
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
Expand Down Expand Up @@ -702,6 +709,13 @@ impl<'t> Word<'t> {
pub fn offset(&self) -> usize {
self.offset
}

/// Half-open range that starts at the word's offset and is `token.len()` long.
#[inline]
pub fn span(&self) -> std::ops::Range<usize> {
    let len = self.token.len();
    self.offset..(self.offset + len)
}
}

struct SplitIdent<'s> {
Expand Down
1 change: 1 addition & 0 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Configuration is read from the following (in precedence order)
| default.check-file | \- | bool | Verifying spelling in files. |
| default.unicode | --unicode | bool | Allow unicode characters in identifiers (and not just ASCII) |
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.extend-ignore-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Custom sections of a file that should never be corrected (e.g. markdown code fences, PGP signatures, etc.) |
| default.extend-identifiers | \- | table of strings | Corrections for [identifiers](./design.md#identifiers-and-words). When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
| default.extend-ignore-identifiers-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid identifiers |
| default.extend-words | \- | table of strings | Corrections for [words](./design.md#identifiers-and-words). When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
Expand Down