From c57d0fb4e862005ca0c7b79fb90a31a11e022ff7 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 3 Feb 2018 20:33:52 -0500 Subject: [PATCH] config: add persistent configuration This commit adds support for reading configuration files that change ripgrep's default behavior. The format of the configuration file is an "rc" style and is very simple. It is defined by two rules: 1. Every line is a shell argument, after trimming ASCII whitespace. 2. Lines starting with '#' (optionally preceded by any amount of ASCII whitespace) are ignored. ripgrep will look for a single configuration file if and only if the RIPGREP_CONFIG_PATH environment variable is set and is non-empty. ripgrep will parse shell arguments from this file on startup and will behave as if the arguments in this file were prepended to any explicit arguments given to ripgrep on the command line. For example, if your ripgreprc file contained a single line: --smart-case then the following command RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo would behave identically to the following command rg --smart-case foo This commit also adds a new flag, --no-config, that when present will suppress any and all support for configuration. This includes any future support for auto-loading configuration files from pre-determined paths (which this commit does not add). Conflicts between configuration files and explicit arguments are handled exactly like conflicts in the same command line invocation. That is, this command: RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive is exactly equivalent to rg --smart-case foo --case-sensitive in which case, the --case-sensitive flag would override the --smart-case flag. 
Closes #196 --- README.md | 44 +++++++++++ complete/_rg | 1 + doc/rg.1 | 82 ++++++++++++++++++++ doc/rg.1.md | 53 +++++++++++++ src/app.rs | 23 +++++- src/args.rs | 47 +++++++++++- src/config.rs | 195 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 3 +- tests/tests.rs | 16 ++++ tests/workdir.rs | 1 + 10 files changed, 460 insertions(+), 5 deletions(-) create mode 100644 src/config.rs diff --git a/README.md b/README.md index ac6768639..502cfa5c0 100644 --- a/README.md +++ b/README.md @@ -362,6 +362,50 @@ extensions. The syntax supported is [documented as part of Rust's regex library](https://doc.rust-lang.org/regex/regex/index.html#syntax). +### Configuration files + +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: + +1. Every line is a shell argument, after trimming ASCII whitespace. +2. Lines starting with '#' (optionally preceded by any amount of + ASCII whitespace) are ignored. + +ripgrep will look for a single configuration file if and only if the +`RIPGREP_CONFIG_PATH` environment variable is set and is non-empty. ripgrep +will parse shell arguments from this file on startup and will behave as if +the arguments in this file were prepended to any explicit arguments given to +ripgrep on the command line. + +For example, if your ripgreprc file contained a single line: + + --smart-case + +then the following command + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo + +would behave identically to the following command + + rg --smart-case foo + +ripgrep also provides a flag, --no-config, that when present will suppress +any and all support for configuration. This includes any future support for +auto-loading configuration files from pre-determined paths. + +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. 
That is, this +command: + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive + +is exactly equivalent to + + rg --smart-case foo --case-sensitive + +in which case, the --case-sensitive flag would override the --smart-case flag. + ### Shell completions Shell completion files are included in the release tarball for Bash, Fish, Zsh diff --git a/complete/_rg b/complete/_rg index 6b62c1691..1074597dc 100644 --- a/complete/_rg +++ b/complete/_rg @@ -54,6 +54,7 @@ _rg() { '(--mmap --no-mmap)--mmap[search using memory maps when possible]' '(-H --with-filename --no-filename)--no-filename[suppress all file names]' "(-p --heading --pretty --vimgrep)--no-heading[don't group matches by file name]" + "--no-config[don't load configuration files]" "(--no-ignore-parent)--no-ignore[don't respect ignore files]" "--no-ignore-parent[don't respect ignore files in parent directories]" "--no-ignore-vcs[don't respect version control ignore files]" diff --git a/doc/rg.1 b/doc/rg.1 index fd562e812..b81124632 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -403,6 +403,17 @@ context related options.) .RS .RE .TP +.B \-\-no\-config +Never read configuration files. +When this flag is present, ripgrep will not respect the +RIPGREP_CONFIG_PATH environment variable. +.RS +.PP +If ripgrep ever grows a feature to automatically read configuration +files in pre\-defined locations, then this flag will also disable that +behavior as well. +.RE +.TP .B \-\-no\-messages Suppress all error messages. .RS @@ -597,6 +608,77 @@ ripgrep. Note that this must be passed to every invocation of rg. .RS .RE +.SH CONFIGURATION FILES +.PP +ripgrep supports reading configuration files that change ripgrep\[aq]s +default behavior. +The format of the configuration file is an "rc" style and is very +simple. +It is defined by two rules: +.IP +.nf +\f[C] +1.\ Every\ line\ is\ a\ shell\ argument,\ after\ trimming\ ASCII\ whitespace. 
+2.\ Lines\ starting\ with\ \[aq]#\[aq]\ (optionally\ preceded\ by\ any\ amount\ of +\ \ \ ASCII\ whitespace)\ are\ ignored. +\f[] +.fi +.PP +ripgrep will look for a single configuration file if and only if the +RIPGREP_CONFIG_PATH environment variable is set and is non\-empty. +ripgrep will parse shell arguments from this file on startup and will +behave as if the arguments in this file were prepended to any explicit +arguments given to ripgrep on the command line. +.PP +For example, if your ripgreprc file contained a single line: +.IP +.nf +\f[C] +\-\-smart\-case +\f[] +.fi +.PP +then the following command +.IP +.nf +\f[C] +RIPGREP_CONFIG_PATH=wherever/.ripgreprc\ rg\ foo +\f[] +.fi +.PP +would behave identically to the following command +.IP +.nf +\f[C] +rg\ \-\-smart\-case\ foo +\f[] +.fi +.PP +ripgrep also provides a flag, \-\-no\-config, that when present will +suppress any and all support for configuration. +This includes any future support for auto\-loading configuration files +from pre\-determined paths. +.PP +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. +That is, this command: +.IP +.nf +\f[C] +RIPGREP_CONFIG_PATH=wherever/.ripgreprc\ rg\ foo\ \-\-case\-sensitive +\f[] +.fi +.PP +is exactly equivalent to +.IP +.nf +\f[C] +rg\ \-\-smart\-case\ foo\ \-\-case\-sensitive +\f[] +.fi +.PP +in which case, the \-\-case\-sensitive flag would override the +\-\-smart\-case flag. .SH SHELL COMPLETION .PP Shell completion files are included in the release tarball for Bash, diff --git a/doc/rg.1.md b/doc/rg.1.md index 6b0542867..c92c6aa3a 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -268,6 +268,14 @@ Project home page: https://github.com/BurntSushi/ripgrep when ripgrep thinks it will be faster. (Note that mmap searching doesn't currently support the various context related options.) +--no-config +: Never read configuration files. 
When this flag is present, ripgrep will not + respect the RIPGREP_CONFIG_PATH environment variable. + + If ripgrep ever grows a feature to automatically read configuration files + in pre-defined locations, then this flag will also disable that behavior as + well. + --no-messages : Suppress all error messages. @@ -392,6 +400,51 @@ Project home page: https://github.com/BurntSushi/ripgrep the default type definitions that are found inside of ripgrep. Note that this must be passed to every invocation of rg. +# CONFIGURATION FILES + +ripgrep supports reading configuration files that change +ripgrep's default behavior. The format of the configuration file is an +"rc" style and is very simple. It is defined by two rules: + + 1. Every line is a shell argument, after trimming ASCII whitespace. + 2. Lines starting with '#' (optionally preceded by any amount of + ASCII whitespace) are ignored. + +ripgrep will look for a single configuration file if and only if the +RIPGREP_CONFIG_PATH environment variable is set and is non-empty. +ripgrep will parse shell arguments from this file on startup and will +behave as if the arguments in this file were prepended to any explicit +arguments given to ripgrep on the command line. + +For example, if your ripgreprc file contained a single line: + + --smart-case + +then the following command + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo + +would behave identically to the following command + + rg --smart-case foo + +ripgrep also provides a flag, --no-config, that when present will suppress +any and all support for configuration. This includes any future support +for auto-loading configuration files from pre-determined paths. + +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. 
That is, +this command: + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive + +is exactly equivalent to + + rg --smart-case foo --case-sensitive + +in which case, the --case-sensitive flag would override the --smart-case +flag. + # SHELL COMPLETION Shell completion files are included in the release tarball for Bash, Fish, Zsh diff --git a/src/app.rs b/src/app.rs index 5fd8065f4..16cd3d826 100644 --- a/src/app.rs +++ b/src/app.rs @@ -22,6 +22,11 @@ Note that ripgrep may abort unexpectedly when using default settings if it searches a file that is simultaneously truncated. This behavior can be avoided by passing the --no-mmap flag. +ripgrep supports configuration files. Set RIPGREP_CONFIG_PATH to a +configuration file. The file can specify one shell argument per line. Lines +starting with '#' are ignored. For more details, see the man page or the +README. + Project home page: https://github.com/BurntSushi/ripgrep Use -h for short descriptions and --help for more details."; @@ -513,6 +518,7 @@ fn all_args_and_flags() -> Vec { flag_max_filesize(&mut args); flag_maxdepth(&mut args); flag_mmap(&mut args); + flag_no_config(&mut args); flag_no_ignore(&mut args); flag_no_ignore_parent(&mut args); flag_no_ignore_vcs(&mut args); @@ -1113,6 +1119,20 @@ This flag overrides --mmap. args.push(arg); } +fn flag_no_config(args: &mut Vec) { + const SHORT: &str = "Never read configuration files."; + const LONG: &str = long!("\ +Never read configuration files. When this flag is present, ripgrep will not +respect the RIPGREP_CONFIG_PATH environment variable. + +If ripgrep ever grows a feature to automatically read configuration files in +pre-defined locations, then this flag will also disable that behavior as well. 
+"); + let arg = RGArg::switch("no-config") + .help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_no_ignore(args: &mut Vec) { const SHORT: &str = "Don't respect ignore files."; const LONG: &str = long!("\ @@ -1182,8 +1202,7 @@ part on a separate output line. } fn flag_path_separator(args: &mut Vec) { - const SHORT: &str = - "Set the path separator to use when printing file paths."; + const SHORT: &str = "Set the path separator."; const LONG: &str = long!("\ Set the path separator to use when printing file paths. This defaults to your platform's path separator, which is / on Unix and \\ on Windows. This flag is diff --git a/src/args.rs b/src/args.rs index df7eeb8f9..d0990fdc0 100644 --- a/src/args.rs +++ b/src/args.rs @@ -25,6 +25,7 @@ use printer::{ColorSpecs, Printer}; use unescape::unescape; use worker::{Worker, WorkerBuilder}; +use config; use logger::Logger; use Result; @@ -88,17 +89,59 @@ impl Args { /// /// Also, initialize a global logger. pub fn parse() -> Result { - let matches = app::app().get_matches(); + // We parse the args given on CLI. This does not include args from + // the config. We use the CLI args as an initial configuration while + // trying to parse config files. If a config file exists and has + // arguments, then we re-parse argv, otherwise we just use the matches + // we have here. + let early_matches = ArgMatches(app::app().get_matches()); if let Err(err) = Logger::init() { errored!("failed to initialize logger: {}", err); } + if early_matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + + let matches = Args::matches(early_matches); + // The logging level may have changed if we brought in additional + // arguments from a configuration file, so recheck it and set the log + // level as appropriate. 
if matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } - ArgMatches(matches).to_args() + matches.to_args() + } + + /// Run clap and return the matches. If clap determines a problem with the + /// user provided arguments (or if --help or --version are given), then an + /// error/usage/version will be printed and the process will exit. + /// + /// If there are no additional arguments from the environment (e.g., a + /// config file), then the given matches are returned as is. + fn matches(early_matches: ArgMatches<'static>) -> ArgMatches<'static> { + // If the end user says no config, then respect it. + if early_matches.is_present("no-config") { + debug!("not reading config files because --no-config is present"); + return early_matches; + } + // If the user wants ripgrep to use a config file, then parse args + // from that first. + let mut args = config::args(early_matches.is_present("no-messages")); + if args.is_empty() { + return early_matches; + } + let mut cliargs = env::args_os(); + if let Some(bin) = cliargs.next() { + args.insert(0, bin); + } + args.extend(cliargs); + debug!("final argv: {:?}", args); + ArgMatches(app::app().get_matches_from(args)) } /// Returns true if ripgrep should print the files it will search and exit diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 000000000..c47e6a504 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,195 @@ +// This module provides routines for reading ripgrep config "rc" files. The +// primary output of these routines is a sequence of arguments, where each +// argument corresponds precisely to one shell argument. + +use std::env; +use std::error::Error; +use std::fs::File; +use std::io::{self, BufRead}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +use Result; + +/// Return a sequence of arguments derived from ripgrep rc configuration files. 
+/// +/// If no_messages is false and there was a problem reading a config file, +/// then errors are printed to stderr. +pub fn args(no_messages: bool) -> Vec { + let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { + None => return vec![], + Some(config_path) => { + if config_path.is_empty() { + return vec![]; + } + PathBuf::from(config_path) + } + }; + let (args, errs) = match parse(&config_path) { + Ok((args, errs)) => (args, errs), + Err(err) => { + if !no_messages { + eprintln!("{}", err); + } + return vec![]; + } + }; + if !no_messages && !errs.is_empty() { + for err in errs { + eprintln!("{}:{}", config_path.display(), err); + } + } + debug!( + "{}: arguments loaded from config file: {:?}", + config_path.display(), args); + args +} + +/// Parse a single ripgrep rc file from the given path. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the file could not be read, then an error is returned. If there was +/// a problem parsing one or more lines in the file, then errors are returned +/// for each line in addition to successfully parsed arguments. +fn parse>( + path: P, +) -> Result<(Vec, Vec>)> { + let path = path.as_ref(); + match File::open(&path) { + Ok(file) => parse_reader(file), + Err(err) => errored!("{}: {}", path.display(), err), + } +} + +/// Parse a single ripgrep rc file from the given reader. +/// +/// Callers should not provide a buffered reader, as this routine will use its +/// own buffer internally. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the reader could not be read, then an error is returned. If there was a +/// problem parsing one or more lines, then errors are returned for each line +/// in addition to successfully parsed arguments.
+fn parse_reader( + rdr: R, +) -> Result<(Vec, Vec>)> { + let mut bufrdr = io::BufReader::new(rdr); + let (mut args, mut errs) = (vec![], vec![]); + let mut line = vec![]; + let mut line_number = 0; + while { + line.clear(); + line_number += 1; + bufrdr.read_until(b'\n', &mut line)? > 0 + } { + trim(&mut line); + if line.is_empty() || line[0] == b'#' { + continue; + } + match bytes_to_os_string(&line) { + Ok(osstr) => { + args.push(osstr); + } + Err(err) => { + errs.push(format!("{}: {}", line_number, err).into()); + } + } + } + Ok((args, errs)) +} + +/// Trim the given bytes of whitespace according to the ASCII definition. +fn trim(x: &mut Vec) { + let upto = x.iter().take_while(|b| is_space(**b)).count(); + x.drain(..upto); + let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count(); + x.drain(revto..); +} + +/// Returns true if and only if the given byte is an ASCII space character. +fn is_space(b: u8) -> bool { + b == b'\t' + || b == b'\n' + || b == b'\x0B' + || b == b'\x0C' + || b == b'\r' + || b == b' ' +} + +/// On Unix, get an OsString from raw bytes. +#[cfg(unix)] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + use std::os::unix::ffi::OsStringExt; + Ok(OsString::from_vec(bytes.to_vec())) +} + +/// On non-Unix (like Windows), require UTF-8. +#[cfg(not(unix))] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from) +} + +#[cfg(test)] +mod tests { + use std::ffi::OsString; + use super::parse_reader; + + #[test] + fn basic() { + let (args, errs) = parse_reader(&b"\ +# Test +--context=0 + --smart-case +-u + + + # --bar +--foo +"[..]).unwrap(); + assert!(errs.is_empty()); + let args: Vec = + args.into_iter().map(|s| s.into_string().unwrap()).collect(); + assert_eq!(args, vec![ + "--context=0", "--smart-case", "-u", "--foo", + ]); + } + + // We test that we can handle invalid UTF-8 on Unix-like systems. 
+ #[test] + #[cfg(unix)] + fn error() { + use std::os::unix::ffi::OsStringExt; + + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert!(errs.is_empty()); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from_vec(b"foo\xFFbar".to_vec()), + OsString::from("baz"), + ]); + } + + // ... but test that invalid UTF-8 fails on Windows. + #[test] + #[cfg(not(unix))] + fn error() { + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert_eq!(errs.len(), 1); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from("baz"), + ]); + } +} diff --git a/src/main.rs b/src/main.rs index d63735050..b3b192c1a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,6 +39,7 @@ macro_rules! errored { mod app; mod args; +mod config; mod decoder; mod decompressor; mod logger; @@ -49,7 +50,7 @@ mod search_stream; mod unescape; mod worker; -pub type Result = result::Result>; +pub type Result = result::Result>; fn main() { reset_sigpipe(); diff --git a/tests/tests.rs b/tests/tests.rs index dc19350c0..ecc840e79 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1711,6 +1711,22 @@ fn compressed_failing_gzip() { assert_eq!(err.contains("not in gzip format"), true); } +sherlock!(feature_196_persistent_config, "sherlock", +|wd: WorkDir, mut cmd: Command| { + // Make sure we get no matches by default. + wd.assert_err(&mut cmd); + + // Now add our config file, and make sure it impacts ripgrep. + wd.create(".ripgreprc", "--ignore-case"); + cmd.env("RIPGREP_CONFIG_PATH", ".ripgreprc"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + assert_eq!(lines, expected); +}); + #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740"); diff --git a/tests/workdir.rs b/tests/workdir.rs index ea5408a40..3c47e9483 100644 --- a/tests/workdir.rs +++ b/tests/workdir.rs @@ -93,6 +93,7 @@ impl WorkDir { /// this working directory. pub fn command(&self) -> process::Command { let mut cmd = process::Command::new(&self.bin()); + cmd.env_remove("RIPGREP_CONFIG_PATH"); cmd.current_dir(&self.dir); cmd }