From a7c52afb57602f798c93ef2ad48439df5c164e23 Mon Sep 17 00:00:00 2001 From: ath3 Date: Sun, 7 Nov 2021 04:08:44 +0100 Subject: [PATCH] Detect filetype from shebang line --- helix-core/src/indent.rs | 1 + helix-core/src/syntax.rs | 31 ++++++++++++++++++++++++++++--- languages.toml | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 20f034ea759d0..b6f5081acd63f 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -450,6 +450,7 @@ where language: vec![LanguageConfiguration { scope: "source.rust".to_string(), file_types: vec!["rs".to_string()], + shebangs: vec![], language_id: "Rust".to_string(), highlight_config: OnceCell::new(), config: None, diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index f3e3f238bb864..1c637813dc69c 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -14,6 +14,7 @@ use std::{ cell::RefCell, collections::{HashMap, HashSet}, fmt, + fs::File, path::Path, sync::Arc, }; @@ -52,6 +53,7 @@ pub struct LanguageConfiguration { pub language_id: String, pub scope: String, // source.rust pub file_types: Vec, // filename ends_with? + pub shebangs: Vec, // interpreter(s) associated with language pub roots: Vec, // these indicate project roots <.git, Cargo.toml> pub comment_token: Option, @@ -254,6 +256,7 @@ pub struct Loader { // highlight_names ? language_configs: Vec>, language_config_ids_by_file_type: HashMap, // Vec + language_config_ids_by_shebang: HashMap, } impl Loader { @@ -261,6 +264,7 @@ impl Loader { let mut loader = Self { language_configs: Vec::new(), language_config_ids_by_file_type: HashMap::new(), + language_config_ids_by_shebang: HashMap::new(), }; for config in config.language { @@ -273,6 +277,11 @@ impl Loader { .language_config_ids_by_file_type .insert(file_type.clone(), language_id); } + for shebang in &config.shebangs { + loader + .language_config_ids_by_shebang + .insert(shebang.clone(), language_id); + } loader.language_configs.push(Arc::new(config)); } @@ -283,7 +292,7 @@ impl Loader { pub fn language_config_for_file_name(&self, path: &Path) -> Option> { // Find all the language configurations that match this file name // or a suffix of the file name. - let configuration_id = path + let mut configuration_id = path .file_name() .and_then(|n| n.to_str()) .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name)) @@ -293,9 +302,25 @@ impl Loader { .and_then(|extension| self.language_config_ids_by_file_type.get(extension)) }); - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) + // If we have not found the configuration_id, see if we can get it from a shebang line + if configuration_id.is_none() { + if let Ok(mut file) = File::open(path) { + let mut buf = [0; 100]; + if std::io::Read::read(&mut file, &mut buf[..]).is_ok() { + if let Ok(str) = str::from_utf8(&buf) { + static SHEBANG_REGEX: Lazy = Lazy::new(|| { + Regex::new(r"#!/[^\s]*/(env\s)*([_a-zA-Z0-9-]+)").unwrap() + }); + configuration_id = SHEBANG_REGEX + .captures(str) + .and_then(|cap| cap.get(2)) + .and_then(|cap| self.language_config_ids_by_shebang.get(cap.as_str())) + } + } + } + }; - // TODO: content_regex handling conflict resolution + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) } pub fn language_config_for_scope(&self, scope: &str) -> Option> { diff --git a/languages.toml b/languages.toml index bd510ea1c3d55..0a8b2ccb9191d 100644 --- a/languages.toml +++ b/languages.toml @@ -3,6 +3,7 @@ name = "rust" scope = "source.rust" injection-regex = "rust" file-types = ["rs"] +shebangs = [] roots = [] auto-format = true comment-token = "//" @@ -17,6 +18,7 @@ name = "toml" scope = "source.toml" injection-regex = "toml" file-types = ["toml"] +shebangs = [] roots = [] comment-token = "#" @@ -27,6 +29,7 @@ name = "protobuf" scope = "source.proto" injection-regex = "protobuf" file-types = ["proto"] +shebangs = [] roots = [] comment-token = "//" @@ -37,6 +40,7 @@ name = "elixir" scope = "source.elixir" injection-regex = "elixir" file-types = ["ex", "exs"] +shebangs = [] roots = [] comment-token = "#" @@ -48,6 +52,7 @@ name = "json" scope = "source.json" injection-regex = "json" file-types = ["json"] +shebangs = [] roots = [] indent = { tab-width = 2, unit = " " } @@ -57,6 +62,7 @@ name = "c" scope = "source.c" injection-regex = "c" file-types = ["c"] # TODO: ["h"] +shebangs = [] roots = [] comment-token = "//" @@ -68,6 +74,7 @@ name = "cpp" scope = "source.cpp" injection-regex = "cpp" file-types = ["cc", "hh", "cpp", "hpp", "h", "ipp", "tpp", "cxx", "hxx", "ixx", "txx", "ino"] +shebangs = [] roots = [] comment-token = "//" @@ -79,6 +86,7 @@ name = "c-sharp" scope = "source.csharp" injection-regex = "c-?sharp" file-types = ["cs"] +shebangs = [] roots = [] comment-token = "//" @@ -89,6 +97,7 @@ name = "go" scope = "source.go" injection-regex = "go" file-types = ["go"] +shebangs = [] roots = ["Gopkg.toml", "go.mod"] auto-format = true comment-token = "//" @@ -102,6 +111,7 @@ name = "javascript" scope = "source.js" injection-regex = "^(js|javascript)$" file-types = ["js", "mjs"] +shebangs = [] roots = [] comment-token = "//" # TODO: highlights-jsx, highlights-params @@ -113,6 +123,7 @@ name = "typescript" scope = "source.ts" injection-regex = "^(ts|typescript)$" file-types = ["ts"] +shebangs = [] roots = [] # TODO: highlights-jsx, highlights-params @@ -124,6 +135,7 @@ name = "tsx" scope = "source.tsx" injection-regex = "^(tsx)$" # |typescript file-types = ["tsx"] +shebangs = [] roots = [] # TODO: highlights-jsx, highlights-params @@ -135,6 +147,7 @@ name = "css" scope = "source.css" injection-regex = "css" file-types = ["css"] +shebangs = [] roots = [] indent = { tab-width = 2, unit = " " } @@ -144,6 +157,7 @@ name = "html" scope = "text.html.basic" injection-regex = "html" file-types = ["html"] +shebangs = [] roots = [] indent = { tab-width = 2, unit = " " } @@ -153,6 +167,7 @@ name = "python" scope = "source.python" injection-regex = "python" file-types = ["py"] +shebangs = ["python", "python2", "python3"] roots = [] comment-token = "#" @@ -165,6 +180,7 @@ name = "nix" scope = "source.nix" injection-regex = "nix" file-types = ["nix"] +shebangs = [] roots = [] comment-token = "#" @@ -176,6 +192,7 @@ name = "ruby" scope = "source.ruby" injection-regex = "ruby" file-types = ["rb"] +shebangs = ["ruby"] roots = [] comment-token = "#" @@ -187,6 +204,7 @@ name = "bash" scope = "source.bash" injection-regex = "bash" file-types = ["sh", "bash"] +shebangs = ["sh", "bash", "dash"] roots = [] comment-token = "#" @@ -198,6 +216,7 @@ name = "php" scope = "source.php" injection-regex = "php" file-types = ["php"] +shebangs = ["php"] roots = [] indent = { tab-width = 4, unit = " " } @@ -207,6 +226,7 @@ name = "latex" scope = "source.tex" injection-regex = "tex" file-types = ["tex"] +shebangs = [] roots = [] comment-token = "%" @@ -217,6 +237,7 @@ name = "julia" scope = "source.julia" injection-regex = "julia" file-types = ["jl"] +shebangs = [] roots = [] comment-token = "#" language-server = { command = "julia", args = [ @@ -242,6 +263,7 @@ name = "java" scope = "source.java" injection-regex = "java" file-types = ["java"] +shebangs = [] roots = [] indent = { tab-width = 4, unit = " " } @@ -250,6 +272,7 @@ name = "ledger" scope = "source.ledger" injection-regex = "ledger" file-types = ["ldg", "ledger", "journal"] +shebangs = [] roots = [] comment-token = ";" indent = { tab-width = 4, unit = " " } @@ -259,6 +282,7 @@ name = "ocaml" scope = "source.ocaml" injection-regex = "ocaml" file-types = ["ml"] +shebangs = [] roots = [] comment-token = "(**)" indent = { tab-width = 2, unit = " " } @@ -267,6 +291,7 @@ indent = { tab-width = 2, unit = " " } name = "ocaml-interface" scope = "source.ocaml.interface" file-types = ["mli"] +shebangs = [] roots = [] comment-token = "(**)" indent = { tab-width = 2, unit = " "} @@ -275,6 +300,7 @@ indent = { tab-width = 2, unit = " "} name = "lua" scope = "source.lua" file-types = ["lua"] +shebangs = [] roots = [] comment-token = "--" indent = { tab-width = 2, unit = " " } @@ -284,6 +310,7 @@ name = "svelte" scope = "source.svelte" injection-regex = "svelte" file-types = ["svelte"] +shebangs = [] roots = [] indent = { tab-width = 2, unit = " " } language-server = { command = "svelteserver", args = ["--stdio"] } @@ -294,6 +321,7 @@ name = "vue" scope = "source.vue" injection-regex = "vue" file-types = ["vue"] +shebangs = [] roots = [] indent = { tab-width = 2, unit = " " } @@ -301,6 +329,7 @@ indent = { tab-width = 2, unit = " " } name = "yaml" scope = "source.yaml" file-types = ["yml", "yaml"] +shebangs = [] roots = [] comment-token = "#" indent = { tab-width = 2, unit = " " } @@ -320,6 +349,7 @@ name = "zig" scope = "source.zig" injection-regex = "zig" file-types = ["zig"] +shebangs = [] roots = ["build.zig"] auto-format = true comment-token = "//" @@ -332,6 +362,7 @@ name = "prolog" scope = "source.prolog" roots = [] file-types = ["pl", "prolog"] +shebangs = ["swipl"] comment-token = "%" language-server = { command = "swipl", args = [ @@ -343,6 +374,7 @@ language-server = { command = "swipl", args = [ name = "tsq" scope = "source.tsq" file-types = ["scm"] +shebangs = [] roots = [] comment-token = ";" indent = { tab-width = 2, unit = " " } @@ -351,6 +383,7 @@ indent = { tab-width = 2, unit = " " } name = "cmake" scope = "source.cmake" file-types = ["cmake", "CMakeLists.txt"] +shebangs = [] roots = [] comment-token = "#" indent = { tab-width = 2, unit = " " }