Skip to content

Commit

Permalink
Detect filetype from shebang line
Browse files Browse the repository at this point in the history
  • Loading branch information
ath3 committed Nov 7, 2021
1 parent 7b65a6d commit a7c52af
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 3 deletions.
1 change: 1 addition & 0 deletions helix-core/src/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ where
language: vec![LanguageConfiguration {
scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()],
shebangs: vec![],
language_id: "Rust".to_string(),
highlight_config: OnceCell::new(),
config: None,
Expand Down
31 changes: 28 additions & 3 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use std::{
cell::RefCell,
collections::{HashMap, HashSet},
fmt,
fs::File,
path::Path,
sync::Arc,
};
Expand Down Expand Up @@ -52,6 +53,7 @@ pub struct LanguageConfiguration {
pub language_id: String,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub shebangs: Vec<String>, // interpreter(s) associated with language
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub comment_token: Option<String>,

Expand Down Expand Up @@ -254,13 +256,15 @@ pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
language_config_ids_by_shebang: HashMap<String, usize>,
}

impl Loader {
pub fn new(config: Configuration) -> Self {
let mut loader = Self {
language_configs: Vec::new(),
language_config_ids_by_file_type: HashMap::new(),
language_config_ids_by_shebang: HashMap::new(),
};

for config in config.language {
Expand All @@ -273,6 +277,11 @@ impl Loader {
.language_config_ids_by_file_type
.insert(file_type.clone(), language_id);
}
for shebang in &config.shebangs {
loader
.language_config_ids_by_shebang
.insert(shebang.clone(), language_id);
}

loader.language_configs.push(Arc::new(config));
}
Expand All @@ -283,7 +292,7 @@ impl Loader {
pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_id = path
let mut configuration_id = path
.file_name()
.and_then(|n| n.to_str())
.and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
Expand All @@ -293,9 +302,25 @@ impl Loader {
.and_then(|extension| self.language_config_ids_by_file_type.get(extension))
});

configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
// Fallback: if neither the file name nor the extension lookup above produced a
// configuration id, try to derive one from a shebang line at the top of the file.
// Every failure along the way (file cannot be opened or read, bytes are not valid
// UTF-8, no shebang match, interpreter not registered) leaves `configuration_id`
// as `None`; nothing is reported to the caller.
if configuration_id.is_none() {
if let Ok(mut file) = File::open(path) {
// Only the first 100 bytes are inspected; the buffer starts out zero-filled.
let mut buf = [0; 100];
// NOTE(review): the number of bytes actually read is discarded here, so for
// files shorter than 100 bytes the trailing zero bytes stay in the buffer and
// are handed to the UTF-8 check and the regex below — confirm this is intended.
if std::io::Read::read(&mut file, &mut buf[..]).is_ok() {
// NOTE(review): the whole 100-byte buffer is validated, so a multi-byte
// character cut off at the buffer boundary (or any non-UTF-8 byte after the
// first line) makes this fail and skips shebang detection — verify.
if let Ok(str) = str::from_utf8(&buf) {
// Pattern: `#!/`, a run of non-whitespace characters up to a `/`, zero or
// more `env ` prefixes, then the interpreter name as capture group 2
// (letters, digits, `_` and `-` only).
// NOTE(review): the pattern is not anchored with `^`, so a match anywhere
// in the inspected bytes is accepted, not only on the first line.
static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"#!/[^\s]*/(env\s)*([_a-zA-Z0-9-]+)").unwrap()
});
// Look the captured interpreter name up in the shebang -> config id map.
configuration_id = SHEBANG_REGEX
.captures(str)
.and_then(|cap| cap.get(2))
.and_then(|cap| self.language_config_ids_by_shebang.get(cap.as_str()))
}
}
}
};

// TODO: content_regex handling conflict resolution
configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
}

pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
Expand Down
33 changes: 33 additions & 0 deletions languages.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name = "rust"
scope = "source.rust"
injection-regex = "rust"
file-types = ["rs"]
shebangs = []
roots = []
auto-format = true
comment-token = "//"
Expand All @@ -17,6 +18,7 @@ name = "toml"
scope = "source.toml"
injection-regex = "toml"
file-types = ["toml"]
shebangs = []
roots = []
comment-token = "#"

Expand All @@ -27,6 +29,7 @@ name = "protobuf"
scope = "source.proto"
injection-regex = "protobuf"
file-types = ["proto"]
shebangs = []
roots = []
comment-token = "//"

Expand All @@ -37,6 +40,7 @@ name = "elixir"
scope = "source.elixir"
injection-regex = "elixir"
file-types = ["ex", "exs"]
shebangs = []
roots = []
comment-token = "#"

Expand All @@ -48,6 +52,7 @@ name = "json"
scope = "source.json"
injection-regex = "json"
file-types = ["json"]
shebangs = []
roots = []

indent = { tab-width = 2, unit = " " }
Expand All @@ -57,6 +62,7 @@ name = "c"
scope = "source.c"
injection-regex = "c"
file-types = ["c"] # TODO: ["h"]
shebangs = []
roots = []
comment-token = "//"

Expand All @@ -68,6 +74,7 @@ name = "cpp"
scope = "source.cpp"
injection-regex = "cpp"
file-types = ["cc", "hh", "cpp", "hpp", "h", "ipp", "tpp", "cxx", "hxx", "ixx", "txx", "ino"]
shebangs = []
roots = []
comment-token = "//"

Expand All @@ -79,6 +86,7 @@ name = "c-sharp"
scope = "source.csharp"
injection-regex = "c-?sharp"
file-types = ["cs"]
shebangs = []
roots = []
comment-token = "//"

Expand All @@ -89,6 +97,7 @@ name = "go"
scope = "source.go"
injection-regex = "go"
file-types = ["go"]
shebangs = []
roots = ["Gopkg.toml", "go.mod"]
auto-format = true
comment-token = "//"
Expand All @@ -102,6 +111,7 @@ name = "javascript"
scope = "source.js"
injection-regex = "^(js|javascript)$"
file-types = ["js", "mjs"]
shebangs = []
roots = []
comment-token = "//"
# TODO: highlights-jsx, highlights-params
Expand All @@ -113,6 +123,7 @@ name = "typescript"
scope = "source.ts"
injection-regex = "^(ts|typescript)$"
file-types = ["ts"]
shebangs = []
roots = []
# TODO: highlights-jsx, highlights-params

Expand All @@ -124,6 +135,7 @@ name = "tsx"
scope = "source.tsx"
injection-regex = "^(tsx)$" # |typescript
file-types = ["tsx"]
shebangs = []
roots = []
# TODO: highlights-jsx, highlights-params

Expand All @@ -135,6 +147,7 @@ name = "css"
scope = "source.css"
injection-regex = "css"
file-types = ["css"]
shebangs = []
roots = []

indent = { tab-width = 2, unit = " " }
Expand All @@ -144,6 +157,7 @@ name = "html"
scope = "text.html.basic"
injection-regex = "html"
file-types = ["html"]
shebangs = []
roots = []

indent = { tab-width = 2, unit = " " }
Expand All @@ -153,6 +167,7 @@ name = "python"
scope = "source.python"
injection-regex = "python"
file-types = ["py"]
shebangs = ["python", "python2", "python3"]
roots = []
comment-token = "#"

Expand All @@ -165,6 +180,7 @@ name = "nix"
scope = "source.nix"
injection-regex = "nix"
file-types = ["nix"]
shebangs = []
roots = []
comment-token = "#"

Expand All @@ -176,6 +192,7 @@ name = "ruby"
scope = "source.ruby"
injection-regex = "ruby"
file-types = ["rb"]
shebangs = ["ruby"]
roots = []
comment-token = "#"

Expand All @@ -187,6 +204,7 @@ name = "bash"
scope = "source.bash"
injection-regex = "bash"
file-types = ["sh", "bash"]
shebangs = ["sh", "bash", "dash"]
roots = []
comment-token = "#"

Expand All @@ -198,6 +216,7 @@ name = "php"
scope = "source.php"
injection-regex = "php"
file-types = ["php"]
shebangs = ["php"]
roots = []

indent = { tab-width = 4, unit = " " }
Expand All @@ -207,6 +226,7 @@ name = "latex"
scope = "source.tex"
injection-regex = "tex"
file-types = ["tex"]
shebangs = []
roots = []
comment-token = "%"

Expand All @@ -217,6 +237,7 @@ name = "julia"
scope = "source.julia"
injection-regex = "julia"
file-types = ["jl"]
shebangs = []
roots = []
comment-token = "#"
language-server = { command = "julia", args = [
Expand All @@ -242,6 +263,7 @@ name = "java"
scope = "source.java"
injection-regex = "java"
file-types = ["java"]
shebangs = []
roots = []
indent = { tab-width = 4, unit = " " }

Expand All @@ -250,6 +272,7 @@ name = "ledger"
scope = "source.ledger"
injection-regex = "ledger"
file-types = ["ldg", "ledger", "journal"]
shebangs = []
roots = []
comment-token = ";"
indent = { tab-width = 4, unit = " " }
Expand All @@ -259,6 +282,7 @@ name = "ocaml"
scope = "source.ocaml"
injection-regex = "ocaml"
file-types = ["ml"]
shebangs = []
roots = []
comment-token = "(**)"
indent = { tab-width = 2, unit = " " }
Expand All @@ -267,6 +291,7 @@ indent = { tab-width = 2, unit = " " }
name = "ocaml-interface"
scope = "source.ocaml.interface"
file-types = ["mli"]
shebangs = []
roots = []
comment-token = "(**)"
indent = { tab-width = 2, unit = " "}
Expand All @@ -275,6 +300,7 @@ indent = { tab-width = 2, unit = " "}
name = "lua"
scope = "source.lua"
file-types = ["lua"]
shebangs = []
roots = []
comment-token = "--"
indent = { tab-width = 2, unit = " " }
Expand All @@ -284,6 +310,7 @@ name = "svelte"
scope = "source.svelte"
injection-regex = "svelte"
file-types = ["svelte"]
shebangs = []
roots = []
indent = { tab-width = 2, unit = " " }
language-server = { command = "svelteserver", args = ["--stdio"] }
Expand All @@ -294,13 +321,15 @@ name = "vue"
scope = "source.vue"
injection-regex = "vue"
file-types = ["vue"]
shebangs = []
roots = []
indent = { tab-width = 2, unit = " " }

[[language]]
name = "yaml"
scope = "source.yaml"
file-types = ["yml", "yaml"]
shebangs = []
roots = []
comment-token = "#"
indent = { tab-width = 2, unit = " " }
Expand All @@ -320,6 +349,7 @@ name = "zig"
scope = "source.zig"
injection-regex = "zig"
file-types = ["zig"]
shebangs = []
roots = ["build.zig"]
auto-format = true
comment-token = "//"
Expand All @@ -332,6 +362,7 @@ name = "prolog"
scope = "source.prolog"
roots = []
file-types = ["pl", "prolog"]
shebangs = ["swipl"]
comment-token = "%"

language-server = { command = "swipl", args = [
Expand All @@ -343,6 +374,7 @@ language-server = { command = "swipl", args = [
name = "tsq"
scope = "source.tsq"
file-types = ["scm"]
shebangs = []
roots = []
comment-token = ";"
indent = { tab-width = 2, unit = " " }
Expand All @@ -351,6 +383,7 @@ indent = { tab-width = 2, unit = " " }
name = "cmake"
scope = "source.cmake"
file-types = ["cmake", "CMakeLists.txt"]
shebangs = []
roots = []
comment-token = "#"
indent = { tab-width = 2, unit = " " }
Expand Down

0 comments on commit a7c52af

Please sign in to comment.