Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a shared extractor library #12546

Merged
merged 12 commits into from
Mar 27, 2023
Merged
9 changes: 7 additions & 2 deletions .github/workflows/ruby-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
echo "/usr/local/opt/gnu-tar/libexec/gnubin" >> $GITHUB_PATH
- name: Install cargo-cross
if: runner.os == 'Linux'
run: cargo install cross --version 0.2.1
run: cargo install cross --version 0.2.5
- uses: ./.github/actions/os-version
id: os_version
- name: Cache entire extractor
Expand Down Expand Up @@ -85,7 +85,12 @@ jobs:
# This ensures we don't depend on glibc > 2.17.
- name: Release build (linux)
if: steps.cache-extractor.outputs.cache-hit != 'true' && runner.os == 'Linux'
run: cd extractor && cross build --release
run: |
cd extractor
cross build --release
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps use --target-dir instead of using mv afterwards?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that will work. The default target-dir is ./target. Inside this dir, cross creates the x86_64-unknown-linux-gnu directory.

mv target/x86_64-unknown-linux-gnu/release/extractor target/release/
mv target/x86_64-unknown-linux-gnu/release/autobuilder target/release/
mv target/x86_64-unknown-linux-gnu/release/generator target/release/
- name: Release build (windows and macos)
if: steps.cache-extractor.outputs.cache-hit != 'true' && runner.os != 'Linux'
run: cd extractor && cargo build --release
Expand Down
113 changes: 83 additions & 30 deletions ql/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions ql/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
[workspace]
members = [
"autobuilder",
"extractor",
"generator",
"node-types",
"buramu",
]
9 changes: 0 additions & 9 deletions ql/autobuilder/Cargo.toml

This file was deleted.

4 changes: 1 addition & 3 deletions ql/extractor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
flate2 = "1.0"
node-types = { path = "../node-types" }
tree-sitter = ">= 0.20, < 0.21"
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", rev = "d08db734f8dc52f6bc04db53a966603122bc6985"}
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
Expand All @@ -19,5 +17,5 @@ clap = "2.33"
tracing = "0.1"
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
rayon = "1.7.0"
num_cpus = "1.14.0"
regex = "1.7.2"
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }
File renamed without changes.
83 changes: 43 additions & 40 deletions ql/extractor/src/main.rs → ql/extractor/src/bin/extractor.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,9 @@
mod extractor;
mod trap;

extern crate num_cpus;

use rayon::prelude::*;
use std::fs;
use std::io::BufRead;
use std::path::{Path, PathBuf};

/// Gets the number of threads the extractor should use, by reading the
/// `CODEQL_THREADS` environment variable and using it as described in the
/// extractor spec:
///
/// "If the number is positive, it indicates the number of threads that should
/// be used. If the number is negative or zero, it should be added to the number
/// of cores available on the machine to determine how many threads to use
/// (minimum of 1). If unspecified, should be considered as set to -1."
///
/// If the variable is set but cannot be parsed as an `i32`, an error is logged
/// and a single thread is used.
fn num_codeql_threads() -> usize {
    let threads_str = std::env::var("CODEQL_THREADS").unwrap_or_else(|_| "-1".to_owned());
    match threads_str.parse::<i32>() {
        Ok(num) if num <= 0 => {
            // `unsigned_abs` is well-defined for every `i32`, whereas `-num`
            // overflows for `i32::MIN`.
            let reduction = num.unsigned_abs() as usize;
            // Saturate instead of underflowing when the requested reduction is
            // at least the number of cores; the spec's minimum of 1 then applies.
            std::cmp::max(1, num_cpus::get().saturating_sub(reduction))
        }
        Ok(num) => num as usize,

        Err(_) => {
            tracing::error!(
                "Unable to parse CODEQL_THREADS value '{}'; defaulting to 1 thread.",
                &threads_str
            );
            1
        }
    }
}
use codeql_extractor::{diagnostics, extractor, node_types, trap};

fn main() -> std::io::Result<()> {
tracing_subscriber::fmt()
Expand All @@ -45,7 +13,23 @@ fn main() -> std::io::Result<()> {
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();

let num_threads = num_codeql_threads();
let diagnostics = diagnostics::DiagnosticLoggers::new("ql");
let mut main_thread_logger = diagnostics.logger();
let num_threads = match codeql_extractor::options::num_threads() {
Ok(num) => num,
Err(e) => {
main_thread_logger.write(
main_thread_logger
.new_entry("configuration-error", "Configuration error")
.message(
"{}; defaulting to 1 thread.",
&[diagnostics::MessageArg::Code(&e)],
)
.severity(diagnostics::Severity::Warning),
);
1
}
};
tracing::info!(
"Using {} {}",
num_threads,
Expand All @@ -55,6 +39,20 @@ fn main() -> std::io::Result<()> {
"threads"
}
);
let trap_compression = match trap::Compression::from_env("CODEQL_QL_TRAP_COMPRESSION") {
Ok(x) => x,
Err(e) => {
main_thread_logger.write(
main_thread_logger
.new_entry("configuration-error", "Configuration error")
.message("{}; using gzip.", &[diagnostics::MessageArg::Code(&e)])
.severity(diagnostics::Severity::Warning),
);
trap::Compression::Gzip
}
};
drop(main_thread_logger);

rayon::ThreadPoolBuilder::new()
.num_threads(num_threads)
.build_global()
Expand All @@ -79,7 +77,6 @@ fn main() -> std::io::Result<()> {
.value_of("output-dir")
.expect("missing --output-dir");
let trap_dir = PathBuf::from(trap_dir);
let trap_compression = trap::Compression::from_env("CODEQL_QL_TRAP_COMPRESSION");

let file_list = matches.value_of("file-list").expect("missing --file-list");
let file_list = fs::File::open(file_list)?;
Expand Down Expand Up @@ -119,26 +116,29 @@ fn main() -> std::io::Result<()> {
let source = std::fs::read(&path)?;
let code_ranges = vec![];
let mut trap_writer = trap::Writer::new();
let mut diagnostics_writer = diagnostics.logger();
if line.ends_with(".dbscheme") {
extractor::extract(
dbscheme,
"dbscheme",
&dbscheme_schema,
&mut diagnostics_writer,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
)
} else if line.ends_with("qlpack.yml") {
extractor::extract(
yaml,
"yaml",
&yaml_schema,
&mut diagnostics_writer,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
)
} else if line.ends_with(".json")
|| line.ends_with(".jsonl")
|| line.ends_with(".jsonc")
Expand All @@ -147,31 +147,34 @@ fn main() -> std::io::Result<()> {
json,
"json",
&json_schema,
&mut diagnostics_writer,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
)
} else if line.ends_with(".blame") {
extractor::extract(
blame,
"blame",
&blame_schema,
&mut diagnostics_writer,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
)
} else {
extractor::extract(
language,
"ql",
&schema,
&mut diagnostics_writer,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
)
}
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;
Expand Down
Loading