From 1b3a5cdab138855838b41c75998e59b7d8ee191d Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Wed, 11 Sep 2024 17:09:18 +0200 Subject: [PATCH 1/6] Rust: make the cli flags override automatic This makes the clap flags overlay over `Config` entirely derived via an attribute macro. Also, the `--intputs-file` option is replaced by a more standard and versatile `@` parameter file mechanism. --- Cargo.lock | 38 +++++++++++++++++++++++ Cargo.toml | 1 + rust/extractor/Cargo.toml | 5 ++- rust/extractor/macros/Cargo.toml | 11 +++++++ rust/extractor/macros/src/lib.rs | 52 ++++++++++++++++++++++++++++++++ rust/extractor/src/config.rs | 41 ++++--------------------- rust/tools/index-files.sh | 2 +- 7 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 rust/extractor/macros/Cargo.toml create mode 100644 rust/extractor/macros/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index fcd7d8068439..d3b543434997 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,6 +96,16 @@ version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" +[[package]] +name = "argfile" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1cc0ba69de57db40674c66f7cf2caee3981ddef084388482c95c0e2133e5e8" +dependencies = [ + "fs-err", + "os_str_bytes", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -360,6 +370,7 @@ name = "codeql-rust" version = "0.1.0" dependencies = [ "anyhow", + "argfile", "clap", "codeql-extractor", "figment", @@ -374,6 +385,7 @@ dependencies = [ "ra_ap_project_model", "ra_ap_syntax", "ra_ap_vfs", + "rust-extractor-macros", "serde", "serde_with", "stderrlog", @@ -643,6 +655,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "fs-err" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" +dependencies = [ + "autocfg", +] + [[package]] name = "fsevent-sys" version = "4.1.0" @@ -1064,6 +1085,15 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +[[package]] +name = "os_str_bytes" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ac44c994af577c799b1b4bd80dc214701e349873ad894d6cdf96f4f7526e0b9" +dependencies = [ + "memchr", +] + [[package]] name = "overload" version = "0.1.1" @@ -1875,6 +1905,14 @@ dependencies = [ "text-size", ] +[[package]] +name = "rust-extractor-macros" +version = "0.1.0" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "rustc-hash" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 5f095736c8a7..4aacef79adc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "shared/tree-sitter-extractor", "ruby/extractor", "rust/extractor", + "rust/extractor/macros", ] [patch.crates-io] diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index c849ea4aa464..3b474f90f984 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -22,7 +22,6 @@ serde = "1.0.209" serde_with = "3.9.0" stderrlog = "0.6.0" triomphe = "0.1.13" -# Ideally, we'd like to pull this in via a relative path. -# However, our bazel/rust tooling chokes on this, c.f. https://github.com/bazelbuild/rules_rust/issues/1525 -# Therefore, we have a pretty bad hack in place instead, see README.md in the codeql-extractor-fake-crate directory. +argfile = "0.2.1" codeql-extractor = { path = "../../shared/tree-sitter-extractor" } +rust-extractor-macros = { path = "macros" } diff --git a/rust/extractor/macros/Cargo.toml b/rust/extractor/macros/Cargo.toml new file mode 100644 index 000000000000..d4d10bc3bde2 --- /dev/null +++ b/rust/extractor/macros/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust-extractor-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +quote = "1.0.37" +syn = { version = "2.0.77", features = ["full"] } diff --git a/rust/extractor/macros/src/lib.rs b/rust/extractor/macros/src/lib.rs new file mode 100644 index 000000000000..13472665454a --- /dev/null +++ b/rust/extractor/macros/src/lib.rs @@ -0,0 +1,52 @@ +use proc_macro::TokenStream; +use quote::{quote, format_ident}; +use syn; + + +/// Allow all fields in the extractor config to be also overrideable by extractor CLI flags +#[proc_macro_attribute] +pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStream { + let ast = syn::parse_macro_input!(item as syn::ItemStruct); + let name = &ast.ident; + let new_name = format_ident!("Cli{}", name); + let fields: Vec<_> = ast.fields.iter().map(|f| { + let id = f.ident.as_ref().unwrap(); + let ty = &f.ty; + if let syn::Type::Path(p) = ty { + if p.path.is_ident(&format_ident!("bool")) { + return quote! { + #[arg(long)] + #id: bool, + }; + } + } + if id == &format_ident!("verbose") { + quote! { + #[arg(long, short, action=clap::ArgAction::Count)] + #id: u8, + } + } else if id == &format_ident!("inputs") { + quote! { + #id: #ty, + } + } else { + quote! { + #[arg(long)] + #id: Option<#ty>, + } + } + }).collect(); + let gen = quote! { + #[serde_with::apply(_ => #[serde(default)])] + #[derive(Debug, Deserialize, Default)] + #ast + + #[serde_with::skip_serializing_none] + #[derive(clap::Parser, Serialize)] + #[command(about, long_about = None)] + struct #new_name { + #(#fields)* + } + }; + gen.into() +} diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 399c2bb9e7e7..310ca2c3649b 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -1,14 +1,15 @@ use anyhow::Context; -use clap::{ArgAction, Parser, ValueEnum}; +use clap::Parser; use codeql_extractor::trap; use figment::{ providers::{Env, Serialized}, Figment, }; +use rust_extractor_macros::extractor_cli_config; use serde::{Deserialize, Serialize}; use std::path::PathBuf; -#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, ValueEnum)] +#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, clap::ValueEnum)] #[serde(rename_all = "lowercase")] #[clap(rename_all = "lowercase")] pub enum Compression { @@ -26,8 +27,7 @@ impl From for trap::Compression { } } -#[serde_with::apply(_ => #[serde(default)])] -#[derive(Debug, Deserialize, Default)] +#[extractor_cli_config] pub struct Config { pub scratch_dir: PathBuf, pub trap_dir: PathBuf, @@ -38,39 +38,10 @@ pub struct Config { pub inputs: Vec, } -#[serde_with::apply(_ => #[serde(skip_serializing_if = "is_default")])] -#[derive(clap::Parser, Serialize)] -#[command(about, long_about = None)] -struct CliArgs { - #[arg(long)] - scratch_dir: Option, - #[arg(long)] - trap_dir: Option, - #[arg(long)] - source_archive_dir: Option, - #[arg(long)] - compression: Option, - #[arg(short, long, action = ArgAction::Count)] - verbose: u8, - #[arg(long)] - inputs_file: Option, - - inputs: Vec, -} - -fn is_default(t: &T) -> bool { - *t == Default::default() -} - impl Config { pub fn extract() -> anyhow::Result { - let mut cli_args = CliArgs::parse(); - if let Some(inputs_file) = cli_args.inputs_file.take() { - let inputs_list = std::fs::read_to_string(inputs_file).context("reading file list")?; - cli_args - .inputs - .extend(inputs_list.split_terminator("\n").map(PathBuf::from)); - } + let args = argfile::expand_args(argfile::parse_fromfile, argfile::PREFIX)?; + let cli_args = CliConfig::parse_from(args); Figment::new() .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_OPTION_")) diff --git a/rust/tools/index-files.sh b/rust/tools/index-files.sh index da4b841b692f..f3d93fbaf4a9 100755 --- a/rust/tools/index-files.sh +++ b/rust/tools/index-files.sh @@ -2,4 +2,4 @@ set -eu -exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" --inputs-file="$1" +exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" @"$1" From 0a8c0f5ab40cfc681913ce20ae1328c195ef53fa Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Thu, 12 Sep 2024 08:46:50 +0200 Subject: [PATCH 2/6] Rust: fix bazel build --- MODULE.bazel | 1 + ruby/extractor/BUILD.bazel | 2 +- rust/extractor/BUILD.bazel | 6 ++++-- rust/extractor/macros/BUILD.bazel | 20 ++++++++++++++++++++ shared/tree-sitter-extractor/BUILD.bazel | 8 ++++++-- 5 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 rust/extractor/macros/BUILD.bazel diff --git a/MODULE.bazel b/MODULE.bazel index 20efc5390396..020d5b0fbaea 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -60,6 +60,7 @@ r.from_cargo( "//:Cargo.toml", "//ruby/extractor:Cargo.toml", "//rust/extractor:Cargo.toml", + "//rust/extractor/macros:Cargo.toml", "//shared/tree-sitter-extractor:Cargo.toml", ], ) diff --git a/ruby/extractor/BUILD.bazel b/ruby/extractor/BUILD.bazel index 203f90310fa7..158f1c91e4c4 100644 --- a/ruby/extractor/BUILD.bazel +++ b/ruby/extractor/BUILD.bazel @@ -12,6 +12,6 @@ codeql_rust_binary( deps = all_crate_deps( normal = True, ) + [ - "//shared/tree-sitter-extractor:codeql-extractor", + "//shared/tree-sitter-extractor", ], ) diff --git a/rust/extractor/BUILD.bazel b/rust/extractor/BUILD.bazel index 924d5e01497c..a5d99e82584e 100644 --- a/rust/extractor/BUILD.bazel +++ b/rust/extractor/BUILD.bazel @@ -7,11 +7,13 @@ codeql_rust_binary( aliases = aliases(), proc_macro_deps = all_crate_deps( proc_macro = True, - ), + ) + [ + "//rust/extractor/macros", + ], visibility = ["//rust:__subpackages__"], deps = all_crate_deps( normal = True, ) + [ - "//shared/tree-sitter-extractor:codeql-extractor", + "//shared/tree-sitter-extractor", ], ) diff --git a/rust/extractor/macros/BUILD.bazel b/rust/extractor/macros/BUILD.bazel new file mode 100644 index 000000000000..4ddfb14a171f --- /dev/null +++ b/rust/extractor/macros/BUILD.bazel @@ -0,0 +1,20 @@ +load("@rules_rust//rust:defs.bzl", "rust_proc_macro") +load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps") + +rust_proc_macro( + name = "rust_extractor_macros", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + proc_macro_deps = all_crate_deps( + proc_macro = True, + ), + deps = all_crate_deps( + normal = True, + ), +) + +alias( + name = "macros", + actual = "rust_extractor_macros", + visibility = ["//rust:__subpackages__"], +) diff --git a/shared/tree-sitter-extractor/BUILD.bazel b/shared/tree-sitter-extractor/BUILD.bazel index dc9001a32d25..0ebc189954be 100644 --- a/shared/tree-sitter-extractor/BUILD.bazel +++ b/shared/tree-sitter-extractor/BUILD.bazel @@ -1,8 +1,6 @@ load("@rules_rust//rust:defs.bzl", "rust_library") load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps") -package(default_visibility = ["//visibility:public"]) - rust_library( name = "codeql-extractor", srcs = glob([ @@ -14,3 +12,9 @@ rust_library( ], deps = all_crate_deps(), ) + +alias( + name = "tree-sitter-extractor", + actual = ":codeql-extractor", + visibility = ["//visibility:public"], +) From 6adf88542e4477f7cab6ff02ca965b293f85ce50 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Thu, 12 Sep 2024 08:53:08 +0200 Subject: [PATCH 3/6] Rust: fix linting script --- rust/extractor/macros/src/lib.rs | 58 +++++++++++++++++--------------- rust/lint.py | 10 +++--- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/rust/extractor/macros/src/lib.rs b/rust/extractor/macros/src/lib.rs index 13472665454a..1905d40f9b21 100644 --- a/rust/extractor/macros/src/lib.rs +++ b/rust/extractor/macros/src/lib.rs @@ -1,7 +1,5 @@ use proc_macro::TokenStream; -use quote::{quote, format_ident}; -use syn; - +use quote::{format_ident, quote}; /// Allow all fields in the extractor config to be also overrideable by extractor CLI flags #[proc_macro_attribute] @@ -9,33 +7,37 @@ pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStrea let ast = syn::parse_macro_input!(item as syn::ItemStruct); let name = &ast.ident; let new_name = format_ident!("Cli{}", name); - let fields: Vec<_> = ast.fields.iter().map(|f| { - let id = f.ident.as_ref().unwrap(); - let ty = &f.ty; - if let syn::Type::Path(p) = ty { - if p.path.is_ident(&format_ident!("bool")) { - return quote! { - #[arg(long)] - #id: bool, - }; - } - } - if id == &format_ident!("verbose") { - quote! { - #[arg(long, short, action=clap::ArgAction::Count)] - #id: u8, + let fields: Vec<_> = ast + .fields + .iter() + .map(|f| { + let id = f.ident.as_ref().unwrap(); + let ty = &f.ty; + if let syn::Type::Path(p) = ty { + if p.path.is_ident(&format_ident!("bool")) { + return quote! { + #[arg(long)] + #id: bool, + }; + } } - } else if id == &format_ident!("inputs") { - quote! { - #id: #ty, - } - } else { - quote! { - #[arg(long)] - #id: Option<#ty>, + if id == &format_ident!("verbose") { + quote! { + #[arg(long, short, action=clap::ArgAction::Count)] + #id: u8, + } + } else if id == &format_ident!("inputs") { + quote! { + #id: #ty, + } + } else { + quote! { + #[arg(long)] + #id: Option<#ty>, + } } - } - }).collect(); + }) + .collect(); let gen = quote! { #[serde_with::apply(_ => #[serde(default)])] #[derive(Debug, Deserialize, Default)] diff --git a/rust/lint.py b/rust/lint.py index 1af2470dbbcf..eb71cbd0b3dd 100755 --- a/rust/lint.py +++ b/rust/lint.py @@ -5,12 +5,14 @@ import shutil import sys -extractor_dir = pathlib.Path(__file__).resolve().parent / "extractor" +this_dir = pathlib.Path(__file__).resolve().parent cargo = shutil.which("cargo") assert cargo, "no cargo binary found on `PATH`" -fmt = subprocess.run([cargo, "fmt", "--quiet"], cwd=extractor_dir) -clippy = subprocess.run([cargo, "clippy", "--fix", "--allow-dirty", "--allow-staged", "--quiet"], - cwd=extractor_dir) +fmt = subprocess.run([cargo, "fmt", "--all", "--quiet"], cwd=this_dir) +for manifest in this_dir.rglob("Cargo.toml"): + if not manifest.is_relative_to(this_dir / "ql"): + clippy = subprocess.run([cargo, "clippy", "--fix", "--allow-dirty", "--allow-staged", "--quiet"], + cwd=manifest.parent) sys.exit(fmt.returncode or clippy.returncode) From 5ae88243033c013bf1fd473e1e9e0648493e7aed Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Thu, 12 Sep 2024 08:56:07 +0200 Subject: [PATCH 4/6] Rust: add context to parameter file expansion errors --- rust/extractor/src/config.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 310ca2c3649b..74c9f4b20a5b 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -40,7 +40,8 @@ pub struct Config { impl Config { pub fn extract() -> anyhow::Result { - let args = argfile::expand_args(argfile::parse_fromfile, argfile::PREFIX)?; + let args = argfile::expand_args(argfile::parse_fromfile, argfile::PREFIX) + .context("expanding parameter files")?; let cli_args = CliConfig::parse_from(args); Figment::new() .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_")) From 403cc3df9013c3a278b6b8b9b8b5a52f2dd2d91b Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 13 Sep 2024 06:50:12 +0200 Subject: [PATCH 5/6] Rust: avoid cli flag defaults overriding env settings --- rust/extractor/macros/src/lib.rs | 2 ++ rust/extractor/src/config.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/rust/extractor/macros/src/lib.rs b/rust/extractor/macros/src/lib.rs index 1905d40f9b21..781d53bd8513 100644 --- a/rust/extractor/macros/src/lib.rs +++ b/rust/extractor/macros/src/lib.rs @@ -17,6 +17,7 @@ pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStrea if p.path.is_ident(&format_ident!("bool")) { return quote! { #[arg(long)] + #[serde(skip_serializing_if="<&bool>::not")] #id: bool, }; } @@ -24,6 +25,7 @@ pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStrea if id == &format_ident!("verbose") { quote! { #[arg(long, short, action=clap::ArgAction::Count)] + #[serde(skip_serializing_if="u8::is_zero")] #id: u8, } } else if id == &format_ident!("inputs") { diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 74c9f4b20a5b..98dac7fce326 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -5,8 +5,10 @@ use figment::{ providers::{Env, Serialized}, Figment, }; +use num_traits::Zero; use rust_extractor_macros::extractor_cli_config; use serde::{Deserialize, Serialize}; +use std::ops::Not; use std::path::PathBuf; #[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, clap::ValueEnum)] From 23dd572d5e1b430cc8b8f6365a915cbbb236c079 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 13 Sep 2024 13:39:39 +0200 Subject: [PATCH 6/6] Rust: add `CODEQL_` base env layer --- rust/extractor/src/config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 98dac7fce326..0625968a4af7 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -46,6 +46,7 @@ impl Config { .context("expanding parameter files")?; let cli_args = CliConfig::parse_from(args); Figment::new() + .merge(Env::prefixed("CODEQL_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_OPTION_")) .merge(Serialized::defaults(cli_args))