diff --git a/.gitignore b/.gitignore index 60a2bf2be..6e649549e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,9 @@ .vscode *.profdata *.profraw +*.swp target macros/target site/node_modules site/public -site/resources \ No newline at end of file +site/resources diff --git a/Cargo.lock b/Cargo.lock index 3f91dbeac..a10ec51a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,6 +219,15 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "atomic" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" +dependencies = [ + "bytemuck", +] + [[package]] name = "autocfg" version = "1.3.0" @@ -1295,6 +1304,19 @@ dependencies = [ "subtle", ] +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "serde", + "toml", + "uncased", + "version_check", +] + [[package]] name = "filedescriptor" version = "0.8.2" @@ -3781,6 +3803,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + [[package]] name = "unicase" version = "2.7.0" @@ -4673,12 +4704,15 @@ dependencies = [ "enable-ansi-support", "encoding_rs", "env_logger", + "figment", "globwalk", + "home", "itertools 0.13.0", "log", "pprof", "protobuf", "protobuf-json-mapping", + "serde", "serde_json", "strum_macros 0.26.4", "superconsole", diff --git a/Cargo.toml b/Cargo.toml index a374e5756..88419cf5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,9 +52,11 @@ dsa = "0.6.3" ecdsa = "0.16.9" 
enable-ansi-support = "0.2.1" env_logger = "0.11.3" +figment = "0.10.19" fmmap = "0.3.3" globwalk = "0.9.1" goldenfile = "1.6.1" +home = "0.5.9" ihex = "3.0.0" indenter = "0.3.3" indexmap = "2.2.6" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5992c269c..7b9ce3d3d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -43,7 +43,9 @@ ascii_tree = { workspace = true } anyhow = { workspace = true } clap = { workspace = true, features = ["cargo", "derive"] } clap_complete = { workspace = true } +figment = { workspace = true, features = ["toml"] } globwalk = { workspace = true } +home = { workspace = true } itertools = { workspace = true } enable-ansi-support = { workspace = true } env_logger = { workspace = true, optional = true, features = ["auto-color"] } @@ -51,6 +53,7 @@ log = { workspace = true, optional = true } protobuf = { workspace = true } protobuf-json-mapping = { workspace = true } serde_json = { workspace = true, features = ["preserve_order"] } +serde = { workspace = true } yansi = { workspace = true } yara-x = { workspace = true, features = ["parallel-compilation"] } yara-x-parser = { workspace = true } diff --git a/cli/src/commands/fmt.rs b/cli/src/commands/fmt.rs index f1e39749d..76f33e7ac 100644 --- a/cli/src/commands/fmt.rs +++ b/cli/src/commands/fmt.rs @@ -2,10 +2,12 @@ use std::fs::File; use std::path::PathBuf; use std::{fs, io, process}; -use crate::help::FMT_CHECK_MODE; use clap::{arg, value_parser, ArgAction, ArgMatches, Command}; use yara_x_fmt::Formatter; +use crate::config::{load_config_from_file, FormatConfig}; +use crate::help::{CONFIG_FILE, FMT_CHECK_MODE}; + pub fn fmt() -> Command { super::command("fmt") .about("Format YARA source files") @@ -17,13 +19,33 @@ pub fn fmt() -> Command { .action(ArgAction::Append), ) .arg(arg!(-c --check "Run in 'check' mode").long_help(FMT_CHECK_MODE)) + .arg( + arg!(-C --config "Config file") + .value_parser(value_parser!(PathBuf)) + .long_help(CONFIG_FILE), + ) } -pub fn exec_fmt(args: &ArgMatches) -> 
anyhow::Result<()> { +pub fn exec_fmt( + args: &ArgMatches, + main_config: FormatConfig, +) -> anyhow::Result<()> { let files = args.get_many::("FILE").unwrap(); let check = args.get_flag("check"); + let config_file = args.get_one::("config"); + + let config: FormatConfig = if config_file.is_some() { + load_config_from_file(config_file.unwrap())?.fmt + } else { + main_config + }; - let formatter = Formatter::new(); + let formatter = Formatter::new() + .align_metadata(config.meta.align_values) + .align_patterns(config.patterns.align_values) + .indent_section_headers(config.rule.indent_section_headers) + .indent_section_contents(config.rule.indent_section_contents) + .indent_spaces(config.rule.indent_spaces); let mut changed = false; for file in files { diff --git a/cli/src/config.rs b/cli/src/config.rs new file mode 100644 index 000000000..5bce41f31 --- /dev/null +++ b/cli/src/config.rs @@ -0,0 +1,79 @@ +use std::path::Path; + +use figment::{ + providers::{Format, Serialized, Toml}, + Figment, +}; +use serde::{Deserialize, Serialize}; + +/// Configuration structure for "yr" commands. +#[derive(Deserialize, Serialize, Debug)] +pub struct Config { + /// Format specific configuration information. + pub fmt: FormatConfig, +} + +/// Format specific configuration information. +#[derive(Deserialize, Serialize, Debug)] +pub struct FormatConfig { + /// Rule specific formatting information. + pub rule: Rule, + /// Meta specific formatting information. + pub meta: Meta, + /// Pattern specific formatting information. + pub patterns: Patterns, +} + +/// Rule specific formatting information. +#[derive(Deserialize, Serialize, Debug)] +pub struct Rule { + /// Indent section headers (meta, strings, condition). + pub indent_section_headers: bool, + /// Indent section contents one level past section headers. + pub indent_section_contents: bool, + /// Number of spaces for indent. Set to 0 to use tabs. + pub indent_spaces: u8, +} + +/// Meta specific formatting information. 
+#[derive(Deserialize, Serialize, Debug)] +pub struct Meta { + /// Align values to longest key. + pub align_values: bool, +} + +/// Pattern specific formatting information. +#[derive(Deserialize, Serialize, Debug)] +pub struct Patterns { + /// Align patterns to the longest name. + pub align_values: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + fmt: FormatConfig { + rule: Rule { + indent_section_headers: true, + indent_section_contents: true, + indent_spaces: 2, + }, + meta: Meta { align_values: true }, + patterns: Patterns { align_values: true }, + }, + } + } +} + +/// Load config file from a given path. Path must contain a valid TOML file or +/// this function will propagate the error. For structure of the config file +/// see "YARA-X Config Guide.md". +pub fn load_config_from_file( + config_file: &Path, +) -> Result { + let config: Config = + Figment::from(Serialized::defaults(Config::default())) + .merge(Toml::file_exact(config_file)) + .extract()?; + Ok(config) +} diff --git a/cli/src/help.rs b/cli/src/help.rs index 8063ec8a1..44480803d 100644 --- a/cli/src/help.rs +++ b/cli/src/help.rs @@ -150,3 +150,11 @@ pub const FMT_CHECK_MODE: &str = r#"Run in 'check' mode Doesn't modify the files. Exits with 0 if files are formatted correctly. Exits with 1 if formatting is required."#; + +pub const CONFIG_FILE: &str = r#"Config file for YARA-X + +Config file which controls the behavior of YARA-X. See XXX (FILL IN URL +ONCE DOCS ARE WRITTEN) for supported options. + +If config file is not specified, ${HOME}/.yara-x.toml is used. 
If that does not +exist the default options are applied."#; diff --git a/cli/src/main.rs b/cli/src/main.rs index 6a145cdf7..b0368a0eb 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,7 +1,9 @@ mod commands; +mod config; mod help; mod walk; +use config::{load_config_from_file, Config}; use crossterm::tty::IsTty; use std::{io, panic, process}; use yansi::Color::Red; @@ -19,6 +21,7 @@ const APP_HELP_TEMPLATE: &str = r#"YARA-X {version}, the pattern matching swiss "#; const EXIT_ERROR: i32 = 1; +const CONFIG_FILE: &str = ".yara-x.toml"; fn main() -> anyhow::Result<()> { // Enable support for ANSI escape codes in Windows. In other platforms @@ -58,12 +61,27 @@ fn main() -> anyhow::Result<()> { process::exit(EXIT_ERROR); })); + let config: Config = match home::home_dir() { + Some(home_path) if !home_path.as_os_str().is_empty() => { + load_config_from_file(&home_path.join(CONFIG_FILE)).unwrap_or_else( + |err| { + println!("Error parsing config, using defaults: {}", err); + Config::default() + }, + ) + } + _ => { + println!("Unable to find home directory, using defaults."); + Config::default() + } + }; + let result = match args.subcommand() { #[cfg(feature = "debug-cmd")] Some(("debug", args)) => commands::exec_debug(args), Some(("check", args)) => commands::exec_check(args), Some(("fix", args)) => commands::exec_fix(args), - Some(("fmt", args)) => commands::exec_fmt(args), + Some(("fmt", args)) => commands::exec_fmt(args, config.fmt), Some(("scan", args)) => commands::exec_scan(args), Some(("dump", args)) => commands::exec_dump(args), Some(("compile", args)) => commands::exec_compile(args), diff --git a/docs/YARA-X Config Guide.md b/docs/YARA-X Config Guide.md new file mode 100644 index 000000000..332a1e6bb --- /dev/null +++ b/docs/YARA-X Config Guide.md @@ -0,0 +1,85 @@ +YARA-X Config Guide +=================== + +YARA-X uses a configuration file for controlling the behavior of different +commands. 
It currently supports the fmt command, but others will be added in the +future. + +The `yr` command looks in `${HOME}/.yara-x.toml` when starting up. If that file +does not exist the default values are used. + +An example `.yara-x.toml` file is below, with comments that explain each option. +This is the definitive list of supported configuration options, and will be +updated as more are added. + +```toml +# Config file for YARA-X. + +# Any options that are not valid are ignored. However, valid keys with an +# invalid type will cause a parsing error. For example, if you set +# rule.indent_spaces to false, it will result in a parsing error. +pants = false # Invalid keys are ignored. + +# The configuration of the "fmt" subcommand can be controlled by options in the +# "fmt" section. Each line is a key-value pair where the key uses a dot notation +# to delineate different options. The "rule" namespace is for options that +# apply to the rule as a whole, while the "meta" and "patterns" namespaces are +# for options that only apply to those sections in a rule. +[fmt] +# Indent section headers so that: +# +# rule a { +# condition: +# true +# } +# +# Becomes: +# +# rule a { +# condition: +# true +# } +rule.indent_section_headers = true + +# Indent section contents so that: +# rule a { +# condition: +# true +# } +# +# Becomes: +# +# rule a { +# condition: +# true +# } +rule.indent_section_contents = true + +# Number of spaces to use for indentation. Setting this to 0 will use one tab +# character per level of indentation. To disable indentation entirely use +# rule.indent_section_headers and rule.indent_section_contents. +rule.indent_spaces = 2 + +# Align metadata values so that: +# +# rule a { +# meta: +# key = "a" +# long_key = "b" +# } +# +# Becomes: +# +# rule a { +# meta: +# key = "a" +# long_key = "b" +# } +# +# Note that alignment is done with spaces, regardless of rule.indent_spaces +# setting.
+meta.align_values = false + +# Same as meta.align_values but applies to patterns. +patterns.align_values = false +``` \ No newline at end of file diff --git a/fmt/src/indentation.rs b/fmt/src/indentation.rs index 822577a29..5547dfe5a 100644 --- a/fmt/src/indentation.rs +++ b/fmt/src/indentation.rs @@ -15,6 +15,7 @@ where { input: T, indent_level: i16, + num_spaces: u8, output_buffer: VecDeque>, } @@ -22,8 +23,13 @@ impl<'a, T> AddIndentationSpaces<'a, T> where T: TokenStream<'a>, { - pub fn new(input: T) -> Self { - Self { input, indent_level: 0, output_buffer: VecDeque::new() } + pub fn new(input: T, num_spaces: u8) -> Self { + Self { + input, + num_spaces, + indent_level: 0, + output_buffer: VecDeque::new(), + } } } @@ -52,9 +58,14 @@ where Token::Newline => { self.output_buffer.push_back(Token::Newline); for _ in 0..self.indent_level { - // Indent with two spaces per level - self.output_buffer.push_back(Token::Whitespace); - self.output_buffer.push_back(Token::Whitespace); + if self.num_spaces == 0 { + self.output_buffer.push_back(Token::Tab); + } else { + for _ in 0..self.num_spaces { + self.output_buffer + .push_back(Token::Whitespace); + } + } } return self.output_buffer.pop_front(); } diff --git a/fmt/src/lib.rs b/fmt/src/lib.rs index 1bca28a34..c26305bd4 100644 --- a/fmt/src/lib.rs +++ b/fmt/src/lib.rs @@ -65,6 +65,9 @@ pub enum Error { pub struct Formatter { align_metadata: bool, align_patterns: bool, + indent_section_headers: bool, + indent_section_contents: bool, + indent_spaces: u8, } impl Default for Formatter { @@ -77,7 +80,13 @@ impl Default for Formatter { impl Formatter { /// Creates a new formatter. pub fn new() -> Self { - Formatter { align_metadata: true, align_patterns: true } + Formatter { + align_metadata: true, + align_patterns: true, + indent_section_headers: true, + indent_section_contents: true, + indent_spaces: 2, + } } /// Specify if the metadata block must be aligned. 
@@ -148,6 +157,82 @@ impl Formatter { self } + /// Specify if the section headers must be indented. + /// + /// If true, the section headers look like this... + /// + /// ```text + /// rule test { + /// strings: + /// $short = "foo" + /// $very_long = "bar" + /// $even_longer = "baz" + /// condition: + /// ... + /// } + /// ``` + /// + /// And if false, the section headers look like this... + /// + /// ```text + /// rule test { + /// strings: + /// $short = "foo" + /// $very_long = "bar" + /// $even_longer = "baz" + /// condition: + /// ... + /// } + /// ``` + /// + /// The default value is `true`. + pub fn indent_section_headers(mut self, yes: bool) -> Self { + self.indent_section_headers = yes; + self + } + + /// Specify if the section contents must be indented. + /// + /// If true, the section contents look like this... + /// + /// ```text + /// rule test { + /// strings: + /// $short = "foo" + /// $very_long = "bar" + /// $even_longer = "baz" + /// condition: + /// ... + /// } + /// ``` + /// + /// And if false, the section contents look like this... + /// + /// ```text + /// rule test { + /// strings: + /// $short = "foo" + /// $very_long = "bar" + /// $even_longer = "baz" + /// condition: + /// ... + /// } + /// ``` + /// + /// The default value is `true`. + pub fn indent_section_contents(mut self, yes: bool) -> Self { + self.indent_section_contents = yes; + self + } + + /// Number of spaces to indent, if indenting at all. Set to 0 to use tabs. + /// + /// The default is `2`. + pub fn indent_spaces(mut self, n: u8) -> Self { + self.indent_spaces = n; + self + } + /// Reads YARA source code from `input` and write it into `output` after /// formatting.
/// @@ -480,8 +565,18 @@ impl Formatter { let tokens = FormatHexPatterns::new(tokens); - let tokens = Self::indent_body(tokens); - let tokens = Self::indent_sections(tokens); + let tokens: Box>> = + if self.indent_section_headers { + Box::new(Self::indent_body(tokens)) + } else { + Box::new(tokens) + }; + let tokens: Box>> = + if self.indent_section_contents { + Box::new(Self::indent_sections(tokens)) + } else { + Box::new(tokens) + }; let tokens = Self::indent_hex_patterns(tokens); let tokens = Self::indent_parenthesized_exprs(tokens); @@ -534,7 +629,7 @@ impl Formatter { processor::actions::copy, );*/ - let tokens = AddIndentationSpaces::new(tokens); + let tokens = AddIndentationSpaces::new(tokens, self.indent_spaces); let tokens = RemoveTrailingSpaces::new(tokens); tokens diff --git a/fmt/src/tokens/mod.rs b/fmt/src/tokens/mod.rs index 4eeacf9f3..a30fbd175 100644 --- a/fmt/src/tokens/mod.rs +++ b/fmt/src/tokens/mod.rs @@ -212,6 +212,8 @@ pub(crate) enum Token<'a> { // Non-control tokens // Whitespace, + #[allow(dead_code)] + Tab, Comment(&'a [u8]), BlockComment(Vec>), @@ -251,6 +253,7 @@ impl<'a> Token<'a> { } Token::Indentation(..) => categories::BaseCategory::Indentation, Token::Whitespace => categories::BaseCategory::Whitespace, + Token::Tab => categories::BaseCategory::Whitespace, Token::Comment(..) | Token::BlockComment(..) | Token::TailComment(..) @@ -291,6 +294,7 @@ impl<'a> Token<'a> { pub fn as_bytes(&self) -> &'a [u8] { match self { Token::Whitespace => b" ", + Token::Tab => b"\t", Token::Newline => b"\n", Token::Identifier(s) | Token::Keyword(s) @@ -420,6 +424,7 @@ pub(crate) trait TokenStream<'a>: Iterator> { col_num = 0; } Token::Whitespace + | Token::Tab | Token::Comment(_) | Token::Identifier(_) | Token::Keyword(_)