From 934deb11f3ff19b3045afdcb0ec6e6af8cd4e54e Mon Sep 17 00:00:00 2001 From: PabstMirror Date: Sat, 24 Aug 2024 09:26:13 -0500 Subject: [PATCH] sqf: check `format` string and args --- libs/sqf/src/analyze/codes/mod.rs | 1 + .../sqf/src/analyze/codes/saa7_format_args.rs | 45 ++++++++ libs/sqf/src/analyze/format_args.rs | 108 ++++++++++++++++++ libs/sqf/src/analyze/mod.rs | 2 + libs/sqf/tests/analyze.rs | 1 + .../tests/analyze/saa7_format_args/source.sqf | 5 + .../analyze/saa7_format_args/stdout.ansi | 20 ++++ 7 files changed, 182 insertions(+) create mode 100644 libs/sqf/src/analyze/codes/saa7_format_args.rs create mode 100644 libs/sqf/src/analyze/format_args.rs create mode 100644 libs/sqf/tests/analyze/saa7_format_args/source.sqf create mode 100644 libs/sqf/tests/analyze/saa7_format_args/stdout.ansi diff --git a/libs/sqf/src/analyze/codes/mod.rs b/libs/sqf/src/analyze/codes/mod.rs index 80e0e6b0..fe834cea 100644 --- a/libs/sqf/src/analyze/codes/mod.rs +++ b/libs/sqf/src/analyze/codes/mod.rs @@ -9,3 +9,4 @@ pub mod saa2_find_in_str; pub mod saa3_typename; pub mod saa4_str_format; pub mod saa5_select_parse_number; +pub mod saa7_format_args; diff --git a/libs/sqf/src/analyze/codes/saa7_format_args.rs b/libs/sqf/src/analyze/codes/saa7_format_args.rs new file mode 100644 index 00000000..f3b13715 --- /dev/null +++ b/libs/sqf/src/analyze/codes/saa7_format_args.rs @@ -0,0 +1,45 @@ +use std::ops::Range; + +use hemtt_workspace::reporting::{Code, Diagnostic, Processed, Severity}; + +pub struct FormatArgs { + span: Range, + problem: String, + + diagnostic: Option, +} + +impl Code for FormatArgs { + fn ident(&self) -> &'static str { + "SAA7" + } + + fn severity(&self) -> Severity { + Severity::Help + } + + fn message(&self) -> String { + self.problem.clone() + } + + fn diagnostic(&self) -> Option { + self.diagnostic.clone() + } +} + +impl FormatArgs { + #[must_use] + pub fn new(span: Range, problem: String, processed: &Processed) -> Self { + Self { + span, + problem, + diagnostic: None, + } + .generate_processed(processed) + } + + fn generate_processed(mut self, processed: &Processed) -> Self { + self.diagnostic = Diagnostic::new_for_processed(&self, self.span.clone(), processed); + self + } +} diff --git a/libs/sqf/src/analyze/format_args.rs b/libs/sqf/src/analyze/format_args.rs new file mode 100644 index 00000000..849ba90d --- /dev/null +++ b/libs/sqf/src/analyze/format_args.rs @@ -0,0 +1,108 @@ +use crate::{analyze::codes::saa7_format_args::FormatArgs, Expression, Statements, UnaryCommand}; +use hemtt_workspace::reporting::{Code, Processed}; +use std::{cmp::Ordering, sync::Arc}; + +#[must_use] +pub fn format_args(statements: &Statements, processed: &Processed) -> Vec> { + let mut advice: Vec> = Vec::new(); + for statement in statements.content() { + for expression in statement.walk_expressions() { + advice.extend(check_expression(expression, processed)); + } + } + advice +} + +#[must_use] +fn check_expression(expression: &Expression, processed: &Processed) -> Vec> { + let Expression::UnaryCommand(UnaryCommand::Named(name), target, _) = expression else { + return Vec::new(); + }; + if name.to_lowercase() != "format" { + return Vec::new(); + } + let Expression::Array(args, _) = &**target else { + return Vec::new(); + }; + if args.is_empty() { + return vec![Arc::new(FormatArgs::new( + expression.full_span(), + "format string: empty array".to_string(), + processed, + ))]; + } + let Expression::String(format, _, _) = &args[0] else { + return Vec::new(); + }; + + #[allow(clippy::option_if_let_else)] + if let Some(problem) = get_format_problem(format, args.len() - 1) { + vec![Arc::new(FormatArgs::new( + expression.full_span(), + problem, + processed, + ))] + } else { + Vec::new() + } +} + +#[must_use] +fn get_format_problem(input: &str, extra_args: usize) -> Option { + let format = format!("{input} ",); // add extra terminator + + let mut tokens: Vec = Vec::new(); + let mut token_active = false; + let mut token_start = 0; + for (i, c) in format.chars().enumerate() { + if token_active && !c.is_ascii_digit() { + token_active = false; + if i > token_start { + let token_value = format + .get(token_start..i) + .unwrap_or_default() + .parse() + .unwrap_or_default(); + tokens.push(token_value); + } else if c != '%' { + return Some(format!( + "format string: non-escaped \"%\" [at index {token_start}]" + )); + } + } + if !token_active && c == '%' { + token_active = true; + token_start = i + 1; + } + } + let max_index = *tokens.iter().max().unwrap_or(&0); + + match extra_args.cmp(tokens.iter().max().unwrap_or(&0)) { + Ordering::Less => Some(format!( + "format string: undefined tokens [used \"%{max_index}\", passed {extra_args}]" + )), + Ordering::Greater => Some(format!( + "format string: unused args [used \"%{max_index}\", passed {extra_args}]" + )), + Ordering::Equal => { + if max_index > tokens.len() { + Some(format!( + "format string: skipped tokens [used \"%{max_index}\", but only {} tokens]", + tokens.len() + )) + } else { + None + } + } + } +} + +#[test] +fn test() { + assert!(get_format_problem("", 0).is_none()); + assert!(get_format_problem("%1%2", 2).is_none()); + assert!(get_format_problem("%1%2", 1).is_some()); // undefined tokens + assert!(get_format_problem("%1%2", 3).is_some()); // unused args + assert!(get_format_problem("%2", 2).is_some()); // skipped tokens + assert!(get_format_problem("50%", 0).is_some()); // un-escaped % +} diff --git a/libs/sqf/src/analyze/mod.rs b/libs/sqf/src/analyze/mod.rs index d8077550..3273b09d 100644 --- a/libs/sqf/src/analyze/mod.rs +++ b/libs/sqf/src/analyze/mod.rs @@ -2,6 +2,7 @@ pub mod codes; mod event_handlers; mod find_in_str; +mod format_args; mod if_assign; mod required_version; mod select_parse_number; @@ -58,6 +59,7 @@ pub fn analyze( warnings.extend(find_in_str::find_in_str(statements, processed)); warnings.extend(typename::typename(statements, processed)); // warnings.extend(str_format::str_format(statements, processed)); // Too many false positives for now + warnings.extend(format_args::format_args(statements, processed)); warnings.extend(select_parse_number::select_parse_number( statements, processed, database, )); diff --git a/libs/sqf/tests/analyze.rs b/libs/sqf/tests/analyze.rs index f9d158ec..ef3cbcae 100644 --- a/libs/sqf/tests/analyze.rs +++ b/libs/sqf/tests/analyze.rs @@ -67,3 +67,4 @@ analyze!(saa2_find_in_str); analyze!(saa3_typename); analyze!(saa4_str_format); analyze!(saa5_select_parse_number); +analyze!(saa7_format_args); diff --git a/libs/sqf/tests/analyze/saa7_format_args/source.sqf b/libs/sqf/tests/analyze/saa7_format_args/source.sqf new file mode 100644 index 00000000..9be336ad --- /dev/null +++ b/libs/sqf/tests/analyze/saa7_format_args/source.sqf @@ -0,0 +1,5 @@ +format ["%1", 1, 2, 3]; + +format ["%1%3", 1]; + +format ["%5", 1, 2 ,3 ,4, 5]; diff --git a/libs/sqf/tests/analyze/saa7_format_args/stdout.ansi b/libs/sqf/tests/analyze/saa7_format_args/stdout.ansi new file mode 100644 index 00000000..6fe17734 --- /dev/null +++ b/libs/sqf/tests/analyze/saa7_format_args/stdout.ansi @@ -0,0 +1,20 @@ +help[SAA7]: format string: unused args [used "%1", passed 3] + ┌─ source.sqf:1:1 + │ +1 │ format ["%1", 1, 2, 3]; + │ ^^^^^^^^^^^^^^^^^^^^^ format string: unused args [used "%1", passed 3] + + +help[SAA7]: format string: undefined tokens [used "%3", passed 1] + ┌─ source.sqf:3:1 + │ +3 │ format ["%1%3", 1]; + │ ^^^^^^^^^^^^^^^^^ format string: undefined tokens [used "%3", passed 1] + + +help[SAA7]: format string: skipped tokens [used "%5", but only 1 tokens] + ┌─ source.sqf:5:1 + │ +5 │ format ["%5", 1, 2 ,3 ,4, 5]; + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ format string: skipped tokens [used "%5", but only 1 tokens] +