From 4cef4f00337bd7d5c8921301ca1c18bc2e1a437d Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 16 Jan 2025 22:39:49 +0300 Subject: [PATCH] perf(cubesql): Improve rules loading perf (#9014) * Add rules loading benchmark * Add separate cheaper error type, without backtrace and input part * Avoid run-time formatting to add : * Avoid calls to replace when strip_prefix + to_string is enough --- rust/cubesql/cubesql/benches/benchmarks.rs | 128 +++++++------ .../cubesql/src/compile/rewrite/language.rs | 170 +++++++++++------- .../cubesql/src/compile/rewrite/mod.rs | 9 +- 3 files changed, 180 insertions(+), 127 deletions(-) diff --git a/rust/cubesql/cubesql/benches/benchmarks.rs b/rust/cubesql/cubesql/benches/benchmarks.rs index 13c6682e84b85..86e5bca191381 100644 --- a/rust/cubesql/cubesql/benches/benchmarks.rs +++ b/rust/cubesql/cubesql/benches/benchmarks.rs @@ -404,58 +404,58 @@ pub fn long_simple_in_str_expr_1k(c: &mut Criterion) { fn get_long_in_expr() -> String { r#" - SELECT - "WideCube"."dim1" as "column1", - "WideCube"."dim2" as "column2", - "WideCube"."dim3" as "column3", - "WideCube"."dim4" as "column4", - "WideCube"."dim5" as "column5", - "WideCube"."dim6" as "column6", - "WideCube"."dim7" as "column7", - "WideCube"."dim8" as "column8", - "WideCube"."dim9" as "column9", - "WideCube"."dim10" as "column10", - "WideCube"."dim11" as "column11", - "WideCube"."dim12" as "column12", - "WideCube"."dim13" as "column13", - "WideCube"."dim14" as "column14", - "WideCube"."dim15" as "column15", - SUM("WideCube"."dim16") as "some_sum" - FROM - "WideCube" - WHERE - "WideCube"."dim1" = 1 - AND "WideCube"."dim2" = 2 - AND "WideCube"."dim3" = 3 - AND "WideCube"."dim4" = 4 - AND "WideCube"."dim5" = 5 - AND "WideCube"."dim6" = 6 - AND "WideCube"."dim7" = 7 - AND "WideCube"."dim8" = 8 - AND "WideCube"."dim9" = 9 - AND "WideCube"."dim10" = 10 - AND ("WideCube"."dim11" = 42 OR "WideCube"."dim11" IS NULL) + SELECT + "WideCube"."dim1" as "column1", + "WideCube"."dim2" as "column2", + "WideCube"."dim3" as "column3", + "WideCube"."dim4" as "column4", + "WideCube"."dim5" as "column5", + "WideCube"."dim6" as "column6", + "WideCube"."dim7" as "column7", + "WideCube"."dim8" as "column8", + "WideCube"."dim9" as "column9", + "WideCube"."dim10" as "column10", + "WideCube"."dim11" as "column11", + "WideCube"."dim12" as "column12", + "WideCube"."dim13" as "column13", + "WideCube"."dim14" as "column14", + "WideCube"."dim15" as "column15", + SUM("WideCube"."dim16") as "some_sum" + FROM + "WideCube" + WHERE + "WideCube"."dim1" = 1 + AND "WideCube"."dim2" = 2 + AND "WideCube"."dim3" = 3 + AND "WideCube"."dim4" = 4 + AND "WideCube"."dim5" = 5 + AND "WideCube"."dim6" = 6 + AND "WideCube"."dim7" = 7 + AND "WideCube"."dim8" = 8 + AND "WideCube"."dim9" = 9 + AND "WideCube"."dim10" = 10 + AND ("WideCube"."dim11" = 42 OR "WideCube"."dim11" IS NULL) AND ( "WideCube"."dim12" IN ( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ) OR "WideCube"."dim12" IS NULL - ) AND "WideCube"."dim20" = 55 - GROUP BY - "WideCube"."dim1", - "WideCube"."dim2", - "WideCube"."dim3", - "WideCube"."dim4", - "WideCube"."dim5", - "WideCube"."dim6", - "WideCube"."dim7", - "WideCube"."dim8", - "WideCube"."dim9", - "WideCube"."dim10", - "WideCube"."dim11", - "WideCube"."dim12", - "WideCube"."dim13", - "WideCube"."dim14", + ) AND "WideCube"."dim20" = 55 + GROUP BY + "WideCube"."dim1", + "WideCube"."dim2", + "WideCube"."dim3", + "WideCube"."dim4", + "WideCube"."dim5", + "WideCube"."dim6", + "WideCube"."dim7", + "WideCube"."dim8", + "WideCube"."dim9", + "WideCube"."dim10", + "WideCube"."dim11", + "WideCube"."dim12", + "WideCube"."dim13", + "WideCube"."dim14", "WideCube"."dim15" "#.into() } @@ -482,8 +482,8 @@ pub fn tableau_logical_17(c: &mut Criterion) { fn get_ts_last_day_redshift_query() -> String { r#" WITH "qt_0" AS ( - SELECT - DATE_TRUNC('month', "ta_1"."order_date") "ca_1", + SELECT + DATE_TRUNC('month', "ta_1"."order_date") "ca_1", CASE WHEN sum("ta_1"."sumPrice") IS NOT NULL THEN sum("ta_1"."sumPrice") ELSE 0 @@ -495,8 +495,8 @@ fn get_ts_last_day_redshift_query() -> String { ) GROUP BY "ca_1" ) - SELECT - min("ta_2"."ca_1") "ca_3", + SELECT + min("ta_2"."ca_1") "ca_3", max("ta_2"."ca_1") "ca_4" FROM "qt_0" "ta_2" "# @@ -743,4 +743,26 @@ criterion_group! { targets = split_query, split_query_count_distinct, wrapped_query, power_bi_wrap, power_bi_sum_wrap, long_in_expr, long_simple_in_number_expr_1k, long_simple_in_str_expr_50, long_simple_in_str_expr_1k, tableau_logical_17, tableau_bugs_b8888, ts_last_day_redshift, quicksight_1, quicksight_2 } -criterion_main!(benches); + +fn simple_rules_loading(c: &mut Criterion) { + let context = Arc::new( + futures::executor::block_on(create_test_postgresql_cube_context(get_test_tenant_ctx())) + .unwrap(), + ); + // preload rules at least once + let _rules = rewrite_rules(context.clone()); + + c.bench_function("simple_rules_loading", |b| { + b.iter(|| { + rewrite_rules(context.clone()); + }) + }); +} + +criterion_group! { + name = rules_loading; + config = Criterion::default(); + targets = simple_rules_loading +} + +criterion_main!(benches, rules_loading); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/language.rs b/rust/cubesql/cubesql/src/compile/rewrite/language.rs index 1bae740891932..5622a02377c40 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/language.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/language.rs @@ -1,3 +1,34 @@ +use std::{ + num::{ParseFloatError, ParseIntError}, + str::ParseBoolError, +}; + +// `from_op` is generated as if-else chain from op.parse() +// Because of this a lot of instances of FromStr::Err will be constructed just as a "take other branch" marker +// This type should be very cheap to construct, at least while we use FromStr chains +// Don't store any input in here, FromStr is not designed for this, and it requires allocation +// Instead rely on egg::FromOpError for now, it will return allocated `op`, but only once in the end +// TODO make parser dispatch stricter, and return both input and LanguageParseError from `from_op` +#[derive(thiserror::Error, Debug)] +pub enum LanguageParseError { + #[error("Should start with '{0}'")] + ShouldStartWith(&'static str), + #[error("Can't be matched against {0}")] + ShouldMatch(&'static str), + #[error("Should contain a valid type")] + InvalidType, + #[error("Should contains a valid scalar type")] + InvalidScalarType, + #[error("Can't parse boolean scalar value with error: {0}")] + InvalidBoolValue(#[source] ParseBoolError), + #[error("Can't parse i64 scalar value with error: {0}")] + InvalidIntValue(#[source] ParseIntError), + #[error("Can't parse f64 scalar value with error: {0}")] + InvalidFloatValue(#[source] ParseFloatError), + #[error("Conversion from string is not supported")] + NotSupported, +} + #[macro_export] macro_rules! plan_to_language { ($(#[$meta:meta])* $vis:vis enum $name:ident $variants:tt) => { @@ -13,13 +44,13 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](String); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - return Ok([<$variant $var_field:camel>](s.replace(&prefix, ""))); + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + return Ok([<$variant $var_field:camel>](suffix.to_string())); } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -37,13 +68,13 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](usize); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - return Ok([<$variant $var_field:camel>](s.replace(&prefix, "").parse().unwrap())); + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + return Ok([<$variant $var_field:camel>](suffix.parse().unwrap())); } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -61,13 +92,13 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](bool); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - return Ok([<$variant $var_field:camel>](s.replace(&prefix, "").parse().unwrap())); + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + return Ok([<$variant $var_field:camel>](suffix.parse().unwrap())); } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -85,18 +116,17 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Option); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - let replaced = s.replace(&prefix, ""); - if &replaced == "None" { + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + if suffix == "None" { return Ok([<$variant $var_field:camel>](None)); } else { - return Ok([<$variant $var_field:camel>](Some(s.replace(&prefix, "").parse().unwrap()))); + return Ok([<$variant $var_field:camel>](Some(suffix.parse().unwrap()))); } } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -114,18 +144,17 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Option>); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - let replaced = s.replace(&prefix, ""); - if &replaced == "None" { + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + if suffix == "None" { return Ok([<$variant $var_field:camel>](None)); } else { - return Ok([<$variant $var_field:camel>](Some(s.split(',').map(|s| s.to_string()).collect::>()))); + return Ok([<$variant $var_field:camel>](Some(suffix.split(',').map(|s| s.to_string()).collect::>()))); } } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -143,18 +172,17 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Option); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - let replaced = s.replace(&prefix, ""); - if &replaced == "None" { + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + if suffix == "None" { return Ok([<$variant $var_field:camel>](None)); } else { - return Ok([<$variant $var_field:camel>](Some(s.to_string()))); + return Ok([<$variant $var_field:camel>](Some(suffix.to_string()))); } } - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -172,9 +200,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Column); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -192,9 +220,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Vec); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -212,9 +240,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](Vec); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -232,9 +260,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](String); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -252,9 +280,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](String); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } @@ -272,13 +300,13 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](String); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - if s.starts_with(&prefix) { - return Ok([<$variant $var_field:camel>](s.replace(&prefix, ""))); + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + if let Some(suffix) = s.strip_prefix(PREFIX) { + return Ok([<$variant $var_field:camel>](suffix.to_string())); } - Err(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix))) + Err(Self::Err::ShouldStartWith(PREFIX)) } } @@ -484,14 +512,16 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>]($var_field_type); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - let name = s.strip_prefix(&prefix).ok_or(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix)))?; + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + let name = s + .strip_prefix(PREFIX) + .ok_or(Self::Err::ShouldStartWith(PREFIX))?; match name { $($name => Ok([<$variant $var_field:camel>]($variant_type)),)* - x => Err(CubeError::internal(format!("{} can't be matched against {}", x, std::stringify!($var_field_type)))) + _ => Err(Self::Err::ShouldMatch(std::stringify!($var_field_type))) } } } @@ -557,10 +587,12 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](DataType); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - let typed_str = s.strip_prefix(&prefix).ok_or(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix)))?; + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + let typed_str = s + .strip_prefix(PREFIX) + .ok_or(Self::Err::ShouldStartWith(PREFIX))?; match typed_str { "Float32" => Ok([<$variant $var_field:camel>](DataType::Float32)), @@ -571,7 +603,7 @@ macro_rules! variant_field_struct { "Utf8" => Ok([<$variant $var_field:camel>](DataType::Utf8)), "Date32" => Ok([<$variant $var_field:camel>](DataType::Date32)), "Date64" => Ok([<$variant $var_field:camel>](DataType::Date64)), - _ => Err(CubeError::internal(format!("Can't convert {}. Should contain a valid type, actual: {}", s, typed_str))), + _ => Err(Self::Err::InvalidType), } } } @@ -590,24 +622,26 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>](ScalarValue); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(s: &str) -> Result { - let prefix = format!("{}:", std::stringify!([<$variant $var_field:camel>])); - let typed_str = s.strip_prefix(&prefix).ok_or(CubeError::internal(format!("Can't convert {}. Should start with '{}'", s, prefix)))?; + const PREFIX: &'static str = concat!(std::stringify!([<$variant $var_field:camel>]), ":"); + let typed_str = s + .strip_prefix(PREFIX) + .ok_or(Self::Err::ShouldStartWith(PREFIX))?; if let Some(value) = typed_str.strip_prefix("s:") { Ok([<$variant $var_field:camel>](ScalarValue::Utf8(Some(value.to_string())))) } else if let Some(value) = typed_str.strip_prefix("b:") { - let n: bool = value.parse().map_err(|err| CubeError::internal(format!("Can't parse boolean scalar value from '{}' with error: {}", typed_str, err)))?; + let n: bool = value.parse().map_err(|err| Self::Err::InvalidBoolValue(err))?; Ok([<$variant $var_field:camel>](ScalarValue::Boolean(Some(n)))) } else if let Some(value) = typed_str.strip_prefix("i:") { - let n: i64 = value.parse().map_err(|err| CubeError::internal(format!("Can't parse i64 scalar value from '{}' with error: {}", typed_str, err)))?; + let n: i64 = value.parse().map_err(|err| Self::Err::InvalidIntValue(err))?; Ok([<$variant $var_field:camel>](ScalarValue::Int64(Some(n)))) } else if let Some(value) = typed_str.strip_prefix("f:") { - let n: f64 = value.parse().map_err(|err| CubeError::internal(format!("Can't parse f64 scalar value from '{}' with error: {}", typed_str, err)))?; + let n: f64 = value.parse().map_err(|err| Self::Err::InvalidFloatValue(err))?; Ok([<$variant $var_field:camel>](ScalarValue::Float64(Some(n)))) } else { - Err(CubeError::internal(format!("Can't convert {}. Should contains type type, actual: {}", s, typed_str))) + Err(Self::Err::InvalidScalarType) } } } @@ -657,9 +691,9 @@ macro_rules! variant_field_struct { pub struct [<$variant $var_field:camel>]($var_field_type); impl FromStr for [<$variant $var_field:camel>] { - type Err = CubeError; + type Err = $crate::compile::rewrite::language::LanguageParseError; fn from_str(_s: &str) -> Result { - Err(CubeError::internal("Conversion from string is not supported".to_string())) + Err(Self::Err::NotSupported) } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs index ed788076c7798..61c96bc63e3e0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs @@ -6,12 +6,9 @@ pub mod rewriter; pub mod rules; use self::analysis::{LogicalPlanData, MemberNameToExpr}; -use crate::{ - compile::rewrite::{ - analysis::{LogicalPlanAnalysis, OriginalExpr}, - rewriter::{CubeEGraph, CubeRewrite}, - }, - CubeError, +use crate::compile::rewrite::{ + analysis::{LogicalPlanAnalysis, OriginalExpr}, + rewriter::{CubeEGraph, CubeRewrite}, }; use analysis::MemberNamesToExpr; use datafusion::{