From 362d9346f8f3f12d875885d1572288c737fcd83b Mon Sep 17 00:00:00 2001 From: tlento Date: Wed, 8 Nov 2023 14:22:39 -0800 Subject: [PATCH] Fix query validation for metric_time requirements As of right now we require metric_time in the group by expression for any metric query against cumulative metrics or derived metrics with time offset windows or an offset_to_grain parameter set. The current query validation was only asserting that some time dimension was included in the group by. In the case of derived metrics, this simply failed later in the query building phase for any query with a different time dimension in the group by expression. In the case of cumulative metrics, this would run an incorrectly rendered query and then return incorrect results. This validation change ensures that users have the proper configuration on queries based on our current limitations. --- .../unreleased/Fixes-20231108-150708.yaml | 6 ++ metricflow/query/query_parser.py | 14 +++- metricflow/test/query/test_query_parser.py | 74 +++++++++++++++++++ 3 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 .changes/unreleased/Fixes-20231108-150708.yaml diff --git a/.changes/unreleased/Fixes-20231108-150708.yaml b/.changes/unreleased/Fixes-20231108-150708.yaml new file mode 100644 index 0000000000..7a0a9ebdac --- /dev/null +++ b/.changes/unreleased/Fixes-20231108-150708.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix query validation for metric_time requirements +time: 2023-11-08T15:07:08.681102-08:00 +custom: + Author: tlento + Issue: "825" diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index f302a48365..2fd3036e3f 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -270,8 +270,13 @@ def parse_and_validate_query( finally: logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s") - def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None: - """Validate if all requested metrics are queryable without a time dimension.""" + def _validate_no_metric_time_dimension_query( + self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec] + ) -> None: + """Validate if all requested metrics are queryable without grouping by metric_time.""" + if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]): + return + for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.type == MetricType.CUMULATIVE: @@ -471,8 +476,9 @@ def _parse_and_validate_query( time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple( time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items() ) - if len(time_dimension_specs) == 0: - self._validate_no_time_dimension_query(metric_references=metric_references) + self._validate_no_metric_time_dimension_query( + metric_references=metric_references, time_dimension_specs=time_dimension_specs + ) self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) self._validate_date_part(metric_references, time_dimension_specs) diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 03e5ab9ccb..a340f0dc8e 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -97,6 +97,11 @@ expr: created_at type_params: time_granularity: month + - name: loaded_at + type: time + expr: created_at + type_params: + time_granularity: day - name: country type: categorical expr: country @@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None: ) +def test_cumulative_metric_no_time_dimension_validation() -> None: + """Test that queries for cumulative metrics fail if no time dimensions are selected. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + ) + + +def test_cumulative_metric_wrong_time_dimension_validation() -> None: + """Test that queries for cumulative metrics fail if the agg_time_dimension is not selected. + + Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is + not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric + computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is + restricted. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + group_by_names=["company__loaded_at"], + ) + + +def test_cumulative_metric_agg_time_dimension_name_validation() -> None: + """Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name. + + Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual + dimension. In many cases the underlying agg_time_dimension is a single column and users will use it + directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix + the query rendering issues that prevent this from working correctly. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + group_by_names=["company__ds"], + ) + + def test_derived_metric_query_parsing() -> None: """Test derived metric inputs are properly validated.""" bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)