From 47474a9f371551e96dc76ab0193841ba2e712bed Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 27 Nov 2024 18:56:13 +0000 Subject: [PATCH 01/22] Adds Schema class and modifies schema handling --- google/cloud/bigquery/client.py | 4 +- google/cloud/bigquery/schema.py | 121 ++++++++++++++++++++++++++------ google/cloud/bigquery/table.py | 15 ++-- tests/unit/test_client.py | 4 +- tests/unit/test_table.py | 8 ++- 5 files changed, 121 insertions(+), 31 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 97f239f7a..80b35cd12 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3680,7 +3680,7 @@ def insert_rows( if selected_fields is not None: schema = selected_fields - if len(schema) == 0: + if not schema: raise ValueError( ( "Could not determine schema for table '{}'. Call client.get_table() " @@ -4029,7 +4029,7 @@ def list_rows( # No schema, but no selected_fields. Assume the developer wants all # columns, so get the table resource for them rather than failing. - elif len(schema) == 0: + elif not schema: table = self.get_table(table.reference, retry=retry, timeout=timeout) schema = table.schema diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index cd9006d31..67c30427c 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -547,40 +547,66 @@ def _build_schema_resource(fields): """ return [field.to_api_repr() for field in fields] - def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. - - Args: - schema(Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]): - Table schema to convert. If some items are passed as mappings, - their content must be compatible with - :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. 
- - Returns: - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + """TODO docstring + QUESTION: do we want a flag to force the generation of a Schema object? - Raises: - Exception: If ``schema`` is not a sequence, or if any item in the - sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` - instance or a compatible mapping representation of the field. + CAST a list of elements to either: + * a Schema object with SchemaFields and an attribute + * a list of SchemaFields but no attribute """ - for field in schema: if not isinstance(field, (SchemaField, collections.abc.Mapping)): raise ValueError( "Schema items must either be fields or compatible " "mapping representations." ) - + + if isinstance(schema, Schema): + schema = Schema([ + field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) + for field in schema + ], foreign_type_info=schema.foreign_type_info) + return schema return [ field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) for field in schema ] +# OLD TO DELETE +# def _to_schema_fields(schema): +# """Coerce `schema` to a list of schema field instances. + +# Args: +# schema(Sequence[Union[ \ +# :class:`~google.cloud.bigquery.schema.SchemaField`, \ +# Mapping[str, Any] \ +# ]]): +# Table schema to convert. If some items are passed as mappings, +# their content must be compatible with +# :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + +# Returns: +# Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + +# Raises: +# Exception: If ``schema`` is not a sequence, or if any item in the +# sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` +# instance or a compatible mapping representation of the field. +# """ + +# for field in schema: +# if not isinstance(field, (SchemaField, collections.abc.Mapping)): +# raise ValueError( +# "Schema items must either be fields or compatible " +# "mapping representations." 
+# ) + +# return [ +# field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) +# for field in schema +# ] + class PolicyTagList(object): """Define Policy Tags for a column. @@ -921,3 +947,58 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo: config = cls("") config._properties = copy.deepcopy(resource) return config + + +class Schema: + def __init__(self, fields=None, foreign_type_info=None): + self._properties = {} + self._fields = [] if fields is None else list(fields) #Internal List + self.foreign_type_info = foreign_type_info + + @property + def foreign_type_info(self) -> Any: + """TODO: docstring""" + return self._properties.get("foreignTypeInfo") + + @foreign_type_info.setter + def foreign_type_info(self, value: str) -> None: + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["foreignTypeInfo"] = value + + @property + def _fields(self) -> Any: + """TODO: docstring""" + return self._properties.get("_fields") + + @_fields.setter + def _fields(self, value: list) -> None: + value = _isinstance_or_raise(value, list, none_allowed=True) + self._properties["_fields"] = value + + + def __len__(self): + return len(self._properties["_fields"]) + + def __getitem__(self, index): + return self._properties["_fields"][index] + + def __setitem__(self, index, value): + self._properties["_fields"][index] = value + + def __delitem__(self, index): + del self._properties["_fields"][index] + + def __iter__(self): + return iter(self._properties["_fields"]) + + def __str__(self): + return str(self._properties["_fields"]) + + def __repr__(self): + return f"Schema({self.foreign_type_info!r}, {self._properties['_fields']!r})" + + def append(self, item): + self._properties["_fields"].append(item) + + def extend(self, iterable): + self._properties["_fields"].extend(iterable) \ No newline at end of file diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index f6e5711aa..daacd083d 100644 --- 
a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.schema import Schema from google.cloud.bigquery.external_config import ExternalCatalogTableOptions if typing.TYPE_CHECKING: # pragma: NO COVER @@ -451,10 +452,13 @@ def schema(self): instance or a compatible mapping representation of the field. """ prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) - if not prop: - return [] - else: - return _parse_schema_resource(prop) + if not prop: # if empty Schema, empty list, None + if prop is None: + return None + return prop + elif isinstance(prop, Schema): + return prop + return _parse_schema_resource(prop) @schema.setter def schema(self, value): @@ -1336,7 +1340,8 @@ def _row_from_mapping(mapping, schema): Raises: ValueError: If schema is empty. 
""" - if len(schema) == 0: + + if not schema: raise ValueError(_TABLE_HAS_NO_SCHEMA) row = [] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..11e6fbb8b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,6 +60,7 @@ from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from google.cloud.bigquery.schema import Schema import google.cloud.bigquery.table from test_utils.imports import maybe_fail_import @@ -2608,7 +2609,8 @@ def test_update_table_w_schema_None(self): sent = {"schema": None} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) + valid_options = [Schema(), [], None] + self.assertIn(updated_table.schema, valid_options) def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 023510731..41f25dede 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,7 +31,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor +from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor, Schema def _mock_client(): @@ -499,7 +499,8 @@ def _verifyResourceProperties(self, table, resource): if "schema" in resource: self._verifySchema(table.schema, resource) else: - self.assertEqual(table.schema, []) + valid_options = [Schema(), [], None] + self.assertIn(table.schema, valid_options) if "externalDataConfiguration" in resource: edc = table.external_data_configuration @@ -536,7 +537,8 @@ def test_ctor(self): "/projects/%s/datasets/%s/tables/%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME), ) - self.assertEqual(table.schema, []) + valid_options = 
[Schema(), [], None] + self.assertIn(table.schema, valid_options) self.assertIsNone(table.created) self.assertIsNone(table.etag) From f22246af824bd93c612c9a3e6367f760be9be50e Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Mon, 2 Dec 2024 21:43:37 +0000 Subject: [PATCH 02/22] Updates some features of schema handle None more effectively. --- google/cloud/bigquery/schema.py | 46 +++++++++++++++++++-------------- google/cloud/bigquery/table.py | 6 ++--- tests/unit/test_table.py | 2 +- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 67c30427c..e9641db74 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -547,6 +547,7 @@ def _build_schema_resource(fields): """ return [field.to_api_repr() for field in fields] + def _to_schema_fields(schema): """TODO docstring QUESTION: do we want a flag to force the generation of a Schema object? @@ -561,18 +562,24 @@ def _to_schema_fields(schema): "Schema items must either be fields or compatible " "mapping representations." ) - + if isinstance(schema, Schema): - schema = Schema([ - field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) - for field in schema - ], foreign_type_info=schema.foreign_type_info) + schema = Schema( + [ + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + for field in schema + ], + foreign_type_info=schema.foreign_type_info, + ) return schema return [ field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) for field in schema ] + # OLD TO DELETE # def _to_schema_fields(schema): # """Coerce `schema` to a list of schema field instances. 
@@ -952,9 +959,9 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo: class Schema: def __init__(self, fields=None, foreign_type_info=None): self._properties = {} - self._fields = [] if fields is None else list(fields) #Internal List + self._fields = [] if fields is None else list(fields) # Internal List self.foreign_type_info = foreign_type_info - + @property def foreign_type_info(self) -> Any: """TODO: docstring""" @@ -975,30 +982,29 @@ def _fields(self, value: list) -> None: value = _isinstance_or_raise(value, list, none_allowed=True) self._properties["_fields"] = value - def __len__(self): - return len(self._properties["_fields"]) + return len(self._fields) def __getitem__(self, index): - return self._properties["_fields"][index] + return self._fields[index] def __setitem__(self, index, value): - self._properties["_fields"][index] = value - + self._fields[index] = value + def __delitem__(self, index): - del self._properties["_fields"][index] + del self._fields[index] def __iter__(self): - return iter(self._properties["_fields"]) - + return iter(self._fields) + def __str__(self): - return str(self._properties["_fields"]) - + return str(self._fields) + def __repr__(self): return f"Schema({self.foreign_type_info!r}, {self._properties['_fields']!r})" - + def append(self, item): - self._properties["_fields"].append(item) + self._fields.append(item) def extend(self, iterable): - self._properties["_fields"].extend(iterable) \ No newline at end of file + self._fields.extend(iterable) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index daacd083d..651e6f08b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -452,10 +452,8 @@ def schema(self): instance or a compatible mapping representation of the field. 
""" prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) - if not prop: # if empty Schema, empty list, None - if prop is None: - return None - return prop + if not prop: # if empty Schema, empty list, None + return [] elif isinstance(prop, Schema): return prop return _parse_schema_resource(prop) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 41f25dede..c60d75339 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -538,7 +538,7 @@ def test_ctor(self): % (self.PROJECT, self.DS_ID, self.TABLE_NAME), ) valid_options = [Schema(), [], None] - self.assertIn(table.schema, valid_options) + self.assertIn(table.schema, valid_options) self.assertIsNone(table.created) self.assertIsNone(table.etag) From 76709716b7e6ee550cc953d70b1c219123036198 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 4 Dec 2024 07:43:34 -0500 Subject: [PATCH 03/22] Update google/cloud/bigquery/schema.py --- google/cloud/bigquery/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index e9641db74..05de37a21 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -1001,7 +1001,7 @@ def __str__(self): return str(self._fields) def __repr__(self): - return f"Schema({self.foreign_type_info!r}, {self._properties['_fields']!r})" + return f"Schema({self.foreign_type_info!r}, {self._fields!r})" def append(self, item): self._fields.append(item) From c37be6793c545f5f201a9600f767df884712b086 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 6 Dec 2024 14:10:06 +0000 Subject: [PATCH 04/22] Updates Schema object and class related tests for Schema and Table --- google/cloud/bigquery/schema.py | 34 ++- google/cloud/bigquery/table.py | 2 + tests/unit/test_schema.py | 515 ++++++++++++++++++++------------ tests/unit/test_table.py | 35 ++- 4 files changed, 391 insertions(+), 195 deletions(-) diff --git a/google/cloud/bigquery/schema.py 
b/google/cloud/bigquery/schema.py index 05de37a21..0ff2bc040 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -550,8 +550,6 @@ def _build_schema_resource(fields): def _to_schema_fields(schema): """TODO docstring - QUESTION: do we want a flag to force the generation of a Schema object? - CAST a list of elements to either: * a Schema object with SchemaFields and an attribute * a list of SchemaFields but no attribute @@ -829,8 +827,6 @@ def serde_info(self) -> Any: prop = _get_sub_prop(self._properties, ["serDeInfo"]) if prop is not None: prop = StorageDescriptor().from_api_repr(prop) - print(f"DINOSAUR prop: {prop}") - return prop @serde_info.setter @@ -957,6 +953,7 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo: class Schema: + # TODO docstrings and type hints def __init__(self, fields=None, foreign_type_info=None): self._properties = {} self._fields = [] if fields is None else list(fields) # Internal List @@ -998,13 +995,38 @@ def __iter__(self): return iter(self._fields) def __str__(self): - return str(self._fields) + return str(self._fields) # This does not handle the case where FTI exists def __repr__(self): - return f"Schema({self.foreign_type_info!r}, {self._fields!r})" + return f"Schema({self._fields!r}, {self.foreign_type_info!r})" def append(self, item): self._fields.append(item) def extend(self, iterable): self._fields.extend(iterable) + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: dict) -> Schema: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls("") + config._properties = copy.deepcopy(resource) + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 651e6f08b..e27e88578 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -464,6 +464,8 @@ def schema(self, value): if value is None: self._properties[api_field] = None + elif isinstance(value, Schema): + self._properties[api_field] = value else: value = _to_schema_fields(value) self._properties[api_field] = {"fields": _build_schema_resource(value)} diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 806b86672..a91594004 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -20,6 +20,11 @@ ForeignTypeInfo, StorageDescriptor, SerDeInfo, + Schema, + SchemaField, + _parse_schema_resource, + _build_schema_resource, + _to_schema_fields, ) import unittest @@ -165,7 +170,6 @@ def test_to_api_repr(self): rounding_mode=ROUNDINGMODE, foreign_type_definition=None, ) - print(f"DINOSAUR: {field}\n\n{field.to_api_repr()}") self.assertEqual( field.to_api_repr(), { @@ -334,35 +338,6 @@ def test_to_standard_sql_simple_type(self): def test_to_standard_sql_struct_type(self): from google.cloud.bigquery import standard_sql - # Expected result object: - # - # name: "image_usage" - # type { - # type_kind: STRUCT - # struct_type { - # fields { - # name: "image_content" - # type {type_kind: BYTES} - # } - # fields { - # name: "last_used" - # type { - # type_kind: STRUCT - # struct_type { - # fields { - # name: "date_field" - # type {type_kind: DATE} - # } - # fields { - # name: "time_field" - # type {type_kind: TIME} - # } - # } - # } - # } - # } - # } - sql_type = self._get_standard_sql_data_type_class() # level 2 fields @@ -721,201 +696,367 @@ def _verifySchema(self, schema, resource): self._verify_field(field, r_field) -class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): - from 
google.cloud.bigquery.schema import _parse_schema_resource +# BEGIN PYTEST BASED SCHEMA TESTS ==================== +@pytest.fixture +def basic_resource(): + return { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } - return _parse_schema_resource(resource) - def _make_resource(self): - return { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } +@pytest.fixture +def resource_with_subfields(basic_resource): + basic_resource["schema"]["fields"].append( + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], } + ) + return basic_resource - def test__parse_schema_resource_defaults(self): - RESOURCE = self._make_resource() - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, RESOURCE) - def test__parse_schema_resource_subfields(self): - RESOURCE = self._make_resource() - RESOURCE["schema"]["fields"].append( - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - } - ) - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, RESOURCE) +@pytest.fixture +def resource_without_mode(basic_resource): + basic_resource["schema"]["fields"].append({"name": "phone", "type": "STRING"}) + return basic_resource - def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._make_resource() - RESOURCE["schema"]["fields"].append({"name": "phone", "type": "STRING"}) - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, RESOURCE) +class TestParseSchemaResource: + def verify_field(self, field, r_field): + assert 
field.name == r_field["name"] + assert field.field_type == r_field["type"] + assert field.mode == r_field.get("mode", "NULLABLE") + def verify_schema(self, schema, resource): + r_fields = resource["schema"]["fields"] + assert len(schema) == len(r_fields) -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + for field, r_field in zip(schema, r_fields): + self.verify_field(field, r_field) - return _build_schema_resource(resource) + # Tests focused on exercising the parse_schema_resource() method + def test_parse_schema_resource_defaults(self, basic_resource): + schema = _parse_schema_resource(basic_resource["schema"]) + self.verify_schema(schema, basic_resource) - def test_defaults(self): - from google.cloud.bigquery.schema import SchemaField + def test_parse_schema_resource_subfields(self, resource_with_subfields): + schema = _parse_schema_resource(resource_with_subfields["schema"]) + self.verify_schema(schema, resource_with_subfields) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + def test_parse_schema_resource_fields_without_mode(self, resource_without_mode): + schema = _parse_schema_resource(resource_without_mode["schema"]) + self.verify_schema(schema, resource_without_mode) - def test_w_description(self): - from google.cloud.bigquery.schema import SchemaField - DESCRIPTION = "DESCRIPTION" - full_name = SchemaField( - "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION - ) - age = SchemaField( - "age", - "INTEGER", - mode="REQUIRED", - # Explicitly unset description. 
- description=None, - ) - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) - - def test_w_subfields(self): - from google.cloud.bigquery.schema import SchemaField - - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - ph_type = SchemaField("type", "STRING", "REQUIRED") - ph_num = SchemaField("number", "STRING", "REQUIRED") - phone = SchemaField( - "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] - ) - resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, +class TestBuildSchemaResource: + # Tests focused on exercising the build_schema_resource() method + @pytest.mark.parametrize( + "fields, expected_resource", + [ + pytest.param( # Test case 1: Basic fields + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ], - }, - ) - - -class Test_to_schema_fields(unittest.TestCase): - @staticmethod - def _call_fut(schema): - from google.cloud.bigquery.schema import _to_schema_fields + [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + id="basic fields", + ), + pytest.param( # Test case 2: Field without mode + [SchemaField("phone", "STRING")], + [{"name": "phone", "type": "STRING", "mode": "NULLABLE"}], + id="field without mode yields NULLABLE mode", + ), + 
pytest.param( # Test case 3: Field with description + [ + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description="DESCRIPTION", + ), + SchemaField("age", "INTEGER", mode="REQUIRED", description=None), + ], + [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": "DESCRIPTION", + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ], + id="fields including description", + ), + pytest.param( # Test case 4: Field with subfields + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "phone", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("type", "STRING", "REQUIRED"), + SchemaField("number", "STRING", "REQUIRED"), + ], + ), + ], + [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + }, + ], + id="field with subfields", + ), + ], + ) + def test_build_schema_resource(self, fields, expected_resource): + resource = _build_schema_resource(fields) + assert resource == expected_resource - return _to_schema_fields(schema) +class TestToSchemaFields: # Test class for _to_schema_fields def test_invalid_type(self): schema = [ ("full_name", "STRING", "REQUIRED"), ("address", "STRING", "REQUIRED"), ] - with self.assertRaises(ValueError): - self._call_fut(schema) + with pytest.raises(ValueError): + _to_schema_fields(schema) def test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField - schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), + SchemaField( + "age", "INT64", mode="NULLABLE" + ), # Using correct type name INT64 ] - result = self._call_fut(schema) - self.assertEqual(result, schema) + result = _to_schema_fields(schema) + assert result == schema def 
test_invalid_mapping_representation(self): schema = [ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, ] - with self.assertRaises(Exception): - self._call_fut(schema) - - def test_valid_mapping_representation(self): - from google.cloud.bigquery.schema import SchemaField + with pytest.raises(Exception): # Or a more specific exception if known + _to_schema_fields(schema) - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - { - "name": "residence", - "type": "STRUCT", - "mode": "NULLABLE", - "fields": [ - {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, - {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + @pytest.mark.parametrize( + "schema, expected_schema", + [ + pytest.param( + [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "residence", + "type": "STRUCT", # Or RECORD, depending on usage + "mode": "NULLABLE", + "fields": [ + {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, + {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + ], + }, ], - }, - ] - - expected_schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField( - "residence", - "STRUCT", - mode="NULLABLE", - fields=[ - SchemaField("foo", "DATE", mode="NULLABLE"), - SchemaField("bar", "BYTES", mode="REQUIRED"), + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "residence", + "STRUCT", # Or RECORD + mode="NULLABLE", + fields=[ + SchemaField("foo", "DATE", mode="NULLABLE"), + SchemaField("bar", "BYTES", mode="REQUIRED"), + ], + ), ], + id="valid mapping representation", + ) + ], + ) + def test_valid_mapping_representation(self, schema, expected_schema): + result = _to_schema_fields(schema) + assert result == expected_schema + + +# Testing the new Schema Class ================= +class TestSchemaObject: # New test class for Schema object interactions + def test_schema_object_field_access(self): + schema = Schema( + 
fields=[ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + ] + ) + assert len(schema) == 2 + assert schema[0].name == "name" # Access fields using indexing + assert schema[1].field_type == "INTEGER" + + def test_schema_object_foreign_type_info(self): + schema = Schema(foreign_type_info="External") + assert schema.foreign_type_info == "External" + + schema.foreign_type_info = None + assert schema.foreign_type_info is None + + with pytest.raises(TypeError): + schema.foreign_type_info = 123 # Type check + + @pytest.mark.parametrize( + "schema, expected_repr", + [ + pytest.param( + Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ), + "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], 'TestInfo')", + id="repr with foreign type info", ), - ] + pytest.param( + Schema(fields=[SchemaField("name", "STRING")]), + "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], None)", + id="repr without foreign type info", + ), + ], + ) + def test_repr(self, schema, expected_repr): + assert repr(schema) == expected_repr # Test __repr__ + + def test_schema_iteration(self): + schema = Schema( + fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + ) + + field_names = [field.name for field in schema] + assert field_names == ["name", "age"] + + def test_schema_object_mutability(self): # Tests __setitem__ and __delitem__ + schema = Schema( + fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + ) + + schema[0] = SchemaField( + "updated_name", "STRING" + ) # Modify a field using setitem + assert schema[0].name == "updated_name" + + del schema[1] # Test __delitem__ + assert len(schema) == 1 + assert schema[0].name == "updated_name" + + def test_schema_append(self): + schema = Schema() # create an empty schema object + schema.append( + SchemaField("name", "STRING") + ) # use the append method to add a schema field + assert len(schema) == 1 + assert 
schema[0].name == "name" + + def test_schema_extend(self): + schema = Schema() # create an empty schema object + schema.extend( + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + ) # use the extend method to add multiple schema fields + assert len(schema) == 2 + assert schema[0].name == "name" + assert schema[1].name == "age" + + @pytest.mark.parametrize( + "schema, expected_api_repr", + [ + pytest.param( + Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ), + { + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": "TestInfo", + }, + id="repr with foreign type info", + ), + pytest.param( + Schema(fields=[SchemaField("name", "STRING")]), + { + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": None, + }, + id="repr without foreign type info", + ), + ], + ) + def test_to_api_repr(self, schema, expected_api_repr): + assert schema.to_api_repr() == expected_api_repr + + @pytest.mark.parametrize( + "api_repr, expected", + [ + pytest.param( + { + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": "TestInfo", + }, + Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ), + id="repr with foreign type info", + ), + pytest.param( + { + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": None, + }, + Schema(fields=[SchemaField("name", "STRING")]), + id="repr without foreign type info", + ), + ], + ) + def test_from_api_repr(self, api_repr, expected): + """GIVEN an api representation of a Schema object (i.e. resource) + WHEN converted into a Schema object using from_api_repr() and + displayed as a dict + THEN it will have the same representation a Schema object created + directly and displayed as a dict. 
+ """ + result = Schema.from_api_repr(api_repr) + assert result.to_api_repr() == expected.to_api_repr() + - result = self._call_fut(schema) - self.assertEqual(result, expected_schema) +# END PYTEST BASED SCHEMA TESTS ==================== class TestPolicyTags(unittest.TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c60d75339..77fa05dc4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -29,9 +29,14 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions -from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor, Schema +from google.cloud.bigquery.schema import ( + SerDeInfo, + StorageDescriptor, + Schema, + SchemaField, +) def _mock_client(): @@ -1539,6 +1544,32 @@ def test__row_from_mapping_w_schema(self): ) +# BEGIN PYTEST BASED TABLE.SCHEMA TESTS ==================== +# Sample SchemaField instances for testing +name_field = SchemaField("name", "STRING") +age_field = SchemaField("age", "INTEGER") + + +class TestTableSchema: + def test_table_schema_as_list(self): + table = Table("project.dataset.table_name") + table.schema = [name_field, age_field] + assert isinstance(table.schema, list) + assert table.schema == [name_field, age_field] + + def test_table_schema_as_schema_object(self): + table = Table("project.dataset.table_name") + schema_object = Schema( + fields=[name_field, age_field], foreign_type_info="TEST_INFO" + ) + table.schema = schema_object + assert isinstance(table.schema, Schema) + assert table.schema == schema_object + + +# END PYTEST BASED TABLE.SCHEMA TESTS ==================== + + class TestTableListItem(unittest.TestCase): @staticmethod def _get_target_class(): From aba6c10715a9b02fd1d6e0190d91fe48af72dd19 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 6 
Dec 2024 18:00:38 +0000 Subject: [PATCH 05/22] Updates Schema tests for coverage, fixes serdefinfo test --- google/cloud/bigquery/schema.py | 37 +-------------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 0ff2bc040..2c0481225 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -578,41 +578,6 @@ def _to_schema_fields(schema): ] -# OLD TO DELETE -# def _to_schema_fields(schema): -# """Coerce `schema` to a list of schema field instances. - -# Args: -# schema(Sequence[Union[ \ -# :class:`~google.cloud.bigquery.schema.SchemaField`, \ -# Mapping[str, Any] \ -# ]]): -# Table schema to convert. If some items are passed as mappings, -# their content must be compatible with -# :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. - -# Returns: -# Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] - -# Raises: -# Exception: If ``schema`` is not a sequence, or if any item in the -# sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` -# instance or a compatible mapping representation of the field. -# """ - -# for field in schema: -# if not isinstance(field, (SchemaField, collections.abc.Mapping)): -# raise ValueError( -# "Schema items must either be fields or compatible " -# "mapping representations." -# ) - -# return [ -# field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) -# for field in schema -# ] - - class PolicyTagList(object): """Define Policy Tags for a column. 
@@ -995,7 +960,7 @@ def __iter__(self): return iter(self._fields) def __str__(self): - return str(self._fields) # This does not handle the case where FTI exists + return f"Schema({self._fields}, {self.foreign_type_info})" def __repr__(self): return f"Schema({self._fields!r}, {self.foreign_type_info!r})" From 83aacdf64528daa7594de62bd5cdb78b2be4d523 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 6 Dec 2024 18:01:46 +0000 Subject: [PATCH 06/22] Updates Schema tests --- tests/unit/test_schema.py | 48 +++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index a91594004..6f7143491 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -681,21 +681,6 @@ def test_from_api_repr_none(self): self.assertEqual(None, self._get_target_class().from_api_repr(None)) -# TODO: dedup with the same class in test_table.py. -class _SchemaBase(object): - def _verify_field(self, field, r_field): - self.assertEqual(field.name, r_field["name"]) - self.assertEqual(field.field_type, r_field["type"]) - self.assertEqual(field.mode, r_field.get("mode", "NULLABLE")) - - def _verifySchema(self, schema, resource): - r_fields = resource["schema"]["fields"] - self.assertEqual(len(schema), len(r_fields)) - - for field, r_field in zip(schema, r_fields): - self._verify_field(field, r_field) - - # BEGIN PYTEST BASED SCHEMA TESTS ==================== @pytest.fixture def basic_resource(): @@ -902,6 +887,18 @@ def test_valid_mapping_representation(self, schema, expected_schema): result = _to_schema_fields(schema) assert result == expected_schema + def test_valid_schema_object(self): + schema = Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ) + result = _to_schema_fields(schema) + expected = Schema( + [SchemaField("name", "STRING", "NULLABLE", None, None, (), None)], + "TestInfo", + ) + assert result.to_api_repr() == expected.to_api_repr() + # 
Testing the new Schema Class ================= class TestSchemaObject: # New test class for Schema object interactions @@ -926,6 +923,16 @@ def test_schema_object_foreign_type_info(self): with pytest.raises(TypeError): schema.foreign_type_info = 123 # Type check + def test_str(self): + schema = Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ) + assert ( + str(schema) + == "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], TestInfo)" + ) + @pytest.mark.parametrize( "schema, expected_repr", [ @@ -1434,15 +1441,16 @@ def test_ctor_valid_input( output_format=output_format, serde_info=serde_info, ) - assert storage_descriptor._properties["inputFormat"] == input_format - assert storage_descriptor._properties["locationUri"] == location_uri - assert storage_descriptor._properties["outputFormat"] == output_format + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if serde_info is not None: assert ( - storage_descriptor._properties["serDeInfo"] == serde_info.to_api_repr() + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() ) else: - assert storage_descriptor._properties["serDeInfo"] == serde_info + assert storage_descriptor.serde_info is None @pytest.mark.parametrize( "input_format,location_uri,output_format,serde_info", From af02937b1d13102974a3efdd36cac59c95fa2a14 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 19 Dec 2024 01:16:55 +0000 Subject: [PATCH 07/22] fixes formatting --- google/cloud/bigquery/schema.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 2c0481225..8371d03f2 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -917,7 +917,7 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo: return config -class Schema: 
+class Schema(collections.UserList): # TODO docstrings and type hints def __init__(self, fields=None, foreign_type_info=None): self._properties = {} @@ -944,6 +944,15 @@ def _fields(self, value: list) -> None: value = _isinstance_or_raise(value, list, none_allowed=True) self._properties["_fields"] = value + @property + def data(self): + return self._properties.get("_fields") + + @data.setter + def data(self, value: list): + # for simplicity, no validation in this proof of concept + self._properties["_fields"] = value + def __len__(self): return len(self._fields) From ef2b95dcc59c6718b09a80b9f5df4e1d7632f14f Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 27 Dec 2024 12:46:41 +0000 Subject: [PATCH 08/22] updates len() checks --- google/cloud/bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 80b35cd12..97f239f7a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3680,7 +3680,7 @@ def insert_rows( if selected_fields is not None: schema = selected_fields - if not schema: + if len(schema) == 0: raise ValueError( ( "Could not determine schema for table '{}'. Call client.get_table() " @@ -4029,7 +4029,7 @@ def list_rows( # No schema, but no selected_fields. Assume the developer wants all # columns, so get the table resource for them rather than failing. 
- elif not schema: + elif len(schema) == 0: table = self.get_table(table.reference, retry=retry, timeout=timeout) schema = table.schema From cfa609cc9ff93ccbef265b4d38b5654fcce22f87 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 27 Dec 2024 12:49:33 +0000 Subject: [PATCH 09/22] update tests based on Schema superclass UserList --- google/cloud/bigquery/schema.py | 80 +++++++++++++++++++-------------- tests/unit/test_schema.py | 51 +++++++++++---------- 2 files changed, 73 insertions(+), 58 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8371d03f2..951a62ebd 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -17,9 +17,10 @@ from __future__ import annotations import collections + import copy import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +from typing import Any, Dict, Iterable, Optional, Union, cast, List, Mapping from google.cloud.bigquery import standard_sql from google.cloud.bigquery._helpers import ( @@ -524,7 +525,7 @@ def __repr__(self): def _parse_schema_resource(info): - """Parse a resource fragment into a schema field. + """Parse a resource fragment into a sequence of schema fields. Args: info: (Mapping[str, Dict]): should contain a "fields" key to be parsed @@ -548,12 +549,35 @@ def _build_schema_resource(fields): return [field.to_api_repr() for field in fields] -def _to_schema_fields(schema): - """TODO docstring - CAST a list of elements to either: - * a Schema object with SchemaFields and an attribute - * a list of SchemaFields but no attribute +def _to_schema_fields( + schema: Union[Schema, List[Union[SchemaField, Mapping[str, Any]]]] +) -> Union[Schema, List[SchemaField]]: + """Convert the input to either a Schema object OR a list of SchemaField objects. + + This helper method ensures that the fields in the schema are SchemaField objects. 
+ It accepts: + + * A :class:`~google.cloud.bigquery.schema.Schema` instance: It will + convert items that are mappings to + :class:`~google.cloud.bigquery.schema.SchemaField` instances and + preserve foreign_type_info. + + * A list of + :class:`~google.cloud.bigquery.schema.SchemaField` instances. + + * A list of mappings: It will convert each of the mapping items to + a :class:`~google.cloud.bigquery.schema.SchemaField` instance. + + Args: + schema: The schema to convert. + + Returns: + The schema as a list of SchemaField objects or a Schema object. + + Raises: + ValueError: If the items in ``schema`` are not valid. """ + for field in schema: if not isinstance(field, (SchemaField, collections.abc.Mapping)): raise ValueError( @@ -937,36 +961,24 @@ def foreign_type_info(self, value: str) -> None: @property def _fields(self) -> Any: """TODO: docstring""" - return self._properties.get("_fields") + return self._properties.get("fields") @_fields.setter def _fields(self, value: list) -> None: value = _isinstance_or_raise(value, list, none_allowed=True) - self._properties["_fields"] = value + value = _build_schema_resource(value) + self._properties["fields"] = value @property def data(self): - return self._properties.get("_fields") + return self._properties.get("fields") @data.setter def data(self, value: list): # for simplicity, no validation in this proof of concept - self._properties["_fields"] = value - - def __len__(self): - return len(self._fields) - - def __getitem__(self, index): - return self._fields[index] - - def __setitem__(self, index, value): - self._fields[index] = value - - def __delitem__(self, index): - del self._fields[index] - - def __iter__(self): - return iter(self._fields) + value = _isinstance_or_raise(value, list, none_allowed=True) + value = _build_schema_resource(value) + self._properties["fields"] = value def __str__(self): return f"Schema({self._fields}, {self.foreign_type_info})" @@ -974,12 +986,6 @@ def __str__(self): def __repr__(self): 
return f"Schema({self._fields!r}, {self.foreign_type_info!r})" - def append(self, item): - self._fields.append(item) - - def extend(self, iterable): - self._fields.extend(iterable) - def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -987,7 +993,13 @@ def to_api_repr(self) -> dict: Dict[str, Any]: A dictionary in the format used by the BigQuery API. """ - return copy.deepcopy(self._properties) + # If this is a RECORD type, then sub-fields are also included, + # add this to the serialized representation. + answer = self._properties.copy() + schemafields = any([isinstance(f, SchemaField) for f in self._fields]) + if schemafields: + answer["fields"] = [f.to_api_repr() for f in self._fields] + return answer @classmethod def from_api_repr(cls, resource: dict) -> Schema: @@ -1002,5 +1014,5 @@ def from_api_repr(cls, resource: dict) -> Schema: An instance of the class initialized with data from 'resource'. """ config = cls("") - config._properties = copy.deepcopy(resource) + config._properties = copy.copy(resource) return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 6c4a6dbf2..6476fdf35 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -681,7 +681,6 @@ def test_from_api_repr_none(self): self.assertEqual(None, self._get_target_class().from_api_repr(None)) -# BEGIN PYTEST BASED SCHEMA TESTS ==================== @pytest.fixture def basic_resource(): return { @@ -826,6 +825,7 @@ def test_build_schema_resource(self, fields, expected_resource): class TestToSchemaFields: # Test class for _to_schema_fields def test_invalid_type(self): + """Invalid list of tuples instead of list of mappings""" schema = [ ("full_name", "STRING", "REQUIRED"), ("address", "STRING", "REQUIRED"), @@ -846,7 +846,7 @@ def test_schema_fields_sequence(self): def test_invalid_mapping_representation(self): schema = [ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", 
"mode": "REQUIRED"}, + {"name": "address", "invalid_key": "STRING", "mode": "REQUIRED"}, ] with pytest.raises(Exception): # Or a more specific exception if known _to_schema_fields(schema) @@ -889,7 +889,7 @@ def test_valid_mapping_representation(self, schema, expected_schema): def test_valid_schema_object(self): schema = Schema( - fields=[SchemaField("name", "STRING")], + fields=[SchemaField("name", "STRING", description=None, policy_tags=None)], foreign_type_info="TestInfo", ) result = _to_schema_fields(schema) @@ -900,7 +900,6 @@ def test_valid_schema_object(self): assert result.to_api_repr() == expected.to_api_repr() -# Testing the new Schema Class ================= class TestSchemaObject: # New test class for Schema object interactions def test_schema_object_field_access(self): schema = Schema( @@ -909,9 +908,10 @@ def test_schema_object_field_access(self): SchemaField("age", "INTEGER"), ] ) + assert len(schema) == 2 - assert schema[0].name == "name" # Access fields using indexing - assert schema[1].field_type == "INTEGER" + assert schema[0]["name"] == "name" # Access fields using indexing + assert schema[1]["type"] == "INTEGER" def test_schema_object_foreign_type_info(self): schema = Schema(foreign_type_info="External") @@ -930,7 +930,7 @@ def test_str(self): ) assert ( str(schema) - == "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], TestInfo)" + == "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], TestInfo)" ) @pytest.mark.parametrize( @@ -941,12 +941,12 @@ def test_str(self): fields=[SchemaField("name", "STRING")], foreign_type_info="TestInfo", ), - "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], 'TestInfo')", + "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], 'TestInfo')", id="repr with foreign type info", ), pytest.param( Schema(fields=[SchemaField("name", "STRING")]), - "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], None)", + 
"Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], None)", id="repr without foreign type info", ), ], @@ -958,8 +958,7 @@ def test_schema_iteration(self): schema = Schema( fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] ) - - field_names = [field.name for field in schema] + field_names = [field["name"] for field in schema] assert field_names == ["name", "age"] def test_schema_object_mutability(self): # Tests __setitem__ and __delitem__ @@ -1002,9 +1001,7 @@ def test_schema_extend(self): foreign_type_info="TestInfo", ), { - "_fields": [ - SchemaField("name", "STRING", "NULLABLE", None, None, (), None) - ], + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], "foreignTypeInfo": "TestInfo", }, id="repr with foreign type info", @@ -1012,9 +1009,7 @@ def test_schema_extend(self): pytest.param( Schema(fields=[SchemaField("name", "STRING")]), { - "_fields": [ - SchemaField("name", "STRING", "NULLABLE", None, None, (), None) - ], + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], "foreignTypeInfo": None, }, id="repr without foreign type info", @@ -1029,25 +1024,35 @@ def test_to_api_repr(self, schema, expected_api_repr): [ pytest.param( { - "_fields": [ + "fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": "TestInfo", }, Schema( - fields=[SchemaField("name", "STRING")], + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ], foreign_type_info="TestInfo", ), id="repr with foreign type info", ), pytest.param( { - "_fields": [ + "fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": None, }, - Schema(fields=[SchemaField("name", "STRING")]), + Schema( + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ] + ), id="repr without foreign type info", ), ], @@ -1059,13 +1064,11 @@ def test_from_api_repr(self, api_repr, expected): THEN it will have the same 
representation a Schema object created directly and displayed as a dict. """ + result = Schema.from_api_repr(api_repr) assert result.to_api_repr() == expected.to_api_repr() -# END PYTEST BASED SCHEMA TESTS ==================== - - class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): From 1f00e76b5c61c9ba6693828d174e4bd91aface55 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 27 Dec 2024 14:10:55 +0000 Subject: [PATCH 10/22] removes whitespace --- google/cloud/bigquery/schema.py | 87 ++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 951a62ebd..8a661223a 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -269,7 +269,7 @@ def __get_int(api_repr, name): return v @classmethod - def from_api_repr(cls, api_repr: dict) -> "SchemaField": + def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: @@ -942,67 +942,118 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo: class Schema(collections.UserList): - # TODO docstrings and type hints - def __init__(self, fields=None, foreign_type_info=None): - self._properties = {} + """ + Represents a BigQuery schema, defining the structure and types of data. + + This class manages a list of schema fields and provides methods for + serialization and deserialization with the BigQuery API. It extends the + `collections.UserList` class to allow for list-like behavior. + + Args: + fields (Optional[List[Any]], optional): A list of SchemaField objects representing the fields + in the schema. Defaults to None, which creates an empty schema. + foreign_type_info (Optional[str], optional): Optional type information relevant for foreign + systems. Defaults to None. 
+ """ + + def __init__( + self, + fields: Optional[List[Any]] = None, + foreign_type_info: Optional[str] = None, + ): + self._properties: Dict[str, Any] = {} self._fields = [] if fields is None else list(fields) # Internal List self.foreign_type_info = foreign_type_info @property - def foreign_type_info(self) -> Any: - """TODO: docstring""" + def foreign_type_info(self) -> Optional[str]: return self._properties.get("foreignTypeInfo") @foreign_type_info.setter - def foreign_type_info(self, value: str) -> None: + def foreign_type_info(self, value: Optional[str]) -> None: + """ + Sets the foreign type information for this schema. + + Args: + value (Optional[str]): The foreign type information, can be set to None. + """ value = _isinstance_or_raise(value, str, none_allowed=True) self._properties["foreignTypeInfo"] = value @property - def _fields(self) -> Any: - """TODO: docstring""" + def _fields(self) -> Optional[List[Any]]: return self._properties.get("fields") @_fields.setter - def _fields(self, value: list) -> None: + def _fields(self, value: Optional[List[Any]]) -> None: + """ + Sets the internal list of field definitions. + + NOTE: In the API representation the 'fields' key points to a sequence of schema fields. + To maintain a similarity in names, the 'Schema._fields' attribute points to the + '_properties["fields"]' key. Schema class is superclassed by UserList, which requires the + use of a '.data' attribute. The decision was made to have both of these attributes point to + the same key "fields" in the '_properties' dictionary. Thus '.data' is an alias + for '_fields'. + + Args: + value (Optional[List[Any]]): A list of schema fields, can be set to None. 
+ """ value = _isinstance_or_raise(value, list, none_allowed=True) value = _build_schema_resource(value) self._properties["fields"] = value @property - def data(self): + def data(self) -> Any: return self._properties.get("fields") @data.setter - def data(self, value: list): - # for simplicity, no validation in this proof of concept + def data(self, value) -> None: + """ + Sets the list of schema fields. + + NOTE: In the API representation the 'fields' key points to a sequence of schema fields. + To maintain a similarity in names, the 'Schema._fields' attribute points to the + '_properties["fields"]' key. Schema class is superclassed by UserList, which requires the + use of a '.data' attribute. The decision was made to have both of these attributes point to + the same key "fields" in the '_properties' dictionary. Thus '.data' is an alias + for '_fields'. + + Args: + value (Optional[List[Any]]): A list of schema fields, can be set to None. + """ + value = _isinstance_or_raise(value, list, none_allowed=True) value = _build_schema_resource(value) self._properties["fields"] = value - def __str__(self): + def __str__(self) -> str: return f"Schema({self._fields}, {self.foreign_type_info})" - def __repr__(self): + def __repr__(self) -> str: return f"Schema({self._fields!r}, {self.foreign_type_info!r})" - def to_api_repr(self) -> dict: + def to_api_repr(self) -> Dict[str, Any]: """Build an API representation of this object. Returns: Dict[str, Any]: A dictionary in the format used by the BigQuery API. + If the schema contains SchemaField objects, the fields are + also converted to their API representations. """ # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. 
answer = self._properties.copy() + if self._fields is None: + return answer schemafields = any([isinstance(f, SchemaField) for f in self._fields]) if schemafields: answer["fields"] = [f.to_api_repr() for f in self._fields] return answer @classmethod - def from_api_repr(cls, resource: dict) -> Schema: + def from_api_repr(cls, resource: Dict[str, Any]) -> "Schema": """Factory: constructs an instance of the class (cls) given its API representation. @@ -1013,6 +1064,6 @@ def from_api_repr(cls, resource: dict) -> Schema: Returns: An instance of the class initialized with data from 'resource'. """ - config = cls("") + config = cls([]) config._properties = copy.copy(resource) return config From 39d4c1d345cb10b236d81a264d5091b11a3dc74a Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Sat, 7 Dec 2024 05:56:34 +0900 Subject: [PATCH 11/22] feat: add property for maxStaleness in table definitions (#2087) * feat: add property for maxStaleness in table definitions Signed-off-by: Yu Ishikawa * Update google/cloud/bigquery/table.py --------- Signed-off-by: Yu Ishikawa Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 35 +++++++++++++++++++++++++++ tests/unit/test_table.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 0ffb5014f..b8deb6f85 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -411,6 +411,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "external_catalog_table_options": "externalCatalogTableOptions", + "max_staleness": "maxStaleness", } def __init__(self, table_ref, schema=None) -> None: @@ -1144,6 +1145,40 @@ def __repr__(self): def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" + @property + def max_staleness(self): + """Union[str, None]: The maximum staleness of data that could be returned when the table is 
queried. + + Staleness encoded as a string encoding of sql IntervalValue type. + This property is optional and defaults to None. + + According to the BigQuery API documentation, maxStaleness specifies the maximum time + interval for which stale data can be returned when querying the table. + It helps control data freshness in scenarios like metadata-cached external tables. + + Returns: + Optional[str]: A string representing the maximum staleness interval + (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"]) + + @max_staleness.setter + def max_staleness(self, value): + """Set the maximum staleness for the table. + + Args: + value (Optional[str]): A string representing the maximum staleness interval. + Must be a valid time interval string. + Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds). + + Raises: + ValueError: If the value is not None and not a string. + """ + if value is not None and not isinstance(value, str): + raise ValueError("max_staleness must be a string or None") + + self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value + class TableListItem(_TableBase): """A read-only table resource from a list operation. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 711d739fa..f1922ffd0 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1483,6 +1483,49 @@ def test___str__(self): table1 = self._make_one(TableReference(dataset, "table1")) self.assertEqual(str(table1), "project1.dataset1.table1") + def test_max_staleness_getter(self): + """Test getting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Initially None + self.assertIsNone(table.max_staleness) + # Set max_staleness using setter + table.max_staleness = "1h" + self.assertEqual(table.max_staleness, "1h") + + def test_max_staleness_setter(self): + """Test setting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set valid max_staleness + table.max_staleness = "30m" + self.assertEqual(table.max_staleness, "30m") + # Set to None + table.max_staleness = None + self.assertIsNone(table.max_staleness) + + def test_max_staleness_setter_invalid_type(self): + """Test setting max_staleness with an invalid type raises ValueError.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Try setting invalid type + with self.assertRaises(ValueError): + table.max_staleness = 123 # Not a string + + def test_max_staleness_to_api_repr(self): + """Test max_staleness is correctly represented in API representation.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set max_staleness + table.max_staleness = "1h" + # Convert to API representation + resource = table.to_api_repr() + self.assertEqual(resource.get("maxStaleness"), "1h") + class Test_row_from_mapping(unittest.TestCase, 
_SchemaBase): PROJECT = "prahj-ekt" From c4f5fd57e3badcde6b6c82f6ad073e140f312a2e Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 11 Dec 2024 03:15:11 +0900 Subject: [PATCH 12/22] feat: add type hints to Client (#2044) * add type hints * Update client.py Moves import from being used solely during specific checks to being more universally available. * Update google/cloud/bigquery/client.py * Update client.py testing some minor changes to deal with mypy quirks * Update google/cloud/bigquery/client.py --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 97f239f7a..03ded93b1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -44,6 +44,8 @@ import uuid import warnings +import requests + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -65,6 +67,7 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.auth.credentials import Credentials from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers from google.cloud.bigquery import _pandas_helpers @@ -126,6 +129,7 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this + ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -133,8 +137,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. 
PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import requests # required by api-core - _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -231,15 +233,23 @@ class Client(ClientWithProject): def __init__( self, - project=None, - credentials=None, - _http=None, - location=None, - default_query_job_config=None, - default_load_job_config=None, - client_info=None, - client_options=None, + project: Optional[str] = None, + credentials: Optional[Credentials] = None, + _http: Optional[requests.Session] = None, + location: Optional[str] = None, + default_query_job_config: Optional[QueryJobConfig] = None, + default_load_job_config: Optional[LoadJobConfig] = None, + client_info: Optional[google.api_core.client_info.ClientInfo] = None, + client_options: Optional[ + Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] + ] = None, ) -> None: + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + # assert isinstance(client_options, google.api_core.client_options.ClientOptions) + super(Client, self).__init__( project=project, credentials=credentials, @@ -247,14 +257,10 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} + kw_args: Dict[str, Any] = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options is None: - client_options = {} - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict(client_options) if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint From 7006a31d9fd253417a9b7578093d69c89532c39f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 02:40:17 
+0800 Subject: [PATCH 13/22] chore(python): update dependencies in .kokoro/docker/docs (#2088) Source-Link: https://github.com/googleapis/synthtool/commit/e808c98e1ab7eec3df2a95a05331619f7001daef Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6301519a9..26306af66 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 -# created: 2024-11-12T12:09:45.821174897Z + digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 +# created: 2024-12-17T00:59:58.625514486Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 8bb076459..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,11 +2,11 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ 
--hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -23,7 +23,7 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,11 +32,41 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + 
--hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.27.1 \ - --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ - --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + 
--hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From 561f05fbdc03b417daafc368f1fdc08cd55e6dee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 08:58:58 -0800 Subject: [PATCH 14/22] chore(deps): bump jinja2 from 3.1.4 to 3.1.5 in /.kokoro (#2094) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .kokoro/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 006d8ef93..16db448c1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -254,9 +254,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.4 \ - --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ - --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb # via gcp-releasetool keyring==25.4.1 \ --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ From e3f57a699f745f424b58f89c6baa516eb5393b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 27 Dec 2024 16:55:01 -0600 Subject: [PATCH 15/22] feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 
preserve unknown fields from the REST API representation in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/schema.py | 90 ++++++++++-------------------- tests/unit/job/test_load_config.py | 29 ++++++++-- tests/unit/test_schema.py | 37 +++++++++++- tests/unit/test_table.py | 32 +++++++++-- 4 files changed, 116 insertions(+), 72 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8a661223a..87e6b0789 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -20,8 +20,9 @@ import copy import enum -from typing import Any, Dict, Iterable, Optional, Union, cast, List, Mapping +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery._helpers import ( _isinstance_or_raise, @@ -218,7 +219,10 @@ def __init__( rounding_mode: Union[RoundingMode, str, None] = None, foreign_type_definition: Optional[str] = None, ): - self._properties: Dict[str, Any] = {} + self._properties: Dict[str, Any] = { + "name": name, + "type": field_type, + } self._properties["name"] = name if mode is not None: @@ -259,14 +263,9 @@ def __init__( ) self._properties["type"] = field_type - self._fields = tuple(fields) + if fields: # Don't set the property if it's not set.
+ self._properties["fields"] = [field.to_api_repr() for field in fields] - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v @classmethod def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField": @@ -280,48 +279,19 @@ def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() - - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - default_value_expression = api_repr.get("defaultValueExpression", None) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. 
See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - rounding_mode = api_repr.get("roundingMode") - foreign_type_definition = api_repr.get("foreignTypeDefinition") - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - rounding_mode=rounding_mode, - foreign_type_definition=foreign_type_definition, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -330,7 +300,10 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. 
+ return None + return cast(str, type_).upper() @property def mode(self): @@ -339,7 +312,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -359,17 +332,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -405,7 +378,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -421,15 +394,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. - return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. 
See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -465,7 +433,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index becf3e959..3a681c476 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 6476fdf35..df1cab009 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +<<<<<<< HEAD from google.cloud import bigquery from google.cloud.bigquery.enums import RoundingMode from google.cloud.bigquery.standard_sql import StandardSqlStructType @@ -27,11 +28,18 @@ _to_schema_fields, ) +======= +import copy +>>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097)) import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -843,13 +851,40 @@ def test_schema_fields_sequence(self): result = _to_schema_fields(schema) assert result == schema - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ +<<<<<<< HEAD {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "address", "invalid_key": "STRING", "mode": "REQUIRED"}, ] with pytest.raises(Exception): # Or a more specific exception if known _to_schema_fields(schema) +======= + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. 
+ expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr +>>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097)) @pytest.mark.parametrize( "schema, expected_schema", diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f1922ffd0..eb9fc4bee 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -719,14 +720,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField From 3280e79090afe9df92f318c30eb497819b359e8e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 2 Jan 2025 12:23:15 -0500 Subject: [PATCH 16/22] Fix: add roundingmode as str test (#2098) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: adds test of roundingmode as a str * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/schema.py | 4 +++- tests/unit/test_schema.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 87e6b0789..fd738d295 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -261,12 +261,14 @@ def __init__( raise ValueError( "If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required." ) +<<<<<<< HEAD self._properties["type"] = field_type +======= +>>>>>>> 100708d8 (Fix: add roundingmode as str test (#2098)) if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] - @classmethod def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. 
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index df1cab009..c08ae390e 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -324,6 +324,11 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertEqual(schema_field.fields, fields) + def test_roundingmode_property_str(self): + ROUNDINGMODE = "ROUNDING_MODE_UNSPECIFIED" + schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types From 1837f81bea9e7e1eb7cd1b8c1bc0a6ab473b1afd Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 2 Jan 2025 17:55:22 +0000 Subject: [PATCH 17/22] more rebase and merge conflict resolution --- google/cloud/bigquery/schema.py | 3 - tests/unit/test_schema.py | 102 +++++++++----------------------- 2 files changed, 28 insertions(+), 77 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index fd738d295..9b09da8f7 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -261,11 +261,8 @@ def __init__( raise ValueError( "If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required." ) -<<<<<<< HEAD self._properties["type"] = field_type -======= ->>>>>>> 100708d8 (Fix: add roundingmode as str test (#2098)) if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c08ae390e..d5ff5e4f6 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-<<<<<<< HEAD + +import unittest +from unittest import mock + +import pytest + from google.cloud import bigquery from google.cloud.bigquery.enums import RoundingMode from google.cloud.bigquery.standard_sql import StandardSqlStructType @@ -28,18 +33,6 @@ _to_schema_fields, ) -======= -import copy ->>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097)) -import unittest -from unittest import mock - -import pytest - -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList - class TestSchemaField(unittest.TestCase): @staticmethod @@ -858,13 +851,6 @@ def test_schema_fields_sequence(self): def test_unknown_properties(self): schema = [ -<<<<<<< HEAD - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "invalid_key": "STRING", "mode": "REQUIRED"}, - ] - with pytest.raises(Exception): # Or a more specific exception if known - _to_schema_fields(schema) -======= { "name": "full_name", "type": "STRING", @@ -881,15 +867,8 @@ def test_unknown_properties(self): "anotherNewProperty": "another-test", }, ] - - # Make sure the setter doesn't mutate schema. 
- expected_schema = copy.deepcopy(schema) - - result = self._call_fut(schema) - - for api_repr, field in zip(expected_schema, result): - assert field.to_api_repr() == api_repr ->>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097)) + with pytest.raises(Exception): # Or a more specific exception if known + _to_schema_fields(schema) @pytest.mark.parametrize( "schema, expected_schema", @@ -927,19 +906,8 @@ def test_valid_mapping_representation(self, schema, expected_schema): result = _to_schema_fields(schema) assert result == expected_schema - def test_valid_schema_object(self): - schema = Schema( - fields=[SchemaField("name", "STRING", description=None, policy_tags=None)], - foreign_type_info="TestInfo", - ) - result = _to_schema_fields(schema) - expected = Schema( - [SchemaField("name", "STRING", "NULLABLE", None, None, (), None)], - "TestInfo", - ) - assert result.to_api_repr() == expected.to_api_repr() - +# Testing the new Schema Class ================= class TestSchemaObject: # New test class for Schema object interactions def test_schema_object_field_access(self): schema = Schema( @@ -948,10 +916,9 @@ def test_schema_object_field_access(self): SchemaField("age", "INTEGER"), ] ) - assert len(schema) == 2 - assert schema[0]["name"] == "name" # Access fields using indexing - assert schema[1]["type"] == "INTEGER" + assert schema[0].name == "name" # Access fields using indexing + assert schema[1].field_type == "INTEGER" def test_schema_object_foreign_type_info(self): schema = Schema(foreign_type_info="External") @@ -963,16 +930,6 @@ def test_schema_object_foreign_type_info(self): with pytest.raises(TypeError): schema.foreign_type_info = 123 # Type check - def test_str(self): - schema = Schema( - fields=[SchemaField("name", "STRING")], - foreign_type_info="TestInfo", - ) - assert ( - str(schema) - == "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], TestInfo)" - ) - @pytest.mark.parametrize( "schema, 
expected_repr", [ @@ -981,12 +938,12 @@ def test_str(self): fields=[SchemaField("name", "STRING")], foreign_type_info="TestInfo", ), - "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], 'TestInfo')", + "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], 'TestInfo')", id="repr with foreign type info", ), pytest.param( Schema(fields=[SchemaField("name", "STRING")]), - "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], None)", + "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], None)", id="repr without foreign type info", ), ], @@ -998,7 +955,8 @@ def test_schema_iteration(self): schema = Schema( fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] ) - field_names = [field["name"] for field in schema] + + field_names = [field.name for field in schema] assert field_names == ["name", "age"] def test_schema_object_mutability(self): # Tests __setitem__ and __delitem__ @@ -1041,7 +999,9 @@ def test_schema_extend(self): foreign_type_info="TestInfo", ), { - "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], "foreignTypeInfo": "TestInfo", }, id="repr with foreign type info", @@ -1049,7 +1009,9 @@ def test_schema_extend(self): pytest.param( Schema(fields=[SchemaField("name", "STRING")]), { - "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], + "_fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], "foreignTypeInfo": None, }, id="repr without foreign type info", @@ -1064,35 +1026,25 @@ def test_to_api_repr(self, schema, expected_api_repr): [ pytest.param( { - "fields": [ + "_fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": "TestInfo", }, Schema( - fields=[ - SchemaField( - "name", "STRING", description=None, policy_tags=None - ) - ], + fields=[SchemaField("name", "STRING")], 
foreign_type_info="TestInfo", ), id="repr with foreign type info", ), pytest.param( { - "fields": [ + "_fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": None, }, - Schema( - fields=[ - SchemaField( - "name", "STRING", description=None, policy_tags=None - ) - ] - ), + Schema(fields=[SchemaField("name", "STRING")]), id="repr without foreign type info", ), ], @@ -1104,11 +1056,13 @@ def test_from_api_repr(self, api_repr, expected): THEN it will have the same representation a Schema object created directly and displayed as a dict. """ - result = Schema.from_api_repr(api_repr) assert result.to_api_repr() == expected.to_api_repr() +# END PYTEST BASED SCHEMA TESTS ==================== + + class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): From 94e9e6e547729dda5ac6a6d6d27882eaf0f87aaa Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 2 Jan 2025 18:11:27 +0000 Subject: [PATCH 18/22] more conflict resolution --- tests/unit/test_schema.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d5ff5e4f6..16f1ffd03 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -906,6 +906,18 @@ def test_valid_mapping_representation(self, schema, expected_schema): result = _to_schema_fields(schema) assert result == expected_schema + def test_valid_schema_object(self): + schema = Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ) + result = _to_schema_fields(schema) + expected = Schema( + [SchemaField("name", "STRING", "NULLABLE", None, None, (), None)], + "TestInfo", + ) + assert result.to_api_repr() == expected.to_api_repr() + # Testing the new Schema Class ================= class TestSchemaObject: # New test class for Schema object interactions @@ -930,6 +942,16 @@ def test_schema_object_foreign_type_info(self): with pytest.raises(TypeError): schema.foreign_type_info = 123 
# Type check + def test_str(self): + schema = Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ) + assert ( + str(schema) + == "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], TestInfo)" + ) + @pytest.mark.parametrize( "schema, expected_repr", [ From db6ef9ec3a62c79dfe7ad0756b3676c7e60f0a3a Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 27 Dec 2024 12:49:33 +0000 Subject: [PATCH 19/22] update tests based on Schema superclass UserList --- google/cloud/bigquery/schema.py | 1 - tests/unit/test_schema.py | 64 ++++++++++++++------------------- 2 files changed, 27 insertions(+), 38 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 9b09da8f7..fe78c7348 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -17,7 +17,6 @@ from __future__ import annotations import collections - import copy import enum from typing import Any, cast, Dict, Iterable, Optional, Union diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 16f1ffd03..7a33204c1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -851,21 +851,8 @@ def test_schema_fields_sequence(self): def test_unknown_properties(self): schema = [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "someNewProperty": "test-value", - }, - { - "name": "age", - # Note: This type should be included, too. Avoid client-side - # validation, as it could prevent backwards-compatible - # evolution of the server-side behavior. 
- "typo": "INTEGER", - "mode": "REQUIRED", - "anotherNewProperty": "another-test", - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "address", "invalid_key": "STRING", "mode": "REQUIRED"}, ] with pytest.raises(Exception): # Or a more specific exception if known _to_schema_fields(schema) @@ -908,7 +895,7 @@ def test_valid_mapping_representation(self, schema, expected_schema): def test_valid_schema_object(self): schema = Schema( - fields=[SchemaField("name", "STRING")], + fields=[SchemaField("name", "STRING", description=None, policy_tags=None)], foreign_type_info="TestInfo", ) result = _to_schema_fields(schema) @@ -919,7 +906,6 @@ def test_valid_schema_object(self): assert result.to_api_repr() == expected.to_api_repr() -# Testing the new Schema Class ================= class TestSchemaObject: # New test class for Schema object interactions def test_schema_object_field_access(self): schema = Schema( @@ -928,9 +914,10 @@ def test_schema_object_field_access(self): SchemaField("age", "INTEGER"), ] ) + assert len(schema) == 2 - assert schema[0].name == "name" # Access fields using indexing - assert schema[1].field_type == "INTEGER" + assert schema[0]["name"] == "name" # Access fields using indexing + assert schema[1]["type"] == "INTEGER" def test_schema_object_foreign_type_info(self): schema = Schema(foreign_type_info="External") @@ -949,7 +936,7 @@ def test_str(self): ) assert ( str(schema) - == "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], TestInfo)" + == "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], TestInfo)" ) @pytest.mark.parametrize( @@ -960,12 +947,12 @@ def test_str(self): fields=[SchemaField("name", "STRING")], foreign_type_info="TestInfo", ), - "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], 'TestInfo')", + "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], 'TestInfo')", id="repr with foreign type info", ), pytest.param( 
Schema(fields=[SchemaField("name", "STRING")]), - "Schema([SchemaField('name', 'STRING', 'NULLABLE', None, None, (), None)], None)", + "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], None)", id="repr without foreign type info", ), ], @@ -977,8 +964,7 @@ def test_schema_iteration(self): schema = Schema( fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] ) - - field_names = [field.name for field in schema] + field_names = [field["name"] for field in schema] assert field_names == ["name", "age"] def test_schema_object_mutability(self): # Tests __setitem__ and __delitem__ @@ -1021,9 +1007,7 @@ def test_schema_extend(self): foreign_type_info="TestInfo", ), { - "_fields": [ - SchemaField("name", "STRING", "NULLABLE", None, None, (), None) - ], + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], "foreignTypeInfo": "TestInfo", }, id="repr with foreign type info", @@ -1031,9 +1015,7 @@ def test_schema_extend(self): pytest.param( Schema(fields=[SchemaField("name", "STRING")]), { - "_fields": [ - SchemaField("name", "STRING", "NULLABLE", None, None, (), None) - ], + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], "foreignTypeInfo": None, }, id="repr without foreign type info", @@ -1048,25 +1030,35 @@ def test_to_api_repr(self, schema, expected_api_repr): [ pytest.param( { - "_fields": [ + "fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": "TestInfo", }, Schema( - fields=[SchemaField("name", "STRING")], + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ], foreign_type_info="TestInfo", ), id="repr with foreign type info", ), pytest.param( { - "_fields": [ + "fields": [ SchemaField("name", "STRING", "NULLABLE", None, None, (), None) ], "foreignTypeInfo": None, }, - Schema(fields=[SchemaField("name", "STRING")]), + Schema( + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ] + ), id="repr 
without foreign type info", ), ], @@ -1078,13 +1070,11 @@ def test_from_api_repr(self, api_repr, expected): THEN it will have the same representation a Schema object created directly and displayed as a dict. """ + result = Schema.from_api_repr(api_repr) assert result.to_api_repr() == expected.to_api_repr() -# END PYTEST BASED SCHEMA TESTS ==================== - - class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): From 67c0182c7580544b65019452ede64c82866b47dd Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 3 Jan 2025 12:57:50 +0000 Subject: [PATCH 20/22] Clean up some failing tests --- google/cloud/bigquery/schema.py | 8 ++++---- tests/unit/test_schema.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index e93645cba..f6f5dabb9 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -19,7 +19,7 @@ import collections import copy import enum -from typing import Any, cast, Dict, Iterable, Optional, Union, Mapping, List +from typing import Any, cast, Dict, Iterable, Optional, Union, List from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql @@ -264,11 +264,11 @@ def __init__( self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod - def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField": + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation + api_repr (Dict[str, str]): The serialized representation of the SchemaField, such as what is output by :meth:`to_api_repr`. 
@@ -514,7 +514,7 @@ def _build_schema_resource(fields): def _to_schema_fields( - schema: Union[Schema, List[Union[SchemaField, Mapping[str, Any]]]] + schema: Union[Schema, List[Union[SchemaField, Dict[str, Any]]]] ) -> Union[Schema, List[SchemaField]]: """Convert the input to either a Schema object OR a list of SchemaField objects. diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index a88a33a08..9b618a49c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -831,6 +831,12 @@ def test_build_schema_resource(self, fields, expected_resource): class TestToSchemaFields: # Test class for _to_schema_fields + @staticmethod + def _call_fut(schema): + from google.cloud.bigquery.schema import _to_schema_fields + + return _to_schema_fields(schema) + def test_invalid_type(self): """Invalid list of tuples instead of list of mappings""" schema = [ @@ -956,7 +962,7 @@ def test_str(self): ) assert ( str(schema) - == "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], TestInfo)" + == "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], TestInfo)" ) @pytest.mark.parametrize( @@ -967,12 +973,12 @@ def test_str(self): fields=[SchemaField("name", "STRING")], foreign_type_info="TestInfo", ), - "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], 'TestInfo')", + "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], 'TestInfo')", id="repr with foreign type info", ), pytest.param( Schema(fields=[SchemaField("name", "STRING")]), - "Schema([{'name': 'name', 'mode': 'NULLABLE', 'type': 'STRING'}], None)", + "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], None)", id="repr without foreign type info", ), ], From 3fa949c32b3288b909dfb48287ae465babc85ff9 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 3 Jan 2025 17:57:11 +0000 Subject: [PATCH 21/22] adds tests to ensure code coverage --- tests/unit/test_schema.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 
insertions(+) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 9b618a49c..556ce77e1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -945,6 +945,33 @@ def test_schema_object_field_access(self): assert schema[0]["name"] == "name" # Access fields using indexing assert schema[1]["type"] == "INTEGER" + def test_schema_object_data_access(self): + """Schema class is superclassed by UserList, which requires the + use of a '.data' attribute. The decision was made to have both of these attributes point to + the same key "fields" in the '_properties' dictionary. Thus '.data' is an alias + for '_fields'. + + This test assures that .data functions as an alias to the underlying data. + """ + + schema = Schema( + fields=[ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + ] + ) + + assert len(schema.data) == 2 + assert schema.data[0]["name"] == "name" # Access fields using indexing + assert schema.data[1]["type"] == "INTEGER" + + new_fields = [ + SchemaField("new_name", "STRING"), + SchemaField("new_age", "INTEGER"), + ] + schema.data = new_fields + assert schema.data[0]["name"] == "new_name" + def test_schema_object_foreign_type_info(self): schema = Schema(foreign_type_info="External") assert schema.foreign_type_info == "External" @@ -1046,6 +1073,14 @@ def test_schema_extend(self): }, id="repr without foreign type info", ), + pytest.param( + Schema(fields=None), + { + "fields": [], + "foreignTypeInfo": None, + }, + id="repr with no fields", + ), ], ) def test_to_api_repr(self, schema, expected_api_repr): From 7c77bfbf57aa8650e2e1562537b87a79547c1635 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 3 Jan 2025 18:32:11 +0000 Subject: [PATCH 22/22] tweaks tests to increase code coverage --- google/cloud/bigquery/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f6f5dabb9..a35cefbb3 100644 --- 
a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -1009,7 +1009,7 @@ def to_api_repr(self) -> Dict[str, Any]: # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. answer = self._properties.copy() - if self._fields is None: + if self._fields == []: return answer schemafields = any([isinstance(f, SchemaField) for f in self._fields]) if schemafields: