diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 3003af250..a35cefbb3 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -19,7 +19,7 @@ import collections import copy import enum -from typing import Any, cast, Dict, Iterable, Optional, Union +from typing import Any, cast, Dict, Iterable, Optional, Union, List from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql @@ -264,11 +264,11 @@ def __init__( self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod - def from_api_repr(cls, api_repr: dict) -> "SchemaField": + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation + api_repr (Dict[str, str]): The serialized representation of the SchemaField, such as what is output by :meth:`to_api_repr`. @@ -489,7 +489,7 @@ def __repr__(self): def _parse_schema_resource(info): - """Parse a resource fragment into a schema field. + """Parse a resource fragment into a sequence of schema fields. Args: info: (Mapping[str, Dict]): should contain a "fields" key to be parsed @@ -513,25 +513,33 @@ def _build_schema_resource(fields): return [field.to_api_repr() for field in fields] -def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. +def _to_schema_fields( + schema: Union[Schema, List[Union[SchemaField, Dict[str, Any]]]] +) -> Union[Schema, List[SchemaField]]: + """Convert the input to either a Schema object OR a list of SchemaField objects. + + This helper method ensures that the fields in the schema are SchemaField objects. + It accepts: + + * A :class:`~google.cloud.bigquery.schema.Schema` instance: It will + convert items that are mappings to + :class:`~google.cloud.bigquery.schema.SchemaField` instances and + preserve foreign_type_info. 
+
+    * A list of
+      :class:`~google.cloud.bigquery.schema.SchemaField` instances.
+
+    * A list of mappings: It will convert each of the mapping items to
+      a :class:`~google.cloud.bigquery.schema.SchemaField` instance.
 
     Args:
-        schema(Sequence[Union[ \
-            :class:`~google.cloud.bigquery.schema.SchemaField`, \
-            Mapping[str, Any] \
-        ]]):
-            Table schema to convert. If some items are passed as mappings,
-            their content must be compatible with
-            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
+        schema: The schema to convert.
 
     Returns:
-        Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
+        The schema as a list of SchemaField objects or a Schema object.
 
     Raises:
-        Exception: If ``schema`` is not a sequence, or if any item in the
-            sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
-            instance or a compatible mapping representation of the field.
+        ValueError: If the items in ``schema`` are not valid.
     """
 
     for field in schema:
@@ -541,6 +549,17 @@ def _to_schema_fields(schema):
                 "mapping representations."
             )
 
+    if isinstance(schema, Schema):
+        schema = Schema(
+            [
+                field
+                if isinstance(field, SchemaField)
+                else SchemaField.from_api_repr(field)
+                for field in schema
+            ],
+            foreign_type_info=schema.foreign_type_info,
+        )
+        return schema
     return [
         field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
         for field in schema
@@ -761,8 +780,6 @@ def serde_info(self) -> Any:
         prop = _get_sub_prop(self._properties, ["serDeInfo"])
         if prop is not None:
             prop = StorageDescriptor().from_api_repr(prop)
-        print(f"DINOSAUR prop: {prop}")
-
         return prop
 
     @serde_info.setter
@@ -886,3 +903,123 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo:
         config = cls("")
         config._properties = copy.deepcopy(resource)
         return config
+
+
+class Schema(collections.UserList):
+    """Represents a BigQuery table schema.
+
+    A ``Schema`` behaves like a list of schema fields (it extends
+    :class:`collections.UserList`) and additionally carries the optional
+    ``foreignTypeInfo`` property. Both values are kept in the
+    ``_properties`` dictionary, which mirrors the API representation;
+    ``data`` (required by ``UserList``) and ``_fields`` are two names for
+    ``_properties["fields"]``.
+
+    Fields assigned in bulk (constructor, ``data`` or ``_fields`` setters)
+    are stored in their API (mapping) form. Items added through list
+    methods such as ``append`` or ``__setitem__`` are stored as given.
+
+    Args:
+        fields (Optional[List[Any]]): Schema fields, either SchemaField
+            objects or their API mappings. Defaults to None, which creates
+            an empty schema.
+        foreign_type_info (Optional[str]): Optional type information
+            relevant for foreign systems. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        fields: Optional[List[Any]] = None,
+        foreign_type_info: Optional[str] = None,
+    ):
+        # UserList.__init__ is intentionally not called: it would only
+        # assign ``self.data``, which here is a property over ``_properties``.
+        self._properties: Dict[str, Any] = {}
+        self._fields = [] if fields is None else list(fields)
+        self.foreign_type_info = foreign_type_info
+
+    @staticmethod
+    def _serialize_fields(fields: Iterable[Any]) -> List[Any]:
+        """Return ``fields`` with each SchemaField replaced by its API mapping.
+
+        Items that are not SchemaField objects (mappings already in API
+        form) are passed through unchanged.
+        """
+        return [
+            field.to_api_repr() if isinstance(field, SchemaField) else field
+            for field in fields
+        ]
+
+    @property
+    def foreign_type_info(self) -> Optional[str]:
+        """Optional[str]: Type information relevant for foreign systems."""
+        return self._properties.get("foreignTypeInfo")
+
+    @foreign_type_info.setter
+    def foreign_type_info(self, value: Optional[str]) -> None:
+        value = _isinstance_or_raise(value, str, none_allowed=True)
+        self._properties["foreignTypeInfo"] = value
+
+    @property
+    def _fields(self) -> Optional[List[Any]]:
+        """Optional[List[Any]]: Fields stored in ``_properties["fields"]``."""
+        return self._properties.get("fields")
+
+    @_fields.setter
+    def _fields(self, value: Optional[List[Any]]) -> None:
+        value = _isinstance_or_raise(value, list, none_allowed=True)
+        # None is normalized to an empty list so that list operations keep
+        # working; mappings are accepted as-is so that ``UserList`` methods
+        # that rebuild an instance from ``self.data`` (slicing, ``+``) work.
+        self._properties["fields"] = self._serialize_fields(value or [])
+
+    @property
+    def data(self) -> Any:
+        """Alias of ``_fields``; the attribute ``UserList`` operates on."""
+        return self._fields
+
+    @data.setter
+    def data(self, value) -> None:
+        self._fields = value
+
+    def __str__(self) -> str:
+        return f"Schema({self._fields}, {self.foreign_type_info})"
+
+    def __repr__(self) -> str:
+        return f"Schema({self._fields!r}, {self.foreign_type_info!r})"
+
+    def to_api_repr(self) -> Dict[str, Any]:
+        """Build an API representation of this object.
+
+        Returns:
+            Dict[str, Any]:
+                A dictionary in the format used by the BigQuery API. Any
+                SchemaField objects among the fields are converted to
+                their API representations; mappings are kept as they are.
+        """
+        answer = self._properties.copy()
+        fields = self._fields
+        if fields is not None:
+            # The list may mix mappings and SchemaField objects (e.g. after
+            # ``append``), so each item is converted individually.
+            answer["fields"] = self._serialize_fields(fields)
+        return answer
+
+    @classmethod
+    def from_api_repr(cls, resource: Dict[str, Any]) -> "Schema":
+        """Factory: constructs an instance of the class (cls)
+        given its API representation.
+
+        Args:
+            resource (Dict[str, Any]):
+                API representation of the object to be instantiated.
+
+        Returns:
+            An instance of the class initialized with data from 'resource'.
+        """
+        config = cls([])
+        # Deep copy so that later list mutations do not leak into ``resource``.
+        config._properties = copy.deepcopy(resource)
+        if config._properties.get("fields") is None:
+            config._properties["fields"] = []
+        return config
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index c3393e5fe..b8deb6f85 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -70,6 +70,7 @@
 from google.cloud.bigquery.schema import _build_schema_resource
 from google.cloud.bigquery.schema import _parse_schema_resource
 from google.cloud.bigquery.schema import _to_schema_fields
+from google.cloud.bigquery.schema import Schema
 from google.cloud.bigquery.external_config import ExternalCatalogTableOptions
 
 if typing.TYPE_CHECKING:  # pragma: NO COVER
@@ -453,10 +454,11 @@ def schema(self):
             instance or a compatible mapping representation of the field.
""" prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) - if not prop: + if not prop: # if empty Schema, empty list, None return [] - else: - return _parse_schema_resource(prop) + elif isinstance(prop, Schema): + return prop + return _parse_schema_resource(prop) @schema.setter def schema(self, value): @@ -464,6 +466,8 @@ def schema(self, value): if value is None: self._properties[api_field] = None + elif isinstance(value, Schema): + self._properties[api_field] = value else: value = _to_schema_fields(value) self._properties[api_field] = {"fields": _build_schema_resource(value)} @@ -1394,7 +1398,8 @@ def _row_from_mapping(mapping, schema): Raises: ValueError: If schema is empty. """ - if len(schema) == 0: + + if not schema: raise ValueError(_TABLE_HAS_NO_SCHEMA) row = [] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..11e6fbb8b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,6 +60,7 @@ from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from google.cloud.bigquery.schema import Schema import google.cloud.bigquery.table from test_utils.imports import maybe_fail_import @@ -2608,7 +2609,8 @@ def test_update_table_w_schema_None(self): sent = {"schema": None} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) + valid_options = [Schema(), [], None] + self.assertIn(updated_table.schema, valid_options) def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 07cea2d9e..556ce77e1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -27,6 +27,11 @@ ForeignTypeInfo, StorageDescriptor, SerDeInfo, + Schema, + SchemaField, + _parse_schema_resource, + _build_schema_resource, + _to_schema_fields, ) @@ -167,7 
+172,6 @@ def test_to_api_repr(self): rounding_mode=ROUNDINGMODE, foreign_type_definition=None, ) - print(f"DINOSAUR: {field}\n\n{field.to_api_repr()}") self.assertEqual( field.to_api_repr(), { @@ -341,35 +345,6 @@ def test_to_standard_sql_simple_type(self): def test_to_standard_sql_struct_type(self): from google.cloud.bigquery import standard_sql - # Expected result object: - # - # name: "image_usage" - # type { - # type_kind: STRUCT - # struct_type { - # fields { - # name: "image_content" - # type {type_kind: BYTES} - # } - # fields { - # name: "last_used" - # type { - # type_kind: STRUCT - # struct_type { - # fields { - # name: "date_field" - # type {type_kind: DATE} - # } - # fields { - # name: "time_field" - # type {type_kind: TIME} - # } - # } - # } - # } - # } - # } - sql_type = self._get_standard_sql_data_type_class() # level 2 fields @@ -713,153 +688,149 @@ def test_from_api_repr_none(self): self.assertEqual(None, self._get_target_class().from_api_repr(None)) -# TODO: dedup with the same class in test_table.py. 
-class _SchemaBase(object): - def _verify_field(self, field, r_field): - self.assertEqual(field.name, r_field["name"]) - self.assertEqual(field.field_type, r_field["type"]) - self.assertEqual(field.mode, r_field.get("mode", "NULLABLE")) - - def _verifySchema(self, schema, resource): - r_fields = resource["schema"]["fields"] - self.assertEqual(len(schema), len(r_fields)) - - for field, r_field in zip(schema, r_fields): - self._verify_field(field, r_field) - - -class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): - from google.cloud.bigquery.schema import _parse_schema_resource +@pytest.fixture +def basic_resource(): + return { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } - return _parse_schema_resource(resource) - def _make_resource(self): - return { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } +@pytest.fixture +def resource_with_subfields(basic_resource): + basic_resource["schema"]["fields"].append( + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], } + ) + return basic_resource - def test__parse_schema_resource_defaults(self): - RESOURCE = self._make_resource() - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, RESOURCE) - def test__parse_schema_resource_subfields(self): - RESOURCE = self._make_resource() - RESOURCE["schema"]["fields"].append( - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - } - ) - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, 
RESOURCE) +@pytest.fixture +def resource_without_mode(basic_resource): + basic_resource["schema"]["fields"].append({"name": "phone", "type": "STRING"}) + return basic_resource - def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._make_resource() - RESOURCE["schema"]["fields"].append({"name": "phone", "type": "STRING"}) - schema = self._call_fut(RESOURCE["schema"]) - self._verifySchema(schema, RESOURCE) +class TestParseSchemaResource: + def verify_field(self, field, r_field): + assert field.name == r_field["name"] + assert field.field_type == r_field["type"] + assert field.mode == r_field.get("mode", "NULLABLE") + def verify_schema(self, schema, resource): + r_fields = resource["schema"]["fields"] + assert len(schema) == len(r_fields) -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + for field, r_field in zip(schema, r_fields): + self.verify_field(field, r_field) - return _build_schema_resource(resource) + # Tests focused on exercising the parse_schema_resource() method + def test_parse_schema_resource_defaults(self, basic_resource): + schema = _parse_schema_resource(basic_resource["schema"]) + self.verify_schema(schema, basic_resource) - def test_defaults(self): - from google.cloud.bigquery.schema import SchemaField + def test_parse_schema_resource_subfields(self, resource_with_subfields): + schema = _parse_schema_resource(resource_with_subfields["schema"]) + self.verify_schema(schema, resource_with_subfields) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + def 
test_parse_schema_resource_fields_without_mode(self, resource_without_mode): + schema = _parse_schema_resource(resource_without_mode["schema"]) + self.verify_schema(schema, resource_without_mode) - def test_w_description(self): - from google.cloud.bigquery.schema import SchemaField - DESCRIPTION = "DESCRIPTION" - full_name = SchemaField( - "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION - ) - age = SchemaField( - "age", - "INTEGER", - mode="REQUIRED", - # Explicitly unset description. - description=None, - ) - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) - - def test_w_subfields(self): - from google.cloud.bigquery.schema import SchemaField - - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - ph_type = SchemaField("type", "STRING", "REQUIRED") - ph_num = SchemaField("number", "STRING", "REQUIRED") - phone = SchemaField( - "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] - ) - resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, +class TestBuildSchemaResource: + # Tests focused on exercising the build_schema_resource() method + @pytest.mark.parametrize( + "fields, expected_resource", + [ + pytest.param( # Test case 1: Basic fields + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ], - }, - ) + [ + 
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + id="basic fields", + ), + pytest.param( # Test case 2: Field without mode + [SchemaField("phone", "STRING")], + [{"name": "phone", "type": "STRING", "mode": "NULLABLE"}], + id="field without mode yields NULLABLE mode", + ), + pytest.param( # Test case 3: Field with description + [ + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description="DESCRIPTION", + ), + SchemaField("age", "INTEGER", mode="REQUIRED", description=None), + ], + [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": "DESCRIPTION", + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ], + id="fields including description", + ), + pytest.param( # Test case 4: Field with subfields + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "phone", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("type", "STRING", "REQUIRED"), + SchemaField("number", "STRING", "REQUIRED"), + ], + ), + ], + [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + }, + ], + id="field with subfields", + ), + ], + ) + def test_build_schema_resource(self, fields, expected_resource): + resource = _build_schema_resource(fields) + assert resource == expected_resource -class Test_to_schema_fields(unittest.TestCase): +class TestToSchemaFields: # Test class for _to_schema_fields @staticmethod def _call_fut(schema): from google.cloud.bigquery.schema import _to_schema_fields @@ -867,22 +838,23 @@ def _call_fut(schema): return _to_schema_fields(schema) def test_invalid_type(self): + """Invalid list of tuples instead of list of mappings""" schema = [ ("full_name", "STRING", 
"REQUIRED"), ("address", "STRING", "REQUIRED"), ] - with self.assertRaises(ValueError): - self._call_fut(schema) + with pytest.raises(ValueError): + _to_schema_fields(schema) def test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField - schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), + SchemaField( + "age", "INT64", mode="NULLABLE" + ), # Using correct type name INT64 ] - result = self._call_fut(schema) - self.assertEqual(result, schema) + result = _to_schema_fields(schema) + assert result == schema def test_unknown_properties(self): schema = [ @@ -911,37 +883,257 @@ def test_unknown_properties(self): for api_repr, field in zip(expected_schema, result): assert field.to_api_repr() == api_repr - def test_valid_mapping_representation(self): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - { - "name": "residence", - "type": "STRUCT", - "mode": "NULLABLE", - "fields": [ - {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, - {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + @pytest.mark.parametrize( + "schema, expected_schema", + [ + pytest.param( + [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "residence", + "type": "STRUCT", # Or RECORD, depending on usage + "mode": "NULLABLE", + "fields": [ + {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, + {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + ], + }, ], - }, + [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "residence", + "STRUCT", # Or RECORD + mode="NULLABLE", + fields=[ + SchemaField("foo", "DATE", mode="NULLABLE"), + SchemaField("bar", "BYTES", mode="REQUIRED"), + ], + ), + ], + id="valid mapping representation", + ) + ], + ) + def test_valid_mapping_representation(self, schema, expected_schema): + result = _to_schema_fields(schema) + assert result == 
expected_schema + + def test_valid_schema_object(self): + schema = Schema( + fields=[SchemaField("name", "STRING", description=None, policy_tags=None)], + foreign_type_info="TestInfo", + ) + result = _to_schema_fields(schema) + expected = Schema( + [SchemaField("name", "STRING", "NULLABLE", None, None, (), None)], + "TestInfo", + ) + assert result.to_api_repr() == expected.to_api_repr() + + +class TestSchemaObject: # New test class for Schema object interactions + def test_schema_object_field_access(self): + schema = Schema( + fields=[ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + ] + ) + + assert len(schema) == 2 + assert schema[0]["name"] == "name" # Access fields using indexing + assert schema[1]["type"] == "INTEGER" + + def test_schema_object_data_access(self): + """Schema class is superclassed by UserList, which requires the + use of a '.data' attribute. The decision was made to have both of these attributes point to + the same key "fields" in the '_properties' dictionary. Thus '.data' is an alias + for '_fields'. + + This test assures that .data functions as an alias to the underlying data. 
+ """ + + schema = Schema( + fields=[ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + ] + ) + + assert len(schema.data) == 2 + assert schema.data[0]["name"] == "name" # Access fields using indexing + assert schema.data[1]["type"] == "INTEGER" + + new_fields = [ + SchemaField("new_name", "STRING"), + SchemaField("new_age", "INTEGER"), ] + schema.data = new_fields + assert schema.data[0]["name"] == "new_name" - expected_schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField( - "residence", - "STRUCT", - mode="NULLABLE", - fields=[ - SchemaField("foo", "DATE", mode="NULLABLE"), - SchemaField("bar", "BYTES", mode="REQUIRED"), - ], + def test_schema_object_foreign_type_info(self): + schema = Schema(foreign_type_info="External") + assert schema.foreign_type_info == "External" + + schema.foreign_type_info = None + assert schema.foreign_type_info is None + + with pytest.raises(TypeError): + schema.foreign_type_info = 123 # Type check + + def test_str(self): + schema = Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ) + assert ( + str(schema) + == "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], TestInfo)" + ) + + @pytest.mark.parametrize( + "schema, expected_repr", + [ + pytest.param( + Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ), + "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], 'TestInfo')", + id="repr with foreign type info", ), - ] + pytest.param( + Schema(fields=[SchemaField("name", "STRING")]), + "Schema([{'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}], None)", + id="repr without foreign type info", + ), + ], + ) + def test_repr(self, schema, expected_repr): + assert repr(schema) == expected_repr # Test __repr__ - result = self._call_fut(schema) - self.assertEqual(result, expected_schema) + def test_schema_iteration(self): + schema = Schema( + fields=[SchemaField("name", "STRING"), SchemaField("age", 
"INTEGER")] + ) + field_names = [field["name"] for field in schema] + assert field_names == ["name", "age"] + + def test_schema_object_mutability(self): # Tests __setitem__ and __delitem__ + schema = Schema( + fields=[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + ) + + schema[0] = SchemaField( + "updated_name", "STRING" + ) # Modify a field using setitem + assert schema[0].name == "updated_name" + + del schema[1] # Test __delitem__ + assert len(schema) == 1 + assert schema[0].name == "updated_name" + + def test_schema_append(self): + schema = Schema() # create an empty schema object + schema.append( + SchemaField("name", "STRING") + ) # use the append method to add a schema field + assert len(schema) == 1 + assert schema[0].name == "name" + + def test_schema_extend(self): + schema = Schema() # create an empty schema object + schema.extend( + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + ) # use the extend method to add multiple schema fields + assert len(schema) == 2 + assert schema[0].name == "name" + assert schema[1].name == "age" + + @pytest.mark.parametrize( + "schema, expected_api_repr", + [ + pytest.param( + Schema( + fields=[SchemaField("name", "STRING")], + foreign_type_info="TestInfo", + ), + { + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], + "foreignTypeInfo": "TestInfo", + }, + id="repr with foreign type info", + ), + pytest.param( + Schema(fields=[SchemaField("name", "STRING")]), + { + "fields": [{"name": "name", "mode": "NULLABLE", "type": "STRING"}], + "foreignTypeInfo": None, + }, + id="repr without foreign type info", + ), + pytest.param( + Schema(fields=None), + { + "fields": [], + "foreignTypeInfo": None, + }, + id="repr with no fields", + ), + ], + ) + def test_to_api_repr(self, schema, expected_api_repr): + assert schema.to_api_repr() == expected_api_repr + + @pytest.mark.parametrize( + "api_repr, expected", + [ + pytest.param( + { + "fields": [ + SchemaField("name", "STRING", 
"NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": "TestInfo", + }, + Schema( + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ], + foreign_type_info="TestInfo", + ), + id="repr with foreign type info", + ), + pytest.param( + { + "fields": [ + SchemaField("name", "STRING", "NULLABLE", None, None, (), None) + ], + "foreignTypeInfo": None, + }, + Schema( + fields=[ + SchemaField( + "name", "STRING", description=None, policy_tags=None + ) + ] + ), + id="repr without foreign type info", + ), + ], + ) + def test_from_api_repr(self, api_repr, expected): + """GIVEN an api representation of a Schema object (i.e. resource) + WHEN converted into a Schema object using from_api_repr() and + displayed as a dict + THEN it will have the same representation a Schema object created + directly and displayed as a dict. + """ + + result = Schema.from_api_repr(api_repr) + assert result.to_api_repr() == expected.to_api_repr() class TestPolicyTags(unittest.TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 19ed1c169..eb9fc4bee 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -30,9 +30,14 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions -from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor +from google.cloud.bigquery.schema import ( + SerDeInfo, + StorageDescriptor, + Schema, + SchemaField, +) def _mock_client(): @@ -500,7 +505,8 @@ def _verifyResourceProperties(self, table, resource): if "schema" in resource: self._verifySchema(table.schema, resource) else: - self.assertEqual(table.schema, []) + valid_options = [Schema(), [], None] + self.assertIn(table.schema, valid_options) if "externalDataConfiguration" in resource: edc = 
table.external_data_configuration @@ -537,7 +543,8 @@ def test_ctor(self): "/projects/%s/datasets/%s/tables/%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME), ) - self.assertEqual(table.schema, []) + valid_options = [Schema(), [], None] + self.assertIn(table.schema, valid_options) self.assertIsNone(table.created) self.assertIsNone(table.etag) @@ -1612,6 +1619,32 @@ def test__row_from_mapping_w_schema(self): ) +# BEGIN PYTEST BASED TABLE.SCHEMA TESTS ==================== +# Sample SchemaField instances for testing +name_field = SchemaField("name", "STRING") +age_field = SchemaField("age", "INTEGER") + + +class TestTableSchema: + def test_table_schema_as_list(self): + table = Table("project.dataset.table_name") + table.schema = [name_field, age_field] + assert isinstance(table.schema, list) + assert table.schema == [name_field, age_field] + + def test_table_schema_as_schema_object(self): + table = Table("project.dataset.table_name") + schema_object = Schema( + fields=[name_field, age_field], foreign_type_info="TEST_INFO" + ) + table.schema = schema_object + assert isinstance(table.schema, Schema) + assert table.schema == schema_object + + +# END PYTEST BASED TABLE.SCHEMA TESTS ==================== + + class TestTableListItem(unittest.TestCase): @staticmethod def _get_target_class():