From 16c79d231dc609966e4d3af7b24886c309ac819f Mon Sep 17 00:00:00 2001 From: Eugene M Date: Thu, 12 Dec 2024 15:25:21 -0500 Subject: [PATCH 01/13] MNT: rename UnionStructure to ConsolidatedStructure --- tiled/_tests/test_writing.py | 24 +++++++++---------- tiled/catalog/adapter.py | 18 +++++++------- ...4112_add_union_to_structure_family_enum.py | 4 ++-- tiled/catalog/orm.py | 2 +- tiled/client/{union.py => consolidated.py} | 14 +++++------ tiled/client/container.py | 12 +++++----- tiled/server/dependencies.py | 6 ++--- tiled/server/links.py | 6 ++--- ...ntic_union.py => pydantic_consolidated.py} | 8 +++---- tiled/server/router.py | 14 +++++------ tiled/server/schemas.py | 10 ++++---- tiled/structures/core.py | 10 ++++---- tiled/structures/data_source.py | 6 ++--- tiled/structures/union.py | 8 +++---- 14 files changed, 71 insertions(+), 71 deletions(-) rename tiled/client/{union.py => consolidated.py} (88%) rename tiled/server/{pydantic_union.py => pydantic_consolidated.py} (66%) diff --git a/tiled/_tests/test_writing.py b/tiled/_tests/test_writing.py index 117edd6f0..8e70ed864 100644 --- a/tiled/_tests/test_writing.py +++ b/tiled/_tests/test_writing.py @@ -676,7 +676,7 @@ def test_append_partition( assert_frame_equal(x.read(), df3, check_dtype=False) -def test_union_one_table(tree): +def test_consolidated_one_table(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df = pandas.DataFrame({"A": [], "B": []}) @@ -686,17 +686,17 @@ def test_union_one_table(tree): structure=structure, name="table", ) - client.create_union([data_source], key="x") + client.create_consolidated([data_source], key="x") -def test_union_two_tables(tree): +def test_consolidated_two_tables(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df1 = pandas.DataFrame({"A": [], "B": []}) df2 = pandas.DataFrame({"C": [], "D": [], "E": []}) structure1 = TableStructure.from_pandas(df1) structure2 = 
TableStructure.from_pandas(df2) - x = client.create_union( + x = client.create_consolidated( [ DataSource( structure_family=StructureFamily.table, @@ -717,7 +717,7 @@ def test_union_two_tables(tree): x.parts["table2"].read() -def test_union_two_tables_colliding_names(tree): +def test_consolidated_two_tables_colliding_names(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df1 = pandas.DataFrame({"A": [], "B": []}) @@ -725,7 +725,7 @@ def test_union_two_tables_colliding_names(tree): structure1 = TableStructure.from_pandas(df1) structure2 = TableStructure.from_pandas(df2) with fail_with_status_code(422): - client.create_union( + client.create_consolidated( [ DataSource( structure_family=StructureFamily.table, @@ -742,7 +742,7 @@ def test_union_two_tables_colliding_names(tree): ) -def test_union_two_tables_colliding_keys(tree): +def test_consolidated_two_tables_colliding_keys(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df1 = pandas.DataFrame({"A": [], "B": []}) @@ -750,7 +750,7 @@ def test_union_two_tables_colliding_keys(tree): structure1 = TableStructure.from_pandas(df1) structure2 = TableStructure.from_pandas(df2) with fail_with_status_code(422): - client.create_union( + client.create_consolidated( [ DataSource( structure_family=StructureFamily.table, @@ -767,7 +767,7 @@ def test_union_two_tables_colliding_keys(tree): ) -def test_union_two_tables_two_arrays(tree): +def test_consolidated_two_tables_two_arrays(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df1 = pandas.DataFrame({"A": [], "B": []}) @@ -778,7 +778,7 @@ def test_union_two_tables_two_arrays(tree): structure2 = TableStructure.from_pandas(df2) structure3 = ArrayStructure.from_array(arr1) structure4 = ArrayStructure.from_array(arr2) - x = client.create_union( + x = client.create_consolidated( [ DataSource( structure_family=StructureFamily.table, @@ -820,7 +820,7 @@ def 
test_union_two_tables_two_arrays(tree): x[column].read() -def test_union_table_column_array_key_collision(tree): +def test_consolidated_table_column_array_key_collision(tree): with Context.from_app(build_app(tree)) as context: client = from_context(context) df = pandas.DataFrame({"A": [], "B": []}) @@ -828,7 +828,7 @@ def test_union_table_column_array_key_collision(tree): structure1 = TableStructure.from_pandas(df) structure2 = ArrayStructure.from_array(arr) with fail_with_status_code(422): - client.create_union( + client.create_consolidated( [ DataSource( structure_family=StructureFamily.table, diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index e88a23cdf..cb337c6ff 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -63,7 +63,7 @@ ) from ..query_registration import QueryTranslationRegistry from ..server.pydantic_container import ContainerStructure -from ..server.pydantic_union import UnionStructure, UnionStructurePart +from ..server.pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart from ..server.schemas import Asset, DataSource, Management, Revision, Spec from ..structures.core import StructureFamily from ..utils import ( @@ -381,12 +381,12 @@ def structure(self): if self.structure_family == StructureFamily.container: # Give no inlined contents. 
return ContainerStructure(contents=None, count=None) - if self.structure_family == StructureFamily.union: + if self.structure_family == StructureFamily.consolidated: parts = [] all_keys = [] for data_source in self.data_sources: parts.append( - UnionStructurePart( + ConsolidatedStructurePart( structure=data_source.structure, structure_family=data_source.structure_family, name=data_source.name, @@ -396,7 +396,7 @@ def structure(self): all_keys.extend(data_source.structure.columns) else: all_keys.append(data_source.name) - return UnionStructure(parts=parts, all_keys=all_keys) + return ConsolidatedStructure(parts=parts, all_keys=all_keys) if self.data_sources: assert len(self.data_sources) == 1 # more not yet implemented return self.data_sources[0].structure @@ -461,11 +461,11 @@ async def lookup_adapter( for i in range(len(segments)): catalog_adapter = await self.lookup_adapter(segments[:i]) - if (catalog_adapter.structure_family == StructureFamily.union) and len( + if (catalog_adapter.structure_family == StructureFamily.consolidated) and len( segments[i:] ) == 1: - # All the segments but the final segment, segments[-1], resolves - # resolve to a union structure. Dispatch to the union Adapter + # All the segments but the final segment, segments[-1], resolve + # to a consolidated structure. Dispatch to the consolidated Adapter # to get the inner Adapter for whatever type of structure it is. 
return await ensure_awaitable(catalog_adapter.get, segments[-1]) if catalog_adapter.data_sources: @@ -680,7 +680,7 @@ async def create_node( ] data_source.parameters = {} data_uri_path_parts = self.segments + [key] - if structure_family == StructureFamily.union: + if structure_family == StructureFamily.consolidated: data_uri_path_parts.append(data_source.name) data_uri = str(self.context.writable_storage) + "".join( f"/{quote_plus(segment)}" for segment in data_uri_path_parts @@ -1573,5 +1573,5 @@ def specs_array_to_json(specs): StructureFamily.container: CatalogContainerAdapter, StructureFamily.sparse: CatalogSparseAdapter, StructureFamily.table: CatalogTableAdapter, - StructureFamily.union: CatalogUnionAdapter, + StructureFamily.consolidated: CatalogUnionAdapter, } diff --git a/tiled/catalog/migrations/versions/0dc110294112_add_union_to_structure_family_enum.py b/tiled/catalog/migrations/versions/0dc110294112_add_union_to_structure_family_enum.py index bf83c8baa..5dc04c83f 100644 --- a/tiled/catalog/migrations/versions/0dc110294112_add_union_to_structure_family_enum.py +++ b/tiled/catalog/migrations/versions/0dc110294112_add_union_to_structure_family_enum.py @@ -1,4 +1,4 @@ -"""Add 'union' to structure_family enum. +"""Add 'consolidated' to structure_family enum. Revision ID: 0dc110294112 Revises: 7c8130c40b8f @@ -22,7 +22,7 @@ def upgrade(): with op.get_context().autocommit_block(): op.execute( sa.text( - "ALTER TYPE structurefamily ADD VALUE IF NOT EXISTS 'union' AFTER 'table'" + "ALTER TYPE structurefamily ADD VALUE IF NOT EXISTS 'consolidated' AFTER 'table'" ) ) diff --git a/tiled/catalog/orm.py b/tiled/catalog/orm.py index a176a9daf..190969eec 100644 --- a/tiled/catalog/orm.py +++ b/tiled/catalog/orm.py @@ -369,7 +369,7 @@ class DataSource(Timestamped, Base): # This relates to the mutability of the data. 
management = Column(Enum(Management), nullable=False) structure_family = Column(Enum(StructureFamily), nullable=False) - # This is used by `union` structures to address arrays. + # This is used by `consolidated` structures to address arrays. # It may have additional uses in the future. name = Column(Unicode(1023), nullable=True) diff --git a/tiled/client/union.py b/tiled/client/consolidated.py similarity index 88% rename from tiled/client/union.py rename to tiled/client/consolidated.py index 525837b89..58b8bb0ec 100644 --- a/tiled/client/union.py +++ b/tiled/client/consolidated.py @@ -5,7 +5,7 @@ from .utils import MSGPACK_MIME_TYPE, ClientError, client_for_item, handle_error -class UnionClient(BaseClient): +class ConsolidatedClient(BaseClient): def __repr__(self): return ( f"<{type(self).__name__} {{" @@ -15,7 +15,7 @@ def __repr__(self): @property def parts(self): - return UnionContents(self) + return ConsolidatedContents(self) def __getitem__(self, key): if key not in self.structure().all_keys: @@ -48,7 +48,7 @@ def __getitem__(self, key): ) -class UnionContents: +class ConsolidatedContents: def __init__(self, node): self.node = node @@ -60,10 +60,10 @@ def __repr__(self): ) def __getitem__(self, name): - for index, union_item in enumerate(self.node.structure().parts): - if union_item.name == name: - structure_family = union_item.structure_family - structure_dict = union_item.structure + for index, item in enumerate(self.node.structure().parts): + if item.name == name: + structure_family = item.structure_family + structure_dict = item.structure break else: raise KeyError(name) diff --git a/tiled/client/container.py b/tiled/client/container.py index 937059b32..708c15600 100644 --- a/tiled/client/container.py +++ b/tiled/client/container.py @@ -643,7 +643,7 @@ def new( if structure_family == StructureFamily.container: structure = {"contents": None, "count": None} - elif structure_family == StructureFamily.union: + elif structure_family == 
StructureFamily.consolidated: structure = None # To be filled in below, by server response. # We need the server to tell us data_source_ids. @@ -711,9 +711,9 @@ def create_container(self, key=None, *, metadata=None, dims=None, specs=None): specs=specs, ) - def create_union(self, data_sources, key=None, *, metadata=None, specs=None): + def create_consolidated(self, data_sources, key=None, *, metadata=None, specs=None): """ - EXPERIMENTAL: Create a new union backed by data sources. + EXPERIMENTAL: Create a new consolidated node backed by data sources. Parameters ---------- @@ -729,7 +729,7 @@ def create_union(self, data_sources, key=None, *, metadata=None, specs=None): """ return self.new( - StructureFamily.union, + StructureFamily.consolidated, data_sources, key=key, metadata=metadata, @@ -1083,7 +1083,7 @@ def _write_partition(x, partition_info, client): "table": _LazyLoad( ("..dataframe", Container.__module__), "DataFrameClient" ), - "union": _LazyLoad(("..union", Container.__module__), "UnionClient"), + "consolidated": _LazyLoad(("..consolidated", Container.__module__), "ConsolidatedClient"), "xarray_dataset": _LazyLoad( ("..xarray", Container.__module__), "DatasetClient" ), @@ -1102,7 +1102,7 @@ def _write_partition(x, partition_info, client): "table": _LazyLoad( ("..dataframe", Container.__module__), "DaskDataFrameClient" ), - "union": _LazyLoad(("..union", Container.__module__), "UnionClient"), + "consolidated": _LazyLoad(("..consolidated", Container.__module__), "ConsolidatedClient"), "xarray_dataset": _LazyLoad( ("..xarray", Container.__module__), "DaskDatasetClient" ), diff --git a/tiled/server/dependencies.py b/tiled/server/dependencies.py index 6cc699e6e..ef94a5172 100644 --- a/tiled/server/dependencies.py +++ b/tiled/server/dependencies.py @@ -132,14 +132,14 @@ async def inner( entry.structure_family in structure_families ): return entry - # Handle union structure_family - if entry.structure_family == StructureFamily.union: + # Handle consolidated 
structure_family + if entry.structure_family == StructureFamily.consolidated: if not part: raise HTTPException( status_code=HTTP_404_NOT_FOUND, detail=( "A part query parameter is required on this endpoint " - "when addressing a 'union' structure." + "when addressing a 'consolidated' structure." ), ) entry_for_part = entry.for_part(part) diff --git a/tiled/server/links.py b/tiled/server/links.py index ff186ebbc..2d1db43a6 100644 --- a/tiled/server/links.py +++ b/tiled/server/links.py @@ -37,7 +37,7 @@ def links_for_awkward(structure_family, structure, base_url, path_str, part=None def links_for_container(structure_family, structure, base_url, path_str): - # Cannot be used inside union, so there is no part parameter. + # Cannot be used inside consolidated, so there is no part parameter. links = {} links["full"] = f"{base_url}/container/full/{path_str}" links["search"] = f"{base_url}/search/{path_str}" @@ -54,7 +54,7 @@ def links_for_table(structure_family, structure, base_url, path_str, part=None): return links -def links_for_union(structure_family, structure, base_url, path_str): +def links_for_consolidated(structure_family, structure, base_url, path_str): links = {} # This contains the links for each structure. 
links["parts"] = [] @@ -77,5 +77,5 @@ def links_for_union(structure_family, structure, base_url, path_str): StructureFamily.container: links_for_container, StructureFamily.sparse: links_for_array, # sparse and array are the same StructureFamily.table: links_for_table, - StructureFamily.union: links_for_union, + StructureFamily.consolidated: links_for_consolidated, } diff --git a/tiled/server/pydantic_union.py b/tiled/server/pydantic_consolidated.py similarity index 66% rename from tiled/server/pydantic_union.py rename to tiled/server/pydantic_consolidated.py index 7d13645df..80a24c54f 100644 --- a/tiled/server/pydantic_union.py +++ b/tiled/server/pydantic_consolidated.py @@ -5,7 +5,7 @@ from ..structures.core import StructureFamily -class UnionStructurePart(pydantic.BaseModel): +class ConsolidatedStructurePart(pydantic.BaseModel): structure_family: StructureFamily structure: Any # Union of Structures, but we do not want to import them... name: str @@ -15,13 +15,13 @@ def from_json(cls, item): return cls(**item) -class UnionStructure(pydantic.BaseModel): - parts: List[UnionStructurePart] +class ConsolidatedStructure(pydantic.BaseModel): + parts: List[ConsolidatedStructurePart] all_keys: Optional[List[str]] @classmethod def from_json(cls, structure): return cls( - parts=[UnionStructurePart.from_json(item) for item in structure["parts"]], + parts=[ConsolidatedStructurePart.from_json(item) for item in structure["parts"]], all_keys=structure["all_keys"], ) diff --git a/tiled/server/router.py b/tiled/server/router.py index be4bd600c..05d0553a7 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -27,7 +27,7 @@ ) from .. 
import __version__ -from ..server.pydantic_union import UnionStructure, UnionStructurePart +from .pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart from ..structures.core import Spec, StructureFamily from ..utils import ensure_awaitable, patch_mimetypes, path_from_uri from ..validation_registration import ValidationError @@ -423,7 +423,7 @@ async def array_block( "Use slicing ('?slice=...') to request smaller chunks." ), ) - if entry.structure_family == StructureFamily.union: + if entry.structure_family == StructureFamily.consolidated: structure_family = entry.data_source.structure_family else: structure_family = entry.structure_family @@ -464,7 +464,7 @@ async def array_full( """ Fetch a slice of array-like data. """ - if entry.structure_family == StructureFamily.union: + if entry.structure_family == StructureFamily.consolidated: structure_family = entry.data_source.structure_family else: structure_family = entry.structure_family @@ -729,7 +729,7 @@ async def table_full( "request a smaller chunks." 
), ) - if entry.structure_family == StructureFamily.union: + if entry.structure_family == StructureFamily.consolidated: structure_family = entry.data_source.structure_family else: structure_family = entry.structure_family @@ -1160,16 +1160,16 @@ async def _create_node( body.specs, ) metadata_modified = False - if structure_family == StructureFamily.union: + if structure_family == StructureFamily.consolidated: all_keys = [] for data_source in body.data_sources: if data_source.structure_family == StructureFamily.table: all_keys.extend(data_source.structure.columns) else: all_keys.append(data_source.name) - structure = UnionStructure( + structure = ConsolidatedStructure( parts=[ - UnionStructurePart( + ConsolidatedStructurePart( data_source_id=data_source.id, structure=data_source.structure, structure_family=data_source.structure_family, diff --git a/tiled/server/schemas.py b/tiled/server/schemas.py index e0edd8ecf..71096f538 100644 --- a/tiled/server/schemas.py +++ b/tiled/server/schemas.py @@ -16,7 +16,7 @@ from .pydantic_awkward import AwkwardStructure from .pydantic_sparse import SparseStructure from .pydantic_table import TableStructure -from .pydantic_union import UnionStructure +from .pydantic_consolidated import ConsolidatedStructure if TYPE_CHECKING: import tiled.authn_database.orm @@ -149,7 +149,7 @@ class DataSource(pydantic.BaseModel): NodeStructure, SparseStructure, TableStructure, - UnionStructure, + ConsolidatedStructure, ] ] = None mimetype: Optional[str] = None @@ -186,7 +186,7 @@ class NodeAttributes(pydantic.BaseModel): NodeStructure, SparseStructure, TableStructure, - UnionStructure, + ConsolidatedStructure, ] ] = None @@ -248,7 +248,7 @@ class UnionLinks(pydantic.BaseModel): StructureFamily.container: ContainerLinks, StructureFamily.sparse: SparseLinks, StructureFamily.table: DataFrameLinks, - StructureFamily.union: UnionLinks, + StructureFamily.consolidated: UnionLinks, } @@ -487,7 +487,7 @@ class PostMetadataResponse(pydantic.BaseModel, 
Generic[ResourceLinksT]): NodeStructure, SparseStructure, TableStructure, - UnionStructure, + ConsolidatedStructure, ] metadata: Dict data_sources: List[DataSource] diff --git a/tiled/structures/core.py b/tiled/structures/core.py index 8f5e10795..08618f53f 100644 --- a/tiled/structures/core.py +++ b/tiled/structures/core.py @@ -18,7 +18,7 @@ class BaseStructureFamily(str, enum.Enum): container = "container" sparse = "sparse" table = "table" - # excludes union, which DataSources cannot have + # excludes consolidated, which DataSources cannot have class StructureFamily(str, enum.Enum): @@ -27,7 +27,7 @@ class StructureFamily(str, enum.Enum): container = "container" sparse = "sparse" table = "table" - union = "union" + consolidated = "consolidated" @dataclass(frozen=True) @@ -73,8 +73,8 @@ def dict(self) -> Dict[str, Optional[str]]: StructureFamily.sparse: lambda: importlib.import_module( "...structures.sparse", StructureFamily.__module__ ).SparseStructure, - StructureFamily.union: lambda: importlib.import_module( - "...structures.union", StructureFamily.__module__ - ).UnionStructure, + StructureFamily.consolidated: lambda: importlib.import_module( + "...structures.consolidated", StructureFamily.__module__ + ).ConsolidatedStructure, } ) diff --git a/tiled/structures/data_source.py b/tiled/structures/data_source.py index c3c65bfca..b64b897d6 100644 --- a/tiled/structures/data_source.py +++ b/tiled/structures/data_source.py @@ -54,14 +54,14 @@ def validate_container_data_sources(node_structure_family, data_sources): return data_sources -def validate_union_data_sources(node_structure_family, data_sources): +def validate_consolidated_data_sources(node_structure_family, data_sources): "Check that column names and keys of others (e.g. arrays) do not collide." 
keys = set() names = set() for data_source in data_sources: if data_source.name is None: raise ValueError( - "Data sources backing a union structure_family must " + "Data sources backing a consolidated structure_family must " "all have non-NULL names." ) if data_source.name in names: @@ -95,4 +95,4 @@ def validate_other_data_sources(node_structure_family, data_sources): validators = collections.defaultdict(lambda: validate_other_data_sources) validators[StructureFamily.container] = validate_container_data_sources -validators[StructureFamily.union] = validate_union_data_sources +validators[StructureFamily.consolidated] = validate_consolidated_data_sources diff --git a/tiled/structures/union.py b/tiled/structures/union.py index 3d4a6cc4b..47cc2e43c 100644 --- a/tiled/structures/union.py +++ b/tiled/structures/union.py @@ -5,7 +5,7 @@ @dataclasses.dataclass -class UnionStructurePart: +class ConsolidatedStructurePart: structure_family: StructureFamily structure: Any # Union of Structures, but we do not want to import them... 
name: Optional[str] @@ -16,13 +16,13 @@ def from_json(cls, item): @dataclasses.dataclass -class UnionStructure: - parts: List[UnionStructurePart] +class ConsolidatedStructure: + parts: List[ConsolidatedStructurePart] all_keys: List[str] @classmethod def from_json(cls, structure): return cls( - parts=[UnionStructurePart.from_json(item) for item in structure["parts"]], + parts=[ConsolidatedStructurePart.from_json(item) for item in structure["parts"]], all_keys=structure["all_keys"], ) From af06a43f3f877ed7ee40652f19e59d33aabf735e Mon Sep 17 00:00:00 2001 From: Eugene M Date: Thu, 12 Dec 2024 15:51:16 -0500 Subject: [PATCH 02/13] MNT: rename UnionStructure to ConsolidatedStructure --- tiled/structures/{union.py => consolidated.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tiled/structures/{union.py => consolidated.py} (100%) diff --git a/tiled/structures/union.py b/tiled/structures/consolidated.py similarity index 100% rename from tiled/structures/union.py rename to tiled/structures/consolidated.py From d8ef72c78c939338d921a4e3cfd7426a266ecbdd Mon Sep 17 00:00:00 2001 From: Eugene M Date: Thu, 12 Dec 2024 16:02:01 -0500 Subject: [PATCH 03/13] MNT: rename CatalogUnionAdapter and UnionLinks --- tiled/catalog/adapter.py | 4 ++-- tiled/server/schemas.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index cb337c6ff..62c7c1945 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -1155,7 +1155,7 @@ async def append_partition(self, *args, **kwargs): ) -class CatalogUnionAdapter(CatalogNodeAdapter): +class CatalogConsolidatedAdapter(CatalogNodeAdapter): async def get(self, key): if key not in self.structure().all_keys: return None @@ -1573,5 +1573,5 @@ def specs_array_to_json(specs): StructureFamily.container: CatalogContainerAdapter, StructureFamily.sparse: CatalogSparseAdapter, StructureFamily.table: CatalogTableAdapter, - StructureFamily.consolidated: 
CatalogUnionAdapter, + StructureFamily.consolidated: CatalogConsolidatedAdapter, } diff --git a/tiled/server/schemas.py b/tiled/server/schemas.py index 71096f538..f9773d5c6 100644 --- a/tiled/server/schemas.py +++ b/tiled/server/schemas.py @@ -235,7 +235,7 @@ class SparseLinks(pydantic.BaseModel): block: str -class UnionLinks(pydantic.BaseModel): +class ConsolidatedLinks(pydantic.BaseModel): self: str contents: List[ Union[ArrayLinks, AwkwardLinks, ContainerLinks, DataFrameLinks, SparseLinks] @@ -248,7 +248,7 @@ class UnionLinks(pydantic.BaseModel): StructureFamily.container: ContainerLinks, StructureFamily.sparse: SparseLinks, StructureFamily.table: DataFrameLinks, - StructureFamily.consolidated: UnionLinks, + StructureFamily.consolidated: ConsolidatedLinks, } @@ -480,7 +480,7 @@ class PutDataSourceRequest(pydantic.BaseModel): class PostMetadataResponse(pydantic.BaseModel, Generic[ResourceLinksT]): id: str - links: Union[ArrayLinks, DataFrameLinks, SparseLinks, UnionLinks] + links: Union[ArrayLinks, DataFrameLinks, SparseLinks, ConsolidatedLinks] structure: Union[ ArrayStructure, AwkwardStructure, From 2380800548b9a74f605688bcf1777f49155d8d61 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Thu, 12 Dec 2024 17:29:19 -0500 Subject: [PATCH 04/13] MNT: typing and lint --- tiled/adapters/arrow.py | 2 +- tiled/adapters/csv.py | 2 +- tiled/adapters/parquet.py | 3 ++- tiled/adapters/table.py | 4 ++-- tiled/catalog/adapter.py | 11 +++++++---- tiled/client/container.py | 8 ++++++-- tiled/server/pydantic_consolidated.py | 4 +++- tiled/server/router.py | 2 +- tiled/server/schemas.py | 4 ++-- tiled/structures/consolidated.py | 4 +++- tiled/structures/core.py | 11 +---------- tiled/structures/data_source.py | 10 ++++++++-- 12 files changed, 37 insertions(+), 28 deletions(-) diff --git a/tiled/adapters/arrow.py b/tiled/adapters/arrow.py index 9c2da456c..975c27fa0 100644 --- a/tiled/adapters/arrow.py +++ b/tiled/adapters/arrow.py @@ -129,7 +129,7 @@ def generate_data_sources( """ 
return [ DataSource( - structure_family=self.structure_family, + structure_family=StructureFamily.table, mimetype=mimetype, structure=dict_or_none(self.structure()), parameters={}, diff --git a/tiled/adapters/csv.py b/tiled/adapters/csv.py index 8e9fd6680..b1dd6711f 100644 --- a/tiled/adapters/csv.py +++ b/tiled/adapters/csv.py @@ -276,7 +276,7 @@ def generate_data_sources( """ return [ DataSource( - structure_family=self.dataframe_adapter.structure_family, + structure_family=StructureFamily.table, mimetype=mimetype, structure=dict_or_none(self.dataframe_adapter.structure()), parameters={}, diff --git a/tiled/adapters/parquet.py b/tiled/adapters/parquet.py index 3438dc995..b8421396d 100644 --- a/tiled/adapters/parquet.py +++ b/tiled/adapters/parquet.py @@ -9,6 +9,7 @@ from ..structures.table import TableStructure from ..type_aliases import JSON from ..utils import path_from_uri +from .array import ArrayAdapter from .dataframe import DataFrameAdapter from .protocols import AccessPolicy @@ -165,5 +166,5 @@ def structure(self) -> TableStructure: """ return self._structure - def get(self, key): + def get(self, key: str) -> ArrayAdapter | None: return self.dataframe_adapter.get(key) diff --git a/tiled/adapters/table.py b/tiled/adapters/table.py index 01a89a5d7..8f13461c6 100644 --- a/tiled/adapters/table.py +++ b/tiled/adapters/table.py @@ -169,12 +169,12 @@ def __getitem__(self, key: str) -> ArrayAdapter: # Must compute to determine shape. 
return ArrayAdapter.from_array(self.read([key])[key].values) - def get(self, key): + def get(self, key: str) -> ArrayAdapter | None: if key not in self.structure().columns: return None return ArrayAdapter.from_array(self.read([key])[key].values) - def items(self): + def items(self) -> Iterator[Tuple[str, ArrayAdapter]]: yield from ( (key, ArrayAdapter.from_array(self.read([key])[key].values)) for key in self._structure.columns diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index 62c7c1945..11b3b5700 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -62,8 +62,11 @@ ZARR_MIMETYPE, ) from ..query_registration import QueryTranslationRegistry +from ..server.pydantic_consolidated import ( + ConsolidatedStructure, + ConsolidatedStructurePart, +) from ..server.pydantic_container import ContainerStructure -from ..server.pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart from ..server.schemas import Asset, DataSource, Management, Revision, Spec from ..structures.core import StructureFamily from ..utils import ( @@ -461,9 +464,9 @@ async def lookup_adapter( for i in range(len(segments)): catalog_adapter = await self.lookup_adapter(segments[:i]) - if (catalog_adapter.structure_family == StructureFamily.consolidated) and len( - segments[i:] - ) == 1: + if ( + catalog_adapter.structure_family == StructureFamily.consolidated + ) and len(segments[i:]) == 1: # All the segments but the final segment, segments[-1], resolve # to a consolidated structure. Dispatch to the consolidated Adapter # to get the inner Adapter for whatever type of structure it is. 
diff --git a/tiled/client/container.py b/tiled/client/container.py index 708c15600..238967f2a 100644 --- a/tiled/client/container.py +++ b/tiled/client/container.py @@ -1083,7 +1083,9 @@ def _write_partition(x, partition_info, client): "table": _LazyLoad( ("..dataframe", Container.__module__), "DataFrameClient" ), - "consolidated": _LazyLoad(("..consolidated", Container.__module__), "ConsolidatedClient"), + "consolidated": _LazyLoad( + ("..consolidated", Container.__module__), "ConsolidatedClient" + ), "xarray_dataset": _LazyLoad( ("..xarray", Container.__module__), "DatasetClient" ), @@ -1102,7 +1104,9 @@ def _write_partition(x, partition_info, client): "table": _LazyLoad( ("..dataframe", Container.__module__), "DaskDataFrameClient" ), - "consolidated": _LazyLoad(("..consolidated", Container.__module__), "ConsolidatedClient"), + "consolidated": _LazyLoad( + ("..consolidated", Container.__module__), "ConsolidatedClient" + ), "xarray_dataset": _LazyLoad( ("..xarray", Container.__module__), "DaskDatasetClient" ), diff --git a/tiled/server/pydantic_consolidated.py b/tiled/server/pydantic_consolidated.py index 80a24c54f..00de74489 100644 --- a/tiled/server/pydantic_consolidated.py +++ b/tiled/server/pydantic_consolidated.py @@ -22,6 +22,8 @@ class ConsolidatedStructure(pydantic.BaseModel): @classmethod def from_json(cls, structure): return cls( - parts=[ConsolidatedStructurePart.from_json(item) for item in structure["parts"]], + parts=[ + ConsolidatedStructurePart.from_json(item) for item in structure["parts"] + ], all_keys=structure["all_keys"], ) diff --git a/tiled/server/router.py b/tiled/server/router.py index 05d0553a7..301b38699 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -27,7 +27,6 @@ ) from .. 
import __version__ -from .pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart from ..structures.core import Spec, StructureFamily from ..utils import ensure_awaitable, patch_mimetypes, path_from_uri from ..validation_registration import ValidationError @@ -62,6 +61,7 @@ ) from .file_response_with_range import FileResponseWithRange from .links import links_for_node +from .pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart from .settings import get_settings from .utils import filter_for_access, get_base_url, record_timing diff --git a/tiled/server/schemas.py b/tiled/server/schemas.py index f9773d5c6..58d996edb 100644 --- a/tiled/server/schemas.py +++ b/tiled/server/schemas.py @@ -14,9 +14,9 @@ from ..structures.data_source import Management, validate_data_sources from .pydantic_array import ArrayStructure from .pydantic_awkward import AwkwardStructure +from .pydantic_consolidated import ConsolidatedStructure from .pydantic_sparse import SparseStructure from .pydantic_table import TableStructure -from .pydantic_consolidated import ConsolidatedStructure if TYPE_CHECKING: import tiled.authn_database.orm @@ -237,7 +237,7 @@ class SparseLinks(pydantic.BaseModel): class ConsolidatedLinks(pydantic.BaseModel): self: str - contents: List[ + parts: List[ Union[ArrayLinks, AwkwardLinks, ContainerLinks, DataFrameLinks, SparseLinks] ] diff --git a/tiled/structures/consolidated.py b/tiled/structures/consolidated.py index 47cc2e43c..89b156719 100644 --- a/tiled/structures/consolidated.py +++ b/tiled/structures/consolidated.py @@ -23,6 +23,8 @@ class ConsolidatedStructure: @classmethod def from_json(cls, structure): return cls( - parts=[ConsolidatedStructurePart.from_json(item) for item in structure["parts"]], + parts=[ + ConsolidatedStructurePart.from_json(item) for item in structure["parts"] + ], all_keys=structure["all_keys"], ) diff --git a/tiled/structures/core.py b/tiled/structures/core.py index 08618f53f..5612b1623 100644 
--- a/tiled/structures/core.py
+++ b/tiled/structures/core.py
@@ -12,22 +12,13 @@
 from ..utils import OneShotCachedMap
 
 
-class BaseStructureFamily(str, enum.Enum):
-    array = "array"
-    awkward = "awkward"
-    container = "container"
-    sparse = "sparse"
-    table = "table"
-    # excludes consolidated, which DataSources cannot have
-
-
 class StructureFamily(str, enum.Enum):
     array = "array"
     awkward = "awkward"
     container = "container"
     sparse = "sparse"
     table = "table"
-    consolidated = "consolidated"
+    consolidated = "consolidated"  # can not be used in DataSources
 
 
 @dataclass(frozen=True)
diff --git a/tiled/structures/data_source.py b/tiled/structures/data_source.py
index b64b897d6..679fc0346 100644
--- a/tiled/structures/data_source.py
+++ b/tiled/structures/data_source.py
@@ -3,7 +3,7 @@
 import enum
 from typing import Any, List, Optional
 
-from ..structures.core import BaseStructureFamily, StructureFamily
+from ..structures.core import StructureFamily
 
 
 class Management(str, enum.Enum):
@@ -24,7 +24,7 @@ class Asset:
 
 @dataclasses.dataclass
 class DataSource:
-    structure_family: BaseStructureFamily
+    structure_family: StructureFamily
     structure: Any
     id: Optional[int] = None
     mimetype: Optional[str] = None
@@ -33,6 +33,12 @@ class DataSource:
     management: Management = Management.writable
     name: Optional[str] = None
 
+    def __post_init__(self):
+        if self.structure_family == StructureFamily.consolidated:
+            raise ValueError(
+                "DataSource can not be initialized with Consolidated StructureFamily"
+            )
+
     @classmethod
     def from_json(cls, d):
         d = d.copy()

From b76e8fdff19bfc88495465c60a0984e6886086d4 Mon Sep 17 00:00:00 2001
From: Eugene M
Date: Thu, 12 Dec 2024 17:31:33 -0500
Subject: [PATCH 05/13] MNT: typing and lint

---
 tiled/adapters/csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tiled/adapters/csv.py b/tiled/adapters/csv.py
index b1dd6711f..8e9fd6680 100644
--- a/tiled/adapters/csv.py
+++ b/tiled/adapters/csv.py
@@ -276,7 +276,7 @@ def generate_data_sources(
 
         """
         
return [ DataSource( - structure_family=StructureFamily.table, + structure_family=self.dataframe_adapter.structure_family, mimetype=mimetype, structure=dict_or_none(self.dataframe_adapter.structure()), parameters={}, From 1fcc9dc9fb4e57c39dbeca7abbbee945430c9464 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 10:27:23 -0500 Subject: [PATCH 06/13] ENH: refactor creation of ConsolidatedStructure as a classmethod --- tiled/catalog/adapter.py | 16 +--------------- tiled/server/pydantic_consolidated.py | 20 ++++++++++++++++++++ tiled/server/router.py | 19 +------------------ tiled/structures/consolidated.py | 20 ++++++++++++++++++++ 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index 11b3b5700..a979b4ccd 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -385,21 +385,7 @@ def structure(self): # Give no inlined contents. return ContainerStructure(contents=None, count=None) if self.structure_family == StructureFamily.consolidated: - parts = [] - all_keys = [] - for data_source in self.data_sources: - parts.append( - ConsolidatedStructurePart( - structure=data_source.structure, - structure_family=data_source.structure_family, - name=data_source.name, - ) - ) - if data_source.structure_family == StructureFamily.table: - all_keys.extend(data_source.structure.columns) - else: - all_keys.append(data_source.name) - return ConsolidatedStructure(parts=parts, all_keys=all_keys) + return ConsolidatedStructure.from_data_sources(self.data_sources) if self.data_sources: assert len(self.data_sources) == 1 # more not yet implemented return self.data_sources[0].structure diff --git a/tiled/server/pydantic_consolidated.py b/tiled/server/pydantic_consolidated.py index 00de74489..2ad79ecb5 100644 --- a/tiled/server/pydantic_consolidated.py +++ b/tiled/server/pydantic_consolidated.py @@ -27,3 +27,23 @@ def from_json(cls, structure): ], all_keys=structure["all_keys"], ) + + @classmethod + 
def from_data_sources(cls, data_sources): + all_keys = [] + for data_source in data_sources: + if data_source.structure_family == StructureFamily.table: + all_keys.extend(data_source.structure.columns) + else: + all_keys.append(data_source.name) + parts=[ + ConsolidatedStructurePart( + data_source_id=data_source.id, + structure=data_source.structure, + structure_family=data_source.structure_family, + name=data_source.name, + ) + for data_source in data_sources + ] + + return cls(parts=parts, all_keys=all_keys) diff --git a/tiled/server/router.py b/tiled/server/router.py index 301b38699..9e1d111cc 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -1161,24 +1161,7 @@ async def _create_node( ) metadata_modified = False if structure_family == StructureFamily.consolidated: - all_keys = [] - for data_source in body.data_sources: - if data_source.structure_family == StructureFamily.table: - all_keys.extend(data_source.structure.columns) - else: - all_keys.append(data_source.name) - structure = ConsolidatedStructure( - parts=[ - ConsolidatedStructurePart( - data_source_id=data_source.id, - structure=data_source.structure, - structure_family=data_source.structure_family, - name=data_source.name, - ) - for data_source in body.data_sources - ], - all_keys=all_keys, - ) + structure = ConsolidatedStructure.from_data_sources(body.data_sources) elif body.data_sources: assert len(body.data_sources) == 1 # more not yet implemented structure = body.data_sources[0].structure diff --git a/tiled/structures/consolidated.py b/tiled/structures/consolidated.py index 89b156719..8a955f256 100644 --- a/tiled/structures/consolidated.py +++ b/tiled/structures/consolidated.py @@ -28,3 +28,23 @@ def from_json(cls, structure): ], all_keys=structure["all_keys"], ) + + @classmethod + def from_data_sources(cls, data_sources): + all_keys = [] + for data_source in data_sources: + if data_source.structure_family == StructureFamily.table: + all_keys.extend(data_source.structure.columns) 
+ else: + all_keys.append(data_source.name) + parts=[ + ConsolidatedStructurePart( + data_source_id=data_source.id, + structure=data_source.structure, + structure_family=data_source.structure_family, + name=data_source.name, + ) + for data_source in data_sources + ] + + return cls(parts=parts, all_keys=all_keys) From bdbb965fc11add7eb03dde82fe15f17c6dc7784a Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 12:10:26 -0500 Subject: [PATCH 07/13] ENH: allow iterating over ConsolidatedClient and its parts --- tiled/client/consolidated.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tiled/client/consolidated.py b/tiled/client/consolidated.py index 58b8bb0ec..3be5cfe29 100644 --- a/tiled/client/consolidated.py +++ b/tiled/client/consolidated.py @@ -46,6 +46,9 @@ def __getitem__(self, key): item, include_data_sources=self._include_data_sources, ) + + def __iter__(self): + yield from self.structure().all_keys class ConsolidatedContents: @@ -80,3 +83,7 @@ def __getitem__(self, name): structure=structure, include_data_sources=self.node._include_data_sources, ) + + def __iter__(self): + for item in self.node.structure().parts: + yield item.name \ No newline at end of file From 275f42c7b5909db696dc4511e86e5e1c4806b6ce Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 15:27:23 -0500 Subject: [PATCH 08/13] DOC: add Consolidated Structure to the docs --- docs/source/explanations/catalog.md | 3 +- docs/source/explanations/structures.md | 76 +++++++++++++++++++++++++- docs/source/how-to/register.md | 38 ++++++++++++- docs/source/reference/service.md | 2 + 4 files changed, 116 insertions(+), 3 deletions(-) diff --git a/docs/source/explanations/catalog.md b/docs/source/explanations/catalog.md index 7b6f7be3e..9bd7df587 100644 --- a/docs/source/explanations/catalog.md +++ b/docs/source/explanations/catalog.md @@ -54,7 +54,8 @@ and `assets`, describes the format, structure, and location of the data. 
to the Adapter - `management` --- enum indicating whether the data is registered `"external"` data or `"writable"` data managed by Tiled -- `structure_family` --- enum of structure types (`"container"`, `"array"`, `"table"`, ...) +- `structure_family` --- enum of structure types (`"container"`, `"array"`, `"table"`, + etc. -- except for `consolidated`, which can not be assigned to a Data Source) - `structure_id` --- a foreign key to the `structures` table - `node_id` --- foreign key to `nodes` - `id` --- integer primary key diff --git a/docs/source/explanations/structures.md b/docs/source/explanations/structures.md index 65ce55deb..f72ead1d9 100644 --- a/docs/source/explanations/structures.md +++ b/docs/source/explanations/structures.md @@ -11,7 +11,8 @@ The structure families are: * array --- a strided array, like a [numpy](https://numpy.org) array * awkward --- nested, variable-sized data (as implemented by [AwkwardArray](https://awkward-array.org/)) -* container --- a of other structures, akin to a dictionary or a directory +* consolidated --- a container-like structure to combine tables and arrays in a common namespace +* container --- a collection of other structures, akin to a dictionary or a directory * sparse --- a sparse array (i.e. an array which is mostly zeros) * table --- tabular data, as in [Apache Arrow](https://arrow.apache.org) or [pandas](https://pandas.pydata.org/) @@ -575,3 +576,76 @@ response. "count": 5 } ``` + +### Consolidated + +This is a specialized container-like structure designed to link together multiple tables and arrays that store +related scientific data. It does not support nesting but provides a common namespace across all columns of the +contained tables along with the arrays (thus, name collisions are forbidden). This allows to further abstract out +the disparate internal storage mechanisms (e.g. Parquet for tables and zarr for arrays) and present the user with a +smooth homogeneous interface for data access. 
Consolidated structures do not support pagination and are not
+recommended for "wide" datasets with more than ~1000 items (columns and arrays) in the namespace.
+
+Below is an example of a Consolidated structure that describes two tables and two arrays of various sizes. Their
+respective structures are specified in the `parts` list, and `all_keys` defines the internal namespace of directly
+addressable columns and arrays.
+
+```json
+{
+  "parts": [
+    {
+      "structure_family": "table",
+      "structure": {
+        "arrow_schema": "data:application/vnd.apache.arrow.file;base64,/////...FFFF",
+        "npartitions": 1,
+        "columns": ["A", "B"],
+        "resizable": false
+      },
+      "name": "table1"
+    },
+    {
+      "structure_family": "table",
+      "structure": {
+        "arrow_schema": "data:application/vnd.apache.arrow.file;base64,/////...FFFF",
+        "npartitions": 1,
+        "columns": ["C", "D", "E"],
+        "resizable": false
+      },
+      "name": "table2"
+    },
+    {
+      "structure_family": "array",
+      "structure": {
+        "data_type": {
+          "endianness": "little",
+          "kind": "f",
+          "itemsize": 8,
+          "dt_units": null
+        },
+        "chunks": [[3], [5]],
+        "shape": [3, 5],
+        "dims": null,
+        "resizable": false
+      },
+      "name": "F"
+    },
+    {
+      "structure_family": "array",
+      "structure": {
+        "data_type": {
+          "endianness": "not_applicable",
+          "kind": "u",
+          "itemsize": 1,
+          "dt_units": null
+        },
+        "chunks": [[5], [7], [3]],
+        "shape": [5, 7, 3],
+        "dims": null,
+        "resizable": false
+      },
+      "name": "G"
+    }
+  ],
+  "all_keys": ["A", "B", "C", "D", "E", "F", "G"]
+}
+```
diff --git a/docs/source/how-to/register.md b/docs/source/how-to/register.md
index 2ca73f068..9b98e6d99 100644
--- a/docs/source/how-to/register.md
+++ b/docs/source/how-to/register.md
@@ -72,7 +72,10 @@ Sometimes it is necessary to take more manual control of this
 registration process, such as if you want to take advantage of particular
 knowledge about the files to specify particular `metadata` or `specs`.
 
-Use the Python client, as in this example.
+#### Registering external data + +To register data from external files in Tiled, one can use the Python client and +construct Data Source object explicitly passing the list of assets, as in the following example. ```py import numpy @@ -112,3 +115,36 @@ client.new( specs=[], ) ``` + +#### Writing a consolidated structure + +Similarly, to create a consolidated container structure, one needs to specify +its constituents as separate Data Sources. For example, to consolidate a table +and an array, consider the following example + +```python +import pandas + +rng = numpy.random.default_rng(12345) +arr = rng.random(size=(3, 5), dtype="float64") +df = pandas.DataFrame({"A": ["one", "two", "three"], "B": [1, 2, 3]}) + +node = client.create_consolidated( + [ + DataSource( + structure_family=StructureFamily.table, + structure=TableStructure.from_pandas(df), + name="table1", + ), + DataSource( + structure_family=StructureFamily.array, + structure=ArrayStructure.from_array(arr), + name="C", + ) + ] +) + +# Write the data +node.parts["table1"].write(df) +node.parts["C"].write_block(arr, (0, 0)) +``` \ No newline at end of file diff --git a/docs/source/reference/service.md b/docs/source/reference/service.md index 213a81d57..a28b00fb8 100644 --- a/docs/source/reference/service.md +++ b/docs/source/reference/service.md @@ -104,6 +104,8 @@ See {doc}`../explanations/structures` for more context. 
tiled.structures.array.BuiltinDtype tiled.structures.array.Endianness tiled.structures.array.Kind + tiled.structures.consolidated.ConsolidatedStructure + tiled.structures.consolidated.ConsolidatedStructurePart tiled.structures.core.Spec tiled.structures.core.StructureFamily tiled.structures.table.TableStructure From bb8fcafccac59ed62ba4b2689150265f79f15bbb Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 15:29:18 -0500 Subject: [PATCH 09/13] MNT: remove dims from the Container client signature --- tiled/client/container.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tiled/client/container.py b/tiled/client/container.py index 238967f2a..2e011fa51 100644 --- a/tiled/client/container.py +++ b/tiled/client/container.py @@ -685,7 +685,7 @@ def new( # to attempt to avoid bumping into size limits. _SUGGESTED_MAX_UPLOAD_SIZE = 100_000_000 # 100 MB - def create_container(self, key=None, *, metadata=None, dims=None, specs=None): + def create_container(self, key=None, *, metadata=None, specs=None): """ EXPERIMENTAL: Create a new, empty container. @@ -696,8 +696,6 @@ def create_container(self, key=None, *, metadata=None, dims=None, specs=None): metadata : dict, optional User metadata. May be nested. Must contain only basic types (e.g. numbers, strings, lists, dicts) that are JSON-serializable. - dims : List[str], optional - A label for each dimension of the array. specs : List[Spec], optional List of names that are used to label that the data and/or metadata conform to some named standard specification. @@ -718,11 +716,11 @@ def create_consolidated(self, data_sources, key=None, *, metadata=None, specs=No Parameters ---------- data_sources : List[DataSources] + key : str, optional + Key (name) for this new node. If None, the server will provide a unique key. metadata : dict, optional User metadata. May be nested. Must contain only basic types (e.g. numbers, strings, lists, dicts) that are JSON-serializable. 
- dims : List[str], optional - A label for each dimension of the array. specs : List[Spec], optional List of names that are used to label that the data and/or metadata conform to some named standard specification. From f3ec5f5bacc6a30cfb2ff07a1452496549c94cb4 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 16:10:33 -0500 Subject: [PATCH 10/13] TST: add tests for writing/reading consolidated structures --- tiled/_tests/test_consolidated.py | 89 +++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 tiled/_tests/test_consolidated.py diff --git a/tiled/_tests/test_consolidated.py b/tiled/_tests/test_consolidated.py new file mode 100644 index 000000000..f593b0886 --- /dev/null +++ b/tiled/_tests/test_consolidated.py @@ -0,0 +1,89 @@ +import math +import string +import warnings +from pathlib import Path + +import numpy +import pytest +import pandas +import pandas.testing + +from ..client import Context, from_context +from ..server.app import build_app +from ..structures.core import StructureFamily +from ..catalog import in_memory + +from ..structures.array import ArrayStructure +from ..structures.core import StructureFamily +from ..structures.data_source import DataSource +from ..structures.table import TableStructure + +rng = numpy.random.default_rng(12345) + +df1 = pandas.DataFrame({"A": ["one", "two", "three"], "B": [1, 2, 3]}) +df2 = pandas.DataFrame({"C": ["red", "green", "blue", "white"], "D": [10., 20., 30., 40.], "E": [0, 0, 0, 0]}) +arr1 = rng.random(size=(3, 5), dtype="float64") +arr2 = rng.integers(0, 255, size=(5, 7, 3), dtype="uint8") +md = {"md_key1":"md_val1", "md_key2":2} + +@pytest.fixture(scope="module") +def tree(tmp_path_factory): + return in_memory(writable_storage=tmp_path_factory.getbasetemp()) + +@pytest.fixture(scope="module") +def context(tree): + with Context.from_app(build_app(tree)) as context: + client = from_context(context) + x = client.create_consolidated( + [ + DataSource( + 
structure_family=StructureFamily.table, + structure=TableStructure.from_pandas(df1), + name="table1", + ), + DataSource( + structure_family=StructureFamily.table, + structure=TableStructure.from_pandas(df2), + name="table2", + ), + DataSource( + structure_family=StructureFamily.array, + structure=ArrayStructure.from_array(arr1), + name="F", + ), + DataSource( + structure_family=StructureFamily.array, + structure=ArrayStructure.from_array(arr2), + name="G", + ), + ], + key="x", + metadata=md + ) + # Write by data source. + x.parts["table1"].write(df1) + x.parts["table2"].write(df2) + x.parts["F"].write_block(arr1, (0, 0)) + x.parts["G"].write_block(arr2, (0, 0, 0)) + + yield context + + +def test_iterate_parts(context): + client = from_context(context) + for part in client['x'].parts: + client['x'].parts[part].read() + + +def test_iterate_columns(context): + client = from_context(context) + for col in client['x']: + if col not in ('A', 'C'): + # TODO: reading string columns directly raises TypeError: Cannot interpret 'string[pyarrow]' as a data type + client['x'][col].read() + client[f'x/{col}'].read() + + +def test_metadata(context): + client = from_context(context) + assert client['x'].metadata == md From 7e74997ecefa60316bf935e184b0402cb48f475d Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 16:13:24 -0500 Subject: [PATCH 11/13] MNT: lint --- docs/source/explanations/structures.md | 2 +- docs/source/how-to/register.md | 2 +- tiled/_tests/test_consolidated.py | 41 +++++++++++++------------- tiled/catalog/adapter.py | 5 +--- tiled/client/consolidated.py | 4 +-- tiled/server/pydantic_consolidated.py | 18 +++++------ tiled/server/router.py | 2 +- tiled/structures/consolidated.py | 18 +++++------ 8 files changed, 45 insertions(+), 47 deletions(-) diff --git a/docs/source/explanations/structures.md b/docs/source/explanations/structures.md index f72ead1d9..89039231d 100644 --- a/docs/source/explanations/structures.md +++ 
b/docs/source/explanations/structures.md @@ -576,7 +576,7 @@ response. "count": 5 } ``` - + ### Consolidated This is a specialized container-like structure designed to link together multiple tables and arrays that store diff --git a/docs/source/how-to/register.md b/docs/source/how-to/register.md index 9b98e6d99..835ea1cb3 100644 --- a/docs/source/how-to/register.md +++ b/docs/source/how-to/register.md @@ -147,4 +147,4 @@ node = client.create_consolidated( # Write the data node.parts["table1"].write(df) node.parts["C"].write_block(arr, (0, 0)) -``` \ No newline at end of file +``` diff --git a/tiled/_tests/test_consolidated.py b/tiled/_tests/test_consolidated.py index f593b0886..4a332d281 100644 --- a/tiled/_tests/test_consolidated.py +++ b/tiled/_tests/test_consolidated.py @@ -1,18 +1,11 @@ -import math -import string -import warnings -from pathlib import Path - import numpy -import pytest import pandas import pandas.testing +import pytest +from ..catalog import in_memory from ..client import Context, from_context from ..server.app import build_app -from ..structures.core import StructureFamily -from ..catalog import in_memory - from ..structures.array import ArrayStructure from ..structures.core import StructureFamily from ..structures.data_source import DataSource @@ -21,15 +14,23 @@ rng = numpy.random.default_rng(12345) df1 = pandas.DataFrame({"A": ["one", "two", "three"], "B": [1, 2, 3]}) -df2 = pandas.DataFrame({"C": ["red", "green", "blue", "white"], "D": [10., 20., 30., 40.], "E": [0, 0, 0, 0]}) +df2 = pandas.DataFrame( + { + "C": ["red", "green", "blue", "white"], + "D": [10.0, 20.0, 30.0, 40.0], + "E": [0, 0, 0, 0], + } +) arr1 = rng.random(size=(3, 5), dtype="float64") arr2 = rng.integers(0, 255, size=(5, 7, 3), dtype="uint8") -md = {"md_key1":"md_val1", "md_key2":2} +md = {"md_key1": "md_val1", "md_key2": 2} + @pytest.fixture(scope="module") def tree(tmp_path_factory): return in_memory(writable_storage=tmp_path_factory.getbasetemp()) + 
@pytest.fixture(scope="module") def context(tree): with Context.from_app(build_app(tree)) as context: @@ -58,7 +59,7 @@ def context(tree): ), ], key="x", - metadata=md + metadata=md, ) # Write by data source. x.parts["table1"].write(df1) @@ -71,19 +72,19 @@ def context(tree): def test_iterate_parts(context): client = from_context(context) - for part in client['x'].parts: - client['x'].parts[part].read() + for part in client["x"].parts: + client["x"].parts[part].read() def test_iterate_columns(context): client = from_context(context) - for col in client['x']: - if col not in ('A', 'C'): - # TODO: reading string columns directly raises TypeError: Cannot interpret 'string[pyarrow]' as a data type - client['x'][col].read() - client[f'x/{col}'].read() + for col in client["x"]: + if col not in ("A", "C"): + # TODO: reading string columns raises TypeError: Cannot interpret 'string[pyarrow]' as a data type + client["x"][col].read() + client[f"x/{col}"].read() def test_metadata(context): client = from_context(context) - assert client['x'].metadata == md + assert client["x"].metadata == md diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index a979b4ccd..007dd9d11 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -62,10 +62,7 @@ ZARR_MIMETYPE, ) from ..query_registration import QueryTranslationRegistry -from ..server.pydantic_consolidated import ( - ConsolidatedStructure, - ConsolidatedStructurePart, -) +from ..server.pydantic_consolidated import ConsolidatedStructure from ..server.pydantic_container import ContainerStructure from ..server.schemas import Asset, DataSource, Management, Revision, Spec from ..structures.core import StructureFamily diff --git a/tiled/client/consolidated.py b/tiled/client/consolidated.py index 3be5cfe29..38a08e228 100644 --- a/tiled/client/consolidated.py +++ b/tiled/client/consolidated.py @@ -46,7 +46,7 @@ def __getitem__(self, key): item, include_data_sources=self._include_data_sources, ) - + def 
__iter__(self): yield from self.structure().all_keys @@ -86,4 +86,4 @@ def __getitem__(self, name): def __iter__(self): for item in self.node.structure().parts: - yield item.name \ No newline at end of file + yield item.name diff --git a/tiled/server/pydantic_consolidated.py b/tiled/server/pydantic_consolidated.py index 2ad79ecb5..5f6c49d07 100644 --- a/tiled/server/pydantic_consolidated.py +++ b/tiled/server/pydantic_consolidated.py @@ -36,14 +36,14 @@ def from_data_sources(cls, data_sources): all_keys.extend(data_source.structure.columns) else: all_keys.append(data_source.name) - parts=[ - ConsolidatedStructurePart( - data_source_id=data_source.id, - structure=data_source.structure, - structure_family=data_source.structure_family, - name=data_source.name, - ) - for data_source in data_sources - ] + parts = [ + ConsolidatedStructurePart( + data_source_id=data_source.id, + structure=data_source.structure, + structure_family=data_source.structure_family, + name=data_source.name, + ) + for data_source in data_sources + ] return cls(parts=parts, all_keys=all_keys) diff --git a/tiled/server/router.py b/tiled/server/router.py index 9e1d111cc..b72b4bc3a 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -61,7 +61,7 @@ ) from .file_response_with_range import FileResponseWithRange from .links import links_for_node -from .pydantic_consolidated import ConsolidatedStructure, ConsolidatedStructurePart +from .pydantic_consolidated import ConsolidatedStructure from .settings import get_settings from .utils import filter_for_access, get_base_url, record_timing diff --git a/tiled/structures/consolidated.py b/tiled/structures/consolidated.py index 8a955f256..4e5a3bfc4 100644 --- a/tiled/structures/consolidated.py +++ b/tiled/structures/consolidated.py @@ -37,14 +37,14 @@ def from_data_sources(cls, data_sources): all_keys.extend(data_source.structure.columns) else: all_keys.append(data_source.name) - parts=[ - ConsolidatedStructurePart( - 
data_source_id=data_source.id, - structure=data_source.structure, - structure_family=data_source.structure_family, - name=data_source.name, - ) - for data_source in data_sources - ] + parts = [ + ConsolidatedStructurePart( + data_source_id=data_source.id, + structure=data_source.structure, + structure_family=data_source.structure_family, + name=data_source.name, + ) + for data_source in data_sources + ] return cls(parts=parts, all_keys=all_keys) From 04efd91d794796e6d1be5d4f8fcbd96a8ce463b8 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 16:25:10 -0500 Subject: [PATCH 12/13] MNT: typing --- tiled/adapters/parquet.py | 2 +- tiled/adapters/table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tiled/adapters/parquet.py b/tiled/adapters/parquet.py index b8421396d..d7bf14304 100644 --- a/tiled/adapters/parquet.py +++ b/tiled/adapters/parquet.py @@ -166,5 +166,5 @@ def structure(self) -> TableStructure: """ return self._structure - def get(self, key: str) -> ArrayAdapter | None: + def get(self, key: str) -> Union[ArrayAdapter, None]: return self.dataframe_adapter.get(key) diff --git a/tiled/adapters/table.py b/tiled/adapters/table.py index 8f13461c6..6b7ca2a40 100644 --- a/tiled/adapters/table.py +++ b/tiled/adapters/table.py @@ -169,7 +169,7 @@ def __getitem__(self, key: str) -> ArrayAdapter: # Must compute to determine shape. 
return ArrayAdapter.from_array(self.read([key])[key].values) - def get(self, key: str) -> ArrayAdapter | None: + def get(self, key: str) -> Union[ArrayAdapter, None]: if key not in self.structure().columns: return None return ArrayAdapter.from_array(self.read([key])[key].values) From c7ee9629402a5cf43b100b233be3b10fb4b637a9 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 13 Dec 2024 19:33:13 -0500 Subject: [PATCH 13/13] FIX: reading string-dtype columns from dataframes individually --- tiled/_tests/test_consolidated.py | 6 ++---- tiled/_tests/test_dataframe.py | 22 ++++++++++++++++++++++ tiled/adapters/table.py | 24 ++++++++++-------------- tiled/structures/array.py | 2 +- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/tiled/_tests/test_consolidated.py b/tiled/_tests/test_consolidated.py index 4a332d281..103be7520 100644 --- a/tiled/_tests/test_consolidated.py +++ b/tiled/_tests/test_consolidated.py @@ -79,10 +79,8 @@ def test_iterate_parts(context): def test_iterate_columns(context): client = from_context(context) for col in client["x"]: - if col not in ("A", "C"): - # TODO: reading string columns raises TypeError: Cannot interpret 'string[pyarrow]' as a data type - client["x"][col].read() - client[f"x/{col}"].read() + client["x"][col].read() + client[f"x/{col}"].read() def test_metadata(context): diff --git a/tiled/_tests/test_dataframe.py b/tiled/_tests/test_dataframe.py index 1df2163bf..01570356b 100644 --- a/tiled/_tests/test_dataframe.py +++ b/tiled/_tests/test_dataframe.py @@ -41,6 +41,17 @@ pandas.DataFrame({f"column_{i:03d}": i * numpy.ones(5) for i in range(10)}), npartitions=1, ), + # a dataframe with mixed types + "diverse": DataFrameAdapter.from_pandas( + pandas.DataFrame( + { + "A": numpy.array([1, 2, 3], dtype="|u8"), + "B": numpy.array([1, 2, 3], dtype=" str: return f"{type(self).__name__}({self._structure.columns!r})" def __getitem__(self, key: str) -> ArrayAdapter: - """ + # Must compute to determine shape + array = 
self.read([key])[key].values - Parameters - ---------- - key : + # Convert (experimental) pandas.StringDtype to numpy's unicode string dtype + if isinstance(array.dtype, pandas.StringDtype): + import numpy - Returns - ------- + max_size = max((len(i) for i in array.ravel())) + array = array.astype(dtype=numpy.dtype(f" Union[ArrayAdapter, None]: if key not in self.structure().columns: return None - return ArrayAdapter.from_array(self.read([key])[key].values) + return self[key] def items(self) -> Iterator[Tuple[str, ArrayAdapter]]: - yield from ( - (key, ArrayAdapter.from_array(self.read([key])[key].values)) - for key in self._structure.columns - ) + yield from ((key, self[key]) for key in self._structure.columns) def metadata(self) -> JSON: """ diff --git a/tiled/structures/array.py b/tiled/structures/array.py index 53207b84e..901c58fc9 100644 --- a/tiled/structures/array.py +++ b/tiled/structures/array.py @@ -52,7 +52,7 @@ class Kind(str, enum.Enum): unicode = "U" # fixed-length sequence of Py_UNICODE other = "V" # "V" is for "void" -- generic fixed-size chunk of memory - # By default, do not tolerate numpy objectg arrays + # By default, do not tolerate numpy object arrays if os.getenv("TILED_ALLOW_OBJECT_ARRAYS", "0") != "0": object = "O" # Object (i.e. the memory contains a pointer to PyObject)