diff --git a/tiled/_tests/test_consolidated.py b/tiled/_tests/test_consolidated.py index 4a332d281..103be7520 100644 --- a/tiled/_tests/test_consolidated.py +++ b/tiled/_tests/test_consolidated.py @@ -79,10 +79,8 @@ def test_iterate_parts(context): def test_iterate_columns(context): client = from_context(context) for col in client["x"]: - if col not in ("A", "C"): - # TODO: reading string columns raises TypeError: Cannot interpret 'string[pyarrow]' as a data type - client["x"][col].read() - client[f"x/{col}"].read() + client["x"][col].read() + client[f"x/{col}"].read() def test_metadata(context): diff --git a/tiled/_tests/test_dataframe.py b/tiled/_tests/test_dataframe.py index 1df2163bf..01570356b 100644 --- a/tiled/_tests/test_dataframe.py +++ b/tiled/_tests/test_dataframe.py @@ -41,6 +41,17 @@ pandas.DataFrame({f"column_{i:03d}": i * numpy.ones(5) for i in range(10)}), npartitions=1, ), + # a dataframe with mixed types + "diverse": DataFrameAdapter.from_pandas( + pandas.DataFrame( + { + "A": numpy.array([1, 2, 3], dtype="|u8"), + "B": numpy.array([1, 2, 3], dtype=" str: return f"{type(self).__name__}({self._structure.columns!r})" def __getitem__(self, key: str) -> ArrayAdapter: - """ + # Must compute to determine shape + array = self.read([key])[key].values - Parameters - ---------- - key : + # Convert (experimental) pandas.StringDtype to numpy's unicode string dtype + if isinstance(array.dtype, pandas.StringDtype): + import numpy - Returns - ------- + max_size = max((len(i) for i in array.ravel())) + array = array.astype(dtype=numpy.dtype(f" Union[ArrayAdapter, None]: if key not in self.structure().columns: return None - return ArrayAdapter.from_array(self.read([key])[key].values) + return self[key] def items(self) -> Iterator[Tuple[str, ArrayAdapter]]: - yield from ( - (key, ArrayAdapter.from_array(self.read([key])[key].values)) - for key in self._structure.columns - ) + yield from ((key, self[key]) for key in self._structure.columns) def metadata(self) -> JSON: """ diff --git a/tiled/structures/array.py b/tiled/structures/array.py index 53207b84e..901c58fc9 100644 --- a/tiled/structures/array.py +++ b/tiled/structures/array.py @@ -52,7 +52,7 @@ class Kind(str, enum.Enum): unicode = "U" # fixed-length sequence of Py_UNICODE other = "V" # "V" is for "void" -- generic fixed-size chunk of memory - # By default, do not tolerate numpy objectg arrays + # By default, do not tolerate numpy object arrays if os.getenv("TILED_ALLOW_OBJECT_ARRAYS", "0") != "0": object = "O" # Object (i.e. the memory contains a pointer to PyObject)