Skip to content

Commit

Permalink
Raise error for incorrect JSON serialization (#7273)
Browse files (browse the repository at this point in the history)
Co-authored-by: Varad Bhatnagar <[email protected]>
  • Loading branch information
varadhbhatnagar and Varad Bhatnagar authored Nov 18, 2024
1 parent 01f91ba commit 2049c00
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/datasets/io/json.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,6 +104,11 @@ def write(self) -> int:
if compression not in [None, "infer", "gzip", "bz2", "xz"]:
raise NotImplementedError(f"`datasets` currently does not support {compression} compression")

if not lines and self.batch_size < self.dataset.num_rows:
raise NotImplementedError(
"Output JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead."
)

if isinstance(self.path_or_buf, (str, bytes, os.PathLike)):
with fsspec.open(
self.path_or_buf, "wb", compression=compression, **(self.storage_options or {})
Expand Down

0 comments on commit 2049c00

Please sign in to comment.