diff --git a/peakina/datasource.py b/peakina/datasource.py
index 02362177..06567918 100644
--- a/peakina/datasource.py
+++ b/peakina/datasource.py
@@ -74,6 +74,15 @@ def get_metadata(self) -> Dict[str, Any]:
             return {}  # no metadata for matched datasources
         with self.fetcher.open(self.uri) as f:
             assert self.type is not None
+
+            allowed_params = get_reader_allowed_params(self.type)
+            # Auto-detect encoding if not present
+            encoding = self.reader_kwargs.get("encoding")
+            if "encoding" in allowed_params:
+                if not validate_encoding(f.name, encoding):
+                    encoding = detect_encoding(f.name)
+                    self.reader_kwargs["encoding"] = encoding
+
             return get_metadata(f.name, self.type, self.reader_kwargs)
 
     @staticmethod
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 688b8926..05a6471f 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -102,6 +102,13 @@ def test_csv_western_encoding(path):
     df_meta = ds.get_metadata()
     assert df_meta == {"df_rows": 2, "total_rows": 2}
 
+    # Encoding auto-detection
+    ds = DataSource(path("encoded_western_short.csv"))
+    df = ds.get_df()
+    assert df.shape == (2, 19)
+    df_meta = ds.get_metadata()
+    assert df_meta == {"df_rows": 2, "total_rows": 2}
+
 
 def test_csv_header_row(path):
     """