Skip to content

Commit

Permalink
Merge pull request #171 from ToucanToco/prepare-v0.7.18
Browse files Browse the repository at this point in the history
Prepare v0.7.18
  • Loading branch information
lukapeschke authored Jun 8, 2022
2 parents ad2ee24 + 31e92b4 commit 3589c2f
Show file tree
Hide file tree
Showing 11 changed files with 457 additions and 56 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## [0.7.18]

### Changed

- Added `text/xml` to supported XML mimetypes
- Made `peakina.helpers.detect_type` return `GEODATA` when the stdlib guesses a GeoJSON-like mimetype

## [0.7.11] - 2022-04-05

### Changed
Expand Down
13 changes: 11 additions & 2 deletions peakina/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
excel_meta,
read_csv,
read_excel,
read_geo_data,
read_json,
read_xml,
)
Expand All @@ -42,7 +43,7 @@ class TypeInfos(NamedTuple):


# For files without MIME types, we make fake MIME types based on detected extension
CUSTOM_MIMETYPES = {".parquet": "peakina/parquet"}
CUSTOM_MIMETYPES = {".parquet": "peakina/parquet", ".geojson": "peakina/geo"}

EXTRA_PEAKINA_READER_KWARGS = ["preview_offset", "preview_nrows"]

Expand All @@ -62,13 +63,17 @@ class TypeInfos(NamedTuple):
["encoding", "decimal"],
excel_meta,
),
"geodata": TypeInfos(
["peakina/geo"],
read_geo_data,
),
"json": TypeInfos(
["application/json"],
read_json,
["filter"], # this option comes from read_json, which @wraps(pd.read_json)
),
"parquet": TypeInfos(["peakina/parquet"], pd.read_parquet),
"xml": TypeInfos(["application/xml"], read_xml),
"xml": TypeInfos(["application/xml", "text/xml"], read_xml),
}


Expand All @@ -81,6 +86,7 @@ class TypeEnum(str, Enum):
JSON = "json"
PARQUET = "parquet"
XML = "xml"
GEODATA = "geodata"


def detect_type(filepath: str, is_regex: bool = False) -> Optional[TypeEnum]:
Expand All @@ -92,6 +98,9 @@ def detect_type(filepath: str, is_regex: bool = False) -> Optional[TypeEnum]:
filepath = filepath.rstrip("$")
mimetype, _ = mimetypes.guess_type(filepath)

if mimetype in ("application/geo+json", "application/vnd.geo+json"):
return TypeEnum.GEODATA

# Fallback on custom MIME types
if mimetype is None:
_, fileext = os.path.splitext(filepath)
Expand Down
29 changes: 23 additions & 6 deletions peakina/io/ftp/ftp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ def connect( # type: ignore[override]
self.port = port or 990
self.timeout = timeout

self._sock = socket.create_connection((self.host, self.port), self.timeout)
self.af = self._sock.family
self.sock: ssl.SSLSocket = self.context.wrap_socket(self._sock, server_hostname=self.host)
def _setup_sock() -> socket.socket:
_sock = socket.create_connection((self.host, self.port), self.timeout)
self.af = _sock.family
return _sock

try:
self.sock = self.context.wrap_socket(_setup_sock(), server_hostname=self.host)
except ssl.SSLError: # pragma: no cover
# in some cases we must fallback to:
self.sock = _setup_sock()

self.file = self.sock.makefile("r")
self.welcome = self.getresp()
return self.welcome
Expand All @@ -57,7 +65,7 @@ def ntransfercmd( # type: ignore[override]
conn, size = ftplib.FTP.ntransfercmd(self, cmd, rest)
if self._prot_p: # type: ignore[attr-defined]
conn = self.context.wrap_socket(
conn, server_hostname=self.host, session=self.sock.session
conn, server_hostname=self.host, session=self.sock.session # type: ignore[union-attr]
) # this is the fix
return conn, size

Expand All @@ -78,8 +86,17 @@ def ftps_client(params: ParseResult) -> Generator[Tuple[FTPS, str], None, None]:
ftps = FTPS()
try:
ftps.connect(host=params.hostname or "", port=params.port, timeout=3)
ftps.prot_p()
ftps.login(user=params.username or "", passwd=params.password or "")
try:
ftps.prot_p()
ftps.login(user=params.username or "", passwd=params.password or "")
except Exception as e:
if "SSL/TLS required on the control channel" in str(e):
# This error means we should try the other way: first login, then prot_p:
ftps.login(user=params.username or "", passwd=params.password or "")
ftps.prot_p()
else:
raise

yield ftps, params.path

finally:
Expand Down
3 changes: 3 additions & 0 deletions peakina/readers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .csv import csv_meta, read_csv
from .excel import excel_meta, read_excel
from .geodata import read_geo_data
from .json import read_json
from .xml import read_xml

Expand All @@ -14,4 +15,6 @@
"read_json",
# XML
"read_xml",
# GEOJSON
"read_geo_data",
)
16 changes: 16 additions & 0 deletions peakina/readers/geodata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from functools import wraps
from typing import Any, Optional

import geopandas as gpd


# Thin wrapper around ``gpd.read_file`` that adds the preview options.
# Plain comments are used rather than a docstring because ``@wraps`` replaces
# this function's ``__doc__`` with the one of ``gpd.read_file``.
#
# path: location of the file, passed as-is to ``gpd.read_file``.
# preview_offset: number of leading rows to skip (0 means start at the first row).
# preview_nrows: maximum number of rows to return; a falsy value (``None`` or 0)
#     means "no limit", i.e. everything from ``preview_offset`` to the end.
# kwargs: forwarded untouched to ``gpd.read_file``.
# Returns whatever ``gpd.read_file`` returns for the selected rows.
@wraps(gpd.read_file)
def read_geo_data(
    path: str, preview_offset: int = 0, preview_nrows: Optional[int] = None, **kwargs: Any
) -> gpd.GeoDataFrame:
    if preview_nrows and not preview_offset:
        # No offset: an integer ``rows`` selects the first ``preview_nrows`` rows.
        return gpd.read_file(path, rows=preview_nrows, **kwargs)

    # A slice stop is an absolute row index, so it has to be shifted by the offset
    # to yield ``preview_nrows`` rows. The previous ``preview_nrows + 1`` stop was
    # only correct when ``preview_offset`` happened to be 1.
    stop = preview_offset + preview_nrows if preview_nrows else None
    return gpd.read_file(path, rows=slice(preview_offset, stop), **kwargs)
Loading

0 comments on commit 3589c2f

Please sign in to comment.