# Patch summary (commentary only; text before the first "diff --git" header
# is skipped by patch tools).
#
# Purpose: stop the HTML reader tests from building file URLs by string
# concatenation ('file://' + path) and build them from the native path
# instead; the release note records this as "Fixed html tests on win32"
# (issue 4580).
#
# Changes per file:
#   .gitignore                    - ignore *.pdb files.
#   doc/source/release.rst        - add the release-note bullet for the fix.
#   pandas/io/common.py           - import pathname2url and urljoin on both
#                                   the PY3 and the non-PY3 branch, and add
#                                   file_path_to_url(path), which returns
#                                   urljoin('file:', pathname2url(path)).
#   pandas/io/tests/test_html.py  - use file_path_to_url() in test_file_url
#                                   and test_regex_idempotency; rename
#                                   get_elements_from_url to
#                                   get_elements_from_file, drop its base_url
#                                   parameter, and update its caller.
#
# NOTE(review): this patch had been collapsed onto a few very long lines.
# The line structure below was rebuilt from the hunk headers (old/new line
# counts) and from Python syntax; leading whitespace on context lines is
# therefore inferred - confirm against the target files before applying.
# Blank context lines consist of a single space.
diff --git a/.gitignore b/.gitignore
index 201a965a0f409..edc6a54cf4345 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,4 @@ pandas/io/*.json
 .pydevproject
 .settings
 .idea
+*.pdb
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 49de8dddd7210..77d86b8a7a9f1 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -755,6 +755,8 @@ Bug Fixes
   - Bug when renaming then set_index on a DataFrame (:issue:`5344`)
   - Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`)
     (thanks for catching this @yarikoptic!)
+  - Fixed html tests on win32. (:issue:`4580`)
+
 
 pandas 0.12.0
 -------------
diff --git a/pandas/io/common.py b/pandas/io/common.py
index aa5fdb29f3b5b..6b8186e253199 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -9,18 +9,18 @@
 
 
 if compat.PY3:
-    from urllib.request import urlopen
+    from urllib.request import urlopen, pathname2url
     _urlopen = urlopen
     from urllib.parse import urlparse as parse_url
     import urllib.parse as compat_parse
-    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode
+    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode, urljoin
     from urllib.error import URLError
     from http.client import HTTPException
 else:
     from urllib2 import urlopen as _urlopen
-    from urllib import urlencode
+    from urllib import urlencode, pathname2url
     from urlparse import urlparse as parse_url
-    from urlparse import uses_relative, uses_netloc, uses_params
+    from urlparse import uses_relative, uses_netloc, uses_params, urljoin
     from urllib2 import URLError
     from httplib import HTTPException
     from contextlib import contextmanager, closing
@@ -134,6 +134,21 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
     return filepath_or_buffer, None
 
 
+def file_path_to_url(path):
+    """
+    converts an absolute native path to a FILE URL.
+
+    Parameters
+    ----------
+    path : a path in native format
+
+    Returns
+    -------
+    a valid FILE URL
+    """
+    return urljoin('file:', pathname2url(path))
+
+
 # ZipFile is not a context manager for <= 2.6
 # must be tuple index here since 2.6 doesn't use namedtuple for version_info
 if sys.version_info[1] <= 6:
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index 71567fe2e599a..c26048d4cf20b 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -21,7 +21,7 @@
 from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index,
                     date_range, Series)
 from pandas.compat import map, zip, StringIO, string_types
-from pandas.io.common import URLError, urlopen
+from pandas.io.common import URLError, urlopen, file_path_to_url
 from pandas.io.html import read_html
 import pandas.util.testing as tm
 
@@ -311,7 +311,7 @@ def test_invalid_url(self):
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url, 'First', attrs={'id': 'table'})
+        dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
         for df in dfs:
             tm.assert_isinstance(df, DataFrame)
@@ -362,7 +362,7 @@ def test_multiindex_header_index_skiprows(self):
     @slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url,
+        dfs = self.read_html(file_path_to_url(url),
                              match=re.compile(re.compile('Florida')),
                              attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
@@ -637,9 +637,9 @@ def test_invalid_flavor():
                              flavor='not a* valid**++ flaver')
 
 
-def get_elements_from_url(url, element='table', base_url="file://"):
+def get_elements_from_file(url, element='table'):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = "".join([base_url, url])
+    url = file_path_to_url(url)
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')
@@ -651,7 +651,7 @@ def test_bs4_finds_tables():
     filepath = os.path.join(DATA_PATH, "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
-        assert get_elements_from_url(filepath, 'table')
+        assert get_elements_from_file(filepath, 'table')
 
 
 def get_lxml_elements(url, element):