Skip to content

Commit

Permalink
Merge branch 'hotfix/locale-and-filemagic'
Browse files Browse the repository at this point in the history
  • Loading branch information
turicas committed Sep 3, 2015
2 parents b9e88da + 9dfac85 commit 3c9335d
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 58 deletions.
55 changes: 55 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# rows' Log of Changes

## Version `0.2.0` (under development)

**Released on: (under development)**

- Enhance README
- Refactor `export_to_txt`
- Support lazy objects on `create_table`
- Add `samples` parameter to `create_table`
- Add plugin JSON (thanks [@sxslex](https://github.com/sxslex))


## Version `0.1.1`

**Released on: 2015-09-03**

- Fix code to run on Windows (thanks [@sxslex](https://github.com/sxslex))
- Fix locale (name, default name etc.)
- Remove `filemagic` dependency (waiting for `python-magic` to be available on
  PyPI)
- Write log of changes for `0.1.0` and `0.1.1`


## Version `0.1.0`

**Released on: 2015-08-29**

- Implement `Table` and its basic methods
- Implement basic plugin support with many utilities and the following formats:
  - `csv` (input/output)
  - `html` (input/output)
  - `txt` (output)
  - `xls` (input/output)
- Implement the following field types - many of them with locale support:
  - `ByteField`
  - `BoolField`
  - `IntegerField`
  - `FloatField`
  - `DecimalField`
  - `PercentField`
  - `DateField`
  - `DatetimeField`
  - `UnicodeField`
- Implement basic `Table` operations:
  - `sum`
  - `join`
  - `transform`
  - `serialize`
- Implement a command-line interface with the following subcommands:
  - `convert`
  - `join`
  - `sort`
  - `sum`
- Add examples to the repository
1 change: 0 additions & 1 deletion requirements/production.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
click
filemagic
lxml
requests
unicodecsv
Expand Down
3 changes: 3 additions & 0 deletions rows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@
from rows.plugins.html import import_from_html, export_to_html
except ImportError:
pass


__version__ = '0.1.1'
45 changes: 21 additions & 24 deletions rows/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,25 @@
from rows.utils import import_from_uri, export_to_uri


DEFAULT_INPUT_ENCODING = 'utf-8'
DEFAULT_OUTPUT_ENCODING = 'utf-8'
DEFAULT_INPUT_LOCALE = 'C'
DEFAULT_OUTPUT_LOCALE = 'C'

@click.group()
def cli():
pass


@cli.command(help='Convert table on `source` URI to `destination`')
@click.option('--input-encoding', default='utf-8')
@click.option('--output-encoding', default='utf-8')
@click.option('--input-locale', default='en_US.UTF-8')
@click.option('--output-locale', default='en_US.UTF-8')
@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING)
@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING)
@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE)
@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE)
@click.argument('source')
@click.argument('destination')
def convert(input_encoding, output_encoding, input_locale, output_locale,
source, destination):
input_locale = input_locale.split('.')
output_locale = output_locale.split('.')

with rows.locale_context(input_locale):
table = import_from_uri(source)
Expand All @@ -51,18 +54,16 @@ def convert(input_encoding, output_encoding, input_locale, output_locale,


@cli.command(help='Join tables from `source` URIs using `key(s)` to group rows and save into `destination`')
@click.option('--input-encoding', default='utf-8')
@click.option('--output-encoding', default='utf-8')
@click.option('--input-locale', default='en_US.UTF-8')
@click.option('--output-locale', default='en_US.UTF-8')
@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING)
@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING)
@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE)
@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE)
@click.argument('keys')
@click.argument('sources', nargs=-1, required=True)
@click.argument('destination')
def join(input_encoding, output_encoding, input_locale, output_locale, keys,
sources, destination):
keys = [key.strip() for key in keys.split(',')]
input_locale = input_locale.split('.')
output_locale = output_locale.split('.')

with rows.locale_context(input_locale):
tables = [import_from_uri(source) for source in sources]
Expand All @@ -74,17 +75,15 @@ def join(input_encoding, output_encoding, input_locale, output_locale, keys,


@cli.command(help='Sort from `source` by `key(s)` and save into `destination`')
@click.option('--input-encoding', default='utf-8')
@click.option('--output-encoding', default='utf-8')
@click.option('--input-locale', default='en_US.UTF-8')
@click.option('--output-locale', default='en_US.UTF-8')
@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING)
@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING)
@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE)
@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE)
@click.argument('key')
@click.argument('source')
@click.argument('destination')
def sort(input_encoding, output_encoding, input_locale, output_locale, key,
source, destination):
input_locale = input_locale.split('.')
output_locale = output_locale.split('.')
key = key.replace('^', '-')

with rows.locale_context(input_locale):
Expand All @@ -96,16 +95,14 @@ def sort(input_encoding, output_encoding, input_locale, output_locale, key,


@cli.command(help='Sum tables from `source` URIs and save into `destination`')
@click.option('--input-encoding', default='utf-8')
@click.option('--output-encoding', default='utf-8')
@click.option('--input-locale', default='en_US.UTF-8')
@click.option('--output-locale', default='en_US.UTF-8')
@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING)
@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING)
@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE)
@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE)
@click.argument('sources', nargs=-1, required=True)
@click.argument('destination')
def sum(input_encoding, output_encoding, input_locale, output_locale, sources,
destination):
input_locale = input_locale.split('.')
output_locale = output_locale.split('.')

with rows.locale_context(input_locale):
tables = [import_from_uri(source) for source in sources]
Expand Down
13 changes: 9 additions & 4 deletions rows/localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@
def locale_context(name, category=locale.LC_ALL):

old_name = locale.getlocale(category)
if type(name) is types.UnicodeType:
name = name.split('.')
locale.setlocale(category, name)
if None not in old_name:
old_name = '.'.join(old_name)
if isinstance(name, types.UnicodeType):
name = str(name)

if old_name != name:
locale.setlocale(category, name)
rows.fields.SHOULD_NOT_USE_LOCALE = False
try:
yield
finally:
locale.setlocale(category, old_name)
if old_name != name:
locale.setlocale(category, old_name)
rows.fields.SHOULD_NOT_USE_LOCALE = True
1 change: 0 additions & 1 deletion rows/plugins/txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from rows.operations import serialize
from rows.utils import get_filename_and_fobj


DASH, PLUS, PIPE = '-', '+', '|'

def _max_column_sizes(table, encoding, *args, **kwargs):
Expand Down
9 changes: 4 additions & 5 deletions rows/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from unicodedata import normalize

import magic
import requests

import rows
Expand Down Expand Up @@ -103,7 +102,6 @@ def create_table(data, meta=None, force_headers=None, fields=None,
else:
header = make_header(fields.keys())


# TODO: may reuse max_columns from html
max_columns = max(len(row) for row in table_rows)
assert len(fields) == max_columns
Expand Down Expand Up @@ -137,9 +135,10 @@ def download_file(uri):
content_type = response.headers['content-type']
plugin_name = content_type.split('/')[-1]
except (KeyError, IndexError):
with magic.Magic() as file_type_guesser:
file_type = file_type_guesser.id_buffer(content)
plugin_name = file_type.strip().split()[0]
try:
plugin_name = uri.split('/')[-1].split('.')[-1].lower()
except IndexError:
raise RuntimeError('Could not identify file type.')

tmp = tempfile.NamedTemporaryFile()
filename = '{}.{}'.format(tmp.name, plugin_name)
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@
setup(name='rows',
description='Import and export tabular data easily with Python',
long_description='',
version='0.1.0',
version='0.1.1',
author=u'Álvaro Justen',
author_email='[email protected]',
url='https://github.com/turicas/rows/',
packages=['rows', 'rows.plugins'],
install_requires=['unicodecsv', 'click', 'filemagic', 'requests'],
install_requires=['unicodecsv', 'click', 'requests'],
extras_require = {
'csv': ['unicodecsv'],
'html': ['lxml'], # apt: libxslt-dev libxml2-dev
'cli': ['click', 'filemagic', 'requests'],
'cli': ['click', 'requests'],
'xls': ['xlrd', 'xlwt'],
'all': ['unicodecsv',
'lxml',
'click', 'filemagic', 'requests',
'click', 'requests',
'xlrd', 'xlwt'],
},
keywords=['tabular', 'table', 'csv', 'xls', 'html', 'rows'],
Expand Down
55 changes: 37 additions & 18 deletions tests/tests_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@
from decimal import Decimal

import rows
import platform

from rows import fields

if platform.system() == 'Windows':
locale_name = str('ptb_bra')
else:
locale_name = 'pt_BR.UTF-8'

class FieldsTestCase(unittest.TestCase):

Expand Down Expand Up @@ -94,7 +99,7 @@ def test_IntegerField(self):
types.UnicodeType)
self.assertEqual(fields.IntegerField.deserialize(None), None)

with rows.locale_context('pt_BR.UTF-8'):
with rows.locale_context(locale_name):
self.assertEqual(fields.IntegerField.serialize(42000), '42000')
self.assertIs(type(fields.IntegerField.serialize(42000)),
types.UnicodeType)
Expand All @@ -121,7 +126,7 @@ def test_FloatField(self):
self.assertIs(type(fields.FloatField.serialize(42.0)),
types.UnicodeType)

with rows.locale_context('pt_BR.UTF-8'):
with rows.locale_context(locale_name):
self.assertEqual(fields.FloatField.serialize(42000.0),
'42000,000000')
self.assertIs(type(fields.FloatField.serialize(42000.0)),
Expand Down Expand Up @@ -152,9 +157,11 @@ def test_DecimalField(self):
Decimal('21.21657469231'))
self.assertEqual(fields.DecimalField.deserialize(None), None)

with rows.locale_context('pt_BR.UTF-8'):
self.assertEqual(types.UnicodeType,
type(fields.DecimalField.serialize(deserialized)))
with rows.locale_context(locale_name):
self.assertEqual(
types.UnicodeType,
type(fields.DecimalField.serialize(deserialized))
)
self.assertEqual(fields.DecimalField.serialize(Decimal('4200')),
'4200')
self.assertEqual(fields.DecimalField.serialize(Decimal('42.0')),
Expand All @@ -163,9 +170,13 @@ def test_DecimalField(self):
'42000,0')
self.assertEqual(fields.DecimalField.deserialize('42.000,00'),
Decimal('42000.00'))
self.assertEqual(fields.DecimalField.serialize(Decimal('42000.0'),
grouping=True),
'42.000,0')
self.assertEqual(
fields.DecimalField.serialize(
Decimal('42000.0'),
grouping=True
),
'42.000,0'
)

def test_PercentField(self):
deserialized = Decimal('0.42010')
Expand All @@ -186,10 +197,11 @@ def test_PercentField(self):
self.assertEqual(fields.PercentField.serialize(Decimal('42.010')),
'4201.0%')
self.assertEqual(fields.PercentField.serialize(Decimal('0.01')), '1%')

with rows.locale_context('pt_BR.UTF-8'):
self.assertEqual(type(fields.PercentField.serialize(deserialized)),
types.UnicodeType)
with rows.locale_context(locale_name):
self.assertEqual(
type(fields.PercentField.serialize(deserialized)),
types.UnicodeType
)
self.assertEqual(fields.PercentField.serialize(Decimal('42.0')),
'4200%')
self.assertEqual(fields.PercentField.serialize(Decimal('42000.0')),
Expand Down Expand Up @@ -256,9 +268,13 @@ def test_UnicodeField(self):
types.UnicodeType)
self.assertIs(type(fields.UnicodeField.deserialize('test')),
fields.UnicodeField.TYPE)
self.assertEqual(fields.UnicodeField.deserialize('Álvaro'.encode('utf-8'),
encoding='utf-8'),
'Álvaro')
self.assertEqual(
fields.UnicodeField.deserialize(
'Álvaro'.encode('utf-8'),
encoding='utf-8'
),
'Álvaro'
)
self.assertEqual(fields.UnicodeField.deserialize('Álvaro'),
'Álvaro')
self.assertIs(fields.UnicodeField.deserialize(None), None)
Expand Down Expand Up @@ -286,11 +302,14 @@ def setUp(self):
'date_column': fields.DateField,
'datetime_column': fields.DatetimeField,
'unicode_column': fields.UnicodeField,
'null_column': fields.ByteField,}
'null_column': fields.ByteField, }

def test_detect_types_utf8(self):
result = fields.detect_types(self.fields, self.data,
encoding='utf-8')
result = fields.detect_types(
self.fields,
self.data,
encoding='utf-8'
)
self.assertEqual(type(result), collections.OrderedDict)
self.assertEqual(result.keys(), self.fields)
self.assertDictEqual(dict(result), self.expected)
Expand Down
7 changes: 6 additions & 1 deletion tests/tests_localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import unicode_literals

import unittest
import platform

import rows
import rows.fields
Expand All @@ -33,6 +34,10 @@ def test_locale_context_present_in_main_namespace(self):

def test_locale_context(self):
self.assertTrue(rows.fields.SHOULD_NOT_USE_LOCALE)
with locale_context('pt_BR.UTF-8'):
if platform.system() == 'Windows':
name = str('ptb_bra')
else:
name = 'pt_BR.UTF-8'
with locale_context(name):
self.assertFalse(rows.fields.SHOULD_NOT_USE_LOCALE)
self.assertTrue(rows.fields.SHOULD_NOT_USE_LOCALE)

0 comments on commit 3c9335d

Please sign in to comment.