diff --git a/.travis.yml b/.travis.yml index c98493889693..03ba00510991 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,8 +12,8 @@ env: install: - pip install --only-binary=numpy,scipy numpy scipy - if [ $PANDAS=latest ]; then pip install pandas; else pip install pandas==$PANDAS; fi - - pip install coverage coveralls - - pip install -r requirements.txt + - pip install -r requirements-dev.txt script: - - coverage run --source great_expectations -m unittest tests + - pytest --cov=great_expectations tests/ +after_success: - coveralls diff --git a/CONTRIBUTING b/CONTRIBUTING new file mode 100644 index 000000000000..62d0b40f3c7b --- /dev/null +++ b/CONTRIBUTING @@ -0,0 +1,55 @@ + +## How to contribute +We're excited for contributions to Great Expectations. If you see places where the code or documentation could be improved, please get involved! + +Submitting your changes +Once your changes and tests are ready to submit for review: + +1. Test your changes + + Run the test suite to make sure that nothing is broken. See the section on testing below for help running tests. (Hint: `pytest` from the great_expectations root.) + +2. Sign the Contributor License Agreement + + **When you contribute code, you affirm that the contribution is your original work and that you license the work to the project under the project’s open source license. Whether or not you state this explicitly, by submitting any copyrighted material via pull request, email, or other means you agree to license the material under the project’s open source license and warrant that you have the legal authority to do so.** + + {Aspirational:} Please make sure you have signed our Contributor License Agreement. We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. + +3. 
Rebase your changes + + Update your local repository with the most recent code from the main Great Expectations repository, and rebase your branch on top of the latest `develop` branch. We prefer small, incremental commits, because it makes the thought process behind changes easier to review. + +4. Submit a pull request + + Push your local changes to your forked copy of the repository and submit a pull request. In the pull request, choose a title which sums up the changes that you have made, and in the body provide more details about what your changes do. Also mention the number of the issue where discussion has taken place, eg "Closes #123". + +5. Participate in review + + There will probably be discussion about the pull request. It's normal for a request to require some changes before merging it into the main Great Expectations project. We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. + +## Testing +Currently, (as of 3/9/2018) the tests are a bit of a mess. Consolidating them is an important next step. That means two things for contributors: + +First, don't worry about the mess. Write tests in whatever style suits your fancy. We (the core contributors) will worry about refactoring them later. As long as your thing works and is well-tested, you're good. + +(This is **not** an excuse to avoid writing tests. All contributions must be under test. We're just not dogmatic about the style of those tests today.) + +Second, if you have opinions on the testing framework, we'd love to hear them! Feedback based on your perspective and experience is very welcome. + +Most of the discussion to date is encapsulated here: https://github.com/great-expectations/great_expectations/issues/167. The `refactor_tests` branch is intended as a pilot implementation. 
+ +## Conventions and Style + +* Avoid abbreviations (`column_idx` < `column_index`) +* Use unambiguous expectation names, even if they're a bit longer. (`expect_columns_to_be` < `expect_columns_to_match_ordered_list`) + +Expectations aren't just tests---they're also a kind of data documentation. Because we want expectations to be easy to interpret, we're avoiding abbreviations almost everywhere. We're not entirely consistent about this yet, but there's pretty strong consensus among early team and users that we should be heading in that direction. + +These guidelines should be followed consistently for methods and variables exposed in the API. They aren't intended to be strict rules for every internal line of code in every function. + +* Expectation names should reflect their decorators. + +`expect_table_...` for methods decorated directly with `@expectation` +`expect_column_values_...` for `@column_map_expectation` +`expect_column_...` for `@column_aggregate_expectation` +`expect_column_pair_values...` for `@column_pair_map_expectation` diff --git a/Changelog.md b/Changelog.md deleted file mode 100644 index 7c2745c22a60..000000000000 --- a/Changelog.md +++ /dev/null @@ -1,18 +0,0 @@ -Great Expectations Changelog - -v.0.3.2 ------ -* Include requirements file in source dist to support conda - -v.0.3.1 ------ -* Fix infinite recursion error when building custom expectations -* Catch dateutil parsing overflow errors - -0.2 ---- -* Distributional expectations and associated helpers are improved and renamed to be more clear regarding the tests they apply -* Expectation decorators have been refactored significantly to streamline implementing expectations and support custom expectations -* API and examples for custom expectations are available -* New output formats are available for all expectations -* Significant improvements to test suite and compatibility diff --git a/MANIFEST.in b/MANIFEST.in index ab30e9aceec1..cfe22042be05 100644 --- a/MANIFEST.in +++ b/MANIFEST.in 
@@ -1 +1,4 @@ include *.txt +include LICENSE +graft tests +global-exclude *.py[co] \ No newline at end of file diff --git a/README.md b/README.md index 4a200234420a..2378fef88f1e 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,10 @@ Great Expectations *Always know what to expect from your data.* - What is great_expectations? -------------------------------------------------------------------------------- -Great Expectations is a python framework for bringing data pipelines and products under test. +Great Expectations is a framework that helps teams save time and promote analytic integrity with a new twist on automated testing: pipeline tests. Pipeline tests are applied to data (instead of code) and at batch time (instead of compile or deploy time). Software developers have long known that automated testing is essential for managing complex codebases. Great Expectations brings the same discipline, confidence, and acceleration to data science and engineering teams. @@ -31,5 +30,56 @@ To get more done with data, faster. Teams use great_expectations to * Simplify debugging data pipelines if (when) they break. * Codify assumptions used to build models when sharing with distributed teams or other analysts. +How do I get started? +-------------------------------------------------------------------------------- + +It's easy! Just use pip install: + + + $ pip install great_expectations + +You can also clone the repository, which includes examples of using great_expectations. + + $ git clone https://github.com/great-expectations/great_expectations.git + $ pip install great_expectations/ + +What expectations are available? 
+-------------------------------------------------------------------------------- + +Expectations include: +- `expect_table_row_count_to_equal` +- `expect_column_values_to_be_unique` +- `expect_column_values_to_be_in_set` +- `expect_column_mean_to_be_between` +- ...and many more + +Visit the [glossary of expectations](http://great-expectations.readthedocs.io/en/latest/glossary.html) for a complete list of expectations that are currently part of the great expectations vocabulary. + +Can I contribute? +-------------------------------------------------------------------------------- +Absolutely. Yes, please. Start [here](https://github.com/great-expectations/great_expectations/blob/docs/contributor_docs/CONTRIBUTING), and don't be shy with questions! + + +How do I learn more? +-------------------------------------------------------------------------------- + +For full documentation, visit [Great Expectations on readthedocs.io](http://great-expectations.readthedocs.io/en/latest/). + +[Down with Pipeline Debt!](https://medium.com/@expectgreatdata/down-with-pipeline-debt-introducing-great-expectations-862ddc46782a) explains the core philosophy behind Great Expectations. Please give it a read, and clap, follow, and share while you're at it. + +For quick, hands-on introductions to Great Expectations' key features, check out our walkthrough videos: + +* [Introduction to Great Expectations](https://www.youtube.com/watch?v=-_0tG7ACNU4) +* [Using Distributional Expectations](https://www.youtube.com/watch?v=l3DYPVZAUmw&t=20s) + + +What's the best way to get in touch with the Great Expectations team? +-------------------------------------------------------------------------------- + +[Issues on GitHub](https://github.com/great-expectations/great_expectations/issues). If you have questions, comments, feature requests, etc., [opening an issue](https://github.com/great-expectations/great_expectations/issues/new) is definitely the best path forward. 
+ + +Great Expectations doesn't do X. Is it right for my use case? +-------------------------------------------------------------------------------- -Visit [the Great Expectations documentation](http://great-expectations.readthedocs.io/en/latest/) for more info. +It depends. If you have needs that the library doesn't meet yet, please [upvote an existing issue(s)](https://github.com/great-expectations/great_expectations/issues) or [open a new issue](https://github.com/great-expectations/great_expectations/issues/new) and we'll see what we can do. Great Expectations is under active development, so your use case might be supported soon. diff --git a/bin/great_expectations b/bin/great_expectations index dfea7a357ba2..578c91d6f174 100755 --- a/bin/great_expectations +++ b/bin/great_expectations @@ -18,22 +18,13 @@ def initialize(): @argh.arg('data_set') @argh.arg('expectations_config_file') -@argh.arg('--output_format', '-o', default="SUMMARY") +@argh.arg('--result_format', '-o', default="SUMMARY") @argh.arg('--catch_exceptions', '-e', default=True) -@argh.arg('--include_config', '-n', default=None) @argh.arg('--only_return_failures', '-f', default=False) @argh.arg('--custom_dataset_module', '-m', default=None) @argh.arg('--custom_dataset_class', '-c', default=None) def validate(data_set, expectations_config_file, **kwargs): - if kwargs["include_config"]: - if kwargs["include_config"] == "True": - kwargs["include_config"] = True - elif kwargs["include_config"] == "False": - kwargs["include_config"] = False - else: - raise ValueError("includ_config expects None, True, or False. 
Got "+kwargs["include_config"]+" instead.") - expectations_config = json.load(open(expectations_config_file)) if kwargs["custom_dataset_module"]: @@ -43,14 +34,13 @@ def validate(data_set, expectations_config_file, **kwargs): dataset_class = getattr(custom_module, kwargs["custom_dataset_class"]) else: - dataset_class = ge.dataset.PandasDataSet + dataset_class = ge.dataset.PandasDataset df = ge.read_csv(data_set, expectations_config=expectations_config, dataset_class=dataset_class) result = df.validate( - output_format=kwargs["output_format"], + result_format=kwargs["result_format"], catch_exceptions=kwargs["catch_exceptions"], - include_config=kwargs["include_config"], only_return_failures=kwargs["only_return_failures"], ) @@ -59,4 +49,4 @@ def validate(data_set, expectations_config_file, **kwargs): argh.dispatch_commands([ initialize, validate, -]) \ No newline at end of file +]) diff --git a/docs/source/conf.py b/docs/source/conf.py index ff60079e2a78..55064cdb59af 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ # General information about the project. project = u'great_expectations' -copyright = u'2017, The Great Expectations Team' +copyright = u'2018, The Great Expectations Team' author = u'The Great Expectations Team' # The version info for the project you're documenting, acts as replacement for @@ -100,7 +100,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +#html_static_path = ['_static'] # -- Options for Napoleon Extension -------------------------------------------- diff --git a/docs/source/conventions.rst b/docs/source/conventions.rst index 7dcc703254e0..7368ed7db94e 100644 --- a/docs/source/conventions.rst +++ b/docs/source/conventions.rst @@ -11,6 +11,6 @@ Naming conventions Extending Great Expectations ================================================================================ -When implementing an expectation defined in the base DataSet for a new backend, add the `@DocInherit` decorator first to use the default DataSet documentation for the expectation. That can help users of your DataSet see consistent documentation no matter which backend is implementing the great_expectations API. +When implementing an expectation defined in the base `Dataset` for a new backend, add the `@DocInherit` decorator first to use the default dataset documentation for the expectation. That can help users of your dataset see consistent documentation no matter which backend is implementing the great_expectations API. -`@DocInherit` overrides your function's __get__ method with one that will replace the local docstring with the docstring from its parent. It is defined in `dataset.util`. +`@DocInherit` overrides your function's __get__ method with one that will replace the local docstring with the docstring from its parent. It is defined in `dataset.util`. diff --git a/docs/source/custom_expectations.rst b/docs/source/custom_expectations.rst index 7b4b31cda04d..0a51a7663730 100644 --- a/docs/source/custom_expectations.rst +++ b/docs/source/custom_expectations.rst @@ -21,77 +21,114 @@ Side note: in future versions, Great Expectations will probably grow to include The easy way -------------------------------------------------------------------------------- -1. Create a subclass from the DataSet class of your choice +1. Create a subclass from the dataset class of your choice 2. 
Define custom functions containing your business logic -3. Use the `@column_map_expectation` and `@column_aggregate_expectation` decorators to turn them into full Expectations +3. Use the `column_map_expectation` and `column_aggregate_expectation` decorators to turn them into full Expectations. Note that each dataset class implements its own versions of `@column_map_expectation` and `@column_aggregate_expectation`, so you should consult the documentation of each class to ensure you +are returning the correct information to the decorator. Note: following Great Expectations :ref:`naming_conventions` is highly recommended, but not strictly required. If you want to confuse yourself with bad names, the package won't stop you. -.. code-block:: bash +For example, in Pandas: + +`@MetaPandasDataset.column_map_expectation` decorates a custom function, wrapping it with all the business logic required to turn it into a fully-fledged Expectation. This spares you the hassle of defining logic to handle required arguments like `mostly` and `result_format`. Your custom function can focus exclusively on the business logic of passing or failing the expectation. + +To work with these decorators, your custom function must accept two arguments: `self` and `column`. When your function is called, `column` will contain all the non-null values in the given column. Your function must return a series of boolean values in the same order, with the same index. + +`@MetaPandasDataset.column_aggregate_expectation` accepts `self` and `column`. It must return a dictionary containing a boolean `success` value, and a nested dictionary called `result` which contains an `observed_value` argument. + - from great_expectations.dataset import PandasDataSet, column_expectation, elementwise_expectation +.. 
code-block:: python - class CustomPandasDataSet(ge.dataset.PandasDataSet): + from great_expectations.dataset import PandasDataset, MetaPandasDataset - @column_map_expectation - def expect_column_values_to_equal_2(self, series): - return series.map(lambda x: x==2) + class CustomPandasDataset(PandasDataset): - @column_aggregate_expectation - def expect_column_mode_to_equal_0(self, series): - mode = series.mode[0] + @MetaPandasDataset.column_map_expectation + def expect_column_values_to_equal_2(self, column): + return column.map(lambda x: x==2) + + @MetaPandasDataset.column_aggregate_expectation + def expect_column_mode_to_equal_0(self, column): + mode = column.mode()[0] return { "success" : mode == 0, - "true_value" : mode, - "summary_obj" : {} + "result": { + "observed_value": mode, + } } -`@column_map_expectation` decorates a custom function, wrapping it with all the business logic required to turn it into a fully-fledged Expectation. This spares you the hassle of defining logic to handle required arguments like `mostly` and `output_format`. Your custom function can focus exclusively on the business logic of passing or failing the expectation. +For SqlAlchemyDataset, the decorators work slightly differently. See the MetaSqlAlchemy class docstrings for more information. + +.. code-block:: python + + import sqlalchemy as sa + from great_expectations.dataset import SqlAlchemyDataset, MetaSqlAlchemyDataset -To work with these decorators, your custom function must accept two arguments: `self` and `series`. When your function is called, `series` will contain all the non-null values in the given column. Your function must return a series of boolean values in the same order, with the same index. 
+ class CustomSqlAlchemyDataset(SqlAlchemyDataset): + + @MetaSqlAlchemyDataset.column_map_expectation + def expect_column_values_to_equal_2(self, column): + return (sa.column(column) == 2) + + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_mode_to_equal_0(self, column): + mode_query = sa.select([ + sa.column(column).label('value'), + sa.func.count(sa.column(column)).label('frequency') + ]).select_from(sa.table(self.table_name)).group_by(sa.column(column)).order_by(sa.desc(sa.column('frequency'))) + + mode = self.engine.execute(mode_query).scalar() + return { + "success": mode == 0, + "result": { + "observed_value": mode, + } + } -`@column_aggregate_expectation` accepts `self` and `series`. It must return a dictionary containing a boolean `success` value, and a `true_value` argument. The hard way -------------------------------------------------------------------------------- -1. Create a subclass from the DataSet class of your choice +1. Create a subclass from the dataset class of your choice 2. Write the whole expectation yourself 3. Decorate it with the `@expectation` decorator -This is more complicated, since you have to handle all the logic of additional parameters and output formats. Pay special attention to proper formatting of :ref:`output_format`. Malformed result objects can break Great Expectations in subtle and unanticipated ways. +This is more complicated, since you have to handle all the logic of additional parameters and output formats. Pay special attention to proper formatting of :ref:`result_format`. Malformed result objects can break Great Expectations in subtle and unanticipated ways. .. 
code-block:: bash - from great_expectations.dataset import PandasDataSet, expectation + from great_expectations.dataset import PandasDataset + + class CustomPandasDataset(PandasDataset): + + @Dataset.expectation(["column", "mostly"]) + def expect_column_values_to_equal_1(self, column, mostly=None): + not_null = self[column].notnull() - class CustomPandasDataSet(ge.dataset.PandasDataSet): + result = self[column][not_null] == 1 + unexpected_values = list(self[column][not_null][result==False]) - @expectation - def expect_column_values_to_equal_1(self, column, mostly=None, suppress_expectations=False): - notnull = self[column].notnull() - - result = self[column][notnull] == 1 - exceptions = list(self[column][notnull][result==False]) - if mostly: #Prevent division-by-zero errors - if len(not_null_values) == 0: + if len(not_null) == 0: return { 'success':True, - 'exception_list':exceptions + 'unexpected_list':unexpected_values, + 'unexpected_index_list':self.index[result], } - percent_properly_formatted = float(sum(properly_formatted))/len(not_null_values) + percent_equaling_1 = float(sum(result))/len(not_null) return { - "success" : percent_properly_formatted >= mostly, - "exception_list" : exceptions + "success" : percent_equaling_1 >= mostly, + "unexpected_list" : unexpected_values[:20], + "unexpected_index_list" : list(self.index[result==False])[:20], } else: return { - "success" : len(exceptions) == 0, - "exception_list" : exceptions + "success" : len(unexpected_values) == 0, + "unexpected_list" : unexpected_values[:20], + "unexpected_index_list" : list(self.index[result==False])[:20], } The quick way @@ -101,11 +138,11 @@ For rapid prototyping, you can use the following syntax to quickly iterate on th .. 
code-block:: bash - >> dataset.test_expectation_function(my_func) + >> Dataset.test_expectation_function(my_func) - >> dataset.test_column_map_expectation_function(my_map_func, column='my_column') + >> Dataset.test_column_map_expectation_function(my_map_func, column='my_column') - >> dataset.test_column_aggregate_expectation_function(my_agg_func, column='my_column') + >> Dataset.test_column_aggregate_expectation_function(my_agg_func, column='my_column') These functions will return output just like regular expectations. However, they will NOT save a copy of the expectation to the config. @@ -113,21 +150,21 @@ These functions will return output just like regular expectations. However, they Using custom expectations -------------------------------------------------------------------------------- -Let's suppose you've defined `CustomPandasDataSet` in a module called `custom_dataset.py`. You can instantiate a DataSet with your custom expectations simply by adding `dataset_class=CustomPandasDataSet` in `ge.read_csv`. +Let's suppose you've defined `CustomPandasDataset` in a module called `custom_dataset.py`. You can instantiate a dataset with your custom expectations simply by adding `dataset_class=CustomPandasDataset` in `ge.read_csv`. Once you do this, all the functionality of your new expectations will be available for uses. .. code-block:: bash >> import great_expectations as ge - >> from custom_dataset import CustomPandasDataSet + >> from custom_dataset import CustomPandasDataset - >> my_df = ge.read_csv("my_data_file.csv", dataset_class=CustomPandasDataSet) + >> my_df = ge.read_csv("my_data_file.csv", dataset_class=CustomPandasDataset) >> my_df.expect_column_values_to_equal_1("all_twos") { "success": False, - "exception_list": [2,2,2,2,2,2,2,2] + "unexpected_list": [2,2,2,2,2,2,2,2] } A similar approach works for the command-line tool. @@ -137,7 +174,7 @@ A similar approach works for the command-line tool. 
>> great_expectations validate \ my_data_file.csv \ my_expectations.json \ - dataset_class=custom_dataset.CustomPandasDataSet + dataset_class=custom_dataset.CustomPandasDataset diff --git a/docs/source/data_context_module.rst b/docs/source/data_context_module.rst new file mode 100644 index 000000000000..ea46c61ef664 --- /dev/null +++ b/docs/source/data_context_module.rst @@ -0,0 +1,52 @@ +.. _data_context_module: + +Data Context Module +=================== + +.. automodule:: great_expectations.data_context + :members: + :undoc-members: + :show-inheritance: + +great_expectations.data_context.base +--------------------------------------------- + +.. automodule:: great_expectations.data_context.base + :members: + :undoc-members: + :show-inheritance: + :exclude-members: DataContext + + .. autoclass:: great_expectations.data_context.base.DataContext + :members: + :undoc-members: + :show-inheritance: + +great_expectations.data_context.PandasCSVDataContext +---------------------------------------------------- + +.. automodule:: great_expectations.data_context.PandasCSVDataContext + :members: + :undoc-members: + :show-inheritance: + :exclude-members: PandasCSVDataContext + + .. autoclass:: great_expectations.data_context.pandas_context.PandasCSVDataContext + :members: + :undoc-members: + :show-inheritance: + +great_expectations.data_context.SqlAlchemyDataContext +----------------------------------------------------- + +.. automodule:: great_expectations.data_context.SqlAlchemyDataContext + :members: + :undoc-members: + :show-inheritance: + :exclude-members: SqlAlchemyDataContext + + .. autoclass:: great_expectations.data_context.sqlalchemy_context.SqlAlchemyDataContext + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/data_contexts.rst b/docs/source/data_contexts.rst new file mode 100644 index 000000000000..e287a93ba49e --- /dev/null +++ b/docs/source/data_contexts.rst @@ -0,0 +1,41 @@ +.. 
_data_contexts: + +================================================================================ +Data Contexts +================================================================================ + +Data Contexts manage connections to Great Expectations Datasets. + +To get a data context, simply call `get_data_context()` on the ge object: + +.. code-block:: bash + + >> import great_expectations as ge + >> options = { ## my connection options } + >> sql_context = ge.get_data_context('sqlalchemy_context', options) + + >> sql_dataset = sql_context.get_dataset('table_name') + + +There are currently two types of data contexts: + - :ref:`PandasCSVDataContext`: The PandasCSVDataContext ('PandasCSV') exposes a local directory containing files as datasets. + - :ref:`SqlAlchemyDataContext`: The SqlAlchemyDataContext ('SqlAlchemy') exposes tables from a SQL-compliant database as datasets. + +All data contexts expose the following methods: + - list_datasets(): lists datasets available in current context + - get_dataset(dataset_name): returns a dataset with the matching name (e.g. filename or tablename) + +.. _PandasCSVDataContext: + +`PandasCSVDataContext` +---------------------- + +The `options` parameter for a PandasCSVDataContext is simply the glob pattern matching the files to be available. + + +.. _SqlAlchemyDataContext: + +`SqlAlchemyDataContext` +----------------------- + +The `options` parameter for a SqlAlchemyDataContext is the sqlalchemy connection string to connect to the database. diff --git a/docs/source/dataset_module.rst b/docs/source/dataset_module.rst new file mode 100644 index 000000000000..7e74309b0ba5 --- /dev/null +++ b/docs/source/dataset_module.rst @@ -0,0 +1,61 @@ +.. _dataset_module: + +Dataset Module +================================== + +great_expectations.dataset.base +------------------------------- + +.. 
automodule:: great_expectations.dataset.base + :members: + :undoc-members: + :show-inheritance: + +great_expectations.dataset.pandas_dataset +----------------------------------------- + +.. automodule:: great_expectations.dataset.pandas_dataset + :members: + :undoc-members: + :show-inheritance: + :exclude-members: MetaPandasDataset, PandasDataset + + .. autoclass:: great_expectations.dataset.pandas_dataset.MetaPandasDataset + :members: + :undoc-members: + :show-inheritance: + + .. autoclass:: great_expectations.dataset.pandas_dataset.PandasDataset + :members: + :undoc-members: + :show-inheritance: + :exclude-members: expect_column_to_exist, expect_table_row_count_to_be_between, expect_table_row_count_to_equal, expect_column_values_to_be_unique, expect_column_values_to_not_be_null, expect_column_values_to_be_null, expect_column_values_to_be_of_type, expect_column_values_to_be_in_type_list, expect_column_values_to_be_in_set, expect_column_values_to_not_be_in_set, expect_column_values_to_be_between, expect_column_values_to_be_increasing, expect_column_values_to_be_decreasing, expect_column_value_lengths_to_be_between, expect_column_value_lengths_to_equal, expect_column_values_to_match_regex, expect_column_values_to_not_match_regex, expect_column_values_to_match_regex_list, expect_column_values_to_match_strftime_format, expect_column_values_to_be_dateutil_parseable, expect_column_values_to_be_json_parseable, expect_column_values_to_match_json_schema, expect_column_mean_to_be_between, expect_column_median_to_be_between, expect_column_stdev_to_be_between, expect_column_unique_value_count_to_be_between, expect_column_proportion_of_unique_values_to_be_between, expect_column_most_common_value_to_be, expect_column_most_common_value_to_be_in_set, expect_column_chisquare_test_p_value_to_be_greater_than, expect_column_bootstrapped_ks_test_p_value_to_be_greater_than, expect_column_kl_divergence_to_be_less_than + +great_expectations.dataset.sqlalchemy_dataset 
+--------------------------------------------- + +.. automodule:: great_expectations.dataset.sqlalchemy_dataset + :members: + :undoc-members: + :show-inheritance: + :exclude-members: MetaSqlAlchemyDataset, SqlAlchemyDataset + + .. autoclass:: great_expectations.dataset.sqlalchemy_dataset.MetaSqlAlchemyDataset + :members: + :undoc-members: + :show-inheritance: + + .. autoclass:: great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset + :members: + :undoc-members: + :show-inheritance: + :exclude-members: expect_column_to_exist, expect_table_row_count_to_be_between, expect_table_row_count_to_equal, expect_column_values_to_be_unique, expect_column_values_to_not_be_null, expect_column_values_to_be_null, expect_column_values_to_be_of_type, expect_column_values_to_be_in_type_list, expect_column_values_to_be_in_set, expect_column_values_to_not_be_in_set, expect_column_values_to_be_between, expect_column_values_to_be_increasing, expect_column_values_to_be_decreasing, expect_column_value_lengths_to_be_between, expect_column_value_lengths_to_equal, expect_column_values_to_match_regex, expect_column_values_to_not_match_regex, expect_column_values_to_match_regex_list, expect_column_values_to_match_strftime_format, expect_column_values_to_be_dateutil_parseable, expect_column_values_to_be_json_parseable, expect_column_values_to_match_json_schema, expect_column_mean_to_be_between, expect_column_median_to_be_between, expect_column_stdev_to_be_between, expect_column_unique_value_count_to_be_between, expect_column_proportion_of_unique_values_to_be_between, expect_column_most_common_value_to_be, expect_column_most_common_value_to_be_in_set, expect_column_chisquare_test_p_value_to_be_greater_than, expect_column_bootstrapped_ks_test_p_value_to_be_greater_than, expect_column_kl_divergence_to_be_less_than + + +great_expectations.dataset.util +------------------------------- + +.. 
automodule:: great_expectations.dataset.util + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/distributional_expectations.rst b/docs/source/distributional_expectations.rst index 8bc8a5d08123..a09976439d03 100644 --- a/docs/source/distributional_expectations.rst +++ b/docs/source/distributional_expectations.rst @@ -91,15 +91,15 @@ Distributional expectations rely on three tests for their work. Kullback-Leibler (KL) divergence is available as an expectation for both categorical and continuous data (continuous data will be discretized according to the provided partition prior to computing divergence). Unlike KS and Chi-Squared tests which can use a p-value, you must provide a threshold for the relative entropy to use KL divergence. Further, KL divergence is not symmetric. -* :func:`expect_column_kl_divergence_to_be_less_than ` +* :func:`expect_column_kl_divergence_to_be_less_than ` For continuous data, the expect_column_bootstrapped_ks_test_p_value_to_be_greater_than expectation uses the Kolmogorov-Smirnov (KS) test, which compares the actual and expected cumulative densities of the data. Because of the partition_object's piecewise uniform approximation of the expected distribution, the test would be overly sensitive to differences when used with a sample of data of much larger than the size of the partition. The expectation consequently uses a bootstrapping method to sample the provided data with tunable specificity. -* :func:`expect_column_bootstrapped_ks_test_p_value_to_be_greater_than ` +* :func:`expect_column_bootstrapped_ks_test_p_value_to_be_greater_than ` For categorical data, the expect_column_chisquare_test_p_value_to_be_greater_than expectation uses the Chi-Squared test. The provided weights are scaled to the size of the data in the tested column at the time of the test. 
-* :func:`expect_column_chisquare_test_p_value_to_be_greater_than ` +* :func:`expect_column_chisquare_test_p_value_to_be_greater_than ` diff --git a/docs/source/expectations.rst b/docs/source/expectations.rst index 5194517408f4..172ae0bb70b0 100644 --- a/docs/source/expectations.rst +++ b/docs/source/expectations.rst @@ -23,10 +23,10 @@ Great Expectations's connect-and-expect API makes it easy to declare Expectation { 'success': True, 'summary_obj': { - 'exception_count': 0, - 'exception_percent': 0.0, - 'exception_percent_nonmissing': 0.0, - 'partial_exception_list': [] + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0, + 'partial_unexpected_list': [] } } @@ -55,10 +55,10 @@ For example: { 'success': False, 'summary_obj': { - 'exception_count': 1, - 'exception_percent': 0.0007616146230007616, - 'exception_percent_nonmissing': 0.0007616146230007616, - 'partial_exception_list': ['*'] + 'unexpected_count': 1, + 'unexpected_percent': 0.0007616146230007616, + 'unexpected_percent_nonmissing': 0.0007616146230007616, + 'partial_unexpected_list': ['*'] } } @@ -73,10 +73,10 @@ Another example: { 'success': False, 'summary_obj': { - 'exception_count': 16, - 'exception_percent': 0.012185833968012186, - 'exception_percent_nonmissing': 0.012185833968012186, - 'partial_exception_list': [ + 'unexpected_count': 16, + 'unexpected_percent': 0.012185833968012186, + 'unexpected_percent_nonmissing': 0.012185833968012186, + 'partial_unexpected_list': [ 'Bjornstrm-Steffansson, Mr Mauritz Hakan', 'Brown, Mrs James Joseph (Margaret Molly" Tobin)"', 'Frolicher-Stehli, Mr Maxmillian', @@ -133,5 +133,5 @@ This is how you always know what to expect from your data. >> my_df.save_expectations_config("my_titanic_expectations.json") -For more detail on how to control expectation output, please see :ref:`standard_arguments` and :ref:`output_format`. 
+For more detail on how to control expectation output, please see :ref:`standard_arguments` and :ref:`result_format`. diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst index 7f2c8bcc4f99..afb1201e14b8 100644 --- a/docs/source/glossary.rst +++ b/docs/source/glossary.rst @@ -8,70 +8,71 @@ Table shape -------------------------------------------------------------------------------- * :func:`expect_column_to_exist ` +* :func:`expect_columns_to_be ` * :func:`expect_table_row_count_to_be_between ` * :func:`expect_table_row_count_to_equal ` Missing values, unique values, and types -------------------------------------------------------------------------------- -* :func:`expect_column_values_to_be_unique ` -* :func:`expect_column_values_to_not_be_null ` -* :func:`expect_column_values_to_be_null ` -* :func:`expect_column_values_to_be_of_type ` -* :func:`expect_column_values_to_be_in_type_list ` +* :func:`expect_column_values_to_be_unique ` +* :func:`expect_column_values_to_not_be_null ` +* :func:`expect_column_values_to_be_null ` +* :func:`expect_column_values_to_be_of_type ` +* :func:`expect_column_values_to_be_in_type_list ` Sets and ranges -------------------------------------------------------------------------------- -* :func:`expect_column_values_to_be_in_set ` -* :func:`expect_column_values_to_not_be_in_set ` -* :func:`expect_column_values_to_be_between ` -* :func:`expect_column_values_to_be_increasing ` -* :func:`expect_column_values_to_be_decreasing ` +* :func:`expect_column_values_to_be_in_set ` +* :func:`expect_column_values_to_not_be_in_set ` +* :func:`expect_column_values_to_be_between ` +* :func:`expect_column_values_to_be_increasing ` +* :func:`expect_column_values_to_be_decreasing ` String matching -------------------------------------------------------------------------------- -* :func:`expect_column_value_lengths_to_be_between ` -* :func:`expect_column_values_to_match_regex ` -* :func:`expect_column_values_to_not_match_regex ` -* 
:func:`expect_column_values_to_match_regex_list ` +* :func:`expect_column_value_lengths_to_be_between ` +* :func:`expect_column_values_to_match_regex ` +* :func:`expect_column_values_to_not_match_regex ` +* :func:`expect_column_values_to_match_regex_list ` Datetime and JSON parsing -------------------------------------------------------------------------------- -* :func:`expect_column_values_to_match_strftime_format ` -* :func:`expect_column_values_to_be_dateutil_parseable ` -* :func:`expect_column_values_to_be_json_parseable ` -* :func:`expect_column_values_to_match_json_schema ` +* :func:`expect_column_values_to_match_strftime_format ` +* :func:`expect_column_values_to_be_dateutil_parseable ` +* :func:`expect_column_values_to_be_json_parseable ` +* :func:`expect_column_values_to_match_json_schema ` Aggregate functions -------------------------------------------------------------------------------- -* :func:`expect_column_mean_to_be_between ` -* :func:`expect_column_median_to_be_between ` -* :func:`expect_column_stdev_to_be_between ` -* :func:`expect_column_unique_value_count_to_be_between ` -* :func:`expect_column_proportion_of_unique_values_to_be_between ` -* :func:`expect_column_most_common_value_to_be ` -* :func:`expect_column_most_common_value_to_be_in_set ` +* :func:`expect_column_mean_to_be_between ` +* :func:`expect_column_median_to_be_between ` +* :func:`expect_column_stdev_to_be_between ` +* :func:`expect_column_unique_value_count_to_be_between ` +* :func:`expect_column_proportion_of_unique_values_to_be_between ` +* :func:`expect_column_most_common_value_to_be ` +* :func:`expect_column_most_common_value_to_be_in_set ` Distributional functions -------------------------------------------------------------------------------- -* :func:`expect_column_kl_divergence_to_be_less_than ` -* :func:`expect_column_bootstrapped_ks_test_p_value_to_be_greater_than ` -* :func:`expect_column_chisquare_test_p_value_to_be_greater_than ` +* 
:func:`expect_column_kl_divergence_to_be_less_than ` +* :func:`expect_column_bootstrapped_ks_test_p_value_to_be_greater_than ` +* :func:`expect_column_chisquare_test_p_value_to_be_greater_than ` Distributional function helpers -------------------------------------------------------------------------------- -* :func:`continuous_partition_data ` -* :func:`categorical_partition_data ` -* :func:`kde_partition_data ` -* :func:`is_valid_partition_object ` -* :func:`is_valid_continuous_partition_object ` -* :func:`is_valid_categorical_partition_object ` +* :func:`continuous_partition_data ` +* :func:`categorical_partition_data ` +* :func:`kde_partition_data ` +* :func:`is_valid_partition_object ` +* :func:`is_valid_continuous_partition_object ` +* :func:`is_valid_categorical_partition_object ` diff --git a/docs/source/great_expectations.dataset.rst b/docs/source/great_expectations.dataset.rst deleted file mode 100644 index 28ea0b380882..000000000000 --- a/docs/source/great_expectations.dataset.rst +++ /dev/null @@ -1,50 +0,0 @@ -great_expectations.dataset package -================================== - -Submodules ----------- - -great_expectations.dataset.base module --------------------------------------- - -.. automodule:: great_expectations.dataset.base - :members: - :undoc-members: - :show-inheritance: - -great_expectations.dataset.pandas_dataset module ------------------------------------------------- - -.. automodule:: great_expectations.dataset.pandas_dataset - :members: - :undoc-members: - :show-inheritance: - :exclude-members: MetaPandasDataSet, PandasDataSet - - .. autoclass:: great_expectations.dataset.pandas_dataset.MetaPandasDataSet - :members: - :undoc-members: - :show-inheritance: - - .. 
autoclass:: great_expectations.dataset.pandas_dataset.PandasDataSet - :members: - :undoc-members: - :show-inheritance: - :exclude-members: expect_column_to_exist, expect_table_row_count_to_be_between, expect_table_row_count_to_equal, expect_column_values_to_be_unique, expect_column_values_to_not_be_null, expect_column_values_to_be_null, expect_column_values_to_be_of_type, expect_column_values_to_be_in_type_list, expect_column_values_to_be_in_set, expect_column_values_to_not_be_in_set, expect_column_values_to_be_between, expect_column_values_to_be_increasing, expect_column_values_to_be_decreasing, expect_column_value_lengths_to_be_between, expect_column_value_lengths_to_equal, expect_column_values_to_match_regex, expect_column_values_to_not_match_regex, expect_column_values_to_match_regex_list, expect_column_values_to_match_strftime_format, expect_column_values_to_be_dateutil_parseable, expect_column_values_to_be_json_parseable, expect_column_values_to_match_json_schema, expect_column_mean_to_be_between, expect_column_median_to_be_between, expect_column_stdev_to_be_between, expect_column_unique_value_count_to_be_between, expect_column_proportion_of_unique_values_to_be_between, expect_column_most_common_value_to_be, expect_column_most_common_value_to_be_in_set, expect_column_chisquare_test_p_value_to_be_greater_than, expect_column_bootstrapped_ks_test_p_value_to_be_greater_than, expect_column_kl_divergence_to_be_less_than - -great_expectations.dataset.util module --------------------------------------- - -.. automodule:: great_expectations.dataset.util - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: great_expectations.dataset - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/great_expectations.rst b/docs/source/great_expectations.rst deleted file mode 100644 index 2ce02a2cd65c..000000000000 --- a/docs/source/great_expectations.rst +++ /dev/null @@ -1,29 +0,0 @@ -great_expectations package -========================== - -Subpackages ------------ - -.. toctree:: - - great_expectations.dataset - -Submodules ----------- - -great_expectations.util module ------------------------------- - -.. automodule:: great_expectations.util - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: great_expectations - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index 0f3836b7ef89..91b60ac41276 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -13,6 +13,7 @@ Welcome to Great Expectations! :maxdepth: 2 intro + data_contexts expectations distributional_expectations validation @@ -26,10 +27,18 @@ Advanced :maxdepth: 2 standard_arguments - output_format + result_format custom_expectations conventions - roadmap + roadmap_changelog + +Module Docs +------------- +.. toctree:: + :maxdepth: 2 + + dataset_module + data_context_module Indices and tables ------------------ diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 5f134e0c2caf..761117db27a9 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -12,7 +12,7 @@ Introduction What is Great Expectations? -------------------------------------------------------------------------------- -Great Expectations is a framework for bringing data pipelines and products under test. +Great Expectations is a framework that helps teams save time and promote analytic integrity with a new twist on automated testing: pipeline tests. Pipeline tests are applied to data (instead of code) and at batch time (instead of compile or deploy time). 
Software developers have long known that automated testing is essential for managing complex codebases. Great Expectations brings the same discipline, confidence, and acceleration to data science and engineering teams. @@ -32,10 +32,11 @@ To get more done with data, faster. Teams use Great Expectations to See :ref:`workflow_advantages` to learn more about how Great Expectations speeds up data teams. -Getting started + +How do I get started? -------------------------------------------------------------------------------- -...is easy. Just use pip install: +It's easy! Just use pip install: .. code-block:: bash @@ -48,10 +49,27 @@ You can also clone the repository, which includes examples of using great_expect $ git clone https://github.com/great-expectations/great_expectations.git $ pip install great_expectations/ -Since Great Expectation is under active development, the `develop` branch is often a ahead of the latest production release. If you want to work from the latest commit on `develop`, we recommend you install by branch name or hash. -branch-name: +How do I learn more? +-------------------------------------------------------------------------------- + +For full documentation, visit `Great Expectations on readthedocs.io <http://great-expectations.readthedocs.io/en/latest/>`_. -.. code-block:: bash +`Down with Pipeline Debt! <https://medium.com/@expectgreatdata/down-with-pipeline-debt-introducing-great-expectations-862ddc46782a>`_ explains the core philosophy behind Great Expectations. Please give it a read, and clap, follow, and share while you're at it. + +For quick, hands-on introductions to Great Expectations' key features, check out our walkthrough videos: + +* `Introduction to Great Expectations <https://www.useloom.com/share/3eb1d429823744288c99ea26e2c4d443>`_ +* `Using Distributional Expectations <https://www.useloom.com/share/c74b3e9c8dd349e9b8c4aa230cc4bedc>`_ + + +What's the best way to get in touch with the Great Expectations team?
+-------------------------------------------------------------------------------- + +`Issues on GitHub <https://github.com/great-expectations/great_expectations/issues>`_. If you have questions, comments, feature requests, etc., `opening an issue <https://github.com/great-expectations/great_expectations/issues/new>`_ is definitely the best path forward. + + +Great Expectations doesn't do X. Is it right for my use case? +-------------------------------------------------------------------------------- - $ pip install git+git://github.com/great-expectations/great_expectations.git@develop +It depends. If you have needs that the library doesn't meet yet, please `upvote an existing issue(s) <https://github.com/great-expectations/great_expectations/issues>`_ or `open a new issue <https://github.com/great-expectations/great_expectations/issues/new>`_ and we'll see what we can do. Great Expectations is under active development, so your use case might be supported soon. diff --git a/docs/source/output_format.rst b/docs/source/output_format.rst deleted file mode 100644 index 835652bb7d9f..000000000000 --- a/docs/source/output_format.rst +++ /dev/null @@ -1,264 +0,0 @@ -.. _output_format: - -================================================================================ -Expectation output formats -================================================================================ - -All Expectations accept an `output_format` parameter. Great Expectations defines four values for `output_format`: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, and `SUMMARY`. The API also allows you to define new formats that mix, match, extend this initial set. - -.. 
code-block:: bash - - >> print list(my_df.my_var) - ['A', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'] - - >> my_df.expect_column_values_to_be_in_set( - "my_var", - ["B", "C", "D", "F", "G", "H"], - output_format="BOOLEAN_ONLY" - ) - False - - >> my_df.expect_column_values_to_be_in_set( - "my_var", - ["B", "C", "D", "F", "G", "H"], - output_format="BASIC" - ) - { - 'success': False, - 'summary_obj': { - 'exception_count': 6, - 'exception_percent': 0.16666666666666666, - 'exception_percent_nonmissing': 0.16666666666666666, - 'partial_exception_list': ['A', 'E', 'E', 'E', 'E', 'E'] - } - } - - >> my_df.expect_column_values_to_be_in_set( - "my_var", - ["B", "C", "D", "F", "G", "H"], - output_format="COMPLETE" - ) - { - 'exception_index_list': [0, 10, 11, 12, 13, 14], - 'exception_list': ['A', 'E', 'E', 'E', 'E', 'E'], - 'success': False - } - - >> expect_column_values_to_match_regex( - "my_column", - "[A-Z][a-z]+", - output_format="SUMMARY" - ) - { - 'success': False, - 'summary_obj': { - 'element_count': 36, - 'exception_count': 6, - 'exception_percent': 0.16666666666666666, - 'exception_percent_nonmissing': 0.16666666666666666, - 'missing_count': 0, - 'missing_percent': 0.0, - 'partial_exception_counts': {'A': 1, 'E': 5}, - 'partial_exception_index_list': [0, 10, 11, 12, 13, 14], - 'partial_exception_list': ['A', 'E', 'E', 'E', 'E', 'E'] - } - } - -The out-of-the-box default is `output_format=BASIC`. - -Note: accepting a single parameter for `output_format` should make the library of formats relatively easy to extend in the future. - - -Behavior for `BOOLEAN_ONLY` result objects ------------------------------------------------------------------------------- -...is simple: if the expectation is satisfied, it returns True. Otherwise it returns False. - -.. 
code-block:: bash - - >> my_df.expect_column_values_to_be_in_set( - "possible_benefactors", - ["Joe Gargery", "Mrs. Gargery", "Mr. Pumblechook", "Ms. Havisham", "Mr. Jaggers"] - output_format="BOOLEAN_ONLY" - ) - False - - >> my_df.expect_column_values_to_be_in_set( - "possible_benefactors", - ["Joe Gargery", "Mrs. Gargery", "Mr. Pumblechook", "Ms. Havisham", "Mr. Jaggers", "Mr. Magwitch"] - output_format="BOOLEAN_ONLY" - ) - False - -Behavior for `BASIC` result objects ------------------------------------------------------------------------------- -...depends on the expectation. Great Expectations has native support for three types of Expectations: `column_map_expectation`, `column_aggregate_expectation`, and a base type `expectation`. - -`column_map_expectations` apply a boolean test function to each element within a column. -This format is intended for quick, at-a-glance feedback. For example, it tends to work well -in jupyter notebooks. - -The basic format is: - -.. code-block:: bash - - { - "success" : Boolean, - "summary_obj" : { - "partial_exception_list" : [A list of up to 20 values that violate the expectation] - "exception_count" : The total count of exceptions in the column - "exception_percent" : The overall percent of exceptions - "exception_percent_nonmissing" : The percent of exceptions, excluding mising values from the denominator - } - } - - -Note: when exception values are duplicated, `exception_list` will contain multiple copies of the value. - -.. code-block:: bash - - [1,2,2,3,3,3,None,None,None,None] - - expect_column_values_to_be_unique - - { - "success" : Boolean, - "summary_obj" : { - "exception_list" : [2,2,3,3,3] - "exception_index_list" : [1,2,3,4,5] - "exception_count" : 5, - "exception_percent" : 0.5, - "exception_percent_nonmissing" : 0.8333333, - } - } - - -`column_aggregate_expectations` compute a single value for the column and put it into `true_value`. - -Format: - -.. 
code-block:: bash - - { - "success" : Boolean, - "true_value" : Depends - } - - -For example: - -.. code-block:: bash - - expect_table_row_count_to_be_between - - { - "success" : true, - "true_value" : 7 - } - - - expect_column_stdev_to_be_between - { - "success" : false - "true_value" : 3.04 - } - - expect_column_most_common_value_to_be - { - "success" : ... - "true_value" : ... - } - - -Behavior for `SUMMARY` result objects ------------------------------------------------------------------------------- - -`SUMMARY` provides a `summary_obj` with values usef of common exception values. For `column_map_expectations`, the standard format is: - -.. code-block:: bash - - { - 'success': False, - 'summary_obj': { - 'element_count': 36, - 'exception_count': 6, - 'exception_percent': 0.16666666666666666, - 'exception_percent_nonmissing': 0.16666666666666666, - 'missing_count': 0, - 'missing_percent': 0.0, - 'partial_exception_counts': {'A': 1, 'E': 5}, - 'partial_exception_index_list': [0, 10, 11, 12, 13, 14], - 'partial_exception_list': ['A', 'E', 'E', 'E', 'E', 'E'] - } - } - - - -For `column_aggregate_expectations`, `SUMMARY` output is the same as `BASIC` output, plus a `summary_obj`. - -.. 
code-block:: bash - - { - 'success': False, - 'true_value': 3.04, - 'summary_obj': { - 'element_count': 77, - 'missing_count': 7, - 'missing_percent': 0.1, - } - } - - -Quick reference -------------------------------------------------------------------------------- - -+---------------------------------------+-------+-----------+---------------------------+ -| Expectation result fields |BASIC |SUMMARY |COMPLETE | -+=======================================+=======+===========+===========================+ -|success (boolean) |Included for all 3 output_formats | -+---------------------------------------+-------+-----------+---------------------------+ -+---------------------------------------+-------+-----------+---------------------------+ -|expectation_type (string) |Included if and only if include_config=True | -+---------------------------------------+-------+-----------+---------------------------+ -|expectation_kwargs (dict) |Included if and only if include_config=True | -+---------------------------------------+-------+-----------+---------------------------+ -|raised_exception (boolean) |Included if and only if catch_exceptions=True | -+---------------------------------------+-------+-----------+---------------------------+ -|exception_traceback (string or None) |Included if and only if catch_exceptions=True | -+---------------------------------------+-------+-----------+---------------------------+ -|meta (dict) |Included if and only if meta=True | -+---------------------------------------+-------+-----------+---------------------------+ -|true_value (depends) |Included for all column_aggregate_expectations | -+---------------------------------------+-------+-----------+---------------------------+ -+---------------------------------------+-------+-----------+---------------------------+ -|exception_index_list (list) |no |no |yes | -+---------------------------------------+-------+-----------+---------------------------+ -|exception_list (list) |no |no |yes | 
-+---------------------------------------+-------+-----------+---------------------------+ -|summary_obj (dict) |yes |yes |no | -+---------------------------------------+-------+-----------+---------------------------+ - -+---------------------------------------+----------------------+------------------------+ -|Fields within `summary_obj` |BASIC |SUMMARY | -+=======================================+======================+========================+ -| partial_exception_list |yes* |yes* | -+---------------------------------------+----------------------+------------------------+ -| partial_exception_index_list |no |yes* | -+---------------------------------------+----------------------+------------------------+ -| exception_count |yes* |yes* | -+---------------------------------------+----------------------+------------------------+ -| exception_percent |yes* |yes* | -+---------------------------------------+----------------------+------------------------+ -| exception_percent_nonmissing |yes* |yes* | -+---------------------------------------+----------------------+------------------------+ -| element_count |no |yes | -+---------------------------------------+----------------------+------------------------+ -| missing_count |no |yes | -+---------------------------------------+----------------------+------------------------+ -| missing_percent |no |yes | -+---------------------------------------+----------------------+------------------------+ -| partial_exception_counts |no |yes* | -+---------------------------------------+----------------------+------------------------+ -| Other... |Defined on a case by case basis. | -+---------------------------------------+----------------------+------------------------+ - -yes* : These variables are only defined for `column_map_expectations`. 
diff --git a/docs/source/result_format.rst b/docs/source/result_format.rst new file mode 100644 index 000000000000..376e973250df --- /dev/null +++ b/docs/source/result_format.rst @@ -0,0 +1,425 @@ +.. _result_format: + +================================================================================ +result_format +================================================================================ + +The `result_format` parameter may be either a string or a dictionary which specifies the fields to return in `result`. + - For string usage, see :ref:`result_format <result_format_values>`. + - For dictionary usage, `result_format` may include the following keys: + + - :ref:`result_format <result_format_values>`: Sets the fields to return in result. + - partial_unexpected_count: Sets the number of results to include in partial_unexpected_count, if applicable. + + +.. _result_format_values: + +`result_format` +------------------------------------------------------------------------------ + +Great Expectations supports four values for `result_format`: `BOOLEAN_ONLY`, `BASIC`, `SUMMARY`, and `COMPLETE`. \ +Each successive value includes more detail and so can support different use cases for working with Great Expectations, \ +including interactive exploratory work and automatic validation. 
+ + ++---------------------------------------+----------------+----------------+----------------+----------------+ +| Fields within `result` |BOOLEAN_ONLY |BASIC |SUMMARY |COMPLETE | ++=======================================+================+================+================+================+ +| element_count |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| missing_count |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| missing_percent |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| detail (dictionary) |Defined on a per-expectation basis | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| Fields defined only for `column_map_expectation` type expectations: | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| unexpected_count |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| unexpected_percent |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| unexpected_percent_nonmissing |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| partial_unexpected_list |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| partial_unexpected_index_list |no |no |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| partial_unexpected_counts |no |no |yes |yes | 
++---------------------------------------+----------------+----------------+----------------+----------------+ +| unexpected_index_list |no |no |no |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| unexpected_list |no |no |no |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| Fields defined only for `column_aggregate_expectation` type expectations: | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| observed_value |no |yes |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ +| details (e.g. statistical details) |no |no |yes |yes | ++---------------------------------------+----------------+----------------+----------------+----------------+ + + ++---------------------------------------+--------------------------------------------------------------+ +| `result_format` Setting | Example use case | ++=======================================+==============================================================+ +| BOOLEAN_ONLY | Automatic validation. No result is returned. | ++---------------------------------------+--------------------------------------------------------------+ +| BASIC | Exploratory analysis in a notebook. | ++---------------------------------------+--------------------------------------------------------------+ +| SUMMARY | Detailed exploratory work with follow-on investigation. | ++---------------------------------------+--------------------------------------------------------------+ +| COMPLETE | Debugging pipelines or developing detailed regression tests. | ++---------------------------------------+--------------------------------------------------------------+ + + +result_format examples +------------------------------------------------------------------------------ + +.. 
code-block:: bash + + >> print(list(my_df.my_var)) + ['A', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'] + + >> my_df.expect_column_values_to_be_in_set( + "my_var", + ["B", "C", "D", "F", "G", "H"], + result_format={'result_format': 'BOOLEAN_ONLY'} + ) + { + 'success': False + } + + >> my_df.expect_column_values_to_be_in_set( + "my_var", + ["B", "C", "D", "F", "G", "H"], + result_format={'result_format': 'BASIC'} + ) + { + 'success': False, + 'result': { + 'unexpected_count': 6, + 'unexpected_percent': 0.16666666666666666, + 'unexpected_percent_nonmissing': 0.16666666666666666, + 'partial_unexpected_list': ['A', 'E', 'E', 'E', 'E', 'E'] + } + } + + >> expect_column_values_to_match_regex( + "my_column", + "[A-Z][a-z]+", + result_format={'result_format': 'SUMMARY'} + ) + { + 'success': False, + 'result': { + 'element_count': 36, + 'unexpected_count': 6, + 'unexpected_percent': 0.16666666666666666, + 'unexpected_percent_nonmissing': 0.16666666666666666, + 'missing_count': 0, + 'missing_percent': 0.0, + 'partial_unexpected_counts': [{'value': 'A', 'count': 1}, {'value': 'E', 'count': 5}], + 'partial_unexpected_index_list': [0, 10, 11, 12, 13, 14], + 'partial_unexpected_list': ['A', 'E', 'E', 'E', 'E', 'E'] + } + } + + >> my_df.expect_column_values_to_be_in_set( + "my_var", + ["B", "C", "D", "F", "G", "H"], + result_format={'result_format': 'COMPLETE'} + ) + { + 'success': False, + 'result': { + 'unexpected_index_list': [0, 10, 11, 12, 13, 14], + 'unexpected_list': ['A', 'E', 'E', 'E', 'E', 'E'] + } + } + + + +The out-of-the-box default is `{'result_format': 'BASIC'}`. + + +Behavior for `BOOLEAN_ONLY` +------------------------------------------------------------------------------ +When the `result_format` is `BOOLEAN_ONLY`, no `result` is returned. 
The result of evaluating the expectation is \ +exclusively returned via the value of the `success` parameter. + +For example: + +.. code-block:: bash + + >> my_df.expect_column_values_to_be_in_set( + "possible_benefactors", + ["Joe Gargery", "Mrs. Gargery", "Mr. Pumblechook", "Ms. Havisham", "Mr. Jaggers"], + result_format={'result_format': 'BOOLEAN_ONLY'} + ) + { + 'success': False + } + + >> my_df.expect_column_values_to_be_in_set( + "possible_benefactors", + ["Joe Gargery", "Mrs. Gargery", "Mr. Pumblechook", "Ms. Havisham", "Mr. Jaggers", "Mr. Magwitch"], + result_format={'result_format': 'BOOLEAN_ONLY'} + ) + { + 'success': False + } + + +Behavior for `BASIC` +------------------------------------------------------------------------------ +A `result` is generated with a basic justification for why an expectation was met or not. The format is intended \ +for quick, at-a-glance feedback. For example, it tends to work well in jupyter notebooks. + +Great Expectations has standard behavior for describing the results of `column_map_expectation` and +`column_aggregate_expectation` expectations. + +`column_map_expectation` applies a boolean test function to each element within a column, and so returns a list of \ +unexpected values to justify the expectation result. + + +The basic `result` includes: + +.. code-block:: bash + + { + "success" : Boolean, + "result" : { + "partial_unexpected_list" : [A list of up to 20 values that violate the expectation] + "unexpected_count" : The total count of unexpected values in the column + "unexpected_percent" : The overall percent of unexpected values + "unexpected_percent_nonmissing" : The percent of unexpected values, excluding missing values from the denominator + } + } + +Note: when unexpected values are duplicated, `unexpected_list` will contain multiple copies of the value. + +.. 
code-block:: bash + + [1,2,2,3,3,3,None,None,None,None] + + expect_column_values_to_be_unique + + { + "success" : Boolean, + "result" : { + "partial_unexpected_list" : [2,2,3,3,3] + "unexpected_count" : 5, + "unexpected_percent" : 0.5, + "unexpected_percent_nonmissing" : 0.8333333 + } + } + + +`column_aggregate_expectation` computes a single aggregate value for the column, and so returns a single `observed_value` \ +to justify the expectation result. + +The basic `result` includes: + +.. code-block:: bash + + + { + "success" : Boolean, + "result" : { + "observed_value" : The aggregate statistic computed for the column + } + } + +For example: + +.. code-block:: bash + + [1, 1, 2, 2] + + expect_column_mean_to_be_between + + { + "success" : Boolean, + "result" : { + "observed_value" : 1.5 + } + } + + +Behavior for `SUMMARY` +------------------------------------------------------------------------------ +A `result` is generated with a summary justification for why an expectation was met or not. The format is intended \ +for more detailed exploratory work and includes additional information beyond what is included by `BASIC`. +For example, it can support generating dashboard results of whether a set of expectations are being met. + +Great Expectations has standard behavior for support for describing the results of `column_map_expectation` and +`column_aggregate_expectation` expectations. + +`column_map_expectation` applies a boolean test function to each element within a column, and so returns a list of \ +unexpected values to justify the expectation result. + +The summary `result` includes: + +.. 
code-block:: bash + + { + 'success': False, + 'result': { + 'element_count': The total number of values in the column + 'unexpected_count': The total count of unexpected values in the column (also in `BASIC`) + 'unexpected_percent': The overall percent of unexpected values (also in `BASIC`) + 'unexpected_percent_nonmissing': The percent of unexpected values, excluding missing values from the denominator (also in `BASIC`) + "partial_unexpected_list" : [A list of up to 20 values that violate the expectation] (also in `BASIC`) + 'missing_count': The number of missing values in the column + 'missing_percent': The total percent of missing values in the column + 'partial_unexpected_counts': [{A list of objects with value and counts, showing the number of times each of the unexpected values occurs}] + 'partial_unexpected_index_list': [A list of up to 20 of the indices of the unexpected values in the column] + } + } + +For example: + +.. code-block:: bash + + { + 'success': False, + 'result': { + 'element_count': 36, + 'unexpected_count': 6, + 'unexpected_percent': 0.16666666666666666, + 'unexpected_percent_nonmissing': 0.16666666666666666, + 'missing_count': 0, + 'missing_percent': 0.0, + 'partial_unexpected_counts': [{'value': 'A', 'count': 1}, {'value': 'E', 'count': 5}], + 'partial_unexpected_index_list': [0, 10, 11, 12, 13, 14], + 'partial_unexpected_list': ['A', 'E', 'E', 'E', 'E', 'E'] + } + } + + +`column_aggregate_expectation` computes a single aggregate value for the column, and so returns a `true_value` \ +to justify the expectation result. It also includes additional information regarding observed values and counts, \ +depending on the specific expectation. + + +The summary `result` includes: + + +.. 
code-block:: bash + + { + 'success': False, + 'result': { + 'true_value': The aggregate statistic computed for the column (also in `BASIC`) + 'element_count': The total number of values in the column + 'missing_count': The number of missing values in the column + 'missing_percent': The total percent of missing values in the column + 'details': {} + } + } + +For example: + +.. code-block:: bash + + [1, 1, 2, 2, NaN] + + expect_column_mean_to_be_between + + { + "success" : Boolean, + "result" : { + "true_value" : 1.5, + 'element_count': 5, + 'missing_count': 1, + 'missing_percent': 0.2 + } + } + + +Behavior for `COMPLETE` +------------------------------------------------------------------------------ +A `result` is generated with all available justification for why an expectation was met or not. The format is \ +intended for debugging pipelines or developing detailed regression tests. + +Great Expectations has standard behavior for support for describing the results of `column_map_expectation` and +`column_aggregate_expectation` expectations. + +`column_map_expectation` applies a boolean test function to each element within a column, and so returns a list of \ +unexpected values to justify the expectation result. + +The complete `result` includes: + +..
code-block:: bash + + { + 'success': False, + 'result': { + "unexpected_list" : [A list of all values that violate the expectation] + 'unexpected_index_list': [A list of the indices of the unexpected values in the column] + 'element_count': The total number of values in the column (also in `SUMMARY`) + 'unexpected_count': The total count of unexpected values in the column (also in `SUMMARY`) + 'unexpected_percent': The overall percent of unexpected values (also in `SUMMARY`) + 'unexpected_percent_nonmissing': The percent of unexpected values, excluding missing values from the denominator (also in `SUMMARY`) + 'missing_count': The number of missing values in the column (also in `SUMMARY`) + 'missing_percent': The total percent of missing values in the column (also in `SUMMARY`) + } + } + +For example: + +.. code-block:: bash + + { + 'success': False, + 'result': { + 'element_count': 36, + 'unexpected_count': 6, + 'unexpected_percent': 0.16666666666666666, + 'unexpected_percent_nonmissing': 0.16666666666666666, + 'missing_count': 0, + 'missing_percent': 0.0, + 'unexpected_index_list': [0, 10, 11, 12, 13, 14], + 'unexpected_list': ['A', 'E', 'E', 'E', 'E', 'E'] + } + } + + +`column_aggregate_expectation` computes a single aggregate value for the column, and so returns a `true_value` \ +to justify the expectation result. It also includes additional information regarding observed values and counts, \ +depending on the specific expectation. + + +The complete `result` includes: + + +.. code-block:: bash + + { + 'success': False, + 'result': { + 'true_value': The aggregate statistic computed for the column (also in `SUMMARY`) + 'element_count': The total number of values in the column (also in `SUMMARY`) + 'missing_count': The number of missing values in the column (also in `SUMMARY`) + 'missing_percent': The total percent of missing values in the column (also in `SUMMARY`) + 'details': {} + } + } + +For example: + +.. 
code-block:: bash + + [1, 1, 2, 2, NaN] + + expect_column_mean_to_be_between + + { + "success" : Boolean, + "result" : { + "true_value" : 1.5, + 'element_count': 5, + 'missing_count': 1, + 'missing_percent': 0.2 + } + } \ No newline at end of file diff --git a/docs/source/roadmap.rst b/docs/source/roadmap.rst deleted file mode 100644 index f47dc3905921..000000000000 --- a/docs/source/roadmap.rst +++ /dev/null @@ -1,15 +0,0 @@ -Roadmap -======= -Updated December 2017 - -v0.4 ---------------- -* Support for meta variables -* Support for multi-column expectations - -v0.5 ---------------- -* Generic datasources API (SQL, Spark, Spark Streaming) - - Support non-tabular datasources (e.g. JSON, XML, AVRO) - - Real-time/streaming and adaption of distributional expectations - - Database constraint/schema <-> expectation api \ No newline at end of file diff --git a/docs/source/roadmap_changelog.rst b/docs/source/roadmap_changelog.rst new file mode 100644 index 000000000000..a2e4368060dc --- /dev/null +++ b/docs/source/roadmap_changelog.rst @@ -0,0 +1,59 @@ +.. _roadmap_changelog: + +Changelog and Roadmap +===================== + +Planned Features +---------------- +* More expectation coverage in SqlAlchemyDataset +* Support for meta variables +* Support for multi-column expectations +* Improved variable typing +* New Datasets (e.g. Spark) +* Support for non-tabular datasources (e.g.
JSON, XML, AVRO) +* Real-time/streaming and adaption of distributional expectations + +v.0.4.0 +------- +* Initial implementation of data context API and SqlAlchemyDataset including implementations of the following expectations: + * expect_column_to_exist + * expect_table_row_count_to_be + * expect_table_row_count_to_be_between + * expect_column_values_to_not_be_null + * expect_column_values_to_be_null + * expect_column_values_to_be_in_set + * expect_column_values_to_be_between + * expect_column_mean_to_be + * expect_column_min_to_be + * expect_column_max_to_be + * expect_column_sum_to_be + * expect_column_unique_value_count_to_be_between + * expect_column_proportion_of_unique_values_to_be_between +* Major refactor of output_format to new result_format parameter. See docs for full details. + * exception_list and related uses of the term exception have been renamed to unexpected + * the output formats are explicitly hierarchical now, with BOOLEAN_ONLY < BASIC < SUMMARY < COMPLETE. `column_aggregate_expectation`s now return element count and related information included at the BASIC level or higher. +* New expectation available for parameterized distributions--expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than (what a name! :) -- (@ccnobbli) +* ge.from_pandas() utility (thanks @shrockn) +* Pandas operations on a PandasDataset now return another PandasDataset (thanks @dlwhite5) +* expect_column_to_exist now takes a column_index parameter to specify column order (thanks @louispotok) +* Top-level validate option (ge.validate()) +* ge.read_json() helper (thanks @rjurney) +* Behind-the-scenes improvements to testing framework to ensure parity across data contexts. 
+* Documentation improvements, bug-fixes, and internal api improvements + +v.0.3.2 +------- +* Include requirements file in source dist to support conda + +v.0.3.1 +-------- +* Fix infinite recursion error when building custom expectations +* Catch dateutil parsing overflow errors + +v.0.2 +----- +* Distributional expectations and associated helpers are improved and renamed to be more clear regarding the tests they apply +* Expectation decorators have been refactored significantly to streamline implementing expectations and support custom expectations +* API and examples for custom expectations are available +* New output formats are available for all expectations +* Significant improvements to test suite and compatibility diff --git a/docs/source/standard_arguments.rst b/docs/source/standard_arguments.rst index ee7d4ee246fc..d1dbbb451911 100644 --- a/docs/source/standard_arguments.rst +++ b/docs/source/standard_arguments.rst @@ -4,12 +4,19 @@ Standard arguments for expectations ================================================================================ -All expectations share four standard (optional) arguments: +All Expectations return a json-serializable dictionary when evaluated, and share four standard (optional) arguments: -* `include_config` -* `catch_exceptions` -* `meta` -* `output_format` + - :ref:`result_format`: controls what information is returned from the evaluation of the expectation. + - :ref:`include_config`: If true, then the expectation config itself is returned as part of the result object. + - :ref:`catch_exceptions`: If true, execution will not fail if the Expectation encounters an error. Instead, it will \ + return success = False and provide an informative error message. + - :ref:`meta`: allows user-supplied meta-data to be stored with an expectation. + + +`result_format` +------------------------------------------------------------------------------ + +See :ref:`result_format` for more information. ..
_include_config: @@ -23,7 +30,7 @@ All Expectations accept a boolean `include_config` parameter. If true, then the >> expect_column_values_to_be_in_set( "my_var", ['B', 'C', 'D', 'F', 'G', 'H'], - output_format="COMPLETE", + result_format="COMPLETE", include_config=True, ) @@ -33,7 +40,7 @@ All Expectations accept a boolean `include_config` parameter. If true, then the 'expectation_type': 'expect_column_values_to_be_in_set', 'expectation_kwargs': { 'column': 'my_var', - 'output_format': 'COMPLETE', + 'result_format': 'COMPLETE', 'value_set': ['B', 'C', 'D', 'F', 'G', 'H'] }, 'success': False @@ -82,10 +89,6 @@ All Expectations accept an optional `meta` parameter. If `meta` is a valid JSON- } } -`output_format` ------------------------------------------------------------------------------- - -See :ref:`output_format` for more detail. .. _mostly: @@ -132,32 +135,33 @@ Expectations with `mostly` return exception lists even if they succeed: { "success": true "summary_obj": { - "exception_percent": 0.2, - "partial_exception_index_list": [ + "unexpected_percent": 0.2, + "partial_unexpected_index_list": [ 8, 9 - ], - "partial_exception_list": [ - 8, + ], + "partial_unexpected_list": [ + 8, 9 - ], - "exception_percent_nonmissing": 0.2, - "exception_count": 2 + ], + "unexpected_percent_nonmissing": 0.2, + "unexpected_count": 2 } } -DataSet defaults + +Dataset defaults ------------------------------------------------------------------------------ -This default behavior for `output_format`, `include_config`, `catch_exceptions` can be overridden at the DataSet level: +This default behavior for `result_format`, `include_config`, `catch_exceptions` can be overridden at the Dataset level: .. code-block:: bash - my_dataset.set_default_expectation_argument("output_format", "SUMMARY") + my_dataset.set_default_expectation_argument("result_format", "SUMMARY") In validation mode, they can be overridden using flags: .. 
code-block:: bash - great_expectations my_dataset.csv my_expectations.json --output_format=BOOLEAN_ONLY --catch_exceptions=False --include_config=True + great_expectations my_dataset.csv my_expectations.json --result_format=BOOLEAN_ONLY --catch_exceptions=False --include_config=True diff --git a/docs/source/validation.rst b/docs/source/validation.rst index fe2433d3a7c3..83414d61e75f 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -28,7 +28,7 @@ Once you've constructed Expectations, you can use them to validate new data. }, ... { - "exception_list": 30.397989417989415, + "unexpected_list": 30.397989417989415, "expectation_type": "expect_column_mean_to_be_between", "success": True, "kwargs": { @@ -38,7 +38,7 @@ Once you've constructed Expectations, you can use them to validate new data. } }, { - "exception_list": [], + "unexpected_list": [], "expectation_type": "expect_column_values_to_be_between", "success": True, "kwargs": { @@ -48,7 +48,7 @@ Once you've constructed Expectations, you can use them to validate new data. } }, { - "exception_list": [ + "unexpected_list": [ "Downton (?Douton), Mr William James", "Jacobsohn Mr Samuel", "Seman Master Betros" @@ -62,7 +62,7 @@ Once you've constructed Expectations, you can use them to validate new data. } }, { - "exception_list": [ + "unexpected_list": [ "*" ], "expectation_type": "expect_column_values_to_be_in_set", @@ -101,7 +101,7 @@ This is especially powerful when combined with great_expectations's command line }, ... 
{ - "exception_list": 30.397989417989415, + "unexpected_list": 30.397989417989415, "expectation_type": "expect_column_mean_to_be_between", "success": True, "kwargs": { @@ -111,7 +111,7 @@ This is especially powerful when combined with great_expectations's command line } }, { - "exception_list": [], + "unexpected_list": [], "expectation_type": "expect_column_values_to_be_between", "success": True, "kwargs": { @@ -121,7 +121,7 @@ This is especially powerful when combined with great_expectations's command line } }, { - "exception_list": [ + "unexpected_list": [ "Downton (?Douton), Mr William James", "Jacobsohn Mr Samuel", "Seman Master Betros" @@ -135,7 +135,7 @@ This is especially powerful when combined with great_expectations's command line } }, { - "exception_list": [ + "unexpected_list": [ "*" ], "expectation_type": "expect_column_values_to_be_in_set", diff --git a/examples/notebooks/Tutorial.ipynb b/examples/notebooks/Tutorial.ipynb index 5937be2bcde4..63f2f0567752 100644 --- a/examples/notebooks/Tutorial.ipynb +++ b/examples/notebooks/Tutorial.ipynb @@ -30,7 +30,7 @@ "outputs": [], "source": [ "df = pd.read_csv('/PATHTO/hospital_charge_data.csv')\n", - "df = ge.dataset.PandasDataSet(df)\n", + "df = ge.dataset.PandasDataset(df)\n", "print(df.columns)" ] }, @@ -131,4 +131,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/notebooks/test_broken_timestamp_example.ipynb b/examples/notebooks/test_broken_timestamp_example.ipynb index 7bfdb58fb896..03b5a853a368 100644 --- a/examples/notebooks/test_broken_timestamp_example.ipynb +++ b/examples/notebooks/test_broken_timestamp_example.ipynb @@ -26,7 +26,7 @@ "}\n", "\n", "df = pd.DataFrame(df_as_json)\n", - "df = ge.dataset.pandas_dataset.PandasDataSet(df)\n", + "df = ge.dataset.pandas_dataset.PandasDataset(df)\n", "df.head()" ] }, @@ -124,4 +124,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/great_expectations/__init__.py 
b/great_expectations/__init__.py index 313d227e54d2..59ea2590a059 100644 --- a/great_expectations/__init__.py +++ b/great_expectations/__init__.py @@ -1,45 +1,71 @@ +import json import pandas as pd - from .util import * from great_expectations import dataset +from great_expectations.data_context import get_data_context from .version import __version__ -def list_sources(): - raise NotImplementedError - -def connect_to_datasource(): - raise NotImplementedError +def _convert_to_dataset_class(df, dataset_class, expectations_config=None): + """ + Convert a (pandas) dataframe to a great_expectations dataset, with (optional) expectations_config + """ + if expectations_config is not None: + # Cast the dataframe into the new class, and manually initialize expectations according to the provided configuration + df.__class__ = dataset_class + df.initialize_expectations(expectations_config) + else: + # Instantiate the new Dataset with default expectations + try: + df = dataset_class(df) + except: + raise NotImplementedError("read_csv requires a Dataset class that can be instantiated from a Pandas DataFrame") -def connect_to_dataset(): - raise NotImplementedError + return df def read_csv( filename, - dataset_class=dataset.pandas_dataset.PandasDataSet, + dataset_class=dataset.pandas_dataset.PandasDataset, expectations_config=None, *args, **kwargs ): df = pd.read_csv(filename, *args, **kwargs) - if expectations_config is not None: - # Cast the dataframe into the new class, and manually initialize expectations according to the provided configuration - df.__class__ = dataset_class - df.initialize_expectations(expectations_config) + df = _convert_to_dataset_class(df, dataset_class, expectations_config) + return df + +def read_json( + filename, + dataset_class=dataset.pandas_dataset.PandasDataset, + expectations_config=None, + accessor_func=None, + *args, **kwargs +): + if accessor_func != None: + json_obj = json.load(open(filename, 'rb')) + json_obj = accessor_func(json_obj) + df = 
pd.read_json(json.dumps(json_obj), *args, **kwargs) + else: - # Instantiate the new DataSet with default expectations - try: - df = dataset_class(df) - except: - raise NotImplementedError("read_csv requires a DataSet class that can be instantiated from a Pandas DataFrame") + df = pd.read_json(filename, *args, **kwargs) + + df = _convert_to_dataset_class(df, dataset_class, expectations_config) return df -# Removed. Preferred solution will be to use ge.dataset.PandasDataSet(df) instead. -# def df(df, dataset_config=None, *args, **kwargs): -# df.__class__ = dataset.pandas_dataset.PandasDataSet -# df.initialize_expectations(dataset_config) -# -# return df +def from_pandas(pandas_df, expectations_config=None): + return _convert_to_dataset_class( + pandas_df, + dataset.pandas_dataset.PandasDataset, + expectations_config + ) + +def validate(df, expectations_config, *args, **kwargs): + #FIXME: I'm not sure that this should always default to PandasDataset + dataset_ = _convert_to_dataset_class(df, + dataset.pandas_dataset.PandasDataset, + expectations_config + ) + return dataset_.validate(*args, **kwargs) def expect(data_source_str, expectation): raise NotImplementedError diff --git a/great_expectations/data_context/__init__.py b/great_expectations/data_context/__init__.py new file mode 100644 index 000000000000..be4cefd469f1 --- /dev/null +++ b/great_expectations/data_context/__init__.py @@ -0,0 +1,17 @@ +from .pandas_context import PandasCSVDataContext +from .sqlalchemy_context import SqlAlchemyDataContext + +def get_data_context(context_type, options): + """Return a data_context object which exposes options to list datasets and get a dataset from + that context. This is a new API in Great Expectations 0.4, and is subject to rapid change. + + :param context_type: (string) one of "SqlAlchemy" or "PandasCSV" + :param options: options to be passed to the data context's connect method. 
+ :return: a new DataContext object + """ + if context_type == "SqlAlchemy": + return SqlAlchemyDataContext(options) + elif context_type == "PandasCSV": + return PandasCSVDataContext(options) + else: + raise ValueError("Unknown data context.") \ No newline at end of file diff --git a/great_expectations/data_context/base.py b/great_expectations/data_context/base.py new file mode 100644 index 000000000000..5647ebd321ff --- /dev/null +++ b/great_expectations/data_context/base.py @@ -0,0 +1,17 @@ +class DataContext(object): + """A generic DataContext, exposing the base API including constructor with `options` parameter, list_datasets, + and get_dataset. + + Warning: this feature is new in v0.4 and may change based on community feedback. + """ + def __init__(self, options): + self.connect(options) + + def connect(self, options): + return NotImplementedError + + def list_datasets(self): + return NotImplementedError + + def get_data_set(self, dataset_name): + return NotImplementedError diff --git a/great_expectations/data_context/pandas_context.py b/great_expectations/data_context/pandas_context.py new file mode 100644 index 000000000000..4b89b93a3fe7 --- /dev/null +++ b/great_expectations/data_context/pandas_context.py @@ -0,0 +1,26 @@ +import pandas as pd +import os + +from .base import DataContext +from ..dataset.pandas_dataset import PandasDataset + +class PandasCSVDataContext(DataContext): + """ + A PandasCSVDataContext makes it easy to get a list of files available in the list_datasets + method. Its get_dataset method returns a new Pandas dataset with the provided name. + + Warning: this feature is new in v0.4 and may change based on community feedback. 
+ """ + + def __init__(self, *args, **kwargs): + super(PandasCSVDataContext, self).__init__(*args, **kwargs) + + def connect(self, options): + self.directory = options + + def list_datasets(self): + return os.listdir(self.directory) + + def get_dataset(self, dataset_name, *args, **kwargs): + df = pd.read_csv(os.path.join(self.directory, dataset_name), *args, **kwargs) + return PandasDataset(df) diff --git a/great_expectations/data_context/sqlalchemy_context.py b/great_expectations/data_context/sqlalchemy_context.py new file mode 100644 index 000000000000..c038d51e26b6 --- /dev/null +++ b/great_expectations/data_context/sqlalchemy_context.py @@ -0,0 +1,28 @@ +from .base import DataContext +from ..dataset.sqlalchemy_dataset import SqlAlchemyDataset + +from sqlalchemy import create_engine, MetaData + + +class SqlAlchemyDataContext(DataContext): + """ + A SqlAlchemyDataContext creates a SQLAlchemy engine and provides a list of tables available in the list_datasets + method. Its get_dataset method returns a new SqlAlchemy dataset with the provided name. + + Warning: this feature is new in v0.4 and may change based on community feedback. 
+ """ + + def __init__(self, *args, **kwargs): + super(SqlAlchemyDataContext, self).__init__(*args, **kwargs) + self.meta = MetaData() + + def connect(self, options): + self.engine = create_engine(options) + + def list_datasets(self): + self.meta.reflect(bind=self.engine) + tables = [str(table) for table in self.meta.sorted_tables] + return tables + + def get_dataset(self, dataset_name): + return SqlAlchemyDataset(table_name=dataset_name, engine=self.engine) diff --git a/great_expectations/dataset/__init__.py b/great_expectations/dataset/__init__.py index fada9e2f42b6..7a0036f8904e 100644 --- a/great_expectations/dataset/__init__.py +++ b/great_expectations/dataset/__init__.py @@ -1,2 +1,3 @@ -from .base import DataSet -from .pandas_dataset import MetaPandasDataSet, PandasDataSet +from .base import Dataset +from .pandas_dataset import MetaPandasDataset, PandasDataset +from .sqlalchemy_dataset import MetaSqlAlchemyDataset, SqlAlchemyDataset \ No newline at end of file diff --git a/great_expectations/dataset/base.py b/great_expectations/dataset/base.py index 7b6b568bfd63..07ef638eabcc 100644 --- a/great_expectations/dataset/base.py +++ b/great_expectations/dataset/base.py @@ -1,4 +1,3 @@ -from .util import DotDict, recursively_convert_to_json_serializable import json import inspect @@ -6,24 +5,27 @@ from functools import wraps import traceback import warnings +from six import string_types -import pandas as pd -from collections import defaultdict +from collections import ( + Counter, + defaultdict +) from ..version import __version__ -from .util import DotDict, recursively_convert_to_json_serializable, DocInherit +from .util import DotDict, recursively_convert_to_json_serializable, parse_result_format -class DataSet(object): +class Dataset(object): def __init__(self, *args, **kwargs): - super(DataSet, self).__init__(*args, **kwargs) + super(Dataset, self).__init__(*args, **kwargs) self.initialize_expectations() @classmethod def expectation(cls, method_arg_names): 
"""Manages configuration and running of expectation objects. - Expectation builds and saves a new expectation configuration to the DataSet object. It is the core decorator \ + Expectation builds and saves a new expectation configuration to the Dataset object. It is the core decorator \ used by great expectations to manage expectation configurations. Args: @@ -33,8 +35,8 @@ def expectation(cls, method_arg_names): Notes: Intermediate decorators that call the core @expectation decorator will most likely need to pass their \ - decorated methods' signature up to the expectation decorator. For example, the MetaPandasDataSet \ - column_map_expectation decorator relies on the DataSet expectation decorator, but will pass through the \ + decorated methods' signature up to the expectation decorator. For example, the MetaPandasDataset \ + column_map_expectation decorator relies on the Dataset expectation decorator, but will pass through the \ signature from the implementing method. @expectation intercepts and takes action based on the following parameters: @@ -44,9 +46,9 @@ def expectation(cls, method_arg_names): * catch_exceptions (boolean or None) : \ If True, then catch exceptions and include them as part of the result object. \ For more detail, see :ref:`catch_exceptions`. - * output_format (str or None) : \ + * result_format (str or None) : \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. * meta (dict or None): \ A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. \ For more detail, see :ref:`meta`. 
@@ -75,10 +77,10 @@ def wrapper(self, *args, **kwargs): else: catch_exceptions = self.default_expectation_args["catch_exceptions"] - if "output_format" in kwargs: - output_format = kwargs["output_format"] + if "result_format" in kwargs: + result_format = kwargs["result_format"] else: - output_format = self.default_expectation_args["output_format"] + result_format = self.default_expectation_args["result_format"] if "meta" in kwargs: meta = kwargs["meta"] @@ -87,11 +89,11 @@ def wrapper(self, *args, **kwargs): meta = None # This intends to get the signature of the inner wrapper, if there is one. - if "output_format" in inspect.getargspec(func)[0][1:]: - all_args["output_format"] = output_format + if "result_format" in inspect.getargspec(func)[0][1:]: + all_args["result_format"] = result_format else: - if "output_format" in all_args: - del all_args["output_format"] + if "result_format" in all_args: + del all_args["result_format"] all_args = recursively_convert_to_json_serializable(all_args) expectation_args = copy.deepcopy(all_args) @@ -107,8 +109,9 @@ def wrapper(self, *args, **kwargs): raised_exception = False exception_traceback = None + exception_message = None - #Finally, execute the expectation method itself + # Finally, execute the expectation method itself try: return_obj = func(self, **expectation_args) @@ -116,40 +119,34 @@ def wrapper(self, *args, **kwargs): if catch_exceptions: raised_exception = True exception_traceback = traceback.format_exc() + exception_message = str(err) + + return_obj = { + "success": False + } - if output_format != "BOOLEAN_ONLY": - return_obj = { - "success": False - } - else: - return_obj = False else: raise(err) - #Add a "success" object to the config - if output_format == "BOOLEAN_ONLY": - expectation_config["success_on_last_run"] = return_obj - else: - expectation_config["success_on_last_run"] = return_obj["success"] - - #Append the expectation to the config. + # Append the expectation to the config. 
self.append_expectation(expectation_config) - if output_format != 'BOOLEAN_ONLY': + if include_config: + return_obj["expectation_config"] = copy.deepcopy(expectation_config) - if include_config: - return_obj["expectation_type"] = expectation_config["expectation_type"] - return_obj["expectation_kwargs"] = copy.deepcopy(dict(expectation_config["kwargs"])) + if catch_exceptions: + return_obj["exception_info"] = { + "raised_exception": raised_exception, + "exception_message": exception_message, + "exception_traceback": exception_traceback + } - if catch_exceptions: - return_obj["raised_exception"] = raised_exception - return_obj["exception_traceback"] = exception_traceback + # Add a "success" object to the config + expectation_config["success_on_last_run"] = return_obj["success"] return_obj = recursively_convert_to_json_serializable(return_obj) return return_obj - # wrapper.__name__ = func.__name__ - # wrapper.__doc__ = func.__doc__ return wrapper return outer_wrapper @@ -169,18 +166,18 @@ def column_map_expectation(cls, func): Notes: column_map_expectation intercepts and takes action based on the following parameters: - mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ - For more detail, see :ref:`mostly`. + mostly (None or a float between 0 and 1): \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ + For more detail, see :ref:`mostly`. column_map_expectation *excludes null values* from being passed to the function - Depending on the `output_format` selected, column_map_expectation can additional data to a return object, \ - including `element_count`, `nonnull_values`, `nonnull_count`, `success_count`, `exception_list`, and \ - `exception_index_list`. 
See :func:`_format_column_map_output ` + Depending on the `result_format` selected, column_map_expectation can additional data to a return object, \ + including `element_count`, `nonnull_values`, `nonnull_count`, `success_count`, `unexpected_list`, and \ + `unexpected_index_list`. See :func:`_format_column_map_output ` See also: - :func:`expect_column_values_to_be_unique ` \ + :func:`expect_column_values_to_be_unique ` \ for an example of a column_map_expectation """ raise NotImplementedError @@ -201,7 +198,7 @@ def column_aggregate_expectation(cls, func): column_aggregate_expectation *excludes null values* from being passed to the function See also: - :func:`expect_column_mean_to_be_between ` \ + :func:`expect_column_mean_to_be_between ` \ for an example of a column_aggregate_expectation """ raise NotImplementedError @@ -211,22 +208,34 @@ def initialize_expectations(self, config=None, name=None): #!!! Should validate the incoming config with jsonschema here # Copy the original so that we don't overwrite it by accident - self._expectations_config = DotDict(copy.deepcopy(config)) + ## Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it + ## since we are subclassing. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=UserWarning) + self._expectations_config = DotDict(copy.deepcopy(config)) else: - self._expectations_config = DotDict({ - "dataset_name" : name, - "meta": { - "great_expectations.__version__": __version__ - }, - "expectations" : [] - }) - - self.default_expectation_args = { - "include_config" : False, - "catch_exceptions" : False, - "output_format" : 'BASIC', - } + ## Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it + ## since we are subclassing. 
+ with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=UserWarning) + self._expectations_config = DotDict({ + "dataset_name" : name, + "meta": { + "great_expectations.__version__": __version__ + }, + "expectations" : [] + }) + + ## Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it + ## since we are subclassing. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=UserWarning) + self.default_expectation_args = { + "include_config" : False, + "catch_exceptions" : False, + "result_format" : 'BASIC', + } def append_expectation(self, expectation_config): expectation_type = expectation_config['expectation_type'] @@ -261,7 +270,7 @@ def append_expectation(self, expectation_config): def _copy_and_clean_up_expectation(self, expectation, - discard_output_format_kwargs=True, + discard_result_format_kwargs=True, discard_include_configs_kwargs=True, discard_catch_exceptions_kwargs=True, ): @@ -270,10 +279,10 @@ def _copy_and_clean_up_expectation(self, if "success_on_last_run" in new_expectation: del new_expectation["success_on_last_run"] - if discard_output_format_kwargs: - if "output_format" in new_expectation["kwargs"]: - del new_expectation["kwargs"]["output_format"] - # discards["output_format"] += 1 + if discard_result_format_kwargs: + if "result_format" in new_expectation["kwargs"]: + del new_expectation["kwargs"]["result_format"] + # discards["result_format"] += 1 if discard_include_configs_kwargs: if "include_configs" in new_expectation["kwargs"]: @@ -290,7 +299,7 @@ def _copy_and_clean_up_expectation(self, def _copy_and_clean_up_expectations_from_indexes( self, match_indexes, - discard_output_format_kwargs=True, + discard_result_format_kwargs=True, discard_include_configs_kwargs=True, discard_catch_exceptions_kwargs=True, ): @@ -299,7 +308,7 @@ def _copy_and_clean_up_expectations_from_indexes( rval.append( self._copy_and_clean_up_expectation( 
self._expectations_config.expectations[i], - discard_output_format_kwargs, + discard_result_format_kwargs, discard_include_configs_kwargs, discard_catch_exceptions_kwargs, ) @@ -352,7 +361,7 @@ def find_expectations(self, expectation_type=None, column=None, expectation_kwargs=None, - discard_output_format_kwargs=True, + discard_result_format_kwargs=True, discard_include_configs_kwargs=True, discard_catch_exceptions_kwargs=True, ): @@ -361,7 +370,7 @@ def find_expectations(self, expectation_type=None : The name of the expectation type to be matched. column=None : The name of the column to be matched. expectation_kwargs=None : A dictionary of kwargs to match against. - discard_output_format_kwargs=True : In returned expectation object(s), suppress the `output_format` parameter. + discard_result_format_kwargs=True : In returned expectation object(s), suppress the `result_format` parameter. discard_include_configs_kwargs=True : In returned expectation object(s), suppress the `include_configs` parameter. discard_catch_exceptions_kwargs=True : In returned expectation object(s), suppress the `catch_exceptions` parameter. @@ -378,7 +387,7 @@ def find_expectations(self, return self._copy_and_clean_up_expectations_from_indexes( match_indexes, - discard_output_format_kwargs, + discard_result_format_kwargs, discard_include_configs_kwargs, discard_catch_exceptions_kwargs, ) @@ -406,7 +415,7 @@ def remove_expectation(self, Note: If remove_expectation doesn't find any matches, it raises a ValueError. If remove_expectation finds more than one matches and remove_multiple_matches!=True, it raises a ValueError. 
- If dry_run=True, then `remove_expectation` acts as a thin layer to find_expectations, with the default values for discard_output_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs + If dry_run=True, then `remove_expectation` acts as a thin layer to find_expectations, with the default values for discard_result_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs """ match_indexes = self.find_expectation_indexes( @@ -442,18 +451,27 @@ def remove_expectation(self, else: return expectation + def discard_failing_expectations(self): + res = self.validate(only_return_failures=True).get('results') + if any(res): + for item in res: + self.remove_expectation(expectation_type=item['expectation_config']['expectation_type'], + expectation_kwargs=item['expectation_config']['kwargs']) +# print("WARNING: Removed %s expectations that were 'False'" % len(res)) + warnings.warn("Removed %s expectations that were 'False'" % len(res)) + def get_default_expectation_arguments(self): - """Fetch default expectation arguments for this DataSet + """Fetch default expectation arguments for this dataset Returns: - A dictionary containing all the current default expectation arguments for a DataSet + A dictionary containing all the current default expectation arguments for a dataset Ex:: { "include_config" : False, "catch_exceptions" : False, - "output_format" : 'BASIC' + "result_format" : 'BASIC' } See also: @@ -462,7 +480,7 @@ def get_default_expectation_arguments(self): return self.default_expectation_args def set_default_expectation_argument(self, argument, value): - """Set a default expectation argument for this DataSet + """Set a default expectation argument for this dataset Args: argument (string): The argument to be replaced @@ -480,7 +498,7 @@ def set_default_expectation_argument(self, argument, value): def get_expectations_config(self, discard_failed_expectations=True, - discard_output_format_kwargs=True, + 
discard_result_format_kwargs=True, discard_include_configs_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False @@ -488,7 +506,7 @@ def get_expectations_config(self, """Returns _expectation_config as a JSON object, and perform some cleaning along the way. Args: discard_failed_expectations=True : Only include expectations with success_on_last_run=True in the exported config. - discard_output_format_kwargs=True : In returned expectation objects, suppress the `output_format` parameter. + discard_result_format_kwargs=True : In returned expectation objects, suppress the `result_format` parameter. discard_include_configs_kwargs=True : In returned expectation objects, suppress the `include_configs` parameter. discard_catch_exceptions_kwargs=True : In returned expectation objects, suppress the `catch_exceptions` parameter. @@ -524,10 +542,10 @@ def get_expectations_config(self, if "success_on_last_run" in expectation: del expectation["success_on_last_run"] - if discard_output_format_kwargs: - if "output_format" in expectation["kwargs"]: - del expectation["kwargs"]["output_format"] - discards["output_format"] += 1 + if discard_result_format_kwargs: + if "result_format" in expectation["kwargs"]: + del expectation["kwargs"]["result_format"] + discards["result_format"] += 1 if discard_include_configs_kwargs: if "include_configs" in expectation["kwargs"]: @@ -544,22 +562,22 @@ def get_expectations_config(self, """ WARNING: get_expectations_config discarded 12 failing expectations - 44 output_format kwargs + 44 result_format kwargs 0 include_config kwargs 1 catch_exceptions kwargs -If you wish to change this behavior, please set discard_failed_expectations, discard_output_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs appropirately. +If you wish to change this behavior, please set discard_failed_expectations, discard_result_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs appropirately. 
""" - if any([discard_failed_expectations, discard_output_format_kwargs, discard_include_configs_kwargs, discard_catch_exceptions_kwargs]): + if any([discard_failed_expectations, discard_result_format_kwargs, discard_include_configs_kwargs, discard_catch_exceptions_kwargs]): print ("WARNING: get_expectations_config discarded") if discard_failed_expectations: print ("\t%d failing expectations" % discards["failed_expectations"]) - if discard_output_format_kwargs: - print ("\t%d output_format kwargs" % discards["output_format"]) + if discard_result_format_kwargs: + print ("\t%d result_format kwargs" % discards["result_format"]) if discard_include_configs_kwargs: print ("\t%d include_configs kwargs" % discards["include_configs"]) if discard_catch_exceptions_kwargs: print ("\t%d catch_exceptions kwargs" % discards["catch_exceptions"]) - print ("If you wish to change this behavior, please set discard_failed_expectations, discard_output_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs appropirately.") + print ("If you wish to change this behavior, please set discard_failed_expectations, discard_result_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs appropirately.") config["expectations"] = expectations return config @@ -568,7 +586,7 @@ def save_expectations_config( self, filepath=None, discard_failed_expectations=True, - discard_output_format_kwargs=True, + discard_result_format_kwargs=True, discard_include_configs_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False @@ -579,7 +597,7 @@ def save_expectations_config( expectations_config = self.get_expectations_config( discard_failed_expectations, - discard_output_format_kwargs, + discard_result_format_kwargs, discard_include_configs_kwargs, discard_catch_exceptions_kwargs, suppress_warnings @@ -587,16 +605,18 @@ def save_expectations_config( expectation_config_str = json.dumps(expectations_config, indent=2) open(filepath, 
'w').write(expectation_config_str) - def validate(self, expectations_config=None, catch_exceptions=True, output_format=None, include_config=None, only_return_failures=False): + def validate(self, expectations_config=None, catch_exceptions=True, result_format=None, only_return_failures=False): results = [] if expectations_config is None: expectations_config = self.get_expectations_config( discard_failed_expectations=False, - discard_output_format_kwargs=False, + discard_result_format_kwargs=False, discard_include_configs_kwargs=False, discard_catch_exceptions_kwargs=False, ) + elif isinstance(expectations_config, string_types): + expectations_config = json.load(open(expectations_config, 'r')) # Warn if our version is different from the version in the configuration try: @@ -606,27 +626,46 @@ def validate(self, expectations_config=None, catch_exceptions=True, output_forma warnings.warn("WARNING: No great_expectations version found in configuration object.") for expectation in expectations_config['expectations']: - expectation_method = getattr(self, expectation['expectation_type']) + try: + expectation_method = getattr(self, expectation['expectation_type']) - if output_format is not None: - expectation['kwargs'].update({"output_format": output_format}) + if result_format is not None: + expectation['kwargs'].update({"result_format": result_format}) - if include_config is not None: - expectation['kwargs'].update({"include_config": include_config}) + result = expectation_method( + catch_exceptions=catch_exceptions, + **expectation['kwargs'] + ) - result = expectation_method( - catch_exceptions=catch_exceptions, - **expectation['kwargs'] - ) + except Exception as err: + if catch_exceptions: + raised_exception = True + exception_traceback = traceback.format_exc() + + result = { + "success": False, + "exception_info": { + "raised_exception": raised_exception, + "exception_traceback": exception_traceback, + "exception_message": str(err) + } + } - if output_format != 
"BOOLEAN_ONLY": - results.append( - dict(list(expectation.items()) + list(result.items())) - ) - else: - results.append( - dict(list(expectation.items()) + [("success", result)]) - ) + else: + raise(err) + + #if include_config: + result["expectation_config"] = copy.deepcopy(expectation) + + # Add an empty exception_info object if no exception was caught + if catch_exceptions and ('exception_info' not in result): + result["exception_info"] = { + "raised_exception": False, + "exception_traceback": None, + "exception_message": None + } + + results.append(result) if only_return_failures: abbrev_results = [] @@ -642,99 +681,93 @@ def validate(self, expectations_config=None, catch_exceptions=True, output_forma ##### Output generation ##### def _format_column_map_output(self, - output_format, success, - element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + result_format, success, + element_count, nonnull_count, + unexpected_list, unexpected_index_list ): """Helper function to construct expectation result objects for column_map_expectations. - Expectations support four output_formats: BOOLEAN_ONLY, BASIC, SUMMARY, and COMPLETE. + Expectations support four result_formats: BOOLEAN_ONLY, BASIC, SUMMARY, and COMPLETE. In each case, the object returned has a different set of populated fields. - See :ref:`output_format` for more information. + See :ref:`result_format` for more information. This function handles the logic for mapping those fields for column_map_expectations. 
""" - if output_format == "BOOLEAN_ONLY": - return_obj = success - elif output_format == "BASIC": - exception_count = len(exception_list) + # Retain support for string-only output formats: + result_format = parse_result_format(result_format) - if element_count > 0: - if nonnull_count > 0: - exception_percent = float(exception_count) / element_count - exception_percent_nonmissing = float(exception_count) / nonnull_count + # Incrementally add to result and return when all values for the specified level are present + return_obj = { + 'success': success + } - else: - exception_percent = float(exception_count) / element_count - exception_percent_nonmissing = None - else: - exception_percent = None - exception_percent_nonmissing = None - - return_obj = { - "success": success, - "summary_obj": { - "partial_exception_list": exception_list[:20], - "exception_count": exception_count, - "exception_percent": exception_percent, - "exception_percent_nonmissing": exception_percent_nonmissing, - } - } + if result_format['result_format'] == 'BOOLEAN_ONLY': + return return_obj - elif output_format == "COMPLETE": - return_obj = { - "success": success, - "exception_list": exception_list, - "exception_index_list": exception_index_list, - } + missing_count = element_count - nonnull_count + unexpected_count = len(unexpected_list) - elif output_format == "SUMMARY": - # element_count = int(len(series)) - missing_count = element_count-int(len(nonnull_values))#int(null_indexes.sum()) - exception_count = len(exception_list) + if element_count > 0: + unexpected_percent = float(unexpected_count) / element_count + missing_percent = float(missing_count) / element_count - exception_value_series = pd.Series(exception_list).value_counts().iloc[:20] - partial_exception_counts = dict(zip( - list(exception_value_series.index), - list(exception_value_series.values), - )) + if nonnull_count > 0: + unexpected_percent_nonmissing = float(unexpected_count) / nonnull_count + else: + 
unexpected_percent_nonmissing = None - if element_count > 0: - missing_percent = float(missing_count) / element_count - exception_percent = float(exception_count) / element_count + else: + missing_percent = None + unexpected_percent = None + unexpected_percent_nonmissing = None + + return_obj['result'] = { + 'element_count': element_count, + 'missing_count': missing_count, + 'missing_percent': missing_percent, + 'unexpected_count': unexpected_count, + 'unexpected_percent': unexpected_percent, + 'unexpected_percent_nonmissing': unexpected_percent_nonmissing, + 'partial_unexpected_list': unexpected_list[:result_format['partial_unexpected_count']] + } - if nonnull_count > 0: - exception_percent_nonmissing = float(exception_count) / nonnull_count - else: - exception_percent_nonmissing = None + if result_format['result_format'] == 'BASIC': + return return_obj - else: - missing_percent = None - exception_percent = None - exception_percent_nonmissing = None - - return_obj = { - "success": success, - "summary_obj": { - "element_count": element_count, - "missing_count": missing_count, - "missing_percent": missing_percent, - "exception_count": exception_count, - "exception_percent": exception_percent, - "exception_percent_nonmissing": exception_percent_nonmissing, - "partial_exception_counts": partial_exception_counts, - "partial_exception_list": exception_list[:20], - "partial_exception_index_list": exception_index_list[:20], - } + # Try to return the most common values, if possible. 
+ try: + partial_unexpected_counts = [ + {'value': key, 'count': value} + for key, value + in sorted( + Counter(unexpected_list).most_common(result_format['partial_unexpected_count']), + key=lambda x: (-x[1], x[0])) + ] + except TypeError: + partial_unexpected_counts = ['partial_exception_counts requires a hashable type'] + + return_obj['result'].update( + { + 'partial_unexpected_index_list': unexpected_index_list[:result_format['partial_unexpected_count']] if unexpected_index_list is not None else None, + 'partial_unexpected_counts': partial_unexpected_counts } + ) - else: - raise ValueError("Unknown output_format %s." % (output_format,)) + if result_format['result_format'] == 'SUMMARY': + return return_obj + + return_obj['result'].update( + { + 'unexpected_list': unexpected_list, + 'unexpected_index_list': unexpected_index_list + } + ) + + if result_format['result_format'] == 'COMPLETE': + return return_obj - return return_obj + raise ValueError("Unknown result_format %s." % (result_format['result_format'],)) def _calc_map_expectation_success(self, success_count, nonnull_count, mostly): """Calculate success and percent_success for column_map_expectations @@ -834,13 +867,13 @@ def test_column_aggregate_expectation_function(self, function, *args, **kwargs): ##### Table shape expectations ##### - def expect_column_to_exist(self, - column, - output_format=None, include_config=False, catch_exceptions=None, meta=None + def expect_column_to_exist( + self, column, column_index=None, result_format=None, include_config=False, + catch_exceptions=None, meta=None ): """Expect the specified column to exist. - expect_column_to_exist is a :func:`expectation `, not a \ + expect_column_to_exist is a :func:`expectation `, not a \ `column_map_expectation` or `column_aggregate_expectation`. Args: @@ -848,9 +881,12 @@ def expect_column_to_exist(self, The column name. 
Other Parameters: - output_format (str or None): \ + column_index (int or None): \ + If not None, checks the order of the columns. The expectation will fail if the \ + column is not in location column_index (zero-indexed). + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -864,7 +900,44 @@ def expect_column_to_exist(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and + :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. + + """ + + raise NotImplementedError + + def expect_table_columns_to_match_ordered_list(self, + column_list, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + """Expect the columns to exactly match a specified list. + + expect_table_columns_to_match_ordered_list is a :func:`expectation `, not a \ + `column_map_expectation` or `column_aggregate_expectation`. + + Args: + column_list (list of str): \ + The column names, in the correct order. + + Other Parameters: + result_format (str or None): \ + Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. + For more detail, see :ref:`result_format `. + include_config (boolean): \ + If True, then include the expectation config as part of the result object. \ + For more detail, see :ref:`include_config`. + catch_exceptions (boolean or None): \ + If True, then catch exceptions and include them as part of the result object. \ + For more detail, see :ref:`catch_exceptions`. 
+ meta (dict or None): \ + A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. \ + For more detail, see :ref:`meta`. + + Returns: + A JSON-serializable expectation result object. + + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. """ @@ -874,11 +947,11 @@ def expect_column_to_exist(self, def expect_table_row_count_to_be_between(self, min_value=0, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the number of rows to be between two values. - expect_table_row_count_to_be_between is a :func:`expectation `, \ + expect_table_row_count_to_be_between is a :func:`expectation `, \ not a `column_map_expectation` or `column_aggregate_expectation`. Keyword Args: @@ -888,9 +961,9 @@ def expect_table_row_count_to_be_between(self, The maximum number of rows, inclusive. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -904,7 +977,7 @@ def expect_table_row_count_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
Notes: @@ -919,11 +992,11 @@ def expect_table_row_count_to_be_between(self, def expect_table_row_count_to_equal(self, value, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the number of rows to equal a value. - expect_table_row_count_to_equal is a basic :func:`expectation `, \ + expect_table_row_count_to_equal is a basic :func:`expectation `, \ not a `column_map_expectation` or `column_aggregate_expectation`. Args: @@ -931,9 +1004,9 @@ def expect_table_row_count_to_equal(self, The expected number of rows. Other Parameters: - output_format (string or None): \ + result_format (string or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -947,7 +1020,7 @@ def expect_table_row_count_to_equal(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -960,15 +1033,15 @@ def expect_table_row_count_to_equal(self, def expect_column_values_to_be_unique(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect each column value to be unique. This expectation detects duplicates. All duplicated values are counted as exceptions. 
- For example, `[1, 2, 3, 3, 3]` will return `[3, 3, 3]` in `summary_obj.exceptions_list`, with `exception_percent=0.6.` + For example, `[1, 2, 3, 3, 3]` will return `[3, 3, 3]` in `result.exceptions_list`, with `unexpected_percent=0.6.` - expect_column_values_to_be_unique is a :func:`column_map_expectation `. + expect_column_values_to_be_unique is a :func:`column_map_expectation `. Args: column (str): \ @@ -976,13 +1049,13 @@ def expect_column_values_to_be_unique(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -996,7 +1069,7 @@ def expect_column_values_to_be_unique(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. """ raise NotImplementedError @@ -1004,14 +1077,14 @@ def expect_column_values_to_be_unique(self, def expect_column_values_to_not_be_null(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to not be null. 
To be counted as an exception, values must be explicitly null or missing, such as a NULL in PostgreSQL or an np.NaN in pandas. Empty strings don't count as null unless they have been coerced to a null type. - expect_column_values_to_not_be_null is a :func:`column_map_expectation `. + expect_column_values_to_not_be_null is a :func:`column_map_expectation `. Args: column (str): \ @@ -1019,13 +1092,13 @@ def expect_column_values_to_not_be_null(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1039,7 +1112,7 @@ def expect_column_values_to_not_be_null(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1051,11 +1124,11 @@ def expect_column_values_to_not_be_null(self, def expect_column_values_to_be_null(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to be null. - expect_column_values_to_be_null is a :func:`column_map_expectation `. + expect_column_values_to_be_null is a :func:`column_map_expectation `. 
Args: column (str): \ @@ -1063,13 +1136,13 @@ def expect_column_values_to_be_null(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1083,7 +1156,7 @@ def expect_column_values_to_be_null(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1098,11 +1171,11 @@ def expect_column_values_to_be_of_type( type_, target_datasource="numpy", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect each column entry to be a specified data type. - expect_column_values_to_be_of_type is a :func:`column_map_expectation `. + expect_column_values_to_be_of_type is a :func:`column_map_expectation `. Args: column (str): \ @@ -1116,13 +1189,13 @@ def expect_column_values_to_be_of_type( Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. 
Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1136,14 +1209,14 @@ def expect_column_values_to_be_of_type( Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Warning: expect_column_values_to_be_of_type is slated for major changes in future versions of great_expectations. As of v0.3, great_expectations is exclusively based on pandas, which handles typing in its own peculiar way. - Future versions of great_expectations will allow for datasets in SQL, spark, etc. + Future versions of great_expectations will allow for Datasets in SQL, spark, etc. When we make that change, we expect some breaking changes in parts of the codebase that are based strongly on pandas notions of typing. See also: @@ -1157,11 +1230,11 @@ def expect_column_values_to_be_in_type_list( type_list, target_datasource="numpy", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect each column entry to match a list of specified data types. - expect_column_values_to_be_in_type_list is a :func:`column_map_expectation `. + expect_column_values_to_be_in_type_list is a :func:`column_map_expectation `. 
Args: column (str): \ @@ -1175,13 +1248,13 @@ def expect_column_values_to_be_in_type_list( Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1195,14 +1268,14 @@ def expect_column_values_to_be_in_type_list( Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Warning: expect_column_values_to_be_in_type_list is slated for major changes in future versions of great_expectations. As of v0.3, great_expectations is exclusively based on pandas, which handles typing in its own peculiar way. - Future versions of great_expectations will allow for datasets in SQL, spark, etc. + Future versions of great_expectations will allow for Datasets in SQL, spark, etc. When we make that change, we expect some breaking changes in parts of the codebase that are based strongly on pandas notions of typing. See also: @@ -1216,7 +1289,7 @@ def expect_column_values_to_be_in_set(self, column, values_set, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect each column value to be in a given set. 
@@ -1230,17 +1303,17 @@ def expect_column_values_to_be_in_set(self, ) { "success": false - "summary_obj": { - "exception_count": 1 - "exception_percent": 0.16666666666666666, - "exception_percent_nonmissing": 0.16666666666666666, - "partial_exception_list": [ + "result": { + "unexpected_count": 1 + "unexpected_percent": 0.16666666666666666, + "unexpected_percent_nonmissing": 0.16666666666666666, + "partial_unexpected_list": [ 1 ], }, } - expect_column_values_to_be_in_set is a :func:`column_map_expectation `. + expect_column_values_to_be_in_set is a :func:`column_map_expectation `. Args: @@ -1251,13 +1324,13 @@ def expect_column_values_to_be_in_set(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1271,7 +1344,7 @@ def expect_column_values_to_be_in_set(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
See Also: @@ -1283,7 +1356,7 @@ def expect_column_values_to_not_be_in_set(self, column, values_set, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to not be in the set. @@ -1297,17 +1370,17 @@ def expect_column_values_to_not_be_in_set(self, ) { "success": false - "summary_obj": { - "exception_count": 3 - "exception_percent": 0.5, - "exception_percent_nonmissing": 0.5, - "partial_exception_list": [ + "result": { + "unexpected_count": 3 + "unexpected_percent": 0.5, + "unexpected_percent_nonmissing": 0.5, + "partial_unexpected_list": [ 1, 2, 2 ], }, } - expect_column_values_to_not_be_in_set is a :func:`column_map_expectation `. + expect_column_values_to_not_be_in_set is a :func:`column_map_expectation `. Args: column (str): \ @@ -1317,13 +1390,13 @@ def expect_column_values_to_not_be_in_set(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1337,7 +1410,7 @@ def expect_column_values_to_not_be_in_set(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
See Also: @@ -1352,11 +1425,11 @@ def expect_column_values_to_be_between(self, allow_cross_type_comparisons=None, parse_strings_as_datetimes=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be between a minimum value and a maximum value (inclusive). - expect_column_values_to_be_between is a :func:`column_map_expectation `. + expect_column_values_to_be_between is a :func:`column_map_expectation `. Args: column (str): \ @@ -1370,13 +1443,13 @@ def expect_column_values_to_be_between(self, parse_strings_as_datetimes (boolean or None) : If True, parse min_value, max_value, and all non-null column\ values to datetimes before making comparisons. mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1390,7 +1463,7 @@ def expect_column_values_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
Notes: @@ -1409,7 +1482,7 @@ def expect_column_values_to_be_increasing(self, strictly=None, parse_strings_as_datetimes=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to be increasing. @@ -1419,7 +1492,7 @@ def expect_column_values_to_be_increasing(self, If `strictly=True`, then this expectation is only satisfied if each consecutive value is strictly increasing--equal values are treated as failures. - expect_column_values_to_be_increasing is a :func:`column_map_expectation `. + expect_column_values_to_be_increasing is a :func:`column_map_expectation `. Args: column (str): \ @@ -1431,13 +1504,13 @@ def expect_column_values_to_be_increasing(self, parse_strings_as_datetimes (boolean or None) : \ If True, all non-null column values to datetimes before making comparisons mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1451,7 +1524,7 @@ def expect_column_values_to_be_increasing(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
See Also: @@ -1464,7 +1537,7 @@ def expect_column_values_to_be_decreasing(self, strictly=None, parse_strings_as_datetimes=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to be decreasing. @@ -1474,7 +1547,7 @@ def expect_column_values_to_be_decreasing(self, If `strictly=True`, then this expectation is only satisfied if each consecutive value is strictly decreasing--equal values are treated as failures. - expect_column_values_to_be_decreasing is a :func:`column_map_expectation `. + expect_column_values_to_be_decreasing is a :func:`column_map_expectation `. Args: column (str): \ @@ -1486,13 +1559,13 @@ def expect_column_values_to_be_decreasing(self, parse_strings_as_datetimes (boolean or None) : \ If True, all non-null column values to datetimes before making comparisons mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1506,7 +1579,7 @@ def expect_column_values_to_be_decreasing(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
See Also: @@ -1523,13 +1596,13 @@ def expect_column_value_lengths_to_be_between(self, min_value=None, max_value=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be strings with length between a minimum value and a maximum value (inclusive). This expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError. - expect_column_value_lengths_to_be_between is a :func:`column_map_expectation `. + expect_column_value_lengths_to_be_between is a :func:`column_map_expectation `. Args: column (str): \ @@ -1541,13 +1614,13 @@ def expect_column_value_lengths_to_be_between(self, max_value (int or None): \ The maximum value for a column entry length. mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1561,7 +1634,7 @@ def expect_column_value_lengths_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. 
Notes: @@ -1578,13 +1651,13 @@ def expect_column_value_lengths_to_equal(self, column, value, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be strings with length equal to the provided value. This expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError. - expect_column_values_to_be_between is a :func:`column_map_expectation `. + expect_column_value_lengths_to_equal is a :func:`column_map_expectation `. Args: column (str): \ @@ -1594,13 +1667,13 @@ def expect_column_value_lengths_to_equal(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1614,7 +1687,7 @@ def expect_column_value_lengths_to_equal(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.
See Also: @@ -1625,11 +1698,11 @@ def expect_column_values_to_match_regex(self, column, regex, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be strings that match a given regular expression. - expect_column_values_to_match_regex is a :func:`column_map_expectation `. + expect_column_values_to_match_regex is a :func:`column_map_expectation `. Args: column (str): \ @@ -1639,13 +1712,13 @@ def expect_column_values_to_match_regex(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1659,7 +1732,7 @@ def expect_column_values_to_match_regex(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1672,11 +1745,11 @@ def expect_column_values_to_not_match_regex(self, column, regex, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be strings that do NOT match a given regular expression. 
- expect_column_values_to_not_match_regex is a :func:`column_map_expectation `. + expect_column_values_to_not_match_regex is a :func:`column_map_expectation `. Args: column (str): \ @@ -1686,13 +1759,13 @@ def expect_column_values_to_not_match_regex(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1706,7 +1779,7 @@ def expect_column_values_to_not_match_regex(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1720,11 +1793,11 @@ def expect_column_values_to_match_regex_list(self, regex_list, match_on="any", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column entries to be strings that match either any of or all of a list of regular expressions. - expect_column_values_to_match_regex_list is a :func:`column_map_expectation `. + expect_column_values_to_match_regex_list is a :func:`column_map_expectation `. 
Args: column (str): \ @@ -1738,13 +1811,13 @@ def expect_column_values_to_match_regex_list(self, Use "any" if the value should match at least one regular expression in the list. Use "all" if it should match each regular expression in the list. mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1758,7 +1831,7 @@ def expect_column_values_to_match_regex_list(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1773,11 +1846,11 @@ def expect_column_values_to_match_strftime_format(self, column, strftime_format, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be strings representing a date or time with a given format. - expect_column_values_to_match_strftime_format is a :func:`column_map_expectation `. + expect_column_values_to_match_strftime_format is a :func:`column_map_expectation `. 
Args: column (str): \ @@ -1787,13 +1860,13 @@ def expect_column_values_to_match_strftime_format(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1807,7 +1880,7 @@ def expect_column_values_to_match_strftime_format(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. """ @@ -1816,11 +1889,11 @@ def expect_column_values_to_match_strftime_format(self, def expect_column_values_to_be_dateutil_parseable(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be parseable using dateutil. - expect_column_values_to_be_dateutil_parseable is a :func:`column_map_expectation `. + expect_column_values_to_be_dateutil_parseable is a :func:`column_map_expectation `. Args: column (str): \ @@ -1828,13 +1901,13 @@ def expect_column_values_to_be_dateutil_parseable(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. 
\ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1848,7 +1921,7 @@ def expect_column_values_to_be_dateutil_parseable(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. """ raise NotImplementedError @@ -1856,11 +1929,11 @@ def expect_column_values_to_be_dateutil_parseable(self, def expect_column_values_to_be_json_parseable(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be data written in JavaScript Object Notation. - expect_column_values_to_be_json_parseable is a :func:`column_map_expectation `. + expect_column_values_to_be_json_parseable is a :func:`column_map_expectation `. Args: column (str): \ @@ -1868,13 +1941,13 @@ def expect_column_values_to_be_json_parseable(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. 
Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1888,7 +1961,7 @@ def expect_column_values_to_be_json_parseable(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1900,11 +1973,11 @@ def expect_column_values_to_match_json_schema(self, column, json_schema, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column entries to be JSON objects matching a given JSON schema. - expect_column_values_to_match_json_schema is a :func:`column_map_expectation `. + expect_column_values_to_match_json_schema is a :func:`column_map_expectation `. Args: column (str): \ @@ -1912,13 +1985,13 @@ def expect_column_values_to_match_json_schema(self, Keyword Args: mostly (None or a float between 0 and 1): \ - Return `"success": True` if the percentage of exceptions less than or equal to `mostly`. \ + Return `"success": True` if the percentage of unexpected values is less than or equal to `mostly`. \ For more detail, see :ref:`mostly`. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. 
include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1932,7 +2005,7 @@ def expect_column_values_to_match_json_schema(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. See Also: @@ -1944,15 +2017,89 @@ def expect_column_values_to_match_json_schema(self, ##### Aggregate functions ##### + def expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than(self, + column, distribution, + p_value=0.05, params=None, + result_format=None, + include_config=False, + catch_exceptions=None, meta=None): + """ + Expect the column values to be distributed similarly to a scipy distribution. \ + + This expectation compares the provided column to the specified continuous distribution with a parametric \ + Kolmogorov-Smirnov test. The K-S test compares the provided column to the cumulative distribution function (CDF) of \ + the specified scipy distribution. If you don't know the desired distribution shape parameters, use the \ + `ge.dataset.util.infer_distribution_parameters()` utility function to estimate them. + + It returns 'success'=True if the p-value from the K-S test is greater than or equal to the provided p-value. + + expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than is a \ + :func:`column_aggregate_expectation `. + + Args: + column (str): \ + The column name. + distribution (str): \ + The scipy distribution name. See: https://docs.scipy.org/doc/scipy/reference/stats.html + p_value (float): \ + The threshold p-value for a passing test. Default is 0.05. + params (dict or list) : \ + A dictionary or positional list of shape parameters that describe the distribution you want to test the\ + data against.
Include key values specific to the distribution from the appropriate scipy \ + distribution CDF function. 'loc' and 'scale' are used as translational parameters.\ + See https://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions + + Other Parameters: + result_format (str or None): \ + Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. + For more detail, see :ref:`result_format `. + include_config (boolean): \ + If True, then include the expectation config as part of the result object. \ + For more detail, see :ref:`include_config`. + catch_exceptions (boolean or None): \ + If True, then catch exceptions and include them as part of the result object. \ + For more detail, see :ref:`catch_exceptions`. + meta (dict or None): \ + A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. \ + For more detail, see :ref:`meta`. + + Returns: + A JSON-serializable expectation result object. + + Exact fields vary depending on the values passed to :ref:`result_format ` and + :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. + + Notes: + These fields in the result object are customized for this expectation: + :: + + { + "details": + "expected_params" (dict): The specified or inferred parameters of the distribution to test against + "ks_results" (dict): The raw result of stats.kstest() + } + + * The Kolmogorov-Smirnov test's null hypothesis is that the column is similar to the provided distribution. + * Supported scipy distributions: + -norm + -beta + -gamma + -uniform + -chi2 + -expon + + """ + raise NotImplementedError + def expect_column_mean_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column mean to be between a minimum value and a maximum value (inclusive). 
- expect_column_mean_to_be_between is a :func:`column_aggregate_expectation `. + expect_column_mean_to_be_between is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -1963,9 +2110,9 @@ def expect_column_mean_to_be_between(self, The maximum value for the column mean. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -1979,7 +2126,7 @@ def expect_column_mean_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -1987,7 +2134,7 @@ def expect_column_mean_to_be_between(self, :: { - "true_value": (float) The true mean for the column + "observed_value": (float) The true mean for the column } * min_value and max_value are both inclusive. @@ -2004,11 +2151,11 @@ def expect_column_median_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column median to be between a minimum value and a maximum value. - expect_column_median_to_be_between is a :func:`column_aggregate_expectation `. + expect_column_median_to_be_between is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2019,9 +2166,9 @@ def expect_column_median_to_be_between(self, The maximum value for the column median. 
Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2035,7 +2182,7 @@ def expect_column_median_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2043,7 +2190,7 @@ def expect_column_median_to_be_between(self, :: { - "true_value": (float) The true median for the column + "observed_value": (float) The true median for the column } * min_value and max_value are both inclusive. @@ -2061,11 +2208,11 @@ def expect_column_stdev_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column standard deviation to be between a minimum value and a maximum value. - expect_column_stdev_to_be_between is a :func:`column_aggregate_expectation `. + expect_column_stdev_to_be_between is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2076,9 +2223,9 @@ def expect_column_stdev_to_be_between(self, The maximum value for the column standard deviation. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. 
include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2092,7 +2239,7 @@ def expect_column_stdev_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2100,7 +2247,7 @@ def expect_column_stdev_to_be_between(self, :: { - "true_value": (float) The true standard deviation for the column + "observed_value": (float) The true standard deviation for the column } * min_value and max_value are both inclusive. @@ -2117,11 +2264,11 @@ def expect_column_unique_value_count_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the number of unique values to be between a minimum value and a maximum value. - expect_column_unique_value_count_to_be_between is a :func:`column_aggregate_expectation `. + expect_column_unique_value_count_to_be_between is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2132,9 +2279,9 @@ def expect_column_unique_value_count_to_be_between(self, The maximum number of unique values allowed. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2148,7 +2295,7 @@ def expect_column_unique_value_count_to_be_between(self, Returns: A JSON-serializable expectation result object. 
- Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2156,7 +2303,7 @@ def expect_column_unique_value_count_to_be_between(self, :: { - "true_value": (int) The number of unique values in the column + "observed_value": (int) The number of unique values in the column } * min_value and max_value are both inclusive. @@ -2172,7 +2319,7 @@ def expect_column_proportion_of_unique_values_to_be_between(self, column, min_value=0, max_value=1, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the proportion of unique values to be between a minimum value and a maximum value. @@ -2187,12 +2334,12 @@ def expect_column_proportion_of_unique_values_to_be_between(self, max_value (float or None): \ The maximum proportion of unique values. (Proportions are on the range 0 to 1) - expect_column_unique_value_count_to_be_between is a :func:`column_aggregate_expectation `. + expect_column_unique_value_count_to_be_between is a :func:`column_aggregate_expectation `. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2206,7 +2353,7 @@ def expect_column_proportion_of_unique_values_to_be_between(self, Returns: A JSON-serializable expectation result object. 
- Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2214,7 +2361,7 @@ def expect_column_proportion_of_unique_values_to_be_between(self, :: { - "true_value": (float) The proportion of unique values in the column + "observed_value": (float) The proportion of unique values in the column } * min_value and max_value are both inclusive. @@ -2230,11 +2377,11 @@ def expect_column_most_common_value_to_be_in_set(self, column, value_set, ties_okay=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the most common value to be within the designated value set - expect_column_most_common_value_to_be_in_set is a :func:`column_aggregate_expectation `. + expect_column_most_common_value_to_be_in_set is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2247,9 +2394,9 @@ def expect_column_most_common_value_to_be_in_set(self, If True, then the expectation will still succeed if values outside the designated set are as common (but not more common) than designated values Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2263,7 +2410,7 @@ def expect_column_most_common_value_to_be_in_set(self, Returns: A JSON-serializable expectation result object. 
- Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2271,12 +2418,12 @@ def expect_column_most_common_value_to_be_in_set(self, :: { - "true_value": (list) The most common values in the column + "observed_value": (list) The most common values in the column } - `true_value` contains a list of the most common values. + `observed_value` contains a list of the most common values. Often, this will just be a single element. But if there's a tie for most common among multiple values, - `true_value` will contain a single copy of each most common value. + `observed_value` will contain a single copy of each most common value. """ raise NotImplementedError @@ -2285,13 +2432,12 @@ def expect_column_sum_to_be_between(self, column, min_value=None, max_value=None, - ties_okay=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column to sum to be between an min and max value - expect_column_sum_to_be_between is a :func:`column_aggregate_expectation `. - + expect_column_sum_to_be_between is a :func:`column_aggregate_expectation `. + Args: column (str): \ The column name @@ -2301,9 +2447,9 @@ def expect_column_sum_to_be_between(self, The maximum number of unique values allowed. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. 
@@ -2317,7 +2463,7 @@ def expect_column_sum_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2325,7 +2471,7 @@ def expect_column_sum_to_be_between(self, :: { - "true_value": (list) The actual column sum + "observed_value": (list) The actual column sum } @@ -2342,12 +2488,12 @@ def expect_column_min_to_be_between(self, max_value=None, parse_strings_as_datetimes=None, output_strftime_format=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column to sum to be between an min and max value - expect_column_min_to_be_between is a :func:`column_aggregate_expectation `. - + expect_column_min_to_be_between is a :func:`column_aggregate_expectation `. + Args: column (str): \ The column name @@ -2355,7 +2501,7 @@ def expect_column_min_to_be_between(self, The minimum number of unique values allowed. max_value (comparable type or None): \ The maximum number of unique values allowed. - + Keyword Args: parse_strings_as_datetimes (Boolean or None): \ If True, parse min_value, max_values, and all non-null column values to datetimes before making comparisons. @@ -2363,9 +2509,9 @@ def expect_column_min_to_be_between(self, A valid strfime format for datetime output. Only used if parse_strings_as_datetimes=True. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. 
\ For more detail, see :ref:`include_config`. @@ -2379,7 +2525,7 @@ def expect_column_min_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2387,7 +2533,7 @@ def expect_column_min_to_be_between(self, :: { - "true_value": (list) The actual column min + "observed_value": (list) The actual column min } @@ -2404,12 +2550,12 @@ def expect_column_max_to_be_between(self, max_value=None, parse_strings_as_datetimes=None, output_strftime_format=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect the column max to be between an min and max value - expect_column_sum_to_be_between is a :func:`column_aggregate_expectation `. - + expect_column_sum_to_be_between is a :func:`column_aggregate_expectation `. + Args: column (str): \ The column name @@ -2417,7 +2563,7 @@ def expect_column_max_to_be_between(self, The minimum number of unique values allowed. max_value (comparable type or None): \ The maximum number of unique values allowed. - + Keyword Args: parse_strings_as_datetimes (Boolean or None): \ If True, parse min_value, max_values, and all non-null column values to datetimes before making comparisons. @@ -2425,9 +2571,9 @@ def expect_column_max_to_be_between(self, A valid strfime format for datetime output. Only used if parse_strings_as_datetimes=True. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. 
\ For more detail, see :ref:`include_config`. @@ -2441,7 +2587,7 @@ def expect_column_max_to_be_between(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2449,7 +2595,7 @@ def expect_column_max_to_be_between(self, :: { - "true_value": (list) The actual column max + "observed_value": (list) The actual column max } @@ -2466,14 +2612,14 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, partition_object=None, p=0.05, tail_weight_holdout=0, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to be distributed similarly to the provided categorical partition. \ This expectation compares categorical distributions using a Chi-squared test. \ It returns `success=True` if values in the column match the distribution of the provided partition. - expect_column_chisquare_test_p_value_to_be_greater_than is a :func:`column_aggregate_expectation `. + expect_column_chisquare_test_p_value_to_be_greater_than is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2494,9 +2640,9 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, partition. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. 
@@ -2510,7 +2656,7 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2518,8 +2664,8 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, :: { - "true_value": (float) The true p-value of the Chi-squared test - "summary_obj": { + "observed_value": (float) The true p-value of the Chi-squared test + "details": { "observed_partition" (dict): The partition observed in the data. "expected_partition" (dict): @@ -2536,7 +2682,7 @@ def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, p=0.05, bootstrap_samples=None, bootstrap_sample_size=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): """Expect column values to be distributed similarly to the provided continuous partition. This expectation \ compares continuous distributions using a bootstrapped Kolmogorov-Smirnov test. It returns `success=True` if \ @@ -2546,7 +2692,7 @@ def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, using the provided weights. Consequently the test expects a piecewise uniform distribution using the bins from \ the provided partition object. - expect_column_bootstrapped_ks_test_p_value_to_be_greater_than is a :func:`column_aggregate_expectation `. + expect_column_bootstrapped_ks_test_p_value_to_be_greater_than is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2567,9 +2713,9 @@ def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, specificity of the test. 
Defaults to 2 * len(partition_object['weights']) Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2583,7 +2729,7 @@ def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2591,8 +2737,8 @@ def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, :: { - "true_value": (float) The true p-value of the KS test - "summary_obj": { + "observed_value": (float) The true p-value of the KS test + "details": { "bootstrap_samples": The number of bootstrap rounds used "bootstrap_sample_size": The number of samples taken from the column in each bootstrap round @@ -2621,7 +2767,7 @@ def expect_column_kl_divergence_to_be_less_than(self, threshold=None, tail_weight_holdout=0, internal_weight_holdout=0, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): """Expect the Kulback-Leibler (KL) divergence (relative entropy) of the specified column with respect to the \ partition object to be lower than the provided threshold. @@ -2633,7 +2779,7 @@ def expect_column_kl_divergence_to_be_less_than(self, This expectation works on both categorical and continuous partitions. See notes below for details. - expect_column_kl_divergence_to_be_less_than is a :func:`column_aggregate_expectation `. 
+ expect_column_kl_divergence_to_be_less_than is a :func:`column_aggregate_expectation `. Args: column (str): \ @@ -2660,9 +2806,9 @@ def expect_column_kl_divergence_to_be_less_than(self, Defaults to 0. Other Parameters: - output_format (str or None): \ + result_format (str or None): \ Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. - For more detail, see :ref:`output_format `. + For more detail, see :ref:`result_format `. include_config (boolean): \ If True, then include the expectation config as part of the result object. \ For more detail, see :ref:`include_config`. @@ -2676,7 +2822,7 @@ def expect_column_kl_divergence_to_be_less_than(self, Returns: A JSON-serializable expectation result object. - Exact fields vary depending on the values passed to :ref:`output_format ` and + Exact fields vary depending on the values passed to :ref:`result_format ` and :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`. Notes: @@ -2684,8 +2830,8 @@ def expect_column_kl_divergence_to_be_less_than(self, :: { - "true_value": (float) The true KL divergence (relative entropy) - "summary_obj": { + "observed_value": (float) The true KL divergence (relative entropy) + "details": { "observed_partition": (dict) The partition observed in the data "expected_partition": (dict) The partition against which the data were compared, after applying specified weight holdouts. 
diff --git a/great_expectations/dataset/pandas_dataset.py b/great_expectations/dataset/pandas_dataset.py index b1e3a6ed88d9..e58078b1cea7 100644 --- a/great_expectations/dataset/pandas_dataset.py +++ b/great_expectations/dataset/pandas_dataset.py @@ -7,27 +7,32 @@ from functools import wraps import jsonschema +from numbers import Number + import numpy as np import pandas as pd from dateutil.parser import parse from scipy import stats from six import string_types -from .base import DataSet +from .base import Dataset from .util import DocInherit, recursively_convert_to_json_serializable, \ - is_valid_partition_object, is_valid_categorical_partition_object, is_valid_continuous_partition_object + is_valid_partition_object, is_valid_categorical_partition_object, is_valid_continuous_partition_object, \ + infer_distribution_parameters, _scipy_distribution_positional_args_from_dict, validate_distribution_parameters,\ + parse_result_format + -class MetaPandasDataSet(DataSet): +class MetaPandasDataset(Dataset): """ - MetaPandasDataSet is a thin layer between DataSet and PandasDataSet. This two-layer inheritance is required to make @classmethod decorators work. + MetaPandasDataset is a thin layer between Dataset and PandasDataset. This two-layer inheritance is required to make @classmethod decorators work. - Practically speaking, that means that MetaPandasDataSet implements + Practically speaking, that means that MetaPandasDataset implements expectation decorators, like `column_map_expectation` and `column_aggregate_expectation`, and PandasDataset implements the expectation methods themselves. """ def __init__(self, *args, **kwargs): - super(MetaPandasDataSet, self).__init__(*args, **kwargs) + super(MetaPandasDataset, self).__init__(*args, **kwargs) @classmethod @@ -35,20 +40,20 @@ def column_map_expectation(cls, func): """Constructs an expectation using column-map semantics. 
- The MetaPandasDataSet implementation replaces the "column" parameter supplied by the user with a pandas Series + The MetaPandasDataset implementation replaces the "column" parameter supplied by the user with a pandas Series object containing the actual column from the relevant pandas dataframe. This simplifies the implementing expectation - logic while preserving the standard DataSet signature and expected behavior. + logic while preserving the standard Dataset signature and expected behavior. - See :func:`column_map_expectation ` \ + See :func:`column_map_expectation ` \ for full documentation of this function. """ @cls.expectation(inspect.getargspec(func)[0][1:]) @wraps(func) - def inner_wrapper(self, column, mostly=None, output_format=None, *args, **kwargs): + def inner_wrapper(self, column, mostly=None, result_format=None, *args, **kwargs): - if output_format is None: - output_format = self.default_expectation_args["output_format"] + if result_format is None: + result_format = self.default_expectation_args["result_format"] series = self[column] boolean_mapped_null_values = series.isnull() @@ -60,18 +65,15 @@ def inner_wrapper(self, column, mostly=None, output_format=None, *args, **kwargs boolean_mapped_success_values = func(self, nonnull_values, *args, **kwargs) success_count = boolean_mapped_success_values.sum() - exception_list = list(series[(boolean_mapped_success_values==False)&(boolean_mapped_null_values==False)]) - exception_index_list = list(series[(boolean_mapped_success_values==False)&(boolean_mapped_null_values==False)].index) - exception_count = len(exception_list) + unexpected_list = list(series[(boolean_mapped_success_values==False)&(boolean_mapped_null_values==False)]) + unexpected_index_list = list(series[(boolean_mapped_success_values==False)&(boolean_mapped_null_values==False)].index) success, percent_success = self._calc_map_expectation_success(success_count, nonnull_count, mostly) return_obj = self._format_column_map_output( - 
output_format, success, - element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + result_format, success, + element_count, nonnull_count, + unexpected_list, unexpected_index_list ) return return_obj @@ -86,19 +88,19 @@ def inner_wrapper(self, column, mostly=None, output_format=None, *args, **kwargs def column_aggregate_expectation(cls, func): """Constructs an expectation using column-aggregate semantics. - The MetaPandasDataSet implementation replaces the "column" parameter supplied by the user with a pandas + The MetaPandasDataset implementation replaces the "column" parameter supplied by the user with a pandas Series object containing the actual column from the relevant pandas dataframe. This simplifies the implementing - expectation logic while preserving the standard DataSet signature and expected behavior. + expectation logic while preserving the standard Dataset signature and expected behavior. - See :func:`column_aggregate_expectation ` \ + See :func:`column_aggregate_expectation ` \ for full documentation of this function. """ @cls.expectation(inspect.getargspec(func)[0][1:]) @wraps(func) - def inner_wrapper(self, column, output_format = None, *args, **kwargs): + def inner_wrapper(self, column, result_format = None, *args, **kwargs): - if output_format is None: - output_format = self.default_expectation_args["output_format"] + if result_format is None: + result_format = self.default_expectation_args["result_format"] series = self[column] null_indexes = series.isnull() @@ -108,69 +110,86 @@ def inner_wrapper(self, column, output_format = None, *args, **kwargs): nonnull_count = int((null_indexes == False).sum()) null_count = element_count - nonnull_count - result_obj = func(self, nonnull_values, *args, **kwargs) + evaluation_result = func(self, nonnull_values, *args, **kwargs) - #!!! This would be the right place to validate result_obj - #!!! It should contain: - #!!! 
success: bool - #!!! true_value: int or float - #!!! summary_obj: json-serializable dict + if 'success' not in evaluation_result: + raise ValueError("Column aggregate expectation failed to return required information: success") - # if not output_format in ["BASIC", "COMPLETE", "SUMMARY", "BOOLEAN_ONLY"]: - # print ("Warning: Unknown output_format %s. Defaulting to %s." % (output_format, self.default_expectation_args["output_format"])) + if ('result' not in evaluation_result) or ('observed_value' not in evaluation_result['result']): + raise ValueError("Column aggregate expectation failed to return required information: observed_value") + # Retain support for string-only output formats: + result_format = parse_result_format(result_format) - if output_format in ["BASIC", "COMPLETE"]: - return_obj = { - "success" : bool(result_obj["success"]), - "true_value" : result_obj["true_value"], - } + return_obj = { + 'success': bool(evaluation_result['success']) + } - elif (output_format == "SUMMARY"): - new_summary_obj = { - "element_count": element_count, - "missing_count": null_count, - "missing_percent": null_count*1.0 / element_count if element_count > 0 else None - } + if result_format['result_format'] == 'BOOLEAN_ONLY': + return return_obj - if "summary_obj" in result_obj and result_obj["summary_obj"] is not None: - result_obj["summary_obj"].update(new_summary_obj) - else: - result_obj["summary_obj"] = new_summary_obj + return_obj['result'] = { + 'observed_value': evaluation_result['result']['observed_value'], + "element_count": element_count, + "missing_count": null_count, + "missing_percent": null_count * 1.0 / element_count if element_count > 0 else None + } - return_obj = { - "success" : bool(result_obj["success"]), - "true_value" : result_obj["true_value"], - "summary_obj" : result_obj["summary_obj"] - } + if result_format['result_format'] == 'BASIC': + return return_obj - elif output_format=="BOOLEAN_ONLY": - return_obj = bool(result_obj["success"]) + if 'details' 
in evaluation_result['result']: + return_obj['result']['details'] = evaluation_result['result']['details'] - else: - raise ValueError("Unknown output_format %s." % (output_format,)) + if result_format['result_format'] in ["SUMMARY", "COMPLETE"]: + return return_obj - return return_obj + raise ValueError("Unknown result_format %s." % (result_format['result_format'],)) return inner_wrapper -class PandasDataSet(MetaPandasDataSet, pd.DataFrame): +class PandasDataset(MetaPandasDataset, pd.DataFrame): """ PandasDataset instantiates the great_expectations Expectations API as a subclass of a pandas.DataFrame. - For the full API reference, please see :func:`DataSet ` + For the full API reference, please see :func:`Dataset ` + + NB + 1. Samples and Subsets of PandaDataSet have ALL the expectations of the original + data frame unless the user specifies the discard_subset_failing_expectations=True + property on the original data frame. + 2. Concatenations, joins, and merges of PandaDataSets ONLY contain the + default_expectations (see :func: `add_default_expectations`) """ + @property + def _constructor(self): + return PandasDataset + +# Do we need to define _constructor_sliced and/or _constructor_expanddim? 
See http://pandas.pydata.org/pandas-docs/stable/internals.html#subclassing-pandas-data-structures + + def __finalize__(self, other, method=None, **kwargs): + if isinstance(other, PandasDataset): + self.initialize_expectations(other.get_expectations_config( + discard_failed_expectations=False, + discard_result_format_kwargs=False, + discard_include_configs_kwargs=False, + discard_catch_exceptions_kwargs=False)) + self.discard_subset_failing_expectations = other.discard_subset_failing_expectations + if self.discard_subset_failing_expectations: + self.discard_failing_expectations() + super(PandasDataset, self).__finalize__(other, method, **kwargs) + return self + def __init__(self, *args, **kwargs): - super(PandasDataSet, self).__init__(*args, **kwargs) + super(PandasDataset, self).__init__(*args, **kwargs) + self.discard_subset_failing_expectations = kwargs.get('discard_subset_failing_expectations', False) self.add_default_expectations() def add_default_expectations(self): """ - The default behavior for PandasDataSet is to explicitly include expectations that every column present upon initialization exists. - - FIXME: This should probably live in the grandparent class, DataSet, instead. + The default behavior for PandasDataset is to explicitly include expectations that every column present upon initialization exists. 
""" for col in self.columns: @@ -183,10 +202,28 @@ def add_default_expectations(self): ### Expectation methods ### @DocInherit - @DataSet.expectation(['column']) - def expect_column_to_exist(self, column, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + @Dataset.expectation(['column']) + def expect_column_to_exist( + self, column, column_index=None, result_format=None, include_config=False, + catch_exceptions=None, meta=None + ): + if column in self: + return { + "success": (column_index is None) or (self.columns.get_loc(column) == column_index) + } + + else: + return { + "success": False + } + + @DocInherit + @Dataset.expectation(['column_list']) + def expect_table_columns_to_match_ordered_list(self, column_list, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + + if list(self.columns) == list(column_list): return { "success" : True } @@ -196,11 +233,11 @@ def expect_column_to_exist(self, column, } @DocInherit - @DataSet.expectation(['min_value', 'max_value']) + @Dataset.expectation(['min_value', 'max_value']) def expect_table_row_count_to_be_between(self, min_value=0, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): # Assert that min_value and max_value are integers try: @@ -226,14 +263,16 @@ def expect_table_row_count_to_be_between(self, return { 'success': outcome, - 'true_value': row_count + 'result': { + 'observed_value': row_count + } } @DocInherit - @DataSet.expectation(['value']) + @Dataset.expectation(['value']) def expect_table_row_count_to_equal(self, value, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): try: if value is not None: @@ -242,6 +281,8 @@ def expect_table_row_count_to_equal(self, except ValueError: raise ValueError("value must be an integer") + if 
value is None: + raise ValueError("value must be provided") if self.shape[0] == value: outcome = True @@ -250,24 +291,26 @@ def expect_table_row_count_to_equal(self, return { 'success':outcome, - 'true_value':self.shape[0] + 'result': { + 'observed_value':self.shape[0] + } } @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_unique(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): dupes = set(column[column.duplicated()]) return column.map(lambda x: x not in dupes) @DocInherit - @DataSet.expectation(['column', 'mostly', 'output_format']) + @Dataset.expectation(['column', 'mostly', 'result_format']) def expect_column_values_to_not_be_null(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): - if output_format is None: - output_format = self.default_expectation_args["output_format"] + result_format=None, include_config=False, catch_exceptions=None, meta=None): + if result_format is None: + result_format = self.default_expectation_args["result_format"] series = self[column] boolean_mapped_null_values = series.isnull() @@ -279,30 +322,28 @@ def expect_column_values_to_not_be_null(self, column, boolean_mapped_success_values = boolean_mapped_null_values==False success_count = boolean_mapped_success_values.sum() - exception_list = [None for i in list(series[(boolean_mapped_success_values==False)])] - exception_index_list = list(series[(boolean_mapped_success_values==False)].index) - exception_count = len(exception_list) + unexpected_list = [None for i in list(series[(boolean_mapped_success_values==False)])] + unexpected_index_list = list(series[(boolean_mapped_success_values==False)].index) + unexpected_count = len(unexpected_list) # Pass element_count instead of nonnull_count, because that's the right 
denominator for this expectation success, percent_success = self._calc_map_expectation_success(success_count, element_count, mostly) return_obj = self._format_column_map_output( - output_format, success, - element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + result_format, success, + element_count, nonnull_count, + unexpected_list, unexpected_index_list ) return return_obj @DocInherit - @DataSet.expectation(['column', 'mostly', 'output_format']) + @Dataset.expectation(['column', 'mostly', 'result_format']) def expect_column_values_to_be_null(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): - if output_format is None: - output_format = self.default_expectation_args["output_format"] + result_format=None, include_config=False, catch_exceptions=None, meta=None): + if result_format is None: + result_format = self.default_expectation_args["result_format"] series = self[column] boolean_mapped_null_values = series.isnull() @@ -314,28 +355,26 @@ def expect_column_values_to_be_null(self, column, boolean_mapped_success_values = boolean_mapped_null_values success_count = boolean_mapped_success_values.sum() - exception_list = list(series[(boolean_mapped_success_values==False)]) - exception_index_list = list(series[(boolean_mapped_success_values==False)].index) - exception_count = len(exception_list) + unexpected_list = list(series[(boolean_mapped_success_values==False)]) + unexpected_index_list = list(series[(boolean_mapped_success_values==False)].index) + unexpected_count = len(unexpected_list) # Pass element_count instead of nonnull_count, because that's the right denominator for this expectation success, percent_success = self._calc_map_expectation_success(success_count, element_count, mostly) return_obj = self._format_column_map_output( - output_format, success, - element_count, - nonnull_values, nonnull_count, - 
boolean_mapped_success_values, success_count, - exception_list, exception_index_list + result_format, success, + element_count, nonnull_count, + unexpected_list, unexpected_index_list ) return return_obj @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_of_type(self, column, type_, target_datasource="numpy", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): python_avro_types = { "null":type(None), "boolean":bool, @@ -366,10 +405,10 @@ def expect_column_values_to_be_of_type(self, column, type_, target_datasource="n return result @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_in_type_list(self, column, type_list, target_datasource="numpy", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): python_avro_types = { "null":type(None), @@ -401,28 +440,28 @@ def expect_column_values_to_be_in_type_list(self, column, type_list, target_data return result @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_in_set(self, column, values_set, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): return column.map(lambda x: x in values_set) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_not_be_in_set(self, column, values_set, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): return 
column.map(lambda x: x not in values_set) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_between(self, column, min_value=None, max_value=None, parse_strings_as_datetimes=None, allow_cross_type_comparisons=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -439,8 +478,8 @@ def expect_column_values_to_be_between(self, else: temp_column = column - if min_value > max_value: - raise ValueError("min_value is greater than max_value") + if min_value != None and max_value != None and min_value > max_value: + raise ValueError("min_value cannot be greater than max_value") def is_between(val): # TODO Might be worth explicitly defining comparisons between types (for example, between strings and ints). 
@@ -496,10 +535,10 @@ def is_between(val): return temp_column.map(is_between) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_increasing(self, column, strictly=None, parse_strings_as_datetimes=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if parse_strings_as_datetimes: temp_column = column.map(parse) @@ -524,10 +563,10 @@ def expect_column_values_to_be_increasing(self, column, strictly=None, parse_str return col_diff >= 0 @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_decreasing(self, column, strictly=None, parse_strings_as_datetimes=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if parse_strings_as_datetimes: temp_column = column.map(parse) @@ -552,10 +591,10 @@ def expect_column_values_to_be_decreasing(self, column, strictly=None, parse_str return col_diff <= 0 @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_value_lengths_to_be_between(self, column, min_value=None, max_value=None, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -567,7 +606,7 @@ def expect_column_value_lengths_to_be_between(self, column, min_value=None, max_ if max_value is not None and not float(max_value).is_integer(): raise ValueError("min_value and max_value must be integers") - + except ValueError: raise ValueError("min_value and max_value must be integers") @@ -588,33 
+627,33 @@ def length_is_between(val): return column.map(length_is_between) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_value_lengths_to_equal(self, column, value, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): return column.map(lambda x : len(x) == value) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_match_regex(self, column, regex, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): return column.map( lambda x: re.findall(regex, str(x)) != [] ) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_not_match_regex(self, column, regex, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): return column.map(lambda x: re.findall(regex, str(x)) == []) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_match_regex_list(self, column, regex_list, match_on="any", mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if match_on=="any": @@ -635,10 +674,10 @@ def match_in_list(val): return column.map(match_in_list) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_match_strftime_format(self, column, strftime_format, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, + result_format=None, 
include_config=False, catch_exceptions=None, meta=None): ## Below is a simple validation that the provided format can both format and parse a datetime object. ## %D is an example of a format that can format but not parse, e.g. @@ -661,10 +700,10 @@ def is_parseable_by_format(val): return column.map(is_parseable_by_format) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_dateutil_parseable(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): def is_parseable(val): try: if type(val) != str: @@ -679,10 +718,10 @@ def is_parseable(val): return column.map(is_parseable) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_json_parseable(self, column, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): def is_json(val): try: json.loads(val) @@ -693,10 +732,10 @@ def is_json(val): return column.map(is_json) @DocInherit - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_match_json_schema(self, column, json_schema, mostly=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): def matches_json_schema(val): try: val_json = json.loads(val) @@ -709,30 +748,73 @@ def matches_json_schema(val): return column.map(matches_json_schema) + @DocInherit + @MetaPandasDataset.column_aggregate_expectation + def expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than(self, column, distribution, + p_value=0.05, params=None, + result_format=None, + include_config=False, + catch_exceptions=None, 
meta=None): + if p_value <= 0 or p_value >= 1: + raise ValueError("p_value must be between 0 and 1 exclusive") + + # Validate params + try: + validate_distribution_parameters(distribution=distribution, params=params) + except ValueError as e: + raise e + + # Format arguments for scipy.kstest + if (isinstance(params, dict)): + positional_parameters = _scipy_distribution_positional_args_from_dict(distribution, params) + else: + positional_parameters = params + + # K-S Test + ks_result = stats.kstest(column, distribution, + args=positional_parameters) + + return { + "success": ks_result[1] >= p_value, + "result": { + "observed_value": ks_result[1], + "details": { + "expected_params": positional_parameters, + "observed_ks_result": ks_result + } + } + } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_mean_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") + if min_value is not None and not isinstance(min_value, (Number)): + raise ValueError("min_value must be a number") + + if max_value is not None and not isinstance(max_value, (Number)): + raise ValueError("max_value must be a number") + column_mean = column.mean() return { - "success": ( + 'success': ( ((min_value is None) or (min_value <= column_mean)) and ((max_value is None) or (column_mean <= max_value)) ), - "true_value": column_mean, - "summary_obj": {} + 'result': { + 'observed_value': column_mean + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_median_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, 
catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -744,14 +826,15 @@ def expect_column_median_to_be_between(self, column, min_value=None, max_value=N ((min_value or None) or (min_value <= column_median)) and ((max_value or None) or (column_median <= max_value)) ), - "true_value": column_median, - "summary_obj": {} + "result":{ + "observed_value": column_median + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_stdev_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -763,14 +846,15 @@ def expect_column_stdev_to_be_between(self, column, min_value=None, max_value=No ((min_value is None) or (min_value <= column_stdev)) and ((max_value is None) or (column_stdev <= max_value)) ), - "true_value": column_stdev, - "summary_obj": {} + "result": { + "observed_value": column_stdev + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_unique_value_count_to_be_between(self, column, min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -782,14 +866,19 @@ def expect_column_unique_value_count_to_be_between(self, column, min_value=None, ((min_value is None) or (min_value <= unique_value_count)) and ((max_value is None) or 
(unique_value_count <= max_value)) ), - "true_value": unique_value_count, - "summary_obj": {} + "result": { + "observed_value": unique_value_count + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_proportion_of_unique_values_to_be_between(self, column, min_value=0, max_value=1, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + unique_value_count = column.value_counts().shape[0] total_value_count = int(len(column))#.notnull().sum() @@ -803,14 +892,15 @@ def expect_column_proportion_of_unique_values_to_be_between(self, column, min_va ((min_value is None) or (min_value <= proportion_unique)) and ((max_value is None) or (proportion_unique <= max_value)) ), - "true_value": proportion_unique, - "summary_obj": {} + "result": { + "observed_value": proportion_unique + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_most_common_value_to_be_in_set(self, column, value_set, ties_okay=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): mode_list = list(column.mode().values) intersection_count = len(set(value_set).intersection(mode_list)) @@ -824,18 +914,19 @@ def expect_column_most_common_value_to_be_in_set(self, column, value_set, ties_o success = intersection_count==1 return { - "success" : success, - "true_value": mode_list, - "summary_obj": {}, + 'success' : success, + 'result': { + 'observed_value': mode_list + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_sum_to_be_between(self, column, 
min_value=None, max_value=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -853,19 +944,20 @@ def expect_column_sum_to_be_between(self, return { "success" : success, - "true_value" : col_sum, - "summary_obj" : {} + "result": { + "observed_value" : col_sum + } } @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_min_to_be_between(self, column, min_value=None, max_value=None, parse_strings_as_datetimes=None, output_strftime_format=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -898,22 +990,23 @@ def expect_column_min_to_be_between(self, col_min = datetime.strftime(col_min, output_strftime_format) else: col_min = str(col_min) - return { - "success" : success, - "true_value" : col_min, - "summary_obj" : {} + 'success' : success, + 'result': { + 'observed_value' : col_min + } } + @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_max_to_be_between(self, column, min_value=None, max_value=None, parse_strings_as_datetimes=None, output_strftime_format=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None + result_format=None, include_config=False, catch_exceptions=None, meta=None ): if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") @@ -950,14 +1043,16 @@ def expect_column_max_to_be_between(self, return { "success" : success, - "true_value" : col_max, - "summary_obj" : {} + "result": { + 
"observed_value" : col_max + } } + @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_chisquare_test_p_value_to_be_greater_than(self, column, partition_object=None, p=0.05, tail_weight_holdout=0, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if not is_valid_categorical_partition_object(partition_object): raise ValueError("Invalid partition object.") @@ -980,27 +1075,29 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, column, partit test_result = stats.chisquare(test_df[column.name], test_df['expected'])[1] - result_obj = { + return_obj = { "success": test_result > p, - "true_value": test_result, - "summary_obj": { - "observed_partition": { - "values": test_df.index.tolist(), - "weights": test_df[column.name].tolist() - }, - "expected_partition": { - "values": test_df.index.tolist(), - "weights": test_df['expected'].tolist() + "result": { + "observed_value": test_result, + "details": { + "observed_partition": { + "values": test_df.index.tolist(), + "weights": test_df[column.name].tolist() + }, + "expected_partition": { + "values": test_df.index.tolist(), + "weights": test_df['expected'].tolist() + } } } } - return result_obj + return return_obj @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self, column, partition_object=None, p=0.05, bootstrap_samples=None, bootstrap_sample_size=None, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if not is_valid_continuous_partition_object(partition_object): raise ValueError("Invalid continuous partition object.") @@ -1050,38 +1147,40 @@ def estimated_cdf(x): observed_cdf_values = 
np.cumsum(observed_weights) - result_obj = { + return_obj = { "success" : test_result > p, - "true_value": test_result, - "summary_obj": { - "bootstrap_samples": bootstrap_samples, - "bootstrap_sample_size": bootstrap_sample_size, - "observed_partition": { - "bins": observed_bins, - "weights": observed_weights.tolist() - }, - "expected_partition": { - "bins": partition_object['bins'], - "weights": partition_object['weights'] - }, - "observed_cdf": { - "x": observed_bins, - "cdf_values": [0] + observed_cdf_values.tolist() - }, - "expected_cdf": { - "x": partition_object['bins'], - "cdf_values": test_cdf.tolist() + "result": { + "observed_value": test_result, + "details": { + "bootstrap_samples": bootstrap_samples, + "bootstrap_sample_size": bootstrap_sample_size, + "observed_partition": { + "bins": observed_bins, + "weights": observed_weights.tolist() + }, + "expected_partition": { + "bins": partition_object['bins'], + "weights": partition_object['weights'] + }, + "observed_cdf": { + "x": observed_bins, + "cdf_values": [0] + observed_cdf_values.tolist() + }, + "expected_cdf": { + "x": partition_object['bins'], + "cdf_values": test_cdf.tolist() + } } } } - return result_obj + return return_obj @DocInherit - @MetaPandasDataSet.column_aggregate_expectation + @MetaPandasDataset.column_aggregate_expectation def expect_column_kl_divergence_to_be_less_than(self, column, partition_object=None, threshold=None, tail_weight_holdout=0, internal_weight_holdout=0, - output_format=None, include_config=False, catch_exceptions=None, meta=None): + result_format=None, include_config=False, catch_exceptions=None, meta=None): if not is_valid_partition_object(partition_object): raise ValueError("Invalid partition object.") @@ -1118,17 +1217,19 @@ def expect_column_kl_divergence_to_be_less_than(self, column, partition_object=N kl_divergence = stats.entropy(pk, qk) - result_obj = { + return_obj = { "success": kl_divergence <= threshold, - "true_value": kl_divergence, - "summary_obj": { - 
"observed_partition": { - "values": test_df.index.tolist(), - "weights": pk.tolist() - }, - "expected_partition": { - "values": test_df.index.tolist(), - "weights": qk.tolist() + "result": { + "observed_value": kl_divergence, + "details": { + "observed_partition": { + "values": test_df.index.tolist(), + "weights": pk.tolist() + }, + "expected_partition": { + "values": test_df.index.tolist(), + "weights": qk.tolist() + } } } } @@ -1180,21 +1281,23 @@ def expect_column_kl_divergence_to_be_less_than(self, column, partition_object=N expected_weights = np.concatenate(([tail_weight_holdout / 2], expected_weights, [tail_weight_holdout / 2])) kl_divergence = stats.entropy(observed_weights, expected_weights) - - result_obj = { + return_obj = { "success": kl_divergence <= threshold, - "true_value": kl_divergence, - "summary_obj": { - "observed_partition": { - # return expected_bins, since we used those bins to compute the observed_weights - "bins": expected_bins, - "weights": observed_weights.tolist() - }, - "expected_partition": { - "bins": expected_bins, - "weights": expected_weights.tolist() + "result": { + "observed_value": kl_divergence, + "details": { + "observed_partition": { + # return expected_bins, since we used those bins to compute the observed_weights + "bins": expected_bins, + "weights": observed_weights.tolist() + }, + "expected_partition": { + "bins": expected_bins, + "weights": expected_weights.tolist() + } } } } - return result_obj + + return return_obj diff --git a/great_expectations/dataset/sqlalchemy_dataset.py b/great_expectations/dataset/sqlalchemy_dataset.py new file mode 100644 index 000000000000..2fd1680e95c3 --- /dev/null +++ b/great_expectations/dataset/sqlalchemy_dataset.py @@ -0,0 +1,599 @@ +from __future__ import division + +from great_expectations.dataset import Dataset + +from functools import wraps +import inspect + +from .util import DocInherit, parse_result_format + +import sqlalchemy as sa +from sqlalchemy.engine import reflection + +from 
numbers import Number + + +class MetaSqlAlchemyDataset(Dataset): + + def __init__(self, *args, **kwargs): + super(MetaSqlAlchemyDataset, self).__init__(*args, **kwargs) + + @classmethod + def column_map_expectation(cls, func): + """For SqlAlchemy, this decorator allows individual column_map_expectations to simply return the filter + that describes the expected condition on their data. + + The decorator will then use that filter to obtain unexpected elements, relevant counts, and return the formatted + object. + """ + + @cls.expectation(inspect.getargspec(func)[0][1:]) + @wraps(func) + def inner_wrapper(self, column, mostly=None, result_format=None, *args, **kwargs): + if result_format is None: + result_format = self.default_expectation_args["result_format"] + + result_format = parse_result_format(result_format) + + if result_format['result_format'] == 'COMPLETE': + unexpected_count_limit = None + else: + unexpected_count_limit = result_format['partial_unexpected_count'] + + expected_condition = func(self, column, *args, **kwargs) + + count_query = sa.select([ + sa.func.count().label('element_count'), + sa.func.sum( + sa.case([(sa.column(column) == None, 1)], else_=0) + ).label('null_count'), + sa.func.sum( + sa.case([(sa.not_(expected_condition), 1)], else_=0) + ).label('unexpected_count') + ]).select_from(sa.table(self.table_name)) + + count_results = self.engine.execute(count_query).fetchone() + + unexpected_query_results = self.engine.execute( + sa.select([sa.column(column)]).select_from(sa.table(self.table_name)).where(sa.not_(expected_condition)).limit(unexpected_count_limit) + ) + + nonnull_count = count_results['element_count'] - count_results['null_count'] + maybe_limited_unexpected_list = [x[column] for x in unexpected_query_results.fetchall()] + success_count = nonnull_count - count_results['unexpected_count'] + success, percent_success = self._calc_map_expectation_success(success_count, nonnull_count, mostly) + + return_obj = 
self._format_column_map_output( + result_format, success, + count_results['element_count'], nonnull_count, + maybe_limited_unexpected_list, None + ) + + return return_obj + + inner_wrapper.__name__ = func.__name__ + inner_wrapper.__doc__ = func.__doc__ + + return inner_wrapper + + + @classmethod + def column_aggregate_expectation(cls, func): + """Constructs an expectation using column-aggregate semantics. + """ + @cls.expectation(inspect.getargspec(func)[0][1:]) + @wraps(func) + def inner_wrapper(self, column, result_format = None, *args, **kwargs): + + if result_format is None: + result_format = self.default_expectation_args["result_format"] + + result_format = parse_result_format(result_format) + + evaluation_result = func(self, column, *args, **kwargs) + + if 'success' not in evaluation_result: + raise ValueError("Column aggregate expectation failed to return required information: success") + + if ('result' not in evaluation_result) or ('observed_value' not in evaluation_result['result']): + raise ValueError("Column aggregate expectation failed to return required information: observed_value") + + return_obj = { + 'success': bool(evaluation_result['success']) + } + + if result_format['result_format'] == 'BOOLEAN_ONLY': + return return_obj + + # Use the element and null count information from a column_map_expectation if it needed + # it anyway to avoid an extra trip to the database + + if 'element_count' not in evaluation_result and 'null_count' not in evaluation_result: + count_query = sa.select([ + sa.func.count().label('element_count'), + sa.func.sum( + sa.case([(sa.column(column) == None, 1)], else_=0) + ).label('null_count'), + ]).select_from(sa.table(self.table_name)) + + count_results = self.engine.execute(count_query).fetchone() + + return_obj['result'] = { + 'observed_value': evaluation_result['result']['observed_value'], + "element_count": count_results['element_count'], + "missing_count": count_results['null_count'], + "missing_percent": 
count_results['null_count'] / count_results['element_count'] if count_results['element_count'] > 0 else None + } + else: + return_obj['result'] = { + 'observed_value': evaluation_result['result']['observed_value'], + "element_count": evaluation_result["element_count"], + "missing_count": evaluation_result["null_count"], + "missing_percent": evaluation_result['null_count'] / evaluation_result['element_count'] if evaluation_result['element_count'] > 0 else None + } + + if result_format['result_format'] == 'BASIC': + return return_obj + + if 'details' in evaluation_result['result']: + return_obj['result']['details'] = evaluation_result['result']['details'] + + if result_format['result_format'] in ["SUMMARY", "COMPLETE"]: + return return_obj + + raise ValueError("Unknown result_format %s." % (result_format['result_format'],)) + + return inner_wrapper + + +class SqlAlchemyDataset(MetaSqlAlchemyDataset): + + def __init__(self, table_name=None, engine=None, connection_string=None): + super(SqlAlchemyDataset, self).__init__() + + if table_name is None: + raise ValueError("No table_name provided.") + + self.table_name = table_name + + if engine is None and connection_string is None: + raise ValueError("Engine or connection_string must be provided.") + + if engine is not None: + self.engine = engine + + else: + try: + self.engine = sa.create_engine(connection_string) + except Exception as err: + # Currently we do no error handling if the engine doesn't work out of the box. + raise err + + insp = reflection.Inspector.from_engine(engine) + self.columns = insp.get_columns(self.table_name) + + def add_default_expectations(self): + """ + The default behavior for SqlAlchemyDataset is to explicitly include expectations that every column present upon + initialization exists. 
+ """ + for col in self.columns: + self.append_expectation({ + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": col["name"] + } + }) + + def _is_numeric_column(self, column): + for col in self.columns: + if (col['name'] == column and + isinstance(col['type'], + (sa.types.Integer, sa.types.BigInteger, sa.types.Float, sa.types.Numeric, sa.types.SmallInteger, sa.types.Boolean) + ) + ): + return True + + return False + + ### + ### + ### + # + # Table level implementations + # + ### + ### + ### + + @DocInherit + @Dataset.expectation(['value']) + def expect_table_row_count_to_equal(self, + value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + # Assert that min_value and max_value are integers + try: + if value is not None: + float(value).is_integer() + + except ValueError: + raise ValueError("value must an integer") + + if value is None: + raise ValueError("value must be provided") + + count_query = sa.select([sa.func.count()]).select_from(sa.table(self.table_name)) + row_count = self.engine.execute(count_query).scalar() + + return { + 'success': row_count == value, + 'result': { + 'observed_value': row_count + } + } + + @DocInherit + @Dataset.expectation(['min_value', 'max_value']) + def expect_table_row_count_to_be_between(self, + min_value=0, + max_value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + # Assert that min_value and max_value are integers + try: + if min_value is not None: + float(min_value).is_integer() + + if max_value is not None: + float(max_value).is_integer() + + except ValueError: + raise ValueError("min_value and max_value must be integers") + + count_query = sa.select([sa.func.count()]).select_from(sa.table(self.table_name)) + row_count = self.engine.execute(count_query).scalar() + + if min_value != None and max_value != None: + outcome = row_count >= min_value and row_count <= max_value + + elif min_value == None and max_value != None: + 
outcome = row_count <= max_value + + elif min_value != None and max_value == None: + outcome = row_count >= min_value + + return { + 'success': outcome, + 'result': { + 'observed_value': row_count + } + } + + @DocInherit + @Dataset.expectation(['column_list']) + def expect_table_columns_to_match_ordered_list(self, column_list, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + + if [col['name'] for col in self.columns] == list(column_list): + return { + "success" : True + } + else: + return { + "success": False + } + + @DocInherit + @Dataset.expectation(['column']) + def expect_column_to_exist(self, + column, column_index=None, result_format=None, include_config=False, + catch_exceptions=None, meta=None + ): + + col_names = [col['name'] for col in self.columns] + + if column_index is None: + success = column in col_names + else: + try: + col_index = col_names.index(column) + success = (column_index == col_index) + except ValueError: + success = False + + return { + 'success': success + } + + ### + ### + ### + # + # Column Map Expectation Implementations + # + ### + ### + ### + + @DocInherit + @MetaSqlAlchemyDataset.column_map_expectation + def expect_column_values_to_be_null(self, + column, + mostly=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + return sa.column(column) == None + + @DocInherit + @MetaSqlAlchemyDataset.column_map_expectation + def expect_column_values_to_not_be_null(self, + column, + mostly=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + return sa.column(column) != None + + + @DocInherit + @MetaSqlAlchemyDataset.column_map_expectation + def expect_column_values_to_be_in_set(self, + column, + values_set, + mostly=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + return sa.column(column).in_(tuple(values_set)) + + @DocInherit + @MetaSqlAlchemyDataset.column_map_expectation + def 
expect_column_values_to_be_between(self, + column, + min_value=None, + max_value=None, + allow_cross_type_comparisons=None, + parse_strings_as_datetimes=None, + mostly=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + if parse_strings_as_datetimes is not None: + raise ValueError("parse_strings_as_datetimes is not currently supported in SqlAlchemy.") + + if min_value != None and max_value != None and min_value > max_value: + raise ValueError("min_value cannot be greater than max_value") + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + + if min_value is None: + return sa.column(column) <= max_value + + elif max_value is None: + return min_value <= sa.column(column) + + else: + return sa.and_( + min_value <= sa.column(column), + sa.column(column) <= max_value + ) + + + ### + ### + ### + # + # Column Aggregate Expectation Implementations + # + ### + ### + ### + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_max_to_be_between(self, + column, + min_value=None, + max_value=None, + parse_strings_as_datetimes=None, + output_strftime_format=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + if parse_strings_as_datetimes: + raise ValueError("parse_strings_as_datetimes is not supported in SqlAlchemy") + + col_max = self.engine.execute( + sa.select([sa.func.max(sa.column(column))]).select_from(sa.table(self.table_name)) + ).scalar() + + if min_value != None and max_value != None: + success = (min_value <= col_max) and (col_max <= max_value) + + elif min_value == None and max_value != None: + success = (col_max <= max_value) + + elif min_value != None and max_value == None: + success = (min_value <= col_max) + + return { + 'success' : success, + 'result': { + 'observed_value' : 
col_max + } + } + + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_min_to_be_between(self, + column, + min_value=None, + max_value=None, + parse_strings_as_datetimes=None, + output_strftime_format=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + if parse_strings_as_datetimes: + raise ValueError("parse_strings_as_datetimes is not supported in SqlAlchemy") + + col_min = self.engine.execute( + sa.select([sa.func.min(sa.column(column))]).select_from(sa.table(self.table_name)) + ).scalar() + + if min_value != None and max_value != None: + success = (min_value <= col_min) and (col_min <= max_value) + + elif min_value == None and max_value != None: + success = (col_min <= max_value) + + elif min_value != None and max_value == None: + success = (min_value <= col_min) + + return { + 'success' : success, + 'result': { + 'observed_value' : col_min + } + } + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_sum_to_be_between(self, + column, + min_value=None, + max_value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + col_sum = self.engine.execute( + sa.select([sa.func.sum(sa.column(column))]).select_from(sa.table(self.table_name)) + ).scalar() + + if min_value != None and max_value != None: + success = (min_value <= col_sum) and (col_sum <= max_value) + + elif min_value == None and max_value != None: + success = (col_sum <= max_value) + + elif min_value != None and max_value == None: + success = (min_value <= col_sum) + + return { + 'success' : success, + 'result': { + 'observed_value' : col_sum + } + } + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def 
expect_column_mean_to_be_between(self, + column, + min_value=None, + max_value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None + ): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + if min_value is not None and not isinstance(min_value, (Number)): + raise ValueError("min_value must be a number") + + if max_value is not None and not isinstance(max_value, (Number)): + raise ValueError("max_value must be a number") + + if not self._is_numeric_column(column): + raise ValueError("column is not numeric") + + col_avg = self.engine.execute( + sa.select([sa.func.avg(sa.column(column))]).select_from(sa.table(self.table_name)) + ).scalar() + + if min_value != None and max_value != None: + success = (min_value <= col_avg) and (col_avg <= max_value) + + elif min_value == None and max_value != None: + success = (col_avg <= max_value) + + elif min_value != None and max_value == None: + success = (min_value <= col_avg) + + return { + 'success': success, + 'result': { + 'observed_value': col_avg + } + } + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_unique_value_count_to_be_between(self, column, min_value=None, max_value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + unique_value_count = self.engine.execute( + sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(sa.table(self.table_name)) + ).scalar() + + return { + "success" : ( + ((min_value is None) or (min_value <= unique_value_count)) and + ((max_value is None) or (unique_value_count <= max_value)) + ), + "result": { + "observed_value": unique_value_count + } + } + + @DocInherit + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_proportion_of_unique_values_to_be_between(self, column, 
min_value=0, max_value=1, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + + if min_value is None and max_value is None: + raise ValueError("min_value and max_value cannot both be None") + + count_query = self.engine.execute( + sa.select([ + sa.func.count().label('element_count'), + sa.func.sum( + sa.case([(sa.column(column) == None, 1)], else_=0) + ).label('null_count'), + sa.func.count(sa.func.distinct(sa.column(column))).label('unique_value_count') + ]).select_from(sa.table(self.table_name)) + ) + + counts = count_query.fetchone() + + if counts['element_count'] - counts['null_count'] > 0: + proportion_unique = counts['unique_value_count'] / (counts['element_count'] - counts['null_count']) + else: + proportion_unique = None + + return { + "success": ( + ((min_value is None) or (min_value <= proportion_unique)) and + ((max_value is None) or (proportion_unique <= max_value)) + ), + "element_count": counts["element_count"], + "null_count": counts["null_count"], + "result": { + "observed_value": proportion_unique + } + } diff --git a/great_expectations/dataset/util.py b/great_expectations/dataset/util.py index 699bf2e3b496..447d5a4e9bb4 100644 --- a/great_expectations/dataset/util.py +++ b/great_expectations/dataset/util.py @@ -1,18 +1,35 @@ -# Utility methods for dealing with DataSet objects +# Utility methods for dealing with Dataset objects from __future__ import division + +from six import string_types, integer_types + import numpy as np from scipy import stats import pandas as pd import warnings import sys import copy -import json import datetime from functools import wraps +def parse_result_format(result_format): + """This is a simple helper utility that can be used to parse a string result_format into the dict format used + internally by great_expectations. 
It is not necessary but allows shorthand for result_format in cases where + there is no need to specify a custom partial_unexpected_count.""" + if isinstance(result_format, string_types): + result_format = { + 'result_format': result_format, + 'partial_unexpected_count': 20 + } + else: + if 'partial_unexpected_count' not in result_format: + result_format['partial_unexpected_count'] = 20 + + return result_format + class DotDict(dict): """dot.notation access to dictionary attributes""" @@ -30,51 +47,50 @@ def __deepcopy__(self, memo): return DotDict([(copy.deepcopy(k, memo), copy.deepcopy(v, memo)) for k, v in self.items()]) -class DocInherit(object): - """Docstring inheriting method descriptor +"""Docstring inheriting descriptor. Note that this is not a docstring so that this is not added to @DocInherit-\ +decorated functions' hybrid docstrings. - The class itself is also used as a decorator - doc_inherit decorator +Usage:: - Usage:: + class Foo(object): + def foo(self): + "Frobber" + pass - class Foo(object): - def foo(self): - "Frobber" - pass + class Bar(Foo): + @doc_inherit + def foo(self): + pass - class Bar(Foo): - @doc_inherit - def foo(self): - pass + Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" - Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" + Original implementation cribbed from: + https://stackoverflow.com/questions/2025562/inherit-docstrings-in-python-class-inheritance, + following a discussion on comp.lang.python that resulted in: + http://code.activestate.com/recipes/576862/. Unfortunately, the + original authors did not anticipate deep inheritance hierarchies, and + we ran into a recursion issue when implementing custom subclasses of + PandasDataset: + https://github.com/great-expectations/great_expectations/issues/177. 
- Original implementation cribbed from: - https://stackoverflow.com/questions/2025562/inherit-docstrings-in-python-class-inheritance, - following a discussion on comp.lang.python that resulted in: - http://code.activestate.com/recipes/576862/. Unfortunately, the - original authors did not anticipate deep inheritance hierarchies, and - we ran into a recursion issue when implementing custom subclasses of - PandasDataSet: - https://github.com/great-expectations/great_expectations/issues/177. + Our new homegrown implementation directly searches the MRO, instead + of relying on super, and concatenates documentation together. +""" +class DocInherit(object): - Our new homegrown implementation directly searches the MRO, instead - of relying on super. - """ def __init__(self, mthd): self.mthd = mthd self.name = mthd.__name__ + self.mthd_doc = mthd.__doc__ def __get__(self, obj, cls): - doc = None + doc = self.mthd_doc if self.mthd_doc is not None else '' for parent in cls.mro(): if self.name not in parent.__dict__: continue - doc = parent.__dict__[self.name].__doc__ - if doc is not None: - break + if parent.__dict__[self.name].__doc__ is not None: + doc = doc + '\n' + parent.__dict__[self.name].__doc__ @wraps(self.mthd, assigned=('__name__', '__module__')) def f(*args, **kwargs): @@ -97,12 +113,9 @@ def recursively_convert_to_json_serializable(test_obj): Warning: test_obj may also be converted in place. - FIXME: Somebody else must have already written this function. Can we use a fully-baked version instead? 
""" # Validate that all aruguments are of approved types, coerce if it's easy, else exception - # print(type(test_obj), test_obj) - - if isinstance(test_obj, (str, int, float, bool)): + if isinstance(test_obj, (string_types, integer_types, float, bool)): # No problem to encode json return test_obj @@ -139,16 +152,8 @@ def recursively_convert_to_json_serializable(test_obj): elif isinstance(test_obj, (datetime.datetime, datetime.date)): return str(test_obj) - else: - try: - # In Python 2, unicode and long should still be valid. - # This will break in Python 3 and throw the exception instead. - if isinstance(test_obj, (long, unicode)): - # No problem to encode json - return test_obj - except: - raise TypeError('%s is of type %s which cannot be serialized.' % (str(test_obj), type(test_obj).__name__)) + raise TypeError('%s is of type %s which cannot be serialized.' % (str(test_obj), type(test_obj).__name__)) def is_valid_partition_object(partition_object): @@ -292,3 +297,257 @@ def continuous_partition_data(data, bins='auto', n_bins=10): "bins": bin_edges, "weights": hist / len(data) } + + +def infer_distribution_parameters(data, distribution, params=None): + """Convenience method for determining the shape parameters of a given distribution + + Args: + data (list-like): The data to build shape parameters from. + distribution (string): Scipy distribution, determines which parameters to build. + params (dict or None): The known parameters. Parameters given here will not be altered. \ + Keep as None to infer all necessary parameters from the data data. 
+ + Returns: + A dictionary of named parameters:: + + { + "mean": (float), + "std_dev": (float), + "loc": (float), + "scale": (float), + "alpha": (float), + "beta": (float), + "min": (float), + "max": (float), + "df": (float) + } + + See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html#scipy.stats.kstest + """ + + if params is None: + params = dict() + elif not isinstance(params, dict): + raise TypeError("params must be a dictionary object, see great_expectations documentation") + + if 'mean' not in params.keys(): + params['mean'] = data.mean() + + if 'std_dev' not in params.keys(): + params['std_dev'] = data.std() + + if distribution == "beta": + # scipy cdf(x, a, b, loc=0, scale=1) + if 'alpha' not in params.keys(): + # from https://stats.stackexchange.com/questions/12232/calculating-the-parameters-of-a-beta-distribution-using-the-mean-and-variance + params['alpha'] = (params['mean'] ** 2) * ( + ((1 - params['mean']) / params['std_dev'] ** 2) - (1 / params['mean'])) + if 'beta' not in params.keys(): + params['beta'] = params['alpha'] * ((1 / params['mean']) - 1) + + elif distribution == 'gamma': + # scipy cdf(x, a, loc=0, scale=1) + if 'alpha' not in params.keys(): + # Using https://en.wikipedia.org/wiki/Gamma_distribution + params['alpha'] = (params['mean'] / params.get('scale', 1)) + + + #elif distribution == 'poisson': + # if 'lambda' not in params.keys(): + # params['lambda'] = params['mean'] + + elif distribution == 'uniform': + # scipy cdf(x, loc=0, scale=1) + if 'min' not in params.keys(): + if 'loc' in params.keys(): + params['min'] = params['loc'] + else: + params['min'] = min(data) + if 'max' not in params.keys(): + if 'scale' in params.keys(): + params['max'] = params['scale'] + else: + params['max'] = max(data) - params['min'] + + elif distribution == 'chi2': + # scipy cdf(x, df, loc=0, scale=1) + if 'df' not in params.keys(): + # from https://en.wikipedia.org/wiki/Chi-squared_distribution + params['df'] = params['mean'] + 
+ # Expon only uses loc and scale, use default + #elif distribution == 'expon': + # scipy cdf(x, loc=0, scale=1) + # if 'lambda' in params.keys(): + # Lambda is optional + # params['scale'] = 1 / params['lambda'] + elif distribution is not 'norm': + raise AttributeError("Unsupported distribution type. Please refer to Great Expectations Documentation") + + params['loc'] = params.get('loc', 0) + params['scale'] = params.get('scale', 1) + + return params + +def _scipy_distribution_positional_args_from_dict(distribution, params): + """Helper function that returns positional arguments for a scipy distribution using a dict of parameters. + + See the `cdf()` function here https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.beta.html#Methods\ + to see an example of scipy's positional arguments. This function returns the arguments specified by the \ + scipy.stat.distribution.cdf() for tha distribution. + + Args: + distribution (string): \ + The scipy distribution name. + params (dict): \ + A dict of named parameters. + + Raises: + AttributeError: \ + If an unsupported distribution is provided. + """ + + params['loc'] = params.get('loc', 0) + if 'scale' not in params: + params['scale'] = 1 + + if distribution == 'norm': + return params['mean'], params['std_dev'] + elif distribution == 'beta': + return params['alpha'], params['beta'], params['loc'], params['scale'] + elif distribution == 'gamma': + return params['alpha'], params['loc'], params['scale'] + #elif distribution == 'poisson': + # return params['lambda'], params['loc'] + elif distribution == 'uniform': + return params['min'], params['max'] + elif distribution == 'chi2': + return params['df'], params['loc'], params['scale'] + elif distribution == 'expon': + return params['loc'], params['scale'] + + +def validate_distribution_parameters(distribution, params): + """Ensures that necessary parameters for a distribution are present and that all parameters are sensical. 
+ + If parameters necessary to construct a distribution are missing or invalid, this function raises ValueError\ + with an informative description. Note that 'loc' and 'scale' are optional arguments, and that 'scale'\ + must be positive. + + Args: + distribution (string): \ + The scipy distribution name, e.g. normal distribution is 'norm'. + params (dict or list): \ + The distribution shape parameters in a named dictionary or positional list form following the scipy \ + cdf argument scheme. + + params={'mean': 40, 'std_dev': 5} or params=[40, 5] + + Exceptions: + ValueError: \ + With an informative description, usually when necessary parameters are omitted or are invalid. + + """ + + norm_msg = "norm distributions require 0 parameters and optionally 'mean', 'std_dev'." + beta_msg = "beta distributions require 2 positive parameters 'alpha', 'beta' and optionally 'loc', 'scale'." + gamma_msg = "gamma distributions require 1 positive parameter 'alpha' and optionally 'loc','scale'." + # poisson_msg = "poisson distributions require 1 positive parameter 'lambda' and optionally 'loc'." + uniform_msg = "uniform distributions require 0 parameters and optionally 'loc', 'scale'." + chi2_msg = "chi2 distributions require 1 positive parameter 'df' and optionally 'loc', 'scale'." + expon_msg = "expon distributions require 0 parameters and optionally 'loc', 'scale'." 
+ + if (distribution not in ['norm', 'beta', 'gamma', 'poisson', 'uniform', 'chi2', 'expon']): + raise AttributeError("Unsupported distribution provided: %s" % distribution) + + if isinstance(params, dict): + # `params` is a dictionary + if params.get("std_dev", 1) <= 0 or params.get('scale', 1) <= 0: + raise ValueError("std_dev and scale must be positive.") + + # alpha and beta are required and positive + if distribution == 'beta' and (params.get('alpha', -1) <= 0 or params.get('beta', -1) <= 0): + raise ValueError("Invalid parameters: %s" %beta_msg) + + # alpha is required and positive + elif distribution == 'gamma' and params.get('alpha', -1) <= 0: + raise ValueError("Invalid parameters: %s" %gamma_msg) + + # lambda is a required and positive + #elif distribution == 'poisson' and params.get('lambda', -1) <= 0: + # raise ValueError("Invalid parameters: %s" %poisson_msg) + + # df is necessary and required to be positve + elif distribution == 'chi2' and params.get('df', -1) <= 0: + raise ValueError("Invalid parameters: %s:" %chi2_msg) + + elif isinstance(params, tuple) or isinstance(params, list): + scale = None + + # `params` is a tuple or a list + if distribution == 'beta': + if len(params) < 2: + raise ValueError("Missing required parameters: %s" %beta_msg) + if params[0] <= 0 or params[1] <= 0: + raise ValueError("Invalid parameters: %s" %beta_msg) + if len(params) == 4: + scale = params[3] + elif len(params) > 4: + raise ValueError("Too many parameters provided: %s" %beta_msg) + + elif distribution == 'norm': + if len(params) > 2: + raise ValueError("Too many parameters provided: %s" %norm_msg) + if len(params) == 2: + scale = params[1] + + elif distribution == 'gamma': + if len(params) < 1: + raise ValueError("Missing required parameters: %s" %gamma_msg) + if len(params) == 3: + scale = params[2] + if len(params) > 3: + raise ValueError("Too many parameters provided: %s" % gamma_msg) + elif params[0] <= 0: + raise ValueError("Invalid parameters: %s" 
%gamma_msg) + + #elif distribution == 'poisson': + # if len(params) < 1: + # raise ValueError("Missing required parameters: %s" %poisson_msg) + # if len(params) > 2: + # raise ValueError("Too many parameters provided: %s" %poisson_msg) + # elif params[0] <= 0: + # raise ValueError("Invalid parameters: %s" %poisson_msg) + + elif distribution == 'uniform': + if len(params) == 2: + scale = params[1] + if len(params) > 2: + raise ValueError("Too many arguments provided: %s" %uniform_msg) + + elif distribution == 'chi2': + if len(params) < 1: + raise ValueError("Missing required parameters: %s" %chi2_msg) + elif len(params) == 3: + scale = params[2] + elif len(params) > 3: + raise ValueError("Too many arguments provided: %s" %chi2_msg) + if params[0] <= 0: + raise ValueError("Invalid parameters: %s" %chi2_msg) + + elif distribution == 'expon': + + if len(params) == 2: + scale = params[1] + if len(params) > 2: + raise ValueError("Too many arguments provided: %s" %expon_msg) + + if scale is not None and scale <= 0: + raise ValueError("std_dev and scale must be positive.") + + else: + raise ValueError( + "params must be a dict or list, or use ge.dataset.util.infer_distribution_parameters(data, distribution)") + + return diff --git a/great_expectations/util.py b/great_expectations/util.py index c8bba2129c51..0729888bc7cd 100644 --- a/great_expectations/util.py +++ b/great_expectations/util.py @@ -1,3 +1,5 @@ +import hashlib + class DotDict(dict): """dot.notation access to dictionary attributes""" def __getattr__(self, attr): @@ -6,3 +8,46 @@ def __getattr__(self, attr): __delattr__= dict.__delitem__ def __dir__(self): return self.keys() + +def expect_file_hash_to_equal(filename, value, hash_alg='md5'): + """ + Return True or False indicating whether the hash matches the specified value for the default (md5) or user-specified hash algorithm + + Parameters + ---------- + filename : string + file on which the hash is computed + value : string + value to compare to computed 
hash + hash_alg : string, default='md5' + hash alogorithm to use. See hashlib.algorithms_available for supported algorithms. + + Returns + ------- + True if the computed hash matches the specified value; False otherwise + + Raises + ------ + IOError + if there is a problem reading the specified file + ValueError + if the specified hash algorithm is not defined by hashlib + + """ + success = False + try: + hash = hashlib.new(hash_alg) + # Limit file reads to 64 KB chunks at a time + BLOCKSIZE = 65536 + try: + with open(filename, 'rb') as file: + file_buffer = file.read(BLOCKSIZE) + while len(file_buffer) > 0: + hash.update(file_buffer) + file_buffer = file.read(BLOCKSIZE) + success = hash.hexdigest() == value + except IOError: + raise + except ValueError: + raise + return success diff --git a/great_expectations/version.py b/great_expectations/version.py index 5ebd7d198eca..49c37a85403e 100644 --- a/great_expectations/version.py +++ b/great_expectations/version.py @@ -1 +1 @@ -__version__ = "0.3.2" \ No newline at end of file +__version__ = "0.4.0" \ No newline at end of file diff --git a/meta.yaml b/meta.yaml index 7b1d2372475f..0764d1022b40 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,9 +1,9 @@ package: name: great_expectations - version: "v0.3.2" + version: "v0.4.0" source: - git_rev: "v0.3.2" + git_rev: "v0.4.0" git_url: https://github.com/great-expectations/great_expectations.git requirements: diff --git a/requirements-dev.txt b/requirements-dev.txt index 39da0ca1536d..766109ec336b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,4 +6,9 @@ pytz>=2015.6 six>=1.9.0 argh>=0.26.2 jsonschema>=2.5.1 -sphinxcontrib-napoleon>=0.6.1 \ No newline at end of file +sqlalchemy>=1.2 +sphinxcontrib-napoleon>=0.6.1 +pypandoc>=1.4 +pytest>=3.2.5 +pytest-cov>=2.5 +coveralls>=1.3 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 53c0633cc2a5..25d7cfd88b27 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ 
python-dateutil>=2.4.2 pytz>=2015.6 six>=1.9.0 argh>=0.26.2 -jsonschema>=2.5.1 \ No newline at end of file +jsonschema>=2.5.1 +sqlalchemy>=1.2 \ No newline at end of file diff --git a/scratch/convert_text_sets.py b/scratch/convert_text_sets.py deleted file mode 100644 index 1ae77c908c8e..000000000000 --- a/scratch/convert_text_sets.py +++ /dev/null @@ -1,13 +0,0 @@ -import json -J = json.load(file('tests/test_sets/expect_column_values_to_be_between_test_set.json')) - -for j in J: - print(j) -for j in J: - j['in'] = { - 'column' : j['in'][0], - 'min_value' : j['in'][1], - 'max_value' : j['in'][2], - } -J -file('tests/test_sets/expect_column_values_to_be_between_test_set_ADJ.json', 'w').write(json.dumps(J, indent=2)) diff --git a/setup.py b/setup.py index 6877517ca15e..94361835ca86 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,11 @@ with open('requirements.txt') as f: required = f.read().splitlines() -with open('docs/source/intro.rst') as f: - long_description = f.read() +try: + import pypandoc + long_description = pypandoc.convert('README.md', 'rst') +except (IOError, ImportError): + long_description = '' exec(open('great_expectations/version.py').read()) @@ -16,7 +19,7 @@ 'description': 'Always know what to expect from your data.', 'author': 'The Great Expectations Team', 'url': 'https://github.com/great-expectations/great_expectations', - 'author_email': 'great_expectations@superconductivehealth.com', + 'author_email': 'team@greatexpectations.io', 'version': __version__, 'install_requires': required, 'packages': [ diff --git a/tests/__init__.py b/tests/__init__.py index dd72103cf501..e69de29bb2d1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +0,0 @@ -from .test_great_expectations import * -from .test_util import * -from .test_dataset import * -from .test_pandas_dataset import * -from .test_pandas_dataset_distributional_expectations import * -from .test_expectation_decorators import * -from .test_cli import * diff --git 
a/tests/column_aggregate_expectations/__init__.py b/tests/column_aggregate_expectations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/column_aggregate_expectations/expect_column_max_to_be_between.json b/tests/column_aggregate_expectations/expect_column_max_to_be_between.json new file mode 100644 index 000000000000..fc23102b4f97 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_max_to_be_between.json @@ -0,0 +1,115 @@ +{ + "expectation_type" : "expect_column_max_to_be_between", + "datasets" : [{ + "data" : { + "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], + "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], + "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], + "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], + "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", "2/1/2017", null, null, null, null], + "a" : [null, 0, null, null, 1, null, null, 2, null, null], + "b" : [null, 0, null, null, 2, null, null, 1, null, null] + }, + "tests" : [{ + "title": "Basic positive test case", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": 4, + "max_value": 6 + }, + "out": { + "success": true, + "observed_value": 5 + } + },{ + "title": "Basic negative test case", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": null, + "max_value": 4 + }, + "out": { + "success": false, + "observed_value": 5 + } + },{ + "title": "Test case with result_format=SUMMARY. 
Also verifies that max_value is inclusive", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "SUMMARY", + "min_value": 0, + "max_value": 5 + }, + "out": { + "success": true, + "observed_value": 5 + } + },{ + "title": "Test case with only a lower bound, and a missing value", + "exact_match_out" : false, + "in": { + "column": "x", + "min_value": 3 + }, + "out": { + "success": true, + "observed_value": 9.0 + } + },{ + "title": "Negative test case with only a lower bound", + "exact_match_out" : false, + "in": { + "column": "w", + "min_value": 50 + }, + "out": { + "success": false, + "observed_value": 5 + } + },{ + "title": "Test on a series containing dates, with an output_strftime_format value", + "exact_match_out" : false, + "in": { + "column": "zz", + "min_value": "2/1/2016", + "max_value": "3/1/2016", + "parse_strings_as_datetimes": true, + "output_strftime_format" : "%m/%d/%Y" + }, + "out": { + "success": false, + "observed_value": "02/01/2017" + } + },{ + "title": "Test on a date-like series containing strings", + "exact_match_out" : false, + "in": { + "column": "zz", + "min_value": "2/1/2016", + "max_value": "3/1/2016", + "parse_strings_as_datetimes": false + }, + "out": { + "success": true, + "observed_value": "3/1/2016" + } + },{ + "title": "Test on strings", + "exact_match_out" : false, + "in": { + "column": "z", + "min_value": "d", + "max_value": "f" + }, + "out": { + "success": true, + "observed_value": "e" + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_aggregate_expectations/expect_column_mean_to_be_between.json b/tests/column_aggregate_expectations/expect_column_mean_to_be_between.json new file mode 100644 index 000000000000..f4f5d315cce9 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_mean_to_be_between.json @@ -0,0 +1,191 @@ +{ + "expectation_type": "expect_column_mean_to_be_between", + "datasets": [{ + "data": { + "x": [2.0, 5.0], + "y": [5.0, 5], + "z": [0, 10], + "n": [0, null], 
+ "b": [true, false] + }, + "tests": [{ + "title": "simple mean: positive range", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": 2, + "max_value": 5 + }, + "out": { + "success": true, + "observed_value": 3.5 + } + }, + { + "title": "simple mean: negative range", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": 1, + "max_value": 2 + }, + "out": { + "success": false, + "observed_value": 3.5 + } + }, + { + "title": "simple mean: positive exact match", + "exact_match_out": false, + "in": { + "column": "y", + "min_value": 5, + "max_value": 5 + }, + "out": { + "success": true, + "observed_value": 5 + } + }, + { + "title": "simple mean: negative exact match", + "exact_match_out": false, + "in": { + "column": "y", + "min_value": 4, + "max_value": 4 + }, + "out": { + "success": false, + "observed_value": 5 + } + }, + { + "title": "simple mean: positive exact match", + "exact_match_out": false, + "in": { + "column": "z", + "min_value": 5, + "max_value": 5 + }, + "out": { + "success": true, + "observed_value": 5 + + } + }, + { + "title": "simple mean: negative range match", + "exact_match_out": false, + "in": { + "column": "z", + "min_value": 13, + "max_value": 14 + }, + "out": { + "success": false, + "observed_value": 5 + + } + }, + { + "title": "simple mean: includes null", + "exact_match_out": false, + "in": { + "column": "n", + "min_value": 0, + "max_value": 0 + }, + "out": { + "success": true, + "observed_value": 0.0 + } + } + ] + }, + { + "data": { + "s": ["s", null, null, null], + "b": [true, false, false, true], + "x": [true, null, false, null] + }, + "tests": [{ + "title": "type mismatch: null observed_value", + "exact_match_out": false, + "in": { + "column": "s", + "min_value": 0, + "max_value": 0, + "catch_exceptions": true + }, + "out": { + "traceback_substring": "numeric" + } + }, + { + "title": "coerced types: true/false values", + "exact_match_out": false, + "in": { + "column": "b", + "min_value": 0, + "max_value": 1 
+ }, + "out": { + "success": true, + "observed_value": 0.5 + } + }, + { + "title": "coerced types: true/false and null", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": 0, + "max_value": 1 + }, + "out": { + "success": true, + "observed_value": 0.5 + } + }, + { + "title": "TypeError: catch exceptions. Non-number min-value", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": "s", + "catch_exceptions": true + }, + "out": { + "traceback_substring": "ValueError" + } + }, + { + "title": "TypeError: catch exceptions. Non-number max-value", + "exact_match_out": false, + "in": { + "column": "x", + "max_value": "s", + "catch_exceptions": true + }, + "out": { + "traceback_substring": "ValueError" + } + }, + { + "title": "TypeError: missing min and max", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": null, + "max_value": null, + "catch_exceptions": true + }, + "out": { + "traceback_substring": "ValueError" + } + } + ] + } + ] +} diff --git a/tests/column_aggregate_expectations/expect_column_min_to_be_between.json b/tests/column_aggregate_expectations/expect_column_min_to_be_between.json new file mode 100644 index 000000000000..a16186f1a22d --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_min_to_be_between.json @@ -0,0 +1,137 @@ +{ + "expectation_type" : "expect_column_min_to_be_between", + "datasets" : [{ + "data" : { + "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], + "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], + "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], + "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], + "zz" : ["2/1/2016", "2/2/2016", "2/2/2016", "10/1/2016", "1/2/2017", "10/1/2017", null, null, null, null], + "a" : [null, 0, null, null, 1, null, null, 2, null, null], + "b" : [null, 0, null, null, 2, null, null, 1, null, null] + }, + "tests" : [{ + "title": "Basic positive test case", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": -10, + 
"max_value": 5 + }, + "out": { + "success": true, + "observed_value": 1 + } + },{ + "title": "Negative test case, with max_value=None", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": 4, + "max_value": null + }, + "out": { + "success": false, + "observed_value": 1 + } + },{ + "title": "Test case with result_format=SUMMARY. Also verifies that max_value is inclusive", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "SUMMARY", + "min_value": 0, + "max_value": 1 + }, + "out": { + "success": true, + "observed_value": 1 + } + },{ + "title": "Test case with only a lower bound, and a missing value", + "exact_match_out" : false, + "in": { + "column": "x", + "min_value": 1 + }, + "out": { + "success": true, + "observed_value": 2.0 + } + },{ + "title": "Negative test case with only a lower bound", + "exact_match_out" : false, + "in": { + "column": "w", + "min_value": 50 + }, + "out": { + "success": false, + "observed_value": 1 + } + },{ + "title": "Test on a series with mostly nulls", + "exact_match_out" : false, + "in": { + "column": "a", + "min_value": 1, + "max_value": 2 + }, + "out": { + "success": false, + "observed_value": 0 + } + },{ + "title": "Test on a series containing dates", + "exact_match_out" : false, + "in": { + "column": "zz", + "min_value": "2/1/2016", + "max_value": "3/1/2016", + "parse_strings_as_datetimes": true + }, + "out": { + "success": true, + "observed_value": "2016-02-01 00:00:00" + } + },{ + "title": "Test on a series containing dates, with an output_strftime_format value", + "exact_match_out" : false, + "in": { + "column": "zz", + "min_value": "2/1/2016", + "max_value": "3/1/2016", + "parse_strings_as_datetimes": true, + "output_strftime_format" : "%m/%d/%Y" + }, + "out": { + "success": true, + "observed_value": "02/01/2016" + } + },{ + "title": "Negative test case with only a max", + "exact_match_out" : false, + "in": { + "column": "y", + "max_value": 0 + }, + "out": { 
+ "success": false, + "observed_value": 1 + } + },{ + "title": "Raise ValueError with both max and min are missing", + "exact_match_out" : false, + "in": { + "column": "y", + "catch_exceptions": true + }, + "out": {}, + "error": { + "traceback_substring": "cannot both be None" + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_aggregate_expectations/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.json b/tests/column_aggregate_expectations/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.json new file mode 100644 index 000000000000..d8e89334f7e7 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.json @@ -0,0 +1,323 @@ +{ + "expectation_type" : "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", + "datasets" : [{ + "data" : { + "beta" : [5.85049379045,5.21244596453,5.07065422106,5.97773253677,5.06708769233,5.00591832601,7.31543149453,5.78646382546,5.07470465641,5.37899225634,5.55685776784,5.00000825995,5.09108817165,5.16736704821,5.89913711145,5.04632339569,5.06792614547,5.02343097976,5.88046364616,5.34040516182,5.11729311707,6.07025434789,5.57998263389,8.81198584306,7.14111733635,5.03682326118,5.32084048698,5.02084081818,6.70165982502,7.30520245414,7.67591993861,5.04451974459,5.76288717089,5.00065624499,5.00672824343,5.35142826856,5.02909009781,5.6554247915,5.6799476842,5.00721523719,6.09785759829,5.01072232241,6.169279956,5.05532374261,5.11592165043,6.16766640159,5.02007177441,5.52126884915,7.76871883377,7.25037504727,5.30951566535,5.05884263422,6.1047978282,5.08227521354,5.23064843808,5.10719184969,5.50619358346,5.17135913697,5.0190301807,5.08738503586,5.1543564601,6.76967485283,5.34229903969,5.00359434198,6.21282110428,5.09529279357,5.90702666014,5.50446278676,5.10218926737,5.02685011928,6.91920361923,5.00733109478,6.50250821421,5.001023564,5.09145817895,5.07016814175,5.00500916835,5.
00106289986,5.37290922283,5.27196468161,5.01288037189,5.2084985286,5.03013832879,5.36929405356,6.67682791651,5.74862416091,5.07437293364,5.04010433672,5.82309590197,5.15930708956,5.02547566679,5.65541551896,5.11504461441,5.74808018074,5.07068717355,5.03008172833,5.20608423771,6.93205350325,6.76795594857,5.13772021468,5.00082669716,5.93443615307,5.04259977248,6.37988908805,5.42858807128,5.062614606,5.17216852602,5.14308978382,5.09815020131,5.22572505752,5.11773994555,5.00923245104,5.34178733222,5.88925569497,5.00245191524,5.0219891334,5.02748348998,5.70401729116,5.65640275742,5.0711178719,5.00031816269,5.53802658249,5.17367338543,5.65721764371,5.05420553573,6.34393239748,5.01386433194,5.2889049356,5.00588455903,5.0001739514,5.0048097054,5.58251872772,5.48800203809,5.0555990542,5.26733419537,5.0961659335,5.05496792955,5.50529625082,5.45290864535,5.00630804853,5.92689114833,5.95017573287,5.12912666199,5.54819722806,5.2039773489,5.00024555037,6.8070838143,5.58143123497,5.0186773661,5.42843038824,5.9906100242,5.38179144623,7.05323648054,5.35165446997,5.47241442773,5.40531724651,5.0006815916,5.25893189048,5.29632698864,6.97597721359,5.62440499272,5.34457278932,6.03624024123,5.01791375861,5.0289040214,5.11256204003,5.20916050857,5.23705084902,5.43854809323,5.16647686184,5.65669686615,6.03279310585,5.07285516467,6.58901837996,6.79034300328,5.00280654363,5.72194971214,6.4335039258,6.78725122992,5.00836934462,6.21979484983,5.0075501364,5.01072925541,5.0138184917,5.10422208577,6.18924119907,5.57749629802,5.04902408931,5.31868758541,5.81449096838,5.1214863575,5.22994802805,5.43975185294,5.06538322749,7.21106262827,7.64785255389,5.05674806405,6.31239339285,7.22283149658,5.01049484067,5.19683879241,5.09825203414,8.83132520289,5.00967242066,5.51767114767,6.14877085052,5.08832614411,5.00681856885,5.00052984705,6.01659432533,5.68963742432,5.79534256437,5.00142271553,5.8181745129,5.49931399542,5.92723717537,6.14388473949,5.10938046325,6.08359118263,6.71377563437,5.03031284778,5.34186
406081,5.96554753595,5.00388752097,5.48709927951,5.04387359515,5.49102744479,5.54152244519,5.66223480738,5.34242919521,5.16803038415,5.33554947197,5.33223277973,7.06737383734,5.68469455971,5.06603165211,5.6380012428,5.00021665736,6.50296085308,6.20217919393,5.15069449866,5.01004141242,5.00657473435,5.23037507874,5.00210075889,5.15261815755,5.06996175321,5.90110213065,5.98882133754,5.00004704004,5.57643855458,6.78823702579,5.03429037873,5.01153030744,5.0585305658,5.01785362704,6.12223146093,5.053870281,5.34745130965,6.80968152875,5.01561151749,5.77406766134,5.2856970204,6.72330697215,5.04096925756,5.19844677565,6.58483192495,5.47412013378,5.92329762463,5.29893225797,8.33495989819,5.19939619545,5.52002555507,5.09177592186,6.26103281687,5.21584619611,5.37360033588,5.04796922652,5.26555136985,5.40582425229,5.34782565871,5.93164897254,5.17245511222,5.01578214489,5.12756506696,5.13896653173,5.46184279782,6.59083235177,6.293729294,5.00212151537,7.93351659913,5.57508677216,8.27430924134,5.32037660647,5.05916419472,5.75977922954,6.8733531145,5.49076666387,5.28252951131,5.53758368732,5.10841458633,5.17970377904,5.6817046048,6.48867721127,5.0122746782,5.78567782994,5.31654791743,5.31643330823,6.86219541917,5.11409950122,5.26969807148,6.22817649986,5.59599434331,5.04649691006,8.51726308749,7.74644438896,6.36047146637,6.26102629814,5.00252543204,5.02261699386,5.52759640291,5.95726567976,5.65122627666,5.38391897283,5.2357689963,6.29560159836,5.38631858293,5.32712248372,5.03377635836,5.01499454414,6.24667362146,5.00749556937,5.08906501268,5.89651139304,5.11414570023,5.66754795268,5.49290775605,6.01997357488,5.0005757182,5.08971814776,5.26496851629,6.22930074336,5.30059046131,5.10477931387,5.76091202563,5.21994377271,6.48583314694,5.62861229243,5.18641671933,5.92159452862,5.00037988646,5.07576115639,5.84649809831,5.30336000909,5.5873754086,6.93702905576,5.09985979124,5.00799471284,5.00201062403,5.10998421637,5.00669936954,5.4863676172,5.76095607399,5.01640237443,5.69665000981,5.338
72530095,5.21427666221,5.02934382809,5.02271167325,5.00149952419,5.53345240543,5.00000410096,5.0404971886,5.2365290203,8.18954572768,5.01224617643,5.06573153342,5.07761479912,5.02096011617,5.2999294958,5.00154039054,5.34937237761,5.0482821032,6.96933576481,5.30243284095,5.24014448849,5.06047069543,5.30706902077,5.00167346971,5.33133291133,6.71794114321,5.11461866706,5.29230114507,5.21302696594,5.0254846741,5.01355395916,6.12290890675,5.07720453915,5.02282694469,5.01315779485,5.73373568044,6.02647872346,6.24910030381,5.05433772571,5.09820783163,5.00074336489,5.55441628919,5.18336702291,5.0090194697,5.22866811688,5.40904712663,7.92867901496,6.33259272089,5.20256845629,5.29209873314,5.37324702275,5.00214663987,5.72588480981,5.01607694333,7.89194085497,5.56651398908,5.0228299548,5.48817828506,7.876897102,5.13818010279,5.61058224818,5.3172239619,6.39136992112,5.01370811787,5.02506106002,5.49581934891,5.13769593786,6.49548844493,5.36836022876,5.20207116155,5.20978364036,5.81889314196,5.01181447295,7.20812760853,5.02394728416,5.81278988953,5.00153016661,5.00177767271,5.11118798548,5.0007402966,5.02280054287,5.03341758946,6.28060848763,5.02377637811,5.89602305564,5.61092446091,5.27649878517,5.68406005403,6.05602295961,5.38068063436,5.05924695009,5.23858897507,5.05209761333,5.06463295681,5.06301948811,5.06327128273,5.22909870866,5.51075923844,5.21891194219,5.08883096523,5.09895239096,5.66941984233,5.01681794776,5.00597357028,6.87556724012,5.13018807678,5.36623298782,5.66410515544,5.22302758816,5.12770982759,5.08566544477,5.55532066532,5.70621282514,5.05093746427,5.34108431674,5.00470684237,6.23171040566,5.70887956287,5.28900035873,5.02438488984,5.61523467551,5.06419435981,6.32495050302,6.53163282487,5.2174067149,5.03976523607,5.41343155571,5.29689448172,6.11928120444,5.26486548936,5.60696900246,5.058876487,5.8957845307,5.98635366819,5.25057476532], + "chi2" : 
[168.375838384,139.09834212,201.156634322,143.132559095,119.074399249,82.9303520596,131.517110499,194.661912619,113.347710915,160.326410097,160.501933108,111.839510105,100.020543022,170.704863677,165.718061354,172.894368073,135.92497862,109.641685149,194.843515436,145.529884812,163.768793244,125.31871065,105.466361115,166.089076484,149.132698173,136.215843468,186.713892437,182.717053676,211.591455483,207.217535191,165.96975335,183.094732596,146.75765029,108.721986017,197.344128895,101.237714676,192.49016013,172.580260794,190.24064548,85.4560124124,134.518509866,146.675642297,107.521262221,181.167344619,105.867204062,123.525532338,177.959056031,163.14723022,127.181584442,155.476663933,123.369955953,177.613114062,143.812176477,161.52391526,81.0219249374,103.211794832,92.5251349721,120.233534297,177.735357885,191.202023667,319.480798909,137.842412037,107.232977899,186.734067714,80.9446068221,164.722367204,215.917735701,131.561553414,154.81487103,148.761336347,157.700285603,140.22942277,203.696275705,161.432904615,113.078197098,148.682822598,203.535241133,204.82437125,171.190785987,131.701207406,139.149431001,197.162860613,115.330793317,105.820822598,181.692471935,112.343922021,167.218574421,158.312894048,122.963138423,115.727799625,119.604496978,104.434332358,224.00849947,93.2287997707,186.405425918,199.248709395,170.506963964,108.433026681,112.482137368,161.740684075,148.571609623,219.321718484,168.691684332,149.10041198,139.045316202,110.267970295,118.119859602,168.163810233,158.547766837,78.270251526,110.363873445,160.731944767,216.113767284,147.461430349,136.012610591,110.070189764,185.182522898,112.415170046,111.468029255,176.468967763,164.678383217,112.807590029,171.080281616,131.076215077,141.029980223,95.0468765512,214.388647666,198.078399036,214.320019622,173.768324222,172.331241426,189.93409887,112.767913271,138.153630702,125.098721966,164.622560448,169.396884667,74.1534308213,97.8425884554,228.520528,125.473966611,182.168220269,155.333760993,235.296730951,16
5.27400769,233.129483543,188.535973197,203.205911622,152.06542047,145.479634497,152.668310316,175.119630277,169.815112424,166.062269723,110.226234088,176.43719315,126.169207438,177.441937253,192.882653852,105.587930339,139.330788203,141.263042942,132.079880237,144.521558203,160.699558927,105.33401167,111.226938064,116.605674065,78.5979732203,135.224899649,114.473228584,102.980918321,155.980934504,147.085983824,106.510690166,110.417626439,151.942346738,197.729793823,150.651303905,102.730629379,214.966618558,193.811673499,76.580710265,109.641872498,129.530776075,219.507452831,180.882374279,155.959735628,142.168135676,167.926199407,198.164663771,106.606224841,190.166778116,151.253766018,197.322493072,153.020943526,133.253206756,141.493631165,109.866402632,121.168047967,207.065375901,98.497249573,206.795174544,128.856450868,110.983630783,147.44845091,154.020235556,99.5305249169,96.6400992062,155.866267766,159.936496539,151.058147774,139.530317236,204.748403099,184.553266249,124.945929359,174.951654466,147.408567237,159.438705762,196.137147857,140.4991383,154.946192143,118.273485753,114.181678234,77.68795617,176.65854003,165.630112418,165.506762356,91.898435627,184.705619359,180.048393452,123.120466252,175.549934966,219.127410757,156.434460102,133.472138548,123.067964809,146.636530913,117.264685257,95.8474897052,122.493398404,129.218871273,128.246169483,112.365301055,174.284309539,165.030770354,125.541131345,159.151155022,162.000183964,154.346850649,141.751254663,116.817303654,159.077127009,148.617453758,162.432250448,102.228729028,51.9282014258,170.365148929,167.684628266,168.956954592,152.034391929,159.081932304,118.460176395,181.50639698,148.027864585,149.273073171,80.5265102836,120.089959754,217.901820405,160.276562506,114.160325988,183.951404207,137.952571471,119.114463093,117.846550773,181.095633983,158.470923157,171.472641091,109.070110785,58.225679908,148.14961346,170.998972932,160.763215316,123.439947358,137.054222465,220.304198694,228.831758469,188.801516838,17
2.5284002,158.490585009,147.559421867,136.276561333,157.814342018,161.138377499,147.868274744,153.081047396,112.763733441,125.883487138,84.6286856151,155.210016702,185.278109043,219.930361253,144.440502463,184.782530753,155.320147583,196.696114173,195.616986145,94.4112355767,168.812602367,127.694101229,142.128287813,170.597506836,124.137131165,108.479645763,188.8858969,185.49072511,172.348849633,122.896229839,111.52467989,154.640507203,121.052500949,117.317148827,146.893661893,90.1874300188,118.588798363,87.7569231877,104.247335291,106.983030475,163.374926336,128.478285522,156.97804643,122.243317219,225.969543059,193.204356451,173.705247331,92.3884230772,143.014936893,86.4155249468,87.8921680341,120.238545469,154.498546823,139.441309318,91.9081357689,172.944085041,159.158979946,157.782662205,132.701855802,168.327492095,190.496311523,107.782933798,120.89086819,182.30478273,152.526386357,140.309790964,185.809417885,127.365336352,181.999975985,139.19018381,188.312500742,124.436998929,205.14087336,175.626601333,96.2936849386,192.259894832,154.106302893,264.078183855,137.721304876,235.579195067,106.486275127,168.849188106,85.4126546055,182.67890815,182.58684056,169.92955288,150.200018397,132.806912369,185.602399457,168.266183606,107.418816373,104.865003894,126.264550433,157.128410001,158.711875786,168.428486454,142.092748903,176.510736764,154.680547929,180.787512217,184.80087458,117.911843452,209.026816226,163.153380709,88.6794389655,179.195485753,106.172367925,149.282174842,106.348891217,178.615111553,120.510514917,171.483072329,122.003647873,96.1004324752,89.3103285546,123.03135274,185.687501802,98.3098197515,136.038717303,181.723736312,161.639962575,163.64753172,160.349973434,146.003554887,146.50065102,139.423166341,122.03112913,107.033712932,135.3630556,142.78841725,156.06961572,152.194270973,161.767669835,88.3160228968,189.638363748,151.681515321,185.096951603,114.736802743,164.746609399,255.99364408,209.553731184,118.690276675,132.410827131,197.925669135,196.971921
001,152.241635146,110.872953387,191.878159575,143.286365246,184.259486263,86.9611836067,149.535068923,177.34298882,201.611783468,142.302129502,203.014482673,159.115028994,164.94837894,151.073137654,163.889565496,201.146950379,184.705781603,99.1398901341,127.254293051,171.173341028,249.758311803,138.188038977,166.699602479,179.231294128,174.911313153,137.45456733,245.422991673,233.705655817,72.9310055291,172.664402369,149.640783649,227.752627927,122.859337082,147.704789297,188.596676893,119.588319929,200.152585638,181.547096939,191.778706436,203.520522959,237.741063055,183.14285084,156.725561932,145.493629145,76.4676096338,230.429379023,161.30672709,144.237519169,161.031411854,93.8719672039,142.79767329,146.901088821,148.664319793,104.04701203,162.844762068,143.541163527,191.748097587,96.9335544823,117.837441462,147.411840719,151.911057566,187.590166929,124.583170851,151.214207828,152.212352141,167.133230147,183.043532687], + "exponential" : [4.8585228583,26.6901112675,11.1150745618,8.95935517798,11.9217508397,13.9373141135,12.2976213515,24.400795355,12.1683913647,7.39685767588,17.5079727804,22.7669667259,6.62885662385,5.56852131617,5.57159228611,6.57447505291,17.5241478333,41.4809442184,9.56407008622,22.2351196439,10.4456054182,8.70539086427,5.79144139179,9.07679802089,5.45936123917,21.4901054423,10.6726570445,5.48692685686,27.2734472583,11.1754287278,41.37362767,12.1462190156,16.0887380626,5.06115728228,11.2561713611,5.51213169335,7.90295802423,6.55421602611,11.9152877153,11.8462766069,15.51917196,5.90851234558,7.77343326946,15.1623243967,6.22061231525,35.104015777,6.83555264986,18.1008192217,4.44111934468,18.7034741164,15.9933251396,17.501503333,12.8692246134,8.51538118947,5.67270057007,8.06136527284,11.0082196847,34.4770045036,19.89145518,12.6591821027,15.0876551573,10.5575523269,17.8267935754,6.51267311331,26.6882681183,6.78121782138,7.38414356578,11.9679828215,19.3987895693,7.43943502402,5.11417505189,5.89561607999,14.1286492762,8.86746992785,10.2709056648,4.42
774950006,14.9160019496,9.65464948219,25.7903101333,11.499458782,7.7613908252,11.599178354,6.34712028593,4.33959200876,9.22525443086,16.8030533055,15.5761873397,57.222564837,13.653953781,5.86575972971,9.1360900101,19.5459675575,10.9297004086,7.2853143675,12.2246158214,7.43622201353,19.7779351721,8.99246821226,24.3903101769,5.85470424495,20.9385215607,14.5270241981,7.45025328578,4.35941951777,13.6495106355,6.39324275558,6.28685689271,5.03687536678,4.85095125125,18.8823105704,5.27009173373,6.44017001604,16.0780210646,7.89996183074,15.468146325,4.55575578203,18.7597986899,10.5322385583,12.8748603934,21.7044583971,4.51178266598,7.81162881218,45.80463418,8.58679262745,32.2026740211,8.88528944368,12.369091046,20.5717041572,4.2708976764,34.6019442379,36.1547221177,6.10591612438,4.34991641659,5.03264888886,11.3785592582,10.8912492281,12.2208412053,7.07060181639,7.50164273076,9.2061649176,4.72586535362,7.88680566395,5.41566973295,4.399406958,8.83487017535,5.02943725088,15.6570314589,8.81202634207,6.40182163103,17.8586573543,32.2930935854,17.0635488939,7.22495043459,39.4123128019,6.2168998839,13.6377877809,8.10211163833,11.5904991088,9.60713142533,19.5814114214,23.5105954046,5.23472267323,17.4612372462,9.40339865086,14.860711541,26.9345963951,8.84959311964,14.1926036444,10.3935059709,24.1492544223,42.1299014711,9.09043886842,11.0799812039,19.5320187946,16.553953187,9.95431129017,21.6739969928,19.9751654962,8.97874302927,18.6197023231,8.25987086562,15.1114007144,10.7280954993,4.78770335828,4.27398589143,29.5919669408,13.3908652314,13.7853304563,6.84467050466,18.1068031374,6.08591871762,5.82292250065,6.85320374364,19.0355187151,4.32561072342,6.69551257351,29.9906154602,8.6358724341,8.79901423029,6.52284824115,7.46861525458,5.6023948514,15.0436723552,18.3857529388,32.3772580562,14.06544575,5.02409416851,22.1307991504,5.82722528097,14.4359998234,6.92060049241,5.88859587828,4.50935569724,4.76341645946,4.58258669659,7.36514244377,4.52370493652,17.6198723346,5.7422862581,20.56756025
77,11.8507279565,40.8636563787,11.3440224359,25.956912957,7.47234686574,5.95726637347,16.6856547422,4.97363842595,9.18545742985,5.07645991106,9.67441533692,8.51304570115,19.4713126506,16.1227686857,26.585725525,9.29073476528,5.18667361024,8.3952512741,21.7616138021,7.35338220141,7.48284549599,35.5602856486,10.199192826,13.6972032835,25.2994469992,19.9698051947,8.00386843895,11.3987689638,4.646175874,4.88532265181,16.4420710471,10.1565661783,38.5060250804,5.61393579269,18.6868178469,19.7263274404,10.3518028238,6.25962918559,5.16726435829,8.67391004511,18.5236474505,4.26127095143,47.8678460059,4.62539438885,18.3365034831,4.6815145058,21.8740306745,7.02139837182,4.33368505638,8.75857029829,7.95686273745,37.8442360752,9.15155717368,32.8246425544,43.6474960093,7.0080323106,7.86806233578,5.05302133625,7.70531881347,22.6990564049,13.2336734004,4.22219585457,22.2832043011,12.1018309626,30.9850986965,5.36647407657,21.6419365166,12.4199044443,10.5105439076,5.73849259124,8.25937371913,38.5128261063,5.36071483972,47.469477112,10.80382991,5.53971362051,18.8727826092,18.8052592329,6.01056289204,14.1183568807,4.47499384811,9.20498301581,17.7913008362,21.0036792926,43.5723479925,19.5552461628,13.3794734022,8.30614166658,16.8222203994,15.862614209,13.4817569961,5.28714402978,7.11901232347,11.360070975,19.3680736822,20.3587048464,17.3097465498,61.04351838,37.8373274265,8.22217727047,61.171442431,6.16157861368,11.0830723373,13.7312299054,22.7180407662,16.4099137798,4.76758368438,21.9598423719,21.053979254,27.0937219288,6.91347180328,12.8033438415,4.40671737316,5.48935834095,31.1648620088,8.87644516902,11.1046642449,7.62433722647,13.8485656975,9.50227226413,7.908946418,13.9774383919,5.4826297956,18.0691008961,22.289493833,27.4441179044,4.43843405231,13.9311903877,9.17908588231,11.2076007961,23.0446584687,10.5093777382,59.346879576,14.1327711774,12.4077746313,13.8432297547,16.3758112799,30.2792862351,15.1219530554,25.7183441105,10.0126135905,29.8136517394,5.92879265948,6.91956747975,5.9
9626517833,9.20167964971,26.1486770468,12.3233709966,7.77856359644,22.6541754894,39.7981684376,17.1365998876,13.9896954226,6.56773437506,36.0477534889,4.90300972124,10.8077497725,12.3406729732,5.29772521073,7.38518454066,12.4320263737,9.34485443033,6.36356218225,19.5925120704,37.4381299548,27.9786062673,9.19186061539,13.3268206779,20.3648210215,5.01217493829,12.9555805136,4.4748051555,17.4132247915,18.8497701408,4.80159721713,5.99978772882,5.97423997729,18.6877742079,14.7769309794,4.68086281381,10.4288798069,15.722574372,21.6418269207,22.4079916156,19.1452570302,14.1352860185,7.09798015781,13.3844156622,5.64397830926,15.3180688488,8.3066475286,7.06176474691,29.3536808527,26.5985501295,48.2613707994,18.3005241625,21.8578569535,7.5010684305,24.4593444626,7.53160647828,13.0320312612,22.6647276003,34.9935494629,11.3021238248,5.3380893194,8.12868227645,13.7507445912,40.3164794817,10.2907969213,6.79780254524,8.08872329086,24.8979053924,14.5956554678,13.2001101075,22.3073615563,4.55488138972,11.0313379375,24.334624001,8.05712156069,11.8539456224,14.3906827756,8.16193452019,40.6002515403,4.55418577316,21.9634497784,4.3404399438,32.466792485,8.99158769383,22.6126163043,16.4086403356,4.92762286534,16.8203006859,15.099210936,19.6179402643,31.5413120919,8.24019412834,7.34747607208,19.3356404341,17.2951030048,4.75175236836,11.3234591704,19.4410597803,11.3715427803,14.9735213874,25.1617465364,7.16210986035,14.7248588327,5.56121571115,20.4141308685,18.2271719645,18.8813547539,5.80530960429,7.08801525309,6.09661156797,20.3849319169,13.8362158114,5.99789007105,5.50834764974,15.9580954025,11.1924935528,71.845184399,11.0813031629,7.4563912479,4.93784579425,9.2892213139,4.49416269022,11.8069665941,20.7597168349,13.8750808983,27.4006997675,14.10468171,24.9089912973,8.12774240137,10.7644814666,26.5492672756,54.1654249547,12.8121314103,5.06870874708,5.42160944944,4.76513880009], + "gamma" : 
[23.4020700939,21.5298748203,32.2562321362,25.7763193718,23.4703121579,29.3661202701,26.4459369299,27.3791809985,21.9847200673,30.4283213154,25.2524564292,23.0766681004,22.0748007426,22.8738575385,24.7173832218,20.4867806803,23.6115319518,22.3396012727,34.0332461228,22.4481130156,26.2994393417,23.6842779672,26.4469684249,32.208328631,25.2154795653,21.514914246,27.9812265667,29.1646242454,36.1741183773,21.9241086942,25.0200978673,34.541160644,21.1872171313,20.6643523094,28.0611449787,25.0763752424,21.1316987329,28.4729997234,28.297691206,27.9971971914,22.7498137295,27.8584113561,22.9930654114,23.3932717127,26.5867370467,20.5603038407,24.4578198277,31.0726756074,21.9970997924,25.8772696238,23.2792448714,22.1746018538,25.0520189677,30.0147724528,20.9379872315,24.1727033283,34.9513827989,21.8579958707,29.1922941933,32.268170894,23.9759615289,23.2633791899,28.3401947019,23.4056316197,21.9573537249,23.5790724502,32.4038252295,21.7878154345,27.7219914485,21.6891773332,29.0876555664,28.2659118607,27.7141202827,26.4474394796,22.2405955932,26.373367326,29.7809748078,25.1458962029,22.7640487649,29.1364244361,21.2416301746,26.1620639266,28.9165242516,24.1332087951,24.5034865528,22.0630655187,37.0706767896,29.5045377518,34.5009633676,25.8583734444,25.6237019739,29.1496197025,22.7821172328,22.9674190397,20.6825965477,30.0805247229,31.1243102911,21.378796057,24.9602505008,24.498254177,29.0456549503,25.4614759249,23.5492250542,23.6296093195,27.7048563838,23.6143923219,30.8096926411,25.2366118111,22.9342018597,20.3329277139,30.8160753344,25.1544738495,20.256646979,24.7267776724,28.1766743251,21.6930035797,24.3736036984,23.0584508765,24.4423122369,22.99470941,21.0695771875,30.8894198903,21.2748243436,22.3587844346,23.2265191862,21.8254137702,20.9182783187,22.8858016571,22.8513749082,32.8200661037,22.1810922078,21.2289369077,32.0811622537,36.2015120794,24.2696738229,21.8534574697,20.976608467,24.5070371167,28.5753591781,36.3660762521,27.6972536041,31.3783602398,30.271239401,22.1219296
831,24.5513116546,27.5162180295,23.71240597,26.7569582375,28.9124084132,22.0112791255,23.6452224477,23.3271453018,28.8044969845,22.0345106059,27.1407964702,26.2810075701,23.8503234598,29.8999812194,23.435396946,23.5115874386,25.6843235086,27.877227615,21.6477642172,26.6076880577,30.5500715332,32.9591334789,20.8871158935,27.8993810718,37.6097196154,29.1195769842,30.5362603374,23.0026683957,20.2106398147,21.9748341562,29.2723974831,30.2994040965,25.9237668312,22.3755955888,21.1789713581,21.9935792747,20.3111108153,29.7688607108,22.3349178042,24.4384511424,30.2365574087,27.3852187001,27.2334957635,30.2447416054,24.2216036265,28.2320450027,27.9283189683,29.1941059574,34.3823679953,24.4002814522,25.9235910909,24.067215501,30.3709626359,22.2624007602,27.9738313877,24.3180558366,22.0444775167,24.0561351466,32.0742577005,20.2948834474,21.3472329216,24.2658670818,25.5387220467,33.7055649282,27.4390032872,23.9768265282,24.3795134206,28.035867306,26.3881524935,24.5783732033,24.6725793264,21.4417604343,21.1470873735,24.3746332706,30.655739241,23.9377251296,26.8840484717,23.4008012268,24.090496709,21.5226738631,27.9097797855,23.5760755694,29.0516553777,24.5115122439,24.2788240342,30.9313326361,22.2613336207,33.7861356829,26.9371114025,28.1509447967,22.5786110298,23.23807363,25.7090964454,22.1971094629,23.0513728887,26.4812783485,29.1454960021,27.2556497752,35.3995007571,27.1526271323,22.7495472506,23.0534100647,33.2030607167,24.5309609048,21.9713357981,41.7067257021,24.9152849464,21.0552530522,22.6147573433,30.9961614016,24.727881004,24.0037052352,27.0823289132,31.6480134684,23.6309120499,26.9003619635,32.9092386539,26.0464097751,26.0324601818,23.4618735782,31.4303877179,24.1524008973,26.7586599848,25.956830104,23.7769743583,21.2146774143,30.7673496572,25.1256169725,23.3446892483,22.3404427682,23.4278757404,25.9898800288,20.7235374574,25.4676280396,24.4834665152,24.5804004299,30.0671960069,26.4600779112,23.9859958353,25.435501002,23.4695889694,32.9267264658,23.1562693065,35.7957
273664,45.2462445115,27.0057363853,28.5976138009,20.5293832446,22.3015275666,21.7734989727,27.1505254024,24.8751278647,27.2943584101,23.7005794411,26.7509335294,30.8920076413,25.6822637181,37.494063308,30.6920203637,31.141641066,20.6513374388,25.7183047116,27.9043856399,28.3421366995,24.0865190989,21.6181566532,22.3101668922,21.5457160877,27.5504160094,26.4396679234,27.0975105076,41.5602192775,25.558598806,25.0358378223,24.654272581,29.5280988255,23.8084832319,28.6418538164,27.0784635968,33.4237451,24.2509310937,24.4115364156,27.5830903361,24.2440352552,20.7122531841,27.4581685147,23.0859938202,21.0655210693,26.4210887753,22.8791327272,25.3011838547,21.5791545502,23.6994164277,23.7110020654,26.4710732813,23.6601009694,21.3707634202,23.8163315217,21.0795164284,36.7797295444,25.4827413135,22.4472708755,30.929481418,25.4076973768,38.9581577108,26.024995253,21.6995064143,23.306742254,34.7126446375,22.6507031606,21.8452608301,25.7229017859,27.1063775454,22.4196637287,27.4028710147,38.0768101456,21.4400926349,24.8620018962,28.0221917831,22.7823816283,25.1505578523,22.5758524229,22.2770660308,24.9913737829,32.7599254817,23.5802791186,25.5814804394,23.7429661864,23.239889867,22.9289813179,24.4263310857,22.6435711301,23.6173230533,39.551587905,22.9904610606,20.8094897426,27.6813523597,22.9425081042,21.116348851,29.9963248081,27.0052863552,26.7102746185,28.340255777,31.3798712633,25.628932302,28.3653252833,22.1171232767,29.0192169422,20.0788305558,25.6936015362,26.6957303186,25.0364628134,25.9492884166,28.4837974971,22.0515348994,22.4475588765,35.4840555757,24.5264131282,24.4831030618,31.5137478769,21.4416678572,24.395917165,22.3609059462,35.0327857154,24.2936196349,38.7740282327,24.3015407405,21.8588776505,22.5528269142,22.0383358154,32.9607058596,37.7792238105,29.4019278267,23.6714445364,21.8807557376,21.7961550788,31.1904264486,20.5232897739,24.1964190928,23.9694847157,27.8564640384,27.1298492223,24.7009727848,29.0612505074,33.1299997826,26.2941203858,32.2677841916,21.9101
863356,22.2806392369,27.6892579428,22.8638843941,21.6456702933,27.2135023133,29.940402904,26.1445225064,24.9171311121,24.6661735576,24.9804348375,25.7901018279,28.2663345638,22.9267311643,35.2285539273,20.3616740613,32.1153877091,26.2919413995,27.3448408417,30.89118701,25.1619690682,24.444143299,37.4171120675,29.3755195815,22.5757143161,37.7233431754,24.0954964299,22.143816344,29.1261054723,30.5860998952,30.1276589243,25.6233726895,32.0897336447,28.5743220602,22.1897018993,20.4238217973,21.5890578441,23.2656723715,21.9817532227,26.8068329018,26.6076760009,25.5951740103,28.9711309123,27.4058238348,22.1712627203,42.2545658346,41.8649165338,24.2870883877,22.9669501698,20.5088492308,26.4662404469,28.7969797121,27.6409105164,22.3527977435,21.2936513083,21.106610259,24.2366360087,28.4885284624,30.082933384,22.0323717815,31.2417261432,26.4775426548,30.0371866099,30.8771594635,27.8242764897,35.5221517692,24.0058611384,27.3177404054,20.5251540551], + "norm" : [-3.02353829742,0.394716690288,-4.59719357528,-4.77865152174,7.82890286251,4.96702916486,-1.53546061628,-0.591269235849,1.84511283806,4.23217368193,3.03594678792,-8.48110554556,-0.625041832839,-0.855435605323,4.76458417583,2.43214670296,-12.0081865483,-3.8592126857,6.34512654762,-4.19284867918,-4.69870722761,0.384925052061,14.2447195972,-7.10613762178,-4.8854365152,-1.37939362163,-0.486932190437,0.618860340752,-1.99529861112,4.71904896807,-5.56771992548,-6.15576769427,-13.8511582698,-11.3038039428,-6.30378699216,0.80072646514,-8.32967245847,-1.4008643767,-7.31756224027,-0.335586421962,-13.7970940369,-2.99771477668,-9.70997763937,-6.8536795613,-8.53515125485,-0.568251264929,-0.110079445313,-5.76943267395,-0.343571749027,4.74871107249,-1.65061655585,-0.766629450089,-2.05930800536,3.02405795394,4.63597306893,-6.59630778873,-9.74553219142,-1.88907700671,1.79181572508,-5.30262163862,2.31290041265,-2.05015951044,-1.74995322048,1.35107797175,2.2648251612,-6.77934426147,-2.11746660346,-13.521169394,-5.2623442073,-8.09150988291,
-8.66304853842,3.37311345047,1.61820752649,1.45000926405,3.00771721247,-4.5154369568,-5.1113711253,-6.60584304007,-5.6310674633,-0.885522270824,-1.7434194954,-7.78859733156,2.08353467945,0.168048028271,3.05368473002,7.12437605673,-6.98759123855,2.25295549368,-2.65788800606,2.56207076021,-1.05894659782,8.84730718845,-2.57464102291,8.01848681208,-1.85194923843,1.97626577931,-1.40945123155,-5.74265774246,0.924848689262,-1.236617136,-9.82828646908,-4.81270094037,-2.16332069579,-6.64503101117,-4.4128632283,-2.18131923051,3.4769503007,2.90464238744,-4.94743843102,5.90850047098,-4.64367413062,0.28500935648,2.64984379487,-9.84635302601,-7.11243489023,-4.01413462206,-0.897565683674,-2.9670055395,1.34579168233,-10.2449240975,-13.263986234,-7.83416108133,-0.231964489627,1.51055085739,-3.37284602283,-2.69571094061,-1.46171389249,-5.03272562667,-4.08532204114,-2.08503518413,-8.12072642097,-11.0041995675,6.17368102305,2.94504150965,0.289700715472,0.775772051604,4.53359862464,-4.20276785098,-3.5067513994,0.493957449563,-6.11995519946,4.60282921316,0.539823928982,-5.26718837544,-1.06510242931,-3.95862624442,-3.3614648762,-2.08570717816,1.40160374371,1.17756178334,-5.7858825122,1.59042917238,-3.52136537989,-10.3889512288,0.134930425744,-9.81869922865,-3.83743760741,3.22956266628,4.0997718246,-3.23849558098,-4.08116065876,-2.58373502131,-11.2239380805,8.34353925626,-5.88483736797,5.20083434413,-2.55278679782,4.13693497423,7.60392130926,1.7321651912,9.1232979508,-5.39700205249,1.6368439079,-6.3436536719,-8.06925452784,-4.35315465608,-6.59620848542,-6.19413344317,0.175776522814,-4.78902358567,-4.83727435503,-3.86320776356,-6.6327845063,6.77554192903,4.04904993041,4.35012365869,-6.87189063252,-5.17354627605,-3.97850376168,-3.44717949972,-5.67148535962,-5.64252339255,2.19387536337,-0.665533936345,1.60597169449,2.55491320946,-7.1045130663,-9.06708019399,4.48303920998,-0.738623955583,3.63740551779,-4.84181723434,-0.453189159375,-4.88692736379,-7.84317036663,-6.12509985915,-15.2220474521,-2
.76492901674,-5.75960501527,-2.66304625965,5.28649852319,1.04755922318,-4.46889628488,4.19989941142,-2.67861070119,5.15020902979,-6.23426225664,1.01641065023,4.31786128238,-3.27745278081,-4.22844189936,0.341833406178,-6.80801962174,-11.1225226863,1.12714078249,3.11436192151,3.53712300384,-1.54531455268,-3.75054328375,-0.910214917602,-6.4740656525,-10.7074697736,-7.26127869785,5.18301392898,-4.88103693178,-14.1014721454,-7.31164814152,-0.813138692209,-1.99521315468,-1.6737340958,-8.83762052508,-2.15140025937,2.70244660698,-5.21218375252,3.20089622947,-7.41461127834,0.146067941673,-3.18111834259,1.2090890813,-3.65830278349,4.97036113093,-7.3837097202,-2.96232990981,-6.35593825351,0.104259984528,-8.05705533331,-3.29433456058,-4.90823425028,-8.30210313825,0.322873967494,-7.35120453766,2.02111348919,-2.7836775386,8.051950056,-6.43552215192,-6.88968116176,-3.33608674763,0.416689109388,-4.00166366694,0.249402075769,-0.00203023292896,-2.75787401873,-14.78967028,-1.19596579501,-1.61737466126,-3.48602083031,-8.47137011855,-6.4259000639,-2.93748263051,-4.46779999922,-2.5770648199,-3.75372303743,-1.77651311819,-6.48878157978,2.45436751248,-7.7559258241,-15.061513524,3.70625095076,-6.33567762337,-0.0820837076559,-4.18515082187,-0.262555949149,-8.1508951908,0.855390693155,-1.69969393614,-3.12761997021,4.74863069817,4.75149867008,-3.93326660895,2.32994771008,6.73616799549,-9.05123071939,-3.89120762373,-3.72910333316,-0.099687676645,-1.05502663271,4.61649102124,-13.322942962,-6.57489305267,-4.39482081973,3.23592248818,2.61974208913,-2.57075070314,0.0290122126464,-0.557740962406,-4.17394245755,-0.206221836183,-3.94122471798,8.64373145741,5.04802339384,-2.52717139012,1.50214183457,8.46425939096,-2.68485898067,-6.65244711693,-0.362513828504,4.5150654038,-9.0470118044,-2.72062972428,-5.58207012124,-1.4819290705,-9.4785927829,-7.87446777899,11.0699954406,-5.4465369927,-5.75826317939,1.18140480513,-7.78822079245,1.07339962024,3.10695557199,1.34136245958,-6.04767741227,-6.5406228662,5.561
44696298,-1.52441284001,3.92334272602,1.18516467207,-4.69637293416,-2.27550496464,-7.67962893925,-2.85253127211,-7.7904363355,3.52299568457,1.17119049993,4.29841677382,2.82465387956,-4.17222969081,-6.39801429802,-5.47419114987,4.13187022913,0.28639324377,-1.42150687256,3.07021175117,-7.67503858995,-3.31685290809,4.5321259055,-10.0542050469,-7.13310344722,4.20786394536,-2.78379766076,-14.2454776712,-7.16974008783,5.99766813224,0.370353848923,-1.24337036513,-4.71086582829,-4.37748108651,-1.46798694119,-8.54114096064,8.86592376961,0.822806086967,-2.95240382374,-6.58467195062,-6.87906803192,9.06151344632,-1.63034658689,7.09297280707,-9.90765499104,-5.87181704475,0.764682465358,-1.46969305238,17.637640204,-3.27562799331,2.27068657624,-3.82403266088,-1.3444913171,-5.48806947812,4.67824731159,-2.75519428854,0.214689252962,2.70785610386,0.666819763777,-0.218669032817,-2.05057659525,5.07876585809,0.830527663017,0.282436759154,-1.02605947301,-5.27526878573,-4.82615047204,13.8843655942,2.79766270973,-6.87669917457,-7.58371051255,-7.52188172365,-6.49377587563,-8.83316180496,0.257008661964,-9.9361090467,-5.66394415239,-4.5728131344,-5.05938026749,-2.15799333495,-0.829360562456,-0.640318594311,4.15384676329,4.44153080578,2.25906753932,-9.6459030339,-9.75857495975,-0.513535414066,-0.276043993836,-3.99051680212,0.147020287369,-3.42972324343,-13.1421850032,-1.66720607508,0.44982380649,7.33962922803,8.35218900042,-3.22691613555,1.81151054021,-1.35492262579,1.13537956908,-7.31117369855,-9.49751725143,0.725770963637,0.0041163332643,-11.7311519285,0.525158919358,-6.55244569022,-3.09848142724,0.040276914528,-5.01572705582,-3.80566297238,0.820122617887,-7.28308282164,4.95975027147,-10.806338365,-6.55816563997,1.29169631766,-9.89732917709,0.288529881728,-2.91634045873,-5.427419481,-1.46784342632,-3.59118041897,0.16856486352,0.859129643227,0.835530146448,-1.59211517284,-3.51167435685,-5.63458194153,-1.09832443652,-4.60104367235,-0.00953959329388,-6.5846753721,-2.41325067253,-11.6984540307,5
.0399718168,5.56203246712,0.632465602303,-3.33465416452,2.31142023809,-1.58098466977,-11.3616944834,-6.81395508265,-1.59966585423,-1.35636778919,-4.39560170157,-5.20140251982,1.72986900313,-5.11273531463,2.68144657666,1.75009188373,-2.28357514089,9.50337256355,0.847487335035,5.44704840591,4.32125141507], + "norm_std" : [-0.761837213353,-0.331616898194,-1.75131542992,0.628894110773,0.282501864129,-1.33813943,-0.50060684963,0.121645029892,1.70832347381,-0.970999448321,-0.619332343444,-0.726708131791,1.22165541672,0.503699288341,-1.3878740774,0.204851419543,0.603705215945,0.545680308693,0.235477019441,0.111834993822,-1.2515037504,-2.94934349814,0.634634160585,0.124157016111,1.29762248959,-1.68693341116,1.08953904655,2.06088173968,-0.241235326269,-0.94787218032,0.676294028923,-0.653356162094,-0.652295297944,0.528827604205,0.357793249335,0.188649359732,0.869416879035,-0.0506674481438,-0.71636457461,-0.103258720839,-1.1410365794,-0.500776900872,-0.389301370382,-0.473850530407,0.128664303795,0.153694305281,0.444790058081,0.128531666655,0.252529866032,-0.940638662695,1.00214544816,-0.52541498431,-0.887400935623,1.83131360327,-0.923029332098,0.700537686638,-0.892151197664,2.30074000291,-0.817765299371,0.513759631538,0.623586943483,1.48920592699,1.94047867052,0.543237129288,0.506190912339,1.66201449055,-1.18920250015,0.0935974490328,-0.539163905131,-1.43739560422,0.187937386025,-0.450454457295,-0.516878231615,-0.0956356677115,0.316423804579,0.603334657292,-1.49459146388,-0.110894079325,0.241289403967,-0.582645109052,-0.241112652347,0.236360537321,0.124720725203,1.04632597952,-0.27309185588,-0.534834020277,-0.306563304509,-0.162242664723,-1.08323219585,0.708401493453,1.52074304043,0.290343183233,-0.683066329711,-0.950312866297,0.400709935824,-0.12607168361,0.398204888062,0.141638473355,-0.264141421911,-0.452212074311,0.758201972721,-0.515583498457,-0.591202321979,0.896745784086,-0.971437523747,1.84080991349,0.153881232452,-0.274083943452,-1.78492568996,0.981006686402,-0.873717
139787,-1.01563442014,-0.411243537311,1.46562116753,-1.00621906211,-0.902147762382,0.752769142961,-0.490508526506,-0.524672210141,-0.699195861143,0.352360939272,0.0681025983371,-0.930341707001,0.845399560277,0.0164723816491,0.844962955458,1.85083394768,0.0220742408712,-1.36917902216,0.887203523342,0.0143311821492,-0.0741547051151,-0.048564787848,1.23502145314,-0.433294923904,1.39103545609,0.820210741477,-0.247423465317,0.30227074638,0.543980361346,-0.942368503754,-1.26638281245,0.937249545091,-0.72010224475,-1.5939515375,-0.375497816009,-0.958703834468,0.794336400065,-1.60510783562,0.543710253458,0.925166364208,-1.469628604,-0.399592346308,1.41734264438,-0.897608667966,1.84480501591,1.2531682095,-1.49093241721,-0.0277339245574,1.37523596087,-0.0252081701471,-0.667880179075,-2.86801752898,0.210688543428,1.28715530785,-0.574305988486,0.49532664657,0.396049589985,0.58879818975,-1.28175713014,2.02992261305,-0.501944516275,-0.159284565628,-1.49621629567,0.0114477138521,0.419445985251,2.05121388048,-0.368765332511,-1.68925467803,0.147681161421,-0.180998391963,0.158059054263,-0.396615421768,-0.400236629563,-0.824895666289,-0.244440445893,1.21945742825,-0.43363049174,0.861183873108,-0.334503693494,0.159559959242,-0.984164476346,0.754084973823,-0.284391662165,0.32479752957,-0.885424601706,-1.28089348334,0.196109935055,0.954644156116,-0.800971331814,0.0158514729825,1.08755329253,-0.631242819687,-0.0226893248728,0.685879242202,0.519179207928,0.182701891972,0.204647380607,-0.265986356197,-0.000227288704174,1.23945231553,-0.819715255694,-0.260388906514,0.519140256693,0.143091644531,-0.116677746539,1.49674411145,-1.48427437532,-1.67118275603,0.917173408575,-0.758014151397,2.06479240297,-0.850778395978,0.499450712758,-0.0792663655031,-1.40329263703,1.57894791325,0.000369028987627,0.900884914363,-0.454869220081,-0.864546645457,1.12911990353,0.0578744128611,-0.433738666312,0.0926976373756,-1.39782014504,1.45782265006,-0.176756915665,-0.2542403002,-1.26343749602,0.452262741199,-0.840
117409241,-0.502678070859,0.513392586902,1.64165300475,0.580790036214,-1.70734026937,-0.178355430855,-0.828459954458,1.28631168451,-0.406452361793,0.156632047142,0.0521066804267,0.955813177407,0.743191500529,-0.486323084325,1.92046727351,-0.652749022684,-0.173303776916,-0.360410082419,-0.380413976662,-1.29813980546,0.527919007521,-0.0931002762505,0.401184680615,-0.102583380148,0.030869097738,2.61610050962,-0.785577944872,-0.506998120503,-2.01820571555,-0.676853137903,2.66674367704,1.45145614673,0.634628855315,-0.502826863797,0.512931658764,1.75677937083,-0.974310800693,0.680397048216,0.955798725606,1.50153547771,-0.756265648025,0.473504604182,1.71374344767,-1.14769922048,0.00290322049513,-1.10057035996,-0.297531781547,0.502409078192,-0.000987418981448,-0.674560277944,0.297958279225,1.46557313878,-0.303628593773,-0.994479884587,0.189889991266,-1.68402957184,-0.45838074161,0.543405908301,-1.18726425743,-0.412641692684,1.17712534516,-0.313704165315,1.57903161901,0.375388235873,-1.56813881881,-0.900886519308,0.652345518687,0.871600313907,0.268216169978,0.947681219661,0.14726758787,-1.77245546326,0.59241961089,0.0903254744846,0.651121453984,-0.0811946962298,0.801897602972,0.139845227106,-0.501002761518,-0.128302559175,0.414605966484,0.604577785973,2.13409475324,0.941187837385,-0.931456795706,-0.124667539256,0.200696290937,0.180256285555,-0.320370096868,-1.59612803134,-1.28169898363,1.50258574677,0.653538002103,-0.319536626289,0.955094010533,0.261995955396,0.160792900918,-0.571680642407,0.351660058596,1.11498006066,1.18326825729,1.06094106427,0.510712630416,-0.938783998005,-0.546496141191,0.590029971202,1.48218523572,0.10211810416,0.265438049312,0.00319307433395,-2.59501150029,-1.55556933212,1.10299595783,0.554736503855,-1.28901163638,0.385241647645,-1.71729172626,-1.01835312911,0.051635317273,0.503298709971,-0.543186230699,-0.50667841739,0.729652833474,0.434273362739,-1.13367360599,1.42395333777,0.266351536554,-0.854264392719,-0.550596561249,-0.619109858975,1.03893338917
,-0.910610824816,0.529952566873,-0.00847143614797,-1.12903824884,0.569854190963,-0.863391621561,-1.35614426983,-0.0571515568786,-0.108621121585,1.65238409216,-1.3509280784,-0.546096737434,0.991400184464,2.20099738608,0.42789979109,0.290468282765,0.611953095869,-0.512450984176,-0.724230691032,1.69288189574,-0.299339119682,1.57172719445,0.461444067234,-0.673829701094,-1.14103626018,-1.22891797584,-1.1592824639,-0.320829018255,1.08834758337,-0.906203145379,-0.464152061905,-0.513378372516,1.61783767507,-0.8166506056,0.244719605323,-1.31109422683,0.388406495077,1.59237370969,0.870399036779,0.335249325288,0.648959907332,-0.183151791075,0.500241269555,1.36882639497,0.895091842247,0.647293371999,-0.567878708259,-0.579517446608,-0.751448572574,1.07551917611,-0.62114235961,1.87855571664,1.26023993075,0.310050972989,0.106402292088,0.248012997113,-1.39383959205,-0.669436307001,-0.566791473817,-0.381778902753,-0.946546906825,-1.06510299994,-1.33182618118,-0.986453191144,-0.378391147408,0.764711974788,0.0603594164541,0.618509998953,-0.484921020219,-0.280530239708,0.406962904079,1.02518778661,0.254751681263,0.0875239905036,0.0706983543389,-0.573152602817,1.22892597332,-0.962201893069,1.52555675514,0.827282588614,0.912470470299,-0.127292342582,0.63431664124,-1.53089842788,-1.29070149281,-0.526228340585,-1.13223396004,-0.499797126597,-0.728463087382,-0.58314416965,0.329290656707,-0.826860798028,-0.536867983053,-0.562980133545,0.918404799512,-0.0793993782114,-0.278624682616,-0.130459538605,-1.39699761449,-0.244713889273,0.830253910578,0.240821201544,-0.915697123133,-2.22527996377,-0.663067012309,-0.321194763746,0.498388164634,0.380338976442,-1.06703532454,0.255452172355,2.11128718522,-0.634189962037,1.36875576787,-0.970649489259,0.654245334263,-1.17189521913,-0.00315987197527,-0.745604825465,1.5982908861,-0.913399998036,2.40291208791,-0.589360262208,0.107657442325,-0.139297516223,-1.15992572508,0.61896478197,1.37389046967], + "uniform" : 
[1.45432355841,2.84810834343,4.74549161632,0.638780099457,-3.80958924632,-3.334498612,4.52507125868,4.49289433883,-4.1673896291,0.0125584707891,-2.93478910984,1.51450023098,3.28974045779,1.87267359929,-0.748062135296,2.38854324107,-2.47169793606,-0.786869261288,0.37914803654,5.7567507291,0.54094891718,-4.21155738087,-4.40585756366,-2.38089807527,-2.56489267557,-0.571006088134,5.20006663321,-1.67342014432,-4.37642538354,5.31829071475,4.35379893094,-0.139911154119,0.492925185296,-2.89678112452,2.27523861458,-2.10270882885,4.13030475965,0.851435037364,-3.48307762133,5.55425195349,-4.6934942688,-3.62614039352,-2.03383194887,5.98607745069,-3.81397858481,5.53769848592,-3.24758094768,0.298431697339,-1.32007797393,-3.75848597245,2.11744167771,5.87699808631,2.73780395389,-4.84973427301,5.57490240937,-0.988554910227,-0.446840530765,3.48666804704,2.47822507031,0.162043547001,3.5230537369,-0.528256883676,-3.95836502662,-4.3914251168,-4.18263571494,4.95083274636,-4.53376299863,3.94551543404,0.596124503497,-1.7568988481,-4.60545417113,0.117764819999,4.70696947989,2.57605270342,5.48188315937,-3.17947232152,-2.28136708387,-2.7142198104,3.4258475641,5.48009234179,4.5487855596,0.31404742701,-3.98873825127,-4.51501761633,4.7784799764,1.82434191497,-1.71463931332,1.73037034662,0.111955578195,1.60165041552,5.16819504254,-3.39463891206,4.34756282725,1.27008060866,-2.57050853273,-3.46799209841,4.80740829996,-1.54819279283,-0.354483971149,4.00405500806,4.81694037039,0.399101760152,4.15686171048,-3.36559695148,2.34157999029,0.662223312418,-3.37116375219,3.58278392426,-4.02553600154,-4.55235923865,-3.78559623306,1.01391923885,-0.895881980105,-1.22278562807,4.8953108162,0.454499904729,2.17532338327,0.0714670210666,4.96491252964,-0.872846093718,2.75796271442,4.96084919202,5.58626916432,1.80106160046,-3.33883276923,0.808169355019,4.59906551132,-3.46005143836,3.11483063592,0.413642742828,2.62861203367,-1.0807489084,1.86284759175,-0.777383245242,-0.723016247929,3.98252130669,-3.1311277956,2.04125
530566,1.56924320631,-2.94243145073,-4.6526331139,-0.299364108457,-3.78750802211,-0.970758953759,1.21324743701,0.0152496553563,2.55176696713,-2.04274582627,-0.297447821896,-2.02981575598,1.83763192045,0.695832023204,0.766789663894,-3.64701328164,-2.04995296437,-0.701819200751,4.0245158366,-3.83268139386,2.36307851704,2.25482285201,-3.86618194188,-3.75245645013,-3.85102316278,4.4926318712,4.14793397653,-2.45495032266,-1.62633267148,-3.23570586799,-0.238206301682,-3.36731307702,-3.76751442236,5.76898598429,-1.29350554983,1.73969608469,3.6518744006,5.74686280155,-0.310483259674,-1.06032669048,3.44718928151,1.96690626227,-4.25485004471,-4.54311287255,3.20289437256,-4.52668011369,3.68500911057,-4.2337702225,1.12120010323,-4.41102012045,-1.94912752564,5.18085316778,5.57592892482,-3.42683883973,3.51409626374,-0.310820927004,5.62096600738,2.05805104359,-1.88553981662,5.31843846289,5.67545701486,-1.26071860507,2.2151071236,-2.94696376511,2.67445209854,-1.07961565758,3.69503023395,-1.57212685176,-1.47581099161,2.32113033297,2.82642707725,4.91455936421,-0.19590775405,3.22495086535,-3.67782776285,-3.63623704763,-3.37728417072,-1.20019074788,-2.58909767083,4.9658213675,5.39015923519,3.14437129408,3.57125949213,1.03085245388,4.92505563847,3.00349697484,4.94044067814,-2.25546063442,0.969496605868,-3.47352987352,3.10662048182,0.566712138486,0.657814313523,-3.71076661201,-4.2813381313,0.84136667553,5.24178104338,-3.11316445456,5.00564158188,1.16212324541,5.87348595144,5.01353726723,1.21249283992,2.22958055259,-0.847575674193,4.00046874363,0.0215846599159,-0.529543984292,5.45916178619,-3.94019779641,2.30195251359,-3.93525892427,0.785684033157,4.94075751158,0.464138317287,-2.87729248081,-4.18867090084,2.68775304724,4.40954110599,-3.31373338772,2.88587136547,1.75819831281,-1.21453174859,-2.61648870641,0.603163913706,-1.61981207919,0.547541098363,5.93496525664,2.61304694492,5.16646820824,1.65564038096,-2.96326713312,-3.98247112705,-2.05730889135,-1.15420530314,-2.50545667115,2.021886328
44,0.0411362293029,-4.6909733858,-0.198334427003,-4.98883450765,-2.19545796727,-0.473140769882,2.29109093143,-2.0784401279,-2.53780916013,2.5039531005,4.11389548933,5.30011584122,2.97799405228,3.31257962057,-4.98167852978,-4.85919698744,2.0448839829,5.65031534577,2.20823377487,3.12991527401,5.25105981797,0.157847168897,-2.17002836902,-0.161170643182,-3.50574279308,-4.62726294769,-4.84311113987,-4.38036241882,3.59990144136,1.54128645468,-1.51424909606,-0.114482726736,4.53613071769,-2.9368219888,0.0526311528409,-4.89983570578,4.96410129308,-3.01651816693,0.42130580901,-3.58566188522,2.8187414733,-1.35612870555,-0.767774237509,-4.48591429986,5.68609865847,2.69536066591,0.308370373237,1.45769504901,4.90318464516,5.49212802432,5.85271367584,4.01744878836,2.80816998975,-3.69506425896,3.6113951263,4.73557512742,1.85912958684,-2.87438109843,-3.8288134493,2.02106005559,3.66145304415,-4.82002424873,-2.38125576404,-4.54720629779,5.96313154411,-0.684352288309,5.86606470757,0.414218967106,-2.80223367467,5.57928925642,5.10918823208,-0.960978560975,1.73344519855,1.02065750482,3.24304401792,-2.90342839715,2.80664304761,3.31012446828,-1.58562864324,2.74334353156,4.08606746443,-4.23547881307,0.40070155737,-0.306617652136,-4.68949123813,3.87049763925,-0.556208748437,5.4131590509,1.65577635035,-2.16593261855,0.210138208198,-3.32980479281,-1.62474847294,0.72814496585,-1.14873567673,0.102399279919,4.51709935102,-2.48616170018,3.00286919149,-3.76237119478,4.12018520934,-1.11138185225,-2.67796620885,4.1755561884,-2.89619731859,2.4089691743,-1.62210068409,-3.10752081167,2.56900857828,-1.69399798874,2.35475871375,3.20385460533,-4.43673283558,1.34292368779,-2.93279032498,-0.570984295777,-2.78388758534,2.82673543915,-1.86141159738,1.68155778593,-4.64295009463,3.31991503966,-3.56279656708,-3.0795971262,0.77672321611,-4.65585773141,4.76487405897,3.03061823719,4.08482030209,-3.18701929048,-1.85013275826,1.63009012765,-3.34861893849,3.47529339874,4.20587865389,-4.33856180867,-3.94209921244,-3.4347
23446,3.52872482059,5.15585689929,5.82907196027,2.71657961,-2.67165284112,4.56799592233,1.35149079325,4.55842480273,3.34531861458,5.03609612971,-4.20030167038,-2.54734192104,-4.76904205819,5.87287831778,3.49310440431,-3.37341145157,-4.54314464789,5.42142775361,-0.225874049476,0.684628253075,-4.16112651428,-2.64748260518,2.25373978414,2.74307101515,-2.55286091027,-1.59107809722,-1.56113175348,2.24544485998,-1.1246652061,-4.07172210405,1.38784744908,5.31093292739,0.0169933328073,-2.29048976868,-1.457767139,2.51997086593,5.29536043385,5.69558987975,1.11693040759,0.0394180689375,-0.777882497327,1.12645828961,-4.46304170204,1.03010349355,5.1864925466,-4.64559026931,-2.71739482646,-3.44739004694,-4.48424496653,-2.27491388597,-4.34829681524,-1.72103595731,0.949498236372,-3.42100592396,-0.869391830051,0.224304025292,-1.03339951677,-3.32809341458,-2.84838581,1.52267692885,-0.493936084335,3.0571958706,1.56274685093,3.3038573452,-4.32686459217,-4.96084105141,3.77099677054,2.11412416858,-0.604271076291,-1.49238406336,-4.36006707164,-3.30307102943,2.61180121593,-2.8428853935,3.44951738737,-3.41707540886,4.38662123682,3.02677617193,3.83492464642,-0.84021715418,-2.46159977217,-4.16813287848,4.03088020193,5.31847322904,3.5475640187,2.29265832016,2.58307453448] + }, + "tests" : [{ + "title": "Positive standard norm distribution", + "exact_match_out": false, + "in": { + "column": "norm_std", + "result_format": "BASIC", + "params": {"mean":0, "std_dev":1}, + "distribution": "norm", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Negative standard norm distribution", + "exact_match_out": false, + "in": { + "column": "norm_std", + "result_format": "BASIC", + "params": {"mean":1, "std_dev":1}, + "distribution": "norm", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Positive beta distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "beta", + "result_format": "BASIC", + "params": {"alpha": 
0.5,"beta": 10,"loc": 5,"scale": 11}, + "distribution": "beta", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Positive beta distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "beta", + "result_format": "BASIC", + "params": [0.5, 10, 5, 11], + "distribution": "beta", + "p_value": 0.05 + }, + "out": { + "success" : true + } + }, + { + "title": "Negative beta distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "beta", + "result_format": "BASIC", + "params": {"alpha": 1,"beta": 11,"loc": 5,"scale": 11}, + "distribution": "beta", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Negative beta distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "beta", + "result_format": "BASIC", + "params": [1, 11, 5, 11], + "distribution": "beta", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Positive gamma distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "gamma", + "result_format": "BASIC", + "params": {"alpha": 2, "loc": 20, "scale": 3}, + "distribution": "gamma", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Positive gamma distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "gamma", + "result_format": "BASIC", + "params": [2, 20, 3], + "distribution": "gamma", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Negative gamma distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "gamma", + "result_format": "BASIC", + "params": {"alpha": 3, "loc": 20, "scale": 3}, + "distribution": "gamma", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Negative gamma distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "gamma", + "result_format": "BASIC", + "params": [3, 20, 3], + "distribution": "gamma", + 
"p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Positive uniform distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "uniform", + "result_format": "BASIC", + "params": {"min": -5, "max": 11}, + "distribution": "uniform", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Positive uniform distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "uniform", + "result_format": "BASIC", + "params": [-5, 11], + "distribution": "uniform", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Negative uniform distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "uniform", + "result_format": "BASIC", + "params": {"min": -4, "max": 12}, + "distribution": "uniform", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Negative uniform distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "uniform", + "result_format": "BASIC", + "params": [-4, 12], + "distribution": "uniform", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Positive chi2, params is a dict", + "exact_match_out": false, + "in": { + "column": "chi2", + "result_format": "BASIC", + "params": {"df": 30, "loc": 3, "scale": 5}, + "distribution": "chi2", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Positive chi2, params is a tuple", + "exact_match_out": false, + "in": { + "column": "chi2", + "result_format": "COMPLETE", + "params": [30, 3, 5], + "distribution": "chi2", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Negative chi2, params is a dict", + "exact_match_out": false, + "in": { + "column": "chi2", + "result_format": "BASIC", + "params": {"df": 33, "loc": 3, "scale": 5}, + "distribution": "chi2", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Negative chi2, params is a 
tuple", + "exact_match_out": false, + "in": { + "column": "chi2", + "result_format": "BASIC", + "params": [33, 3, 5], + "distribution": "chi2", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Positive expon distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "exponential", + "result_format": "BASIC", + "params": {"loc": 4.2, "scale": 10}, + "distribution": "expon", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Positive expon distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "exponential", + "result_format": "BASIC", + "params": [4.2, 10], + "distribution": "expon", + "p_value": 0.05 + }, + "out": { + "success": true + } + }, + { + "title": "Negative expon distribution, params is a dict", + "exact_match_out": false, + "in": { + "column": "exponential", + "result_format": "BASIC", + "params": {"loc": 5, "scale": 10}, + "distribution": "expon", + "p_value": 0.05 + }, + "out": { + "success": false + } + }, + { + "title": "Negative expon distribution, params is a tuple", + "exact_match_out": false, + "in": { + "column": "exponential", + "result_format": "BASIC", + "params": [5, 10], + "distribution": "expon", + "p_value": 0.05 + }, + "out": { + "success": false + } + }] + }] + } + diff --git a/tests/column_aggregate_expectations/expect_column_proportion_of_unique_values_to_be_between.json b/tests/column_aggregate_expectations/expect_column_proportion_of_unique_values_to_be_between.json new file mode 100644 index 000000000000..69658822d988 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_proportion_of_unique_values_to_be_between.json @@ -0,0 +1,78 @@ +{ + "expectation_type": "expect_column_proportion_of_unique_values_to_be_between", + "datasets": [{ + "data": { + "dist1" : [1,2,3,4,5,6,7,8], + "dist2" : [1,2,3,4,5,null,null,null], + "dist3" : [2,2,2,2,5,6,7,8], + "dist4" : [1,1,1,1,null,null,null,null] + }, + "tests": [ + { + 
"title": "Basic positive test", + "exact_match_out": false, + "in": { + "column": "dist1", + "min_value": 0.5, + "max_value": 1 + }, + "out": { + "success": true, + "observed_value": 1 + } + }, + { + "title": "Positive test with null values in column", + "exact_match_out": false, + "in": { + "column": "dist2", + "min_value": 0.5, + "max_value": 1 + }, + "out": { + "success": true, + "observed_value": 1 + } + }, + { + "title": "Duplicate values in column", + "exact_match_out": false, + "in": { + "column": "dist3", + "min_value": 0.6, + "max_value": 0.7 + }, + "out": { + "success": true, + "observed_value": 0.625 + } + }, + { + "title": "Null max; duplicate and null values", + "exact_match_out": false, + "in": { + "column": "dist4", + "min_value": 0.3, + "max_value": null + }, + "out": { + "success": false, + "observed_value": 0.25 + } + }, + { + "title": "Exception: null min and max", + "exact_match_out": false, + "in": { + "column": "dist1", + "min_value": null, + "max_value": null, + "catch_exceptions": true + }, + "out": { + "traceback_substring": "cannot both be None" + } + } + ] + }] +} \ No newline at end of file diff --git a/tests/column_aggregate_expectations/expect_column_sum_to_be_between.json b/tests/column_aggregate_expectations/expect_column_sum_to_be_between.json new file mode 100644 index 000000000000..ebd308eaae11 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_sum_to_be_between.json @@ -0,0 +1,98 @@ +{ + "expectation_type" : "expect_column_sum_to_be_between", + "datasets" : [{ + "data" : { + "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], + "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], + "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], + "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], + "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", null, null, null, null, null], + "a" : [null, 0, null, null, 1, null, null, 2, null, null], + "b" : [null, 0, null, null, 2, null, null, 1, null, null] + }, + "tests" : [{ + "title": 
"Basic positive test case", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": 30, + "max_value": 30 + }, + "out": { + "success": true, + "observed_value": 30 + } + },{ + "title": "Basic negative test case", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "BASIC", + "min_value": 40, + "max_value": 50 + }, + "out": { + "success": false, + "observed_value": 30 + } + },{ + "title": "Test case with result_format=SUMMARY", + "exact_match_out" : false, + "in": { + "column": "w", + "result_format": "SUMMARY", + "min_value": 20, + "max_value": 40 + }, + "out": { + "success": true, + "observed_value": 30 + } + },{ + "title": "Test case with only a lower bound, and a missing value", + "exact_match_out" : false, + "in": { + "column": "x", + "min_value": 30 + }, + "out": { + "success": true, + "observed_value": 44 + } + },{ + "title": "Negative test case with only a lower bound", + "exact_match_out" : false, + "in": { + "column": "w", + "min_value": 50 + }, + "out": { + "success": false, + "observed_value": 30 + } + },{ + "title": "Negative test case with only a max", + "exact_match_out" : false, + "in": { + "column": "y", + "max_value": 20 + }, + "out": { + "success": false, + "observed_value": 22 + } + },{ + "title": "Raise ValueError with both max and min are missing", + "exact_match_out" : false, + "in": { + "column": "y", + "catch_exceptions": true + }, + "out" : {}, + "error": { + "traceback_substring": "cannot both be None" + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_aggregate_expectations/expect_column_unique_value_count_to_be_between.json b/tests/column_aggregate_expectations/expect_column_unique_value_count_to_be_between.json new file mode 100644 index 000000000000..c3953293db92 --- /dev/null +++ b/tests/column_aggregate_expectations/expect_column_unique_value_count_to_be_between.json @@ -0,0 +1,78 @@ +{ + "expectation_type": 
"expect_column_unique_value_count_to_be_between", + "datasets": [{ + "data": { + "dist1" : [1,2,3,4,5,6,7,8], + "dist2" : [1,2,3,4,5,null,null,null], + "dist3" : [2,2,2,2,5,6,7,8], + "dist4" : [1,1,1,1,null,null,null,null] + }, + "tests": [ + { + "title": "Basic positive test", + "exact_match_out": false, + "in": { + "column": "dist1", + "min_value": 0, + "max_value": 10 + }, + "out": { + "success": true, + "observed_value": 8 + } + }, + { + "title": "Positive test with null values in column", + "exact_match_out": false, + "in": { + "column": "dist2", + "min_value": 0, + "max_value": 10 + }, + "out": { + "success": true, + "observed_value": 5 + } + }, + { + "title": "Null min; duplicate values in column", + "exact_match_out": false, + "in": { + "column": "dist3", + "min_value": null, + "max_value": 10 + }, + "out": { + "success": true, + "observed_value": 5 + } + }, + { + "title": "Null max; duplicate and null values", + "exact_match_out": false, + "in": { + "column": "dist4", + "min_value": 2, + "max_value": null + }, + "out": { + "success": false, + "observed_value": 1 + } + }, + { + "title": "Exception: null min and max", + "exact_match_out": false, + "in": { + "column": "dist1", + "min_value": null, + "max_value": null, + "catch_exceptions": true + }, + "out": { + "traceback_substring": "cannot both be None" + } + } + ] + }] +} \ No newline at end of file diff --git a/tests/column_aggregate_expectations/test_column_aggregate_expectations.py b/tests/column_aggregate_expectations/test_column_aggregate_expectations.py new file mode 100644 index 000000000000..f3e988aaf395 --- /dev/null +++ b/tests/column_aggregate_expectations/test_column_aggregate_expectations.py @@ -0,0 +1,65 @@ +### +### +# +# This file should not be modified. To adjust test cases, edit the related json file(s). 
+# +### +### + + +import pytest + +import os +import json +import glob +import warnings + +from tests.test_utils import get_dataset, candidate_test_is_on_temporary_notimplemented_list, evaluate_json_test + +contexts = ['PandasDataset', 'SqlAlchemyDataset'] + + +def pytest_generate_tests(metafunc): + # Load all the JSON files in the directory + dir_path = os.path.dirname(os.path.realpath(__file__)) + test_configuration_files = glob.glob(dir_path + '/*.json') + + parametrized_tests = [] + ids = [] + for c in contexts: + for filename in test_configuration_files: + file = open(filename) + test_configuration = json.load(file) + + if candidate_test_is_on_temporary_notimplemented_list(c, test_configuration["expectation_type"]): + warnings.warn("Skipping generation of tests for expectation " + test_configuration["expectation_type"] + + " and context " + c) + else: + for d in test_configuration['datasets']: + my_dataset = get_dataset(c, d["data"]) + + for test in d["tests"]: + parametrized_tests.append({ + "expectation_type": test_configuration["expectation_type"], + "dataset": my_dataset, + "test": test, + }) + + ids.append(c + ":" + test_configuration["expectation_type"] + ":" + test["title"]) + + metafunc.parametrize( + "test_case", + parametrized_tests, + ids=ids + ) + + +def test_case_runner(test_case): + # Note: this should never be done in practice, but we are wiping expectations to reuse datasets during testing. 
+ test_case["dataset"].initialize_expectations() + + evaluate_json_test( + test_case["dataset"], + test_case["expectation_type"], + test_case["test"] + ) \ No newline at end of file diff --git a/tests/column_aggregate_expectations_distributional/__init__.py b/tests/column_aggregate_expectations_distributional/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/test_pandas_dataset_distributional_expectations.py b/tests/column_aggregate_expectations_distributional/test_pandas_dataset_distributional_expectations.py similarity index 75% rename from tests/test_pandas_dataset_distributional_expectations.py rename to tests/column_aggregate_expectations_distributional/test_pandas_dataset_distributional_expectations.py index 9f367f650bf0..edd693e80b9b 100644 --- a/tests/test_pandas_dataset_distributional_expectations.py +++ b/tests/column_aggregate_expectations_distributional/test_pandas_dataset_distributional_expectations.py @@ -21,7 +21,7 @@ def test_expect_column_chisquare_test_p_value_to_be_greater_than(self): 'partition_object': self.test_partitions['categorical_fixed'], 'p': 0.05 }, - 'out': {'success': True, 'true_value': 1.} + 'out': {'success': True, 'observed_value': 1.} }, { 'args': ['categorical_fixed'], @@ -29,23 +29,20 @@ def test_expect_column_chisquare_test_p_value_to_be_greater_than(self): 'partition_object': self.test_partitions['categorical_fixed_alternate'], 'p': 0.05 }, - 'out': {'success': False, 'true_value': 5.1397782097623862e-53} + 'out': {'success': False, 'observed_value': 5.1397782097623862e-53} }, { 'args': ['categorical_fixed'], 'kwargs': { 'partition_object': self.test_partitions['categorical_fixed_alternate'], - 'p': 0.05, 'output_format': 'SUMMARY' + 'p': 0.05, 'result_format': 'SUMMARY' }, - 'out': {'success': False, 'true_value': 5.1397782097623862e-53, - 'summary_obj': { + 'out': {'success': False, 'observed_value': 5.1397782097623862e-53, + 'details': { 'observed_partition': { 'values': [u'A', u'B', 
u'C'], 'weights': [540, 320, 140] }, - 'missing_percent': 0.0, - 'element_count': 1000, - 'missing_count': 0, 'expected_partition': { 'values': [u'A', u'B', u'C'], 'weights': [333.3333333333333, 333.3333333333333, 333.3333333333333] @@ -56,15 +53,15 @@ def test_expect_column_chisquare_test_p_value_to_be_greater_than(self): ] for t in T: out = self.D.expect_column_chisquare_test_p_value_to_be_greater_than(*t['args'], **t['kwargs']) - self.assertEqual(out['success'],t['out']['success']) - self.assertEqual(out['true_value'], t['out']['true_value']) - if 'output_format' in t['kwargs'] and t['kwargs']['output_format'] == 'SUMMARY': - self.assertDictEqual(out['summary_obj'], t['out']['summary_obj']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['observed_value'], out['result']['observed_value']) + if 'result_format' in t['kwargs'] and t['kwargs']['result_format'] == 'SUMMARY': + self.assertDictEqual(t['out']['details'], out['result']['details']) def test_expect_column_chisquare_test_p_value_to_be_greater_than_new_categorical_val(self): # Note: Chisquare test with true zero expected could be treated subtly. Here, we tolerate a warning from stats. 
categorical_list = (['A'] * 25) + (['B'] * 25) + (['C'] * 25) + (['D'] * 25) - df = ge.dataset.PandasDataSet({'categorical': categorical_list}) + df = ge.dataset.PandasDataset({'categorical': categorical_list}) out = df.expect_column_chisquare_test_p_value_to_be_greater_than('categorical', self.test_partitions['categorical_fixed_alternate']) self.assertEqual(out['success'], False) @@ -74,7 +71,7 @@ def test_expect_column_chisquare_test_p_value_to_be_greater_than_new_categorical def test_expect_column_chisquare_test_p_value_to_be_greater_than_missing_categorical_val(self): categorical_list = (['A'] * 61) + (['B'] * 39) - df = ge.dataset.PandasDataSet({'categorical': categorical_list}) + df = ge.dataset.PandasDataset({'categorical': categorical_list}) out = df.expect_column_chisquare_test_p_value_to_be_greater_than('categorical', self.test_partitions['categorical_fixed']) self.assertEqual(out['success'], False) @@ -86,7 +83,7 @@ def test_expect_column_kl_divergence_to_be_less_than_discrete(self): 'partition_object': self.test_partitions['categorical_fixed'], 'threshold': 0.1 }, - 'out': {'success': True, 'true_value': 0.} + 'out': {'success': True, 'observed_value': 0.} }, { 'args': ['categorical_fixed'], @@ -94,22 +91,19 @@ def test_expect_column_kl_divergence_to_be_less_than_discrete(self): 'partition_object': self.test_partitions['categorical_fixed_alternate'], 'threshold': 0.1 }, - 'out': {'success': False, 'true_value': 0.12599700286677529} + 'out': {'success': False, 'observed_value': 0.12599700286677529} }, { 'args': ['categorical_fixed'], 'kwargs': { 'partition_object': self.test_partitions['categorical_fixed_alternate'], - 'threshold': 0.1, 'output_format': 'SUMMARY' + 'threshold': 0.1, 'result_format': 'SUMMARY' }, - 'out': {'success': False, 'true_value': 0.12599700286677529, - 'summary_obj': { + 'out': {'success': False, 'observed_value': 0.12599700286677529, + 'details': { 'observed_partition': { 'weights': [0.54, 0.32, 0.14], 'values': [u'A', u'B', 
u'C']}, - 'missing_percent': 0.0, - 'element_count': 1000, - 'missing_count': 0, 'expected_partition': { 'weights': [0.3333333333333333, 0.3333333333333333, 0.3333333333333333], 'values': [u'A', u'B', u'C'] @@ -121,100 +115,100 @@ def test_expect_column_kl_divergence_to_be_less_than_discrete(self): for t in T: out = self.D.expect_column_kl_divergence_to_be_less_than(*t['args'], **t['kwargs']) self.assertTrue(np.allclose(out['success'], t['out']['success'])) - self.assertTrue(np.allclose(out['true_value'], t['out']['true_value'])) - if 'output_format' in t['kwargs'] and t['kwargs']['output_format'] == 'SUMMARY': - self.assertDictEqual(out['summary_obj'], t['out']['summary_obj']) + self.assertTrue(np.allclose(out['result']['observed_value'], t['out']['observed_value'])) + if 'result_format' in t['kwargs'] and t['kwargs']['result_format'] == 'SUMMARY': + self.assertDictEqual(out['result']['details'], t['out']['details']) def test_expect_column_kl_divergence_to_be_less_than_discrete_holdout(self): - df = ge.dataset.PandasDataSet({'a': ['a', 'a', 'b', 'c']}) + df = ge.dataset.PandasDataset({'a': ['a', 'a', 'b', 'c']}) out = df.expect_column_kl_divergence_to_be_less_than('a', {'values': ['a', 'b'], 'weights': [0.6, 0.4]}, threshold=0.1, tail_weight_holdout=0.1) self.assertEqual(out['success'], True) - self.assertTrue(np.allclose(out['true_value'], [0.099431384003497381])) + self.assertTrue(np.allclose(out['result']['observed_value'], [0.099431384003497381])) out = df.expect_column_kl_divergence_to_be_less_than('a', {'values': ['a', 'b'], 'weights': [0.6, 0.4]}, threshold=0.1, tail_weight_holdout=0.05) self.assertEqual(out['success'], False) - self.assertTrue(np.isclose(out['true_value'], [0.23216776319077681])) + self.assertTrue(np.isclose(out['result']['observed_value'], [0.23216776319077681])) out = df.expect_column_kl_divergence_to_be_less_than('a', {'values': ['a', 'b'], 'weights': [0.6, 0.4]}, threshold=0.1) self.assertEqual(out['success'], False) - 
self.assertTrue(np.isclose(out['true_value'], [np.inf])) + self.assertTrue(np.isclose(out['result']['observed_value'], [np.inf])) def test_expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self): T = [ { 'args': ['norm_0_1'], 'kwargs': {'partition_object': self.test_partitions['norm_0_1_auto'], "p": 0.05}, - 'out': {'success': True, 'true_value': "RANDOMIZED"} + 'out': {'success': True, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_0_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_uniform'], "p": 0.05}, - 'out':{'success':True, 'true_value': "RANDOMIZED"} + 'out':{'success':True, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_0_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_ntile'], "p": 0.05}, - 'out':{'success':True, 'true_value': "RANDOMIZED"} + 'out':{'success':True, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_0_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_kde'], "p": 0.05}, - 'out':{'success':True, 'true_value': "RANDOMIZED"} + 'out':{'success':True, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_1_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_auto'], "p": 0.05}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_1_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_uniform'], "p": 0.05}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_1_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_ntile'], "p": 0.05}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['norm_1_1'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_kde'], "p": 0.05}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['bimodal'], 
'kwargs':{'partition_object': self.test_partitions['bimodal_auto'], "p": 0.05}, - 'out':{'success':True, 'true_value': "RANDOMIZED"} + 'out':{'success':True, 'observed_value': "RANDOMIZED"} }, { 'args': ['bimodal'], 'kwargs':{'partition_object': self.test_partitions['bimodal_kde'], "p": 0.05}, - 'out':{'success':True, 'true_value': "RANDOMIZED"} + 'out':{'success':True, 'observed_value': "RANDOMIZED"} }, { 'args': ['bimodal'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_auto'], "p": 0.05, 'include_config': True}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['bimodal'], 'kwargs':{'partition_object': self.test_partitions['norm_0_1_uniform'], "p": 0.05}, - 'out':{'success':False, 'true_value': "RANDOMIZED"} + 'out':{'success':False, 'observed_value': "RANDOMIZED"} }, { 'args': ['bimodal'], - 'kwargs': {'partition_object': self.test_partitions['norm_0_1_uniform'], "p": 0.05, 'output_format': 'SUMMARY'}, - 'out': {'success': False, 'true_value': "RANDOMIZED", - 'summary_obj': { + 'kwargs': {'partition_object': self.test_partitions['norm_0_1_uniform'], "p": 0.05, 'result_format': 'SUMMARY'}, + 'out': {'success': False, 'observed_value': "RANDOMIZED", + 'details': { 'expected_cdf': { 'cdf_values': [0.0, 0.001, 0.009000000000000001, 0.056, 0.184, 0.429, 0.6779999999999999, 0.8899999999999999, 0.9689999999999999, 0.9929999999999999, 0.9999999999999999], 'x': [-3.721835843971108, -3.02304158492966, -2.324247325888213, -1.625453066846767, -0.926658807805319, -0.227864548763872, 0.470929710277574, 1.169723969319022, 1.868518228360469, 2.567312487401916, 3.266106746443364] @@ -232,10 +226,7 @@ def test_expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self): 'weights': [0.001, 0.008, 0.047, 0.128, 0.245, 0.249, 0.212, 0.079, 0.024, 0.007], 'bins': [-3.721835843971108, -3.02304158492966, -2.324247325888213, -1.625453066846767, -0.926658807805319, -0.227864548763872, 
0.470929710277574, 1.169723969319022, 1.868518228360469, 2.567312487401916, 3.266106746443364] }, - 'element_count': 1000, - 'bootstrap_sample_size': 20, - 'missing_percent': 0.0, - 'missing_count': 0 + 'bootstrap_sample_size': 20 } } } @@ -247,40 +238,40 @@ def test_expect_column_bootstrapped_ks_test_p_value_to_be_greater_than(self): print(t) print(out) self.assertEqual(out['success'], t['out']['success']) - if 'output_format' in t['kwargs'] and t['kwargs']['output_format'] == 'SUMMARY': - self.assertTrue(np.allclose(out['summary_obj']['observed_cdf']['x'],t['out']['summary_obj']['observed_cdf']['x'])) - self.assertTrue(np.allclose(out['summary_obj']['observed_cdf']['cdf_values'],t['out']['summary_obj']['observed_cdf']['cdf_values'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_cdf']['x'],t['out']['summary_obj']['expected_cdf']['x'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_cdf']['cdf_values'],t['out']['summary_obj']['expected_cdf']['cdf_values'])) - self.assertTrue(np.allclose(out['summary_obj']['observed_partition']['bins'],t['out']['summary_obj']['observed_partition']['bins'])) - self.assertTrue(np.allclose(out['summary_obj']['observed_partition']['weights'],t['out']['summary_obj']['observed_partition']['weights'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_partition']['bins'],t['out']['summary_obj']['expected_partition']['bins'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_partition']['weights'],t['out']['summary_obj']['expected_partition']['weights'])) + if 'result_format' in t['kwargs'] and t['kwargs']['result_format'] == 'SUMMARY': + self.assertTrue(np.allclose(out['result']['details']['observed_cdf']['x'],t['out']['details']['observed_cdf']['x'])) + self.assertTrue(np.allclose(out['result']['details']['observed_cdf']['cdf_values'],t['out']['details']['observed_cdf']['cdf_values'])) + 
self.assertTrue(np.allclose(out['result']['details']['expected_cdf']['x'],t['out']['details']['expected_cdf']['x'])) + self.assertTrue(np.allclose(out['result']['details']['expected_cdf']['cdf_values'],t['out']['details']['expected_cdf']['cdf_values'])) + self.assertTrue(np.allclose(out['result']['details']['observed_partition']['bins'],t['out']['details']['observed_partition']['bins'])) + self.assertTrue(np.allclose(out['result']['details']['observed_partition']['weights'],t['out']['details']['observed_partition']['weights'])) + self.assertTrue(np.allclose(out['result']['details']['expected_partition']['bins'],t['out']['details']['expected_partition']['bins'])) + self.assertTrue(np.allclose(out['result']['details']['expected_partition']['weights'],t['out']['details']['expected_partition']['weights'])) def test_expect_column_bootstrapped_ks_test_p_value_to_be_greater_than_expanded_partitions(self): # Extend observed above and below expected out = self.D.expect_column_bootstrapped_ks_test_p_value_to_be_greater_than('norm_0_1', {'bins': np.linspace(-1, 1, 11), 'weights': [0.1] * 10}, - output_format='SUMMARY') - self.assertTrue(out['summary_obj']['observed_cdf']['x'][0] < -1) - self.assertTrue(out['summary_obj']['observed_cdf']['x'][-1] > 1) + result_format='SUMMARY') + self.assertTrue(out['result']['details']['observed_cdf']['x'][0] < -1) + self.assertTrue(out['result']['details']['observed_cdf']['x'][-1] > 1) # Extend observed below expected out = self.D.expect_column_bootstrapped_ks_test_p_value_to_be_greater_than('norm_0_1', {'bins': np.linspace(-10, 1, 11), 'weights': [0.1] * 10}, - output_format='SUMMARY') - self.assertTrue(out['summary_obj']['observed_cdf']['x'][0] == -10) - self.assertTrue(out['summary_obj']['observed_cdf']['x'][-1] > 1) + result_format='SUMMARY') + self.assertTrue(out['result']['details']['observed_cdf']['x'][0] == -10) + self.assertTrue(out['result']['details']['observed_cdf']['x'][-1] > 1) # Extend observed above expected out = 
self.D.expect_column_bootstrapped_ks_test_p_value_to_be_greater_than('norm_0_1', {'bins': np.linspace(-1, 10, 11), 'weights': [0.1] * 10}, - output_format='SUMMARY') - self.assertTrue(out['summary_obj']['observed_cdf']['x'][0] < -1) - self.assertTrue(out['summary_obj']['observed_cdf']['x'][-1] == 10) + result_format='SUMMARY') + self.assertTrue(out['result']['details']['observed_cdf']['x'][0] < -1) + self.assertTrue(out['result']['details']['observed_cdf']['x'][-1] == 10) # Extend expected above and below observed out = self.D.expect_column_bootstrapped_ks_test_p_value_to_be_greater_than('norm_0_1', {'bins': np.linspace(-10, 10, 11), 'weights': [0.1] * 10}, - output_format='SUMMARY') - self.assertTrue(out['summary_obj']['observed_cdf']['x'][0] == -10) - self.assertTrue(out['summary_obj']['observed_cdf']['x'][-1] == 10) + result_format='SUMMARY') + self.assertTrue(out['result']['details']['observed_cdf']['x'][0] == -10) + self.assertTrue(out['result']['details']['observed_cdf']['x'][-1] == 10) def test_expect_column_bootstrapped_ks_test_p_value_to_be_greater_than_bad_partition(self): with self.assertRaises(ValueError): @@ -312,13 +303,13 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous_infinite_partiti 'bins': [-np.inf, 0, 1, 2, 3, np.inf], 'weights': [0.25, 0.25, 0.25, 0.25, 0] } - test_df = ge.dataset.PandasDataSet( + test_df = ge.dataset.PandasDataset( {'x': [-0.5, 0.5, 1.5, 2.5]}) # This should succeed: our data match the partition - out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, output_format='SUMMARY') + out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, result_format='SUMMARY') self.assertTrue(out['success']) - self.assertDictEqual(out['summary_obj']['observed_partition'], summary_observed_partition) - self.assertDictEqual(out['summary_obj']['expected_partition'], summary_expected_partition) + self.assertDictEqual(out['result']['details']['observed_partition'], 
summary_observed_partition) + self.assertDictEqual(out['result']['details']['expected_partition'], summary_expected_partition) # Build one-sided to infinity test partitions test_partition = { @@ -333,13 +324,13 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous_infinite_partiti 'bins': [-np.inf, 0, 1, 2, 3, np.inf], 'weights': [0.2, 0.2, 0.2, 0.2, 0.2] } - test_df = ge.dataset.PandasDataSet( + test_df = ge.dataset.PandasDataset( {'x': [-0.5, 0.5, 1.5, 2.5, 3.5]}) - out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, output_format='SUMMARY') + out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, result_format='SUMMARY') # This should fail: we expect zero weight less than 0 self.assertFalse(out['success']) - self.assertDictEqual(out['summary_obj']['observed_partition'], summary_observed_partition) - self.assertDictEqual(out['summary_obj']['expected_partition'], summary_expected_partition) + self.assertDictEqual(out['result']['details']['observed_partition'], summary_observed_partition) + self.assertDictEqual(out['result']['details']['expected_partition'], summary_expected_partition) # Build two-sided to infinity test partition test_partition = { @@ -354,13 +345,13 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous_infinite_partiti 'bins': [-np.inf, 0, 1, 2, 3, np.inf], 'weights': [0.1, 0.2, 0.4, 0.2, 0.1] } - test_df = ge.dataset.PandasDataSet( + test_df = ge.dataset.PandasDataset( {'x': [-0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 2.5, 3.5]}) # This should succeed: our data match the partition - out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, output_format='SUMMARY') + out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, result_format='SUMMARY') self.assertTrue(out['success']) - self.assertDictEqual(out['summary_obj']['observed_partition'], summary_observed_partition) - 
self.assertDictEqual(out['summary_obj']['expected_partition'], summary_expected_partition) + self.assertDictEqual(out['result']['details']['observed_partition'], summary_observed_partition) + self.assertDictEqual(out['result']['details']['expected_partition'], summary_expected_partition) # Tail weight holdout is not defined for partitions already extending to infinity: with self.assertRaises(ValueError): @@ -378,13 +369,13 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous_serialized_infin 'bins': [-np.inf, 0, 1, 2, 3, np.inf], 'weights': [0.1, 0.2, 0.4, 0.2, 0.1] } - test_df = ge.dataset.PandasDataSet( + test_df = ge.dataset.PandasDataset( {'x': [-0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 2.5, 3.5]}) # This should succeed: our data match the partition - out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, output_format='SUMMARY') + out = test_df.expect_column_kl_divergence_to_be_less_than('x', test_partition, 0.5, result_format='SUMMARY') self.assertTrue(out['success']) - self.assertDictEqual(out['summary_obj']['observed_partition'], summary_observed_partition) - self.assertDictEqual(out['summary_obj']['expected_partition'], summary_expected_partition) + self.assertDictEqual(out['result']['details']['observed_partition'], summary_observed_partition) + self.assertDictEqual(out['result']['details']['expected_partition'], summary_expected_partition) # Confirm serialization of resulting expectations config expectation_config = test_df.get_expectations_config() @@ -406,7 +397,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':True, 'true_value': 'NOTTESTED'} + 'out':{'success':True, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_0_1'], @@ -414,7 +405,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 
0.01}, - 'out':{'success':True, 'true_value': 'NOTTESTED'} + 'out':{'success':True, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_0_1'], @@ -422,7 +413,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':True, 'true_value': 'NOTTESTED'} + 'out':{'success':True, 'observed_value': 'NOTTESTED'} }, ## Note higher threshold example for kde { @@ -431,7 +422,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.3, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':True, 'true_value': 'NOTTESTED'} + 'out':{'success':True, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_1_1'], @@ -439,7 +430,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':False, 'true_value': 'NOTTESTED'} + 'out':{'success':False, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_1_1'], @@ -447,7 +438,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 1e-5, "internal_weight_holdout": 1e-5}, - 'out':{'success':False, 'true_value': 'NOTTESTED'} + 'out':{'success':False, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_1_1'], @@ -455,7 +446,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':False, 'true_value': 'NOTTESTED'} + 'out':{'success':False, 'observed_value': 'NOTTESTED'} }, { 'args': ['norm_1_1'], @@ -463,7 +454,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':False, 'true_value': 'NOTTESTED'} + 'out':{'success':False, 'observed_value': 'NOTTESTED'} }, { 'args': 
['bimodal'], @@ -471,7 +462,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':True, 'true_value': 'NOTTESTED'} + 'out':{'success':True, 'observed_value': 'NOTTESTED'} }, { 'args': ['bimodal'], @@ -479,7 +470,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':False, 'true_value': "NOTTESTED"} + 'out':{'success':False, 'observed_value': "NOTTESTED"} }, { 'args': ['bimodal'], @@ -487,7 +478,7 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01}, - 'out':{'success':False, 'true_value': "NOTTESTED"} + 'out':{'success':False, 'observed_value': "NOTTESTED"} }, { 'args': ['bimodal'], @@ -495,9 +486,9 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): "threshold": 0.1, "tail_weight_holdout": 0.01, "internal_weight_holdout": 0.01, - "output_format": "SUMMARY"}, - 'out': {'success': False, 'true_value': "NOTTESTED", - 'summary_obj': + "result_format": "SUMMARY"}, + 'out': {'success': False, 'observed_value': "NOTTESTED", + 'details': {'observed_partition': {'weights': [0.0, 0.001, 0.006, 0.022, 0.07, 0.107, 0.146, 0.098, 0.04, 0.01, 0.0, 0.5], 'bins': [-np.inf, -3.721835843971108, -3.02304158492966, -2.324247325888213, -1.625453066846767, -0.926658807805319, -0.227864548763872, 0.470929710277574, 1.169723969319022, 1.868518228360469, 2.567312487401916, 3.266106746443364, np.inf] @@ -514,17 +505,17 @@ def test_expect_column_kl_divergence_to_be_less_than_continuous(self): ] for t in T: out = self.D.expect_column_kl_divergence_to_be_less_than(*t['args'], **t['kwargs']) - if t['out']['true_value'] != 'NOTTESTED': - if not np.allclose(out['true_value'],t['out']['true_value']): + if t['out']['observed_value'] != 
'NOTTESTED': + if not np.allclose(out['observed_value'],t['out']['observed_value']): print("Test case error:") print(t) print(out) - self.assertTrue(np.allclose(out['true_value'],t['out']['true_value'])) - if 'output_format' in t['kwargs'] and t['kwargs']['output_format'] == 'SUMMARY': - self.assertTrue(np.allclose(out['summary_obj']['observed_partition']['bins'],t['out']['summary_obj']['observed_partition']['bins'])) - self.assertTrue(np.allclose(out['summary_obj']['observed_partition']['weights'],t['out']['summary_obj']['observed_partition']['weights'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_partition']['bins'],t['out']['summary_obj']['expected_partition']['bins'])) - self.assertTrue(np.allclose(out['summary_obj']['expected_partition']['weights'],t['out']['summary_obj']['expected_partition']['weights'])) + self.assertTrue(np.allclose(out['observed_value'],t['out']['observed_value'])) + if 'result_format' in t['kwargs'] and t['kwargs']['result_format'] == 'SUMMARY': + self.assertTrue(np.allclose(out['result']['details']['observed_partition']['bins'],t['out']['details']['observed_partition']['bins'])) + self.assertTrue(np.allclose(out['result']['details']['observed_partition']['weights'],t['out']['details']['observed_partition']['weights'])) + self.assertTrue(np.allclose(out['result']['details']['expected_partition']['bins'],t['out']['details']['expected_partition']['bins'])) + self.assertTrue(np.allclose(out['result']['details']['expected_partition']['weights'],t['out']['details']['expected_partition']['weights'])) if not out['success'] == t['out']['success']: print("Test case error:") diff --git a/tests/column_map_expectations/__init__.py b/tests/column_map_expectations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/column_map_expectations/expect_column_values_to_be_between.json b/tests/column_map_expectations/expect_column_values_to_be_between.json new file mode 100644 index 000000000000..04b734bb617a 
--- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_be_between.json @@ -0,0 +1,437 @@ +{ + "expectation_type": "expect_column_values_to_be_between", + "datasets": [{ + "data": { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9, "abc"], + "z": [1, 2, 3, 4, 5, null, null, null, null, null], + "ts": [ + "Jan 01 1870 12:00:01", + "Dec 31 1999 12:00:01", + "Jan 01 2000 12:00:01", + "Feb 01 2000 12:00:01", + "Mar 01 2000 12:00:01", + "Apr 01 2000 12:00:01", + "May 01 2000 12:00:01", + "Jun 01 2000 12:00:01", + null, + "Jan 01 2001 12:00:01" + ], + "alpha": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + "numeric": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + }, + "tests": [ + { + "title": "Basic positive test", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 1 + } + }, + { + "title" : "Another basic positive test", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0 + } + }, + { + "title" : "Missing min_value", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 20 + } + }, + { + "title" : "Null min_value", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "min_value": null, + "max_value": 20 + } + }, + { + "title" : "Missing max_value", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "min_value": 0 + } + }, + { + "title" : "Null max_value", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, 
+ "in": { + "column": "x", + "min_value": 0, + "max_value": null + } + }, + { + "title" : "Basic negative test", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 10 + ], + "unexpected_index_list": [9], + "success": false + }, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1 + } + }, + { + "title" : "Another negative test", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 1, + 2 + ], + "unexpected_index_list": [0, 1], + "success": false + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3 + } + }, + { + "title" : "Positive test with result_format=BOOLEAN_ONLY", + "exact_match_out": false, + "out": {"success": true}, + "in": { + "column": "x", + "max_value": 10, + "min_value": 1, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "title" : "Another positive test with result_format=BOOLEAN_ONLY", + "exact_match_out": false, + "out": {"success": true}, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "title" : "Negative test with result_format=BOOLEAN_ONLY", + "exact_match_out": false, + "out": {"success": false}, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "title" : "Another negative test with result_format=BOOLEAN_ONLY", + "exact_match_out": false, + "out": {"success": false}, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "title" : "Positive test with mostly", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "title" : "2nd positive test with mostly", + "exact_match_out": false, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0, + "mostly": 0.9 + } + }, + 
{ + "title" : "3rd positive test with mostly", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 10 + ], + "unexpected_index_list": [ + 9 + ], + "success": true + }, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "title" : "Negative test with mostly", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 1, + 2 + ], + "unexpected_index_list": [ + 0, + 1 + ], + "success": false + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3, + "mostly": 0.9 + } + }, + { + "title" : "Error: improperly mixed types", + "exact_match_out": false, + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.95, + "catch_exceptions": true + }, + "out": {}, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." + } + }, + { + "title" : "Error: improperly mixed types again", + "exact_match_out": false, + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.9, + "catch_exceptions": true + }, + "out": {}, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." + } + }, + { + "title" : "Error: improperly mixed types once more", + "exact_match_out": false, + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.8, + "catch_exceptions": true + }, + "out": {}, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." 
+ } + }, + { + "title" : "Negative test to verify that the denominator for mostly works with missing values", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 5 + ], + "unexpected_index_list": [ + 4 + ], + "success": false + }, + "in": { + "column": "z", + "max_value": 4, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "title" : "Positive test to verify that the denominator for mostly works with missing values", + "exact_match_out": false, + "out": { + "unexpected_list": [ + 5 + ], + "unexpected_index_list": [ + 4 + ], + "success": true + }, + "in": { + "column": "z", + "max_value": 4, + "min_value": 1, + "mostly": 0.8 + } + }, + { + "title" : "Negative test with parse_strings_as_datetimes", + "exact_match_out": false, + "out": { + "unexpected_list": [ + "Jan 01 1870 12:00:01", + "Dec 31 1999 12:00:01", + "Jan 01 2001 12:00:01" + ], + "unexpected_index_list": [ + 0, 1, 9 + ], + "success": false + }, + "in": { + "column": "ts", + "max_value": "Dec 31 2000", + "min_value": "Jan 01 2000", + "parse_strings_as_datetimes": true + } + }, + { + "title" : "Error on string-to-int comparisons", + "exact_match_out": false, + "in": { + "column": "numeric", + "max_value": 10, + "min_value": 0, + "catch_exceptions": true + }, + "out":{}, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." 
+ } + }, + { + "title" : "Test 'min_value is greater than max_value'", + "exact_match_out": false, + "in": { + "column": "x", + "min_value": 10, + "max_value": 0, + "catch_exceptions": true + }, + "out":{}, + "error":{ + "traceback_substring" : "ValueError: min_value cannot be greater than max_value" + } + }, + { + "title": "Test allow_cross_type_comparisons", + "exact_match_out": false, + "in": { + "column": "y", + "min_value": 0, + "max_value": 10, + "allow_cross_type_comparisons": true + }, + "out": { + "unexpected_list": [ + "abc" + ], + "unexpected_index_list": [ + 9 + ], + "success": false + } + }, + { + "title": "Test allow_cross_type_comparisons again", + "exact_match_out": false, + "in": { + "column": "numeric", + "min_value": 0, + "max_value": 10, + "allow_cross_type_comparisons": true + }, + "out": { + "unexpected_list": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], + "unexpected_index_list": [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + ], + "success": false + } + }, + { + "title": "Verify that min_value=max_value=None raises an error", + "exact_match_out": false, + "in": { + "column": "y", + "max_value": null, + "min_value": null, + "catch_exceptions": true + }, + "out":{}, + "error":{ + "traceback_substring" : "cannot both be None" + } + } + ] + }] +} diff --git a/tests/column_map_expectations/expect_column_values_to_be_decreasing.json b/tests/column_map_expectations/expect_column_values_to_be_decreasing.json new file mode 100644 index 000000000000..11e4522e3b57 --- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_be_decreasing.json @@ -0,0 +1,83 @@ +{ + "expectation_type" : "expect_column_values_to_be_decreasing", + "datasets" : [{ + "data" : { + "w" : [1, 2, 3, 7, 6, 5, 4, 3, 2, 1], + "x" : [null, null, 10, 9, 8, 7, null, 6, 5, 4], + "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], + "z" : ["12/1/2016", "11/2/2016", "10/2/2016", "10/2/2016", "8/1/2016", null, null, null, null, null] + }, + "tests" : [{ + "title" : "Positive test with mostly", 
+ "exact_match_out" : false, + "in": { + "column": "w", + "mostly": 0.6 + }, + "out": { + "unexpected_list": [2,3,7], + "unexpected_index_list": [1,2,3], + "success": true + } + },{ + "title" : "Basic negative test", + "exact_match_out" : false, + "in": { + "column": "y" + }, + "out": { + "unexpected_list": [2,3,4], + "unexpected_index_list": [3,6,9], + "success": false + } + },{ + "title" : "Basic negative test with strictly", + "exact_match_out" : false, + "in": { + "column": "y", + "strictly": true + }, + "out": { + "unexpected_list": [1,1,2,2,2,3,3,3,4], + "unexpected_index_list": [1,2,3,4,5,6,7,8,9], + "success": false + } + },{ + "title" : "Basic positive test", + "exact_match_out" : false, + "in": { + "column": "x" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Positive test with parse_strings_as_datetimes", + "exact_match_out" : false, + "in": { + "column": "z", + "parse_strings_as_datetimes" : true + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Negative test with parse_strings_as_datetimes", + "exact_match_out" : false, + "in": { + "column": "z", + "parse_strings_as_datetimes" : true, + "strictly": true + }, + "out": { + "unexpected_list": ["10/2/2016"], + "unexpected_index_list": [3], + "success": false + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_map_expectations/expect_column_values_to_be_in_set.json b/tests/column_map_expectations/expect_column_values_to_be_in_set.json new file mode 100644 index 000000000000..b1cea5415555 --- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_be_in_set.json @@ -0,0 +1,60 @@ +{ + "expectation_type": "expect_column_values_to_be_in_set", + "datasets" : [{ + "data" : { + "x" : [1,2,4], + "y" : [1,2,5], + "z" : ["hello", "jello", "mello"] + }, + "tests" : [ + { + "title": "Basic positive test case", + "exact_match_out": false, + "in": { + "column": 
"x", + "values_set": [1,2,4] + }, + "out": { + "success": true + } + }, + { + "title": "Negative test case", + "exact_match_out": false, + "in": { + "column": "x", + "values_set": [2, 4] + }, + "out": { + "success": false, + "unexpected_index_list": [0], + "unexpected_list": [1] + } + }, + { + "title": "Empty values_set", + "exact_match_out": false, + "in": { + "column": "x", + "values_set": [] + }, + "out": { + "success": false, + "unexpected_index_list": [0, 1, 2], + "unexpected_list": [1, 2, 4] + } + }, + { + "title": "Basic strings set", + "exact_match_out": false, + "in": { + "column": "z", + "values_set": ["hello", "jello", "mello"] + }, + "out": { + "success": true + } + } + ] + }] +} \ No newline at end of file diff --git a/tests/column_map_expectations/expect_column_values_to_be_increasing.json b/tests/column_map_expectations/expect_column_values_to_be_increasing.json new file mode 100644 index 000000000000..2ecf7f8bd2f8 --- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_be_increasing.json @@ -0,0 +1,94 @@ +{ + "expectation_type" : "expect_column_values_to_be_increasing", + "datasets" : [{ + "data" : { + "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], + "x" : [2, 3, 4, 5, 6, 7, 8, 9, 10, null], + "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], + "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], + "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", null, null, null, null, null], + "a" : [null, 0, null, null, 1, null, null, 2, null, null], + "b" : [null, 0, null, null, 2, null, null, 1, null, null] + }, + "tests" : [{ + "title" : "Basic positive test", + "exact_match_out" : false, + "in": { + "column": "x" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "2nd basic positive test", + "exact_match_out" : false, + "in": { + "column": "y" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Positive test with 
strictly", + "exact_match_out" : false, + "in": { + "column": "y", + "strictly": true + }, + "out": { + "unexpected_list": [1,1,2,2,3,3], + "unexpected_index_list": [1,2,4,5,7,8], + "success": false + } + },{ + "title" : "Basic negative test", + "exact_match_out" : false, + "in": { + "column": "w" + }, + "out": { + "unexpected_list": [4,3,2,1], + "unexpected_index_list": [6,7,8,9], + "success": false + } + },{ + "title" : "Positive test with parse_strings_as_datetimes", + "exact_match_out" : false, + "in": { + "column": "zz", + "parse_strings_as_datetimes": true + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Positive test with interspersed nulls", + "exact_match_out" : false, + "in": { + "column": "a" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Negative test with interspersed nulls", + "exact_match_out" : false, + "in": { + "column": "b" + }, + "out": { + "unexpected_list": [1], + "unexpected_index_list": [7], + "success": false + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_map_expectations/expect_column_values_to_match_json_schema.json b/tests/column_map_expectations/expect_column_values_to_match_json_schema.json new file mode 100644 index 000000000000..18d068a2983c --- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_match_json_schema.json @@ -0,0 +1,60 @@ +{ + "expectation_type" : "expect_column_values_to_match_json_schema", + "datasets" : [{ + "data" : { + "w" : [2, 3, 4, 5, 6, 7, 8, 9, 10, null], + "x" : ["{\"a\":1}", "{\"a\":2}", "{\"a\":3}", "{\"a\":4}", "{\"a\":5}", null, null, null, null, null] + }, + "tests" : [{ + "title" : "Basic positive test", + "exact_match_out" : false, + "in": { + "column": "x", + "json_schema": {} + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Positive test with a more complex schema", + 
"exact_match_out" : false, + "in": { + "column": "x", + "json_schema": { + "properties": { + "a": { + "type": "integer" + } + }, + "required": ["a"] + } + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Basic negative test", + "exact_match_out" : false, + "in": { + "column": "x", + "json_schema": { + "properties": { + "a": { + "type": "integer" + } + }, + "required": ["b"] + } + }, + "out": { + "unexpected_list": ["{\"a\":1}", "{\"a\":2}", "{\"a\":3}", "{\"a\":4}", "{\"a\":5}"], + "unexpected_index_list": [0,1,2,3,4], + "success": false + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_map_expectations/expect_column_values_to_match_regex_list.json b/tests/column_map_expectations/expect_column_values_to_match_regex_list.json new file mode 100644 index 000000000000..c9dbc7271ada --- /dev/null +++ b/tests/column_map_expectations/expect_column_values_to_match_regex_list.json @@ -0,0 +1,73 @@ +{ + "expectation_type" : "expect_column_values_to_match_regex_list", + "datasets" : [{ + "data" : { + "w" : ["111", "222", "333", "123", "321", "444", "456", "654", "555", null], + "x" : ["man", "plan", "canal", "panama", "hat", "bat", "bit", "bot", "but", "bet"] + }, + "tests" : [{ + "title" : "Basic positive test", + "exact_match_out" : false, + "in": { + "column": "w", + "regex_list": ["\\d+"] + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Positive test with multiple regexes", + "exact_match_out" : false, + "in": { + "column": "w", + "regex_list": ["[123]+", "[456]+"], + "match_on": "any" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + },{ + "title" : "Basic negative test", + "exact_match_out" : false, + "in": { + "column": "w", + "regex_list": ["[123]+", "[456]+"], + "match_on": "all" + }, + "out": { + "unexpected_list": ["111", "222", "333", "123", "321", "444", "456", "654", 
"555"], + "unexpected_index_list": [0,1,2,3,4,5,6,7,8], + "success": false + } + },{ + "title" : "Negative test with more string-ish strings", + "exact_match_out" : false, + "in": { + "column": "x", + "regex_list": ["^.*a.*$"] + }, + "out": { + "unexpected_list": ["bit", "bot", "but", "bet"], + "unexpected_index_list": [6,7,8,9], + "success": false + } + },{ + "title" : "Positive test with match_on=any", + "exact_match_out" : false, + "in": { + "column": "x", + "regex_list": ["^.*a.*$", "b.t"], + "match_on": "any" + }, + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + } + }] + }] +} \ No newline at end of file diff --git a/tests/column_map_expectations/test_column_map_expectations.py b/tests/column_map_expectations/test_column_map_expectations.py new file mode 100644 index 000000000000..535728c25332 --- /dev/null +++ b/tests/column_map_expectations/test_column_map_expectations.py @@ -0,0 +1,64 @@ +### +### +# +# This file should not be modified. To adjust test cases, edit the related json file(s). 
+# +### +### + + +import pytest + +import os +import json +import glob +import warnings + +from tests.test_utils import get_dataset, candidate_test_is_on_temporary_notimplemented_list, evaluate_json_test + +contexts = ['PandasDataset', 'SqlAlchemyDataset'] + +def pytest_generate_tests(metafunc): + + #Load all the JSON files in the directory + dir_path = os.path.dirname(os.path.realpath(__file__)) + test_configuration_files = glob.glob(dir_path+'/*.json') + + parametrized_tests = [] + ids = [] + for c in contexts: + for filename in test_configuration_files: + file = open(filename) + test_configuration = json.load(file) + + if candidate_test_is_on_temporary_notimplemented_list(c, test_configuration["expectation_type"]): + warnings.warn("Skipping generation of tests for expectation " + test_configuration["expectation_type"] + + " and context " + c) + else: + for d in test_configuration['datasets']: + my_dataset = get_dataset(c, d["data"]) + + for test in d["tests"]: + parametrized_tests.append({ + "expectation_type": test_configuration["expectation_type"], + "dataset": my_dataset, + "test": test, + }) + + ids.append(c+":"+test_configuration["expectation_type"]+":"+test["title"]) + + metafunc.parametrize( + "test_case", + parametrized_tests, + ids=ids + ) + +def test_case_runner(test_case): + # Note: this should never be done in practice, but we are wiping expectations to reuse datasets during testing. 
+ test_case["dataset"].initialize_expectations() + + evaluate_json_test( + test_case["dataset"], + test_case["expectation_type"], + test_case["test"] + ) diff --git a/tests/other_expectations/__init__.py b/tests/other_expectations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/other_expectations/expect_column_to_exist.json b/tests/other_expectations/expect_column_to_exist.json new file mode 100644 index 000000000000..664a5139b117 --- /dev/null +++ b/tests/other_expectations/expect_column_to_exist.json @@ -0,0 +1,49 @@ +{ + "expectation_type": "expect_column_to_exist", + "datasets": [{ + "data": { + "c1": [4, 5, 6, 7], + "c2": ["a", "b", "c", "d"], + "c3": [null, null, null, null] + }, + "tests": [{ + "title": "Basic positive test", + "exact_match_out": false, + "in": { + "column": "c1" + }, + "out": { + "success": true + } + }, { + "title": "Basic negative test", + "exact_match_out": false, + "in": { + "column": "covfefe" + }, + "out": { + "success": false + } + }, { + "title": "Positive test with column order", + "exact_match_out": false, + "in": { + "column": "c2", + "column_index": 1 + }, + "out": { + "success": true + } + }, { + "title": "Column exists but wrong index", + "exact_match_out": false, + "in": { + "column": "c3", + "column_index": 4 + }, + "out": { + "success": false + } + }] + }] +} diff --git a/tests/other_expectations/expect_table_columns_to_match_ordered_list_test_set.json b/tests/other_expectations/expect_table_columns_to_match_ordered_list_test_set.json new file mode 100644 index 000000000000..e4c277b238ef --- /dev/null +++ b/tests/other_expectations/expect_table_columns_to_match_ordered_list_test_set.json @@ -0,0 +1,48 @@ +{ + "expectation_type" : "expect_table_columns_to_match_ordered_list", + "datasets" : [ + { + "data": { + "c1" : [4,5,6,7], + "c2" : ["a","b","c","d"], + "c3" : [null,null,null,null] + }, + "tests": [{ + "title": "Basic positive test", + "exact_match_out": false, + "in":{ + 
"column_list": ["c1", "c2", "c3"] + }, + "out":{ + "success":true + } + },{ + "title": "Negative test: column is missing", + "exact_match_out": false, + "in":{ + "column_list": ["c1", "c2"] + }, + "out":{ + "success":false + } + },{ + "title": "Negative test: column is misnamed", + "exact_match_out": false, + "in":{ + "column_list": ["b1", "c2", "c3"] + }, + "out":{ + "success":false + } + },{ + "title": "Negative test: Columns are right, but ordering wrong", + "exact_match_out": false, + "in":{ + "column_list": ["c3", "c2", "c1"] + }, + "out":{ + "success":false + } + }] + }] +} \ No newline at end of file diff --git a/tests/other_expectations/expect_table_row_count_to_be_between.json b/tests/other_expectations/expect_table_row_count_to_be_between.json new file mode 100644 index 000000000000..d71ade1c0ded --- /dev/null +++ b/tests/other_expectations/expect_table_row_count_to_be_between.json @@ -0,0 +1,89 @@ +{ + "expectation_type" : "expect_table_row_count_to_be_between", + "datasets" : [{ + "data" : { + "c1" : [4,5,6,7], + "c2" : ["a","b","c","d"], + "c3" : [null,null,null,null] + }, + "tests": [{ + "title": "Basic positive test", + "exact_match_out" : false, + "in":{ + "min_value": 3, + "max_value": 5 + }, + "out":{ + "success":true, + "observed_value": 4 + } + },{ + "title": "Basic negative test", + "exact_match_out" : false, + "in":{ + "min_value": 0, + "max_value": 1 + }, + "out":{ + "success":false, + "observed_value": 4 + } + },{ + "title": "Positive test with null min_value", + "exact_match_out" : false, + "in":{ + "min_value": null, + "max_value": 4 + }, + "out":{ + "success":true, + "observed_value": 4 + } + },{ + "title": "Negative test with max < min", + "exact_match_out" : false, + "in":{ + "min_value": 1, + "max_value": 0 + }, + "out":{ + "success":false, + "observed_value": 4 + } + },{ + "title": "Positive test with null min", + "exact_match_out" : false, + "in":{ + "min_value": null, + "max_value": 10 + }, + "out":{ + "success": true, + 
"observed_value": 4 + } + },{ + "title": "Test error handling for non-int min_value", + "exact_match_out" : false, + "in":{ + "min_value": "quack", + "max_value": 0, + "catch_exceptions": true + }, + "out" : {}, + "error":{ + "traceback_substring": "must be integers" + } + },{ + "title": "Test error handling for non-int max_value", + "exact_match_out" : false, + "in":{ + "max_value": "quack", + "catch_exceptions": true + }, + "out" : {}, + "error":{ + "traceback_substring": "must be integers" + } + }] + }] +} \ No newline at end of file diff --git a/tests/other_expectations/expect_table_row_count_to_equal.json b/tests/other_expectations/expect_table_row_count_to_equal.json new file mode 100644 index 000000000000..63ca0ff40a02 --- /dev/null +++ b/tests/other_expectations/expect_table_row_count_to_equal.json @@ -0,0 +1,87 @@ +{ + "expectation_type": "expect_table_row_count_to_equal", + "datasets": [ + { + "data": { + "c1": [ + 4, + 5, + null, + 7 + ], + "c2": [ + "a", + null, + "c", + "d" + ], + "c3": [ + null, + null, + null, + null + ] + }, + "tests": [ + { + "title": "Basic positive case", + "exact_match_out": false, + + "in": [ + 4 + ], + "kwargs": {}, + "out": { + "success": true, + "observed_value": 4 + } + }, + { + "title": "Basic negative case - upper error", + "exact_match_out": false, + "in": [ + 5 + ], + "out": { + "success": false, + "observed_value": 4 + } + }, + { + "title": "Basic negative case - lower error", + "exact_match_out": false, + "in": [ + 0 + ], + "out": { + "success": false, + "observed_value": 4 + } + }, + { + "title": "Basic negative case - kwargs args", + "exact_match_out": false, + "in": { + "value": 3 + }, + "out": { + "success": false, + "observed_value": 4 + } + }, + { + "title": "Invalid arguments-throws exception", + "exact_match_out": false, + "in": { + "value": "hello", + "catch_exceptions": true + }, + "out": { + "success": false, + "traceback_substring": "ValueError" + } + } + ] + } + ] +} \ No newline at end of file diff --git 
a/tests/other_expectations/test_other_expectations.py b/tests/other_expectations/test_other_expectations.py new file mode 100644 index 000000000000..1243a31fb07b --- /dev/null +++ b/tests/other_expectations/test_other_expectations.py @@ -0,0 +1,65 @@ +### +### +# +# This file should not be modified. To adjust test cases, edit the related json file(s). +# +### +### + + +import pytest + +import os +import json +import glob +import warnings + +from tests.test_utils import get_dataset, candidate_test_is_on_temporary_notimplemented_list, evaluate_json_test + +contexts = ['PandasDataset', 'SqlAlchemyDataset'] + + +def pytest_generate_tests(metafunc): + # Load all the JSON files in the directory + dir_path = os.path.dirname(os.path.realpath(__file__)) + test_configuration_files = glob.glob(dir_path + '/*.json') + + parametrized_tests = [] + ids = [] + for c in contexts: + for filename in test_configuration_files: + file = open(filename) + test_configuration = json.load(file) + + if candidate_test_is_on_temporary_notimplemented_list(c, test_configuration["expectation_type"]): + warnings.warn("Skipping generation of tests for expectation " + test_configuration["expectation_type"] + + " and context " + c) + else: + for d in test_configuration['datasets']: + my_dataset = get_dataset(c, d["data"]) + + for test in d["tests"]: + parametrized_tests.append({ + "expectation_type": test_configuration["expectation_type"], + "dataset": my_dataset, + "test": test, + }) + + ids.append(c + ":" + test_configuration["expectation_type"] + ":" + test["title"]) + + metafunc.parametrize( + "test_case", + parametrized_tests, + ids=ids + ) + + +def test_case_runner(test_case): + # Note: this should never be done in practice, but we are wiping expectations to reuse datasets during testing. 
+ test_case["dataset"].initialize_expectations() + + evaluate_json_test( + test_case["dataset"], + test_case["expectation_type"], + test_case["test"] + ) diff --git a/tests/sqlalchemy_dataset/__init__.py b/tests/sqlalchemy_dataset/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/sqlalchemy_dataset/test_sqlalchemydataset.py b/tests/sqlalchemy_dataset/test_sqlalchemydataset.py new file mode 100644 index 000000000000..89f2cc14041a --- /dev/null +++ b/tests/sqlalchemy_dataset/test_sqlalchemydataset.py @@ -0,0 +1,84 @@ +import pytest + +from great_expectations.dataset import MetaSqlAlchemyDataset, SqlAlchemyDataset +import sqlalchemy as sa +import pandas as pd + +@pytest.fixture +def custom_dataset(): + class CustomSqlAlchemyDataset(SqlAlchemyDataset): + + @MetaSqlAlchemyDataset.column_map_expectation + def expect_column_values_to_equal_2(self, column): + return (sa.column(column) == 2) + + @MetaSqlAlchemyDataset.column_aggregate_expectation + def expect_column_mode_to_equal_0(self, column): + mode_query = sa.select([ + sa.column(column).label('value'), + sa.func.count(sa.column(column)).label('frequency') + ]).select_from(sa.table(self.table_name)).group_by(sa.column(column)).order_by( + sa.desc(sa.column('frequency'))) + + mode = self.engine.execute(mode_query).scalar() + return { + "success": mode == 0, + "result": { + "observed_value": mode, + } + } + + @MetaSqlAlchemyDataset.column_aggregate_expectation + def broken_aggregate_expectation(self, column): + return { + "not_a_success_value": True, + } + + @MetaSqlAlchemyDataset.column_aggregate_expectation + def another_broken_aggregate_expectation(self, column): + return { + "success": True, + "result": { + "no_observed_value": True + } + } + + engine = sa.create_engine('sqlite://') + + data = pd.DataFrame({ + "c1": [2, 2, 2, 2, 0], + "c2": [4, 4, 5, None, 7], + "c3": ["cat", "dog", "fish", "tiger", "elephant"] + + }) + + data.to_sql(name='test_data', con=engine, index=False) + 
custom_dataset = CustomSqlAlchemyDataset('test_data', engine=engine) + + return custom_dataset + + +def test_custom_sqlalchemydataset(custom_dataset): + custom_dataset.initialize_expectations() + custom_dataset.set_default_expectation_argument("result_format", {"result_format": "COMPLETE"}) + + result = custom_dataset.expect_column_values_to_equal_2('c1') + assert result['success'] == False + assert result['result']['unexpected_list'] == [0] + + result = custom_dataset.expect_column_mode_to_equal_0('c2') + assert result['success'] == False + assert result['result']['observed_value'] == 4 + + +def test_broken_decorator_errors(custom_dataset): + custom_dataset.initialize_expectations() + custom_dataset.set_default_expectation_argument("result_format", {"result_format": "COMPLETE"}) + + with pytest.raises(ValueError) as err: + custom_dataset.broken_aggregate_expectation('c1') + assert "Column aggregate expectation failed to return required information: success" in str(err) + + with pytest.raises(ValueError) as err: + custom_dataset.another_broken_aggregate_expectation('c1') + assert "Column aggregate expectation failed to return required information: observed_value" in str(err) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1941946c9d5f..816f21f3d2c8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,7 @@ import subprocess import great_expectations as ge -from .util import assertDeepAlmostEqual +from .test_utils import assertDeepAlmostEqual def get_system_command_result(command_str): p = subprocess.Popen( @@ -41,7 +41,7 @@ def test_cli_arguments(self): # print get_system_command_result('python '+filepath+'/../bin/great_expectations validate') # self.assertEqual( # get_system_command_result('python '+filepath+'/../bin/great_expectations validate'), - # {'output': '', 'errors': 'usage: great_expectations validate [-h] [--output_format OUTPUT_FORMAT]\n [--catch_exceptions]\n [--include_config INCLUDE_CONFIG]\n [--only_return_failures]\n 
[--custom_dataset_module CUSTOM_DATASET_MODULE]\n [--custom_dataset_class CUSTOM_DATASET_CLASS]\n data_set expectations_config_file\ngreat_expectations validate: error: too few arguments\n'} + # {'output': '', 'errors': 'usage: great_expectations validate [-h] [--result_format result_format]\n [--catch_exceptions]\n [--include_config INCLUDE_CONFIG]\n [--only_return_failures]\n [--custom_dataset_module CUSTOM_DATASET_MODULE]\n [--custom_dataset_class CUSTOM_DATASET_CLASS]\n data_set expectations_config_file\ngreat_expectations validate: error: too few arguments\n'} # ) command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/test_sets/Titanic.csv '+filepath+'/test_sets/titanic_expectations.json' @@ -57,223 +57,56 @@ def test_cli_arguments(self): print(ve) json_result = {} - print("^"*80) - print(json.dumps(json_result, indent=2)) + # print("^"*80) + # print(json.dumps(json_result, indent=2)) self.maxDiff = None + with open(filepath + '/test_sets/expected_cli_results_default.json', 'r') as f: + expected_cli_results = json.load(f) + assertDeepAlmostEqual(self, json_result, - { - "results": [ - { - "kwargs": { - "column": "Name", - "output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "kwargs": { - "column": "PClass", - "output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "kwargs": { - "column": "Age", - "output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "kwargs": { - "column": "Sex", - "output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "kwargs": { - "column": "Survived", - 
"output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "kwargs": { - "column": "SexCode", - "output_format": "SUMMARY" - }, - "exception_traceback": None, - "expectation_type": "expect_column_to_exist", - "success": True, - "raised_exception": False - }, - { - "exception_traceback": None, - "true_value": 30.397989417989415, - "expectation_type": "expect_column_mean_to_be_between", - "success": True, - "raised_exception": False, - "kwargs": { - "column": "Age", - "max_value": 40, - "output_format": "SUMMARY", - "min_value": 20 - }, - "summary_obj": { - "element_count": 1313, - "missing_percent": 0.4242193450114242, - "missing_count": 557 - } - }, - { - "exception_traceback": None, - "summary_obj": { - "exception_percent_nonmissing": 0.0, - "missing_count": 557, - "partial_exception_index_list": [], - "element_count": 1313, - "exception_count": 0, - "missing_percent": 0.4242193450114242, - "exception_percent": 0.0, - "partial_exception_list": [], - "partial_exception_counts": {} - }, - "success": True, - "raised_exception": False, - "kwargs": { - "column": "Age", - "max_value": 80, - "output_format": "SUMMARY", - "min_value": 0 - }, - "expectation_type": "expect_column_values_to_be_between" - }, - { - "exception_traceback": None, - "summary_obj": { - "exception_percent_nonmissing": 0.002284843869002285, - "missing_count": 0, - "partial_exception_index_list": [ - 394, - 456, - 1195 - ], - "element_count": 1313, - "exception_count": 3, - "missing_percent": 0.0, - "exception_percent": 0.002284843869002285, - "partial_exception_list": [ - "Downton (?Douton), Mr William James", - "Jacobsohn Mr Samuel", - "Seman Master Betros" - ], - "partial_exception_counts": { - "Seman Master Betros": 1, - "Downton (?Douton), Mr William James": 1, - "Jacobsohn Mr Samuel": 1 - } - }, - "success": True, - "raised_exception": False, - "kwargs": { - "regex": "[A-Z][a-z]+(?: 
\\([A-Z][a-z]+\\))?, ", - "column": "Name", - "output_format": "SUMMARY", - "mostly": 0.95 - }, - "expectation_type": "expect_column_values_to_match_regex" - }, - { - "exception_traceback": None, - "summary_obj": { - "exception_percent_nonmissing": 0.0007616146230007616, - "missing_count": 0, - "partial_exception_index_list": [ - 456 - ], - "element_count": 1313, - "exception_count": 1, - "missing_percent": 0.0, - "exception_percent": 0.0007616146230007616, - "partial_exception_list": [ - "*" - ], - "partial_exception_counts": { - "*": 1 - } - }, - "success": False, - "raised_exception": False, - "kwargs": { - "column": "PClass", - "values_set": [ - "1st", - "2nd", - "3rd" - ], - "output_format": "SUMMARY" - }, - "expectation_type": "expect_column_values_to_be_in_set" - } - ] - } + expected_cli_results ) + # # command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f' # self.assertEqual( # get_system_command_result(command_str), - # {'output': '{\n "results": [\n {\n "exception_traceback": null, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "raised_exception": false, \n "kwargs": {\n "column": "PClass", \n "output_format": "BASIC", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }, \n "summary_obj": {\n "exception_percent": 0.0007616146230007616, \n "partial_exception_list": [\n "*"\n ], \n "exception_percent_nonmissing": 0.0007616146230007616, \n "exception_count": 1\n }\n }\n ]\n}\n', 'errors': ''} + # {'output': '{\n "results": [\n {\n "exception_traceback": null, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "raised_exception": false, \n "kwargs": {\n "column": "PClass", \n "result_format": "BASIC", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }, \n "summary_obj": {\n "unexpected_percent": 0.0007616146230007616, \n "partial_unexpected_list": [\n "*"\n ], \n 
"unexpected_percent_nonmissing": 0.0007616146230007616, \n "unexpected_count": 1\n }\n }\n ]\n}\n', 'errors': ''} # ) # # command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f -o=COMPLETE' # self.assertEqual( # get_system_command_result(command_str), - # {'output': '{\n "results": [\n {\n "exception_traceback": null, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "exception_list": [\n "*"\n ], \n "raised_exception": false, \n "kwargs": {\n "column": "PClass", \n "output_format": "COMPLETE", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }, \n "exception_index_list": [\n 456\n ]\n }\n ]\n}\n', 'errors': ''} + # {'output': '{\n "results": [\n {\n "exception_traceback": null, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": False, \n "unexpected_list": [\n "*"\n ], \n "raised_exception": false, \n "kwargs": {\n "column": "PClass", \n "result_format": "COMPLETE", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }, \n "unexpected_index_list": [\n 456\n ]\n }\n ]\n}\n', 'errors': ''} # ) # # command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f -o=BOOLEAN_ONLY' # self.assertEqual( # get_system_command_result(command_str), - # {'output': '{\n "results": [\n {\n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "kwargs": {\n "column": "PClass", \n "output_format": "BOOLEAN_ONLY", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }\n }\n ]\n}\n', 'errors': ''} + # {'output': '{\n "results": [\n {\n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "kwargs": {\n "column": "PClass", \n "result_format": "BOOLEAN_ONLY", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }\n }\n ]\n}\n', 'errors': ''} # ) # # command_str = 'python 
'+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f -e' # # print get_system_command_result(command_str) # self.assertEqual( # get_system_command_result(command_str), - # {'output': '{\n "results": [\n {\n "summary_obj": {\n "exception_percent": 0.0007616146230007616, \n "partial_exception_list": [\n "*"\n ], \n "exception_percent_nonmissing": 0.0007616146230007616, \n "exception_count": 1\n }, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "kwargs": {\n "column": "PClass", \n "output_format": "BASIC", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }\n }\n ]\n}\n', 'errors': ''} + # {'output': '{\n "results": [\n {\n "summary_obj": {\n "unexpected_percent": 0.0007616146230007616, \n "partial_unexpected_list": [\n "*"\n ], \n "unexpected_percent_nonmissing": 0.0007616146230007616, \n "unexpected_count": 1\n }, \n "expectation_type": "expect_column_values_to_be_in_set", \n "success": false, \n "kwargs": {\n "column": "PClass", \n "result_format": "BASIC", \n "value_set": [\n "1st", \n "2nd", \n "3rd"\n ]\n }\n }\n ]\n}\n', 'errors': ''} # ) # # command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f -e' # # print get_system_command_result(command_str) # self.assertEqual( # get_system_command_result(command_str)["output"], - # "{\n \"results\": [\n {\n \"summary_obj\": {\n \"exception_percent\": 0.0007616146230007616, \n \"partial_exception_list\": [\n \"*\"\n ], \n \"exception_percent_nonmissing\": 0.0007616146230007616, \n \"exception_count\": 1\n }, \n \"expectation_type\": \"expect_column_values_to_be_in_set\", \n \"success\": false, \n \"kwargs\": {\n \"column\": \"PClass\", \n \"output_format\": \"BASIC\", \n \"value_set\": [\n \"1st\", \n \"2nd\", \n \"3rd\"\n ]\n }\n }\n ]\n}\n" + # "{\n \"results\": [\n {\n \"summary_obj\": {\n 
\"unexpected_percent\": 0.0007616146230007616, \n \"partial_unexpected_list\": [\n \"*\"\n ], \n \"unexpected_percent_nonmissing\": 0.0007616146230007616, \n \"unexpected_count\": 1\n }, \n \"expectation_type\": \"expect_column_values_to_be_in_set\", \n \"success\": false, \n \"kwargs\": {\n \"column\": \"PClass\", \n \"result_format\": \"BASIC\", \n \"value_set\": [\n \"1st\", \n \"2nd\", \n \"3rd\"\n ]\n }\n }\n ]\n}\n" # ) #print(filepath) command_str = 'python ' + filepath + '/../bin/great_expectations validate ' \ + filepath + '/test_sets/Titanic.csv '\ + filepath + '/test_sets/titanic_custom_expectations.json -f -m='\ - + filepath + '/test_fixtures/custom_dataset.py -c=CustomPandasDataSet' + + filepath + '/test_fixtures/custom_dataset.py -c=CustomPandasDataset' try: result = get_system_command_result(command_str) json_result = json.loads(result["output"]) @@ -285,79 +118,17 @@ def test_cli_arguments(self): json_result = {} self.maxDiff = None - print(json.dumps(json_result, indent=2)) + # print(json.dumps(json_result, indent=2)) + + #Remove partial unexpected counts, because we can't guarantee that they'll be the same every time. + del json_result["results"][0]["result"]['partial_unexpected_counts'] + + with open(filepath + '/test_sets/expected_cli_results_custom.json', 'r') as f: + expected_cli_results = json.load(f) - #Remove partial exception counts, because we can't guarantee that they'll be the same every time. 
- del json_result["results"][0]["summary_obj"]['partial_exception_counts'] self.assertEqual( json_result, - { - "results": [ - { - "exception_traceback": None, - "summary_obj": { - "exception_percent_nonmissing": 0.5026656511805027, - "missing_count": 0, - "partial_exception_index_list": [ - 0, - 5, - 6, - 7, - 8, - 9, - 11, - 12, - 13, - 14, - 15, - 18, - 20, - 21, - 22, - 23, - 27, - 31, - 32, - 33 - ], - "element_count": 1313, - "exception_count": 660, - "missing_percent": 0.0, - "exception_percent": 0.5026656511805027, - "partial_exception_list": [ - "Allen, Miss Elisabeth Walton", - "Anderson, Mr Harry", - "Andrews, Miss Kornelia Theodosia", - "Andrews, Mr Thomas, jr", - "Appleton, Mrs Edward Dale (Charlotte Lamson)", - "Artagaveytia, Mr Ramon", - "Astor, Mrs John Jacob (Madeleine Talmadge Force)", - "Aubert, Mrs Leontine Pauline", - "Barkworth, Mr Algernon H", - "Baumann, Mr John D", - "Baxter, Mrs James (Helene DeLaudeniere Chaput)", - "Beckwith, Mr Richard Leonard", - "Behr, Mr Karl Howell", - "Birnbaum, Mr Jakob", - "Bishop, Mr Dickinson H", - "Bishop, Mrs Dickinson H (Helen Walton)", - "Bonnell, Miss Caroline", - "Bowerman, Miss Elsie Edith", - "Bradley, Mr George", - "Brady, Mr John Bertram" - ], - }, - "success": False, - "raised_exception": False, - "kwargs": { - "column": "Name", - "output_format": "SUMMARY" - }, - "expectation_type": "expect_column_values_to_have_odd_lengths" - } - ] - } - + expected_cli_results ) # command_str = 'python '+filepath+'/../bin/great_expectations validate '+filepath+'/examples/Titanic.csv '+filepath+'/examples/titanic_expectations.json -f' # # print get_system_command_result(command_str) @@ -372,7 +143,7 @@ def test_cli_arguments(self): # "raised_exception": False, # "kwargs": { # "column": "PClass", - # "output_format": "BASIC", + # "result_format": "BASIC", # "include_config": False, # "value_set": [ # "1st", @@ -381,12 +152,12 @@ def test_cli_arguments(self): # ] # }, # "summary_obj": { - # "exception_percent": 
0.0007616146230007616, - # "partial_exception_list": [ + # "unexpected_percent": 0.0007616146230007616, + # "partial_unexpected_list": [ # "*" # ], - # "exception_percent_nonmissing": 0.0007616146230007616, - # "exception_count": 1 + # "unexpected_percent_nonmissing": 0.0007616146230007616, + # "unexpected_count": 1 # } # } # ] diff --git a/tests/test_data_contexts/test_data_contexts.py b/tests/test_data_contexts/test_data_contexts.py new file mode 100644 index 000000000000..c412734d0269 --- /dev/null +++ b/tests/test_data_contexts/test_data_contexts.py @@ -0,0 +1,52 @@ +import pytest + +import os +import sqlalchemy as sa +import pandas as pd + +from great_expectations import get_data_context +from great_expectations.dataset import PandasDataset, SqlAlchemyDataset + + +@pytest.fixture(scope="module") +def test_db_connection_string(tmpdir_factory): + df1 = pd.DataFrame({'col_1': [1, 2, 3, 4, 5], 'col_2': ['a', 'b', 'c', 'd', 'e']}) + df2 = pd.DataFrame({'col_1': [0, 1, 2, 3, 4], 'col_2': ['b', 'c', 'd', 'e', 'f']}) + + path = tmpdir_factory.mktemp("db_context").join("test.db") + engine = sa.create_engine('sqlite:///' + str(path)) + df1.to_sql('table_1', con=engine, index=True) + df2.to_sql('table_2', con=engine, index=True) + + # Return a connection string to this newly-created db + return 'sqlite:///' + str(path) + +@pytest.fixture(scope="module") +def test_folder_connection_path(tmpdir_factory): + df1 = pd.DataFrame({'col_1': [1, 2, 3, 4, 5], 'col_2': ['a', 'b', 'c', 'd', 'e']}) + path = tmpdir_factory.mktemp("csv_context") + df1.to_csv(path.join("test.csv")) + + return str(path) + +def test_invalid_data_context(): + # Test an unknown data context name + with pytest.raises(ValueError) as err: + context = get_data_context('what_a_ridiculous_name', None) + assert "Unknown data context." 
in str(err) + + +def test_sqlalchemy_data_context(test_db_connection_string): + context = get_data_context('SqlAlchemy', test_db_connection_string) + + assert context.list_datasets() == ['table_1', 'table_2'] + dataset = context.get_dataset('table_1') + assert isinstance(dataset, SqlAlchemyDataset) + + +def test_pandas_data_context(test_folder_connection_path): + context = get_data_context('PandasCSV', test_folder_connection_path) + + assert context.list_datasets() == ['test.csv'] + dataset = context.get_dataset('test.csv') + assert isinstance(dataset, PandasDataset) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 39e9407be852..73dfdc2025b1 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -14,7 +14,7 @@ class TestDataset(unittest.TestCase): def test_dataset(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1,2,5], 'z' : ['hello', 'jello', 'mello'], @@ -65,7 +65,7 @@ def test_dataset(self): ) def test_expectation_meta(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1,2,5], 'z' : ['hello', 'jello', 'mello'], @@ -95,7 +95,7 @@ def test_expectation_meta(self): #TODO: !!! 
Add tests for save_expectation def test_set_default_expectation_argument(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1,2,5], 'z' : ['hello', 'jello', 'mello'], @@ -106,25 +106,25 @@ def test_set_default_expectation_argument(self): { "include_config" : False, "catch_exceptions" : False, - "output_format" : 'BASIC', + "result_format" : 'BASIC', } ) - df.set_default_expectation_argument("output_format", "SUMMARY") + df.set_default_expectation_argument("result_format", "SUMMARY") self.assertEqual( df.get_default_expectation_arguments(), { "include_config" : False, "catch_exceptions" : False, - "output_format" : 'SUMMARY', + "result_format" : 'SUMMARY', } ) def test_get_and_save_expectation_config(self): directory_name = tempfile.mkdtemp() - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1,2,5], 'z' : ['hello', 'jello', 'mello'], @@ -138,42 +138,42 @@ def test_get_and_save_expectation_config(self): output_config = { "expectations": [ { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "z" } - }, + }, { - "expectation_type": "expect_column_values_to_be_in_set", + "expectation_type": "expect_column_values_to_be_in_set", "kwargs": { - "column": "x", + "column": "x", "values_set": [ - 1, - 2, + 1, + 2, 4 ] } - }, + }, { - "expectation_type": "expect_column_values_to_match_regex", + "expectation_type": "expect_column_values_to_match_regex", "kwargs": { - "column": "z", + "column": "z", "regex": "ello" } } - ], + ], "dataset_name": None, "meta": { "great_expectations.__version__": ge.__version__ @@ -198,53 +198,53 @@ def 
test_get_and_save_expectation_config(self): output_config = { "expectations": [ { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "z" } - }, + }, { - "expectation_type": "expect_column_values_to_be_in_set", + "expectation_type": "expect_column_values_to_be_in_set", "kwargs": { - "column": "x", + "column": "x", "values_set": [ - 1, - 2, + 1, + 2, 4 ] } - }, + }, { - "expectation_type": "expect_column_values_to_be_in_set", + "expectation_type": "expect_column_values_to_be_in_set", "kwargs": { - "column": "y", + "column": "y", "values_set": [ - 1, - 2, + 1, + 2, 4 ] } - }, + }, { - "expectation_type": "expect_column_values_to_match_regex", + "expectation_type": "expect_column_values_to_match_regex", "kwargs": { - "column": "z", + "column": "z", "regex": "ello" } } - ], + ], "dataset_name": None, "meta": { "great_expectations.__version__": ge.__version__ @@ -274,44 +274,44 @@ def test_get_and_save_expectation_config(self): output_config = { "expectations": [ { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } - }, + }, { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "z" } - }, + }, { - "expectation_type": "expect_column_values_to_be_in_set", + "expectation_type": "expect_column_values_to_be_in_set", "kwargs": { - "column": "x", + "column": "x", "values_set": [ - 1, - 2, + 1, + 2, 4 - ], - "output_format": "BASIC" + ], + "result_format": "BASIC" } - }, + }, 
{ - "expectation_type": "expect_column_values_to_match_regex", + "expectation_type": "expect_column_values_to_match_regex", "kwargs": { - "column": "z", - "regex": "ello", - "output_format": "BASIC" + "column": "z", + "regex": "ello", + "result_format": "BASIC" } } - ], + ], "dataset_name": None, "meta": { "great_expectations.__version__": ge.__version__ @@ -320,7 +320,7 @@ def test_get_and_save_expectation_config(self): self.assertEqual( df.get_expectations_config( - discard_output_format_kwargs=False, + discard_result_format_kwargs=False, discard_include_configs_kwargs=False, discard_catch_exceptions_kwargs=False, ), @@ -329,7 +329,7 @@ def test_get_and_save_expectation_config(self): df.save_expectations_config( directory_name+'/temp3.json', - discard_output_format_kwargs=False, + discard_result_format_kwargs=False, discard_include_configs_kwargs=False, discard_catch_exceptions_kwargs=False, ) @@ -344,7 +344,7 @@ def test_get_and_save_expectation_config(self): shutil.rmtree(directory_name) def test_format_column_map_output(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ "x" : list("abcdefghijklmnopqrstuvwxyz"), }) self.maxDiff = None @@ -357,79 +357,88 @@ def test_format_column_map_output(self): nonnull_count = 15 boolean_mapped_success_values = pd.Series([True for i in range(15)]) success_count = 15 - exception_list = [] - exception_index_list = [] + unexpected_list = [] + unexpected_index_list = [] self.assertEqual( df._format_column_map_output( "BOOLEAN_ONLY", success, - element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + element_count, nonnull_count, + unexpected_list, unexpected_index_list ), - True + {'success': True} ) self.assertEqual( df._format_column_map_output( "BASIC", success, - element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + element_count, 
nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'summary_obj': { - 'exception_percent': 0.0, - 'partial_exception_list': [], - 'exception_percent_nonmissing': 0.0, - 'exception_count': 0 + 'result': { + 'element_count': 20, + 'missing_count': 5, + 'missing_percent': 0.25, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0 } } ) self.assertEqual( df._format_column_map_output( - "COMPLETE", + "SUMMARY", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'exception_list': [], - 'exception_index_list': [], + 'result': { + 'element_count': 20, + 'missing_count': 5, + 'missing_percent': 0.25, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0, + 'partial_unexpected_index_list': [], + 'partial_unexpected_counts': [] + } } ) self.assertEqual( df._format_column_map_output( - "SUMMARY", + "COMPLETE", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'summary_obj': { - 'element_count': 20, - 'exception_count': 0, - 'exception_percent': 0.0, - 'exception_percent_nonmissing': 0.0, - 'missing_count': 5, - 'missing_percent': 0.25, - 'partial_exception_counts': {}, - 'partial_exception_index_list': [], - 'partial_exception_list': [] - } + 'result': + { + 'element_count': 20, + 'missing_count': 5, + 'missing_percent': 0.25, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0, + 'partial_unexpected_index_list': [], + 'partial_unexpected_counts': [], + 'unexpected_list': [], + 
'unexpected_index_list': [] + } } ) @@ -441,19 +450,18 @@ def test_format_column_map_output(self): nonnull_count = 0 boolean_mapped_success_values = pd.Series([]) success_count = 0 - exception_list = [] - exception_index_list = [] + unexpected_list = [] + unexpected_index_list = [] self.assertEqual( df._format_column_map_output( "BOOLEAN_ONLY", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), - True + {'success': True} ) self.assertEqual( @@ -461,58 +469,69 @@ def test_format_column_map_output(self): "BASIC", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'summary_obj': { - 'exception_percent': 0.0, - 'partial_exception_list': [], - 'exception_percent_nonmissing': None, - 'exception_count': 0 + 'result': { + 'element_count': 20, + 'missing_count': 20, + 'missing_percent': 1, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': None } } ) self.assertEqual( df._format_column_map_output( - "COMPLETE", + "SUMMARY", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'exception_list': [], - 'exception_index_list': [], + 'result': { + 'element_count': 20, + 'missing_count': 20, + 'missing_percent': 1, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': None, + 'partial_unexpected_index_list': [], + 'partial_unexpected_counts': [] + } } ) self.assertEqual( df._format_column_map_output( - "SUMMARY", + "COMPLETE", success, element_count, 
- nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': True, - 'summary_obj': { + 'result': { 'element_count': 20, - 'exception_count': 0, - 'exception_percent': 0.0, - 'exception_percent_nonmissing': None, 'missing_count': 20, - 'missing_percent': 1.0, - 'partial_exception_counts': {}, - 'partial_exception_index_list': [], - 'partial_exception_list': [] + 'missing_percent': 1, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': None, + 'partial_unexpected_index_list': [], + 'partial_unexpected_counts': [], + 'unexpected_list': [], + 'unexpected_index_list': [] } } ) @@ -525,19 +544,18 @@ def test_format_column_map_output(self): nonnull_count = 0 boolean_mapped_success_values = pd.Series([]) success_count = 0 - exception_list = [] - exception_index_list = [] + unexpected_list = [] + unexpected_index_list = [] self.assertEqual( df._format_column_map_output( "BOOLEAN_ONLY", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), - False + {'success': False} ) self.assertEqual( @@ -545,64 +563,75 @@ def test_format_column_map_output(self): "BASIC", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': False, - 'summary_obj': { - 'exception_percent': None, - 'partial_exception_list': [], - 'exception_percent_nonmissing': None, - 'exception_count': 0 + 'result': { + 'element_count': 0, + 'missing_count': 0, + 'missing_percent': None, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': None, + 'unexpected_percent_nonmissing': 
None } } ) self.assertEqual( df._format_column_map_output( - "COMPLETE", + "SUMMARY", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': False, - 'exception_list': [], - 'exception_index_list': [], + 'result': { + 'element_count': 0, + 'missing_count': 0, + 'missing_percent': None, + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': None, + 'unexpected_percent_nonmissing': None, + 'partial_unexpected_counts': [], + 'partial_unexpected_index_list': [] + } } ) self.assertEqual( df._format_column_map_output( - "SUMMARY", + "COMPLETE", success, element_count, - nonnull_values, nonnull_count, - boolean_mapped_success_values, success_count, - exception_list, exception_index_list + nonnull_count, + unexpected_list, unexpected_index_list ), { 'success': False, - 'summary_obj': { + 'result': { 'element_count': 0, - 'exception_count': 0, - 'exception_percent': None, - 'exception_percent_nonmissing': None, 'missing_count': 0, 'missing_percent': None, - 'partial_exception_counts': {}, - 'partial_exception_index_list': [], - 'partial_exception_list': [] + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_percent': None, + 'unexpected_percent_nonmissing': None, + 'partial_unexpected_counts': [], + 'partial_unexpected_index_list': [], + 'unexpected_list': [], + 'unexpected_index_list': [] } } ) def test_calc_map_expectation_success(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ "x" : list("abcdefghijklmnopqrstuvwxyz") }) self.assertEqual( @@ -660,7 +689,7 @@ def test_calc_map_expectation_success(self): ) def test_find_expectations(self): - my_df = ge.dataset.PandasDataSet({ + my_df = ge.dataset.PandasDataset({ 'x' : [1,2,3,4,5,6,7,8,9,10], 'y' : [1,2,None,4,None,6,7,8,9,None], 'z' : ['cello', 'hello', 'jello', 'bellow', 'fellow', 'mellow', 
'wellow', 'xello', 'yellow', 'zello'], @@ -679,7 +708,7 @@ def test_find_expectations(self): self.assertEqual( my_df.find_expectations("expect_column_to_exist", "x", expectation_kwargs={}), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } @@ -689,7 +718,7 @@ def test_find_expectations(self): self.assertEqual( my_df.find_expectations("expect_column_to_exist", expectation_kwargs={"column": "y"}), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } @@ -699,23 +728,23 @@ def test_find_expectations(self): self.assertEqual( my_df.find_expectations("expect_column_to_exist"), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } },{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } },{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "z" } }] ) - + with self.assertRaises(Exception) as context: my_df.find_expectations("expect_column_to_exist", "x", {"column": "y"}) @@ -727,19 +756,19 @@ def test_find_expectations(self): self.assertEqual( my_df.find_expectations(column="x"), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } },{ - "expectation_type": "expect_column_values_to_be_of_type", + "expectation_type": "expect_column_values_to_be_of_type", "kwargs": { "column": "x", "type_": "int", "target_datasource": "python", } },{ - "expectation_type": "expect_column_values_to_be_increasing", + "expectation_type": "expect_column_values_to_be_increasing", "kwargs": { "column": "x" } @@ -748,7 +777,7 @@ def test_find_expectations(self): def test_remove_expectation(self): - my_df = ge.dataset.PandasDataSet({ + my_df = ge.dataset.PandasDataset({ 
'x' : [1,2,3,4,5,6,7,8,9,10], 'y' : [1,2,None,4,None,6,7,8,9,None], 'z' : ['cello', 'hello', 'jello', 'bellow', 'fellow', 'mellow', 'wellow', 'xello', 'yellow', 'zello'], @@ -768,7 +797,7 @@ def test_remove_expectation(self): self.assertEqual( my_df.remove_expectation("expect_column_to_exist", "x", expectation_kwargs={}, dry_run=True), { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } @@ -778,7 +807,7 @@ def test_remove_expectation(self): self.assertEqual( my_df.remove_expectation("expect_column_to_exist", expectation_kwargs={"column": "y"}, dry_run=True), { - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } @@ -788,7 +817,7 @@ def test_remove_expectation(self): self.assertEqual( my_df.remove_expectation("expect_column_to_exist", expectation_kwargs={"column": "y"}, remove_multiple_matches=True, dry_run=True), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } @@ -804,23 +833,23 @@ def test_remove_expectation(self): self.assertEqual( my_df.remove_expectation("expect_column_to_exist", remove_multiple_matches=True, dry_run=True), [{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } },{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "y" } },{ - "expectation_type": "expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "z" } }] ) - + with self.assertRaises(Exception) as context: my_df.remove_expectation("expect_column_to_exist", "x", {"column": "y"}, dry_run=True) @@ -832,19 +861,19 @@ def test_remove_expectation(self): self.assertEqual( my_df.remove_expectation(column="x", remove_multiple_matches=True, dry_run=True), [{ - "expectation_type": 
"expect_column_to_exist", + "expectation_type": "expect_column_to_exist", "kwargs": { "column": "x" } },{ - "expectation_type": "expect_column_values_to_be_of_type", + "expectation_type": "expect_column_values_to_be_of_type", "kwargs": { "column": "x", "type_": "int", "target_datasource": "python", } },{ - "expectation_type": "expect_column_values_to_be_increasing", + "expectation_type": "expect_column_values_to_be_increasing", "kwargs": { "column": "x" } @@ -900,12 +929,117 @@ def test_remove_expectation(self): } ) + def test_discard_failing_expectations(self): + df = ge.dataset.PandasDataset({ + 'A':[1,2,3,4], + 'B':[5,6,7,8], + 'C':['a','b','c','d'], + 'D':['e','f','g','h'] + }) + + # Put some simple expectations on the data frame + df.expect_column_values_to_be_in_set("A", [1, 2, 3, 4]) + df.expect_column_values_to_be_in_set("B", [5, 6, 7, 8]) + df.expect_column_values_to_be_in_set("C", ['a', 'b', 'c', 'd']) + df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'h']) + + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'C', 'values_set': ['a', 'b', 'c', 'd']}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + + sub1 = df[:3] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[1:2] + sub1.discard_failing_expectations() + 
self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[:-1] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[-1:] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[['A', 'D']] + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[['A']] + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}} + ] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df.iloc[:3, 1:4] + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'C', 'values_set': ['a', 'b', 'c', 'd']}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df.loc[0:, 'A':'B'] + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 
'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}} + ] + sub1.discard_failing_expectations() + self.assertEqual(sub1.find_expectations(), exp1) + def test_test_expectation_function(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,3,5,7,9], 'y' : [1,2,None,7,9], }) - D2 = ge.dataset.PandasDataSet({ + D2 = ge.dataset.PandasDataset({ 'x' : [1,3,5,6,9], 'y' : [1,2,None,6,9], }) @@ -913,7 +1047,7 @@ def expect_dataframe_to_contain_7(self): return { "success": bool((self==7).sum().sum() > 0) } - + self.assertEqual( D.test_expectation_function(expect_dataframe_to_contain_7), {'success': True} @@ -926,61 +1060,62 @@ def expect_dataframe_to_contain_7(self): def test_test_column_map_expectation_function(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,3,5,7,9], 'y' : [1,2,None,7,9], }) - def is_odd(self, column, mostly=None, output_format=None, include_config=False, catch_exceptions=None, meta=None): + def is_odd(self, column, mostly=None, result_format=None, include_config=False, catch_exceptions=None, meta=None): return column % 2 == 1 self.assertEqual( D.test_column_map_expectation_function(is_odd, column='x'), - {'summary_obj': {'exception_percent': 0.0, 'partial_exception_list': [], 'exception_percent_nonmissing': 0.0, 'exception_count': 0}, 'success': True} + {'result': {'element_count': 5, 'missing_count': 0, 'missing_percent': 0, 'unexpected_percent': 0.0, 'partial_unexpected_list': [], 'unexpected_percent_nonmissing': 0.0, 'unexpected_count': 0}, 'success': True} ) self.assertEqual( - D.test_column_map_expectation_function(is_odd, 'x', output_format="BOOLEAN_ONLY"), - True + D.test_column_map_expectation_function(is_odd, 'x', result_format="BOOLEAN_ONLY"), + 
{'success': True} ) self.assertEqual( - D.test_column_map_expectation_function(is_odd, column='y', output_format="BOOLEAN_ONLY"), - False + D.test_column_map_expectation_function(is_odd, column='y', result_format="BOOLEAN_ONLY"), + {'success': False} ) self.assertEqual( - D.test_column_map_expectation_function(is_odd, column='y', output_format="BOOLEAN_ONLY", mostly=.7), - True - ) + D.test_column_map_expectation_function(is_odd, column='y', result_format="BOOLEAN_ONLY", mostly=.7), + {'success': True} + ) def test_test_column_aggregate_expectation_function(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,3,5,7,9], 'y' : [1,2,None,7,9], }) - def expect_second_value_to_be(self, column, value, output_format=None, include_config=False, catch_exceptions=None, meta=None): + def expect_second_value_to_be(self, column, value, result_format=None, include_config=False, catch_exceptions=None, meta=None): return { "success": column.ix[1] == value, - "true_value": column.ix[1], - "summary_obj": {} + "result": { + "observed_value": column.ix[1], + } } self.assertEqual( D.test_column_aggregate_expectation_function(expect_second_value_to_be, 'x', 2), - {'true_value': 3.0, 'success': False} + {'result': {'observed_value': 3.0, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0.0}, 'success': False} ) self.assertEqual( D.test_column_aggregate_expectation_function(expect_second_value_to_be, column='x', value=3), - {'true_value': 3.0, 'success': True} + {'result': {'observed_value': 3.0, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0.0}, 'success': True} ) self.assertEqual( - D.test_column_aggregate_expectation_function(expect_second_value_to_be, 'y', value=3, output_format="BOOLEAN_ONLY"), - False + D.test_column_aggregate_expectation_function(expect_second_value_to_be, 'y', value=3, result_format="BOOLEAN_ONLY"), + {'success': False} ) self.assertEqual( - D.test_column_aggregate_expectation_function(expect_second_value_to_be, 
'y', 2, output_format="BOOLEAN_ONLY"), - True + D.test_column_aggregate_expectation_function(expect_second_value_to_be, 'y', 2, result_format="BOOLEAN_ONLY"), + {'success': True} ) def test_meta_version_warning(self): - D = ge.dataset.DataSet(); + D = ge.dataset.Dataset(); with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -994,5 +1129,20 @@ def test_meta_version_warning(self): self.assertEqual(str(w[0].message), "WARNING: This configuration object was built using a different version of great_expectations than is currently validating it.") + def test_catch_exceptions_with_bad_expectation_type(self): + my_df = ge.dataset.PandasDataset({"x":range(10)}) + my_df.append_expectation({'expectation_type':'foobar', 'kwargs':{}}) + result = my_df.validate(catch_exceptions=True) + + self.assertEqual(result["results"][1]["success"], False) + self.assertEqual(result["results"][1]["expectation_config"]["expectation_type"], "foobar") + self.assertEqual(result["results"][1]["expectation_config"]["kwargs"], {}) + self.assertEqual(result["results"][1]["exception_info"]["raised_exception"], True) + assert "AttributeError: \'PandasDataset\' object has no attribute \'foobar\'" in result["results"][1]["exception_info"]["exception_traceback"] + + with self.assertRaises(AttributeError) as context: + result = my_df.validate(catch_exceptions=False) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_dataset_util.py b/tests/test_dataset_util.py new file mode 100644 index 000000000000..2cfca9df23b9 --- /dev/null +++ b/tests/test_dataset_util.py @@ -0,0 +1,573 @@ +import json +import datetime +import numpy as np +import unittest +from functools import wraps + +import great_expectations as ge + +class TestUtilMethods(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestUtilMethods, self).__init__(*args, **kwargs) + self.D = ge.read_csv('./tests/test_sets/distributional_expectations_data_base.csv') + + with 
open('./tests/test_sets/test_partitions.json', 'r') as file: + self.test_partitions = json.loads(file.read()) + + def test_DotDict(self): + D = ge.util.DotDict({ + 'x' : [1,2,4], + 'y' : [1,2,5], + 'z' : ['hello', 'jello', 'mello'], + }) + self.assertEqual(D.x[0],D.y[0]) + self.assertNotEqual(D.x[0],D.z[0]) + + def test_continuous_partition_data_error(self): + with self.assertRaises(ValueError): + test_partition = ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins=-1) + self.assertFalse(ge.dataset.util.is_valid_continuous_partition_object(test_partition)) + test_partition = ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], n_bins=-1) + self.assertFalse(ge.dataset.util.is_valid_continuous_partition_object(test_partition)) + + def test_partition_data_norm_0_1(self): + test_partition = ge.dataset.util.continuous_partition_data(self.D.norm_0_1) + for key, val in self.test_partitions['norm_0_1_auto'].items(): + self.assertEqual(len(val), len(test_partition[key])) + self.assertTrue(np.allclose(test_partition[key], val)) + + + def test_partition_data_bimodal(self): + test_partition = ge.dataset.util.continuous_partition_data(self.D.bimodal) + for key, val in self.test_partitions['bimodal_auto'].items(): + self.assertEqual(len(val), len(test_partition[key])) + self.assertTrue(np.allclose(test_partition[key], val)) + + + def test_kde_partition_data_norm_0_1(self): + test_partition = ge.dataset.util.kde_partition_data(self.D.norm_0_1) + for key, val in self.test_partitions['norm_0_1_kde'].items(): + self.assertEqual(len(val), len(test_partition[key])) + self.assertTrue(np.allclose(test_partition[key], val)) + + + def test_kde_partition_data_bimodal(self): + test_partition = ge.dataset.util.kde_partition_data(self.D.bimodal) + for key, val in self.test_partitions['bimodal_kde'].items(): + self.assertEqual(len(val), len(test_partition[key])) + self.assertTrue(np.allclose(test_partition[key], val)) + + + def test_categorical_data_fixed(self): + 
test_partition = ge.dataset.util.categorical_partition_data(self.D.categorical_fixed) + for k in self.test_partitions['categorical_fixed']['values']: + # Iterate over each categorical value and check that the weights equal those computed originally. + self.assertEqual( + self.test_partitions['categorical_fixed']['weights'][self.test_partitions['categorical_fixed']['values'].index(k)], + test_partition['weights'][test_partition['values'].index(k)]) + + def test_categorical_data_na(self): + df = ge.dataset.PandasDataset({ + 'my_column': ["A", "B", "A", "B", None] + }) + partition = ge.dataset.util.categorical_partition_data(df['my_column']) + self.assertTrue(ge.dataset.util.is_valid_categorical_partition_object(partition)) + self.assertTrue(len(partition['values']) == 2) + + def test_is_valid_partition_object_simple(self): + self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1']))) + self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['bimodal']))) + self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins='auto'))) + self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins='uniform', n_bins=10))) + + def test_generated_partition_objects(self): + for partition_name, partition_object in self.test_partitions.items(): + result = ge.dataset.util.is_valid_partition_object(partition_object) + if not result: + print("Partition object " + partition_name + " is invalid.") + self.assertTrue(result) + + def test_is_valid_partition_object_fails_length(self): + self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': [0,1], 'weights': [0,1,2]})) + + def test_is_valid_partition_object_fails_weights(self): + self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': 
[0,1,2], 'weights': [0.5,0.6]})) + + def test_is_valid_partition_object_fails_structure(self): + self.assertFalse(ge.dataset.util.is_valid_partition_object({'weights': [0.5,0.5]})) + self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': [0,1,2]})) + + def test_recursively_convert_to_json_serializable(self): + D = ge.dataset.PandasDataset({ + 'x' : [1,2,3,4,5,6,7,8,9,10], + }) + D.expect_column_values_to_be_in_set("x", set([1,2,3,4,5,6,7,8,9]), mostly=.8) + + part = ge.dataset.util.partition_data(D.x) + D.expect_column_kl_divergence_to_be_less_than("x", part, .6) + + #Dumping this JSON object verifies that everything is serializable + json.dumps(D.get_expectations_config(), indent=2) + + + x = { + 'w': [ + "aaaa", "bbbb", 1.3, 5, 6, 7 + ], + 'x': np.array([1, 2, 3]), + 'y': { + 'alpha' : None, + 'beta' : np.nan, + 'delta': np.inf, + 'gamma' : -np.inf + }, + 'z': set([1,2,3,4,5]), + 'zz': (1,2,3), + 'zzz': [ + datetime.datetime(2017,1,1), + datetime.date(2017,5,1), + ] + } + x = ge.dataset.util.recursively_convert_to_json_serializable(x) + self.assertEqual(type(x['x']), list) + + try: + x = unicode("abcdefg") + x = ge.dataset.util.recursively_convert_to_json_serializable(x) + self.assertEqual(type(x), unicode) + except NameError: + pass + + def test_expect_file_hash_to_equal(self): + test_file = './tests/test_sets/Titanic.csv' + # Test for non-existent file + try: + ge.expect_file_hash_to_equal('abc', value='abc') + except IOError: + pass + # Test for non-existent hash algorithm + try: + ge.expect_file_hash_to_equal(test_file, + hash_alg='md51', + value='abc') + except ValueError: + pass + # Test non-matching hash value + self.assertFalse(ge.expect_file_hash_to_equal(test_file, + value='abc')) + # Test matching hash value with default algorithm + self.assertTrue(ge.expect_file_hash_to_equal(test_file, + value='63188432302f3a6e8c9e9c500ff27c8a')) + # Test matching hash value with specified algorithm + 
self.assertTrue(ge.expect_file_hash_to_equal(test_file, + value='f89f46423b017a1fc6a4059d81bddb3ff64891e3c81250fafad6f3b3113ecc9b', + hash_alg='sha256')) + + def test_validate_distribution_parameters(self): + D = ge.read_csv('./tests/test_sets/fixed_distributional_test_dataset.csv') + + # ------ p_value ------ + with self.assertRaises(ValueError): + # p_value is 0 + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params=[0, 1], + p_value=0) + with self.assertRaises(ValueError): + # p_value negative + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params=[0,1], + p_value=-0.1) + with self.assertRaises(ValueError): + P_value = 1 + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params=[0,1], + p_value=1) + + with self.assertRaises(ValueError): + # p_value greater than 1 + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params=[0,1], + p_value=1.1) + with self.assertRaises(ValueError): + # params is none + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params=None) + + # ---- std_dev ------ + with self.assertRaises(ValueError): + # std_dev is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params={ + 'mean': 0, + 'std_dev': 0 + }) + with self.assertRaises(ValueError): + # std_dev is negative, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', distribution='norm', + params={ + 'mean': 0, + 'std_dev': -1 + }) + with self.assertRaises(ValueError): + # std_dev is 0, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', + distribution='norm', + params=[0,0]) + with self.assertRaises(ValueError): + # std_dev is negative, 
list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', + distribution='norm', + params=[0,-1]) + + # ------- beta ------ + with self.assertRaises(ValueError): + # beta, alpha is 0, dict params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'alpha':0, + 'beta':0.1 + }) + with self.assertRaises(ValueError): + # beta, alpha is negative, dict params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'alpha':-1, + 'beta':0.1 + }) + with self.assertRaises(ValueError): + # beta, beta is 0, dict params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'alpha':0.1, + 'beta':0 + }) + with self.assertRaises(ValueError): + # beta, beta is negative, dict params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'alpha':0, + 'beta':-1 + }) + with self.assertRaises(ValueError): + # beta, alpha is 0, list params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[0,0.1]) + with self.assertRaises(ValueError): + # beta, alpha is negative, list params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[-1,0.1]) + with self.assertRaises(ValueError): + # beta, beta is 0, list params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[0.1,0]) + with self.assertRaises(ValueError): + # beta, beta is negative, list params + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[0.1,-1]) + + with self.assertRaises(ValueError): + # beta, missing alpha, dict + 
D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'beta': 0.1 + }) + with self.assertRaises(ValueError): + # beta, missing beta, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params={ + 'alpha': 0.1 + }) + with self.assertRaises(ValueError): + # beta, missing beta, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[1]) + with self.assertRaises(ValueError): + # beta, missing beta, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('beta', + distribution='beta', + params=[1,1,1,1,1]) + + # ------ Gamma ------- + with self.assertRaises(ValueError): + # gamma, alpha is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params={ + 'alpha': 0 + }) + with self.assertRaises(ValueError): + # gamma, alpha is negative, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params={ + 'alpha': -1 + }) + with self.assertRaises(ValueError): + # gamma, alpha is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params={ + 'alpha': 0 + }) + with self.assertRaises(ValueError): + # gamma, alpha is missing, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params={ + }) + with self.assertRaises(ValueError): + # gamma, alpha is missing, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params=[]) + with self.assertRaises(ValueError): + # gamma, alpha is 0, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params=[0]) + with 
self.assertRaises(ValueError): + # gamma, alpha is negative, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params=[-1]) + with self.assertRaises(ValueError): + # gamma, too many arguments, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('gamma', + distribution='gamma', + params=[1, 1, 1, 1]) + + # ----- chi2 -------- + with self.assertRaises(ValueError): + # chi2, df is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params={ + 'df': 0 + }) + with self.assertRaises(ValueError): + # chi2, df is negative, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params={ + 'df': -1 + }) + with self.assertRaises(ValueError): + # chi2, df is missing, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params={ + }) + with self.assertRaises(ValueError): + # chi2, df is 0, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params=[0]) + with self.assertRaises(ValueError): + # chi2, df is negative, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params=[-1]) + with self.assertRaises(ValueError): + # chi2, df is missing, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params=[]) + with self.assertRaises(ValueError): + # chi2, too many parameters, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('chi2', + distribution='chi2', + params=[1, 1, 1, 5]) + # ----- norm ------ + with self.assertRaises(ValueError): + # norm, too many arguments, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('norm', 
distribution='norm', + params=[0, 1, 500]) + + + # ----- uniform ----- + with self.assertRaises(ValueError): + # uniform, scale is 0, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('uniform', distribution='uniform', + params=[0, 0]) + with self.assertRaises(ValueError): + # uniform, scale is negative, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('uniform', distribution='uniform', + params=[0, -1]) + with self.assertRaises(ValueError): + # uniform, scale is negative, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('uniform', distribution='uniform', + params={ + 'loc': 0, + 'scale': -1 + }) + with self.assertRaises(ValueError): + # uniform, scale is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('uniform', distribution='uniform', + params={ + 'loc': 0, + 'scale': 0 + }) + + with self.assertRaises(ValueError): + # uniform, too many parameters, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('uniform', distribution='uniform', + params=[0, 1, 500]) + + + # --- expon --- + with self.assertRaises(ValueError): + # expon, scale is 0, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='expon', + params=[0, 0]) + with self.assertRaises(ValueError): + # expon, scale is negative, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='expon', + params=[0, -1]) + with self.assertRaises(ValueError): + # expon, scale is 0, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='expon', + params={ + 'loc': 0, + 'scale': 0 + }) + with self.assertRaises(ValueError): + # expon, scale is negative, dict + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='expon', + 
params={ + 'loc': 0, + 'scale': -1 + }) + with self.assertRaises(ValueError): + # expon, too many parameters, list + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='expon', + params=[0, 1, 500]) + + # --- misc --- + with self.assertRaises(AttributeError): + # non-supported distribution + D.expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than('exponential', distribution='fakedistribution', + params=[0, 1]) + def test_infer_distribution_parameters(self): + D = ge.read_csv('./tests/test_sets/fixed_distributional_test_dataset.csv') + + with self.assertRaises(TypeError): + ge.dataset.util.infer_distribution_parameters(data=D.norm, + distribution='norm', + params=['wrong_param_format']) + t = ge.dataset.util.infer_distribution_parameters(data=D.norm_std, + distribution='norm', + params=None) + self.assertEqual(t['mean'], D.norm_std.mean()) + self.assertEqual(t['std_dev'], D.norm_std.std()) + self.assertEqual(t['loc'], 0) + self.assertEqual(t['scale'], 1) + + # beta + t = ge.dataset.util.infer_distribution_parameters(data=D.beta, distribution='beta') + self.assertEqual(t['alpha'], (t['mean'] ** 2) * ( + ((1 - t['mean']) / t['std_dev'] ** 2) - (1 / t['mean'])), "beta dist, alpha infer") + self.assertEqual(t['beta'], t['alpha'] * ((1 / t['mean']) - 1), "beta dist, beta infer") + + # gamma + t = ge.dataset.util.infer_distribution_parameters(data=D.gamma, distribution='gamma') + self.assertEqual(t['alpha'], D.gamma.mean()) + + # uniform distributions + t = ge.dataset.util.infer_distribution_parameters(data=D.uniform, + distribution='uniform') + self.assertEqual(t['min'], min(D.uniform), "uniform, min infer") + self.assertEqual(t['max'], max(D.uniform) - min(D.uniform), "uniform, max infer") + + + uni_loc = 5 + uni_scale = 10 + t = ge.dataset.util.infer_distribution_parameters(data=D.uniform, + distribution='uniform', + params={ + 'loc': uni_loc, + 'scale': uni_scale + }) + 
self.assertEqual(t['min'], uni_loc, "uniform, min infer") + self.assertEqual(t['max'], uni_scale, "uniform, max infer") + + + # expon distribution + with self.assertRaises(AttributeError): + ge.dataset.util.infer_distribution_parameters(data=D.norm, + distribution='fakedistribution') + + # chi2 + t = ge.dataset.util.infer_distribution_parameters(data=D.chi2, distribution='chi2') + self.assertEqual(t['df'], D.chi2.mean()) + +""" +The following Parent and Child classes are used for testing documentation inheritance. +""" +class Parent(object): + """Parent class docstring + """ + + @classmethod + def expectation(cls, func): + """Manages configuration and running of expectation objects. + """ + @wraps(func) + def wrapper(*args, **kwargs): + # wrapper logic + func(*args, **kwargs) + + return wrapper + + + def override_me(self): + """Parent method docstring + Returns: + Unattainable abiding satisfaction. + """ + raise NotImplementedError + + +class Child(Parent): + """ + Child class docstring + """ + + @ge.dataset.util.DocInherit + @Parent.expectation + def override_me(self): + """Child method docstring + Returns: + Real, instantiable, abiding satisfaction. + """ + + +class TestDocumentation(unittest.TestCase): + + def test_doc_inheritance(self): + c = Child() + + self.assertEqual( + c.__getattribute__('override_me').__doc__, + """Child method docstring + Returns: + Real, instantiable, abiding satisfaction. + """ + '\n' + + """Parent method docstring + Returns: + Unattainable abiding satisfaction. 
+ """ + ) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_expectation_decorators.py b/tests/test_expectation_decorators.py index 78f76b6e89aa..68cb5e096e92 100644 --- a/tests/test_expectation_decorators.py +++ b/tests/test_expectation_decorators.py @@ -1,172 +1,246 @@ -# import json -# import hashlib -# import datetime -# import numpy as np -# import random -# import os -# import inspect - -# from nose.tools import * -import sys +from __future__ import division + import unittest -import great_expectations as ge -#reload(ge) -# from great_expectations.dataset import PandasDataSet -PandasDataSet = ge.dataset.PandasDataSet -MetaPandasDataSet = ge.dataset.MetaPandasDataSet +from great_expectations.dataset import Dataset, PandasDataset, MetaPandasDataset + + +class ExpectationOnlyDataset(Dataset): + + @Dataset.expectation([]) + def no_op_expectation(self, result_format=None, include_config=False, catch_exceptions=None, meta=None): + return {"success": True} + + @Dataset.expectation(['value']) + def no_op_value_expectation(self, value=None, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + return {"success": True} + + @Dataset.expectation([]) + def exception_expectation(self, + result_format=None, include_config=False, catch_exceptions=None, meta=None): + raise ValueError("Gotcha!") -# from ge.decorators import expectation, column_map_expectation, column_aggregate_expectation class TestExpectationDecorators(unittest.TestCase): - + + def test_expectation_decorator_build_config(self): + eds = ExpectationOnlyDataset() + eds.no_op_expectation() + eds.no_op_value_expectation('a') + + config = eds.get_expectations_config() + self.assertEqual({'expectation_type': 'no_op_expectation', 'kwargs': {}}, + config['expectations'][0]) + + self.assertEqual({'expectation_type': 'no_op_value_expectation', 'kwargs': {'value': 'a'}}, + config['expectations'][1]) + + + def 
test_expectation_decorator_include_config(self): + eds = ExpectationOnlyDataset() + out = eds.no_op_value_expectation('a', include_config=True) + + self.assertEqual({'expectation_type': 'no_op_value_expectation', + 'kwargs': {'value': 'a', 'result_format': 'BASIC'} + }, + out['expectation_config']) + + def test_expectation_decorator_catch_exceptions(self): + eds = ExpectationOnlyDataset() + + # Confirm that we would raise an error without catching exceptions + with self.assertRaises(ValueError): + eds.exception_expectation(catch_exceptions=False) + + # Catch exceptions and validate results + out = eds.exception_expectation(catch_exceptions=True) + self.assertEqual(True, + out['exception_info']['raised_exception']) + + # Check only the first and last line of the traceback, since formatting can be platform dependent. + self.assertEqual('Traceback (most recent call last):', + out['exception_info']['exception_traceback'].split('\n')[0]) + self.assertEqual('ValueError: Gotcha!', + out['exception_info']['exception_traceback'].split('\n')[-2]) + + def test_pandas_column_map_decorator_partial_exception_counts(self): + df = PandasDataset({'a': [0,1,2,3,4]}) + out = df.expect_column_values_to_be_between('a', 3, 4, + result_format={'result_format': 'COMPLETE', 'partial_unexpected_count': 1}) + + self.assertTrue(1, len(out['result']['partial_unexpected_counts'])) + self.assertTrue(3, len(out['result']['unexpected_list'])) + def test_column_map_expectation_decorator(self): - # Create a new CustomPandasDataSet to + # Create a new CustomPandasDataset to # (1) Prove that custom subclassing works, AND # (2) Test expectation business logic without dependencies on any other functions. 
- class CustomPandasDataSet(PandasDataSet): + class CustomPandasDataset(PandasDataset): - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_odd(self, column): return column.map(lambda x: x % 2 ) - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expectation_that_crashes_on_sixes(self, column): return column.map(lambda x: (x-6)/0 != "duck") - - df = CustomPandasDataSet({ + df = CustomPandasDataset({ 'all_odd' : [1,3,5,5,5,7,9,9,9,11], 'mostly_odd' : [1,3,5,7,9,2,4,1,3,5], 'all_even' : [2,4,4,6,6,6,8,8,8,8], 'odd_missing' : [1,3,5,None,None,None,None,1,3,None], 'mixed_missing' : [1,3,5,None,None,2,4,1,3,None], - 'all_missing' : [None,None,None,None,None,None,None,None,None,None,], + 'all_missing' : [None,None,None,None,None,None,None,None,None,None] }) - df.set_default_expectation_argument("output_format", "COMPLETE") + df.set_default_expectation_argument("result_format", "COMPLETE") self.assertEqual( df.expect_column_values_to_be_odd("all_odd"), - { - 'exception_list': [], - 'exception_index_list': [], - 'success': True - } + {'result': {'element_count': 10, + 'missing_count': 0, + 'missing_percent': 0.0, + 'partial_unexpected_counts': [], + 'partial_unexpected_index_list': [], + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_index_list': [], + 'unexpected_list': [], + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0}, + 'success': True} ) self.assertEqual( df.expect_column_values_to_be_odd("all_missing"), - { - 'exception_list': [], - 'exception_index_list': [], - 'success': True - } + {'result': {'element_count': 10, + 'missing_count': 10, + 'missing_percent': 1, + 'partial_unexpected_counts': [], + 'partial_unexpected_index_list': [], + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_index_list': [], + 'unexpected_list': [], + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': None}, + 
'success': True} ) self.assertEqual( df.expect_column_values_to_be_odd("odd_missing"), - { - 'exception_list': [], - 'exception_index_list': [], - 'success': True - } + {'result': {'element_count': 10, + 'missing_count': 5, + 'missing_percent': 0.5, + 'partial_unexpected_counts': [], + 'partial_unexpected_index_list': [], + 'partial_unexpected_list': [], + 'unexpected_count': 0, + 'unexpected_index_list': [], + 'unexpected_list': [], + 'unexpected_percent': 0.0, + 'unexpected_percent_nonmissing': 0.0}, + 'success': True} ) self.assertEqual( df.expect_column_values_to_be_odd("mixed_missing"), - { - 'exception_list': [2,4], - 'exception_index_list': [5,6], - 'success': False - } + {'result': {'element_count': 10, + 'missing_count': 3, + 'missing_percent': 0.3, + 'partial_unexpected_counts': [{'value': 2., 'count': 1}, {'value': 4., 'count': 1}], + 'partial_unexpected_index_list': [5, 6], + 'partial_unexpected_list': [2., 4.], + 'unexpected_count': 2, + 'unexpected_index_list': [5, 6], + 'unexpected_list': [2., 4.], + 'unexpected_percent': 0.2, + 'unexpected_percent_nonmissing': 2/7}, + 'success': False} ) self.assertEqual( df.expect_column_values_to_be_odd("mostly_odd"), - { - 'exception_list': [2, 4], - 'exception_index_list': [5, 6], - 'success': False - } + {'result': {'element_count': 10, + 'missing_count': 0, + 'missing_percent': 0, + 'partial_unexpected_counts': [{'value': 2., 'count': 1}, {'value': 4., 'count': 1}], + 'partial_unexpected_index_list': [5, 6], + 'partial_unexpected_list': [2., 4.], + 'unexpected_count': 2, + 'unexpected_index_list': [5, 6], + 'unexpected_list': [2., 4.], + 'unexpected_percent': 0.2, + 'unexpected_percent_nonmissing': 0.2}, + 'success': False} ) self.assertEqual( df.expect_column_values_to_be_odd("mostly_odd", mostly=.6), - { - 'exception_list': [2, 4], - 'exception_index_list': [5, 6], - 'success': True - } + {'result': {'element_count': 10, + 'missing_count': 0, + 'missing_percent': 0, + 'partial_unexpected_counts': [{'value': 
2., 'count': 1}, {'value': 4., 'count': 1}], + 'partial_unexpected_index_list': [5, 6], + 'partial_unexpected_list': [2., 4.], + 'unexpected_count': 2, + 'unexpected_index_list': [5, 6], + 'unexpected_list': [2., 4.], + 'unexpected_percent': 0.2, + 'unexpected_percent_nonmissing': 0.2}, + 'success': True} ) self.assertEqual( - df.expect_column_values_to_be_odd("mostly_odd", output_format="BOOLEAN_ONLY"), - False + df.expect_column_values_to_be_odd("mostly_odd", result_format="BOOLEAN_ONLY"), + {'success': False} ) - df.default_expectation_args["output_format"] = "BOOLEAN_ONLY" + df.default_expectation_args["result_format"] = "BOOLEAN_ONLY" self.assertEqual( df.expect_column_values_to_be_odd("mostly_odd"), - False + {'success': False} ) - df.default_expectation_args["output_format"] = "BASIC" - - # import json - # print json.dumps(df.expect_column_values_to_be_odd("mostly_odd", include_config=True), indent=2) + df.default_expectation_args["result_format"] = "BASIC" self.assertEqual( df.expect_column_values_to_be_odd("mostly_odd", include_config=True), { - "expectation_kwargs": { - "column": "mostly_odd", - "output_format": "BASIC" - }, - "summary_obj": { - "exception_percent": 0.2, - "exception_percent_nonmissing": 0.2, - "partial_exception_list": [ - 2, - 4 - ], - "exception_count": 2 - }, - "success": False, - "expectation_type": "expect_column_values_to_be_odd" + "expectation_config": { + "expectation_type": "expect_column_values_to_be_odd", + "kwargs": { + "column": "mostly_odd", + "result_format": "BASIC" + } + }, + 'result': {'element_count': 10, + 'missing_count': 0, + 'missing_percent': 0, + 'partial_unexpected_list': [2., 4.], + 'unexpected_count': 2, + 'unexpected_percent': 0.2, + 'unexpected_percent_nonmissing': 0.2}, + 'success': False, } - # { - # 'exception_list': [2, 4], - # 'exception_index_list': [5, 6], - # 'success': False, - # 'expectation_type' : 'expect_column_values_to_be_odd', - # 'expectation_kwargs' : { - # 'column' : 'mostly_odd' - # } - # 
} ) - # self.assertEqual( - # df.expect_column_value_to_be_odd("all_odd"), - # { - # 'exception_list': [], - # 'success': True - # } - # ) def test_column_aggregate_expectation_decorator(self): - # Create a new CustomPandasDataSet to + # Create a new CustomPandasDataset to # (1) Prove that custom subclassing works, AND # (2) Test expectation business logic without dependencies on any other functions. - class CustomPandasDataSet(PandasDataSet): + class CustomPandasDataset(PandasDataset): - @PandasDataSet.column_aggregate_expectation + @PandasDataset.column_aggregate_expectation def expect_column_median_to_be_odd(self, column): - return {"success": column.median() % 2, "true_value": column.median(), "summary_obj": None} + return {"success": column.median() % 2, "result": {"observed_value": column.median()}} - df = CustomPandasDataSet({ + df = CustomPandasDataset({ 'all_odd' : [1,3,5,7,9], 'all_even' : [2,4,6,8,10], 'odd_missing' : [1,3,5,None,None], @@ -174,12 +248,12 @@ def expect_column_median_to_be_odd(self, column): 'mixed_missing_2' : [1,3,None,None,6], 'all_missing' : [None,None,None,None,None,], }) - df.set_default_expectation_argument("output_format", "COMPLETE") + df.set_default_expectation_argument("result_format", "COMPLETE") self.assertEqual( df.expect_column_median_to_be_odd("all_odd"), { - 'true_value': 5, + 'result': {'observed_value': 5, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0}, 'success': True } ) @@ -187,148 +261,38 @@ def expect_column_median_to_be_odd(self, column): self.assertEqual( df.expect_column_median_to_be_odd("all_even"), { - 'true_value': 6, + 'result': {'observed_value': 6, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0}, 'success': False } ) self.assertEqual( - df.expect_column_median_to_be_odd("all_even", output_format="SUMMARY"), + df.expect_column_median_to_be_odd("all_even", result_format="SUMMARY"), { - 'true_value': 6, - 'success': False, - 'summary_obj': {'element_count': 5, 'missing_count': 0, 
'missing_percent': 0} + 'result': {'observed_value': 6, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0}, + 'success': False } ) self.assertEqual( - df.expect_column_median_to_be_odd("all_even", output_format="BOOLEAN_ONLY"), - False + df.expect_column_median_to_be_odd("all_even", result_format="BOOLEAN_ONLY"), + {'success': False} ) - df.default_expectation_args["output_format"] = "BOOLEAN_ONLY" + df.default_expectation_args["result_format"] = "BOOLEAN_ONLY" self.assertEqual( df.expect_column_median_to_be_odd("all_even"), - False + {'success': False} ) self.assertEqual( - df.expect_column_median_to_be_odd("all_even", output_format="BASIC"), + df.expect_column_median_to_be_odd("all_even", result_format="BASIC"), { - 'true_value': 6, + 'result': {'observed_value': 6, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0}, 'success': False } ) - def test_expectation_decorator_catch_exceptions(self): - - class CustomPandasDataSet(PandasDataSet): - - @PandasDataSet.column_map_expectation - def expect_column_values_to_be_odd(self, column): - return column.map(lambda x: x % 2 ) - - @PandasDataSet.column_map_expectation - def expectation_that_crashes_on_sixes(self, column): - return column.map(lambda x: 1/(x-6) != "duck") - - - df = CustomPandasDataSet({ - 'all_odd' : [1,3,5,5,5,7,9,9,9,11], - 'mostly_odd' : [1,3,5,7,9,2,4,1,3,5], - 'all_even' : [2,4,4,6,6,6,8,8,8,8], - 'odd_missing' : [1,3,5,None,None,None,None,1,3,None], - 'mixed_missing' : [1,3,5,None,None,2,4,1,3,None], - 'all_missing' : [None,None,None,None,None,None,None,None,None,None,], - }) - df.set_default_expectation_argument("output_format", "COMPLETE") - - self.assertEqual( - df.expectation_that_crashes_on_sixes("all_odd"), - { - 'exception_list': [], - 'exception_index_list': [], - 'success': True - } - ) - - self.assertEqual( - df.expectation_that_crashes_on_sixes("all_odd", catch_exceptions=False), - { - 'success': True, - 'exception_list': [], - 'exception_index_list': [], - } - ) - - 
self.assertEqual( - df.expectation_that_crashes_on_sixes("all_odd", catch_exceptions=True), - { - 'success': True, - 'exception_list': [], - 'exception_index_list': [], - 'raised_exception': False, - 'exception_traceback': None, - } - ) - - with self.assertRaises(ZeroDivisionError): - df.expectation_that_crashes_on_sixes("all_even", catch_exceptions=False) - - result_obj = df.expectation_that_crashes_on_sixes("all_even", catch_exceptions=True) - comparison_obj = { - 'success': False, - 'raised_exception': True, - } - - self.assertEqual( - set(result_obj.keys()), - set(list(comparison_obj.keys())+['exception_traceback']), - ) - - for k,v in comparison_obj.items(): - self.assertEqual(result_obj[k], v) - - self.assertEqual( - result_obj["exception_traceback"].split('\n')[-1], - "", - ) - - if sys.version_info[0] == 3: - self.assertEqual( - result_obj["exception_traceback"].split('\n')[-2], - "ZeroDivisionError: division by zero" - ) - - else: - ## This can fail depending on the architecture, because the error may also be "long division or modulo by zero" - #self.assertEqual( - # result_obj["exception_traceback"].split('\n')[-2], - # "ZeroDivisionError: integer division or modulo by zero" - #) - self.assertEqual( - result_obj["exception_traceback"].split('\n')[-2].split(':')[0], - "ZeroDivisionError" - ) - - self.assertEqual( - result_obj["exception_traceback"].split('\n')[-3], - " return column.map(lambda x: 1/(x-6) != \"duck\")", - ) - - - self.assertEqual( - df.expectation_that_crashes_on_sixes("all_odd", output_format="BOOLEAN_ONLY", catch_exceptions=True), - True - ) - - self.assertEqual( - df.expectation_that_crashes_on_sixes("all_even", output_format="BOOLEAN_ONLY", catch_exceptions=True), - False - ) - - # with self.assertRaises(ZeroDivisionError): - # df.expectation_that_crashes_on_sixes("all_even", catch_exceptions=False) if __name__ == "__main__": unittest.main() diff --git a/tests/test_fixtures/custom_dataset.py b/tests/test_fixtures/custom_dataset.py index 
3d88c610b5fd..8bf845cc4223 100644 --- a/tests/test_fixtures/custom_dataset.py +++ b/tests/test_fixtures/custom_dataset.py @@ -1,6 +1,6 @@ -from great_expectations.dataset.pandas_dataset import PandasDataSet +from great_expectations.dataset.pandas_dataset import PandasDataset -class CustomPandasDataSet(PandasDataSet): +class CustomPandasDataset(PandasDataset): drg_codes = [ 194, 690, 292, 392, 641, 871, 603, 470, 191, 190, 291, @@ -15,10 +15,10 @@ class CustomPandasDataSet(PandasDataSet): #885 ] - @PandasDataSet.column_map_expectation + @PandasDataset.column_map_expectation def expect_column_to_start_with_valid_drg(self, column): return column.map(lambda x: int(x[:3]) in self.drg_codes) - @PandasDataSet.column_map_expectation + @PandasDataset.column_map_expectation def expect_column_values_to_have_odd_lengths(self, column): return column.map(lambda x: len(x) % 2 == 1) \ No newline at end of file diff --git a/tests/test_fixtures/fixed_distribution_data.py b/tests/test_fixtures/fixed_distribution_data.py new file mode 100644 index 000000000000..d4d9f8af45d4 --- /dev/null +++ b/tests/test_fixtures/fixed_distribution_data.py @@ -0,0 +1,66 @@ +""" +Use this code to create the reproducible data in ./tests/test_sets/fixed_distributional_test_data.csv + +The data should pass a kstest with the cdf parameter=distribution and a=0.05, +e.g. 
kstest(data_column, "distribution name", p-value=0.05) == True + +""" +import numpy as np +from scipy import stats +import pandas as pd +import sys + +def generate_data(): + ## Code used to create reproducible and static test data + + std_loc, std_scale = 0, 1 + norm_mean, norm_std = -2, 5 + beta_a, beta_b, beta_loc, beta_scale = 0.5, 10, 5, 11 + gamma_a, gamma_loc, gamma_scale = 2, 20, 3 + poisson_lambda, poisson_loc = 8.2, 40 + uniform_loc, uniform_scale = -5, 11 + chi2_df, chi2_loc, chi2_scale = 30, 3, 5 + expon_loc, expon_scale = 4.2, 10 + + np.random.seed(12345) + fixed = pd.DataFrame({ + 'norm': stats.norm.rvs(loc=norm_mean, scale=norm_std, size=500), + 'norm_std': stats.norm.rvs(size=500), + 'beta': stats.beta.rvs(a=beta_a, b=beta_b, loc=beta_loc, scale=beta_scale, size=500), + #'beta_std': stats.beta.rvs(a=beta_a, b=beta_b, size=500), + 'gamma': stats.gamma.rvs(a=gamma_a, loc=gamma_loc, scale=gamma_scale, size=500), + #'gamma_std': stats.gamma.rvs(a=gamma_a, size=500), + #'poisson': stats.poisson.rvs(mu=poisson_lambda, loc=poisson_loc, size=500), + #'poisson_std': stats.poisson.rvs(mu=poisson_lambda, size=500), + 'uniform': stats.uniform.rvs(loc=uniform_loc, scale=uniform_scale, size=500), + #'uniform_std': stats.uniform.rvs(size=500), + 'chi2': stats.chi2.rvs(df=chi2_df, loc=chi2_loc, scale=chi2_scale, size=500), + #'chi2_std': stats.chi2.rvs(df=chi2_df, size=500), + 'exponential': stats.expon.rvs(loc=expon_loc, scale=expon_scale, size=500), + #'exponential_std': stats.expon.rvs(size=500) + }) + + # different seed for chi2 + np.random.seed(123456) + fixed['chi2'] = stats.chi2.rvs(df=chi2_df, loc=chi2_loc, scale=chi2_scale, size=500) + + return fixed + + +if __name__ == "__main__": + # Set precision we'll use: + #precision = sys.float_info.dig + #print("Setting pandas float_format to use " + str(precision) + " digits of precision.") + + + df = generate_data() + df.to_csv("../test_sets/fixed_distributional_test_dataset.csv", header=True, index=None) + with 
open('../test_sets/fixed_distributional_test_dataset.json', 'a') as data_file: + for column in list(df): + data_file.write("\"" + str(column) + "\" : [") + data_file.write(str(df.iloc[0][column])) + for data_point in range(1, len(df[column])): + data_file.write("," + str(df.iloc[data_point][column])) + + data_file.write("],\n") + #df.to_csv('../test_sets/fixed_distributional_test_dataset.csv', index=None, header=True) diff --git a/tests/test_great_expectations.py b/tests/test_great_expectations.py index 4ebe86721b29..2d9593bdea7b 100644 --- a/tests/test_great_expectations.py +++ b/tests/test_great_expectations.py @@ -4,10 +4,11 @@ import unittest import numpy as np +import pandas as pd import great_expectations as ge -from great_expectations.dataset import PandasDataSet, MetaPandasDataSet -from .util import assertDeepAlmostEqual +from great_expectations.dataset import PandasDataset, MetaPandasDataset +from .test_utils import assertDeepAlmostEqual def isprime(n): #https://stackoverflow.com/questions/18833759/python-prime-number-checker @@ -37,12 +38,46 @@ def isprime(n): return True -class CustomPandasDataSet(PandasDataSet): +class CustomPandasDataset(PandasDataset): - @MetaPandasDataSet.column_map_expectation + @MetaPandasDataset.column_map_expectation def expect_column_values_to_be_prime(self, column): return column.map(isprime) + @MetaPandasDataset.expectation(["column", "mostly"]) + def expect_column_values_to_equal_1(self, column, mostly=None): + not_null = self[column].notnull() + + result = self[column][not_null] == 1 + unexpected_values = list(self[column][not_null][result==False]) + + if mostly: + #Prevent division-by-zero errors + if len(not_null) == 0: + return { + 'success':True, + 'result': { + 'unexpected_list':unexpected_values, + 'unexpected_index_list':self.index[result], + } + } + + percent_equaling_1 = float(sum(result))/len(not_null) + return { + "success" : percent_equaling_1 >= mostly, + 'result': { + "unexpected_list" : unexpected_values[:20], 
+ "unexpected_index_list" : list(self.index[result==False])[:20], + } + } + else: + return { + "success" : len(unexpected_values) == 0, + 'result': { + "unexpected_list" : unexpected_values[:20], + "unexpected_index_list" : list(self.index[result==False])[:20], + } + } class TestCustomClass(unittest.TestCase): @@ -50,31 +85,84 @@ def test_custom_class(self): script_path = os.path.dirname(os.path.realpath(__file__)) df = ge.read_csv( script_path+'/test_sets/Titanic.csv', - dataset_class=CustomPandasDataSet + dataset_class=CustomPandasDataset ) - df.set_default_expectation_argument("output_format", "COMPLETE") - + df.set_default_expectation_argument("result_format", "COMPLETE") self.assertEqual( - df.expect_column_values_to_be_prime('Age'), - {'exception_list':[30.0,25.0,0.92000000000000004,63.0,39.0,58.0,50.0,24.0,36.0,26.0,25.0,25.0,28.0,45.0,39.0,30.0,58.0,45.0,22.0,48.0,44.0,60.0,45.0,58.0,36.0,33.0,36.0,36.0,14.0,49.0,36.0,46.0,27.0,27.0,26.0,64.0,39.0,55.0,70.0,69.0,36.0,39.0,38.0,27.0,27.0,4.0,27.0,50.0,48.0,49.0,48.0,39.0,36.0,30.0,24.0,28.0,64.0,60.0,49.0,44.0,22.0,60.0,48.0,35.0,22.0,45.0,49.0,54.0,38.0,58.0,45.0,46.0,25.0,21.0,48.0,49.0,45.0,36.0,55.0,52.0,24.0,16.0,44.0,51.0,42.0,35.0,35.0,38.0,35.0,50.0,49.0,46.0,58.0,42.0,40.0,42.0,55.0,50.0,16.0,21.0,30.0,15.0,30.0,46.0,54.0,36.0,28.0,65.0,33.0,44.0,55.0,36.0,58.0,64.0,64.0,22.0,28.0,22.0,18.0,52.0,46.0,56.0,33.0,27.0,55.0,54.0,48.0,18.0,21.0,34.0,40.0,36.0,50.0,39.0,56.0,28.0,56.0,56.0,24.0,18.0,24.0,45.0,40.0,6.0,57.0,32.0,62.0,54.0,52.0,62.0,63.0,46.0,52.0,39.0,18.0,48.0,49.0,39.0,46.0,64.0,60.0,60.0,55.0,54.0,21.0,57.0,45.0,50.0,50.0,27.0,20.0,51.0,21.0,36.0,40.0,32.0,33.0,30.0,28.0,18.0,34.0,32.0,57.0,18.0,36.0,28.0,51.0,32.0,28.0,36.0,4.0,1.0,12.0,34.0,26.0,27.0,15.0,45.0,40.0,20.0,25.0,36.0,25.0,42.0,26.0,26.0,0.82999999999999996,54.0,44.0,52.0,30.0,30.0,27.0,24.0,35.0,8.0,22.0,30.0,20.0,21.0,49.0,8.0,28.0,18.0,28.0,22.0,25.0,18.0,32.0,18.0,42.0,34.0,8.0,21.0,38.0,38.0,35.0,35.0,38.0,24.0,16.0,26
.0,45.0,24.0,21.0,22.0,34.0,30.0,50.0,30.0,1.0,44.0,28.0,6.0,30.0,45.0,24.0,24.0,49.0,48.0,34.0,32.0,21.0,18.0,21.0,52.0,42.0,36.0,21.0,33.0,34.0,22.0,45.0,30.0,26.0,34.0,26.0,22.0,1.0,25.0,48.0,57.0,27.0,30.0,20.0,45.0,46.0,30.0,48.0,54.0,64.0,32.0,18.0,32.0,26.0,20.0,39.0,22.0,24.0,28.0,50.0,20.0,40.0,42.0,21.0,32.0,34.0,33.0,8.0,36.0,34.0,30.0,28.0,0.80000000000000004,25.0,50.0,21.0,25.0,18.0,20.0,30.0,30.0,35.0,22.0,25.0,25.0,14.0,50.0,22.0,27.0,27.0,30.0,22.0,35.0,30.0,28.0,12.0,40.0,36.0,28.0,32.0,4.0,36.0,33.0,32.0,26.0,30.0,24.0,18.0,42.0,16.0,35.0,16.0,25.0,18.0,20.0,30.0,26.0,40.0,24.0,18.0,0.82999999999999996,20.0,25.0,35.0,32.0,20.0,39.0,39.0,6.0,38.0,9.0,26.0,4.0,20.0,26.0,25.0,18.0,24.0,35.0,40.0,38.0,9.0,45.0,27.0,20.0,32.0,33.0,18.0,40.0,26.0,15.0,45.0,18.0,27.0,22.0,26.0,22.0,20.0,32.0,21.0,18.0,26.0,6.0,9.0,40.0,32.0,26.0,18.0,20.0,22.0,22.0,35.0,21.0,20.0,18.0,18.0,38.0,30.0,21.0,21.0,21.0,24.0,33.0,33.0,28.0,16.0,28.0,24.0,21.0,32.0,26.0,18.0,20.0,24.0,24.0,36.0,30.0,22.0,35.0,27.0,30.0,36.0,9.0,44.0,45.0,22.0,30.0,34.0,28.0,0.33000000000000002,27.0,25.0,24.0,22.0,21.0,26.0,33.0,1.0,0.17000000000000001,25.0,36.0,36.0,30.0,26.0,65.0,42.0,32.0,30.0,24.0,24.0,24.0,22.0,18.0,16.0,45.0,21.0,18.0,9.0,48.0,16.0,25.0,38.0,22.0,16.0,33.0,9.0,38.0,40.0,14.0,16.0,9.0,10.0,6.0,40.0,32.0,20.0,28.0,24.0,28.0,24.0,20.0,45.0,26.0,21.0,27.0,18.0,26.0,22.0,28.0,22.0,27.0,42.0,27.0,25.0,27.0,20.0,48.0,34.0,22.0,33.0,32.0,26.0,49.0,1.0,33.0,4.0,24.0,32.0,27.0,21.0,32.0,20.0,21.0,30.0,21.0,22.0,4.0,39.0,20.0,21.0,44.0,42.0,21.0,24.0,25.0,22.0,22.0,39.0,26.0,4.0,22.0,26.0,1.5,36.0,18.0,25.0,22.0,20.0,26.0,22.0,32.0,21.0,21.0,36.0,39.0,25.0,45.0,36.0,30.0,20.0,21.0,1.5,25.0,18.0,63.0,18.0,15.0,28.0,36.0,28.0,10.0,36.0,30.0,22.0,14.0,22.0,51.0,18.0,45.0,28.0,21.0,27.0,36.0,27.0,15.0,27.0,26.0,22.0,24.0],'exception_index_list':[2,3,4,6,7,8,15,16,17,20,21,22,24,25,26,27,28,30,31,34,36,38,39,42,43,44,47,48,49,51,53,55,57,63,64,67,69,70,72,73,74,75,76,78,80,86,87,88,89,90,9
1,92,95,98,99,101,103,104,106,108,109,110,111,113,115,116,117,120,121,123,124,126,127,128,129,130,131,132,134,135,136,140,141,142,143,144,145,146,147,149,150,151,153,156,157,161,162,163,164,167,168,169,170,174,175,176,177,179,180,181,184,186,187,191,193,194,195,198,201,203,204,205,212,214,215,216,219,220,222,226,227,228,229,230,231,232,233,234,235,236,238,240,241,242,243,245,246,247,249,251,253,255,256,257,258,259,261,262,264,270,271,272,273,274,275,276,277,279,280,281,282,283,285,288,306,309,316,322,323,324,326,327,328,329,331,332,333,334,336,337,338,339,340,341,344,346,347,348,349,350,351,352,353,355,356,357,358,362,363,364,365,366,372,373,374,376,377,378,380,382,383,384,385,386,388,389,390,391,392,393,395,396,397,401,406,408,409,410,411,412,413,414,415,416,417,418,420,421,422,423,425,426,427,428,429,432,434,435,436,437,439,440,441,442,445,447,448,449,450,454,463,465,468,472,473,475,476,477,478,483,485,487,493,495,496,497,499,503,504,506,509,510,511,513,515,518,520,521,523,525,527,528,529,530,531,532,533,536,538,539,540,541,542,544,545,547,549,552,553,554,555,557,558,559,561,563,564,565,566,568,570,571,572,573,574,575,578,579,580,581,582,584,588,589,593,596,598,599,601,602,604,605,606,607,608,609,610,611,612,613,615,616,618,619,620,622,623,624,625,626,629,630,631,633,634,635,636,637,638,639,640,641,643,649,652,654,655,656,661,662,663,664,665,666,667,668,670,671,672,673,674,675,676,677,680,681,682,684,685,686,689,690,691,692,693,695,696,697,699,701,702,703,706,707,708,709,710,712,714,715,717,719,720,721,723,724,725,728,729,732,733,735,736,738,741,744,745,748,749,750,751,752,753,754,755,756,760,761,762,763,764,765,766,767,770,772,774,776,778,779,782,783,785,787,788,789,806,807,808,809,810,813,815,816,817,819,821,823,824,826,827,828,829,830,832,833,835,837,839,843,844,845,847,848,849,855,857,858,860,864,865,869,872,875,876,878,881,882,886,887,888,889,891,892,893,894,895,896,903,904,905,906,907,909,910,911,913,914,915,917,920,921,922,923,924,928,929,930,931,932,933,93
4,935,936,937,938,940,946,947,948,951,958,960,961,962,963,964,965,966,967,968,1181,1188,1258,1263,1264,1269,1272,1273,1274,1275,1276,1277,1278,1279,1283,1284,1291,1292,1293,1297,1298,1299,1301,1303,1304,1308,1309,1310,1311],'success':False} + df.expect_column_values_to_be_prime('Age')['result']['unexpected_list'], + [30.0, 25.0, 0.92000000000000004, 63.0, 39.0, 58.0, 50.0, 24.0, 36.0, 26.0, 25.0, 25.0, 28.0, 45.0, 39.0, + 30.0, 58.0, 45.0, 22.0, 48.0, 44.0, 60.0, 45.0, 58.0, 36.0, 33.0, 36.0, 36.0, 14.0, 49.0, 36.0, 46.0, 27.0, + 27.0, 26.0, 64.0, 39.0, 55.0, 70.0, 69.0, 36.0, 39.0, 38.0, 27.0, 27.0, 4.0, 27.0, 50.0, 48.0, 49.0, 48.0, + 39.0, 36.0, 30.0, 24.0, 28.0, 64.0, 60.0, 49.0, 44.0, 22.0, 60.0, 48.0, 35.0, 22.0, 45.0, 49.0, 54.0, 38.0, + 58.0, 45.0, 46.0, 25.0, 21.0, 48.0, 49.0, 45.0, 36.0, 55.0, 52.0, 24.0, 16.0, 44.0, 51.0, 42.0, 35.0, 35.0, + 38.0, 35.0, 50.0, 49.0, 46.0, 58.0, 42.0, 40.0, 42.0, 55.0, 50.0, 16.0, 21.0, 30.0, 15.0, 30.0, 46.0, 54.0, + 36.0, 28.0, 65.0, 33.0, 44.0, 55.0, 36.0, 58.0, 64.0, 64.0, 22.0, 28.0, 22.0, 18.0, 52.0, 46.0, 56.0, 33.0, + 27.0, 55.0, 54.0, 48.0, 18.0, 21.0, 34.0, 40.0, 36.0, 50.0, 39.0, 56.0, 28.0, 56.0, 56.0, 24.0, 18.0, 24.0, + 45.0, 40.0, 6.0, 57.0, 32.0, 62.0, 54.0, 52.0, 62.0, 63.0, 46.0, 52.0, 39.0, 18.0, 48.0, 49.0, 39.0, 46.0, + 64.0, 60.0, 60.0, 55.0, 54.0, 21.0, 57.0, 45.0, 50.0, 50.0, 27.0, 20.0, 51.0, 21.0, 36.0, 40.0, 32.0, 33.0, + 30.0, 28.0, 18.0, 34.0, 32.0, 57.0, 18.0, 36.0, 28.0, 51.0, 32.0, 28.0, 36.0, 4.0, 1.0, 12.0, 34.0, 26.0, + 27.0, 15.0, 45.0, 40.0, 20.0, 25.0, 36.0, 25.0, 42.0, 26.0, 26.0, 0.82999999999999996, 54.0, 44.0, 52.0, + 30.0, 30.0, 27.0, 24.0, 35.0, 8.0, 22.0, 30.0, 20.0, 21.0, 49.0, 8.0, 28.0, 18.0, 28.0, 22.0, 25.0, 18.0, + 32.0, 18.0, 42.0, 34.0, 8.0, 21.0, 38.0, 38.0, 35.0, 35.0, 38.0, 24.0, 16.0, 26.0, 45.0, 24.0, 21.0, 22.0, + 34.0, 30.0, 50.0, 30.0, 1.0, 44.0, 28.0, 6.0, 30.0, 45.0, 24.0, 24.0, 49.0, 48.0, 34.0, 32.0, 21.0, 18.0, + 21.0, 52.0, 42.0, 36.0, 21.0, 33.0, 34.0, 
22.0, 45.0, 30.0, 26.0, 34.0, 26.0, 22.0, 1.0, 25.0, 48.0, 57.0, + 27.0, 30.0, 20.0, 45.0, 46.0, 30.0, 48.0, 54.0, 64.0, 32.0, 18.0, 32.0, 26.0, 20.0, 39.0, 22.0, 24.0, 28.0, + 50.0, 20.0, 40.0, 42.0, 21.0, 32.0, 34.0, 33.0, 8.0, 36.0, 34.0, 30.0, 28.0, 0.80000000000000004, 25.0, + 50.0, 21.0, 25.0, 18.0, 20.0, 30.0, 30.0, 35.0, 22.0, 25.0, 25.0, 14.0, 50.0, 22.0, 27.0, 27.0, 30.0, 22.0, + 35.0, 30.0, 28.0, 12.0, 40.0, 36.0, 28.0, 32.0, 4.0, 36.0, 33.0, 32.0, 26.0, 30.0, 24.0, 18.0, 42.0, 16.0, + 35.0, 16.0, 25.0, 18.0, 20.0, 30.0, 26.0, 40.0, 24.0, 18.0, 0.82999999999999996, 20.0, 25.0, 35.0, 32.0, + 20.0, 39.0, 39.0, 6.0, 38.0, 9.0, 26.0, 4.0, 20.0, 26.0, 25.0, 18.0, 24.0, 35.0, 40.0, 38.0, 9.0, 45.0, + 27.0, 20.0, 32.0, 33.0, 18.0, 40.0, 26.0, 15.0, 45.0, 18.0, 27.0, 22.0, 26.0, 22.0, 20.0, 32.0, 21.0, 18.0, + 26.0, 6.0, 9.0, 40.0, 32.0, 26.0, 18.0, 20.0, 22.0, 22.0, 35.0, 21.0, 20.0, 18.0, 18.0, 38.0, 30.0, 21.0, + 21.0, 21.0, 24.0, 33.0, 33.0, 28.0, 16.0, 28.0, 24.0, 21.0, 32.0, 26.0, 18.0, 20.0, 24.0, 24.0, 36.0, 30.0, + 22.0, 35.0, 27.0, 30.0, 36.0, 9.0, 44.0, 45.0, 22.0, 30.0, 34.0, 28.0, 0.33000000000000002, 27.0, 25.0, + 24.0, 22.0, 21.0, 26.0, 33.0, 1.0, 0.17000000000000001, 25.0, 36.0, 36.0, 30.0, 26.0, 65.0, 42.0, 32.0, + 30.0, 24.0, 24.0, 24.0, 22.0, 18.0, 16.0, 45.0, 21.0, 18.0, 9.0, 48.0, 16.0, 25.0, 38.0, 22.0, 16.0, 33.0, + 9.0, 38.0, 40.0, 14.0, 16.0, 9.0, 10.0, 6.0, 40.0, 32.0, 20.0, 28.0, 24.0, 28.0, 24.0, 20.0, 45.0, 26.0, + 21.0, 27.0, 18.0, 26.0, 22.0, 28.0, 22.0, 27.0, 42.0, 27.0, 25.0, 27.0, 20.0, 48.0, 34.0, 22.0, 33.0, 32.0, + 26.0, 49.0, 1.0, 33.0, 4.0, 24.0, 32.0, 27.0, 21.0, 32.0, 20.0, 21.0, 30.0, 21.0, 22.0, 4.0, 39.0, 20.0, + 21.0, 44.0, 42.0, 21.0, 24.0, 25.0, 22.0, 22.0, 39.0, 26.0, 4.0, 22.0, 26.0, 1.5, 36.0, 18.0, 25.0, 22.0, + 20.0, 26.0, 22.0, 32.0, 21.0, 21.0, 36.0, 39.0, 25.0, 45.0, 36.0, 30.0, 20.0, 21.0, 1.5, 25.0, 18.0, 63.0, + 18.0, 15.0, 28.0, 36.0, 28.0, 10.0, 36.0, 30.0, 22.0, 14.0, 22.0, 51.0, 18.0, 45.0, 28.0, 
21.0, 27.0, 36.0, + 27.0, 15.0, 27.0, 26.0, 22.0, 24.0] ) primes = [3,5,7,11,13,17,23,31] df["primes"] = df.Age.map(lambda x: random.choice(primes)) self.assertEqual( - df.expect_column_values_to_be_prime("primes"), - {'exception_list': [], 'exception_index_list': [], 'success': True} + df.expect_column_values_to_be_prime("primes")['result']['unexpected_list'], + [] ) + def test_custom_expectation(self): + df = CustomPandasDataset({'x': [1,1,1,1,2]}) + df.set_default_expectation_argument("result_format", "COMPLETE") + + out = df.expect_column_values_to_be_prime('x') + t = {'out': {'unexpected_list':[1,1,1,1],'unexpected_index_list':[0,1,2,3], 'success':False}} + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + out = df.expect_column_values_to_equal_1('x', mostly=.8) + print(out) + t = {'out': {'unexpected_list':[2],'unexpected_index_list':[4],'success':True}} + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + # Ensure that Custom Data Set classes can properly call non-overridden methods from their parent class def test_base_class_expectation(self): - df = CustomPandasDataSet({ + df = CustomPandasDataset({ "aaa": [1, 2, 3, 4, 5], "bbb": [10, 20, 30, 40, 50], "ccc": [9, 10, 11, 12, 13], }) - self.assertEqual( df.expect_column_values_to_be_between("aaa", min_value=1, max_value=5)['success'], True @@ -91,43 +179,168 @@ def test_validate(self): "./tests/test_sets/Titanic.csv", expectations_config=my_expectations_config ) - 
my_df.set_default_expectation_argument("output_format", "COMPLETE") + my_df.set_default_expectation_argument("result_format", "COMPLETE") results = my_df.validate(catch_exceptions=False) # print json.dumps(results, indent=2) - with open('./tests/test_sets/expected_results_20170721.json') as f: + with open('./tests/test_sets/expected_results_20180303.json') as f: expected_results = json.load(f) - # print json.dumps(expected_results, indent=2) + #print json.dumps(expected_results, indent=2) self.maxDiff = None - #!!! This needs to be converted to unicode, I think - - # print json.dumps(results, indent=2) - # print '-'*80 - # print json.dumps(expected_results, indent=2) - # self.assertEqual( - # json.loads(json.dumps(results)), - # json.loads(json.dumps(expected_results)) - # ) assertDeepAlmostEqual(self, results, expected_results ) - #Now, change the results and ensure they are no longer equal + # Now, change the results and ensure they are no longer equal results[0] = {} self.assertNotEqual(results, expected_results ) - + # Finally, confirm that only_return_failures works validation_results = my_df.validate(only_return_failures=True) - # print json.dumps(validation_results, indent=2) + #print json.dumps(validation_results) assertDeepAlmostEqual( self, validation_results, - {"results": [{"exception_traceback": None, "expectation_type": "expect_column_values_to_be_in_set", "success": False, "exception_list": ["*"], "raised_exception": False, "kwargs": {"column": "PClass", "output_format": "COMPLETE", "values_set": ["1st", "2nd", "3rd"]}, "exception_index_list": [456]}]} + {"results": [ + {"expectation_config": { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": {"column": "PClass", "values_set": ["1st", "2nd", "3rd"], "result_format": "COMPLETE"} + }, + "success": False, + "exception_info": {"exception_message": None, + "exception_traceback": None, + "raised_exception": False}, + "result": {"partial_unexpected_index_list": [456], 
"unexpected_count": 1, "unexpected_list": ["*"], + "unexpected_percent": 0.0007616146230007616, "element_count": 1313, + "missing_percent": 0.0, "partial_unexpected_counts": [{"count": 1, "value": "*"}], + "partial_unexpected_list": ["*"], + "unexpected_percent_nonmissing": 0.0007616146230007616, "missing_count": 0, + "unexpected_index_list": [456]}}]} + + ) + + def test_validate_catch_non_existent_expectation(self): + df = ge.dataset.PandasDataset({ + "x" : [1,2,3,4,5] + }) + + validation_config_non_existent_expectation = { + "dataset_name" : None, + "meta": { + "great_expectations.__version__": ge.__version__ + }, + "expectations" : [{ + "expectation_type" : "non_existent_expectation", + "kwargs" : { + "column" : "x" + } + }] + } + results = df.validate(expectations_config=validation_config_non_existent_expectation)['results'] + + self.assertIn( + "object has no attribute 'non_existent_expectation'", + results[0]['exception_info']['exception_message'] + ) + + def test_validate_catch_invalid_parameter(self): + df = ge.dataset.PandasDataset({ + "x": [1, 2, 3, 4, 5] + }) + + validation_config_invalid_parameter = { + "dataset_name" : None, + "meta": { + "great_expectations.__version__": ge.__version__ + }, + "expectations" : [{ + "expectation_type" : "expect_column_values_to_be_between", + "kwargs" : { + "column" : "x", + "min_value" : 6, + "max_value" : 5 + } + }] + } + + results = df.validate(expectations_config=validation_config_invalid_parameter)['results'] + print(results[0]['exception_info']) + self.assertIn( + "min_value cannot be greater than max_value", + results[0]['exception_info']['exception_message'] + ) + + def test_top_level_validate(self): + my_df = pd.DataFrame({ + "x" : [1,2,3,4,5] + }) + validation_result = ge.validate(my_df, { + "dataset_name" : None, + "meta": { + "great_expectations.__version__": ge.__version__ + }, + "expectations" : [{ + "expectation_type" : "expect_column_to_exist", + "kwargs" : { + "column" : "x" + } + },{ + 
"expectation_type" : "expect_column_values_to_be_between", + "kwargs" : { + "column" : "x", + "min_value" : 3, + "max_value" : 5 + } + }] + }) + self.assertEqual( + validation_result, + { + "results": [ + { + "expectation_config": { + "kwargs": { + "column": "x" + }, + "expectation_type": "expect_column_to_exist", + }, + "exception_info": {"exception_message": None, + "exception_traceback": None, + "raised_exception": False}, + "success": True + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_be_between", + "kwargs": { + "column": "x", + "max_value": 5, + "min_value": 3 + } + }, + "exception_info": {"exception_message": None, + "exception_traceback": None, + "raised_exception": False}, + "success": False, + "result": {'element_count': 5, + 'missing_count': 0, + 'missing_percent': 0.0, + "unexpected_percent": 0.4, + "partial_unexpected_list": [ + 1, + 2 + ], + "unexpected_percent_nonmissing": 0.4, + "unexpected_count": 2 + } + } + ] + } ) @@ -152,6 +365,25 @@ def test_validate(self): 7 ) +class TestIO(unittest.TestCase): + + def test_read_csv(self): + script_path = os.path.dirname(os.path.realpath(__file__)) + df = ge.read_csv( + script_path+'/test_sets/Titanic.csv', + ) + + def test_read_json(self): + script_path = os.path.dirname(os.path.realpath(__file__)) + df = ge.read_json( + script_path+'/test_sets/test_json_data_file.json', + ) + + df = ge.read_json( + script_path+'/test_sets/nested_test_json_data_file.json', + accessor_func= lambda x: x["data"] + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_pandas_dataset.py b/tests/test_pandas_dataset.py index 728d1262a029..8b2c2ca94078 100644 --- a/tests/test_pandas_dataset.py +++ b/tests/test_pandas_dataset.py @@ -1,250 +1,89 @@ +from __future__ import division + import unittest import json import numpy as np import datetime +import pandas as pd import great_expectations as ge +from .test_utils import assertDeepAlmostEqual class 
TestPandasDataset(unittest.TestCase): - def test_expect_column_to_exist(self): - print("=== test_expect_column_to_exist ===") - with open("./tests/test_sets/expect_column_to_exist_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(t) - out = D.expect_column_to_exist(**t['in']) - - if 'out' in t: - self.assertEqual(out, t['out']) - - if 'error' in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) - - def test_expect_table_row_count_to_be_between(self): - - # Pulled out into expect_table_row_count_to_be_between_test_set.json - # Data for testing - # D = ge.dataset.PandasDataSet({ - # 'c1' : [4,5,6,7], - # 'c2' : ['a','b','c','d'], - # 'c3' : [None,None,None,None] - # }) - # D.set_default_expectation_argument("output_format", "COMPLETE") - - # # Tests - # T = [ - # { - # 'in':[3,5], - # 'kwargs':{}, - # 'out':{'success':True, 'true_value':4}}, - # { - # 'in':[0,1], - # 'kwargs':{}, - # 'out':{'success':False, 'true_value':4}}, - # { - # 'in':[4,4], - # 'kwargs':{}, - # 'out':{'success':True, 'true_value':4}}, - # { - # 'in':[1,0], - # 'kwargs':{}, - # 'out':{'success':False, 'true_value':4}} - # ] - - # for t in T: - # out = D.expect_table_row_count_to_be_between(*t['in'], **t['kwargs']) - # self.assertEqual(out, t['out']) - - D = ge.dataset.PandasDataSet({ - 'c1':[1,None,3,None,5], - 'c2':[None,4,5,None,None], - 'c3':[None,None,None,None,None] - }) - D.set_default_expectation_argument("output_format", "COMPLETE") - - T = [ - { - 'in':[5,6], - 'kwargs':{}, - 'out':{'success':True, 'true_value':5}}, - { - 'in':[2,4], - 'kwargs':{}, - 'out':{'success':False, 'true_value':5}}, - { - 'in':[5,5], - 'kwargs':{}, - 'out':{'success':True, 'true_value':5}}, - { - 'in':[2,1], - 'kwargs':{}, - 'out':{'success':False, 'true_value':5}} - 
] - - for t in T: - out = D.expect_table_row_count_to_be_between(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) - - def test_expect_table_row_count_to_be_between(self): - print("=== test_expect_table_row_count_to_be_between ===") - with open("./tests/test_sets/expect_table_row_count_to_be_between_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(t) - out = D.expect_table_row_count_to_be_between(**t['in']) - - if 'out' in t: - self.assertEqual(out, t['out']) - - if 'error' in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) - - - def test_expect_table_row_count_to_equal(self): - - D = ge.dataset.PandasDataSet({ - 'c1':[4,5,6,7], - 'c2':['a','b','c','d'], - 'c3':[None,None,None,None] - }) - D.set_default_expectation_argument("output_format", "COMPLETE") - - # Tests - T = [ - { - 'in':[4], - 'kwargs':{}, - 'out':{'success':True, 'true_value':4}}, - { - 'in':[5], - 'kwargs':{}, - 'out':{'success':False, 'true_value':4}}, - { - 'in':[3], - 'kwargs':{}, - 'out':{'success':False, 'true_value':4}}, - { - 'in':[0], - 'kwargs':{}, - 'out':{'success':False, 'true_value':4}} - ] - - for t in T: - out = D.expect_table_row_count_to_equal(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) - - D = ge.dataset.PandasDataSet({ - 'c1':[1,None,3,None,5], - 'c2':[None,4,5,None,None], - 'c3':[None,None,None,None,None] - }) - D.set_default_expectation_argument("output_format", "COMPLETE") - - T = [ - { - 'in':[5], - 'kwargs':{}, - 'out':{'success':True, 'true_value':5}}, - { - 'in':[3], - 'kwargs':{}, - 'out':{'success':False, 'true_value':5}} - ] - - for t in T: - out = D.expect_table_row_count_to_equal(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) - - with self.assertRaises(ValueError): - 
D.expect_table_row_count_to_equal("c1", value="hello") - - def test_expect_column_values_to_be_unique(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'a' : ['2', '2'], 'b' : [1, '2'], 'c' : [1, 1], 'd' : [1, '1'], 'n' : [None, np.nan] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") # Tests for D T = [ { 'in':{'column':'a'}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':['2','2']}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':['2','2']}}, { 'in':{'column':'b'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'c'}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':[1,1]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[1,1]}}, { 'in':{'column':'d'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'n'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = D.expect_column_values_to_be_unique(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'a' : ['2', '2', '2', '2'], 'b' : [1, '2', '2', '3'], 'n' : [None, None, np.nan, None], }) - df.set_default_expectation_argument("output_format", "COMPLETE") + df.set_default_expectation_argument("result_format", "COMPLETE") # Tests for df T = [ { 'in':['a'], 'kwargs':{}, 
- 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':['2','2','2','2']}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':['2','2','2','2']}}, { 'in':['b'], 'kwargs':{'mostly':.25}, - 'out':{'success':True, 'exception_index_list':[1,2], 'exception_list':['2','2']}}, + 'out':{'success':True, 'unexpected_index_list':[1,2], 'unexpected_list':['2','2']}}, { 'in':['b'], 'kwargs':{'mostly':.75}, - 'out':{'success':False, 'exception_index_list':[1,2], 'exception_list':['2','2']}}, + 'out':{'success':False, 'unexpected_index_list':[1,2], 'unexpected_list':['2','2']}}, { 'in':['a'], 'kwargs':{'mostly':1}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':['2','2','2','2']}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':['2','2','2','2']}}, { 'in':['n'], 'kwargs':{'mostly':.2}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = df.expect_column_values_to_be_unique(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_not_be_null(self): @@ -256,103 +95,106 @@ def test_expect_column_values_to_not_be_null(self): T: Column with non None or np.nan """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [2, None], 'y' : [2, np.nan], 'n' : [None, np.nan], 'z' : [2, 5], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'y'}, - 'out':{'success':False, 'exception_index_list':[1], 'exception_list':[None]}}, + 'out':{'success':False, 'unexpected_index_list':[1], 
'unexpected_list':[None]}}, { 'in':{'column':'n'}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':[None, None]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[None, None]}}, # { # 'in':{'column':'y'}, - # 'out':{'success':False, 'exception_index_list':[1], 'exception_list':[np.nan]}}, + # 'out':{'success':False, 'unexpected_index_list':[1], 'unexpected_list':[np.nan]}}, # { # 'in':{'column':'n'}, - # 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':[None, np.nan]}}, + # 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[None, np.nan]}}, { 'in':{'column':'x'}, - 'out':{'success':False, 'exception_index_list':[1], 'exception_list':[None]}}, + 'out':{'success':False, 'unexpected_index_list':[1], 'unexpected_list':[None]}}, { 'in':{'column':'z'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = D.expect_column_values_to_not_be_null(**t['in']) - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - - D2 = ge.dataset.PandasDataSet({ + D2 = ge.dataset.PandasDataset({ 'a' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'b' : [1, 2, 3, 4, 5, 6, 7, 8, 9, None], }) - D2.set_default_expectation_argument("output_format", "COMPLETE") + D2.set_default_expectation_argument("result_format", "COMPLETE") #assert_equal( # D.expect_column_values_to_not_be_null('x'), - # {'success':False, 'exception_list':[None]} + # {'success':False, 'unexpected_list':[None]} #) T = [ { 'in':['a'], 'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['a'], 
'kwargs':{'mostly':.90}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['b'], 'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[9], 'exception_list':[None]}}, + 'out':{'success':False, 'unexpected_index_list':[9], 'unexpected_list':[None]}}, { 'in':['b'], 'kwargs':{'mostly':.95}, - 'out':{'success':False, 'exception_index_list':[9], 'exception_list':[None]}}, + 'out':{'success':False, 'unexpected_index_list':[9], 'unexpected_list':[None]}}, { 'in':['b'], 'kwargs':{'mostly':.90}, - 'out':{'success':True, 'exception_index_list':[9], 'exception_list':[None]}} + 'out':{'success':True, 'unexpected_index_list':[9], 'unexpected_list':[None]}} ] for t in T: out = D2.expect_column_values_to_not_be_null(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - - D3 = ge.dataset.PandasDataSet({ + D3 = ge.dataset.PandasDataset({ 'a' : [None, None, None, None], }) - D3.set_default_expectation_argument("output_format", "COMPLETE") + D3.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':['a'], 'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':[None,None,None,None]} + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':[None,None,None,None]} }, { 'in':['a'], 'kwargs':{"mostly":.95}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':[None,None,None,None]} + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':[None,None,None,None]} }, ] for t in T: out = D3.expect_column_values_to_not_be_null(*t['in'], **t['kwargs']) # out = D3.expect_column_values_to_be_null(*t['in'], **t['kwargs']) - 
self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_be_null(self): """ @@ -364,82 +206,84 @@ def test_expect_column_values_to_be_null(self): T: Column with non None or np.nan values """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [2, None, 2], 'y' : [2, np.nan, 2], 'z' : [2, 5, 7], 'a' : [None, np.nan, None], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'x'}, - 'out':{'success':False, 'exception_index_list':[0,2], 'exception_list':[2,2]}}, + 'out':{'success':False, 'unexpected_index_list':[0,2], 'unexpected_list':[2,2]}}, { 'in':{'column':'y'}, - 'out':{'success':False, 'exception_index_list':[0,2], 'exception_list':[2,2]}}, + 'out':{'success':False, 'unexpected_index_list':[0,2], 'unexpected_list':[2,2]}}, { 'in':{'column':'z'}, - 'out':{'success':False, 'exception_index_list':[0,1,2], 'exception_list':[2,5,7]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2], 'unexpected_list':[2,5,7]}}, { 'in':{'column':'a'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'x', 'mostly':.2}, - 'out':{'success':True, 'exception_index_list':[0,2], 'exception_list':[2,2]}}, + 'out':{'success':True, 'unexpected_index_list':[0,2], 'unexpected_list':[2,2]}}, { 'in':{'column':'x', 'mostly':.8}, - 'out':{'success':False, 'exception_index_list':[0,2], 'exception_list':[2,2]} + 'out':{'success':False, 'unexpected_index_list':[0,2], 'unexpected_list':[2,2]} }, { 'in':{'column':'a', 'mostly':.5}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 
'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = D.expect_column_values_to_be_null(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - D3 = ge.dataset.PandasDataSet({ + D3 = ge.dataset.PandasDataset({ 'a' : [None, None, None, None], 'b' : [np.nan, np.nan, np.nan, np.nan], }) - D3.set_default_expectation_argument("output_format", "COMPLETE") + D3.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':['a'], 'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]} }, { 'in':['a'], 'kwargs':{"mostly":.95}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]} }, { 'in':['b'], 'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]} }, { 'in':['b'], 'kwargs':{"mostly":.95}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]} }, ] for t in T: # out = D3.expect_column_values_to_not_be_null(*t['in'], **t['kwargs']) out = D3.expect_column_values_to_be_null(*t['in'], **t['kwargs']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_be_of_type(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1.0,2.2,5.3], 
'z' : ['hello', 'jello', 'mello'], @@ -448,44 +292,46 @@ def test_expect_column_values_to_be_of_type(self): 's' : ['hello', 'jello', 1], 's1' : ['hello', 2.0, 1], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{"column":"x","type_":"int","target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"x","type_":"string","target_datasource":"numpy"}, - 'out':{'success':False, 'exception_list':[1,2,4], 'exception_index_list':[0,1,2]}}, + 'out':{'success':False, 'unexpected_list':[1,2,4], 'unexpected_index_list':[0,1,2]}}, { 'in':{"column":"y","type_":"float","target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"y","type_":"float","target_datasource":"numpy"}, - 'out':{'success':False, 'exception_list':[1.0,2.2,5.3], 'exception_index_list':[0,1,2]}}, + 'out':{'success':False, 'unexpected_list':[1.0,2.2,5.3], 'unexpected_index_list':[0,1,2]}}, { 'in':{"column":"z","type_":"string","target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"b","type_":"boolean","target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}} + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}} #{ # 'in':['n','null','python'], # 'kwargs':{}, - # 'out':{'success':False, 'exception_list':[np.nan]}}, + # 'out':{'success':False, 'unexpected_list':[np.nan]}}, #{ # 'in':['n','null','python'], # 'kwargs':{'mostly':.5}, - # 'out':{'success':True, 'exception_list':[np.nan]}} + # 'out':{'success':True, 
'unexpected_list':[np.nan]}} ] for t in T: out = D.expect_column_values_to_be_of_type(**t['in']) - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_be_in_type_list(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1.0,2.2,5.3], 'z' : ['hello', 'jello', 'mello'], @@ -494,40 +340,41 @@ def test_expect_column_values_to_be_in_type_list(self): 's' : ['hello', 'jello', 1], 's1' : ['hello', 2.0, 1], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{"column":"x","type_list":["int"],"target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"x","type_list":["string"],"target_datasource":"numpy"}, - 'out':{'success':False, 'exception_list':[1,2,4], 'exception_index_list':[0,1,2]}}, + 'out':{'success':False, 'unexpected_list':[1,2,4], 'unexpected_index_list':[0,1,2]}}, { 'in':{"column":"y","type_list":["float"],"target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"y","type_list":["float"],"target_datasource":"numpy"}, - 'out':{'success':False, 'exception_list':[1.0,2.2,5.3], 'exception_index_list':[0,1,2]}}, + 'out':{'success':False, 'unexpected_list':[1.0,2.2,5.3], 'unexpected_index_list':[0,1,2]}}, { 'in':{"column":"z","type_list":["string"],"target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 
'unexpected_index_list':[]}}, { 'in':{"column":"b","type_list":["boolean"],"target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{"column":"s", "type_list":["string", "int"], "target_datasource":"python"}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, #{ # 'in':['n','null','python'], # 'kwargs':{'mostly':.5}, - # 'out':{'success':True, 'exception_list':[np.nan]}} + # 'out':{'success':True, 'unexpected_list':[np.nan]}} ] for t in T: out = D.expect_column_values_to_be_in_type_list(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_be_in_set(self): """ @@ -535,71 +382,74 @@ def test_expect_column_values_to_be_in_set(self): """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'y' : [1,2,5], 'z' : ['hello', 'jello', 'mello'], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':['x', [1,2,4]], - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['x', [4,2]], - 'out':{'success':False, 'exception_index_list':[0], 'exception_list':[1]}}, + 'out':{'success':False, 'unexpected_index_list':[0], 'unexpected_list':[1]}}, { 'in':['y', []], - 'out':{'success':False, 'exception_index_list':[0,1,2], 'exception_list':[1,2,5]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2], 'unexpected_list':[1,2,5]}}, { 'in':['z', ['hello','jello','mello']], - 
'out': {'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out': {'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['z', ['hello']], - 'out': {'success':False, 'exception_index_list':[1,2], 'exception_list':['jello','mello']}} + 'out': {'success':False, 'unexpected_index_list':[1,2], 'unexpected_list':['jello','mello']}} ] for t in T: out = D.expect_column_values_to_be_in_set(*t['in']) - self.assertEqual(out,t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) self.assertRaises( TypeError, D.expect_column_values_to_be_in_set, 'x', None ) - D2 = ge.dataset.PandasDataSet({ + D2 = ge.dataset.PandasDataset({ 'x' : [1,1,2,None], 'y' : [None,None,None,None], }) - D2.set_default_expectation_argument("output_format", "COMPLETE") + D2.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'x', 'values_set':[1,2]}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'x', 'values_set':[1]}, - 'out':{'success':False, 'exception_index_list':[2], 'exception_list':[2]}}, + 'out':{'success':False, 'unexpected_index_list':[2], 'unexpected_list':[2]}}, { 'in':{'column':'x', 'values_set':[1], 'mostly':.66}, - 'out':{'success':True, 'exception_index_list':[2], 'exception_list':[2]}}, + 'out':{'success':True, 'unexpected_index_list':[2], 'unexpected_list':[2]}}, { 'in':{'column':'x', 'values_set':[2], 'mostly':.66}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':[1,1]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[1,1]}}, { 'in':{'column':'y', 'values_set':[]}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 
'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'y', 'values_set':[2], 'mostly':.5}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = D2.expect_column_values_to_be_in_set(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_not_be_in_set(self): """ @@ -608,111 +458,119 @@ def test_expect_column_values_to_not_be_in_set(self): -Running expectations only on nonmissing values """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'z' : ['hello', 'jello', 'mello'], 'a' : [1,1,2], 'n' : [None,None,2], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':['x', [1,2]],'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':[1,2]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[1,2]}}, { 'in':['x',[5,6]],'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['z',['hello', 'jello']],'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[0,1], 'exception_list':['hello', 'jello']}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':['hello', 'jello']}}, { 'in':['z',[]],'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':['a', [1]],'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[0,1], 
'exception_list':[1, 1]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1], 'unexpected_list':[1, 1]}}, { 'in':['n', [2]], 'kwargs':{}, - 'out':{'success':False, 'exception_index_list':[2], 'exception_list':[2]}}, + 'out':{'success':False, 'unexpected_index_list':[2], 'unexpected_list':[2]}}, { 'in':['n', []], 'kwargs':{}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { - 'in':['a', [1]], + 'in':['a', [1]], 'kwargs':{'mostly':.1}, - 'out':{'success':True, 'exception_index_list':[0,1], 'exception_list':[1, 1]}}, + 'out':{'success':True, 'unexpected_index_list':[0,1], 'unexpected_list':[1, 1]}}, { 'in':['n', [2]], 'kwargs':{'mostly':.9}, - 'out':{'success':False, 'exception_index_list':[2], 'exception_list':[2]}} + 'out':{'success':False, 'unexpected_index_list':[2], 'unexpected_list':[2]}} ] for t in T: out = D.expect_column_values_to_not_be_in_set(*t['in'],**t['kwargs']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - def test_expect_column_values_to_be_between(self): - """ + # def test_expect_column_values_to_be_between(self): + # """ - """ + # """ - with open("./tests/test_sets/expect_column_values_to_be_between_test_set_ADJ.json") as f: - fixture = json.load(f) + # with open("./tests/test_sets/expect_column_values_to_be_between_test_set.json") as f: + # fixture = json.load(f) - dataset = fixture["dataset"] - tests = fixture["tests"] + # dataset = fixture["dataset"] + # tests = fixture["tests"] - D = ge.dataset.PandasDataSet(dataset) - D.set_default_expectation_argument("output_format", "COMPLETE") + # D = ge.dataset.PandasDataset(dataset) + # D.set_default_expectation_argument("result_format", "COMPLETE") - self.maxDiff = None + # 
self.maxDiff = None - for t in tests: - out = D.expect_column_values_to_be_between(**t['in']) + # for t in tests: + # out = D.expect_column_values_to_be_between(**t['in']) - # print '-'*80 - print(t) - # print(json.dumps(out, indent=2)) + # # print '-'*80 + # print(t) + # # print(json.dumps(out, indent=2)) - if 'out' in t: - self.assertEqual(out, t['out']) + # if 'out' in t: + # self.assertEqual(t['out']['success'], out['success']) + # if 'unexpected_index_list' in t['out']: + # self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + # if 'unexpected_list' in t['out']: + # self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) - if 'error' in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) + # if 'error' in t: + # self.assertEqual(out['exception_info']['raised_exception'], True) + # self.assertIn(t['error']['traceback_substring'], out['exception_info']['exception_traceback']) def test_expect_column_value_lengths_to_be_between(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 's1':['smart','silly','sassy','slimy','sexy'], 's2':['cool','calm','collected','casual','creepy'], 's3':['cool','calm','collected','casual',None], 's4':[1,2,3,4,5] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'s1', 'min_value':3, 'max_value':5}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'s2', 'min_value':4, 'max_value':6}, - 'out':{'success':False, 'exception_index_list':[2], 'exception_list':['collected']}}, + 'out':{'success':False, 'unexpected_index_list':[2], 'unexpected_list':['collected']}}, { 'in':{'column':'s2', 'min_value':None, 'max_value':10}, - 'out':{'success':True, 
'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'s3', 'min_value':None, 'max_value':10}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}} ] for t in T: out = D.expect_column_value_lengths_to_be_between(**t['in']) - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + with self.assertRaises(TypeError): D.expect_column_value_lengths_to_be_between(**{'column':'s4', 'min_value':None, 'max_value':10}) @@ -727,121 +585,128 @@ def test_expect_column_values_to_match_regex(self): Tested mostly alphabet regex """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : ['aa', 'ab', 'ac', 'a1', None], 'y' : ['aa', 'ab', 'ac', 'ba', 'ca'], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") - D2 = ge.dataset.PandasDataSet({ + D2 = ge.dataset.PandasDataset({ 'a' : ['aaa', 'abb', 'acc', 'add', 'bee'], 'b' : ['aaa', 'abb', 'acc', 'bdd', None], 'c' : [ None, None, None, None, None], }) - D2.set_default_expectation_argument("output_format", "COMPLETE") + D2.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'x', 'regex':'^a'}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{'column':'x', 'regex':'aa'}, - 'out':{'success':False, 'exception_list':['ab', 'ac', 'a1'], 'exception_index_list':[1,2,3]}}, + 'out':{'success':False, 'unexpected_list':['ab', 'ac', 'a1'], 'unexpected_index_list':[1,2,3]}}, { 'in':{'column':'x', 'regex':'a[a-z]'}, - 
'out':{'success':False, 'exception_list':['a1'], 'exception_index_list':[3]}}, + 'out':{'success':False, 'unexpected_list':['a1'], 'unexpected_index_list':[3]}}, { 'in':{'column':'y', 'regex':'[abc]{2}'}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{'column':'y', 'regex':'[z]'}, - 'out':{'success':False, 'exception_list':['aa', 'ab', 'ac', 'ba', 'ca'], 'exception_index_list':[0,1,2,3,4]}} + 'out':{'success':False, 'unexpected_list':['aa', 'ab', 'ac', 'ba', 'ca'], 'unexpected_index_list':[0,1,2,3,4]}} ] for t in T: out = D.expect_column_values_to_match_regex(**t['in']) - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) T = [ { 'in':{'column':'a', 'regex':'^a', 'mostly':.9}, - 'out':{'success':False, 'exception_list':['bee'], 'exception_index_list':[4]}}, + 'out':{'success':False, 'unexpected_list':['bee'], 'unexpected_index_list':[4]}}, { 'in':{'column':'a', 'regex':'^a', 'mostly':.8}, - 'out':{'success':True, 'exception_list':['bee'], 'exception_index_list':[4]}}, + 'out':{'success':True, 'unexpected_list':['bee'], 'unexpected_index_list':[4]}}, { 'in':{'column':'a', 'regex':'^a', 'mostly':.7}, - 'out':{'success':True, 'exception_list':['bee'], 'exception_index_list':[4]}}, + 'out':{'success':True, 'unexpected_list':['bee'], 'unexpected_index_list':[4]}}, { 'in':{'column':'b', 'regex':'^a', 'mostly':.9}, - 'out':{'success':False, 'exception_list':['bdd'], 'exception_index_list':[3]}}, + 'out':{'success':False, 'unexpected_list':['bdd'], 'unexpected_index_list':[3]}}, { 'in':{'column':'b', 'regex':'^a', 'mostly':.75}, - 'out':{'success':True, 'exception_list':['bdd'], 'exception_index_list':[3]}}, + 'out':{'success':True, 
'unexpected_list':['bdd'], 'unexpected_index_list':[3]}}, { 'in':{'column':'b', 'regex':'^a', 'mostly':.5}, - 'out':{'success':True, 'exception_list':['bdd'], 'exception_index_list':[3]}}, + 'out':{'success':True, 'unexpected_list':['bdd'], 'unexpected_index_list':[3]}}, { 'in':{'column':'c', 'regex':'^a'}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}}, { 'in':{'column':'c', 'regex':'^a', 'mostly':.5}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list':[]}} + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list':[]}} ] for t in T: out = D2.expect_column_values_to_match_regex(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_not_match_regex(self): #!!! 
Need to test mostly and suppress_exceptions - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : ['aa', 'ab', 'ac', 'a1', None, None, None], 'y' : ['axxx', 'exxxx', 'ixxxx', 'oxxxxx', 'uxxxxx', 'yxxxxx', 'zxxxx'], 'z' : [None, None, None, None, None, None, None] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'x', 'regex':'^a'}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':['aa', 'ab', 'ac', 'a1']}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':['aa', 'ab', 'ac', 'a1']}}, { 'in':{'column':'x', 'regex':'^b'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'y', 'regex':'^z'}, - 'out':{'success':False, 'exception_index_list':[6], 'exception_list':['zxxxx']}} + 'out':{'success':False, 'unexpected_index_list':[6], 'unexpected_list':['zxxxx']}} ] for t in T: out = D.expect_column_values_to_not_match_regex(**t['in']) - self.assertEqual(out, t['out']) - - - def test_expect_column_values_to_match_regex_list(self): - with open("./tests/test_sets/expect_column_values_to_match_regex_list_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - out = D.expect_column_values_to_match_regex_list(**t['in']) - self.assertEqual(out, t['out']) - + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + # def test_expect_column_values_to_match_regex_list(self): + # with open("./tests/test_sets/expect_column_values_to_match_regex_list_test_set.json") 
as f: + # J = json.load(f) + # D = ge.dataset.PandasDataset(J["dataset"]) + # D.set_default_expectation_argument("result_format", "COMPLETE") + # T = J["tests"] + + # self.maxDiff = None + + # for t in T: + # out = D.expect_column_values_to_match_regex_list(**t['in']) + # self.assertEqual(t['out']['success'], out['success']) + # if 'unexpected_index_list' in t['out']: + # self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + # if 'unexpected_list' in t['out']: + # self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) def test_expect_column_values_to_match_strftime_format(self): """ """ - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,2,4], 'us_dates' : ['4/30/2017','4/30/2017','7/4/1776'], 'us_dates_type_error' : ['4/30/2017','4/30/2017', 5], @@ -849,16 +714,16 @@ def test_expect_column_values_to_match_strftime_format(self): 'almost_iso8601_val_error' : ['1977-05-55T00:00:00', '1980-05-21T13:47:59', '2017-06-12T23:57:59'], 'already_datetime' : [datetime.datetime(2015,1,1), datetime.datetime(2016,1,1), datetime.datetime(2017,1,1)] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'us_dates', 'strftime_format':'%m/%d/%Y'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]} + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]} }, { 'in':{'column':'us_dates_type_error','strftime_format':'%m/%d/%Y', 'mostly': 0.5, 'catch_exceptions': True}, - # 'out':{'success':True, 'exception_index_list':[2], 'exception_list':[5]}}, + # 'out':{'success':True, 'unexpected_index_list':[2], 'unexpected_list':[5]}}, 'error':{ 'traceback_substring' : 'TypeError' }, @@ -871,13 +736,13 @@ def test_expect_column_values_to_match_strftime_format(self): }, { 'in':{'column':'almost_iso8601','strftime_format':'%Y-%m-%dT%H:%M:%S'}, - 
'out':{'success':True,'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True,'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'almost_iso8601_val_error','strftime_format':'%Y-%m-%dT%H:%M:%S'}, - 'out':{'success':False,'exception_index_list':[0], 'exception_list':['1977-05-55T00:00:00']}}, + 'out':{'success':False,'unexpected_index_list':[0], 'unexpected_list':['1977-05-55T00:00:00']}}, { 'in':{'column':'already_datetime','strftime_format':'%Y-%m-%d', 'catch_exceptions':True}, - # 'out':{'success':False,'exception_index_list':[0], 'exception_list':['1977-05-55T00:00:00']}, + # 'out':{'success':False,'unexpected_index_list':[0], 'unexpected_list':['1977-05-55T00:00:00']}, 'error':{ 'traceback_substring' : 'TypeError: Values passed to expect_column_values_to_match_strftime_format must be of type string.' }, @@ -887,41 +752,45 @@ def test_expect_column_values_to_match_strftime_format(self): for t in T: out = D.expect_column_values_to_match_strftime_format(**t['in']) if 'out' in t: - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) elif 'error' in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) + self.assertEqual(out['exception_info']['raised_exception'], True) + self.assertIn(t['error']['traceback_substring'], out['exception_info']['exception_traceback']) def test_expect_column_values_to_be_dateutil_parseable(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'c1':['03/06/09','23 April 1973','January 9, 2016'], 'c2':['9/8/2012','covfefe',25], 'c3':['Jared','June 1, 2013','July 18, 1976'], 'c4':['1', '2', '49000004632'], 'already_datetime' 
: [datetime.datetime(2015,1,1), datetime.datetime(2016,1,1), datetime.datetime(2017,1,1)], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column': 'c1'}, - 'out':{'success':True, 'exception_list':[], 'exception_index_list': []}}, + 'out':{'success':True, 'unexpected_list':[], 'unexpected_index_list': []}}, { 'in':{"column":'c2', "catch_exceptions":True}, - # 'out':{'success':False, 'exception_list':['covfefe', 25], 'exception_index_list': [1, 2]}}, + # 'out':{'success':False, 'unexpected_list':['covfefe', 25], 'unexpected_index_list': [1, 2]}}, 'error':{ 'traceback_substring' : 'TypeError: Values passed to expect_column_values_to_be_dateutil_parseable must be of type string' }, }, { 'in':{"column":'c3'}, - 'out':{'success':False, 'exception_list':['Jared'], 'exception_index_list': [0]}}, + 'out':{'success':False, 'unexpected_list':['Jared'], 'unexpected_index_list': [0]}}, { 'in':{'column': 'c3', 'mostly':.5}, - 'out':{'success':True, 'exception_list':['Jared'], 'exception_index_list': [0]} + 'out':{'success':True, 'unexpected_list':['Jared'], 'unexpected_index_list': [0]} }, { 'in':{'column': 'c4'}, - 'out':{'success':False, 'exception_list':['49000004632'], 'exception_index_list': [2]} + 'out':{'success':False, 'unexpected_list':['49000004632'], 'unexpected_index_list': [2]} }, { 'in':{'column':'already_datetime', 'catch_exceptions':True}, @@ -932,10 +801,12 @@ def test_expect_column_values_to_be_dateutil_parseable(self): for t in T: out = D.expect_column_values_to_be_dateutil_parseable(**t['in']) if 'out' in t: - self.assertEqual(out, t['out']) + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) elif 'error' in t: - self.assertEqual(out['raised_exception'], True) - 
self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) + self.assertEqual(out['exception_info']['raised_exception'], True) + self.assertIn(t['error']['traceback_substring'], out['exception_info']['exception_traceback']) def test_expect_column_values_to_be_json_parseable(self): @@ -943,151 +814,98 @@ def test_expect_column_values_to_be_json_parseable(self): d2 = json.dumps({'i':1,'j':2,'k':[3,4,5]}) d3 = json.dumps({'i':'a', 'j':'b', 'k':'c'}) d4 = json.dumps({'i':[4,5], 'j':[6,7], 'k':[8,9], 'l':{4:'x', 5:'y', 6:'z'}}) - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'json_col':[d1,d2,d3,d4], 'not_json':[4,5,6,7], 'py_dict':[{'a':1, 'out':1},{'b':2, 'out':4},{'c':3, 'out':9},{'d':4, 'out':16}], 'most':[d1,d2,d3,'d4'] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'json_col'}, - 'out':{'success':True, 'exception_index_list':[], 'exception_list':[]}}, + 'out':{'success':True, 'unexpected_index_list':[], 'unexpected_list':[]}}, { 'in':{'column':'not_json'}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':[4,5,6,7]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':[4,5,6,7]}}, { 'in':{'column':'py_dict'}, - 'out':{'success':False, 'exception_index_list':[0,1,2,3], 'exception_list':[{'a':1, 'out':1},{'b':2, 'out':4},{'c':3, 'out':9},{'d':4, 'out':16}]}}, + 'out':{'success':False, 'unexpected_index_list':[0,1,2,3], 'unexpected_list':[{'a':1, 'out':1},{'b':2, 'out':4},{'c':3, 'out':9},{'d':4, 'out':16}]}}, { 'in':{'column':'most'}, - 'out':{'success':False, 'exception_index_list':[3], 'exception_list':['d4']}}, + 'out':{'success':False, 'unexpected_index_list':[3], 'unexpected_list':['d4']}}, { 'in':{'column':'most', 'mostly':.75}, - 'out':{'success':True, 'exception_index_list':[3], 'exception_list':['d4']}} + 'out':{'success':True, 'unexpected_index_list':[3], 
'unexpected_list':['d4']}} ] for t in T: out = D.expect_column_values_to_be_json_parseable(**t['in']) - self.assertEqual(out, t['out']) - - def test_expect_column_values_to_match_json_schema(self): - - with open("./tests/test_sets/expect_column_values_to_match_json_schema_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - out = D.expect_column_values_to_match_json_schema(**t['in'])#, **t['kwargs']) - self.assertEqual(out, t['out']) - - - - def test_expect_column_mean_to_be_between(self): - """ - #!!! Ignores null (None and np.nan) values. If all null values, return {'success':False, 'exception_list':None) - Cases Tested: - Tested with float - float - Tested with float - int - Tested with np.nap - """ - - D = ge.dataset.PandasDataSet({ - 'x' : [2.0, 5.0], - 'y' : [5.0, 5], - 'z' : [0, 10], - 'n' : [0, None], - 'b' : [True, False], + self.assertEqual(t['out']['success'], out['success']) + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + # def test_expect_column_values_to_match_json_schema(self): + + # with open("./tests/test_sets/expect_column_values_to_match_json_schema_test_set.json") as f: + # J = json.load(f) + # D = ge.dataset.PandasDataset(J["dataset"]) + # D.set_default_expectation_argument("result_format", "COMPLETE") + # T = J["tests"] + + # self.maxDiff = None + + # for t in T: + # out = D.expect_column_values_to_match_json_schema(**t['in'])#, **t['kwargs']) + # self.assertEqual(t['out']['success'], out['success']) + # if 'unexpected_index_list' in t['out']: + # self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + # if 'unexpected_list' in t['out']: + # self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + 
+ def test_expect_column_median_to_be_between(self): + ds = ge.dataset.PandasDataset({ + 'a': [0,1,2,3], + 'b': [0,1,1,2] }) - D.set_default_expectation_argument("output_format", "BASIC") - T = [ - { - 'in':{'column':'x', 'min_value':2, 'max_value':5}, - 'out':{'success':True, 'true_value':3.5}}, - { - 'in':{'column':'x', 'min_value':1, 'max_value':2}, - 'out':{'success':False, 'true_value':3.5}}, - { - 'in':{'column':'y', 'min_value':5, 'max_value':5}, - 'out':{'success':True, 'true_value':5}}, - { - 'in':{'column':'y', 'min_value':4, 'max_value':4}, - 'out':{'success':False, 'true_value':5}}, - { - 'in':{'column':'z', 'min_value':5, 'max_value':5}, - 'out':{'success':True, 'true_value':5}}, - { - 'in':{'column':'z', 'min_value':13, 'max_value':14}, - 'out':{'success':False, 'true_value':5}}, - { - 'in':{'column':'n', 'min_value':0, 'max_value':0}, - 'out':{'success':True, 'true_value':0.0}} - ] - - for t in T: - out = D.expect_column_mean_to_be_between(**t['in']) - self.assertEqual(out, t['out']) - - - typedf = ge.dataset.PandasDataSet({ - 's' : ['s', np.nan, None, None], - 'b' : [True, False, False, True], - 'x' : [True, None, False, None], - }) - typedf.set_default_expectation_argument("output_format", "BASIC") - - T = [ - { - 'in':{'column':'s', 'min_value':0, 'max_value':0}, - 'out':{'success':False, 'true_value':None}}, - { - 'in':{'column':'b', 'min_value':0, 'max_value':1}, - 'out':{'success':True, 'true_value':0.5}}, - { - 'in':{'column':'x', 'min_value':0, 'max_value':1}, - 'out':{'success':True, 'true_value':0.5}} - ] - - for t in T[1:]: - out = typedf.expect_column_mean_to_be_between(**t['in']) - self.assertEqual(out, t['out']) - - with self.assertRaises(TypeError): - typedf.expect_column_mean_to_be_between(T[0]['in']) - - with self.assertRaises(ValueError): - typedf.expect_column_mean_to_be_between("s") + self.assertEqual( + True, + ds.expect_column_median_to_be_between('a', 1, 2)['success'] + ) + self.assertEqual( + 1.5, + 
ds.expect_column_median_to_be_between('a', 1, 2)['result']['observed_value'] + ) + self.assertEqual( + 1, + ds.expect_column_median_to_be_between('b', 1, 1)['result']['observed_value'] + ) def test_expect_column_stdev_to_be_between(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'dist1' : [1,1,3], 'dist2' : [-1,0,1] }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{'column':'dist1', 'min_value':.5, 'max_value':1.5}, - 'out':{'success':True, 'true_value':D['dist1'].std()}}, + 'out':{'success':True, "result": { "observed_value": D['dist1'].std(), "element_count": 3, "missing_count": 0, "missing_percent": 0}}}, { 'in':{'column':'dist1', 'min_value':2, 'max_value':3}, - 'out':{'success':False, 'true_value':D['dist1'].std()}}, + 'out':{'success':False, "result": { "observed_value": D['dist1'].std(), "element_count": 3, "missing_count": 0, "missing_percent": 0}}}, { 'in':{'column':'dist2', 'min_value':2, 'max_value':3}, - 'out':{'success':False, 'true_value':1.0}}, + 'out':{'success':False, "result": { "observed_value": 1, "element_count": 3, "missing_count": 0, "missing_percent": 0}}}, { 'in':{'column':'dist2', 'min_value':0, 'max_value':1}, - 'out':{'success':True, 'true_value':1.0}} + 'out':{'success':True, "result": { "observed_value": 1, "element_count": 3, "missing_count": 0, "missing_percent": 0}}} ] for t in T: @@ -1097,145 +915,34 @@ def test_expect_column_stdev_to_be_between(self): with self.assertRaises(ValueError): D.expect_column_stdev_to_be_between("dist1") - - def test_expect_column_unique_value_count_to_be_between(self): - - D = ge.dataset.PandasDataSet({ - 'dist1' : [1,2,3,4,5,6,7,8], - 'dist2' : [1,2,3,4,5,None,None,None], - 'dist3' : [2,2,2,2,5,6,7,8], - 'dist4' : [1,1,1,1,None,None,None,None] - }) - D.set_default_expectation_argument("output_format", "COMPLETE") - - T = [ - { - 'in':{ - 'column': 'dist1', - 'min_value': 0, - 
'max_value': 10 - }, - 'kwargs':{}, - 'out':{'success':True, 'true_value': 8} - },{ - 'in':{ - "column" : 'dist2', - "min_value" : None, - "max_value" : None - }, - 'kwargs':{}, - 'out':{'success':True, 'true_value': 5} - },{ - 'in':{ - "column": 'dist3', - "min_value": None, - "max_value": 5 - }, - 'kwargs':{}, - 'out':{'success':True, 'true_value': 5} - },{ - 'in':{ - "column": 'dist4', - "min_value": 2, - "max_value": None - }, - 'kwargs':{}, - 'out':{'success':False, 'true_value': 1} - } - ] - - for t in T: - try: - out = D.expect_column_unique_value_count_to_be_between(**t['in']) - self.assertEqual(out, t['out']) - except ValueError as err: - self.assertEqual(str(err), "min_value and max_value cannot both be None") - - def test_expect_column_proportion_of_unique_values_to_be_between(self): - - D = ge.dataset.PandasDataSet({ - 'dist1' : [1,1,3], - 'dist2' : [-1,0,1] - }) - D.set_default_expectation_argument("output_format", "COMPLETE") - - T = [ - { - 'in':{'column':'dist1', 'min_value':.5, 'max_value':1.5}, - 'out':{'success':True, 'true_value': 2./3}}, - { - 'in':{'column':'dist1', 'min_value':2, 'max_value':3}, - 'out':{'success':False, 'true_value': 2./3}}, - { - 'in':{'column':'dist2', 'min_value':2, 'max_value':3}, - 'out':{'success':False, 'true_value':1.0}}, - { - 'in':{'column':'dist2', 'min_value':0, 'max_value':1}, - 'out':{'success':True, 'true_value':1.0}} - ] - - for t in T: - out = D.expect_column_proportion_of_unique_values_to_be_between(**t['in']) - self.assertEqual(out, t['out']) - - def test_expect_column_values_to_be_increasing(self): - print("=== test_expect_column_values_to_be_increasing ===") - with open("./tests/test_sets/expect_column_values_to_be_increasing_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(t) - out = D.expect_column_values_to_be_increasing(**t['in'])#, 
**t['kwargs']) - self.assertEqual(out, t['out']) - - def test_expect_column_values_to_be_decreasing(self): - print("=== test_expect_column_values_to_be_decreasing ===") - with open("./tests/test_sets/expect_column_values_to_be_decreasing_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(t) - out = D.expect_column_values_to_be_decreasing(**t['in']) - self.assertEqual(out, t['out']) - def test_expect_column_most_common_value_to_be_in_set(self): - D = ge.dataset.PandasDataSet({ + D = ge.dataset.PandasDataset({ 'x' : [1,1,2,2,3,None, None, None, None, None], 'y' : ['hello', 'jello', 'mello', 'hello', 'jello', 'mello', 'hello', 'jello', 'mello', 'jello'], 'z' : [1,2,2,3,3,3,4,4,4,4], }) - D.set_default_expectation_argument("output_format", "COMPLETE") + D.set_default_expectation_argument("result_format", "COMPLETE") T = [ { 'in':{"column":"x","value_set":[1]}, - 'out':{"success":False, "true_value":[1,2]}, + 'out':{"success":False, "result": { "observed_value": [1,2], "element_count": 10, "missing_count": 5, "missing_percent": 0.5}}, },{ 'in':{"column":"x", "value_set":[1], "ties_okay":True}, - 'out':{"success":True, "true_value":[1,2]}, + 'out':{"success":True, "result": { "observed_value": [1,2], "element_count": 10, "missing_count": 5, "missing_percent": 0.5}}, },{ 'in':{"column":"x","value_set":[3]}, - 'out':{"success":False, "true_value":[1,2]}, + 'out':{"success":False, "result": { "observed_value": [1,2], "element_count": 10, "missing_count": 5, "missing_percent": 0.5}}, },{ 'in':{"column":"y","value_set":["jello", "hello"]}, - 'out':{'success':True, "true_value":["jello"]}, + 'out':{'success':True, "result": { "observed_value": ["jello"], "element_count": 10, "missing_count": 0, "missing_percent": 0}}, },{ 'in':{"column":"y","value_set":["hello", "mello"]}, - 'out':{'success':False, 
"true_value":["jello"]}, + 'out':{'success':False, "result": { "observed_value": ["jello"], "element_count": 10, "missing_count": 0, "missing_percent": 0}}, },{ 'in':{"column":"z","value_set":[4]}, - 'out':{'success':True, "true_value":[4]}, + 'out':{'success':True, "result": { "observed_value": [4], "element_count": 10, "missing_count": 0, "missing_percent": 0}}, } ] @@ -1245,107 +952,46 @@ def test_expect_column_most_common_value_to_be_in_set(self): self.assertEqual(out, t['out']) - def test_expect_column_sum_to_be_between(self): - with open("./tests/test_sets/expect_column_sum_to_be_between_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(json.dumps(t)) - out = D.expect_column_sum_to_be_between(**t['in']) - print(out) - - if "out" in t: - self.assertEqual(out, t['out']) - - if "error" in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) - - def test_expect_column_min_to_be_between(self): - with open("./tests/test_sets/expect_column_min_to_be_between_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = J["tests"] - - self.maxDiff = None - - for t in T: - print(json.dumps(t)) - out = D.expect_column_min_to_be_between(**t['in']) - print(out) - - if "out" in t: - self.assertEqual(out, t['out']) - - if "error" in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) - - def test_expect_column_max_to_be_between(self): - with open("./tests/test_sets/expect_column_max_to_be_between_test_set.json") as f: - J = json.load(f) - D = ge.dataset.PandasDataSet(J["dataset"]) - D.set_default_expectation_argument("output_format", "COMPLETE") - T = 
J["tests"] - - self.maxDiff = None - - for t in T: - print(json.dumps(t)) - out = D.expect_column_max_to_be_between(**t['in']) - print(out) - - if "out" in t: - self.assertEqual(out, t['out']) - - if "error" in t: - self.assertEqual(out['raised_exception'], True) - self.assertIn(t['error']['traceback_substring'], out['exception_traceback']) - def test_expectation_decorator_summary_mode(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'x' : [1,2,3,4,5,6,7,7,None,None], }) - df.set_default_expectation_argument("output_format", "COMPLETE") + df.set_default_expectation_argument("result_format", "COMPLETE") # print '&'*80 - # print json.dumps(df.expect_column_values_to_be_between('x', min_value=1, max_value=5, output_format="SUMMARY"), indent=2) + # print json.dumps(df.expect_column_values_to_be_between('x', min_value=1, max_value=5, result_format="SUMMARY"), indent=2) self.maxDiff = None self.assertEqual( - df.expect_column_values_to_be_between('x', min_value=1, max_value=5, output_format="SUMMARY"), + df.expect_column_values_to_be_between('x', min_value=1, max_value=5, result_format="SUMMARY"), { "success" : False, - "summary_obj" : { + "result" : { "element_count" : 10, "missing_count" : 2, "missing_percent" : .2, - "exception_count" : 3, - "partial_exception_counts": { - 6.0 : 1, - 7.0 : 2, - }, - "exception_percent": 0.3, - "exception_percent_nonmissing": 0.375, - "partial_exception_list" : [6.0,7.0,7.0], - "partial_exception_index_list": [5,6,7], + "unexpected_count" : 3, + "partial_unexpected_counts": [ + {"value": 7.0, + "count": 2}, + {"value": 6.0, + "count": 1} + ], + "unexpected_percent": 0.3, + "unexpected_percent_nonmissing": 0.375, + "partial_unexpected_list" : [6.0,7.0,7.0], + "partial_unexpected_index_list": [5,6,7], } } ) self.assertEqual( - df.expect_column_mean_to_be_between("x", 3, 7, output_format="SUMMARY"), + df.expect_column_mean_to_be_between("x", 3, 7, result_format="SUMMARY"), { 'success': True, - 'true_value': 
4.375, - 'summary_obj': { + 'result': { + 'observed_value': 4.375, 'element_count': 10, 'missing_count': 2, 'missing_percent': .2 @@ -1355,63 +1001,588 @@ def test_expectation_decorator_summary_mode(self): def test_positional_arguments(self): - df = ge.dataset.PandasDataSet({ + df = ge.dataset.PandasDataset({ 'x':[1,3,5,7,9], 'y':[2,4,6,8,10], 'z':[None,'a','b','c','abc'] }) - df.set_default_expectation_argument('output_format', 'COMPLETE') + df.set_default_expectation_argument('result_format', 'COMPLETE') self.assertEqual( df.expect_column_mean_to_be_between('x',4,6), - {'success':True, 'true_value':5} + {'success':True, 'result': {'observed_value': 5, 'element_count': 5, + 'missing_count': 0, + 'missing_percent': 0.0}} ) - self.assertEqual( - df.expect_column_values_to_be_between('y',1,6), - {'success':False, 'exception_list':[8,10], 'exception_index_list':[3,4]} - ) + out = df.expect_column_values_to_be_between('y',1,6) + t = {'out': {'success':False, 'unexpected_list':[8,10], 'unexpected_index_list': [3,4]}} + if 'out' in t: + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + out = df.expect_column_values_to_be_between('y',1,6,mostly=.5) + t = {'out': {'success':True, 'unexpected_list':[8,10], 'unexpected_index_list':[3,4]}} + if 'out' in t: + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + out = df.expect_column_values_to_be_in_set('z',['a','b','c']) + t = {'out': {'success':False, 'unexpected_list':['abc'], 'unexpected_index_list':[4]}} + if 
'out' in t: + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + out = df.expect_column_values_to_be_in_set('z',['a','b','c'],mostly=.5) + t = {'out': {'success':True, 'unexpected_list':['abc'], 'unexpected_index_list':[4]}} + if 'out' in t: + self.assertEqual(t['out']['success'], out['success']) + if 'unexpected_index_list' in t['out']: + self.assertEqual(t['out']['unexpected_index_list'], out['result']['unexpected_index_list']) + if 'unexpected_list' in t['out']: + self.assertEqual(t['out']['unexpected_list'], out['result']['unexpected_list']) + + def test_result_format_argument_in_decorators(self): + df = ge.dataset.PandasDataset({ + 'x':[1,3,5,7,9], + 'y':[2,4,6,8,10], + 'z':[None,'a','b','c','abc'] + }) + df.set_default_expectation_argument('result_format', 'COMPLETE') + #Test explicit Nones in result_format self.assertEqual( - df.expect_column_values_to_be_between('y',1,6,mostly=.5), - {'success':True, 'exception_list':[8,10], 'exception_index_list':[3,4]} + df.expect_column_mean_to_be_between('x',4,6, result_format=None), + {'success':True, 'result': {'observed_value': 5, 'element_count': 5, + 'missing_count': 0, + 'missing_percent': 0.0 + }} ) self.assertEqual( - df.expect_column_values_to_be_in_set('z',['a','b','c']), - {'success':False, 'exception_list':['abc'], 'exception_index_list':[4]} + df.expect_column_values_to_be_between('y',1,6, result_format=None), + {'result': {'element_count': 5, + 'missing_count': 0, + 'missing_percent': 0.0, + 'partial_unexpected_counts': [{'count': 1, 'value': 8}, + {'count': 1, 'value': 10}], + 'partial_unexpected_index_list': [3, 4], + 'partial_unexpected_list': [8, 10], + 'unexpected_count': 2, + 'unexpected_index_list': [3, 4], + 'unexpected_list': [8, 10], + 
'unexpected_percent': 0.4, + 'unexpected_percent_nonmissing': 0.4}, + 'success': False} ) - self.assertEqual( - df.expect_column_values_to_be_in_set('z',['a','b','c'],mostly=.5), - {'success':True, 'exception_list':['abc'], 'exception_index_list':[4]} - ) + #Test unknown output format + with self.assertRaises(ValueError): + df.expect_column_values_to_be_between('y',1,6, result_format="QUACK") + + with self.assertRaises(ValueError): + df.expect_column_mean_to_be_between('x',4,6, result_format="QUACK") - def test_output_format_argument_in_decorators(self): - df = ge.dataset.PandasDataSet({ + def test_from_pandas(self): + pd_df = pd.DataFrame({ 'x':[1,3,5,7,9], 'y':[2,4,6,8,10], 'z':[None,'a','b','c','abc'] }) - df.set_default_expectation_argument('output_format', 'COMPLETE') - #Test explicit Nones in output_format - self.assertEqual( - df.expect_column_mean_to_be_between('x',4,6, output_format=None), - {'success':True, 'true_value':5} - ) + ge_df = ge.from_pandas(pd_df) + self.assertIsInstance(ge_df, ge.dataset.Dataset) + self.assertEquals(list(ge_df.columns), ['x', 'y', 'z']) + self.assertEquals(list(ge_df['x']), list(pd_df['x'])) + self.assertEquals(list(ge_df['y']), list(pd_df['y'])) + self.assertEquals(list(ge_df['z']), list(pd_df['z'])) - self.assertEqual( - df.expect_column_values_to_be_between('y',1,6, output_format=None), - {'success':False, 'exception_list':[8,10], 'exception_index_list':[3,4]} - ) - #Test unknown output format - with self.assertRaises(ValueError): - df.expect_column_values_to_be_between('y',1,6, output_format="QUACK") + def test_from_pandas_expectations_config(self): + # Logic mostly copied from TestValidation.test_validate + def load_ge_config(file): + with open(file) as f: + return json.load(f) + + my_expectations_config = load_ge_config("./tests/test_sets/titanic_expectations.json") + + pd_df = pd.read_csv("./tests/test_sets/Titanic.csv") + my_df = ge.from_pandas(pd_df, expectations_config=my_expectations_config) + + 
my_df.set_default_expectation_argument("result_format", "COMPLETE") + + results = my_df.validate(catch_exceptions=False) + + expected_results = load_ge_config("./tests/test_sets/expected_results_20180303.json") + + self.maxDiff = None + assertDeepAlmostEqual(self, results, expected_results) + + def test_ge_pandas_concatenating(self): + df1 = ge.dataset.PandasDataset({ + 'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2'] + }) + + df1.expect_column_values_to_match_regex('A', '^A[0-2]$') + df1.expect_column_values_to_match_regex('B', '^B[0-2]$') + + df2 = ge.dataset.PandasDataset({ + 'A': ['A3', 'A4', 'A5'], + 'B': ['B3', 'B4', 'B5'] + }) + + df2.expect_column_values_to_match_regex('A', '^A[3-5]$') + df2.expect_column_values_to_match_regex('B', '^B[3-5]$') + + df = pd.concat([df1, df2]) + + exp_c = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}} + ] + + # The concatenated data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. 
Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_c) + + def test_ge_pandas_joining(self): + df1 = ge.dataset.PandasDataset({ + 'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, + index=['K0', 'K1', 'K2']) + + df1.expect_column_values_to_match_regex('A', '^A[0-2]$') + df1.expect_column_values_to_match_regex('B', '^B[0-2]$') + + df2 = ge.dataset.PandasDataset({ + 'C': ['C0', 'C2', 'C3'], + 'D': ['C0', 'D2', 'D3']}, + index=['K0', 'K2', 'K3']) + + df2.expect_column_values_to_match_regex('C', '^C[0-2]$') + df2.expect_column_values_to_match_regex('D', '^D[0-2]$') + + df = df1.join(df2) + + exp_j = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}} + ] + + # The joined data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_j) + + def test_ge_pandas_merging(self): + df1 = ge.dataset.PandasDataset({ + 'id': [1, 2, 3, 4], + 'name': ['a', 'b', 'c', 'd'] + }) + + df1.expect_column_values_to_match_regex('name', '^[A-Za-z ]+$') + + df2 = ge.dataset.PandasDataset({ + 'id': [1, 2, 3, 4], + 'salary': [57000, 52000, 59000, 65000] + }) + + df2.expect_column_values_to_match_regex('salary', '^[0-9]{4,6]$') + + df = df1.merge(df2, on='id') + + exp_m = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'id'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'name'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'salary'}} + ] + + # The merged data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. 
Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_m) + + def test_ge_pandas_sampling(self): + df = ge.dataset.PandasDataset({ + 'A': [1, 2, 3, 4], + 'B': [5, 6, 7, 8], + 'C': ['a', 'b', 'c', 'd'], + 'D': ['e', 'f', 'g', 'h'] + }) + + # Put some simple expectations on the data frame + df.expect_column_values_to_be_in_set("A", [1, 2, 3, 4]) + df.expect_column_values_to_be_in_set("B", [5, 6, 7, 8]) + df.expect_column_values_to_be_in_set("C", ['a', 'b', 'c', 'd']) + df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'h']) + + exp1 = df.find_expectations() + + # The sampled data frame should: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. Inherit ALL the non-failing expectations of the parent data frame + + samp1 = df.sample(n=2) + self.assertIsInstance(samp1, ge.dataset.PandasDataset) + self.assertEqual(samp1.find_expectations(), exp1) + + samp1 = df.sample(frac=0.25, replace=True) + self.assertIsInstance(samp1, ge.dataset.PandasDataset) + self.assertEqual(samp1.find_expectations(), exp1) + + # Change expectation on column "D", sample, and check expectations. + # The failing expectation on column "D" is automatically dropped in + # the sample. 
+ df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'x']) + samp1 = df.sample(n=2) + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'C', 'values_set': ['a', 'b', 'c', 'd']}} + ] + self.assertEqual(samp1.find_expectations(), exp1) + + + def test_ge_pandas_concatenating(self): + df1 = ge.dataset.PandasDataset({ + 'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2'] + }) + + df1.expect_column_values_to_match_regex('A', '^A[0-2]$') + df1.expect_column_values_to_match_regex('B', '^B[0-2]$') + + df2 = ge.dataset.PandasDataset({ + 'A': ['A3', 'A4', 'A5'], + 'B': ['B3', 'B4', 'B5'] + }) + + df2.expect_column_values_to_match_regex('A', '^A[3-5]$') + df2.expect_column_values_to_match_regex('B', '^B[3-5]$') + + df = pd.concat([df1, df2]) + + exp_c = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}} + ] + + # The concatenated data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. 
Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_c) + + def test_ge_pandas_joining(self): + df1 = ge.dataset.PandasDataset({ + 'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, + index=['K0', 'K1', 'K2']) + + df1.expect_column_values_to_match_regex('A', '^A[0-2]$') + df1.expect_column_values_to_match_regex('B', '^B[0-2]$') + + df2 = ge.dataset.PandasDataset({ + 'C': ['C0', 'C2', 'C3'], + 'D': ['C0', 'D2', 'D3']}, + index=['K0', 'K2', 'K3']) + + df2.expect_column_values_to_match_regex('C', '^C[0-2]$') + df2.expect_column_values_to_match_regex('D', '^D[0-2]$') + + df = df1.join(df2) + + exp_j = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}} + ] + + # The joined data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_j) + + def test_ge_pandas_merging(self): + df1 = ge.dataset.PandasDataset({ + 'id': [1, 2, 3, 4], + 'name': ['a', 'b', 'c', 'd'] + }) + + df1.expect_column_values_to_match_regex('name', '^[A-Za-z ]+$') + + df2 = ge.dataset.PandasDataset({ + 'id': [1, 2, 3, 4], + 'salary': [57000, 52000, 59000, 65000] + }) + + df2.expect_column_values_to_match_regex('salary', '^[0-9]{4,6]$') + + df = df1.merge(df2, on='id') + + exp_m = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'id'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'name'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'salary'}} + ] + + # The merged data frame will: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. 
Only have the default expectations + + self.assertIsInstance(df, ge.dataset.PandasDataset) + self.assertEqual(df.find_expectations(), exp_m) + + def test_ge_pandas_sampling(self): + df = ge.dataset.PandasDataset({ + 'A': [1, 2, 3, 4], + 'B': [5, 6, 7, 8], + 'C': ['a', 'b', 'c', 'd'], + 'D': ['e', 'f', 'g', 'h'] + }) + + # Put some simple expectations on the data frame + df.expect_column_values_to_be_in_set("A", [1, 2, 3, 4]) + df.expect_column_values_to_be_in_set("B", [5, 6, 7, 8]) + df.expect_column_values_to_be_in_set("C", ['a', 'b', 'c', 'd']) + df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'h']) + + exp1 = df.find_expectations() + + # The sampled data frame should: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. Inherit ALL the expectations of the parent data frame + + samp1 = df.sample(n=2) + self.assertIsInstance(samp1, ge.dataset.PandasDataset) + self.assertEqual(samp1.find_expectations(), exp1) + + samp1 = df.sample(frac=0.25, replace=True) + self.assertIsInstance(samp1, ge.dataset.PandasDataset) + self.assertEqual(samp1.find_expectations(), exp1) + + # Change expectation on column "D", sample, and check expectations. + # The failing expectation on column "D" is NOT automatically dropped + # in the sample. 
+ df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'x']) + samp1 = df.sample(n=2) + exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'C', 'values_set': ['a', 'b', 'c', 'd']}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'x']}} + ] + self.assertEqual(samp1.find_expectations(), exp1) + + + def test_ge_pandas_subsetting(self): + df = ge.dataset.PandasDataset({ + 'A':[1,2,3,4], + 'B':[5,6,7,8], + 'C':['a','b','c','d'], + 'D':['e','f','g','h'] + }) + + # Put some simple expectations on the data frame + df.expect_column_values_to_be_in_set("A", [1, 2, 3, 4]) + df.expect_column_values_to_be_in_set("B", [5, 6, 7, 8]) + df.expect_column_values_to_be_in_set("C", ['a', 'b', 'c', 'd']) + df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'h']) + + # The subsetted data frame should: + # + # 1. Be a ge.dataset.PandaDataSet + # 2. 
Inherit ALL the expectations of the parent data frame + + exp1 = df.find_expectations() + + sub1 = df[['A', 'D']] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[['A']] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[:3] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[1:2] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[:-1] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df[-1:] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df.iloc[:3, 1:4] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + sub1 = df.loc[0:, 'A':'B'] + self.assertIsInstance(sub1, ge.dataset.PandasDataset) + self.assertEqual(sub1.find_expectations(), exp1) + + def test_ge_pandas_automatic_failure_removal(self): + df = ge.dataset.PandasDataset({ + 'A': [1, 2, 3, 4], + 'B': [5, 6, 7, 8], + 'C': ['a', 'b', 'c', 'd'], + 'D': ['e', 'f', 'g', 'h'] + }) + + # Put some simple expectations on the data frame + df.expect_column_values_to_be_in_set("A", [1, 2, 3, 4]) + df.expect_column_values_to_be_in_set("B", [5, 6, 7, 8]) + df.expect_column_values_to_be_in_set("C", ['w', 'x', 'y', 'z']) + df.expect_column_values_to_be_in_set("D", ['e', 'f', 'g', 'h']) + + # First check that failing expectations are NOT automatically + # dropped when sampling. + # For this data frame, the expectation on column "C" above fails. 
+ exp1 = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'C', 'values_set': ['w', 'x', 'y', 'z']}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + samp1 = df.sample(n=2) + self.assertEqual(samp1.find_expectations(), exp1) + + # Now check subsetting to verify that failing expectations are NOT + # automatically dropped when subsetting. + sub1 = df[['A', 'D']] + self.assertEqual(sub1.find_expectations(), exp1) + + # Set property/attribute so that failing expectations are + # automatically removed when sampling or subsetting. 
+ df.discard_subset_failing_expectations = True + + exp_samp = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'B'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'C'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'B', 'values_set': [5, 6, 7, 8]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + + samp2 = df.sample(n=2) + self.assertEqual(samp2.find_expectations(), exp_samp) + + # Now check subsetting. In addition to the failure on column "C", + # the expectations on column "B" now fail since column "B" doesn't + # exist in the subset. + sub2 = df[['A', 'D']] + exp_sub = [ + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'A'}}, + {'expectation_type': 'expect_column_to_exist', + 'kwargs': {'column': 'D'}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'A', 'values_set': [1, 2, 3, 4]}}, + {'expectation_type': 'expect_column_values_to_be_in_set', + 'kwargs': {'column': 'D', 'values_set': ['e', 'f', 'g', 'h']}} + ] + self.assertEqual(sub2.find_expectations(), exp_sub) - with self.assertRaises(ValueError): - df.expect_column_mean_to_be_between('x',4,6, output_format="QUACK") if __name__ == "__main__": unittest.main() diff --git a/tests/test_sets/expect_column_max_to_be_between_test_set.json b/tests/test_sets/expect_column_max_to_be_between_test_set.json deleted file mode 100644 index b7f00570240f..000000000000 --- a/tests/test_sets/expect_column_max_to_be_between_test_set.json +++ /dev/null @@ -1,109 +0,0 @@ -{ - "dataset" : { - "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], 
- "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], - "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], - "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], - "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", "2/1/2017", null, null, null, null], - "a" : [null, 0, null, null, 1, null, null, 2, null, null], - "b" : [null, 0, null, null, 2, null, null, 1, null, null] - }, - "tests" : [{ - "notes": "Basic positive test case", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": 4, - "max_value": 6 - }, - "out": { - "success": true, - "true_value": 5 - } - },{ - "notes": "Basic negative test case", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": null, - "max_value": 4 - }, - "out": { - "success": false, - "true_value": 5 - } - },{ - "notes": "Test case with output_format=SUMMARY. Also verifies that max_value is inclusive", - "in": { - "column": "w", - "output_format": "SUMMARY", - "min_value": 0, - "max_value": 5 - }, - "out": { - "success": true, - "true_value": 5, - "summary_obj": { - "element_count": 10, - "missing_count": 0, - "missing_percent": 0.0 - } - } - },{ - "notes": "Test case with only a lower bound, and a missing value", - "in": { - "column": "x", - "min_value": 3 - }, - "out": { - "success": true, - "true_value": 9.0 - } - },{ - "notes": "Negative test case with only a lower bound", - "in": { - "column": "w", - "min_value": 50 - }, - "out": { - "success": false, - "true_value": 5 - } - },{ - "notes": "Test on a series containing dates, with an output_strftime_format value", - "in": { - "column": "zz", - "min_value": "2/1/2016", - "max_value": "3/1/2016", - "parse_strings_as_datetimes": true, - "output_strftime_format" : "%m/%d/%Y" - }, - "out": { - "success": false, - "true_value": "02/01/2017" - } - },{ - "notes": "Test on a date-like series containing strings", - "in": { - "column": "zz", - "min_value": "2/1/2016", - "max_value": "3/1/2016", - "parse_strings_as_datetimes": false - }, - "out": { - 
"success": true, - "true_value": "3/1/2016" - } - },{ - "notes": "Test on a strings", - "in": { - "column": "z", - "min_value": "d", - "max_value": "f" - }, - "out": { - "success": true, - "true_value": "e" - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_min_to_be_between_test_set.json b/tests/test_sets/expect_column_min_to_be_between_test_set.json deleted file mode 100644 index 4b61381f497f..000000000000 --- a/tests/test_sets/expect_column_min_to_be_between_test_set.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "dataset" : { - "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], - "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], - "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], - "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], - "zz" : ["2/1/2016", "2/2/2016", "2/2/2016", "10/1/2016", "1/2/2017", "10/1/2017", null, null, null, null], - "a" : [null, 0, null, null, 1, null, null, 2, null, null], - "b" : [null, 0, null, null, 2, null, null, 1, null, null] - }, - "tests" : [{ - "notes": "Basic positive test case", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": -10, - "max_value": 5 - }, - "out": { - "success": true, - "true_value": 1 - } - },{ - "notes": "Negative test case, with max_value=None", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": 4, - "max_value": null - }, - "out": { - "success": false, - "true_value": 1 - } - },{ - "notes": "Test case with output_format=SUMMARY. 
Also verifies that max_value is inclusive", - "in": { - "column": "w", - "output_format": "SUMMARY", - "min_value": 0, - "max_value": 1 - }, - "out": { - "success": true, - "true_value": 1, - "summary_obj": { - "element_count": 10, - "missing_count": 0, - "missing_percent": 0.0 - } - } - },{ - "notes": "Test case with only a lower bound, and a missing value", - "in": { - "column": "x", - "min_value": 1 - }, - "out": { - "success": true, - "true_value": 2.0 - } - },{ - "notes": "Negative test case with only a lower bound", - "in": { - "column": "w", - "min_value": 50 - }, - "out": { - "success": false, - "true_value": 1 - } - },{ - "notes": "Test on a series with mostly nulls", - "in": { - "column": "a", - "min_value": 1, - "max_value": 2 - }, - "out": { - "success": false, - "true_value": 0 - } - },{ - "notes": "Test on a series containing dates", - "in": { - "column": "zz", - "min_value": "2/1/2016", - "max_value": "3/1/2016", - "parse_strings_as_datetimes": true - }, - "out": { - "success": true, - "true_value": "2016-02-01 00:00:00" - } - },{ - "notes": "Test on a series containing dates, with an output_strftime_format value", - "in": { - "column": "zz", - "min_value": "2/1/2016", - "max_value": "3/1/2016", - "parse_strings_as_datetimes": true, - "output_strftime_format" : "%m/%d/%Y" - }, - "out": { - "success": true, - "true_value": "02/01/2016" - } - },{ - "notes": "Negative test case with only a max", - "in": { - "column": "y", - "max_value": 0 - }, - "out": { - "success": false, - "true_value": 1 - } - },{ - "notes": "Raise ValueError with both max and min are missing", - "in": { - "column": "y", - "catch_exceptions": true - }, - "error": { - "traceback_substring": "cannot both be None" - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_sum_to_be_between_test_set.json b/tests/test_sets/expect_column_sum_to_be_between_test_set.json deleted file mode 100644 index 8514510f3041..000000000000 --- 
a/tests/test_sets/expect_column_sum_to_be_between_test_set.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "dataset" : { - "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], - "x" : [2, 3, 4, 5, 6, 7, 8, 9, null, null], - "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], - "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], - "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", null, null, null, null, null], - "a" : [null, 0, null, null, 1, null, null, 2, null, null], - "b" : [null, 0, null, null, 2, null, null, 1, null, null] - }, - "tests" : [{ - "notes": "Basic positive test case", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": 30, - "max_value": 30 - }, - "out": { - "success": true, - "true_value": 30 - } - },{ - "notes": "Basic negative test case", - "in": { - "column": "w", - "output_format": "BASIC", - "min_value": 40, - "max_value": 50 - }, - "out": { - "success": false, - "true_value": 30 - } - },{ - "notes": "Test case with output_format=SUMMARY", - "in": { - "column": "w", - "output_format": "SUMMARY", - "min_value": 20, - "max_value": 40 - }, - "out": { - "success": true, - "true_value": 30, - "summary_obj": { - "element_count": 10, - "missing_count": 0, - "missing_percent": 0.0 - } - } - },{ - "notes": "Test case with only a lower bound, and a missing value", - "in": { - "column": "x", - "min_value": 30 - }, - "out": { - "success": true, - "true_value": 44.0 - } - },{ - "notes": "Negative test case with only a lower bound", - "in": { - "column": "w", - "min_value": 50 - }, - "out": { - "success": false, - "true_value": 30 - } - },{ - "notes": "Negative test case with only a max", - "in": { - "column": "y", - "max_value": 20 - }, - "out": { - "success": false, - "true_value": 22 - } - },{ - "notes": "Raise ValueError with both max and min are missing", - "in": { - "column": "y", - "catch_exceptions": true - }, - "error": { - "traceback_substring": "cannot both be None" - } - }] -} \ No newline at end of file diff --git 
a/tests/test_sets/expect_column_to_exist_test_set.json b/tests/test_sets/expect_column_to_exist_test_set.json deleted file mode 100644 index 93df7a14bcbb..000000000000 --- a/tests/test_sets/expect_column_to_exist_test_set.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "dataset" : { - "c1" : [4,5,6,7], - "c2" : ["a","b","c","d"], - "c3" : [null,null,null,null] - }, - "tests": [{ - "notes": "Basic positive test", - "in":{ - "column": "c1" - }, - "out":{ - "success":true - } - },{ - "notes": "Basic negative test", - "in":{ - "column": "covfefe" - }, - "out":{ - "success":false - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_be_between_test_set.json b/tests/test_sets/expect_column_values_to_be_between_test_set.json index 93841c45ca3d..b944958b44bc 100644 --- a/tests/test_sets/expect_column_values_to_be_between_test_set.json +++ b/tests/test_sets/expect_column_values_to_be_between_test_set.json @@ -1,252 +1,332 @@ -[ - { - "kwargs": {}, - "out": { - "exception_list": [], - "success": true - }, - "in": [ - "x", - 1, - 10 - ] - }, - { - "kwargs": {}, - "out": { - "exception_list": [], - "success": true - }, - "in": [ - "x", - 0, - 20 - ] - }, - { - "kwargs": {}, - "out": { - "exception_list": [ - 10 - ], - "success": false - }, - "in": [ - "x", - 1, - 9 - ] - }, - { - "kwargs": {}, - "out": { - "exception_list": [ - 1, - 2 - ], - "success": false - }, - "in": [ - "x", - 3, - 10 - ] - }, - { - "kwargs": { - "suppress_exceptions": true - }, - "out": { - "exception_list": null, - "success": true - }, - "in": [ - "x", - 1, - 10 - ] - }, - { - "kwargs": { - "suppress_exceptions": true - }, - "out": { - "exception_list": null, - "success": true - }, - "in": [ - "x", - 0, - 20 - ] - }, - { - "kwargs": { - "suppress_exceptions": true - }, - "out": { - "exception_list": null, - "success": false - }, - "in": [ - "x", - 1, - 9 - ] - }, - { - "kwargs": { - "suppress_exceptions": true - }, - "out": { - "exception_list": null, - "success": false - 
}, - "in": [ - "x", - 3, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [], - "success": true - }, - "in": [ - "x", - 1, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [], - "success": true - }, - "in": [ - "x", - 0, - 20 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [ - 10 - ], - "success": true - }, - "in": [ - "x", - 1, - 9 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [ - 1, - 2 - ], - "success": false - }, - "in": [ - "x", - 3, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.95 - }, - "out": { - "exception_list": [ - "abc" - ], - "success": false - }, - "in": [ - "y", - 1, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [ - "abc" - ], - "success": true - }, - "in": [ - "y", - 1, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.8 - }, - "out": { - "exception_list": [ - "abc" - ], - "success": true - }, - "in": [ - "y", - 1, - 10 - ] - }, - { - "kwargs": { - "mostly": 0.9 - }, - "out": { - "exception_list": [ - 5 - ], - "success": false - }, - "in": [ - "z", - 1, - 4 - ] - }, - { - "kwargs": { - "mostly": 0.8 - }, - "out": { - "exception_list": [ - 5 - ], - "success": true - }, - "in": [ - "z", - 1, - 4 - ] - } -] +{ + "dataset" : { + "x" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "y" : [1, 2, 3, 4, 5, 6, 7, 8, 9, "abc"], + "z" : [1, 2, 3, 4, 5, null, null, null, null, null], + "ts" : [ + "Jan 01 1870 12:00:01", + "Dec 31 1999 12:00:01", + "Jan 01 2000 12:00:01", + "Feb 01 2000 12:00:01", + "Mar 01 2000 12:00:01", + "Apr 01 2000 12:00:01", + "May 01 2000 12:00:01", + "Jun 01 2000 12:00:01", + null, + "Jan 01 2001 12:00:01" + ], + "alpha": ["a","b","c","d","e","f","g","h","i","j"], + "numeric": ["1","2","3","4","5","6","7","8","9","10"] + }, + "tests": [ + { + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 10, + 
"min_value": 1 + } + }, + { + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0 + } + }, + { + "out": { + "unexpected_list": [ + 10 + ], + "unexpected_index_list": [9], + "success": false + }, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1 + } + }, + { + "out": { + "unexpected_list": [ + 1, + 2 + ], + "unexpected_index_list": [0, 1], + "success": false + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3 + } + }, + { + "out": {"success": true}, + "in": { + "column": "x", + "max_value": 10, + "min_value": 1, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "out": {"success": true}, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "out": {"success": false}, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "out": {"success": false}, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3, + "result_format": "BOOLEAN_ONLY" + } + }, + { + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "out": { + "unexpected_list": [], + "unexpected_index_list": [], + "success": true + }, + "in": { + "column": "x", + "max_value": 20, + "min_value": 0, + "mostly": 0.9 + } + }, + { + "out": { + "unexpected_list": [ + 10 + ], + "unexpected_index_list": [ + 9 + ], + "success": true + }, + "in": { + "column": "x", + "max_value": 9, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "out": { + "unexpected_list": [ + 1, + 2 + ], + "unexpected_index_list": [ + 0, + 1 + ], + "success": false + }, + "in": { + "column": "x", + "max_value": 10, + "min_value": 3, + "mostly": 0.9 + } + }, + { + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.95, + "catch_exceptions": true + }, + 
"error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." + } + }, + { + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.9, + "catch_exceptions": true + }, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." + } + }, + { + "in": { + "column": "y", + "max_value": 10, + "min_value": 1, + "mostly": 0.8, + "catch_exceptions": true + }, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." + } + }, + { + "out": { + "unexpected_list": [ + 5 + ], + "unexpected_index_list": [ + 4 + ], + "success": false + }, + "in": { + "column": "z", + "max_value": 4, + "min_value": 1, + "mostly": 0.9 + } + }, + { + "out": { + "unexpected_list": [ + 5 + ], + "unexpected_index_list": [ + 4 + ], + "success": true + }, + "in": { + "column": "z", + "max_value": 4, + "min_value": 1, + "mostly": 0.8 + } + }, + { + "out": { + "unexpected_list": [ + "Jan 01 1870 12:00:01", + "Dec 31 1999 12:00:01", + "Jan 01 2001 12:00:01" + ], + "unexpected_index_list": [ + 0, 1, 9 + ], + "success": false + }, + "in": { + "column": "ts", + "max_value": "Dec 31 2000", + "min_value": "Jan 01 2000", + "parse_strings_as_datetimes" : true + } + }, + { + "in": { + "column": "numeric", + "max_value": 10, + "min_value": 0, + "catch_exceptions": true + }, + "error":{ + "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." 
+ } + }, + { + "note" : "Test 'min_value is greater than max_value'", + "in": { + "column": "x", + "min_value": 10, + "max_value": 0, + "catch_exceptions": true + }, + "error":{ + "traceback_substring" : "ValueError: min_value is greater than max_value" + } + }, + { + "note": "Test allow_cross_type_comparisons", + "in": { + "column": "y", + "min_value": 0, + "max_value": 10, + "allow_cross_type_comparisons": true + }, + "out": { + "unexpected_list": [ + "abc" + ], + "unexpected_index_list": [ + 9 + ], + "success": false + } + }, + { + "note": "Test allow_cross_type_comparisons", + "in": { + "column": "numeric", + "min_value": 0, + "max_value": 10, + "allow_cross_type_comparisons": true + }, + "out": { + "unexpected_list": ["1","2","3","4","5","6","7","8","9","10"], + "unexpected_index_list": [ + 0,1,2,3,4,5,6,7,8,9 + ], + "success": false + } + }, + { + "notes": "Verify that min_value=max_value=None raises an error", + "in": { + "column": "y", + "max_value": null, + "min_value": null, + "catch_exceptions": true + }, + "error":{ + "traceback_substring" : "cannot both be None" + } + } + ] +} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_be_between_test_set_ADJ.json b/tests/test_sets/expect_column_values_to_be_between_test_set_ADJ.json deleted file mode 100644 index 5ad9ddd79f15..000000000000 --- a/tests/test_sets/expect_column_values_to_be_between_test_set_ADJ.json +++ /dev/null @@ -1,332 +0,0 @@ -{ - "dataset" : { - "x" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "y" : [1, 2, 3, 4, 5, 6, 7, 8, 9, "abc"], - "z" : [1, 2, 3, 4, 5, null, null, null, null, null], - "ts" : [ - "Jan 01 1870 12:00:01", - "Dec 31 1999 12:00:01", - "Jan 01 2000 12:00:01", - "Feb 01 2000 12:00:01", - "Mar 01 2000 12:00:01", - "Apr 01 2000 12:00:01", - "May 01 2000 12:00:01", - "Jun 01 2000 12:00:01", - null, - "Jan 01 2001 12:00:01" - ], - "alpha": ["a","b","c","d","e","f","g","h","i","j"], - "numeric": ["1","2","3","4","5","6","7","8","9","10"] - }, - "tests": [ - 
{ - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - }, - "in": { - "column": "x", - "max_value": 10, - "min_value": 1 - } - }, - { - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - }, - "in": { - "column": "x", - "max_value": 20, - "min_value": 0 - } - }, - { - "out": { - "exception_list": [ - 10 - ], - "exception_index_list": [9], - "success": false - }, - "in": { - "column": "x", - "max_value": 9, - "min_value": 1 - } - }, - { - "out": { - "exception_list": [ - 1, - 2 - ], - "exception_index_list": [0, 1], - "success": false - }, - "in": { - "column": "x", - "max_value": 10, - "min_value": 3 - } - }, - { - "out": true, - "in": { - "column": "x", - "max_value": 10, - "min_value": 1, - "output_format": "BOOLEAN_ONLY" - } - }, - { - "out": true, - "in": { - "column": "x", - "max_value": 20, - "min_value": 0, - "output_format": "BOOLEAN_ONLY" - } - }, - { - "out": false, - "in": { - "column": "x", - "max_value": 9, - "min_value": 1, - "output_format": "BOOLEAN_ONLY" - } - }, - { - "out": false, - "in": { - "column": "x", - "max_value": 10, - "min_value": 3, - "output_format": "BOOLEAN_ONLY" - } - }, - { - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - }, - "in": { - "column": "x", - "max_value": 10, - "min_value": 1, - "mostly": 0.9 - } - }, - { - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - }, - "in": { - "column": "x", - "max_value": 20, - "min_value": 0, - "mostly": 0.9 - } - }, - { - "out": { - "exception_list": [ - 10 - ], - "exception_index_list": [ - 9 - ], - "success": true - }, - "in": { - "column": "x", - "max_value": 9, - "min_value": 1, - "mostly": 0.9 - } - }, - { - "out": { - "exception_list": [ - 1, - 2 - ], - "exception_index_list": [ - 0, - 1 - ], - "success": false - }, - "in": { - "column": "x", - "max_value": 10, - "min_value": 3, - "mostly": 0.9 - } - }, - { - "in": { - "column": "y", - "max_value": 10, - 
"min_value": 1, - "mostly": 0.95, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." - } - }, - { - "in": { - "column": "y", - "max_value": 10, - "min_value": 1, - "mostly": 0.9, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." - } - }, - { - "in": { - "column": "y", - "max_value": 10, - "min_value": 1, - "mostly": 0.8, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." - } - }, - { - "out": { - "exception_list": [ - 5 - ], - "exception_index_list": [ - 4 - ], - "success": false - }, - "in": { - "column": "z", - "max_value": 4, - "min_value": 1, - "mostly": 0.9 - } - }, - { - "out": { - "exception_list": [ - 5 - ], - "exception_index_list": [ - 4 - ], - "success": true - }, - "in": { - "column": "z", - "max_value": 4, - "min_value": 1, - "mostly": 0.8 - } - }, - { - "out": { - "exception_list": [ - "Jan 01 1870 12:00:01", - "Dec 31 1999 12:00:01", - "Jan 01 2001 12:00:01" - ], - "exception_index_list": [ - 0, 1, 9 - ], - "success": false - }, - "in": { - "column": "ts", - "max_value": "Dec 31 2000", - "min_value": "Jan 01 2000", - "parse_strings_as_datetimes" : true - } - }, - { - "in": { - "column": "numeric", - "max_value": 10, - "min_value": 0, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "TypeError: Column values, min_value, and max_value must either be None or of the same type." 
- } - }, - { - "note" : "Test 'min_value is greater than max_value'", - "in": { - "column": "x", - "min_value": 10, - "max_value": 0, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "ValueError: min_value is greater than max_value" - } - }, - { - "note": "Test allow_cross_type_comparisons", - "in": { - "column": "y", - "min_value": 0, - "max_value": 10, - "allow_cross_type_comparisons": true - }, - "out": { - "exception_list": [ - "abc" - ], - "exception_index_list": [ - 9 - ], - "success": false - } - }, - { - "note": "Test allow_cross_type_comparisons", - "in": { - "column": "numeric", - "min_value": 0, - "max_value": 10, - "allow_cross_type_comparisons": true - }, - "out": { - "exception_list": ["1","2","3","4","5","6","7","8","9","10"], - "exception_index_list": [ - 0,1,2,3,4,5,6,7,8,9 - ], - "success": false - } - }, - { - "notes": "Verify that min_value=max_value=None raises an error", - "in": { - "column": "y", - "max_value": null, - "min_value": null, - "catch_exceptions": true - }, - "error":{ - "traceback_substring" : "cannot both be None" - } - } - ] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_be_decreasing_test_set.json b/tests/test_sets/expect_column_values_to_be_decreasing_test_set.json deleted file mode 100644 index 22e133321f0a..000000000000 --- a/tests/test_sets/expect_column_values_to_be_decreasing_test_set.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "dataset" : { - "w" : [1, 2, 3, 7, 6, 5, 4, 3, 2, 1], - "x" : [null, null, 10, 9, 8, 7, null, 6, 5, 4], - "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], - "z" : ["12/1/2016", "11/2/2016", "10/2/2016", "10/2/2016", "8/1/2016", null, null, null, null, null] - }, - "tests" : [{ - "in": { - "column": "w", - "mostly": 0.6 - }, - "out": { - "exception_list": [2,3,7], - "exception_index_list": [1,2,3], - "success": true - } - },{ - "in": { - "column": "y" - }, - "out": { - "exception_list": [2,3,4], - "exception_index_list": [3,6,9], - "success": false - } - 
},{ - "in": { - "column": "y", - "strictly": true - }, - "out": { - "exception_list": [1,1,2,2,2,3,3,3,4], - "exception_index_list": [1,2,3,4,5,6,7,8,9], - "success": false - } - },{ - "in": { - "column": "x" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "z", - "parse_strings_as_datetimes" : true - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "z", - "parse_strings_as_datetimes" : true, - "strictly": true - }, - "out": { - "exception_list": ["10/2/2016"], - "exception_index_list": [3], - "success": false - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_be_increasing_test_set.json b/tests/test_sets/expect_column_values_to_be_increasing_test_set.json deleted file mode 100644 index 1608d7c74eb6..000000000000 --- a/tests/test_sets/expect_column_values_to_be_increasing_test_set.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "dataset" : { - "w" : [1, 2, 3, 4, 5, 5, 4, 3, 2, 1], - "x" : [2, 3, 4, 5, 6, 7, 8, 9, 10, null], - "y" : [1, 1, 1, 2, 2, 2, 3, 3, 3, 4], - "z" : ["a", "b", "c", "d", "e", null, null, null, null, null], - "zz" : ["1/1/2016", "1/2/2016", "2/2/2016", "2/2/2016", "3/1/2016", null, null, null, null, null], - "a" : [null, 0, null, null, 1, null, null, 2, null, null], - "b" : [null, 0, null, null, 2, null, null, 1, null, null] - }, - "tests" : [{ - "in": { - "column": "x" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "y" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "y", - "strictly": true - }, - "out": { - "exception_list": [1,1,2,2,3,3], - "exception_index_list": [1,2,4,5,7,8], - "success": false - } - },{ - "in": { - "column": "w" - }, - "out": { - "exception_list": [4,3,2,1], - "exception_index_list": [6,7,8,9], - "success": 
false - } - },{ - "in": { - "column": "zz", - "parse_strings_as_datetimes": true - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "a" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "b" - }, - "out": { - "exception_list": [1], - "exception_index_list": [7], - "success": false - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_match_json_schema_test_set.json b/tests/test_sets/expect_column_values_to_match_json_schema_test_set.json deleted file mode 100644 index c2004dd10e76..000000000000 --- a/tests/test_sets/expect_column_values_to_match_json_schema_test_set.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "dataset" : { - "w" : [2, 3, 4, 5, 6, 7, 8, 9, 10, null], - "x" : ["{\"a\":1}", "{\"a\":2}", "{\"a\":3}", "{\"a\":4}", "{\"a\":5}", null, null, null, null, null] - }, - "tests" : [{ - "in": { - "column": "x", - "json_schema": {} - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "x", - "json_schema": { - "properties": { - "a": { - "type": "integer" - } - }, - "required": ["a"] - } - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "x", - "json_schema": { - "properties": { - "a": { - "type": "integer" - } - }, - "required": ["b"] - } - }, - "out": { - "exception_list": ["{\"a\":1}", "{\"a\":2}", "{\"a\":3}", "{\"a\":4}", "{\"a\":5}"], - "exception_index_list": [0,1,2,3,4], - "success": false - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_column_values_to_match_regex_list_test_set.json b/tests/test_sets/expect_column_values_to_match_regex_list_test_set.json deleted file mode 100644 index 8adfc533977b..000000000000 --- a/tests/test_sets/expect_column_values_to_match_regex_list_test_set.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - 
"dataset" : { - "w" : ["111", "222", "333", "123", "321", "444", "456", "654", "555", null], - "x" : ["man", "plan", "canal", "panama", "hat", "bat", "bit", "bot", "but", "bet"] - }, - "tests" : [{ - "in": { - "column": "w", - "regex_list": ["\\d+"] - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "w", - "regex_list": ["[123]+", "[456]+"], - "match_on": "any" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - },{ - "in": { - "column": "w", - "regex_list": ["[123]+", "[456]+"], - "match_on": "all" - }, - "out": { - "exception_list": ["111", "222", "333", "123", "321", "444", "456", "654", "555"], - "exception_index_list": [0,1,2,3,4,5,6,7,8], - "success": false - } - },{ - "in": { - "column": "x", - "regex_list": ["^.*a.*$"] - }, - "out": { - "exception_list": ["bit", "bot", "but", "bet"], - "exception_index_list": [6,7,8,9], - "success": false - } - },{ - "in": { - "column": "x", - "regex_list": ["^.*a.*$", "b.t"], - "match_on": "any" - }, - "out": { - "exception_list": [], - "exception_index_list": [], - "success": true - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expect_table_row_count_to_be_between_test_set.json b/tests/test_sets/expect_table_row_count_to_be_between_test_set.json deleted file mode 100644 index cc18f57307d1..000000000000 --- a/tests/test_sets/expect_table_row_count_to_be_between_test_set.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "dataset" : { - "c1" : [4,5,6,7], - "c2" : ["a","b","c","d"], - "c3" : [null,null,null,null] - }, - "tests": [{ - "in":{ - "min_value": 3, - "max_value": 5 - }, - "out":{"success":true, "true_value":4} - },{ - "in":{ - "min_value": 0, - "max_value": 1 - }, - "out":{"success":false, "true_value":4} - },{ - "in":{ - "min_value": null, - "max_value": 4 - }, - "out":{"success":true, "true_value":4} - },{ - "in":{ - "min_value": 1, - "max_value": 0 - }, - "out":{"success":false, 
"true_value":4} - },{ - "in":{ - "min_value": null, - "max_value": 10 - }, - "out":{ - "success": true, - "true_value": 4 - } - },{ - "notes": "Test error handling for non-int min_value", - "in":{ - "min_value": "quack", - "max_value": 0, - "catch_exceptions": true - }, - "error":{ - "traceback_substring": "must be integers" - } - },{ - "notes": "Test error handling for non-int max_value", - "in":{ - "max_value": "quack", - "catch_exceptions": true - }, - "error":{ - "traceback_substring": "must be integers" - } - }] -} \ No newline at end of file diff --git a/tests/test_sets/expected_cli_results_custom.json b/tests/test_sets/expected_cli_results_custom.json new file mode 100644 index 000000000000..ae4cbec2ee9d --- /dev/null +++ b/tests/test_sets/expected_cli_results_custom.json @@ -0,0 +1,68 @@ +{ "results": [ + { + "expectation_config": { + "expectation_type": "expect_column_values_to_have_odd_lengths", + "kwargs": { + "column": "Name", + "result_format": "SUMMARY" + } + }, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "success": false, + "result": { + "partial_unexpected_index_list": [ + 0, + 5, + 6, + 7, + 8, + 9, + 11, + 12, + 13, + 14, + 15, + 18, + 20, + 21, + 22, + 23, + 27, + 31, + 32, + 33 + ], + "unexpected_count": 660, + "unexpected_percent": 0.5026656511805027, + "partial_unexpected_list": [ + "Allen, Miss Elisabeth Walton", + "Anderson, Mr Harry", + "Andrews, Miss Kornelia Theodosia", + "Andrews, Mr Thomas, jr", + "Appleton, Mrs Edward Dale (Charlotte Lamson)", + "Artagaveytia, Mr Ramon", + "Astor, Mrs John Jacob (Madeleine Talmadge Force)", + "Aubert, Mrs Leontine Pauline", + "Barkworth, Mr Algernon H", + "Baumann, Mr John D", + "Baxter, Mrs James (Helene DeLaudeniere Chaput)", + "Beckwith, Mr Richard Leonard", + "Behr, Mr Karl Howell", + "Birnbaum, Mr Jakob", + "Bishop, Mr Dickinson H", + "Bishop, Mrs Dickinson H (Helen Walton)", + "Bonnell, Miss Caroline", + "Bowerman, Miss Elsie 
Edith", + "Bradley, Mr George", + "Brady, Mr John Bertram" + ], + "missing_percent": 0.0, + "element_count": 1313, + "unexpected_percent_nonmissing": 0.5026656511805027, + "missing_count": 0 + } + } + ] +} \ No newline at end of file diff --git a/tests/test_sets/expected_cli_results_default.json b/tests/test_sets/expected_cli_results_default.json new file mode 100644 index 000000000000..15fd46fbdb83 --- /dev/null +++ b/tests/test_sets/expected_cli_results_default.json @@ -0,0 +1,217 @@ +{ + "results": [ + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Name", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "PClass", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Age", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Sex", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Survived", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": 
"expect_column_to_exist", + "kwargs": { + "column": "SexCode", + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false} + }, + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 40, + "min_value": 20, + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "result": { + "observed_value": 30.397989417989418, + "element_count": 1313, + "missing_percent": 0.4242193450114242, + "missing_count": 557 + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 80, + "min_value": 0, + "result_format": "SUMMARY" + } + }, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "result": { + "partial_unexpected_index_list": [], + "unexpected_count": 0, + "unexpected_percent": 0.0, + "partial_unexpected_list": [], + "missing_percent": 0.4242193450114242, + "partial_unexpected_counts": [], + "element_count": 1313, + "unexpected_percent_nonmissing": 0.0, + "missing_count": 557 + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_match_regex", + "kwargs": { + "regex": "[A-Z][a-z]+(?: \\([A-Z][a-z]+\\))?, ", + "column": "Name", + "result_format": "SUMMARY", + "mostly": 0.95 + } + }, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "success": true, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "result": { + "partial_unexpected_index_list": [ + 394, + 456, + 1195 + ], + "unexpected_count": 3, + "unexpected_percent": 0.002284843869002285, + "partial_unexpected_list": [ + 
"Downton (?Douton), Mr William James", + "Jacobsohn Mr Samuel", + "Seman Master Betros" + ], + "missing_percent": 0.0, + "partial_unexpected_counts": [ + { + "count": 1, + "value": "Downton (?Douton), Mr William James" + }, + { + "count": 1, + "value": "Jacobsohn Mr Samuel" + }, + { + "count": 1, + "value": "Seman Master Betros" + } + ], + "element_count": 1313, + "unexpected_percent_nonmissing": 0.002284843869002285, + "missing_count": 0 + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "PClass", + "values_set": [ + "1st", + "2nd", + "3rd" + ], + "result_format": "SUMMARY" + } + }, + "exception_info": {"exception_message": null, + "exception_traceback": null, + "raised_exception": false}, + "success": false, + "result": { + "partial_unexpected_index_list": [ + 456 + ], + "unexpected_count": 1, + "unexpected_percent": 0.0007616146230007616, + "partial_unexpected_list": [ + "*" + ], + "missing_percent": 0.0, + "partial_unexpected_counts": [ + { + "count": 1, + "value": "*" + } + ], + "element_count": 1313, + "unexpected_percent_nonmissing": 0.0007616146230007616, + "missing_count": 0 + } + } + ] +} \ No newline at end of file diff --git a/tests/test_sets/expected_results_20170721.json b/tests/test_sets/expected_results_20170721.json deleted file mode 100644 index 0bc3e0e6a780..000000000000 --- a/tests/test_sets/expected_results_20170721.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "results": [ - { - "expectation_type": "expect_column_to_exist", - "success": true, - "kwargs": { - "column": "Name" - } - }, - { - "expectation_type": "expect_column_to_exist", - "success": true, - "kwargs": { - "column": "PClass" - } - }, - { - "expectation_type": "expect_column_to_exist", - "success": true, - "kwargs": { - "column": "Age" - } - }, - { - "expectation_type": "expect_column_to_exist", - "success": true, - "kwargs": { - "column": "Sex" - } - }, - { - "expectation_type": "expect_column_to_exist", - 
"success": true, - "kwargs": { - "column": "Survived" - } - }, - { - "expectation_type": "expect_column_to_exist", - "success": true, - "kwargs": { - "column": "SexCode" - } - }, - { - "true_value": 30.397989417989415, - "expectation_type": "expect_column_mean_to_be_between", - "success": true, - "kwargs": { - "column": "Age", - "max_value": 40, - "min_value": 20 - } - }, - { - "exception_list": [], - "exception_index_list": [], - "expectation_type": "expect_column_values_to_be_between", - "success": true, - "kwargs": { - "column": "Age", - "max_value": 80, - "min_value": 0 - } - }, - { - "exception_list": [ - "Downton (?Douton), Mr William James", - "Jacobsohn Mr Samuel", - "Seman Master Betros" - ], - "exception_index_list": [ - 394, - 456, - 1195 - ], - "expectation_type": "expect_column_values_to_match_regex", - "success": true, - "kwargs": { - "regex": "[A-Z][a-z]+(?: \\([A-Z][a-z]+\\))?, ", - "column": "Name", - "mostly": 0.95 - } - }, - { - "exception_list": [ - "*" - ], - "exception_index_list": [ - 456 - ], - "expectation_type": "expect_column_values_to_be_in_set", - "success": false, - "kwargs": { - "column": "PClass", - "values_set": [ - "1st", - "2nd", - "3rd" - ] - } - } - ] -} diff --git a/tests/test_sets/expected_results_20180303.json b/tests/test_sets/expected_results_20180303.json new file mode 100644 index 000000000000..abfff609cc97 --- /dev/null +++ b/tests/test_sets/expected_results_20180303.json @@ -0,0 +1,192 @@ +{ + "results": [ + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Name" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "PClass" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Age" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + 
"column": "Sex" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Survived" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "SexCode" + } + }, + "success": true + }, + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 40, + "min_value": 20 + } + }, + "success": true, + "result": { + "observed_value": 30.397989417989418, + "element_count": 1313, + "missing_count": 557, + "missing_percent": 0.4242193450114242 + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 80, + "min_value": 0 + } + }, + "success": true, + "result": { + "partial_unexpected_index_list": [], + "unexpected_count": 0, + "unexpected_list": [], + "unexpected_percent": 0.0, + "element_count": 1313, + "missing_percent": 0.4242193450114242, + "partial_unexpected_counts": [], + "partial_unexpected_list": [], + "unexpected_percent_nonmissing": 0.0, + "missing_count": 557, + "unexpected_index_list": [] + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_match_regex", + "kwargs": { + "regex": "[A-Z][a-z]+(?: \\([A-Z][a-z]+\\))?, ", + "column": "Name", + "mostly": 0.95 + } + }, + "success": true, + "result": { + "partial_unexpected_index_list": [ + 394, + 456, + 1195 + ], + "unexpected_count": 3, + "unexpected_list": [ + "Downton (?Douton), Mr William James", + "Jacobsohn Mr Samuel", + "Seman Master Betros" + ], + "unexpected_percent": 0.002284843869002285, + "element_count": 1313, + "missing_percent": 0.0, + "partial_unexpected_counts": [ + { + "count": 1, + "value": "Downton (?Douton), Mr William James" + }, + { + "count": 1, + "value": "Jacobsohn Mr Samuel" + }, + { + "count": 1, + "value": "Seman Master Betros" + } + ], + 
"partial_unexpected_list": [ + "Downton (?Douton), Mr William James", + "Jacobsohn Mr Samuel", + "Seman Master Betros" + ], + "unexpected_percent_nonmissing": 0.002284843869002285, + "missing_count": 0, + "unexpected_index_list": [ + 394, + 456, + 1195 + ] + } + }, + { + "expectation_config": { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "PClass", + "values_set": [ + "1st", + "2nd", + "3rd" + ] + } + }, + "success": false, + "result": { + "partial_unexpected_index_list": [ + 456 + ], + "unexpected_count": 1, + "unexpected_list": [ + "*" + ], + "unexpected_percent": 0.0007616146230007616, + "element_count": 1313, + "missing_percent": 0.0, + "partial_unexpected_counts": [ + { + "count": 1, + "value": "*" + } + ], + "partial_unexpected_list": [ + "*" + ], + "unexpected_percent_nonmissing": 0.0007616146230007616, + "missing_count": 0, + "unexpected_index_list": [ + 456 + ] + } + } + ] +} \ No newline at end of file diff --git a/tests/test_sets/fixed_distributional_test_dataset.csv b/tests/test_sets/fixed_distributional_test_dataset.csv new file mode 100644 index 000000000000..09b16ac89c6e --- /dev/null +++ b/tests/test_sets/fixed_distributional_test_dataset.csv @@ -0,0 +1,501 @@ +beta,chi2,exponential,gamma,norm,norm_std,uniform +5.850493790446149,168.37583838447307,4.7656535139688065,23.223571166966874,-3.0235382974235647,-0.7618372133529946,-3.5983922926489176 +5.212445964529123,139.0983421204337,23.46558902670113,25.15684719768373,0.39471669028774103,-0.3316168981944163,-0.7874208669561309 +5.070654221058406,201.15663432172246,17.330225811687882,24.736722630689428,-4.597193575283691,-1.751315429921936,-3.510069639802669 +5.977732536766124,143.1325590947298,7.211183925671038,27.228130010606723,-4.77865152173745,0.6288941107731483,1.057903249763931 +5.067087692330409,119.07439924887161,5.918149079102183,22.220948125469228,7.828902862513571,0.28250186412914624,-0.23090361514895896 
+5.005918326011746,82.93035205958049,5.713901340687993,31.67283453520234,4.967029164864952,-1.3381394299976357,4.152071928369951 +7.315431494529531,131.51711049940366,5.564832682574504,23.004831553625106,-1.5354606162814117,-0.5006068496298897,-1.7943122909495979 +5.7864638254582506,194.66191261889207,8.787290449052568,31.61719242276042,-0.5912692358489875,0.12164502989195974,-2.9983716950024686 +5.074704656405195,113.34771091491636,7.269443779233034,26.35388268219478,1.845112838059194,1.7083234738053625,2.594180036133011 +5.378992256336856,160.3264100965871,48.588368364869325,22.177816055579832,4.232173681931411,-0.9709994483211053,2.4582237282267787 +5.556857767840859,160.50193310792417,10.966184514449546,22.80690400710654,3.035946787915025,-0.6193323434441171,-0.3007805270138544 +5.000008259948673,111.83951010505658,14.622140445174889,32.09199106932245,-8.481105545561316,-0.7267081317912971,-1.5440133436991332 +5.091088171654885,100.02054302238373,4.824596390372838,25.83954702893555,-0.6250418328393799,1.2216554167232163,1.0308520459933028 +5.167367048207986,170.7048636767713,13.35335313227921,26.810249417641742,-0.8554356053234204,0.5036992883408409,-1.3369276527678347 +5.89913711144649,165.71806135405825,10.254687849603236,21.288479509645978,4.764584175827249,-1.3878740774042613,-1.5519714697815616 +5.046323395693867,172.89436807296988,7.798890577507486,25.505126160483062,2.4321467029579438,0.20485141954334923,-4.378167049141784 +5.067926145467801,135.92497861991816,6.27811889270558,20.508554273619644,-12.008186548301987,0.603705215944543,3.9477947986336286 +5.023430979756082,109.6416851486888,9.699941679885967,28.4339718417268,-3.859212685701272,0.5456803086925712,2.310650931909067 +5.880463646158996,194.84351543580328,12.19861723118078,23.24062806363517,6.3451265476243535,0.2354770194411183,-0.44068398122223584 +5.340405161822466,145.5298848120054,24.835450649641505,26.556996633951435,-4.192848679177859,0.11183499382178441,-1.963268218940173 
+5.117293117073133,163.76879324417945,14.73082244724127,23.916313693713114,-4.698707227608314,-1.2515037504006048,5.044210542764667 +6.070254347893259,125.31871064982151,17.150987449671998,22.572757969423133,0.38492505206149774,-2.949343498142557,-3.445946388242596 +5.579982633889694,105.4663611152068,38.31498349438113,24.71594811260326,14.244719597153775,0.6346341605845754,-2.8305447781762725 +8.811985843058352,166.08907648353483,16.474141331327374,31.238038843318087,-7.106137621777984,0.12415701611085053,-1.623676847799838 +7.141117336346991,149.13269817255218,12.412354766242235,27.36248496260416,-4.8854365152038355,1.29762248958513,1.7595868302930802 +5.036823261179525,136.21584346796394,18.789607028349078,25.533391544014794,-1.3793936216329614,-1.6869334111624485,-4.9782942741286185 +5.32084048698381,186.7138924369762,6.333270559501931,32.64023362870029,-0.48693219043744307,1.0895390465511505,-2.0259993645799024 +5.020840818183034,182.71705367632822,18.03186861455212,22.364183160443783,0.6188603407520827,2.0608817396766486,3.8158665045174853 +6.701659825024205,211.5914554833888,4.328722428817827,31.112493916194317,-1.9952986111233355,-0.24123532626897323,4.149449638311854 +7.305202454141947,207.21753519102833,5.931878616490867,23.09243681415518,4.719048968070661,-0.9478721803195337,-0.3283556933406082 +7.67591993861027,165.96975335015273,17.638711114787498,31.853418096036307,-5.567719925481915,0.6762940289231819,4.543992310012888 +5.044519744591826,183.09473259625503,32.274545135277286,20.673147488587922,-6.15576769426957,-0.653356162094209,-1.2734918203186116 +5.762887170892392,146.7576502896834,36.44155654027296,41.424025507052406,-13.851158269783724,-0.6522952979435112,2.8549405663824405 +5.000656244986999,108.72198601713461,17.92638052204978,21.790287494363646,-11.303803942753673,0.5288276042049607,2.4588451328587544 +5.006728243430519,197.34412889462862,9.345058582089735,27.312294507729785,-6.303786992158743,0.35779324933543777,-3.364353701598707 
+5.351428268556014,101.23771467631632,5.0157734419452575,38.87698317943691,0.8007264651401709,0.18864935973204697,-3.5718825226692044 +5.029090097808851,192.4901601299909,5.544451370165162,23.586796777510052,-8.329672458468462,0.8694168790354193,-4.327073222481118 +5.655424791504987,172.5802607940119,5.493647742617743,24.20079225309662,-1.4008643766972435,-0.050667448143797304,-2.371775173357337 +5.679947684198958,190.24064547957914,10.25401158660286,24.39915014822348,-7.317562240267703,-0.7163645746100656,3.560688515184543 +5.007215237186791,85.45601241242746,7.489097731614368,26.33467907312035,-0.33558642196164357,-0.10325872083866922,0.11952027431480428 +6.097857598292621,134.5185098659878,4.700580397544634,24.301724680279538,-13.797094036918407,-1.1410365793954031,1.292650605981498 +5.010722322408675,146.6756422970796,11.111313553262455,32.41120492405945,-2.9977147766783374,-0.5007769008723629,-3.588429981889763 +6.169279956003463,107.52126222092609,13.671222566184486,27.587932100542695,-9.709977639370559,-0.38930137038190693,0.5299474517299059 +5.055323742611372,181.1673446190734,15.703171016586612,22.030834903287978,-6.8536795612959125,-0.4738505304071177,-0.10273533740366592 +5.115921650432545,105.86720406191836,10.150821612927807,22.297198736552772,-8.535151254854483,0.1286643037951197,2.9330272470819985 +6.167666401592254,123.5255323375789,9.280161292342466,22.647046323521266,-0.5682512649292244,0.1536943052813672,-3.32383764716755 +5.020071774406607,177.9590560312002,7.600709691867554,23.566375483584075,-0.11007944531311376,0.44479005808082905,-4.5234932431643164 +5.521268849145694,163.14723022026612,24.425331157056267,21.099280256531028,-5.769432673949486,0.12853166665543062,5.6903989607988485 +7.768718833773574,127.18158444171755,51.098886782407604,20.534640208978963,-0.34357174902740484,0.25252986603237887,4.031726928896452 
+7.250375047266558,155.47666393309652,10.703656501257816,22.281659127690073,4.748711072493927,-0.940638662695016,3.9339037966380666 +5.30951566535077,123.36995595250701,23.530570143850756,20.608642183676494,-1.6506165558462427,1.002145448157643,-3.401373266960689 +5.058842634215484,177.6131140622398,11.755012828755405,23.73874624597,-0.76662945008886,-0.5254149843099643,2.691049914275764 +6.104797828202727,143.81217647733152,12.14476010020881,26.103796541174745,-2.059308005355411,-0.8874009356229543,-2.9825038594068394 +5.082275213541087,161.52391526003402,27.313259059976676,39.97490798796946,3.0240579539353982,1.8313136032669823,3.884637948301691 +5.230648438078691,81.02192493736874,7.311863630205063,21.073810425991283,4.635973068928171,-0.9230293320982033,-2.1191407484098037 +5.107191849686973,103.21179483166631,26.434796626104568,23.37537108406534,-6.596307788725246,0.7005376866384789,-4.9864608401397215 +5.506193583464529,92.52513497214262,8.823680125541042,27.097465343501913,-9.745532191422248,-0.8921511976641738,4.21750944000242 +5.171359136966488,120.2335342971065,9.376661819558588,35.48578826538342,-1.8890770067091376,2.300740002905247,-1.0669669551059076 +5.019030180702731,177.7353578846301,20.741252505682414,26.772313850828837,1.7918157250778965,-0.8177652993706169,5.000222255369659 +5.08738503586073,191.2020236666273,15.529940924636442,33.91879452275805,-5.302621638622837,0.5137596315384769,4.685802727815917 +5.154356460095473,319.4807989085277,10.851227871054416,29.04679427693558,2.312900412650235,0.6235869434828482,2.0807783833904843 +6.7696748528259185,137.84241203675109,18.956004279994403,32.39888135118758,-2.050159510443726,1.4892059269873128,-1.3505653535641402 +5.342299039688756,107.23297789850824,12.939958178265762,27.621954907414604,-1.7499532204790835,1.9404786705229335,-4.093263926210216 +5.003594341979336,186.7340677142572,4.44789753128205,28.700870035419236,1.351077971751268,0.543237129287503,4.7714873438887935 
+6.21282110428459,80.94460682213136,9.475611252388571,23.103450570998163,2.2648251612002177,0.506190912339065,-1.7961332661132885 +5.0952927935716295,164.72236720417618,15.194324280329727,27.13088574286086,-6.779344261472072,1.662014490552018,3.361662198760655 +5.907026660138126,215.91773570128714,4.37445388485112,28.63062567447397,-2.117466603459154,-1.1892025001545674,1.9717011115286667 +5.504462786755248,131.56155341402297,22.548574156815754,22.71451358026694,-13.52116939398571,0.09359744903280859,0.7275749365019575 +5.102189267370559,154.81487103031853,22.823049817446044,30.97482535158679,-5.2623442073044755,-0.5391639051311026,0.680279762089822 +5.0268501192812405,148.76133634721214,5.5884602620766355,25.34242614778434,-8.091509882905694,-1.4373956042240192,2.3690288022411155 +6.919203619233675,157.70028560282196,14.919123229732879,21.66560884495244,-8.663048538416614,0.18793738602503937,0.8976327455679662 +5.007331094775549,140.22942277016676,18.19071270620283,29.056598410850363,3.3731134504749054,-0.4504544572949267,-0.7634124916137681 +6.502508214208149,203.69627570499588,31.019986801553532,29.411582445526033,1.6182075264896247,-0.5168782316146914,1.7262326422724463 +5.001023563997772,161.4329046152968,20.622291260572705,24.044176538241896,1.4500092640457969,-0.09563566771150116,-0.8270646323063833 +5.091458178954452,113.07819709823733,9.120436427085167,26.677021769095347,3.007717212468944,0.31642380457853764,-2.83652093527106 +5.070168141752316,148.68282259813677,10.201499034354999,25.703113335148,-4.515436956801723,0.6033346572916749,5.649536697115707 +5.0050091683526485,203.53524113264078,6.9128705608118715,25.94026129019985,-5.111371125298227,-1.494591463883685,2.6897178695812025 +5.001062899861727,204.82437125009588,39.25127162506981,22.19598987734589,-6.605843040065054,-0.11089407932547171,-1.1820566316637118 +5.372909222829188,171.19078598685508,5.113362951378434,23.784084776309264,-5.631067463304145,0.24128940396657955,-0.3444948055619568 
+5.271964681608004,131.7012074061667,28.426258097515213,24.23592957697986,-0.8855222708241159,-0.5826451090518829,5.447701995538017 +5.012880371894564,139.14943100114525,7.296944494572127,21.703251664732754,-1.74341949539556,-0.24111265234658688,4.5131348847432236 +5.208498528598445,197.16286061260672,11.380886280354503,21.01859839862216,-7.788597331563619,0.23636053732082465,5.3820636819262795 +5.030138328789264,115.33079331679834,4.9950530266258255,20.36262294808322,2.0835346794531153,0.12472072520346711,4.67548461280753 +5.369294053562075,105.8208225981946,5.285623706140623,22.957398391099638,0.16804802827060694,1.0463259795215636,5.1014232564283635 +6.676827916511275,181.69247193523296,18.78375569660771,22.02632813599229,3.0536847300180634,-0.27309185588007256,-4.317195820625894 +5.748624160914366,112.34392202090925,13.911096449939212,26.670307039619235,7.124376056730155,-0.5348340202772364,-3.127370036997506 +5.074372933643425,167.21857442109086,15.402224693948927,30.413565027632885,-6.987591238552275,-0.3065633045092193,-1.6095224473045953 +5.040104336716964,158.3128940478945,4.275248057852886,21.888359850168047,2.2529554936845653,-0.16224266472256815,-1.4406846690463553 +5.82309590196872,122.96313842331887,8.255996216687542,25.64409013221207,-2.6578880060601464,-1.0832321958497246,5.660239131171718 +5.159307089559664,115.72779962502776,14.171720933048103,21.785331666117727,2.5620707602115793,0.7084014934534155,3.6004065677146304 +5.025475666791459,119.60449697815753,8.2171345785723,30.10743661725457,-1.0589465978188661,1.5207430404312408,-3.0765474857556336 +5.655415518964453,104.43433235778907,34.26478916701691,33.866368421665086,8.84730718844977,0.29034318323317193,0.4581020681791257 +5.1150446144065205,224.00849946951405,19.05450496520648,29.081621645213037,-2.57464102291421,-0.6830663297113391,-0.5515628488761752 +5.748080180741014,93.22879977073204,4.502493723282115,27.67387051243358,8.018486812082678,-0.9503128662965102,3.7865143777210566 
+5.070687173549019,186.40542591836794,11.774397073610224,26.93469771914949,-1.8519492384285745,0.40070993582420605,-4.926321669277671 +5.03008172832996,199.24870939488312,9.60673986589765,24.252424500498016,1.9762657793066247,-0.12607168361009707,3.3155442884414654 +5.206084237707756,170.5069639642042,12.009980101553339,27.1671944103809,-1.409451231547251,0.3982048880621819,-3.3695742093403664 +6.932053503251343,108.43302668076001,5.032044360292103,25.39236398836456,-5.742657742458668,0.14163847335453222,1.6563152953990041 +6.767955948573178,112.4821373675749,4.487447056496042,27.693501257709755,0.9248486892617525,-0.26414142191051887,-3.032474295293988 +5.1377202146781515,161.74068407455323,7.999229315790755,21.274848714373814,-1.236617136000173,-0.4522120743109245,-3.2115720344118137 +5.0008266971631805,148.5716096227751,5.851862519534821,23.462721784814278,-9.828286469080911,0.7582019727208021,-0.6680107786456277 +5.934436153074724,219.32171848409658,29.54796164175928,26.367297114793935,-4.812700940373627,-0.5155834984569123,4.6480690469669455 +5.042599772483323,168.6916843323151,44.936880659890846,36.46405952224584,-2.1633206957941686,-0.5912023219792532,-1.9620336732675927 +6.379889088049531,149.10041197990344,7.6292814463015395,29.286178734855564,-6.64503101117176,0.8967457840864574,2.9299169771210742 +5.42858807127586,139.04531620196872,11.694997721920842,22.330195894599417,-4.412863228302487,-0.9714375237468943,5.20546444871481 +5.0626146060012385,110.2679702949606,6.869808049241632,24.828926249270985,-2.181319230507077,1.8408099134921863,5.4755986673970565 +5.172168526015837,118.11985960194387,13.684169224630171,28.8156259498849,3.4769503006972142,0.15388123245219912,-3.266438623406585 +5.143089783822764,168.16381023321867,5.497415431721966,23.140375702906923,2.9046423874365344,-0.2740839434521689,1.0090480473919552 +5.098150201309473,158.5477668371911,21.38281085469685,21.365742196921197,-4.947438431017408,-1.7849256899581514,-1.3840449263479435 
+5.22572505751807,78.27025152602553,8.27969015378048,28.859175114592723,5.908500470977272,0.9810066864016238,1.5688679040675693 +5.117739945552996,110.36387344539241,7.9161496202267845,28.59302740329852,-4.64367413061728,-0.8737171397872998,-4.835076447943326 +5.009232451035363,160.73194476667157,5.4361679947707,37.42992355530017,0.28500935648038794,-1.0156344201369412,-4.639780311862317 +5.341787332219115,216.11376728380438,4.970691346137502,31.04720511872688,2.64984379486763,-0.4112435373111226,-0.29227934916444553 +5.889255694971011,147.46143034879253,10.955764566064769,21.620762409171398,-9.846353026009593,1.4656211675254827,3.7012589886297587 +5.002451915236872,136.0126105912556,8.258864875800931,27.68795197009323,-7.112434890229368,-1.0062190621092781,-3.844434744164299 +5.021989133402158,110.07018976422894,22.39326961782017,25.15613185231137,-4.014134622064711,-0.9021477623819238,-1.8163577764767287 +5.027483489976809,185.18252289823067,6.814976454431368,23.498327489735217,-0.8975656836740715,0.7527691429611131,-4.370969941211017 +5.704017291164417,112.41517004556798,39.522400887606366,27.278834387610722,-2.9670055395026154,-0.49050852650590193,0.0898557479269515 +5.6564027574225415,111.4680292553495,20.72139683554872,28.655693794942756,1.3457916823263871,-0.524672210141139,1.7486924676770537 +5.071117871895484,176.46896776277063,13.88585497357057,24.39181172042727,-10.244924097514087,-0.6991958611427955,0.1878740518361015 +5.000318162688183,164.67838321707455,23.49376734971163,26.82102289058795,-13.26398623401001,0.35236093927160195,-1.3752967497582014 +5.538026582486467,112.80759002926108,9.17665426070732,32.711482218509985,-7.834161081326092,0.06810259833711926,5.714453784413472 +5.1736733854341805,171.0802816158368,13.953811330205411,27.64675786981629,-0.2319644896273525,-0.9303417070007342,-4.967071441736699 +5.657217643709122,131.0762150767605,15.493405973883931,30.11055523138139,1.5105508573939517,0.8453995602773925,2.686861882000442 
+5.0542055357310645,141.0299802230024,4.653911839279282,20.982487897584644,-3.372846022834303,0.016472381649072398,-3.9004939662601177 +6.3439323974817885,95.04687655121455,4.378367845551179,25.209870145666173,-2.695710940607964,0.8449629554575987,5.528082901339264 +5.013864331937038,214.38864766624624,18.480641167161384,24.357528873632255,-1.4617138924941147,1.8508339476801332,0.7408643152833339 +5.288904935601775,198.07839903638111,18.912190151905005,23.47111620704113,-5.03272562667399,0.02207424087123658,-3.2654731471403635 +5.005884559030896,214.32001962227224,9.591672440067516,24.41618371599086,-4.085322041140121,-1.3691790221620341,2.1534821785795693 +5.000173951399615,173.7683242218134,20.95251458011525,33.59837571154292,-2.0850351841311534,0.8872035233421282,5.923993060518699 +5.004809705400636,172.33124142583398,23.665421017279648,23.598228440630315,-8.120726420966895,0.014331182149166751,-0.8548327910632603 +5.582518727715748,189.93409886958273,16.09120822212449,24.552706215952867,-11.004199567496542,-0.07415470511506256,-3.472570165912596 +5.488002038085024,112.76791327068064,10.007478478941607,23.127576600422437,6.173681023050403,-0.048564787847951034,-2.874128819094591 +5.055599054202576,138.15363070227863,4.295611200990243,23.976346899523627,2.945041509646196,1.235021453144089,-3.769858697176522 +5.267334195367094,125.09872196591476,12.83092672660888,28.11578399847221,0.2897007154718301,-0.4332949239040796,1.367618623792616 +5.096165933498112,164.62256044805292,5.059974383733773,28.655847853410904,0.7757720516043864,1.3910354560887865,-1.6867826108496233 +5.05496792955222,169.39688466658163,21.71156438992786,38.526266458985084,4.533598624636477,0.8202107414766083,-2.896300971040257 +5.505296250816592,74.15343082131469,11.362132171783667,29.085730726390132,-4.2027678509834185,-0.24742346531674608,-0.9298407799729018 +5.452908645354024,97.84258845538943,12.6062671238729,22.460257536669058,-3.506751399401888,0.3022707463795339,-3.3822313968179287 
+5.006308048534326,228.520527999733,9.396924870217724,25.59430696195617,0.49395744956304943,0.5439803613456898,5.021396437586024 +5.9268911483324915,125.47396661129596,15.333316209661554,21.96581571833358,-6.119955199457767,-0.9423685037543559,0.39303335965870545 +5.950175732872159,182.16822026852589,8.62174373035949,28.986185714498358,4.602829213160346,-1.2663828124528027,1.8991383087868812 +5.129126661985634,155.33376099349886,12.265576035175386,25.55668082242676,0.5398239289823854,0.9372495450912421,-1.953373631706108 +5.548197228055904,235.29673095058934,5.006751999632714,27.46873727451076,-5.267188375436672,-0.7201022447501776,0.28460384539499817 +5.203977348899807,165.2740076895671,5.178968207550663,24.26953863395395,-1.0651024293105165,-1.5939515375049897,-2.5455079915723173 +5.000245550374629,233.1294835431617,4.399635833307902,27.453923057430217,-3.958626244421642,-0.3754978160092561,2.907546008721103 +6.807083814295886,188.5359731972431,5.851280340897151,24.931574005813612,-3.361464876201636,-0.958703834468276,3.264525655152065 +5.581431234972504,203.20591162211534,7.270540610357275,21.01655778677677,-2.085707178159838,0.7943364000653058,0.6648518598423454 +5.018677366102792,152.06542047022998,8.00096344953502,20.98321887655134,1.4016037437075082,-1.6051078356175037,5.2283696645186915 +5.428430388236532,145.47963449732111,4.476879983476929,24.17744585713443,1.1775617833389855,0.5437102534580702,-3.1217145428202224 +5.990610024198509,152.6683103157619,5.279741734152472,29.18503558538232,-5.785882512199045,0.9251663642076984,0.747220391654011 +5.381791446226724,175.11963027684072,44.503374704592005,22.026198355067443,1.590429172382005,-1.4696286040036017,-1.2872847731441723 +7.053236480539149,169.81511242405864,7.744742934546991,22.10420943083906,-3.521365379885727,-0.3995923463080995,-1.7246524141599497 +5.351654469971737,166.06226972277526,10.527460224061654,23.637504267078736,-10.388951228808384,1.4173426443802095,2.7005948567841216 
+5.4724144277298326,110.22623408755973,39.15151403094109,23.2016304434594,0.1349304257443329,-0.8976086679663611,-4.4233013270216945 +5.405317246505034,176.43719315039425,22.854018411190328,25.93558923831964,-9.818699228649448,1.8448050159120464,5.810053083976861 +5.000681591604632,126.16920743795083,4.371813484403072,22.01831540157641,-3.837437607414693,1.2531682095041274,1.4141405184412568 +5.258931890484195,177.44193725346,6.920131807515935,31.093929714842343,3.2295626662770154,-1.4909324172096663,5.120476306342336 +5.29632698863878,192.8826538520294,24.451588231982758,30.35420426506024,4.099771824603084,-0.02773392455738246,-2.493890093694059 +6.9759772135929445,105.5879303385864,18.650671023303296,21.039859152506818,-3.2384955809830602,1.3752359608682563,-2.337046821109422 +5.6244049927245925,139.33078820325102,5.495082500783385,34.25921422172662,-4.08116065875504,-0.025208170147126435,2.5668761868786847 +5.344572789319235,141.26304294164862,21.63978115632485,28.40889687774964,-2.583735021310363,-0.6678801790750701,4.697479430444362 +6.036240241229871,132.07988023651146,13.758386720481742,22.679971529519314,-11.223938080515861,-2.8680175289766323,-3.6309001686230777 +5.017913758609126,144.52155820258224,5.192079888135073,21.654817603099353,8.34353925625966,0.21068854342755527,-2.3105251181212045 +5.028904021399622,160.69955892672348,5.550956003575292,22.14980989327131,-5.884837367971322,1.287155307846868,0.5524837706244341 +5.112562040031711,105.33401167018214,15.618738344327511,23.3273575648183,5.200834344132629,-0.5743059884856377,-2.025354337796836 +5.209160508573222,111.22693806382583,11.440155348532892,31.311500317149125,-2.5527867978237646,0.4953266465704801,2.384654374421399 +5.237050849021069,116.60567406540218,8.066472571307834,32.87488391990703,4.136934974228638,0.39604958998547224,-2.718445422207169 +5.438548093233366,78.59797322030215,13.803631238887736,23.172714155334994,7.603921309261947,0.5887981897504261,3.8303902332911886 
+5.166476861835458,135.2248996494871,15.831974828512926,28.658564687118695,1.7321651912025033,-1.2817571301388209,0.9382754446470383 +5.6566968661486445,114.47322858424673,15.488809994473758,28.150928615242197,9.123297950802462,2.0299226130509536,1.0164837817722496 +6.032793105847513,102.98091832129212,6.117097124857777,20.447385996153162,-5.397002052491512,-0.5019445162745081,-0.4291733279800569 +5.072855164668721,155.98093450427456,17.495673945220783,26.512199821850714,1.6368439079025423,-0.1592845656277424,4.28010412963703 +6.58901837996285,147.08598382438885,22.298903641546303,26.799896704357877,-6.343653671897531,-1.496216295671384,-3.327426413488124 +6.790343003282402,106.51069016569537,27.272784346925146,28.58859605084178,-8.06925452784165,0.011447713852114603,-0.4407166678744847 +5.002806543634851,110.4176264387344,11.506867496904928,29.621119387362626,-4.353154656077415,0.4194459852506574,-2.40448686930763 +5.721949712137914,151.94234673819196,7.788274788597189,23.636389136557835,-6.596208485417514,2.051213880475156,-0.24655516266212274 +6.433503925803514,197.72979382341742,9.534052842639674,23.765358896331286,-6.1941334431675035,-0.3687653325107265,0.6592662414440493 +6.787251229917718,150.6513039052918,11.019231316312503,26.050687992216407,0.17577652281399514,-1.6892546780329039,-0.28780966309763656 +5.008369344621648,102.73062937934911,5.88785244419555,33.048912270052085,-4.7890235856717815,0.1476811614209946,-0.6868769288543257 +6.219794849832951,214.9666185578298,4.57143792571961,34.36606463466008,-4.837274355026171,-0.1809983919627219,-3.9527020480473514 +5.0075501364034745,193.81167349900736,20.132631310116757,25.33703041508593,-3.8632077635634183,0.15805905426345215,-2.0405528058016698 +5.010729255407096,76.58071026504506,18.59922229169808,24.263060317528833,-6.632784506296855,-0.3966154217680058,-0.34234049386596155 +5.013818491703776,109.64187249794152,12.919607231587502,21.56366617318176,6.775541929027694,-0.4002366295625488,-4.551566151006195 
+5.104222085774095,129.53077607539262,24.710980140025978,21.474926219567838,4.0490499304113845,-0.8248956662894505,-3.098508297093609 +6.189241199072019,219.5074528312304,6.022706134027107,29.87492181793138,4.3501236586898715,-0.24444044589254627,-4.24778926946212 +5.577496298020845,180.88237427930335,9.835671259198076,35.52216811266159,-6.871890632516705,1.219457428248391,4.808371130208824 +5.049024089313317,155.9597356278816,4.841744632721653,25.60434996034886,-5.173546276054617,-0.4336304917402885,-0.4494242277199856 +5.318687585409055,142.1681356761573,21.269651192777324,23.00719182949297,-3.9785037616811225,0.8611838731078146,1.6652662973106782 +5.814490968380008,167.92619940713774,7.377118285805161,21.926540446271137,-3.447179499724833,-0.3345036934943053,4.575134023867747 +5.121486357500009,198.16466377099954,8.360688924458657,22.224903633841087,-5.671485359621157,0.15955995924177305,-2.7206214949390435 +5.229948028052266,106.60622484082135,16.47133242419393,21.2760028469181,-5.642523392550522,-0.9841644763462932,5.421517189619827 +5.439751852939279,190.16677811598376,18.784004239273973,25.354130591606072,2.193875363370238,0.7540849738227458,-4.910835647794459 +5.065383227487699,151.2537660183294,17.2813349901969,25.361891164371148,-0.6655339363446351,-0.2843916621652262,4.778671626051366 +7.211062628274247,197.32249307198015,25.772053630155177,25.825922872491528,1.6059716944858193,0.32479752956975927,-2.684278377972962 +7.64785255389115,153.02094352598354,5.435405609332546,24.524941561470463,2.554913209464642,-0.8854246017060757,-3.004696975879726 +5.05674806404623,133.2532067558037,5.780008062222702,24.643183970938754,-7.1045130663034275,-1.280893483344987,-2.4132240966525473 +6.3123933928533456,141.49363116496914,13.506645771368895,29.87666370348659,-9.067080193992165,0.19610993505485214,-1.7893673962664405 +7.22283149658154,109.86640263173138,11.557631387578523,27.317035433633052,4.483039209984084,0.9546441561162023,5.825922656007284 
+5.010494840669713,121.16804796707021,5.293178139768288,26.721735454378365,-0.7386239555826566,-0.8009713318143824,5.754307425105031 +5.196838792411529,207.0653759011587,25.044668402387934,30.891597247487095,3.6374055177867373,0.015851472982513822,-3.2024882179942904 +5.098252034136081,98.49724957298537,33.402260052967335,28.419939724599132,-4.8418172343404375,1.0875532925321485,5.333171103329807 +8.831325202886507,206.7951745439862,18.305066815153396,28.381319997767623,-0.4531891593750619,-0.6312428196865475,1.720006120828308 +5.009672420663231,128.85645086792772,11.812813186251795,25.650476596185825,-4.886927363789129,-0.022689324872778884,5.446539432217042 +5.51767114766518,110.98363078290627,60.702111148644626,26.41742300347613,-7.8431703666307495,0.6858792422022852,3.763361976821331 +6.148770850517723,147.44845091034028,27.104043338366726,20.794914500261164,-6.125099859151969,0.5191792079282154,1.3746353951477506 +5.088326144105548,154.02023555600766,14.244742203221325,36.16204967680426,-15.222047452119597,0.1827018919718896,-3.0120324157087373 +5.0068185688548414,99.53052491687534,27.250924521714392,21.164635167326047,-2.764929016735146,0.2046473806071294,5.45167449708004 +5.000529847050169,96.64009920616247,10.373060666351993,26.831712251224502,-5.759605015269736,-0.2659863561971663,3.409321038173493 +6.016594325327578,155.86626776551623,23.198850725053607,28.13112567534052,-2.66304625964886,-0.0002272887041735507,-1.4111842612775094 +5.689637424322019,159.93649653912692,11.330646479336727,22.439478303062188,5.286498523192671,1.2394523155345292,-0.054404101938577476 +5.795342564373953,151.05814777392462,25.72552171917778,24.913916846339074,1.0475592231755497,-0.8197152556937111,5.6556036817310495 +5.001422715530808,139.5303172355821,8.25880350944211,37.85571621771783,-4.468896284880813,-0.26038890651424507,3.3715699278401416 +5.818174512900004,204.74840309889572,8.201070288667808,26.302212411164657,4.199899411415207,0.5191402566926833,3.6441999071501936 
+5.499313995416,184.5532662490382,7.799296789003146,24.1707809353684,-2.6786107011856632,0.14309164453127915,-4.718341985518558 +5.927237175373399,124.94592935865559,18.259459840973165,25.49168836689425,5.150209029791533,-0.11667774653872956,-3.0629147411290294 +6.143884739486767,174.95165446638512,12.882520255298264,33.17055077058404,-6.234262256640504,1.4967441114487494,-2.1833817326156364 +5.109380463250347,147.40856723672027,5.914712741111176,27.62122026188983,1.0164106502315189,-1.4842743753150234,-3.074417749633206 +6.083591182632377,159.4387057618383,11.71911071382059,23.09021060782667,4.31786128238046,-1.6711827560315686,4.921506968783758 +6.713775634366947,196.1371478574837,6.412376187685615,26.48593878376012,-3.2774527808103393,0.9171734085747983,3.7024205267886057 +5.030312847779033,140.49913830033788,5.382877119728068,23.155587726926367,-4.228441899362336,-0.75801415139682,-4.624534049070043 +5.34186406080635,154.94619214262102,22.789609345587216,21.325241621313907,0.34183340617788405,2.0647924029659173,2.0002558218331306 +5.965547535945161,118.27348575310184,6.492795844851981,27.488042594872866,-6.808019621735122,-0.8507783959780162,1.1847409027998523 +5.003887520969348,114.18167823360082,8.46051713945775,37.8285306262823,-11.122522686261286,0.49945071275830666,5.986388350303148 +5.487099279505115,77.6879561700497,30.613353612383502,23.39590823641279,1.127140782487687,-0.07926636550307542,4.470564079987641 +5.04387359515257,176.65854003019538,11.219548922866895,24.843676425526965,3.114361921507834,-1.4032926370345968,4.957001177539494 +5.491027444786547,165.63011241848932,8.50504033166098,26.13507374074574,3.5371230038441297,1.5789479132548372,2.425678780036975 +5.541522445194145,165.50676235635356,30.922558382019506,27.049299577585078,-1.5453145526751992,0.00036902898762658065,4.77528809581502 +5.662234807375358,91.89843562696123,4.539193845678934,23.437713953539237,-3.750543283750745,0.9008849143633637,0.6800453778881588 
+5.342429195207901,184.70561935920617,12.326054514744833,25.83859151878858,-0.9102149176022178,-0.4548692200805128,1.5413180048291002 +5.16803038414764,180.04839345198255,7.930392454123936,30.908165425222837,-6.474065652495413,-0.8645466454571408,2.8961481846133292 +5.335549471973353,123.12046625159985,11.225780460534342,21.29585126856847,-10.707469773611082,1.1291199035301664,-2.381058330071345 +5.332232779730463,175.54993496585158,17.0083491796962,27.984543798901107,-7.261278697847967,0.05787441286108142,-3.683258951890479 +7.067373837338476,219.1274107574171,7.209433913894369,29.821677254931373,5.183013928977649,-0.4337386663118338,1.0117569912534767 +5.68469455971026,156.43446010201066,9.469982099060793,25.11409058498649,-4.881036931783186,0.0926976373756347,0.8912222746626082 +5.066031652112717,133.47213854775336,4.567681569488029,26.073722012614954,-14.101472145437732,-1.3978201450384964,4.440193347058527 +5.63800124279964,123.06796480908986,44.89382212302497,29.511105156764714,-7.311648141522921,1.4578226500595075,1.5320239211350781 +5.000216657361009,146.63653091317514,12.315854065276021,24.109221499561656,-0.8131386922087867,-0.17675691566549942,-4.957042756812091 +6.502960853077601,117.26468525657369,6.962555395957302,23.723260278482016,-1.9952131546801644,-0.2542403001996208,-4.145414079953253 +6.202179193928178,95.8474897051506,10.985064886687717,31.756467911471116,-1.6737340958005447,-1.2634374960231483,5.5288924730538955 +5.150694498663257,122.49339840420214,10.742827923805171,21.227498549539977,-8.837620525077462,0.45226274119917315,5.735090392822647 +5.01004141241662,129.2188712728286,20.391068153266534,26.891636095727222,-2.151400259371282,-0.8401174092412682,5.027889027429847 +5.00657473434786,128.24616948302838,50.57714130453859,23.222045750725925,2.702446606978535,-0.5026780708589532,-1.5764806321823333 +5.230375078742775,112.36530105520654,12.235412446306036,27.10476123316684,-5.212183752523897,0.5133925869020991,5.860604243888282 
+5.002100758887664,174.28430953896213,4.754580595175034,23.11059316133471,3.200896229474994,1.6416530047475275,-2.661860464649015 +5.1526181575494645,165.0307703541151,13.96275917144781,24.783342352965658,-7.414611278342835,0.5807900362143437,0.9477891574075823 +5.069961753209398,125.54113134534751,9.800686150696444,22.679140521935143,0.14606794167301107,-1.7073402693705495,-0.5197403755356387 +5.901102130648453,159.15115502246306,9.397729538278199,24.703573842228632,-3.181118342591296,-0.17835543085451414,-2.1307205712422337 +5.988821337544251,162.00018396417227,6.233056316548753,25.411478491919624,1.2090890813024533,-0.8284599544575123,2.5156177093025684 +5.000047040043329,154.34685064877286,4.758719944209579,20.380098806653606,-3.6583027834941912,1.286311684505601,-4.438615686810774 +5.5764385545781225,141.75125466307028,33.48439401547777,21.2676142496908,4.9703611309250295,-0.40645236179259414,4.213971407717285 +6.788237025787544,116.81730365407648,21.51094301405703,43.50963126642375,-7.3837097202036475,0.1566320471424112,0.7921091706638119 +5.034290378732073,159.07712700869197,5.940219544095493,24.05407969963833,-2.9623299098090317,0.05210668042668796,-3.5760909556406877 +5.01153030744054,148.617453757973,10.017714760591875,24.39467691827265,-6.3559382535085325,0.9558131774069075,0.3188136021305539 +5.0585305658010675,162.43225044826494,29.819535049407012,21.378364787136224,0.10425998452750118,0.7431915005285292,1.2416873343183035 +5.017853627041993,102.22872902769585,6.521977465348575,30.202022324231173,-8.057055333310622,-0.4863230843246441,-4.2253997652074045 +6.12223146093074,51.928201425755304,9.603782071870102,22.543700643755713,-3.2943345605778536,1.9204672735058752,-3.237009048799016 +5.053870281004484,170.36514892895053,12.49574708861909,27.512218308603657,-4.908234250282303,-0.6527490226835765,0.5488601289529944 +5.347451309649581,167.6846282656186,10.817035398269038,25.192436650341968,-8.302103138247741,-0.17330377691631182,-0.25536632287874284 
+6.809681528746068,168.956954592191,6.933388068042609,26.073194212292954,0.3228739674943637,-0.36041008241889144,-0.803222473630135 +5.015611517488871,152.0343919286409,10.0673292047152,23.25510390880948,-7.351204537658473,-0.3804139766622162,-3.392243939940819 +5.774067661335773,159.0819323039345,22.61939684435642,24.16110958473537,2.0211134891939793,-1.2981398054638749,3.5289045944158097 +5.285697020404792,118.46017639549456,23.51944144912533,22.497888189078374,-2.7836775385980634,0.5279190075209275,-1.6811554766060475 +6.723306972153739,181.50639697984354,11.381540577160347,25.547849744445557,8.051950056000912,-0.093100276250494,-3.34447769860527 +5.0409692575584595,148.02786458513495,16.132849758951114,22.43076301312006,-6.435522151922219,0.4011846806149476,-0.4841310966156467 +5.198446775646373,149.27307317079274,12.023562677837749,31.328292174706696,-6.8896811617582445,-0.1025833801475749,4.209379270469356 +6.5848319249507625,80.5265102836185,17.936405462125528,21.566934283018583,-3.3360867476325113,0.0308690977380319,2.9136454133745904 +5.474120133779393,120.08995975400569,17.937534609009884,26.20139991226406,0.4166891093884093,2.61610050962273,4.5645854541695705 +5.923297624630204,217.90182040485254,8.120722850712125,21.905167247823183,-4.00166366693832,-0.7855779448724177,3.341327234830402 +5.298932257966983,160.27656250645833,4.918384731274645,24.934880858845993,0.24940207576919526,-0.5069981205034998,1.7601167688729857 +8.334959898193159,114.1603259882404,4.6601876242871265,33.3068799591987,-0.002030232928964981,-2.018205715548186,4.374024547025057 +5.199396195454307,183.9514042073851,4.434170461741739,35.09616694940495,-2.7578740187346917,-0.676853137903355,0.36058739254689254 +5.520025555066134,137.95257147056597,5.726198707690422,36.59736605134472,-14.789670280013844,2.6667436770376436,2.9233822873996136 +5.091775921862744,119.11446309343803,14.81610699189644,31.868807844190115,-1.195965795009493,1.4514561467263356,5.3927796528631315 
+6.261032816874299,117.84655077308187,11.278413696324815,28.048940055048746,-1.6173746612604751,0.6346288553151814,-1.2719559834910346 +5.215846196108037,181.09563398337139,17.184599949959942,24.219625494968312,-3.48602083030871,-0.5028268637974027,1.0475638397655587 +5.373600335875736,158.47092315717072,14.462407188150717,31.094375588524187,-8.471370118548815,0.5129316587643497,0.9602875929354102 +5.047969226524963,171.47264109052188,5.919501744550685,22.801274251403523,-6.42590006390143,1.7567793708333177,4.825946463681948 +5.265551369854345,109.07011078477397,12.70043079223652,25.792344976060598,-2.9374826305089536,-0.9743108006928138,4.129732168750113 +5.40582425228502,58.22567990795484,8.45895670237913,23.19577762648818,-4.467799999223012,0.6803970482160467,4.166593440158355 +5.347825658712292,148.14961345992702,8.418251461337409,22.3718151640254,-2.5770648198962163,0.9557987256060957,5.667711368558887 +5.931648972540552,170.99897293225453,17.03650833837207,20.87750126470711,-3.753723037433046,1.5015354777068726,4.093623416463517 +5.172455112220562,160.76321531552182,9.886261828105924,37.79760540162157,-1.7765131181872809,-0.7562656480252864,3.2879724949754046 +5.015782144894088,123.43994735819773,24.22787789984847,27.904277526542813,-6.488781579778351,0.47350460418154716,-1.488242778140135 +5.127565066957176,137.05422246529048,4.529670428959425,22.757929171609508,2.4543675124792923,1.7137434476698434,-3.9741267896046835 +5.13896653173137,220.30419869350516,6.839154403852289,29.39466240749847,-7.755925824100542,-1.1476992204820962,-1.970844971637105 +5.461842797822865,228.8317584693307,8.045339281176155,23.52137566180604,-15.061513523980114,0.0029032204951271443,-3.4978481537153856 +6.590832351771924,188.80151683835365,9.273677483676238,25.837027810556364,3.706250950755929,-1.10057035995619,4.554489089507246 +6.293729293999679,172.52840019983125,18.175239439009953,21.29654680291706,-6.335677623372416,-0.2975317815472589,5.431226630824485 
+5.0021215153708045,158.49058500937136,32.16429864764434,28.307058898686066,-0.08208370765592399,0.5024090781917618,4.728302907812612 +7.933516599132563,147.55942186716328,15.063709660345985,26.04721945070422,-4.185150821869554,-0.0009874189814476862,-3.34149330673138 +5.575086772160388,136.27656133291688,34.90236514815867,23.17937058804676,-0.26255594914923996,-0.67456027794399,-4.489488393525409 +8.274309241339138,157.81434201797913,14.800090159207723,26.69509664674644,-8.150895190800787,0.2979582792246579,5.826500160899382 +5.320376606474622,161.1383774992055,16.684707026103077,22.163123509689566,0.8553906931552886,1.4655731387841706,3.088304981235394 +5.059164194717581,147.86827474396455,6.092071727654186,29.649618721331393,-1.6996939361437593,-0.30362859377309165,-4.5004677211905 +5.759779229535652,153.08104739611665,4.758757143834732,20.925449742424505,-3.1276199702056973,-0.9944798845871008,-1.4642523958030473 +6.873353114497654,112.76373344122229,6.404809214100814,24.831589197476927,4.748630698173843,0.1898899912663152,4.1012680043968714 +5.490766663867337,125.88348713754182,81.75900105135844,25.951893486906407,4.751498670075968,-1.6840295718358629,0.11783732453408469 +5.282529511311333,84.6286856150788,15.888153363671464,20.855929792364808,-3.9332666089470956,-0.45838074161046016,0.8371316691436066 +5.5375836873241315,155.21001670156096,10.006990446855252,21.303787362696916,2.3299477100779713,0.5434059083011952,5.25671196899807 +5.10841458632832,185.2781090425066,13.206895285386636,29.662631919495553,6.736167995485495,-1.1872642574269,4.657289050815047 +5.179703779038611,219.93036125251678,15.158132916720195,24.88818514089817,-9.051230719390261,-0.4126416926835999,-1.3684877100850952 +5.6817046048044455,144.440502462978,8.00728994215364,33.917122535086705,-3.891207623734119,1.1771253451621082,1.1514618528327896 +6.488677211269962,184.7825307528252,5.661401177382234,23.649599374501665,-3.729103333157748,-0.3137041653151179,4.054849040192833 
+5.012274678203555,155.32014758286573,41.9307035342118,40.98563246040749,-0.09968767664501521,1.5790316190072575,2.094577743303031 +5.785677829942113,196.69611417265793,33.658954718180674,35.09540655827608,-1.0550266327068454,0.37538823587259657,-1.0031145086778972 +5.316547917426791,195.61698614517073,10.656108811791174,26.335451151037113,4.616491021242108,-1.5681388188050611,4.7630359654874095 +5.316433308228687,94.41123557668953,19.364577392353056,21.563200530644007,-13.322942962020813,-0.900886519307947,2.872968849875366 +6.862195419167806,168.8126023666489,5.762233501676568,25.62747655400151,-6.57489305267431,0.6523455186868065,-4.30570756739496 +5.114099501220913,127.69410122903639,11.640851340391741,26.779797382164563,-4.394820819730244,0.8716003139067918,4.922608812016225 +5.269698071475958,142.12828781308846,4.782252569114988,25.032410184929983,3.2359224881847553,0.2682161699781121,0.0637437645136334 +6.228176499861469,170.59750683574856,10.511033287130722,21.374852156020417,2.619742089127355,0.9476812196610377,5.383442315638504 +5.59599434330549,124.13713116497294,9.921212851848537,24.683588349341612,-2.570750703135061,0.1472675878697148,0.06581475836809769 +5.046496910057696,108.4796457630219,19.019057344406235,28.38755337295246,0.02901221264641185,-1.7724554632563914,2.8347794722114017 +8.517263087487422,188.88589690015226,30.677493003674186,23.861957120574353,-0.5577409624059628,0.5924196108902267,3.303808093680429 +7.746444388962386,185.49072510970598,6.840458473525908,26.2700340421604,-4.173942457546202,0.09032547448461785,-2.749455148326066 +6.3604714663726165,172.34884963314244,7.341097458236925,24.98298657979307,-0.2062218361826489,0.6511214539840977,3.7205591023373454 +6.261026298135848,122.89622983933124,8.304752163607851,23.620980651156533,-3.9412247179764908,-0.08119469622984171,1.3607262696200344 +5.002525432038443,111.52467989018638,20.62121123421154,22.487675565294197,8.64373145740509,0.8018976029720893,0.40346458325501366 
+5.022616993858572,154.64050720293832,20.399891066222278,24.991486473301972,5.048023393836501,0.1398452271056895,-4.983818121818919 +5.527596402907112,121.05250094856682,11.121991605383572,30.099314830015384,-2.52717139012318,-0.5010027615175823,-3.772502935186993 +5.95726567976099,117.31714882657324,5.981796835685552,22.21782732516144,1.5021418345669435,-0.1283025591749987,3.140829935816864 +5.651226276661181,146.8936618927134,5.245039929895488,28.634607298787564,8.464259390958851,0.41460596648430903,-4.681950344925564 +5.383918972832712,90.18743001878524,14.971262633680244,22.06458571104138,-2.6848589806661796,0.6045777859731639,-0.09768051791284904 +5.235768996302648,118.58879836348851,4.716233460521881,29.677121497081338,-6.652447116925501,2.1340947532357673,0.1347961139022491 +6.295601598357028,87.75692318770655,21.577582762022338,22.649983832158398,-0.36251382850375613,0.9411878373853054,1.352363436077443 +5.386318582929444,104.24733529068362,5.869179968102086,23.095440879684517,4.515065403795112,-0.9314567957057386,4.9614753320602 +5.327122483716359,106.98303047542193,5.550817106000779,22.90316603034016,-9.047011804397714,-0.1246675392556697,-0.39997496611582495 +5.033776358362779,163.37492633563076,7.006440461086446,29.23485140253435,-2.720629724281536,0.20069629093714655,-3.2760190554906843 +5.014994544141749,128.47828552199678,6.177744144311893,26.990962255628517,-5.582070121241552,0.18025628555518106,-2.976100592690427 +6.24667362145642,156.97804643023093,19.098883799549107,24.843220669391112,-1.4819290704951316,-0.32037009686771123,3.1263208559045967 +5.007495569366597,122.24331721888329,9.199961996537313,20.921661793607964,-9.478592782904487,-1.596128031338419,2.6693922172058517 +5.089065012680195,225.96954305860356,4.851237276268539,27.462905070648617,-7.874467778994696,-1.281698983627304,2.249615221245488 +5.8965113930416315,193.20435645081804,16.804335902392218,30.231908256273705,11.069995440570954,1.5025857467722896,1.6502120779166498 
+5.114145700234427,173.7052473305092,25.314781471257195,27.909487734753103,-5.446536992704093,0.6535380021026145,-3.983101880664116 +5.667547952676443,92.38842307718548,7.802224376525217,22.105948680045884,-5.758263179392211,-0.3195366262887513,-0.02638438828709866 +5.492907756048042,143.01493689252268,5.597325386189273,31.688778446207845,1.1814048051266024,0.9550940105332724,3.94722671780319 +6.019973574882636,86.41552494682853,12.451306185137831,27.861572215088366,-7.788220792454326,0.26199595539595605,-0.008985973316209872 +5.00057571820316,87.89216803408375,13.041729158089005,24.22468118716276,1.0733996202387548,0.16079290091813944,-1.546448349172079 +5.089718147762762,120.23854546901926,6.7035253404382775,23.502458972191505,3.106955571991839,-0.5716806424065041,4.098182119973339 +5.264968516293055,154.498546823009,4.612126679135526,23.34030394033299,1.341362459580317,0.3516600585958921,1.6675693533866136 +6.229300743361989,139.44130931849614,10.186592398133435,23.263990595739223,-6.047677412274274,1.1149800606616962,5.305333189697603 +5.300590461312921,91.90813576892032,4.223056398736185,24.56140559728724,-6.540622866197957,1.1832682572865134,4.147222716070361 +5.104779313868483,172.94408504143962,10.45981349224791,33.51817648401022,5.561446962976134,1.0609410642711623,-1.8614638301072537 +5.7609120256261575,159.1589799461598,14.060285070134487,30.573221182745158,-1.5244128400120809,0.5107126304164932,-2.099541411313575 +5.219943772714608,157.7826622054792,9.420252736785592,30.692950805959768,3.923342726024515,-0.9387839980045412,-0.701524297998743 +6.485833146937357,132.7018558015903,20.75504841274147,21.02193928245218,1.1851646720742899,-0.5464961411913734,-4.982074875034449 +5.6286122924291595,168.32749209472126,12.580365511420819,31.876234425017806,-4.696372934158394,0.590029971201994,3.094490778345783 +5.186416719333324,190.49631152317977,7.775042163614658,21.96295771301876,-2.2755049646372454,1.4821852357181209,2.0481875311238076 
+5.921594528620752,107.78293379848463,12.054849514761298,28.845717950716516,-7.679628939252686,0.10211810416005715,4.427042449471498 +5.000379886455414,120.89086819039079,22.762875350254948,28.102299152004694,-2.852531272109531,0.2654380493120825,4.196388275659093 +5.075761156386094,182.30478273020992,7.284941747657368,32.96418032814862,-7.790436335504627,0.003193074333946576,-0.7855204442743062 +5.84649809830538,152.52638635696812,9.3651613726464,27.817668068443993,3.522995684568121,-2.595011500290185,-0.4528767860505649 +5.303360009088694,140.309790964386,4.752472336760687,23.02118539626776,1.1711904999344793,-1.5555693321217359,4.063809919819944 +5.587375408602016,185.8094178845838,21.69111568449125,21.37185627775997,4.29841677382008,1.1029959578327817,3.2360343582872204 +6.937029055757119,127.36533635227175,22.64442116891287,24.8752118981921,2.8246538795617617,0.5547365038545873,3.8970261695806023 +5.099859791243686,181.99997598543635,27.302489051108733,29.49635113336186,-4.172229690812818,-1.2890116363784185,1.4046566546096333 +5.0079947128389035,139.1901838101202,14.792827986688533,22.248254305888622,-6.398014298024203,0.385241647644517,4.42167516598737 +5.0020106240263615,188.31250074225284,22.20078475194756,20.63249793719176,-5.474191149869526,-1.7172917262560201,1.731921342638473 +5.109984216368625,124.43699892925497,7.030435367396771,25.987266811205338,4.131870229127385,-1.0183531291127577,-0.5820293450214447 +5.006699369539265,205.14087336025926,6.195141450390248,32.56263053945094,0.2863932437702199,0.0516353172730078,0.38428779418752956 +5.486367617204413,175.62660133267676,24.538272712646307,22.03455987097881,-1.4215068725613422,0.5032987099706072,-4.692570059803644 +5.7609560739874315,96.29368493859621,16.392522663793102,29.530301267098483,3.0702117511666565,-0.5431862306985483,4.059664409947722 +5.016402374429918,192.2598948324358,12.126515018200884,33.739067253319206,-7.675038589946082,-0.5066784173896016,5.431928896393467 
+5.696650009810396,154.1063028926035,5.406128916401498,22.718088636690645,-3.3168529080918567,0.7296528334741877,5.722872806574502 +5.338725300951328,264.0781838550351,12.038606092962386,21.66853713538765,4.532125905496244,0.43427336273906414,-4.741459668209236 +5.214276662209207,137.72130487550075,22.49319059015449,25.15266874366113,-10.054205046868852,-1.1336736059915875,-1.4281502367609655 +5.029343828092537,235.5791950672993,4.859112466328151,24.519090020333774,-7.13310344721829,1.4239533377656168,-0.8610880117168263 +5.022711673250201,106.48627512723324,6.750175407589019,25.702954228643176,4.207863945359025,0.26635153655421817,5.408884786015445 +5.001499524186699,168.84918810633576,6.713139991143716,29.787128335286503,-2.7837976607586876,-0.8542643927186325,5.274075621036005 +5.533452405433886,85.41265460553636,16.534235127571918,23.77153814419528,-14.24547767115867,-0.5505965612489012,1.2622475002441833 +5.000004100961268,182.67890815004586,19.57679784092099,24.186577951302638,-7.169740087830272,-0.6191098589746231,-0.9636435204767633 +5.040497188604156,182.58684056023105,10.095217973234945,21.278579254225598,5.997668132239495,1.0389333891660284,5.770905904086062 +5.236529020296579,169.929552880397,10.196419686285154,27.145872431471204,0.37035384892342016,-0.910610824815791,1.976172263366152 +8.189545727680962,150.20001839683198,11.41986839993713,24.808893794142726,-1.2433703651278232,0.5299525668725942,-0.35379631649806775 +5.012246176427039,132.80691236919372,9.355621416096561,21.443713107951197,-4.710865828292514,-0.008471436147967222,-2.9230104375428048 +5.065731533422785,185.60239945669025,5.861478728431173,33.5570945158792,-4.377481086511165,-1.1290382488353403,-1.165851383724764 +5.077614799123859,168.26618360582174,32.00525682871908,24.681831603380108,-1.4679869411900444,0.5698541909626413,-3.799776855957626 +5.020960116173688,107.41881637325734,15.898270724707423,23.427281297013682,-8.541140960641886,-0.8633916215611627,5.635037187130461 
+5.29992949579546,104.86500389399654,24.926280754601574,21.807503430086033,8.865923769612527,-1.3561442698272417,2.0809159098127337 +5.001540390544045,126.26455043251738,4.494512805800294,28.94996674938855,0.8228060869670548,-0.05715155687864965,2.419337495013897 +5.349372377605224,157.1284100006506,53.752206852451394,36.3022195012377,-2.952403823736671,-0.10862112158472408,3.0606379809481794 +5.048282103203652,158.7118757859099,11.540485138560399,23.590824827791202,-6.584671950619427,1.6523840921634045,-1.517337154256126 +6.969335764812465,168.42848645412263,37.98015913200013,23.99468517829465,-6.879068031923152,-1.3509280784017614,2.828061267277767 +5.302432840949962,142.0927489026888,7.5578440497349595,22.307465853339583,9.061513446319488,-0.546096737434385,-4.493599489377636 +5.240144488487954,176.5107367641791,6.7919890555365265,24.793880336878633,-1.6303465868919094,0.9914001844639295,0.880272999875201 +5.060470695431767,154.68054792884385,16.366848673001567,21.473941726618722,7.092972807073966,2.2009973860751386,4.005584813044317 +5.307069020774286,180.78751221665658,6.310211594709301,21.57584087144187,-9.907654991039813,0.4278997910904522,-0.653950330236964 +5.001673469713119,184.8008745800625,39.26612313063202,22.62787428822746,-5.871817044754032,0.2904682827648246,-1.1894764751876403 +5.331332911333905,117.91184345207758,15.43440708248442,25.367668486917587,0.7646824653576636,0.611953095869391,-4.4125027395809315 +6.717941143205787,209.0268162259183,9.630752554192924,23.937914005173365,-1.4696930523777054,-0.5124509841764628,5.265731795265326 +5.1146186670649785,163.1533807090918,6.1667933458918744,34.55072153784474,17.637640203963652,-0.7242306910323367,-3.8697395003986546 +5.292301145073905,88.67943896551911,20.92674827833265,23.517442674505673,-3.2756279933108816,1.692881895744937,0.8976186136567232 +5.213026965936819,179.19548575325894,5.69957756460733,26.864459339287652,2.270686576243471,-0.2993391196823086,-4.953042635491336 
+5.025484674096087,106.17236792481191,27.067213506068292,23.76324547947017,-3.8240326608799027,1.571727194445566,5.721473822864683 +5.013553959164741,149.2821748419964,6.993340062718557,25.78782321063955,-1.3444913171008421,0.4614440672335327,-0.12818399078106335 +6.122908906746606,106.3488912170244,30.81701248858648,30.883080211615294,-5.488069478118493,-0.6738297010943158,2.1674797208961927 +5.0772045391519995,178.61511155280112,5.639155536312673,25.631308765000554,4.67824731159479,-1.1410362601848796,1.771823643975722 +5.022826944691997,120.51051491689397,6.0122274382125305,30.153696162710503,-2.755194288538327,-1.2289179758390327,0.8398871394635039 +5.0131577948530905,171.4830723293253,17.988212812525962,23.293464861015643,0.21468925296225327,-1.1592824639010875,-3.466380318286117 +5.733735680441337,122.00364787319539,26.020297619121067,22.18071246104998,2.7078561038591724,-0.3208290182546895,0.31794078497846456 +6.026478723455582,96.10043247518803,8.346805194039389,23.85590991744309,0.6668197637772275,1.0883475833718372,-3.7092315583210675 +6.249100303810138,89.31032855461498,14.745299076832264,24.06028271305486,-0.21866903281740813,-0.9062031453791941,-4.168645982354269 +5.054337725707808,123.0313527404729,12.498734289864426,26.676721837807584,-2.0505765952483737,-0.4641520619045426,-2.595003065006799 +5.098207831627606,185.68750180171276,5.1247035208012,23.767879559837432,5.07876585808818,-0.5133783725156535,5.004913350079514 +5.0007433648910595,98.30981975151084,5.185906106670484,22.21320155618596,0.8305276630172393,1.6178376750723735,-0.7714537937543042 +5.554416289190312,136.03871730289856,9.697433078186295,20.790131105906873,0.28243675915387545,-0.8166506055996811,-4.105976512760119 +5.183367022906939,181.72373631235797,5.250001701019094,30.84385823675227,-1.0260594730083061,0.24471960532267073,5.5229834610332045 +5.009019469698935,161.63996257467528,5.945505881358027,21.59859201905419,-5.275268785726331,-1.3110942268262473,-4.517414871117985 
+5.22866811687999,163.64753172037484,48.19473718957431,23.07375016230905,-4.826150472037841,0.3884064950766477,-3.4075447979594973 +5.409047126633279,160.34997343425758,8.735570828610587,28.333318913324227,13.884365594184796,1.5923737096884454,-3.705687577191383 +7.928679014964926,146.00355488725825,10.618592758209036,23.473025178882253,2.7976627097333395,0.8703990367794033,-3.5679864224670084 +6.3325927208916255,146.50065102025502,17.504327595001943,41.26702867445318,-6.876699174566986,0.3352493252882758,1.850334584016828 +5.202568456287276,139.42316634096557,28.65338269744863,33.97825743919117,-7.583710512549207,0.6489599073316843,-4.065231898657856 +5.292098733140554,122.03112912971734,7.922843030789891,28.052763862217574,-7.521881723654581,-0.18315179107491206,-0.1713126212946774 +5.373247022754651,107.03371293182603,10.859892428363384,34.79391890601862,-6.493775875630855,0.5002412695546493,-0.6056915655397717 +5.002146639869168,135.3630555999753,14.87023833796017,22.95687883354811,-8.833161804963984,1.3688263949669717,2.1002918110325792 +5.725884809811948,142.78841724967637,5.086568201360881,22.038437647131694,0.2570086619636607,0.895091842246569,3.308028738790034 +5.016076943332657,156.06961572043218,14.711494763042836,26.502771679233675,-9.936109046700025,0.6472933719994759,-4.6573806031433005 +7.891940854970242,152.19427097251454,7.970452204928157,27.756342158155704,-5.663944152394029,-0.5678787082592508,3.2227903062419436 +5.5665139890815025,161.7676698351547,6.942429063094069,21.392177355235184,-4.572813134399696,-0.5795174466078753,2.964391274668415 +5.022829954795309,88.31602289684506,6.634856844938355,23.756475188187,-5.059380267492137,-0.7514485725735525,-1.9940036055179982 +5.488178285059967,189.6383637482969,15.588504179211217,27.573642382404493,-2.157993334950782,1.0755191761124863,2.4325495225452425 +7.8768971020042695,151.68151532053702,15.898246350141594,28.127435091879086,-0.8293605624559377,-0.6211423596095553,2.725228426589428 
+5.138180102788169,185.09695160333584,8.80208889346768,20.62743564222135,-0.6403185943114127,1.8785557166405065,1.217510188806088 +5.61058224817868,114.73680274333509,37.72068461350401,27.136963932157652,4.153846763293238,1.2602399307487704,4.6196739366134825 +5.317223961901428,164.74660939856903,27.87320392190919,23.502931645048452,4.441530805783947,0.31005097298861517,2.677162721716349 +6.391369921121494,255.99364407988503,12.377298150484055,21.1346268597977,2.259067539323521,0.10640229208752597,-0.2759309370412284 +5.013708117865872,209.55373118447915,14.185118703807909,23.64311284669158,-9.645903033898456,0.2480129971126506,3.695598400433685 +5.02506106002431,118.69027667474629,16.70629050638992,22.502176969746106,-9.758574959752856,-1.393839592053868,2.6338038630686738 +5.495819348909238,132.4108271310188,7.29748556564423,31.62293844729713,-0.5135354140656949,-0.6694363070014848,-3.6568390242634408 +5.137695937856658,197.9256691348957,5.9380612778348265,32.643686050043954,-0.2760439938363648,-0.56679147381699,-4.963194564260739 +6.495488444930541,196.9719210014921,8.984407370573017,22.683570827267012,-3.9905168021185116,-0.3817789027533773,0.2888043176311408 +5.368360228761943,152.24163514607676,16.005738008686258,22.71511046071724,0.1470202873685147,-0.9465469068253418,4.201290937821701 +5.202071161547862,110.872953386959,5.174788435752335,22.386088620825173,-3.4297232434298373,-1.0651029999382307,0.020102419163344543 +5.209783640362083,191.87815957473265,15.805245682945092,21.450983266546643,-13.142185003208674,-1.3318261811828218,4.425350926641052 +5.818893141958671,143.28636524569947,8.12732159910207,25.347116678656477,-1.6672060750751763,-0.9864531911437774,5.300893594271177 +5.011814472948384,184.25948626263988,5.994704668708813,22.988678138956768,0.44982380648959763,-0.37839114740815155,1.4278309385191275 +7.208127608528111,86.96118360665656,16.140417614507626,25.101860905606255,7.339629228031733,0.7647119747882449,-3.9073359277889286 
+5.023947284162984,149.53506892268697,10.768705139361737,31.902739221385218,8.352189000415512,0.06035941645409672,-4.004434057777858 +5.812789889529301,177.34298882021625,8.172571846070793,22.1091523297023,-3.226916135546551,0.6185099989528171,-3.065921244528515 +5.001530166611245,201.61178346841882,10.067523325388255,22.183273378954084,1.8115105402148752,-0.4849210202193874,-2.979477990233029 +5.0017776727115555,142.3021295018551,4.868399280822001,22.256276207738495,-1.3549226257942855,-0.2805302397084217,5.306444830905482 +5.111187985482449,203.01448267328243,27.60292334083058,25.09695848391263,1.1353795690793955,0.4069629040789198,-3.8050479774761725 +5.000740296603212,159.11502899424147,7.99970565941492,23.078619697583132,-7.311173698548545,1.0251877866115997,0.16123679291599213 +5.022800542867576,164.94837893968446,19.13840999197092,20.290921061356514,-9.497517251434829,0.254751681262609,3.4893373935008007 +5.033417589463134,151.07313765368664,15.739477563911525,27.26368532939605,0.7257709636366352,0.08752399050356235,1.4598782459644548 +6.280608487625858,163.88956549614605,9.729187544087782,30.5098113801114,0.0041163332643034956,0.0706983543388612,-0.7156568601759163 +5.023776378108382,201.14695037917596,37.58374636005625,22.195545637114023,-11.731151928493622,-0.5731526028171189,3.389197727783589 +5.896023055637545,184.7057816031382,16.965490744828582,21.903766308382963,0.5251589193579123,1.2289259733227433,-2.816800423235815 +5.610924460908346,99.13989013408995,17.898573374901527,22.33556162476657,-6.552445690222577,-0.9622018930691817,3.688470745239803 +5.27649878516714,127.25429305126164,16.60059266094665,22.96825949043408,-3.098481427238015,1.5255567551356655,-4.094739215974712 +5.6840600540308,171.17334102818376,13.918419059174965,22.417262287970853,0.040276914527972885,0.827282588613972,-2.463460703277375 +6.056022959612331,249.75831180321086,5.315830442600355,24.911664506575075,-5.0157270558240965,0.9124704702989214,-0.5263079441497389 
+5.380680634360978,138.18803897723546,7.110904024329202,28.07152478463355,-3.805662972375787,-0.12729234258168096,5.108805082015341 +5.059246950090577,166.6996024787804,6.107321121203667,22.934137645835097,0.8201226178866374,0.6343166412403691,-2.7977497714373607 +5.238588975073159,179.23129412792323,29.745456263637234,24.669416859757877,-7.2830828216352,-1.5308984278773312,2.1761699073740273 +5.052097613334605,174.9113131534673,4.350622834858682,30.556224286495944,4.959750271472909,-1.290701492807146,0.6802915650235981 +5.064632956808395,137.4545673298971,4.986195533302197,20.451008661315928,-10.806338365019728,-0.5262283405853051,-2.5360276980274192 +5.063019488105045,245.42299167333175,5.633369847828847,31.231414517640335,-6.558165639969709,-1.1322339600410363,-2.5360592865610725 +5.063271282731727,233.70565581681245,5.051597032291291,24.311786470498276,1.2916963176615779,-0.4997971265974612,-4.642353339793621 +5.229098708662146,72.93100552912064,20.375789391583393,20.208708809644225,-9.897329177086004,-0.7284630873821777,4.997465708214593 +5.510759238436143,172.6644023690368,12.61575993542457,26.95620207028675,0.28852988172811456,-0.5831441696495306,3.1746392011445366 +5.218911942193697,149.64078364933263,19.084344547825424,21.475805817437795,-2.916340458727304,0.3292906567067371,3.6726092085700817 +5.088830965229165,227.75262792662187,16.08523393940525,28.228955873433886,-5.427419480998935,-0.8268607980284262,1.6826395557920115 +5.098952390964463,122.85933708238313,4.6991680095314114,27.667582430851237,-1.4678434263204267,-0.5368679830527096,-0.34164033722616427 +5.66941984232698,147.70478929746457,28.638191082502207,21.745206829990266,-3.591180418968916,-0.5629801335454141,5.603823096251494 +5.016817947761588,188.59667689269563,7.786123122386803,24.85018505947342,0.16856486351996125,0.918404799512225,5.32862378321561 +5.0059735702792265,119.58831992920778,7.255051891973404,23.469592427451378,0.8591296432267428,-0.07939937821137097,-0.37169436332535266 
+6.875567240118441,200.15258563754017,6.378166470210984,23.312751159907414,0.8355301464478968,-0.2786246826161107,3.820230194843207 +5.130188076784411,181.54709693907992,22.022689193841153,22.15166770155563,-1.5921151728379062,-0.13045953860486403,3.716115995389096 +5.366232987821421,191.7787064358127,6.385563059715171,33.752757486224226,-3.5116743568480624,-1.3969976144874845,-3.0409164920317986 +5.664105155439537,203.52052295864786,19.1764993846994,35.148447949312626,-5.634581941525429,-0.2447138892725467,-4.2792916588762875 +5.22302758815823,237.7410630550417,23.597779320717954,25.269854162603835,-1.098324436524579,0.8302539105775428,4.33992317747434 +5.1277098275949795,183.14285083965297,14.29799133437573,24.277461676189734,-4.6010436723522865,0.24082120154368933,3.0644753821616604 +5.085665444771837,156.72556193154418,14.05688683027908,21.1114352528955,-0.009539593293878212,-0.9156971231330113,5.852670247999738 +5.555320665320105,145.493629145155,6.40516472268755,27.42816054391654,-6.584675372102863,-2.225279963766481,-0.4726505609504432 +5.706212825141252,76.46760963383691,28.68652441048765,22.258766497148354,-2.4132506725335365,-0.663067012308599,3.1154080871146537 +5.050937464270307,230.42937902281255,16.374255043501233,24.149885508947705,-11.698454030669764,-0.3211947637458778,-2.1260692010263362 +5.341084316744034,161.30672708952295,13.410806846298119,27.500198776195163,5.039971816804681,0.4983881646344933,2.8969404945796153 +5.004706842366555,144.2375191685432,24.51369024511393,22.398198052424014,5.562032467116224,0.38033897644152503,0.6943015051152601 +6.23171040566495,161.03141185391357,9.549333479482119,30.286452380763052,0.6324656023029034,-1.0670353245381747,4.910211556093435 +5.708879562869736,93.87196720389032,17.232004528597184,23.699188397638316,-3.334654164523241,0.25545217235462564,-2.957112983187642 +5.289000358728343,142.79767328983627,4.353658751256308,23.90978591752161,2.3114202380853675,2.1112871852212045,-3.0379815333640003 
+5.024384889839267,146.90108882103698,9.00828226260434,30.357017734716067,-1.58098466977257,-0.6341899620370256,-1.7000174674398894 +5.615234675512363,148.66431979318054,18.55987880394087,24.555404243764876,-11.361694483437521,1.3687557678741993,3.549712965200957 +5.0641943598125225,104.04701203044146,7.761243218772023,23.44981210433137,-6.813955082646977,-0.9706494892585454,2.0746419457761505 +6.324950503018815,162.8447620681178,57.469067609675974,23.634615737539605,-1.5996658542287852,0.654245334262701,0.3431819886024021 +6.531632824869133,143.54116352676547,10.876690722835344,28.928957385921603,-1.3563677891921773,-1.1718952191319842,-3.7630825870735976 +5.217406714897531,191.7480975872611,13.277617034863741,29.521765432166575,-4.395601701572262,-0.0031598719752672345,-1.3281912680919792 +5.039765236065133,96.93355448231107,5.872059469098129,30.044880939103702,-5.201402519815042,-0.7456048254649407,5.547826309844291 +5.4134315557052,117.83744146156303,6.860064792368025,23.70536352509721,1.729869003133559,1.598290886100259,-4.320383906883926 +5.296894481719984,147.41184071912147,20.16146605949767,25.23255082355607,-5.112735314628445,-0.9133999980362452,-0.4013664395060488 +6.119281204438443,151.91105756642511,4.527847585259222,25.78271095396888,2.681446576657386,2.4029120879100194,-0.9931378431462816 +5.264865489364216,187.59016692913713,7.238674950296112,37.55940803000724,1.7500918837291612,-0.58936026220779,-0.7726985096784418 +5.60696900246229,124.5831708512769,10.145459606556152,36.76634575985617,-2.283575140893225,0.10765744232455844,4.5714230635085595 +5.058876486995051,151.21420782809602,11.963735947929365,22.63623453097565,9.503372563550034,-0.1392975162227896,2.2145435534029314 +5.895784530704265,152.21235214059033,18.388944027301434,31.12168076352348,0.8474873350349044,-1.1599257250807788,2.654827587283573 +5.986353668191711,167.13323014735553,6.610427617565741,23.088465540564272,5.447048405914259,0.6189647819702774,-1.350904303198996 
+5.2505747653200885,183.04353268650715,12.66218470160576,23.615072344751702,4.321251415073675,1.3738904696665981,5.306516623055428 diff --git a/tests/test_sets/fixed_distributional_test_dataset.json b/tests/test_sets/fixed_distributional_test_dataset.json new file mode 100644 index 000000000000..466870e2da9a --- /dev/null +++ b/tests/test_sets/fixed_distributional_test_dataset.json @@ -0,0 +1,21 @@ +"beta" : [5.85049379045,5.21244596453,5.07065422106,5.97773253677,5.06708769233,5.00591832601,7.31543149453,5.78646382546,5.07470465641,5.37899225634,5.55685776784,5.00000825995,5.09108817165,5.16736704821,5.89913711145,5.04632339569,5.06792614547,5.02343097976,5.88046364616,5.34040516182,5.11729311707,6.07025434789,5.57998263389,8.81198584306,7.14111733635,5.03682326118,5.32084048698,5.02084081818,6.70165982502,7.30520245414,7.67591993861,5.04451974459,5.76288717089,5.00065624499,5.00672824343,5.35142826856,5.02909009781,5.6554247915,5.6799476842,5.00721523719,6.09785759829,5.01072232241,6.169279956,5.05532374261,5.11592165043,6.16766640159,5.02007177441,5.52126884915,7.76871883377,7.25037504727,5.30951566535,5.05884263422,6.1047978282,5.08227521354,5.23064843808,5.10719184969,5.50619358346,5.17135913697,5.0190301807,5.08738503586,5.1543564601,6.76967485283,5.34229903969,5.00359434198,6.21282110428,5.09529279357,5.90702666014,5.50446278676,5.10218926737,5.02685011928,6.91920361923,5.00733109478,6.50250821421,5.001023564,5.09145817895,5.07016814175,5.00500916835,5.00106289986,5.37290922283,5.27196468161,5.01288037189,5.2084985286,5.03013832879,5.36929405356,6.67682791651,5.74862416091,5.07437293364,5.04010433672,5.82309590197,5.15930708956,5.02547566679,5.65541551896,5.11504461441,5.74808018074,5.07068717355,5.03008172833,5.20608423771,6.93205350325,6.76795594857,5.13772021468,5.00082669716,5.93443615307,5.04259977248,6.37988908805,5.42858807128,5.062614606,5.17216852602,5.14308978382,5.09815020131,5.22572505752,5.11773994555,5.00923245104,5.34178733222,5.88925569497,5.0
0245191524,5.0219891334,5.02748348998,5.70401729116,5.65640275742,5.0711178719,5.00031816269,5.53802658249,5.17367338543,5.65721764371,5.05420553573,6.34393239748,5.01386433194,5.2889049356,5.00588455903,5.0001739514,5.0048097054,5.58251872772,5.48800203809,5.0555990542,5.26733419537,5.0961659335,5.05496792955,5.50529625082,5.45290864535,5.00630804853,5.92689114833,5.95017573287,5.12912666199,5.54819722806,5.2039773489,5.00024555037,6.8070838143,5.58143123497,5.0186773661,5.42843038824,5.9906100242,5.38179144623,7.05323648054,5.35165446997,5.47241442773,5.40531724651,5.0006815916,5.25893189048,5.29632698864,6.97597721359,5.62440499272,5.34457278932,6.03624024123,5.01791375861,5.0289040214,5.11256204003,5.20916050857,5.23705084902,5.43854809323,5.16647686184,5.65669686615,6.03279310585,5.07285516467,6.58901837996,6.79034300328,5.00280654363,5.72194971214,6.4335039258,6.78725122992,5.00836934462,6.21979484983,5.0075501364,5.01072925541,5.0138184917,5.10422208577,6.18924119907,5.57749629802,5.04902408931,5.31868758541,5.81449096838,5.1214863575,5.22994802805,5.43975185294,5.06538322749,7.21106262827,7.64785255389,5.05674806405,6.31239339285,7.22283149658,5.01049484067,5.19683879241,5.09825203414,8.83132520289,5.00967242066,5.51767114767,6.14877085052,5.08832614411,5.00681856885,5.00052984705,6.01659432533,5.68963742432,5.79534256437,5.00142271553,5.8181745129,5.49931399542,5.92723717537,6.14388473949,5.10938046325,6.08359118263,6.71377563437,5.03031284778,5.34186406081,5.96554753595,5.00388752097,5.48709927951,5.04387359515,5.49102744479,5.54152244519,5.66223480738,5.34242919521,5.16803038415,5.33554947197,5.33223277973,7.06737383734,5.68469455971,5.06603165211,5.6380012428,5.00021665736,6.50296085308,6.20217919393,5.15069449866,5.01004141242,5.00657473435,5.23037507874,5.00210075889,5.15261815755,5.06996175321,5.90110213065,5.98882133754,5.00004704004,5.57643855458,6.78823702579,5.03429037873,5.01153030744,5.0585305658,5.01785362704,6.12223146093,5.053870281,5.3474513
0965,6.80968152875,5.01561151749,5.77406766134,5.2856970204,6.72330697215,5.04096925756,5.19844677565,6.58483192495,5.47412013378,5.92329762463,5.29893225797,8.33495989819,5.19939619545,5.52002555507,5.09177592186,6.26103281687,5.21584619611,5.37360033588,5.04796922652,5.26555136985,5.40582425229,5.34782565871,5.93164897254,5.17245511222,5.01578214489,5.12756506696,5.13896653173,5.46184279782,6.59083235177,6.293729294,5.00212151537,7.93351659913,5.57508677216,8.27430924134,5.32037660647,5.05916419472,5.75977922954,6.8733531145,5.49076666387,5.28252951131,5.53758368732,5.10841458633,5.17970377904,5.6817046048,6.48867721127,5.0122746782,5.78567782994,5.31654791743,5.31643330823,6.86219541917,5.11409950122,5.26969807148,6.22817649986,5.59599434331,5.04649691006,8.51726308749,7.74644438896,6.36047146637,6.26102629814,5.00252543204,5.02261699386,5.52759640291,5.95726567976,5.65122627666,5.38391897283,5.2357689963,6.29560159836,5.38631858293,5.32712248372,5.03377635836,5.01499454414,6.24667362146,5.00749556937,5.08906501268,5.89651139304,5.11414570023,5.66754795268,5.49290775605,6.01997357488,5.0005757182,5.08971814776,5.26496851629,6.22930074336,5.30059046131,5.10477931387,5.76091202563,5.21994377271,6.48583314694,5.62861229243,5.18641671933,5.92159452862,5.00037988646,5.07576115639,5.84649809831,5.30336000909,5.5873754086,6.93702905576,5.09985979124,5.00799471284,5.00201062403,5.10998421637,5.00669936954,5.4863676172,5.76095607399,5.01640237443,5.69665000981,5.33872530095,5.21427666221,5.02934382809,5.02271167325,5.00149952419,5.53345240543,5.00000410096,5.0404971886,5.2365290203,8.18954572768,5.01224617643,5.06573153342,5.07761479912,5.02096011617,5.2999294958,5.00154039054,5.34937237761,5.0482821032,6.96933576481,5.30243284095,5.24014448849,5.06047069543,5.30706902077,5.00167346971,5.33133291133,6.71794114321,5.11461866706,5.29230114507,5.21302696594,5.0254846741,5.01355395916,6.12290890675,5.07720453915,5.02282694469,5.01315779485,5.73373568044,6.02647872346,6.249100
30381,5.05433772571,5.09820783163,5.00074336489,5.55441628919,5.18336702291,5.0090194697,5.22866811688,5.40904712663,7.92867901496,6.33259272089,5.20256845629,5.29209873314,5.37324702275,5.00214663987,5.72588480981,5.01607694333,7.89194085497,5.56651398908,5.0228299548,5.48817828506,7.876897102,5.13818010279,5.61058224818,5.3172239619,6.39136992112,5.01370811787,5.02506106002,5.49581934891,5.13769593786,6.49548844493,5.36836022876,5.20207116155,5.20978364036,5.81889314196,5.01181447295,7.20812760853,5.02394728416,5.81278988953,5.00153016661,5.00177767271,5.11118798548,5.0007402966,5.02280054287,5.03341758946,6.28060848763,5.02377637811,5.89602305564,5.61092446091,5.27649878517,5.68406005403,6.05602295961,5.38068063436,5.05924695009,5.23858897507,5.05209761333,5.06463295681,5.06301948811,5.06327128273,5.22909870866,5.51075923844,5.21891194219,5.08883096523,5.09895239096,5.66941984233,5.01681794776,5.00597357028,6.87556724012,5.13018807678,5.36623298782,5.66410515544,5.22302758816,5.12770982759,5.08566544477,5.55532066532,5.70621282514,5.05093746427,5.34108431674,5.00470684237,6.23171040566,5.70887956287,5.28900035873,5.02438488984,5.61523467551,5.06419435981,6.32495050302,6.53163282487,5.2174067149,5.03976523607,5.41343155571,5.29689448172,6.11928120444,5.26486548936,5.60696900246,5.058876487,5.8957845307,5.98635366819,5.25057476532], +"beta_std" : 
[0.00116574239466,0.00644157155636,0.00018750767914,0.0282775199602,0.0067213115226,0.000102381990508,0.0181488708111,0.0318169263582,0.0916220021421,0.00546042212631,0.093369083129,0.032014927639,0.00136051551503,0.0781147062789,6.21962128887e-06,0.00339038065496,0.00438945347902,0.0241769229802,0.0106299141665,0.00836172106078,0.000276088402841,0.00885082418834,0.0138327518875,0.0735671138776,0.0193878718243,0.060949724346,0.0017421383559,0.00802474249161,0.00948680807578,0.407321908675,0.0812116798733,0.075296748124,0.023985863444,0.0681372749506,0.0592962070708,0.00405070135213,0.00245558186341,0.00440273727221,0.0571455789006,0.0462422720719,0.0084688095909,0.0197343931062,0.29924190345,0.0576025723601,0.0310028868894,0.0399432898212,0.00177390115767,0.0489864840779,0.0831259393195,0.191635364046,0.0269346286575,0.0309634109697,0.000636409219246,0.0147177755248,0.106507374573,0.00172343440124,0.0703164806219,0.0320642722728,0.0649164767271,0.0361576903646,0.003581761514,0.0172222549396,0.000225732138202,0.305504103069,0.0150389108502,0.0929363232526,0.109474686786,0.0112518664371,0.013445289751,0.000501978889659,0.00532638109171,0.0992136267477,0.0556095578998,0.0394298082047,0.109421983742,0.0285899375181,0.0480555881983,0.0439683844021,0.109173487786,0.00774129823492,0.000175087359234,0.0622653339531,0.0156055501207,0.000973185406827,0.0357351200451,0.249898602044,0.0162549977442,0.00011171196087,0.00987812705409,0.0462873269865,0.0815285637072,0.0205150560806,0.118805058439,0.0402286291709,0.00719587166567,0.000779474526343,0.0235054374814,0.0133310417896,0.0386802079533,0.05449565721,0.274013813032,0.104336390642,0.00799941837345,6.50326727427e-05,0.00370000503957,0.000383251228186,0.0195797229846,0.00205279369444,0.0516969872554,0.169206290727,0.00412260648718,0.0381833084259,0.150852528877,0.0752462545162,0.0505240980932,0.00918378077794,0.166196603369,0.0136862974921,0.0541347485984,0.0770368110173,0.143019803659,0.000472431615785,0.00876967254413,0.0694
443003978,0.00278613613345,0.158226349049,0.0129960767878,0.131732896355,0.0096047268689,0.092656371666,0.179703800002,0.101442987294,0.00205917596174,0.0135586117772,0.0408665788938,0.0204354321485,0.0218417658819,0.0178810416363,0.00169084062201,0.0426145863449,0.0208083012707,0.0294832719622,0.0837045045468,0.0227802939948,0.0541349362069,0.0324020202993,0.213611975669,0.0387722718427,1.39432225989e-05,0.00800924029708,0.0171847581832,0.0247015251695,0.113852161448,0.00118593791826,0.0558065405771,0.0050574730565,0.0154892188964,0.0416376479492,0.0121277477245,0.000577906981657,0.0338173822461,0.00571133136812,0.026320482271,0.0840016647612,0.133265967849,0.335554821719,0.000386751876577,0.010472295472,0.177126254701,0.0640022396213,0.0283922193323,0.00596518085392,0.000404914795581,0.000110794982901,0.0522087747946,0.108237100155,0.0672475822678,0.153583725365,0.00373212952447,0.000182585552256,1.22160667329e-05,0.0616363716798,0.00108286429367,0.00142599410089,0.00735315286709,0.228519969458,0.021947862633,0.000485315957092,0.00293810384157,0.0263505003224,0.17284792465,0.00176102132236,0.0119361723556,0.00847529547369,0.0298316859368,0.000910183049801,0.0114227491984,0.00131418114916,0.0641680833809,0.109226083129,0.0465964877035,0.0420914781748,0.0173354178204,0.0701666430993,0.0596168879598,0.0213005751657,0.000741851623502,0.0282462755861,0.00187882976935,0.093171077294,0.0164738810314,0.000716747055824,0.0301033025308,0.168020940619,0.157140837964,0.0178600623781,0.00034228245397,0.0515797839002,0.0151184140774,0.00214089674588,0.00611599225632,0.0108012259969,0.00987169074475,0.0180353794929,1.08481269383e-05,9.58068883509e-06,0.00421210330498,0.0079361216625,0.000149494042576,0.0100035478604,0.0186313111237,0.0356981851255,0.0533529813544,0.0569556277228,0.0158920706104,0.0937436671159,0.101211969517,0.0146546611127,0.00177983686916,0.0130044216005,0.0110512102475,0.0162901171973,0.00429332950495,0.163210896463,0.309792268329,0.218217161466,0.09116254326
95,0.113792974883,0.00385908093581,0.0403374627563,0.179984312894,0.00967221726463,0.235382431413,0.126832791898,0.000724490361421,0.00309897160159,0.0261122091712,0.0114704055289,0.0306610804323,0.0129388468833,0.0070738479192,0.0321109446695,0.00624449238589,0.00709414703183,0.152185493696,0.00252441386058,0.0082033505118,0.00816636572344,0.00242516146377,0.00108502321115,0.0531356545292,0.0482181415188,0.158681934325,0.101689553421,0.0238360697158,0.0141986214574,0.00561477295191,0.159196568507,0.180874052083,0.115255260066,0.0150677902233,0.00391858105349,0.00743553169048,0.000167961872559,0.00116170631614,0.0281073435234,1.41436315121e-06,0.00194139838488,0.0400850002729,0.223007692447,0.00628633737482,0.0362168726085,0.0200970072189,0.230148555085,0.0590544027076,0.00888293473328,0.180147388225,0.00163933455311,0.0373802850117,0.0115485617103,0.00329030118816,0.0973580048825,8.22022265127e-06,0.0103607915559,0.00232976419805,0.0329954211242,0.0639315627677,0.0824002575327,0.00652415173184,0.0412308005857,0.096884571424,0.0330023705044,0.00104746193889,1.52153570663e-06,0.282583718793,0.0115937331208,0.106023819799,0.000744687189115,0.01665695068,0.00277657962621,0.0505086253127,0.0159756043769,0.00877108135392,0.0992808105131,0.075856767756,0.14168775368,0.00243252239655,0.0239895102501,0.0927936730258,0.0125221209643,0.000334045213261,0.0597629899838,0.0864802843993,0.0201447942763,0.171323371397,0.0621059239909,0.000941774691565,0.0685325968671,0.123884272884,0.103897306672,0.074056849521,0.0996808145744,0.0454350743778,0.217242400198,0.0215368501671,0.0316588257514,0.199699898809,0.0221425117253,0.066006408212,0.00137893725531,0.195084424195,0.0151814299464,0.000138728724286,0.0427567835438,0.0454142776107,8.54216387809e-07,0.0202499703273,0.000686994286095,0.0410111151076,0.00267512212383,0.00508297097007,0.0917596533062,0.0260653649332,0.00366512925069,0.0414860153187,0.0718115268423,0.00262165659695,0.0144631627171,0.0132878913153,0.0601206937615,0.04511
23303684,0.0684056425308,0.100517934059,0.0365359945176,0.0412063896044,0.0230322745255,0.00180385134194,0.141274192723,0.00244354079303,0.157721865174,0.127790956919,0.146345884658,0.07341626908,0.0727633724539,0.0076074423627,0.000488997725051,0.188948244685,0.00605380648488,0.0494047391607,0.0874705163732,0.00194692666037,0.0812567576449,0.0296070233383,0.0331649277667,0.00130893679063,0.125223338235,0.27295162897,0.0293080700556,0.00207707755765,0.0212393401249,0.00028317365599,0.00499583711092,0.000347401083206,0.0140283498108,7.14262576255e-05,0.0360496600525,0.0372768810461,0.117883219049,0.0436885047775,0.0467890451225,0.243551716954,0.0902153467041,0.000940599745841,0.0181277239555,0.0150615366671,0.00216033550202,0.0371843132159,0.0553234011476,0.00335784226371,0.0595303549288,0.00326223045181,0.0697198535888,0.0872940646474,0.0419215718019,0.00185447464741,0.042114002334,0.0145528928139,0.028287672854,0.00161394683637,0.035612390441,0.0310593922782,0.0208640386703,0.0606897615431,0.00155320238875,0.00703216101611,0.0258813696042,0.092212785197,0.215359740123,0.00211753756519,0.119870782455,0.109345596481,0.0199830265157,0.0369247294863,0.12922036566,0.125047275129,0.0300346930819,0.0164191975644,0.0484312006994,0.00036731162446,0.275721532273,0.00849742412309,0.102615988164,0.077110641212,0.0425222319674,0.00883196654238,0.00107229664453,0.217989672828,0.000139638732251,0.0256345246264,0.0565471555872,0.0166802315519,0.0571633494295,0.00171391540273,0.000433437968232,0.00933279310548,0.00667171052542,0.0371666643478,0.00537213525143,0.0461674115441,0.01010732596,0.00435954509591,0.00205961774847,0.000200079567974,0.000838080295037,0.00844047131635,0.000971570628295,0.0063739668837,0.00805974493667,0.0168097093896,0.0428648759884,0.00561973605881,0.0098940830938,0.13830882269,0.11308181244,0.109262608978,0.0131659663175,0.16010707566,0.0147394038013,3.81000767445e-05,0.0588317876967,0.0487327085473,0.214498999773,0.0210924377134,0.0663552773565,0.096921757
0186,0.000204338801391,0.00147181989751,0.0114056270752,0.00143897011757,0.0135320064798], +"chi2" : [168.375838384,139.09834212,201.156634322,143.132559095,119.074399249,82.9303520596,131.517110499,194.661912619,113.347710915,160.326410097,160.501933108,111.839510105,100.020543022,170.704863677,165.718061354,172.894368073,135.92497862,109.641685149,194.843515436,145.529884812,163.768793244,125.31871065,105.466361115,166.089076484,149.132698173,136.215843468,186.713892437,182.717053676,211.591455483,207.217535191,165.96975335,183.094732596,146.75765029,108.721986017,197.344128895,101.237714676,192.49016013,172.580260794,190.24064548,85.4560124124,134.518509866,146.675642297,107.521262221,181.167344619,105.867204062,123.525532338,177.959056031,163.14723022,127.181584442,155.476663933,123.369955953,177.613114062,143.812176477,161.52391526,81.0219249374,103.211794832,92.5251349721,120.233534297,177.735357885,191.202023667,319.480798909,137.842412037,107.232977899,186.734067714,80.9446068221,164.722367204,215.917735701,131.561553414,154.81487103,148.761336347,157.700285603,140.22942277,203.696275705,161.432904615,113.078197098,148.682822598,203.535241133,204.82437125,171.190785987,131.701207406,139.149431001,197.162860613,115.330793317,105.820822598,181.692471935,112.343922021,167.218574421,158.312894048,122.963138423,115.727799625,119.604496978,104.434332358,224.00849947,93.2287997707,186.405425918,199.248709395,170.506963964,108.433026681,112.482137368,161.740684075,148.571609623,219.321718484,168.691684332,149.10041198,139.045316202,110.267970295,118.119859602,168.163810233,158.547766837,78.270251526,110.363873445,160.731944767,216.113767284,147.461430349,136.012610591,110.070189764,185.182522898,112.415170046,111.468029255,176.468967763,164.678383217,112.807590029,171.080281616,131.076215077,141.029980223,95.0468765512,214.388647666,198.078399036,214.320019622,173.768324222,172.331241426,189.93409887,112.767913271,138.153630702,125.098721966,164.622560448,169.396884
667,74.1534308213,97.8425884554,228.520528,125.473966611,182.168220269,155.333760993,235.296730951,165.27400769,233.129483543,188.535973197,203.205911622,152.06542047,145.479634497,152.668310316,175.119630277,169.815112424,166.062269723,110.226234088,176.43719315,126.169207438,177.441937253,192.882653852,105.587930339,139.330788203,141.263042942,132.079880237,144.521558203,160.699558927,105.33401167,111.226938064,116.605674065,78.5979732203,135.224899649,114.473228584,102.980918321,155.980934504,147.085983824,106.510690166,110.417626439,151.942346738,197.729793823,150.651303905,102.730629379,214.966618558,193.811673499,76.580710265,109.641872498,129.530776075,219.507452831,180.882374279,155.959735628,142.168135676,167.926199407,198.164663771,106.606224841,190.166778116,151.253766018,197.322493072,153.020943526,133.253206756,141.493631165,109.866402632,121.168047967,207.065375901,98.497249573,206.795174544,128.856450868,110.983630783,147.44845091,154.020235556,99.5305249169,96.6400992062,155.866267766,159.936496539,151.058147774,139.530317236,204.748403099,184.553266249,124.945929359,174.951654466,147.408567237,159.438705762,196.137147857,140.4991383,154.946192143,118.273485753,114.181678234,77.68795617,176.65854003,165.630112418,165.506762356,91.898435627,184.705619359,180.048393452,123.120466252,175.549934966,219.127410757,156.434460102,133.472138548,123.067964809,146.636530913,117.264685257,95.8474897052,122.493398404,129.218871273,128.246169483,112.365301055,174.284309539,165.030770354,125.541131345,159.151155022,162.000183964,154.346850649,141.751254663,116.817303654,159.077127009,148.617453758,162.432250448,102.228729028,51.9282014258,170.365148929,167.684628266,168.956954592,152.034391929,159.081932304,118.460176395,181.50639698,148.027864585,149.273073171,80.5265102836,120.089959754,217.901820405,160.276562506,114.160325988,183.951404207,137.952571471,119.114463093,117.846550773,181.095633983,158.470923157,171.472641091,109.070110785,58.225679908,148.14961346
,170.998972932,160.763215316,123.439947358,137.054222465,220.304198694,228.831758469,188.801516838,172.5284002,158.490585009,147.559421867,136.276561333,157.814342018,161.138377499,147.868274744,153.081047396,112.763733441,125.883487138,84.6286856151,155.210016702,185.278109043,219.930361253,144.440502463,184.782530753,155.320147583,196.696114173,195.616986145,94.4112355767,168.812602367,127.694101229,142.128287813,170.597506836,124.137131165,108.479645763,188.8858969,185.49072511,172.348849633,122.896229839,111.52467989,154.640507203,121.052500949,117.317148827,146.893661893,90.1874300188,118.588798363,87.7569231877,104.247335291,106.983030475,163.374926336,128.478285522,156.97804643,122.243317219,225.969543059,193.204356451,173.705247331,92.3884230772,143.014936893,86.4155249468,87.8921680341,120.238545469,154.498546823,139.441309318,91.9081357689,172.944085041,159.158979946,157.782662205,132.701855802,168.327492095,190.496311523,107.782933798,120.89086819,182.30478273,152.526386357,140.309790964,185.809417885,127.365336352,181.999975985,139.19018381,188.312500742,124.436998929,205.14087336,175.626601333,96.2936849386,192.259894832,154.106302893,264.078183855,137.721304876,235.579195067,106.486275127,168.849188106,85.4126546055,182.67890815,182.58684056,169.92955288,150.200018397,132.806912369,185.602399457,168.266183606,107.418816373,104.865003894,126.264550433,157.128410001,158.711875786,168.428486454,142.092748903,176.510736764,154.680547929,180.787512217,184.80087458,117.911843452,209.026816226,163.153380709,88.6794389655,179.195485753,106.172367925,149.282174842,106.348891217,178.615111553,120.510514917,171.483072329,122.003647873,96.1004324752,89.3103285546,123.03135274,185.687501802,98.3098197515,136.038717303,181.723736312,161.639962575,163.64753172,160.349973434,146.003554887,146.50065102,139.423166341,122.03112913,107.033712932,135.3630556,142.78841725,156.06961572,152.194270973,161.767669835,88.3160228968,189.638363748,151.681515321,185.096951603,114.73
6802743,164.746609399,255.99364408,209.553731184,118.690276675,132.410827131,197.925669135,196.971921001,152.241635146,110.872953387,191.878159575,143.286365246,184.259486263,86.9611836067,149.535068923,177.34298882,201.611783468,142.302129502,203.014482673,159.115028994,164.94837894,151.073137654,163.889565496,201.146950379,184.705781603,99.1398901341,127.254293051,171.173341028,249.758311803,138.188038977,166.699602479,179.231294128,174.911313153,137.45456733,245.422991673,233.705655817,72.9310055291,172.664402369,149.640783649,227.752627927,122.859337082,147.704789297,188.596676893,119.588319929,200.152585638,181.547096939,191.778706436,203.520522959,237.741063055,183.14285084,156.725561932,145.493629145,76.4676096338,230.429379023,161.30672709,144.237519169,161.031411854,93.8719672039,142.79767329,146.901088821,148.664319793,104.04701203,162.844762068,143.541163527,191.748097587,96.9335544823,117.837441462,147.411840719,151.911057566,187.590166929,124.583170851,151.214207828,152.212352141,167.133230147,183.043532687], +"chi2_std" : 
[25.0037746206,30.6122090115,27.9064770936,29.97563896,26.4935441287,40.4405778431,39.1276275237,31.777182125,28.8366626969,34.6614232668,27.6212256413,20.1246330026,29.0365679087,32.172633464,30.5787318366,24.942292115,21.7108423604,42.1542884425,22.6664215784,17.7070488031,35.797099126,16.4733286953,18.3414317196,24.8404819966,25.7432303967,20.1348911712,34.8600668302,25.1184545439,23.4840500763,27.8326277863,43.9649830274,21.5623918963,29.7917636038,16.9872393477,21.5966424279,35.8316842722,44.1004759908,23.3818564993,39.2275024263,32.2127152384,29.3144290528,27.3700575473,15.3672621781,26.2730920288,36.4911701958,25.0833361443,20.1906974537,31.9453342192,45.0774478795,33.1873474113,28.9322972759,25.0717296605,24.3358757098,28.0602659327,36.5667610862,31.9319210091,26.3707923453,18.4680931946,22.6171530366,20.924887411,31.5378380965,22.2094825928,33.2151693755,25.2425676839,27.0571404627,39.493129938,38.4431789559,24.607001323,46.7106317631,33.4161850698,29.2864043772,29.6638034615,36.0395991427,37.0829127175,30.7763714891,40.3768171605,28.4157858248,27.9008806753,24.2882138084,22.6755109525,38.8208869597,38.8481656802,24.104308084,16.9235297176,37.3075596269,26.845777317,45.9125550359,40.8238772267,23.891337687,26.6849676089,30.1568663568,27.3406023774,35.9251286555,23.4951552434,49.9125465709,23.3185899724,21.7626224201,33.7609312327,32.3048229729,27.9985624231,35.571317902,23.7830863138,10.6856879381,27.6218107005,29.5343064409,30.2124020466,29.7530228006,33.5491182503,33.4750525144,29.1238839223,32.4753249179,27.2663767101,30.3013877638,53.9425267218,33.0338357206,29.0101054984,22.4691819999,28.0952265081,42.9629813226,40.9042476386,52.5415836723,22.019866332,44.3860136126,30.5693402115,22.0292918902,24.7301058523,32.4927800835,27.0518981893,33.1986960004,30.5164780349,17.4590715482,26.0691538956,41.3138031777,23.6079937227,33.8656345678,39.5936923053,26.0915316963,47.1704330308,37.0025079495,18.9632789873,41.9007705801,31.1593356514,26.8495231269,29.55235279
95,34.9998707346,15.3554984032,41.1157862951,23.3187880385,57.035050126,25.7151758519,22.8788957977,18.735173155,30.0914064466,33.5999359396,29.8569156986,42.5237597636,32.4309688401,36.276556571,24.8153562215,17.3626643368,28.1230195291,14.9070422588,20.6860828926,32.9794837415,39.513201,22.9629487116,28.5869968316,19.7090365805,28.5020945977,34.0974720257,21.232269197,36.1561431006,27.9832941507,41.3494324964,37.0210684845,38.7344186091,29.7637944929,36.2591927339,24.1410389333,39.7004294104,34.6557613351,21.9556563148,31.9276966777,42.498826372,19.8398760346,25.5602510357,31.8487743286,21.095552316,25.5649009592,37.0210621795,29.1877552301,24.4109225334,41.9246792545,30.4180828736,23.0674060652,30.3919262254,26.6291723442,35.8734790837,40.3360914371,16.9846006877,36.4085639128,28.3288115935,22.5167911434,36.1424473215,40.9768542129,31.0278376213,34.4130405148,23.7709216221,28.625510538,47.3110398865,23.0504574596,19.0460177164,24.6921809866,28.1581676324,27.6137276939,28.6096760169,22.4049063464,38.7632563833,19.362563489,34.5812733561,42.1373361835,17.2777465286,33.2292354023,35.2440480589,53.4237118134,25.8489257203,25.2199631732,29.0051524273,19.2696393259,25.9936236715,24.7262647129,19.816253194,40.5959006547,23.3574743161,18.7474034317,22.683208219,15.9605803986,27.8339616772,33.1351875742,33.9114452429,26.591751071,24.1835904649,29.2919685295,26.3726576494,42.3660225245,39.9301243694,22.2388750825,26.3981748421,36.6239044575,24.1121266704,27.5681902264,38.1992274535,16.0085079654,13.9315013151,35.048782533,36.8021566742,23.0814342882,33.93897889,32.4733856871,31.4450781755,20.6546406389,27.9825390965,35.1735483295,16.2887597964,32.26273433,30.1205652176,38.7937681671,36.4350992252,18.307302378,44.6499711478,30.3808423311,32.6056868821,24.5863725616,36.6522607476,21.7942289209,28.6888796872,17.8155248476,25.971791956,26.842533348,26.1285024345,23.2880458559,35.8139560919,26.0433586112,24.6491509758,36.3707201618,31.3263714151,33.9277275696,41.4705225017,37.1
920423923,19.0762792896,27.664538928,22.8529117437,37.7695882089,44.5530416403,28.2751219828,56.6739413511,33.366181328,35.723139683,22.7285657729,29.0017161312,44.7492152771,29.4915572001,30.9361121372,32.6683353659,43.2780732928,29.5329061599,29.1712580872,26.5504516849,38.704194019,28.4743195122,23.7656788862,30.6757383223,30.0728601845,25.2564093919,20.2004256671,26.3382866396,32.2389339815,29.4639201458,30.4897292747,30.9040093959,36.9276062614,29.8262428931,26.3471005197,19.7285477755,22.4784096095,27.6420281187,27.2664372546,24.9604729495,20.385549167,27.2377971935,18.4515170302,22.3625301499,19.1010727066,19.5239430771,20.2769414767,18.755886416,41.5600698042,28.8215425576,34.6121706872,35.0432088992,26.1970630755,31.7484293172,22.1195172532,26.207184493,32.8161395828,27.7263785539,24.2589708448,35.1857808164,45.8032820312,31.4833417435,30.3126915843,31.8489632727,58.3582355067,28.147670508,26.7421062908,33.2516507057,35.5288716621,28.6572458504,27.1076100135,28.6503285658,25.710537946,19.3284769295,15.1216010604,36.8220598926,26.9824455948,34.7735992911,50.8268142437,25.255203329,26.1231086368,16.9596459051,22.9616705469,40.6000658375,19.8938826829,30.7720978046,26.2445788474,45.8584282105,34.2572821991,24.2908359125,36.6729754507,36.4777451541,34.2521930145,38.796276648,22.7351346933,35.3655449393,23.4323170488,46.0237009306,20.9999403926,23.1367885174,32.4034650534,31.7920396814,19.6601442266,27.5520110961,46.5542708619,18.520432422,19.8729809592,19.130429678,37.1754809413,31.8130006529,29.5474516977,39.5020839308,24.6345080272,20.60892581,27.3684733018,25.5614051689,26.5664671259,33.8085550029,39.9331113755,28.4555434737,21.5305923869,22.5513735527,20.3792087525,34.5538083007,21.6235219705,24.1381116989,30.9150902237,42.6254496171,22.1905603066,38.1832982038,38.8656781826,24.4917369288,25.0189582988,25.9770080146,39.8980185132,14.8587344728,22.1174512796,17.9344531082,21.2801968406,24.8579965689,17.829743452,26.4288294929,17.2000041795,19.9077940606,32.8
643245024,31.3194194449,24.1099896915,26.3414310856,19.5623385565,27.330010613,31.3030345583,22.1982298839,27.8603674479,28.6950512244,32.6065303582,24.1195761577,28.2493643177,39.2757688591,34.8641013237,25.1489845533,34.539291415,30.8221705286,32.4020945918,32.8189110815,41.9972925298,21.1979359675,29.4573991857,36.0213958004,27.2993332338,24.0504766085,43.9028537646,24.0565550988,20.2942979984,35.2558281181,28.4880238395,21.5982995944,31.1259369483,34.2298738489,25.3108380376,34.4751898858,43.3268240194,43.0106001078,33.4728295684,12.7112127135,39.2901629263,29.5311211612,35.8335924974,34.4066172591,36.4902348827,22.9113427528,34.7981888887,16.0998607153,23.2279036726,33.3974083335,28.3093480096,34.7696222396,47.8383231844,32.0996801288,35.5569938804,18.7926524791,36.9073471643,33.9750540535,45.1134452544,29.8403426463,37.8801334896,31.8739791795,25.8223617555,34.8668210465,27.7323052715,35.72647487,24.0342867741,21.672793843], +"exponential" : [6.09860553115,14.4907502645,7.97544359057,12.5657511989,10.3874510477,19.0418549089,14.4664802346,4.6875591347,16.5462641156,6.67211520137,11.2599083291,36.2116169816,22.3715690568,5.14281515003,11.0323717388,5.22619328482,9.80913520132,28.3640646531,9.87800998772,6.02589591186,12.0758385193,9.10645020811,28.0980275459,13.6913897203,12.9550635694,8.03179998815,9.4956705514,17.2633395869,4.22311072265,12.563530232,40.5243569382,41.6745438784,8.45844793912,11.7318142571,18.3767247242,12.0552303119,8.73201083196,7.79148127608,20.8522810001,4.97643435927,11.9822960012,26.1740713697,13.4388764987,9.24049270408,16.0937795972,4.6152170334,16.6771056062,6.83076685884,20.5689853916,4.67583957333,29.2559609388,4.87965575585,31.2608032606,14.9902474409,11.4110067765,12.7378039917,12.5890122546,14.3257452213,19.7261769196,7.96868921224,5.33443394641,17.3345010184,29.8319084431,8.92857829616,9.22728801634,6.10300899769,11.2055064007,12.8031424555,10.7181186645,8.80298974442,5.9221423063,5.16752345317,11.1684708727,10.1646974508,6.4673
1448887,9.52982158816,9.65630598642,25.8831414885,26.7764765688,5.5612788177,7.69231640302,19.8372020952,12.8402465676,14.991222045,30.063230661,15.7428352217,7.95643147156,21.6647393731,5.11790835408,9.06058390785,7.90076001828,13.9316994375,44.0787880854,6.99622903988,8.70668491183,10.1776076901,7.92822819953,7.70535115409,8.66613893944,15.2066259054,4.42920814489,15.1412862928,6.77734409728,18.4920982015,5.64360812327,7.39162496028,20.2891723259,9.11860850435,35.5328837269,6.61684696342,12.556129958,9.59250124074,6.52968926821,6.48620983768,29.8767568291,10.4642686982,12.8799634813,6.75369150786,23.2922516123,29.6559725594,15.7670409639,7.15307114293,13.0082365306,29.8126349339,10.0067394145,32.4991225522,4.36598683671,31.790551337,23.9350506124,17.8366942482,28.6663194237,19.8217205855,9.57716053374,25.5411995236,17.5967249655,29.8181952031,9.4533476024,4.34375747289,4.63805149082,12.5606633163,7.91733299212,15.3770052856,11.1898493567,6.78898254417,20.288095297,12.3608409198,14.9277536018,19.7708045394,10.06106629,10.0274410302,42.6670485895,6.14114806976,6.14822059991,26.4268865554,12.8714707952,17.1353802554,7.07202299011,5.74567358518,10.6365145439,16.5280924382,11.2977357838,4.9826341759,9.72664009615,4.54113058621,9.02633813864,25.3326511304,5.11321022969,12.8117533477,22.3591581404,4.71697745058,4.74869758199,26.2787041306,27.3287694913,33.8268262242,5.32105893409,5.68310433939,5.44672612771,12.4192940225,12.4599154029,26.3603842569,42.5177846569,4.3843870354,17.7617141011,24.7318956661,65.2695575503,7.66797369622,7.25448861877,13.3542431956,10.4599218836,17.7062897227,46.47150395,11.6633597217,11.7892716501,6.80232605778,10.3405591079,10.3590589499,12.8977827201,9.43631311433,41.0260948078,4.44341624921,4.2459411303,25.6869749578,68.9836774355,6.84267438916,32.308477631,17.6646328949,28.7141001627,8.60731435599,19.9001322715,11.6477982863,13.4979851592,4.22004747591,6.88000721786,11.6704196964,6.12804025448,7.65072750176,5.29353122735,27.3772881044,11.42
60432741,36.1948876972,9.10374375832,7.07002301357,7.55239514185,34.0796067355,7.89155847641,21.5119912429,17.3062009817,7.06952473949,5.42988874714,13.206320461,16.2374525554,11.547170999,12.591695914,4.57476694768,7.03654918729,24.5176347074,4.58210133233,27.5996412452,8.24674017997,12.6734075923,5.05850186933,52.0762548464,7.90897286391,33.7983400795,36.34942319,15.871747113,6.39354979848,5.70803830305,14.2715114903,4.36289862412,9.84055520361,13.6326684855,21.5673686928,18.1498881752,15.2386017253,7.71546432612,5.1482962739,7.02383681383,26.8889846473,5.95396660132,6.84294838102,31.9879801775,8.82031153459,6.03427892758,7.50402785132,10.3743347009,9.73330747806,4.54010274783,5.56809710674,35.048577014,4.58834182517,17.2374605291,12.9695677575,7.57920787973,5.94614711897,35.4115261916,8.97093525276,20.604657256,11.7403706574,14.2749162175,12.6531993867,8.31282081447,7.08466530379,13.0967284629,16.2098946133,11.2891680933,5.56825010165,5.11554332978,7.40164245027,15.4771998562,21.2413420292,40.6227396024,15.0961744586,9.38467479771,39.3932192826,67.2596770831,16.4623747725,5.18998470272,9.93883755847,4.33260583121,5.4814682856,16.1815786201,56.9680573254,4.97325551608,50.1971032038,22.1760214195,13.5592101391,8.41249895674,10.874851449,4.84119480052,7.42563416164,7.78457047134,5.51774303797,9.25009386938,5.5410924717,5.97002161681,10.1583648355,15.9104072888,7.76581318535,12.5749297325,19.7186125781,6.31663820833,19.9154151277,7.5909386383,11.6590246238,12.9906399048,6.16948646005,22.496599581,17.4238783264,7.95648063522,4.91252507353,23.6068032417,25.3902412485,6.24398129917,15.9265755353,7.70620856722,26.1114250421,6.80568492844,7.25690180928,40.432713129,6.13048547025,4.43811338206,21.0092150442,6.8597854351,16.9597579264,13.3212875281,20.9338504039,16.8479832676,22.7473050441,15.4425361659,10.9545873543,15.2280971367,19.3717051708,14.1122700782,8.3965718686,33.9692006895,13.4856103518,17.4688366198,9.86340594191,40.8853973309,30.5207324165,9.65664199585,7.3649
8827981,10.4839217106,12.1968674312,18.6452429366,4.2491485917,28.1491081788,5.61905386697,25.7641469193,7.4236678491,11.6703048568,17.9980100084,10.3121139646,13.6418970747,6.17918815582,8.36069398569,11.5281350766,12.1860139548,7.32067249425,12.6220418847,15.5714323251,7.90803242811,4.44422814627,17.4178521425,5.84257659767,27.0372455264,8.96716662464,30.0957138667,5.60050360991,27.6638244283,31.7399258616,18.870733507,4.95813130888,4.50267395403,7.2932172421,6.28768602203,33.3727602296,19.8342602312,6.17099647445,18.8157575772,10.7943904472,7.59196198272,8.71678162191,13.5111071894,9.48713731167,6.0712865193,6.46178049691,42.9648240479,32.522258854,7.36377817696,9.23001127401,5.94874859634,11.2712225675,13.1976861391,15.0079665055,9.62680132149,11.8513849674,18.3729964567,10.5241978932,8.32025163692,16.2834667283,12.0590391827,23.1116561097,13.2015140752,4.47436598126,19.4987778656,21.44726627,10.2817579698,10.8940057443,31.3379455717,4.33576329293,7.64705606315,9.9136658258,6.33332156524,17.85795628,8.68923712526,30.5555207591,7.75342290085,9.30974834243,5.90535174067,27.3724587929,10.2484361715,14.8839473873,6.98086779198,11.5653791191,20.1658903237,10.671698565,21.7759467747,5.82474524984,18.5886777241,20.0888541994,13.7279346003,6.46749107644,5.31468482626,10.2756523017,51.2479100716,18.0283058646,12.2588725339,6.33122701447,4.22864268254,33.959085915,26.1608013643,22.7699412903,6.53854177987,16.3490613397,10.4153543542,7.6760977424,7.92158223884,15.3466066948,16.1538105864,48.4063718907,14.0115504922,14.7436410881,14.6603740288,16.4643441281,4.51862232607,5.13148846757,12.7904037404,4.32163238406,11.2405742479,27.7807552329,14.6775592948,5.22121670253,9.4360725574,13.8245250823,26.2817191202,18.1151605408,13.4733044425,7.51520676318,26.0255829221,20.9573721795,6.14646775356,18.975415779,4.40086400156,5.72294432106,17.3385101681,12.0335488785,16.8329629183,6.81240273994], +"exponential_std" : 
[1.13496618261,0.0112138125988,0.761383767183,0.18126449795,2.10906749839,1.5672649157,0.894727378543,0.904623648163,1.74493841675,0.555286307388,1.13081483131,1.06113365454,0.0303050056888,0.673243672383,0.985299011337,0.475220261941,0.135742200078,0.21911175792,0.693655155545,0.269721911442,0.43519077025,0.259554778025,0.2043069471,0.0375652694237,0.16417222385,1.65020236499,1.11706900958,0.126120574232,1.87627130756,0.07371834021,6.88348630525,2.37343828054,0.195730347119,0.810432486961,0.315717648654,1.69966663514,0.117188445928,0.056235754638,0.83417708011,0.691007775936,0.108873137771,0.978102817975,0.494443397561,0.79561557545,1.24932192902,0.455572056743,0.489456701166,0.0102358494706,0.319837277244,2.69741436907,1.13635128397,2.1369655227,0.641799759433,0.604256212311,0.135856121881,1.99990041827,0.198066087034,0.346767939353,2.44920980616,2.33060184533,1.39844138312,1.28052290751,3.33654446659,0.0977999118366,3.52384308004,0.139444820096,0.246671832513,0.287817470218,0.874108677105,2.93453171094,0.360365084878,1.29265704059,0.970929636741,0.864927892588,1.55234898112,0.303523557879,1.56920555926,0.0896558159083,2.48999390774,0.6408087286,0.133559499269,0.118593829443,2.38480026129,0.924173369349,0.611481195949,0.0855822813,0.323282277487,0.340199236034,0.604504052957,2.21041246497,0.0880813992062,0.35281191274,1.32079389008,0.131145132453,0.54714309612,1.12134170339,2.96841071174,0.618813848467,0.717743835312,1.09419793321,1.41838672835,2.71424546386,0.364225545143,0.157835292096,0.178049468502,0.496016662642,1.46843028718,0.341437498955,0.00325655018262,0.786036479293,1.26450917614,0.122756035855,0.0253770451292,1.84054300994,1.86771806058,3.92942631796,0.464507300302,3.05776081402,0.933251238347,0.398035799747,1.06160750307,0.115154134357,0.00423713168082,0.643247880626,0.363542672633,0.32932226655,0.321429733191,0.89716656149,0.143719262402,1.07168535991,1.33729470959,0.860287511999,0.240555274678,1.16806440535,1.46602232227,0.570166578791,0.63596723910
6,0.0155275718693,1.11780813662,0.527456054065,1.46104451445,0.260711293013,0.979526374849,0.998144618483,0.126147637393,1.4982312922,0.642781133891,0.225580054771,1.21790466399,0.364986222544,0.91784174112,1.62768633999,0.995431947458,0.086150873668,0.0112284382894,0.499563281597,0.0185154535257,1.51312603075,0.360002046002,0.380131657803,0.0641087809261,0.42382206075,2.16030255551,0.0622993986426,0.358395874029,1.87329245505,4.50039460718,0.183865236975,2.05220989483,1.50373918491,1.05990745854,0.323385686214,0.0953560257569,0.392025324095,0.204436008562,0.430888063828,0.862790063408,0.269545794302,0.93587532547,2.28639663318,2.61703711983,0.975863045789,3.06001636619,0.0522185497677,0.184249987023,0.399472084255,1.64167701629,1.26990683864,0.796911644638,3.63491409356,1.80131138392,0.56284758715,0.0308651912505,0.0904690588723,0.189833499917,0.255677345661,1.0806550224,0.333939472275,0.899963947284,0.925688401883,0.732617850679,0.598678904927,0.0858732462117,0.118656901917,0.512483257911,0.431840830204,2.57853031218,1.3353132003,0.430468320853,0.787682414782,0.488378912351,0.896190971816,0.96578121663,1.68387953817,0.28525132736,0.0885847210852,0.836573917571,2.23265448311,0.265000060484,0.0933562429879,2.08557054471,1.09443465213,0.53206992481,0.188341169524,0.255584247239,1.43462510313,1.34743243976,0.509672466285,0.84884903611,0.891730150701,0.607659417517,0.101176208203,1.14837798413,0.153066288185,0.710989010285,0.192843627741,0.92907189649,0.925532385524,0.502250472799,0.538572633338,0.329592471235,2.4531003072,0.0309693880773,1.44255991196,3.35822186242,0.472069131611,0.00688960672196,2.96645229833,0.421912973883,1.2778068098,4.10177001051,0.0687395020444,0.417960778599,0.0951855176617,1.36834800096,1.3014295582,0.0332031355635,1.50568065999,0.293823940235,0.596546213639,0.373563027833,1.31023651213,2.13121561987,0.905757534358,0.475417596983,0.786525186168,2.93918223312,0.80385985925,0.935915242982,0.653839538087,0.40288634373,0.0872926801261,1.3305640930
8,4.6238428648,0.840692289965,1.4101621139,0.206946712566,0.768894202262,1.07089764584,0.186199730598,0.823902802648,0.300029679007,0.452289916896,0.193828895706,0.476140201634,0.630011678338,0.715765567981,0.0400477529803,1.6533867086,3.78648597137,0.461512453025,1.94687586413,0.0986462109706,1.7295855595,0.917808676504,0.493855986185,0.710444279236,0.203826781772,1.11494023745,3.54725096344,2.81856136723,0.138293582615,0.341474390174,0.517794467763,0.337913379361,0.469197349899,5.81554924468,1.72185384027,1.53172057303,1.8041730901,0.819306186949,0.336371091757,0.882959488709,0.488141839847,0.956157224876,4.24933733514,0.601497978733,0.344467395019,0.253886952657,5.24423786581,0.395167758728,0.519804006228,0.661038092769,0.211097076826,1.11855820049,0.988917301718,3.47549867435,0.136259112677,1.16978306635,0.754020144862,0.752930331325,3.84310426556,0.100684348597,0.444253997264,0.93921953394,0.00213887185319,1.11100254693,0.00840691573553,1.49424001383,0.0856848610865,0.749770073999,0.256603104497,3.9948633653,0.309296607092,0.0367791153832,0.389072361039,1.45459120231,2.30131344953,1.65525883316,0.380023865904,0.60612567314,0.0983230254408,0.483770524442,0.613613794341,0.37741289924,0.724924530517,2.2562417838,0.873637519635,1.01774385514,1.92239319072,1.15102306128,2.50562994167,2.02594213697,0.0694905721975,0.378327108211,1.16463819957,1.01723154543,0.295071777632,0.271222919223,0.449436921589,0.00619728046472,1.51449823337,1.70086334985,2.63328980906,0.10721443707,1.21151448763,0.0594197242575,0.677328478418,0.795507688087,0.515931478015,0.374138694857,0.163714272942,1.07995393058,0.63858661741,3.11709988086,2.53266672899,1.07426182692,0.55906473773,0.212397184757,0.209487433618,0.00107427069768,2.59560621701,1.19177467823,0.835646439464,0.0781437599819,0.219933327146,0.0562710689091,0.259501406401,1.72860705456,0.181623457606,0.886924665843,1.67272649027,0.0893048164655,0.291558917928,2.79197122938,0.225418446011,0.3370764614,0.744715275478,0.277333590566,0.
0831880423921,2.50340185489,0.838383530185,0.0547018364632,1.16394416908,0.731611857956,3.29303742567,1.12801843495,0.254816251312,0.857783279871,0.787497716543,1.19372921728,0.98078147759,1.23879574264,1.31251951519,2.27786155362,0.692481459804,0.16166975921,0.193742679692,2.3563304585,0.544828757624,0.201799269775,2.93326788759,0.0140702699341,0.67244444668,1.08378335765,2.23381197207,0.613669894989,0.576715179968,0.188296121206,2.35410213585,0.0377573334966,1.4134057471,0.0528480021368,1.07661874846,0.0507691477149,0.00363888055823,1.54349941844,1.81590809279,1.31178666422,0.711370275419,0.896241176661,1.26436895305,0.466654628652,1.6701835118,0.293235395084,0.524518398029,1.0314292644,1.33793985532,3.26328424693,2.54683285705,0.0259424416851,0.242164917052,0.575301610372,0.183163887093,0.522989812693,0.143061006192,0.229988821562,1.69716008596,0.454170048548,0.178959468165,1.1585417534,0.204649860902,1.00244866191,0.910885073252,1.45592456565,0.505246184684,2.82330268666,0.455597411321,0.265836200093,0.191223934017,0.0152975711968,0.117611429487,0.185115682801,3.69290048021,0.0644582479873,0.843571266368,0.0695567387244,0.694759390417,0.0856428367593,0.508870513824,0.0571663182431,0.0123152369735,0.313527036541,0.175281661623,1.16160657468,0.738599910714,0.653332035432,0.88688431925,0.992552753254,0.354851443695], +"gamma" : 
[23.4020700939,21.5298748203,32.2562321362,25.7763193718,23.4703121579,29.3661202701,26.4459369299,27.3791809985,21.9847200673,30.4283213154,25.2524564292,23.0766681004,22.0748007426,22.8738575385,24.7173832218,20.4867806803,23.6115319518,22.3396012727,34.0332461228,22.4481130156,26.2994393417,23.6842779672,26.4469684249,32.208328631,25.2154795653,21.514914246,27.9812265667,29.1646242454,36.1741183773,21.9241086942,25.0200978673,34.541160644,21.1872171313,20.6643523094,28.0611449787,25.0763752424,21.1316987329,28.4729997234,28.297691206,27.9971971914,22.7498137295,27.8584113561,22.9930654114,23.3932717127,26.5867370467,20.5603038407,24.4578198277,31.0726756074,21.9970997924,25.8772696238,23.2792448714,22.1746018538,25.0520189677,30.0147724528,20.9379872315,24.1727033283,34.9513827989,21.8579958707,29.1922941933,32.268170894,23.9759615289,23.2633791899,28.3401947019,23.4056316197,21.9573537249,23.5790724502,32.4038252295,21.7878154345,27.7219914485,21.6891773332,29.0876555664,28.2659118607,27.7141202827,26.4474394796,22.2405955932,26.373367326,29.7809748078,25.1458962029,22.7640487649,29.1364244361,21.2416301746,26.1620639266,28.9165242516,24.1332087951,24.5034865528,22.0630655187,37.0706767896,29.5045377518,34.5009633676,25.8583734444,25.6237019739,29.1496197025,22.7821172328,22.9674190397,20.6825965477,30.0805247229,31.1243102911,21.378796057,24.9602505008,24.498254177,29.0456549503,25.4614759249,23.5492250542,23.6296093195,27.7048563838,23.6143923219,30.8096926411,25.2366118111,22.9342018597,20.3329277139,30.8160753344,25.1544738495,20.256646979,24.7267776724,28.1766743251,21.6930035797,24.3736036984,23.0584508765,24.4423122369,22.99470941,21.0695771875,30.8894198903,21.2748243436,22.3587844346,23.2265191862,21.8254137702,20.9182783187,22.8858016571,22.8513749082,32.8200661037,22.1810922078,21.2289369077,32.0811622537,36.2015120794,24.2696738229,21.8534574697,20.976608467,24.5070371167,28.5753591781,36.3660762521,27.6972536041,31.3783602398,30.271239401,22.1219296
831,24.5513116546,27.5162180295,23.71240597,26.7569582375,28.9124084132,22.0112791255,23.6452224477,23.3271453018,28.8044969845,22.0345106059,27.1407964702,26.2810075701,23.8503234598,29.8999812194,23.435396946,23.5115874386,25.6843235086,27.877227615,21.6477642172,26.6076880577,30.5500715332,32.9591334789,20.8871158935,27.8993810718,37.6097196154,29.1195769842,30.5362603374,23.0026683957,20.2106398147,21.9748341562,29.2723974831,30.2994040965,25.9237668312,22.3755955888,21.1789713581,21.9935792747,20.3111108153,29.7688607108,22.3349178042,24.4384511424,30.2365574087,27.3852187001,27.2334957635,30.2447416054,24.2216036265,28.2320450027,27.9283189683,29.1941059574,34.3823679953,24.4002814522,25.9235910909,24.067215501,30.3709626359,22.2624007602,27.9738313877,24.3180558366,22.0444775167,24.0561351466,32.0742577005,20.2948834474,21.3472329216,24.2658670818,25.5387220467,33.7055649282,27.4390032872,23.9768265282,24.3795134206,28.035867306,26.3881524935,24.5783732033,24.6725793264,21.4417604343,21.1470873735,24.3746332706,30.655739241,23.9377251296,26.8840484717,23.4008012268,24.090496709,21.5226738631,27.9097797855,23.5760755694,29.0516553777,24.5115122439,24.2788240342,30.9313326361,22.2613336207,33.7861356829,26.9371114025,28.1509447967,22.5786110298,23.23807363,25.7090964454,22.1971094629,23.0513728887,26.4812783485,29.1454960021,27.2556497752,35.3995007571,27.1526271323,22.7495472506,23.0534100647,33.2030607167,24.5309609048,21.9713357981,41.7067257021,24.9152849464,21.0552530522,22.6147573433,30.9961614016,24.727881004,24.0037052352,27.0823289132,31.6480134684,23.6309120499,26.9003619635,32.9092386539,26.0464097751,26.0324601818,23.4618735782,31.4303877179,24.1524008973,26.7586599848,25.956830104,23.7769743583,21.2146774143,30.7673496572,25.1256169725,23.3446892483,22.3404427682,23.4278757404,25.9898800288,20.7235374574,25.4676280396,24.4834665152,24.5804004299,30.0671960069,26.4600779112,23.9859958353,25.435501002,23.4695889694,32.9267264658,23.1562693065,35.7957
273664,45.2462445115,27.0057363853,28.5976138009,20.5293832446,22.3015275666,21.7734989727,27.1505254024,24.8751278647,27.2943584101,23.7005794411,26.7509335294,30.8920076413,25.6822637181,37.494063308,30.6920203637,31.141641066,20.6513374388,25.7183047116,27.9043856399,28.3421366995,24.0865190989,21.6181566532,22.3101668922,21.5457160877,27.5504160094,26.4396679234,27.0975105076,41.5602192775,25.558598806,25.0358378223,24.654272581,29.5280988255,23.8084832319,28.6418538164,27.0784635968,33.4237451,24.2509310937,24.4115364156,27.5830903361,24.2440352552,20.7122531841,27.4581685147,23.0859938202,21.0655210693,26.4210887753,22.8791327272,25.3011838547,21.5791545502,23.6994164277,23.7110020654,26.4710732813,23.6601009694,21.3707634202,23.8163315217,21.0795164284,36.7797295444,25.4827413135,22.4472708755,30.929481418,25.4076973768,38.9581577108,26.024995253,21.6995064143,23.306742254,34.7126446375,22.6507031606,21.8452608301,25.7229017859,27.1063775454,22.4196637287,27.4028710147,38.0768101456,21.4400926349,24.8620018962,28.0221917831,22.7823816283,25.1505578523,22.5758524229,22.2770660308,24.9913737829,32.7599254817,23.5802791186,25.5814804394,23.7429661864,23.239889867,22.9289813179,24.4263310857,22.6435711301,23.6173230533,39.551587905,22.9904610606,20.8094897426,27.6813523597,22.9425081042,21.116348851,29.9963248081,27.0052863552,26.7102746185,28.340255777,31.3798712633,25.628932302,28.3653252833,22.1171232767,29.0192169422,20.0788305558,25.6936015362,26.6957303186,25.0364628134,25.9492884166,28.4837974971,22.0515348994,22.4475588765,35.4840555757,24.5264131282,24.4831030618,31.5137478769,21.4416678572,24.395917165,22.3609059462,35.0327857154,24.2936196349,38.7740282327,24.3015407405,21.8588776505,22.5528269142,22.0383358154,32.9607058596,37.7792238105,29.4019278267,23.6714445364,21.8807557376,21.7961550788,31.1904264486,20.5232897739,24.1964190928,23.9694847157,27.8564640384,27.1298492223,24.7009727848,29.0612505074,33.1299997826,26.2941203858,32.2677841916,21.9101
863356,22.2806392369,27.6892579428,22.8638843941,21.6456702933,27.2135023133,29.940402904,26.1445225064,24.9171311121,24.6661735576,24.9804348375,25.7901018279,28.2663345638,22.9267311643,35.2285539273,20.3616740613,32.1153877091,26.2919413995,27.3448408417,30.89118701,25.1619690682,24.444143299,37.4171120675,29.3755195815,22.5757143161,37.7233431754,24.0954964299,22.143816344,29.1261054723,30.5860998952,30.1276589243,25.6233726895,32.0897336447,28.5743220602,22.1897018993,20.4238217973,21.5890578441,23.2656723715,21.9817532227,26.8068329018,26.6076760009,25.5951740103,28.9711309123,27.4058238348,22.1712627203,42.2545658346,41.8649165338,24.2870883877,22.9669501698,20.5088492308,26.4662404469,28.7969797121,27.6409105164,22.3527977435,21.2936513083,21.106610259,24.2366360087,28.4885284624,30.082933384,22.0323717815,31.2417261432,26.4775426548,30.0371866099,30.8771594635,27.8242764897,35.5221517692,24.0058611384,27.3177404054,20.5251540551], +"gamma_std" : [3.2412004778,0.223828712261,1.39362074403,0.755086712815,0.302568571861,1.24238397568,5.5919734063,2.79549817147,1.01094144756,2.57968618888,1.93680295932,1.11272157886,2.60876734424,1.72137327007,1.7763909849,0.600976515624,0.0178772866785,1.91465694337,2.83675383791,1.99689354098,0.921203298656,2.65078312778,0.554699699151,3.38495393376,1.15288525935,2.03064072559,0.393725074124,1.76162821017,3.67435929009,1.84030071057,1.64525885157,0.493143388077,2.00740485451,1.85369465853,2.583138761,0.578735193022,3.22862223377,2.73187797804,1.83820771714,1.30576465405,1.32336305747,0.941936031694,1.84795562337,1.67312934123,2.95657390346,2.14895121539,1.93057930407,0.499882888079,0.66486308954,2.40653845435,2.40449087164,0.201570518167,0.389815529984,1.34294773064,3.62729263685,1.45449868474,5.40200826922,1.32721959691,1.40827315231,1.95821377731,2.21911185949,0.538844511005,0.369844573525,1.98637835102,5.16315241327,0.758574426127,0.534419347035,0.574437845914,0.167017686285,2.79492525433,2.89332486723,0.012797015121,3.566
20493383,2.24889406363,1.64301490351,2.58190152248,0.748321658498,1.36129818767,0.788810555938,1.04808666303,0.754906559678,1.49651514043,2.16787653414,1.10614704273,4.68357967507,2.09116791505,1.72660450985,4.25552140268,2.12922261179,3.5224330083,0.74223643867,2.40282147921,1.45799809781,0.624673445951,1.58536603233,2.48284561313,1.65704815643,1.22127275197,0.91655010983,4.60916156376,0.809401668962,1.74960530207,1.06422322754,3.61795119497,1.25466152191,4.56964863985,2.14164682768,2.35528806967,1.6393557051,6.22377524405,0.55391066459,1.10311523956,1.4573136266,2.6564068022,2.72445686539,0.940404057712,2.30737816225,1.51171786186,2.27322897711,0.867944387974,3.44206026296,4.08313892139,1.11637982934,0.412257518447,2.9271565089,1.01468384553,1.51485868573,1.88337682538,0.866400565498,1.72277245771,2.43208285538,1.671379094,2.40334552765,1.70156064153,1.40175456319,2.51710479003,1.45522894681,3.40111433534,3.24220499362,0.428012067132,5.14245826037,0.892324734661,1.49466648003,2.40542631239,3.78431046004,1.27495371664,1.40926788195,2.08178719131,0.122353697008,3.27223256884,1.6881073201,0.6650455853,3.02148459146,1.37455157806,1.25142462752,2.71278367703,0.572415962508,0.686384032635,1.00817769652,2.18548462332,4.47743923974,1.77128544872,0.628050648334,2.09512919997,1.93471230008,0.498865464347,0.403647068193,4.4097423879,2.41203008491,2.14026496859,4.51913476807,2.54131435614,2.70757460698,0.837475962374,0.872485903989,5.00713257328,1.3027270571,2.17756753531,1.36107125362,3.49912483288,0.231727382321,2.40251215372,4.07521919671,4.38689066877,0.847346935187,0.338600168917,0.703405586756,0.752712902612,5.46041342598,1.87370797094,1.89574187643,4.11518319746,0.952278416581,1.34561716228,0.756304357263,0.629195789998,1.74473292752,5.40654493712,0.882652114797,0.589975954842,6.4563375431,1.75321228418,1.88447318302,2.51962071424,2.68973196704,0.549042057659,2.41309621596,3.42335763904,0.944468577554,1.32462344993,0.323422756633,2.59685461385,0.176779227547,1.33402784
372,1.19129721107,4.04825484691,1.75062042187,1.28017280928,1.49360371667,1.58629987093,1.61272505399,2.8448419983,0.419391380558,0.355582704271,0.514226165155,1.23480022825,1.30435308172,0.391982446538,2.37054987632,1.26980727526,4.60077357872,1.48498341883,6.47526380537,4.12385318479,3.11800437133,1.91684720585,2.85087101037,4.5362212825,2.10250457028,5.63980309042,5.7430243865,0.95662202776,0.192990153138,0.588371038709,1.81812527792,1.27742121302,2.30657763447,1.78435617232,1.29146179278,0.252863639524,6.70057736768,1.28751753341,2.12537272517,1.98341502677,2.36137541388,0.343636752402,0.871447078712,0.992738959691,3.2003686001,0.761391021343,2.25363037317,0.358448426361,0.59762678479,1.32107015671,4.48943750501,2.90903551897,2.81356530073,0.407718451054,3.19683039822,1.62331610987,3.44509026624,2.67729101813,2.1394181012,0.917749984767,4.68244193659,3.18326933248,2.17316741511,1.36784870014,0.778084280703,1.61056931643,2.46176951978,1.01759771476,2.06891460458,1.36433385335,4.38983539826,2.9214820096,0.844805885767,1.95958304171,3.50460075133,2.18384685218,3.28730819676,0.877456038301,2.31243262206,1.60776092664,1.01675878648,0.391762174654,1.55495264709,2.21219779535,0.693270506177,1.81597887887,1.40756953052,1.60661624917,0.851636359563,1.63805745327,5.49336621112,5.00563780097,4.17440334374,0.872434104396,2.05126253706,1.22870712111,3.14612761759,0.302543195753,2.95140251872,0.240163714284,4.97174066462,0.260008770079,0.602045633747,2.03175061681,0.853029790523,5.53761433388,1.30181917483,0.641012862517,1.53727571926,0.414371710231,0.772055113836,3.0936430414,3.77790742201,0.755247004705,1.17891771601,3.84004238447,2.69477379745,2.83051153654,1.52146928539,0.729409612401,0.871959068914,2.01255593363,2.32135312899,1.86304757123,1.64519555206,0.402945828044,0.811559357881,0.731017553494,2.61961354769,0.629927440676,2.81064615631,4.4865475039,4.67435622944,0.872974822979,3.76213200383,2.69471428244,0.457423412547,2.51937634288,1.05303291673,1.40175650306,1.3372
1587494,3.42390523747,0.650407009212,0.431094546779,2.41080888586,0.268689344484,7.693253351,1.19290165984,2.17125384461,1.48329169366,1.5976248845,0.799807614388,1.92274116453,1.03676796946,2.59599916783,2.21515952264,1.035554753,0.718686566706,2.53116574973,2.59788541048,3.49263825198,3.07861394952,3.36981729551,0.947948729404,2.25488639844,2.6312696758,2.56368923443,0.806992953815,1.12539099133,1.25353711889,1.40062466754,0.417050719072,0.742179311213,1.46147105119,0.553255343645,0.82834957785,4.49328564469,1.4410275087,5.58094516782,0.362408282626,1.86441907308,2.64433332285,1.13085080149,0.957904898064,1.95758430908,5.73133722186,0.516516668043,3.7712697327,0.771283575434,0.731799146958,1.33311709544,2.00177407296,0.621162564914,1.83125064679,1.75218251416,0.41708307574,1.34400380444,0.748829795785,2.145086667,6.49058117314,0.937209884309,1.24569215116,4.06534826452,1.63435559734,0.428029447968,1.22785962502,0.807507723744,1.47312433431,1.1041659361,4.15043776807,0.478596898401,0.744093572906,3.76869650016,3.57776429334,1.87752413421,5.3856197345,0.850333725441,0.658160707051,3.31344469543,3.88177937761,2.59853765067,1.78694079551,1.27478983337,1.30795450602,1.60529633734,1.8037796638,0.712539819901,3.9883835576,4.28632058705,1.29743140582,1.43136780755,3.03910591515,0.319353176148,1.96424136103,5.48410762077,3.68822792025,3.52192257951,1.73783154612,2.47753252328,3.06628853572,4.27929373804,0.835633379862,0.624695587739,3.92442817034,1.64398547322,2.97219401357,2.82352895158,5.0771982642,0.378535679633,1.21236744498,3.69316010926,0.495139184921,0.806002864281,1.11995160184,1.89340462606,4.82652561561,4.55232410069,0.61920896755,3.18201830643,4.13161082807,1.84806772408,0.951118975966,3.3395704832,5.09667149903,1.17404072892,1.53946171645,1.95572745762,1.60824472299,3.11567652706,1.52730557308,0.471969228743,0.836467942222,1.80400732897,1.99251373124,0.607367369485,1.24991349959,3.18891593969,2.48594935372,2.87696557032,2.56453409347,1.44415984686,0.74030518153
7,1.14520632952,1.70804596235,1.17041778945,1.74298505339], +"norm" : [-3.02353829742,0.394716690288,-4.59719357528,-4.77865152174,7.82890286251,4.96702916486,-1.53546061628,-0.591269235849,1.84511283806,4.23217368193,3.03594678792,-8.48110554556,-0.625041832839,-0.855435605323,4.76458417583,2.43214670296,-12.0081865483,-3.8592126857,6.34512654762,-4.19284867918,-4.69870722761,0.384925052061,14.2447195972,-7.10613762178,-4.8854365152,-1.37939362163,-0.486932190437,0.618860340752,-1.99529861112,4.71904896807,-5.56771992548,-6.15576769427,-13.8511582698,-11.3038039428,-6.30378699216,0.80072646514,-8.32967245847,-1.4008643767,-7.31756224027,-0.335586421962,-13.7970940369,-2.99771477668,-9.70997763937,-6.8536795613,-8.53515125485,-0.568251264929,-0.110079445313,-5.76943267395,-0.343571749027,4.74871107249,-1.65061655585,-0.766629450089,-2.05930800536,3.02405795394,4.63597306893,-6.59630778873,-9.74553219142,-1.88907700671,1.79181572508,-5.30262163862,2.31290041265,-2.05015951044,-1.74995322048,1.35107797175,2.2648251612,-6.77934426147,-2.11746660346,-13.521169394,-5.2623442073,-8.09150988291,-8.66304853842,3.37311345047,1.61820752649,1.45000926405,3.00771721247,-4.5154369568,-5.1113711253,-6.60584304007,-5.6310674633,-0.885522270824,-1.7434194954,-7.78859733156,2.08353467945,0.168048028271,3.05368473002,7.12437605673,-6.98759123855,2.25295549368,-2.65788800606,2.56207076021,-1.05894659782,8.84730718845,-2.57464102291,8.01848681208,-1.85194923843,1.97626577931,-1.40945123155,-5.74265774246,0.924848689262,-1.236617136,-9.82828646908,-4.81270094037,-2.16332069579,-6.64503101117,-4.4128632283,-2.18131923051,3.4769503007,2.90464238744,-4.94743843102,5.90850047098,-4.64367413062,0.28500935648,2.64984379487,-9.84635302601,-7.11243489023,-4.01413462206,-0.897565683674,-2.9670055395,1.34579168233,-10.2449240975,-13.263986234,-7.83416108133,-0.231964489627,1.51055085739,-3.37284602283,-2.69571094061,-1.46171389249,-5.03272562667,-4.08532204114,-2.08503518413,-8.12072642097,-11.00
41995675,6.17368102305,2.94504150965,0.289700715472,0.775772051604,4.53359862464,-4.20276785098,-3.5067513994,0.493957449563,-6.11995519946,4.60282921316,0.539823928982,-5.26718837544,-1.06510242931,-3.95862624442,-3.3614648762,-2.08570717816,1.40160374371,1.17756178334,-5.7858825122,1.59042917238,-3.52136537989,-10.3889512288,0.134930425744,-9.81869922865,-3.83743760741,3.22956266628,4.0997718246,-3.23849558098,-4.08116065876,-2.58373502131,-11.2239380805,8.34353925626,-5.88483736797,5.20083434413,-2.55278679782,4.13693497423,7.60392130926,1.7321651912,9.1232979508,-5.39700205249,1.6368439079,-6.3436536719,-8.06925452784,-4.35315465608,-6.59620848542,-6.19413344317,0.175776522814,-4.78902358567,-4.83727435503,-3.86320776356,-6.6327845063,6.77554192903,4.04904993041,4.35012365869,-6.87189063252,-5.17354627605,-3.97850376168,-3.44717949972,-5.67148535962,-5.64252339255,2.19387536337,-0.665533936345,1.60597169449,2.55491320946,-7.1045130663,-9.06708019399,4.48303920998,-0.738623955583,3.63740551779,-4.84181723434,-0.453189159375,-4.88692736379,-7.84317036663,-6.12509985915,-15.2220474521,-2.76492901674,-5.75960501527,-2.66304625965,5.28649852319,1.04755922318,-4.46889628488,4.19989941142,-2.67861070119,5.15020902979,-6.23426225664,1.01641065023,4.31786128238,-3.27745278081,-4.22844189936,0.341833406178,-6.80801962174,-11.1225226863,1.12714078249,3.11436192151,3.53712300384,-1.54531455268,-3.75054328375,-0.910214917602,-6.4740656525,-10.7074697736,-7.26127869785,5.18301392898,-4.88103693178,-14.1014721454,-7.31164814152,-0.813138692209,-1.99521315468,-1.6737340958,-8.83762052508,-2.15140025937,2.70244660698,-5.21218375252,3.20089622947,-7.41461127834,0.146067941673,-3.18111834259,1.2090890813,-3.65830278349,4.97036113093,-7.3837097202,-2.96232990981,-6.35593825351,0.104259984528,-8.05705533331,-3.29433456058,-4.90823425028,-8.30210313825,0.322873967494,-7.35120453766,2.02111348919,-2.7836775386,8.051950056,-6.43552215192,-6.88968116176,-3.33608674763,0.416689109388,-4.
00166366694,0.249402075769,-0.00203023292896,-2.75787401873,-14.78967028,-1.19596579501,-1.61737466126,-3.48602083031,-8.47137011855,-6.4259000639,-2.93748263051,-4.46779999922,-2.5770648199,-3.75372303743,-1.77651311819,-6.48878157978,2.45436751248,-7.7559258241,-15.061513524,3.70625095076,-6.33567762337,-0.0820837076559,-4.18515082187,-0.262555949149,-8.1508951908,0.855390693155,-1.69969393614,-3.12761997021,4.74863069817,4.75149867008,-3.93326660895,2.32994771008,6.73616799549,-9.05123071939,-3.89120762373,-3.72910333316,-0.099687676645,-1.05502663271,4.61649102124,-13.322942962,-6.57489305267,-4.39482081973,3.23592248818,2.61974208913,-2.57075070314,0.0290122126464,-0.557740962406,-4.17394245755,-0.206221836183,-3.94122471798,8.64373145741,5.04802339384,-2.52717139012,1.50214183457,8.46425939096,-2.68485898067,-6.65244711693,-0.362513828504,4.5150654038,-9.0470118044,-2.72062972428,-5.58207012124,-1.4819290705,-9.4785927829,-7.87446777899,11.0699954406,-5.4465369927,-5.75826317939,1.18140480513,-7.78822079245,1.07339962024,3.10695557199,1.34136245958,-6.04767741227,-6.5406228662,5.56144696298,-1.52441284001,3.92334272602,1.18516467207,-4.69637293416,-2.27550496464,-7.67962893925,-2.85253127211,-7.7904363355,3.52299568457,1.17119049993,4.29841677382,2.82465387956,-4.17222969081,-6.39801429802,-5.47419114987,4.13187022913,0.28639324377,-1.42150687256,3.07021175117,-7.67503858995,-3.31685290809,4.5321259055,-10.0542050469,-7.13310344722,4.20786394536,-2.78379766076,-14.2454776712,-7.16974008783,5.99766813224,0.370353848923,-1.24337036513,-4.71086582829,-4.37748108651,-1.46798694119,-8.54114096064,8.86592376961,0.822806086967,-2.95240382374,-6.58467195062,-6.87906803192,9.06151344632,-1.63034658689,7.09297280707,-9.90765499104,-5.87181704475,0.764682465358,-1.46969305238,17.637640204,-3.27562799331,2.27068657624,-3.82403266088,-1.3444913171,-5.48806947812,4.67824731159,-2.75519428854,0.214689252962,2.70785610386,0.666819763777,-0.218669032817,-2.05057659525,5.078765
85809,0.830527663017,0.282436759154,-1.02605947301,-5.27526878573,-4.82615047204,13.8843655942,2.79766270973,-6.87669917457,-7.58371051255,-7.52188172365,-6.49377587563,-8.83316180496,0.257008661964,-9.9361090467,-5.66394415239,-4.5728131344,-5.05938026749,-2.15799333495,-0.829360562456,-0.640318594311,4.15384676329,4.44153080578,2.25906753932,-9.6459030339,-9.75857495975,-0.513535414066,-0.276043993836,-3.99051680212,0.147020287369,-3.42972324343,-13.1421850032,-1.66720607508,0.44982380649,7.33962922803,8.35218900042,-3.22691613555,1.81151054021,-1.35492262579,1.13537956908,-7.31117369855,-9.49751725143,0.725770963637,0.0041163332643,-11.7311519285,0.525158919358,-6.55244569022,-3.09848142724,0.040276914528,-5.01572705582,-3.80566297238,0.820122617887,-7.28308282164,4.95975027147,-10.806338365,-6.55816563997,1.29169631766,-9.89732917709,0.288529881728,-2.91634045873,-5.427419481,-1.46784342632,-3.59118041897,0.16856486352,0.859129643227,0.835530146448,-1.59211517284,-3.51167435685,-5.63458194153,-1.09832443652,-4.60104367235,-0.00953959329388,-6.5846753721,-2.41325067253,-11.6984540307,5.0399718168,5.56203246712,0.632465602303,-3.33465416452,2.31142023809,-1.58098466977,-11.3616944834,-6.81395508265,-1.59966585423,-1.35636778919,-4.39560170157,-5.20140251982,1.72986900313,-5.11273531463,2.68144657666,1.75009188373,-2.28357514089,9.50337256355,0.847487335035,5.44704840591,4.32125141507], +"norm_std" : 
[-0.761837213353,-0.331616898194,-1.75131542992,0.628894110773,0.282501864129,-1.33813943,-0.50060684963,0.121645029892,1.70832347381,-0.970999448321,-0.619332343444,-0.726708131791,1.22165541672,0.503699288341,-1.3878740774,0.204851419543,0.603705215945,0.545680308693,0.235477019441,0.111834993822,-1.2515037504,-2.94934349814,0.634634160585,0.124157016111,1.29762248959,-1.68693341116,1.08953904655,2.06088173968,-0.241235326269,-0.94787218032,0.676294028923,-0.653356162094,-0.652295297944,0.528827604205,0.357793249335,0.188649359732,0.869416879035,-0.0506674481438,-0.71636457461,-0.103258720839,-1.1410365794,-0.500776900872,-0.389301370382,-0.473850530407,0.128664303795,0.153694305281,0.444790058081,0.128531666655,0.252529866032,-0.940638662695,1.00214544816,-0.52541498431,-0.887400935623,1.83131360327,-0.923029332098,0.700537686638,-0.892151197664,2.30074000291,-0.817765299371,0.513759631538,0.623586943483,1.48920592699,1.94047867052,0.543237129288,0.506190912339,1.66201449055,-1.18920250015,0.0935974490328,-0.539163905131,-1.43739560422,0.187937386025,-0.450454457295,-0.516878231615,-0.0956356677115,0.316423804579,0.603334657292,-1.49459146388,-0.110894079325,0.241289403967,-0.582645109052,-0.241112652347,0.236360537321,0.124720725203,1.04632597952,-0.27309185588,-0.534834020277,-0.306563304509,-0.162242664723,-1.08323219585,0.708401493453,1.52074304043,0.290343183233,-0.683066329711,-0.950312866297,0.400709935824,-0.12607168361,0.398204888062,0.141638473355,-0.264141421911,-0.452212074311,0.758201972721,-0.515583498457,-0.591202321979,0.896745784086,-0.971437523747,1.84080991349,0.153881232452,-0.274083943452,-1.78492568996,0.981006686402,-0.873717139787,-1.01563442014,-0.411243537311,1.46562116753,-1.00621906211,-0.902147762382,0.752769142961,-0.490508526506,-0.524672210141,-0.699195861143,0.352360939272,0.0681025983371,-0.930341707001,0.845399560277,0.0164723816491,0.844962955458,1.85083394768,0.0220742408712,-1.36917902216,0.887203523342,0.0143311821492,-0.074
1547051151,-0.048564787848,1.23502145314,-0.433294923904,1.39103545609,0.820210741477,-0.247423465317,0.30227074638,0.543980361346,-0.942368503754,-1.26638281245,0.937249545091,-0.72010224475,-1.5939515375,-0.375497816009,-0.958703834468,0.794336400065,-1.60510783562,0.543710253458,0.925166364208,-1.469628604,-0.399592346308,1.41734264438,-0.897608667966,1.84480501591,1.2531682095,-1.49093241721,-0.0277339245574,1.37523596087,-0.0252081701471,-0.667880179075,-2.86801752898,0.210688543428,1.28715530785,-0.574305988486,0.49532664657,0.396049589985,0.58879818975,-1.28175713014,2.02992261305,-0.501944516275,-0.159284565628,-1.49621629567,0.0114477138521,0.419445985251,2.05121388048,-0.368765332511,-1.68925467803,0.147681161421,-0.180998391963,0.158059054263,-0.396615421768,-0.400236629563,-0.824895666289,-0.244440445893,1.21945742825,-0.43363049174,0.861183873108,-0.334503693494,0.159559959242,-0.984164476346,0.754084973823,-0.284391662165,0.32479752957,-0.885424601706,-1.28089348334,0.196109935055,0.954644156116,-0.800971331814,0.0158514729825,1.08755329253,-0.631242819687,-0.0226893248728,0.685879242202,0.519179207928,0.182701891972,0.204647380607,-0.265986356197,-0.000227288704174,1.23945231553,-0.819715255694,-0.260388906514,0.519140256693,0.143091644531,-0.116677746539,1.49674411145,-1.48427437532,-1.67118275603,0.917173408575,-0.758014151397,2.06479240297,-0.850778395978,0.499450712758,-0.0792663655031,-1.40329263703,1.57894791325,0.000369028987627,0.900884914363,-0.454869220081,-0.864546645457,1.12911990353,0.0578744128611,-0.433738666312,0.0926976373756,-1.39782014504,1.45782265006,-0.176756915665,-0.2542403002,-1.26343749602,0.452262741199,-0.840117409241,-0.502678070859,0.513392586902,1.64165300475,0.580790036214,-1.70734026937,-0.178355430855,-0.828459954458,1.28631168451,-0.406452361793,0.156632047142,0.0521066804267,0.955813177407,0.743191500529,-0.486323084325,1.92046727351,-0.652749022684,-0.173303776916,-0.360410082419,-0.380413976662,-1.29813980546,0.52
7919007521,-0.0931002762505,0.401184680615,-0.102583380148,0.030869097738,2.61610050962,-0.785577944872,-0.506998120503,-2.01820571555,-0.676853137903,2.66674367704,1.45145614673,0.634628855315,-0.502826863797,0.512931658764,1.75677937083,-0.974310800693,0.680397048216,0.955798725606,1.50153547771,-0.756265648025,0.473504604182,1.71374344767,-1.14769922048,0.00290322049513,-1.10057035996,-0.297531781547,0.502409078192,-0.000987418981448,-0.674560277944,0.297958279225,1.46557313878,-0.303628593773,-0.994479884587,0.189889991266,-1.68402957184,-0.45838074161,0.543405908301,-1.18726425743,-0.412641692684,1.17712534516,-0.313704165315,1.57903161901,0.375388235873,-1.56813881881,-0.900886519308,0.652345518687,0.871600313907,0.268216169978,0.947681219661,0.14726758787,-1.77245546326,0.59241961089,0.0903254744846,0.651121453984,-0.0811946962298,0.801897602972,0.139845227106,-0.501002761518,-0.128302559175,0.414605966484,0.604577785973,2.13409475324,0.941187837385,-0.931456795706,-0.124667539256,0.200696290937,0.180256285555,-0.320370096868,-1.59612803134,-1.28169898363,1.50258574677,0.653538002103,-0.319536626289,0.955094010533,0.261995955396,0.160792900918,-0.571680642407,0.351660058596,1.11498006066,1.18326825729,1.06094106427,0.510712630416,-0.938783998005,-0.546496141191,0.590029971202,1.48218523572,0.10211810416,0.265438049312,0.00319307433395,-2.59501150029,-1.55556933212,1.10299595783,0.554736503855,-1.28901163638,0.385241647645,-1.71729172626,-1.01835312911,0.051635317273,0.503298709971,-0.543186230699,-0.50667841739,0.729652833474,0.434273362739,-1.13367360599,1.42395333777,0.266351536554,-0.854264392719,-0.550596561249,-0.619109858975,1.03893338917,-0.910610824816,0.529952566873,-0.00847143614797,-1.12903824884,0.569854190963,-0.863391621561,-1.35614426983,-0.0571515568786,-0.108621121585,1.65238409216,-1.3509280784,-0.546096737434,0.991400184464,2.20099738608,0.42789979109,0.290468282765,0.611953095869,-0.512450984176,-0.724230691032,1.69288189574,-0.29933911968
2,1.57172719445,0.461444067234,-0.673829701094,-1.14103626018,-1.22891797584,-1.1592824639,-0.320829018255,1.08834758337,-0.906203145379,-0.464152061905,-0.513378372516,1.61783767507,-0.8166506056,0.244719605323,-1.31109422683,0.388406495077,1.59237370969,0.870399036779,0.335249325288,0.648959907332,-0.183151791075,0.500241269555,1.36882639497,0.895091842247,0.647293371999,-0.567878708259,-0.579517446608,-0.751448572574,1.07551917611,-0.62114235961,1.87855571664,1.26023993075,0.310050972989,0.106402292088,0.248012997113,-1.39383959205,-0.669436307001,-0.566791473817,-0.381778902753,-0.946546906825,-1.06510299994,-1.33182618118,-0.986453191144,-0.378391147408,0.764711974788,0.0603594164541,0.618509998953,-0.484921020219,-0.280530239708,0.406962904079,1.02518778661,0.254751681263,0.0875239905036,0.0706983543389,-0.573152602817,1.22892597332,-0.962201893069,1.52555675514,0.827282588614,0.912470470299,-0.127292342582,0.63431664124,-1.53089842788,-1.29070149281,-0.526228340585,-1.13223396004,-0.499797126597,-0.728463087382,-0.58314416965,0.329290656707,-0.826860798028,-0.536867983053,-0.562980133545,0.918404799512,-0.0793993782114,-0.278624682616,-0.130459538605,-1.39699761449,-0.244713889273,0.830253910578,0.240821201544,-0.915697123133,-2.22527996377,-0.663067012309,-0.321194763746,0.498388164634,0.380338976442,-1.06703532454,0.255452172355,2.11128718522,-0.634189962037,1.36875576787,-0.970649489259,0.654245334263,-1.17189521913,-0.00315987197527,-0.745604825465,1.5982908861,-0.913399998036,2.40291208791,-0.589360262208,0.107657442325,-0.139297516223,-1.15992572508,0.61896478197,1.37389046967], +"poisson" : 
[48.0,49.0,45.0,48.0,47.0,47.0,49.0,44.0,45.0,51.0,48.0,51.0,45.0,50.0,52.0,46.0,48.0,47.0,47.0,46.0,48.0,45.0,53.0,51.0,52.0,45.0,54.0,46.0,49.0,48.0,44.0,50.0,48.0,42.0,48.0,47.0,54.0,44.0,54.0,45.0,49.0,49.0,47.0,47.0,45.0,48.0,52.0,45.0,48.0,48.0,50.0,44.0,46.0,47.0,47.0,50.0,49.0,50.0,48.0,48.0,48.0,52.0,46.0,49.0,46.0,51.0,47.0,51.0,52.0,51.0,45.0,45.0,47.0,45.0,46.0,49.0,54.0,54.0,47.0,53.0,49.0,51.0,49.0,51.0,50.0,49.0,46.0,51.0,44.0,46.0,44.0,52.0,49.0,49.0,57.0,50.0,53.0,45.0,51.0,51.0,49.0,48.0,46.0,51.0,50.0,52.0,46.0,46.0,52.0,46.0,45.0,51.0,49.0,48.0,49.0,45.0,48.0,45.0,53.0,50.0,45.0,50.0,49.0,45.0,47.0,45.0,47.0,48.0,49.0,53.0,48.0,45.0,50.0,45.0,54.0,46.0,46.0,51.0,46.0,46.0,51.0,51.0,48.0,44.0,52.0,47.0,46.0,44.0,51.0,47.0,47.0,51.0,45.0,51.0,51.0,50.0,48.0,43.0,52.0,47.0,45.0,50.0,53.0,47.0,46.0,51.0,52.0,50.0,44.0,47.0,48.0,48.0,47.0,49.0,47.0,53.0,46.0,51.0,53.0,50.0,50.0,47.0,49.0,52.0,48.0,45.0,49.0,46.0,46.0,46.0,51.0,45.0,52.0,47.0,52.0,51.0,47.0,49.0,53.0,46.0,46.0,48.0,48.0,46.0,47.0,48.0,48.0,45.0,50.0,46.0,52.0,47.0,53.0,47.0,49.0,44.0,45.0,49.0,47.0,45.0,48.0,44.0,51.0,52.0,49.0,50.0,52.0,55.0,47.0,47.0,50.0,49.0,50.0,50.0,47.0,49.0,46.0,51.0,53.0,47.0,45.0,52.0,55.0,50.0,47.0,48.0,48.0,45.0,48.0,49.0,46.0,45.0,41.0,47.0,46.0,49.0,48.0,50.0,50.0,45.0,49.0,51.0,46.0,50.0,51.0,48.0,49.0,48.0,46.0,48.0,50.0,50.0,47.0,48.0,50.0,52.0,52.0,49.0,50.0,50.0,50.0,49.0,52.0,50.0,49.0,49.0,51.0,47.0,46.0,45.0,48.0,48.0,49.0,47.0,45.0,49.0,47.0,43.0,50.0,48.0,42.0,49.0,48.0,47.0,50.0,50.0,48.0,49.0,47.0,46.0,50.0,50.0,47.0,47.0,49.0,46.0,45.0,44.0,51.0,51.0,46.0,47.0,47.0,48.0,44.0,48.0,52.0,47.0,47.0,44.0,47.0,50.0,50.0,46.0,50.0,46.0,48.0,48.0,49.0,43.0,50.0,48.0,49.0,51.0,43.0,51.0,47.0,50.0,46.0,52.0,48.0,50.0,47.0,46.0,52.0,48.0,45.0,46.0,51.0,47.0,44.0,46.0,49.0,46.0,47.0,43.0,49.0,51.0,55.0,43.0,47.0,46.0,49.0,42.0,47.0,46.0,49.0,49.0,48.0,44.0,44.0,48.0,45.0,48.0,47.0,52.0,52.0,46.0,49.0,51.0,51.0,50.0,50.0,45.0,50.0,52.0,50.0,50.0,45.0,51.0
,51.0,47.0,45.0,52.0,47.0,48.0,50.0,46.0,47.0,47.0,49.0,51.0,54.0,44.0,47.0,45.0,49.0,45.0,48.0,51.0,47.0,49.0,50.0,51.0,47.0,47.0,51.0,51.0,48.0,46.0,51.0,48.0,46.0,55.0,50.0,49.0,46.0,48.0,52.0,44.0,48.0,51.0,44.0,49.0,47.0,49.0,47.0,53.0,51.0,44.0,47.0,53.0,48.0,44.0,45.0,45.0,50.0,49.0,51.0,47.0,52.0,45.0,48.0,52.0,56.0,46.0,48.0,49.0,46.0,50.0,47.0,51.0,46.0,46.0,56.0,48.0,42.0,48.0,49.0,46.0,48.0,48.0,47.0,52.0,50.0,49.0,49.0,45.0,54.0,46.0,48.0,49.0,49.0,46.0,54.0,53.0,52.0,46.0,48.0,45.0], +"poisson_std" : [9.0,12.0,4.0,10.0,9.0,6.0,7.0,7.0,8.0,6.0,7.0,13.0,6.0,7.0,9.0,6.0,5.0,8.0,6.0,9.0,10.0,5.0,11.0,6.0,11.0,6.0,7.0,6.0,9.0,14.0,8.0,11.0,9.0,11.0,12.0,12.0,8.0,8.0,8.0,3.0,6.0,3.0,7.0,3.0,3.0,11.0,6.0,12.0,10.0,8.0,5.0,13.0,11.0,6.0,7.0,7.0,8.0,11.0,14.0,6.0,10.0,5.0,5.0,8.0,3.0,10.0,12.0,13.0,6.0,10.0,11.0,8.0,11.0,5.0,10.0,8.0,6.0,4.0,13.0,14.0,11.0,7.0,6.0,3.0,11.0,8.0,5.0,11.0,13.0,7.0,10.0,5.0,6.0,9.0,8.0,6.0,8.0,9.0,2.0,8.0,10.0,6.0,9.0,9.0,8.0,9.0,4.0,9.0,9.0,3.0,8.0,3.0,3.0,10.0,4.0,10.0,5.0,6.0,8.0,7.0,15.0,7.0,8.0,7.0,10.0,12.0,7.0,16.0,14.0,8.0,11.0,9.0,11.0,2.0,8.0,10.0,3.0,9.0,6.0,12.0,8.0,12.0,7.0,9.0,5.0,12.0,6.0,9.0,5.0,8.0,7.0,8.0,7.0,8.0,9.0,10.0,16.0,6.0,10.0,9.0,8.0,9.0,7.0,11.0,7.0,3.0,9.0,8.0,7.0,14.0,8.0,6.0,4.0,8.0,6.0,5.0,3.0,7.0,8.0,10.0,11.0,8.0,9.0,11.0,13.0,6.0,3.0,9.0,13.0,4.0,7.0,6.0,12.0,10.0,13.0,12.0,10.0,9.0,5.0,8.0,5.0,6.0,5.0,9.0,7.0,10.0,8.0,6.0,14.0,5.0,2.0,9.0,3.0,11.0,7.0,10.0,8.0,7.0,5.0,3.0,8.0,8.0,13.0,3.0,7.0,8.0,9.0,10.0,5.0,3.0,9.0,8.0,12.0,9.0,8.0,6.0,12.0,11.0,10.0,6.0,12.0,5.0,11.0,9.0,8.0,3.0,11.0,8.0,11.0,10.0,10.0,7.0,4.0,13.0,6.0,8.0,9.0,7.0,8.0,9.0,7.0,6.0,12.0,9.0,4.0,7.0,4.0,13.0,7.0,10.0,9.0,7.0,9.0,8.0,10.0,4.0,12.0,11.0,4.0,9.0,10.0,5.0,12.0,13.0,9.0,8.0,13.0,6.0,9.0,10.0,8.0,11.0,4.0,14.0,12.0,3.0,8.0,6.0,10.0,8.0,9.0,10.0,5.0,4.0,4.0,10.0,14.0,7.0,5.0,8.0,5.0,7.0,10.0,11.0,11.0,13.0,15.0,17.0,6.0,15.0,6.0,10.0,9.0,6.0,3.0,10.0,11.0,14.0,9.0,8.0,13.0,8.0,4.0,15.0,10.0,9.0,11.0,8.0,11.0,6.0,7.0,15
.0,12.0,12.0,13.0,2.0,9.0,8.0,10.0,12.0,7.0,10.0,8.0,8.0,8.0,10.0,8.0,10.0,4.0,12.0,2.0,12.0,8.0,7.0,7.0,10.0,9.0,15.0,14.0,12.0,13.0,10.0,7.0,6.0,7.0,9.0,7.0,7.0,11.0,10.0,12.0,10.0,3.0,8.0,7.0,7.0,6.0,8.0,10.0,9.0,9.0,8.0,7.0,13.0,4.0,7.0,9.0,8.0,6.0,6.0,6.0,1.0,7.0,7.0,4.0,5.0,11.0,9.0,12.0,5.0,10.0,8.0,5.0,6.0,7.0,10.0,7.0,5.0,8.0,4.0,6.0,2.0,11.0,7.0,10.0,15.0,10.0,8.0,6.0,8.0,8.0,4.0,6.0,11.0,9.0,10.0,6.0,13.0,5.0,4.0,6.0,7.0,12.0,16.0,8.0,4.0,5.0,11.0,12.0,6.0,10.0,7.0,8.0,8.0,6.0,9.0,6.0,6.0,6.0,9.0,7.0,10.0,8.0,13.0,8.0,8.0,8.0,10.0,4.0,5.0,13.0,6.0,8.0,10.0,6.0,6.0,8.0,10.0,12.0,6.0,8.0,4.0,7.0,8.0,15.0,10.0,5.0,7.0,8.0,6.0,7.0,12.0,8.0,9.0,9.0,7.0,5.0,10.0,8.0,11.0], +"uniform" : [4.94099688276,3.37966636426,0.152084487878,1.06823645053,-2.47160995922,-1.67217835522,-3.08230704891,-2.95505196895,5.5570074316,-3.79235743238,5.4639414782,-1.43562567439,-2.5590997947,5.59819587828,-0.849784163784,2.53018912716,-2.35719128288,2.5089473882,-1.16887266155,-2.88836999357,2.62205566111,-0.942632088538,1.14593776749,5.75270363979,-4.51338578122,4.21962298825,-3.37680615408,3.71320677007,-4.81030001699,-3.74041146859,-3.74231502005,5.06047855983,-1.28481803924,4.01228339978,4.09549202188,3.28244391148,5.15331102442,5.25951187572,-4.15255963697,1.90980619376,2.25265048746,5.32243465448,-2.30849882567,4.75592725461,5.91352424716,-4.34268816936,5.76505993287,-0.397824721009,-1.10627465263,0.201542041799,5.52243793479,-2.93363585951,4.62348906758,1.6790887525,-0.766136269178,-3.975786286,5.66560315739,0.40106903035,1.3171142045,4.2992662936,-0.963798296979,-1.09292960234,-3.28646985633,-1.31942254263,-0.509699999767,-0.872639402519,4.7573584135,2.63497081746,0.217902794514,1.7686963394,-3.99005059985,-2.72744583315,-3.30580975398,-4.76777229032,3.68792460508,-1.63951018949,5.17179449638,-1.33571736449,4.18456647555,3.1930671168,1.39405430706,5.92161366229,-2.36997382701,-3.56537896545,0.335555357693,-4.81836227668,5.1996444754,5.85595424788,3.95309441951,2.95092998203,
5.71692333873,-2.11886792661,4.50943675236,-3.15223484686,-0.456388514485,1.71168471223,-2.63991428772,2.59149039902,-2.3865977067,4.41740677273,4.98023627901,-3.86901365533,-1.10215973889,-4.37495879513,3.68031773106,-2.70041748949,-2.72476733482,-0.98218952419,-4.47491483549,-4.49394200781,2.88656748696,-4.53861674415,-1.25062628266,-3.72420530062,5.76040618702,-2.86529772156,-3.32519802094,3.56260524043,-3.96801141921,-0.906072161985,5.4953037114,-4.47883904076,-4.18807972596,1.92950682401,3.76947725667,-2.44165862824,4.31775707244,2.19872767985,-2.64811699634,-0.517000595968,-2.55739998222,-0.522881986848,3.21912777268,1.98307740458,-0.41939968261,-0.16907685216,-1.02514817896,-0.125491511999,-0.478563822726,-0.401306189172,-3.63589311623,5.82026613276,1.72505118395,-1.59974339699,4.3354144136,-2.77332192489,-4.58956352217,-4.30713681635,-1.15280111308,0.159014177571,-2.29709357963,-4.53148152956,3.86760343796,5.00582318429,2.26404441869,1.4916342855,-1.95845118553,3.84544726493,5.79836131926,-2.10056098213,3.31840643181,3.49072750298,-1.49178698701,0.735869919523,-1.47854749123,-3.32953770397,0.58559192112,1.59161455435,5.89756495513,-3.26874053258,5.90913510998,2.5226196632,3.8160420738,-1.67411954881,-3.50924255682,5.21726866531,-2.07608420415,3.5018041756,2.66292638138,-4.07961384826,0.849040634361,-4.05623813554,3.79254001071,-0.605901739168,1.16639109661,2.59611427385,-2.83362097382,-4.31202269424,-4.6740694134,5.86207693993,1.49558045706,1.09332575044,0.896488851433,4.69332995867,-1.87833183811,-0.95616803404,-3.82229942891,-1.10929397985,1.45031050126,1.31563981472,-4.40536641326,5.81066389505,0.135589204403,4.79257556175,1.936873166,3.7815413646,1.465557848,-4.20794606235,4.65578439774,-3.72596828905,-1.0163211423,-4.44260056431,-2.8971067753,1.51040475805,-0.353031158477,-3.2814989317,-3.99170452162,0.859300988889,-0.816520576089,-4.16757162441,-3.9566754884,2.22207731409,-3.72866153988,4.74528808597,5.99144816391,-4.35657076518,-1.83246616405,2.275042
50623,-1.59314196098,4.33229281747,-4.76469386363,-0.839024902383,5.07791263612,0.573322905685,-3.38333958433,3.98482166753,2.58764627145,5.30223653512,-0.0577592287995,-3.18410814495,3.04160303823,2.13143389322,-0.872111302839,1.34578150915,2.97241487516,-1.74128862304,2.32896506611,3.73578798065,-4.26703637643,2.7628303782,-2.8682034649,-2.10706750445,-0.893843320814,-2.0763490656,1.98716098447,2.60614723895,-0.336993724247,-4.60226302914,2.68364897142,1.64653414341,-3.5193535324,-1.20434246189,-4.27494255351,-3.5285603455,-2.7803319293,2.9984592407,-3.12379772719,4.79547979341,5.69892435865,-1.54998164474,-3.40024415063,0.744028923026,2.29232746897,-2.71810146743,-2.67662712845,0.698623606565,-3.02798433163,-2.04685394875,1.4436511594,-1.03941090761,5.59881368191,4.84482222242,2.95980342084,5.57640823891,5.44878403188,-3.48669914,3.34412311882,4.91037867236,-1.04773991382,-3.21836594733,2.57243448853,5.03290917684,-4.49235961157,-1.48303397967,-3.0771927322,3.7575132072,-4.41571711132,2.53507808667,-0.792854704639,4.05848967697,5.97548102355,1.41580678699,-4.68054330586,-4.52928275364,-1.49804341976,5.55680553263,3.75484296674,-4.75592603179,-4.06942597415,0.460303038271,2.44555611737,-0.670043844767,2.37806151332,4.86511627523,2.05793254338,-4.56256888381,1.16033324441,0.986316515576,0.136195501124,-2.71940429412,3.81935319974,1.31950307635,1.10805123771,-1.27917600308,2.77470100203,2.75904245172,-2.01895787532,2.64904428591,-4.14918905965,3.1425837477,-3.78948884154,3.54147006537,3.53701279805,2.66426785307,0.247143371112,2.9034742762,3.22851647837,-4.21524305985,4.91442233234,-4.4470209484,3.48706539958,0.42321427196,5.88175815179,0.866465704496,2.20489199895,-1.51976833403,2.15974739302,5.38691441067,-2.77751768537,3.44883155542,2.24307174734,-0.48768315285,-0.0578404974765,2.65168836328,-4.90508797134,-1.03728611533,-1.27503572798,1.40884474138,2.53919733799,0.477433157219,2.86264916222,1.42794848495,-4.2274430085,-0.353940088053,5.01212272515,5.27484969624,
-0.32548022442,1.19717445558,4.91127799505,-1.94197339611,2.83113612197,-4.81616513111,-1.90269242555,5.75010353099,3.38854191721,-3.00200252595,-3.97384781814,-1.12124769296,-4.31847401808,1.14304593645,-1.67395007536,5.14004302835,-0.251955319009,5.39592207509,3.50034606763,3.14788844378,-1.28748042142,1.82252001608,2.87831186799,0.671978303966,-1.59275319146,3.32592826843,-4.97475281396,4.81506514045,1.89480385015,4.45079662507,-4.44480537955,4.63042114318,-4.36650710665,1.4857639816,4.15181912295,2.79492042128,-0.435505364149,5.47108244642,-1.06608220314,-2.25632406759,2.9156868701,5.97010444516,-2.41303646168,0.342394184042,-0.726722582471,2.5070367092,2.42330341527,-4.94374298001,4.90758219901,5.08621589308,-2.92031823564,1.02852039594,2.75858564322,4.5118771304,5.45682593194,-0.389257408263,-1.572351072,-3.64267382038,-4.19781497753,-1.74646084402,2.31168058196,5.62050655834,1.8762273097,-0.0336430635652,-2.06924164374,-1.72823814627,5.55151521686,-2.77651847163,4.26740003869,0.519133972224,-3.42980562991,-2.4217303328,1.33381271049,5.13032865485,-1.06214461681,-3.93212776808,0.631613312599,-4.60127370415,-1.67464655219,-1.62334333954,-4.66702516146,3.40393842163,-2.48161373983,-0.695994300164,3.64605997587,-3.93606459123,-4.00949488061,-3.73082087226,-1.54647450825,-1.25528441507,-3.88123397993,-0.441025897519,5.65854978029,5.08797549641,-0.540843861407,5.86411298721,0.738983897113,0.135558662301,-4.88669071752,4.01683264079,-3.31858559419,-3.45586617183,-4.81708257252,5.98545705899,5.11136386491,1.36426884031,5.53589012393,0.874085620976,4.46650450957,0.0705781285776,1.09388466022,-4.23141513533,0.224831022586,3.17765742503,-0.3223399333,3.49519793396,-0.845182521628,4.05904770804,-1.00349712847,3.57448698443,0.880108400972,-4.29120585754,2.31986555965,-2.06436629476,-1.373719365,2.09660751388,-3.11190835721,5.68515087088,0.980569564237,-4.53673611648,-4.61515524091,5.4362925578,-0.834031937454,-2.99392265077], +"uniform_std" : 
[0.641322586884,0.502273451239,0.0633066931061,0.901113192881,0.553053579552,0.849965665002,0.987788857348,0.495741278355,0.393790583585,0.202313745754,0.434473314166,0.572857539946,0.63700783753,0.0150432813802,0.055096710717,0.894830197268,0.712002057883,0.556729987336,0.775128673285,0.209624989502,0.437998496746,0.595897939706,0.148206383842,0.478492430295,0.918578473075,0.368858982839,0.507570287613,0.855904789486,0.447595395537,0.262613042045,0.0269686098449,0.529356259456,0.144501359728,0.805451695801,0.308886372157,0.158847290956,0.555842768593,0.762166700321,0.683358356233,0.691429770719,0.974179045829,0.538703029438,0.587831884103,0.594516188766,0.800445222146,0.648934379325,0.586320117869,0.700591936942,0.206309648074,0.616994330572,0.0210493288566,0.0283189261581,0.813174521103,0.949569449632,0.275694179302,0.0778204218551,0.0888710384377,0.861662791655,0.930898397803,0.22138356938,0.0974565964326,0.861961606101,0.926600294783,0.0978528677199,0.148176608841,0.995443285194,0.780789435816,0.314591725094,0.155300377846,0.545130058847,0.4530917656,0.550142955459,0.488027169982,0.114522052846,0.872040858492,0.189146982567,0.26955569211,0.731675751697,0.227847210471,0.703566894573,0.617113368613,0.506485553392,0.965498310864,0.140442016494,0.521978553127,0.28496744194,0.805482524191,0.0479031953606,0.436341946654,0.48656113602,0.693565109881,0.115678825782,0.26193737609,0.926958084569,0.52493146834,0.592374905828,0.108491028215,0.110651471554,0.769018510148,0.901735176379,0.337548584162,0.357840629653,0.427243067908,0.192676590823,0.0628988576686,0.984820437837,0.795953965997,0.444264901671,0.0661226413622,0.624284742568,0.0657861998824,0.936793113743,0.275495560031,0.403632943426,0.674227433619,0.329522088919,0.828897574132,0.465741054805,0.169424945406,0.894719851747,0.404969651954,0.934344738423,0.364077896656,0.871081718414,0.52996015318,0.352743598004,0.590759994899,0.883009938313,0.925542717297,0.646916197118,0.56407708612,0.89164015939,0.878567363043,0.0
803345279056,0.645299025761,0.489912558307,0.31325936166,0.649143648916,0.191782591476,0.357196809019,0.167668772592,0.366377377637,0.0105492221018,0.506287847173,0.374674259026,0.518946991478,0.963137380057,0.232498132155,0.518885879126,0.720612144236,0.812713356596,0.0148552301729,0.287024074832,0.239434695251,0.638658699127,0.541385888573,0.633116948854,0.183681373361,0.640086983754,0.683997877181,0.841311175156,0.571378387739,0.0626849726035,0.92872092202,0.134986930796,0.424920075562,0.74782416345,0.706250453767,0.640176202128,0.919802226348,0.423193821202,0.363988852679,0.792221463507,0.743007965384,0.708505872759,0.471984535446,0.53456852846,0.659730843813,0.75032233149,0.290108401227,0.934921282606,0.103738898062,0.286476736501,0.634195753219,0.961059610724,0.0771812656113,0.89638265074,0.338677572711,0.394383734738,0.910273453376,0.172391639706,0.737479177442,0.867851681613,0.68200938383,0.809750426564,0.614335913968,0.0495009068261,0.0147681645627,0.27039065452,0.482417886171,0.441895509635,0.372759179888,0.737916772351,0.359738981797,0.840313684639,0.512171386166,0.62064316074,0.169990243052,0.578023707259,0.347531283904,0.0540640347315,0.623736416919,0.667990462778,0.319129303678,0.728595239099,0.0363859608138,0.163526462206,0.693630467409,0.691974471625,0.28565359994,0.459525237547,0.986766629426,0.491234580856,0.951467870417,0.321837223375,0.501222080921,0.66033700874,0.379954613564,0.368412137165,0.0506830108557,0.920887543229,0.546869957971,0.18546279086,0.799866695967,0.0741052844786,0.130518206648,0.747548194071,0.684778964267,0.754741717719,0.211537548319,0.38219831082,0.285459203031,0.0205004930487,0.198123444045,0.112889898649,0.421660774317,0.0286106557641,0.432900564028,0.712982481563,0.817773033948,0.678434700862,0.0945868389542,0.583637732933,0.957134392801,0.858030477638,0.208718956996,0.769249334087,0.989270966452,0.676953106274,0.943358609158,0.785165071099,0.554407997105,0.354831576917,0.765872261731,0.84146664588,0.0197043915558,0.35788
2028024,0.453994863918,0.131448599094,0.379673634977,0.175142942745,0.930351983703,0.223750810593,0.16726941031,0.555247340996,0.597213103726,0.296979357855,0.934238193673,0.700949014228,0.635490759459,0.67110480888,0.411480517846,0.201866529915,0.851358303519,0.157993654602,0.281886181716,0.338242965442,0.0768850621147,0.214034581197,0.180091051568,0.275666185688,0.561716161419,0.258108134598,0.713748919656,0.624236671987,0.356374700975,0.672891125298,0.877172344026,0.232126314969,0.555888930942,0.152923887458,0.687825159925,0.712501933538,0.145243765192,0.657393401254,0.745888000982,0.946375126112,0.00703872330563,0.47039360567,0.204572794995,0.93029209831,0.988666910771,0.614796145574,0.381952899147,0.862952392065,0.123424698243,0.854955959387,0.255512434808,0.433413884204,0.138304194071,0.150208704252,0.60376887389,0.953165626503,0.613461096322,0.20153877527,0.479955685401,0.0457946710486,0.191813234821,0.107728703184,0.703366824831,0.509103408712,0.39068597495,0.0693948181024,0.659808230728,0.463801728494,0.203935588459,0.669326062228,0.571431351601,0.583973543434,0.800686778025,0.892485826638,0.552935206224,0.120792493818,0.43171618213,0.724419497412,0.726933206002,0.395496558508,0.94019548363,0.369010998578,0.598894466181,0.822211421067,0.676630478015,0.651261047333,0.798025181709,0.0843142184507,0.743775602728,0.781164792664,0.512076178611,0.824736083818,0.376771551899,0.00749564241925,0.911531685187,0.939601344054,0.268560099032,0.126821315556,0.302122698924,0.949943244838,0.654903218412,0.241443999516,0.0122129034247,0.976757760263,0.602770023776,0.0122208691842,0.0379635131383,0.059185398837,0.812043029581,0.604854059409,0.473251552099,0.0247043708226,0.0794773756944,0.577130146117,0.142698406869,0.389863170949,0.281521362563,0.363286926158,0.749101602301,0.270778186492,0.559736698789,0.202456971767,0.969397912913,0.452261153497,0.205979407157,0.94069774943,0.515566303925,0.418247629343,0.518326620172,0.10988417515,0.00966089608448,0.061086124458,0.325764
480866,0.983384539852,0.981242794455,0.393244791201,0.0925973609569,0.654035683929,0.183074561161,0.129482640916,0.470712192061,0.35916679616,0.0700099868278,0.569965306894,0.415772749268,0.807750259806,0.915863661805,0.00166765210587,0.945795472763,0.728553357642,0.453719839886,0.964822455432,0.939876653014,0.737909060646,0.691051513731,0.173530870311,0.542331742505,0.980777137726,0.172462481893,0.225927698077,0.373028510162,0.0245020687028,0.0671893285746,0.483750753278,0.879707540984,0.515288015843,0.634994185734,0.0849667390173,0.0946947977214,0.170161816398,0.726614537052,0.0272108053648,0.94040242333,0.404651507419,0.288360139216,0.420158503298,0.991480713253,0.941193995417,0.299285229866,0.523892406996,0.155681590068,0.954538310478,0.100126880719,0.451208633599,0.658124990454,0.155513180074,0.194696923483,0.781726014717,0.65817038684,0.470353704899,0.763454253268,0.90364292975,0.470720472691,0.586339372936,0.545709621721,0.101509736894,0.312662802011,0.371032152437,0.162541651394,0.876567585309,0.473525283814,0.177596525504,0.935008818798,0.795324269788,0.379570048512,0.438164516349,0.964855690508,0.125438426235,0.188315459918,0.962941911093,0.11838242198,0.0631538670718,0.325826031456,0.892595331666,0.164429151443,0.720804025232,0.214777341338,0.421513177894,0.197625753309,0.930922723877,0.673646072129,0.47748159173,0.262513637226,0.598695075453,0.539652249546,0.356434773507,0.84491450207,0.100209382046,0.418275026898,0.419228856252,0.0331631721683,0.833109002515,0.022816076518], +"beta" : 
[5.85049379045,5.21244596453,5.07065422106,5.97773253677,5.06708769233,5.00591832601,7.31543149453,5.78646382546,5.07470465641,5.37899225634,5.55685776784,5.00000825995,5.09108817165,5.16736704821,5.89913711145,5.04632339569,5.06792614547,5.02343097976,5.88046364616,5.34040516182,5.11729311707,6.07025434789,5.57998263389,8.81198584306,7.14111733635,5.03682326118,5.32084048698,5.02084081818,6.70165982502,7.30520245414,7.67591993861,5.04451974459,5.76288717089,5.00065624499,5.00672824343,5.35142826856,5.02909009781,5.6554247915,5.6799476842,5.00721523719,6.09785759829,5.01072232241,6.169279956,5.05532374261,5.11592165043,6.16766640159,5.02007177441,5.52126884915,7.76871883377,7.25037504727,5.30951566535,5.05884263422,6.1047978282,5.08227521354,5.23064843808,5.10719184969,5.50619358346,5.17135913697,5.0190301807,5.08738503586,5.1543564601,6.76967485283,5.34229903969,5.00359434198,6.21282110428,5.09529279357,5.90702666014,5.50446278676,5.10218926737,5.02685011928,6.91920361923,5.00733109478,6.50250821421,5.001023564,5.09145817895,5.07016814175,5.00500916835,5.00106289986,5.37290922283,5.27196468161,5.01288037189,5.2084985286,5.03013832879,5.36929405356,6.67682791651,5.74862416091,5.07437293364,5.04010433672,5.82309590197,5.15930708956,5.02547566679,5.65541551896,5.11504461441,5.74808018074,5.07068717355,5.03008172833,5.20608423771,6.93205350325,6.76795594857,5.13772021468,5.00082669716,5.93443615307,5.04259977248,6.37988908805,5.42858807128,5.062614606,5.17216852602,5.14308978382,5.09815020131,5.22572505752,5.11773994555,5.00923245104,5.34178733222,5.88925569497,5.00245191524,5.0219891334,5.02748348998,5.70401729116,5.65640275742,5.0711178719,5.00031816269,5.53802658249,5.17367338543,5.65721764371,5.05420553573,6.34393239748,5.01386433194,5.2889049356,5.00588455903,5.0001739514,5.0048097054,5.58251872772,5.48800203809,5.0555990542,5.26733419537,5.0961659335,5.05496792955,5.50529625082,5.45290864535,5.00630804853,5.92689114833,5.95017573287,5.12912666199,5.54819722806,5.
2039773489,5.00024555037,6.8070838143,5.58143123497,5.0186773661,5.42843038824,5.9906100242,5.38179144623,7.05323648054,5.35165446997,5.47241442773,5.40531724651,5.0006815916,5.25893189048,5.29632698864,6.97597721359,5.62440499272,5.34457278932,6.03624024123,5.01791375861,5.0289040214,5.11256204003,5.20916050857,5.23705084902,5.43854809323,5.16647686184,5.65669686615,6.03279310585,5.07285516467,6.58901837996,6.79034300328,5.00280654363,5.72194971214,6.4335039258,6.78725122992,5.00836934462,6.21979484983,5.0075501364,5.01072925541,5.0138184917,5.10422208577,6.18924119907,5.57749629802,5.04902408931,5.31868758541,5.81449096838,5.1214863575,5.22994802805,5.43975185294,5.06538322749,7.21106262827,7.64785255389,5.05674806405,6.31239339285,7.22283149658,5.01049484067,5.19683879241,5.09825203414,8.83132520289,5.00967242066,5.51767114767,6.14877085052,5.08832614411,5.00681856885,5.00052984705,6.01659432533,5.68963742432,5.79534256437,5.00142271553,5.8181745129,5.49931399542,5.92723717537,6.14388473949,5.10938046325,6.08359118263,6.71377563437,5.03031284778,5.34186406081,5.96554753595,5.00388752097,5.48709927951,5.04387359515,5.49102744479,5.54152244519,5.66223480738,5.34242919521,5.16803038415,5.33554947197,5.33223277973,7.06737383734,5.68469455971,5.06603165211,5.6380012428,5.00021665736,6.50296085308,6.20217919393,5.15069449866,5.01004141242,5.00657473435,5.23037507874,5.00210075889,5.15261815755,5.06996175321,5.90110213065,5.98882133754,5.00004704004,5.57643855458,6.78823702579,5.03429037873,5.01153030744,5.0585305658,5.01785362704,6.12223146093,5.053870281,5.34745130965,6.80968152875,5.01561151749,5.77406766134,5.2856970204,6.72330697215,5.04096925756,5.19844677565,6.58483192495,5.47412013378,5.92329762463,5.29893225797,8.33495989819,5.19939619545,5.52002555507,5.09177592186,6.26103281687,5.21584619611,5.37360033588,5.04796922652,5.26555136985,5.40582425229,5.34782565871,5.93164897254,5.17245511222,5.01578214489,5.12756506696,5.13896653173,5.46184279782,6.59083235177,6.
293729294,5.00212151537,7.93351659913,5.57508677216,8.27430924134,5.32037660647,5.05916419472,5.75977922954,6.8733531145,5.49076666387,5.28252951131,5.53758368732,5.10841458633,5.17970377904,5.6817046048,6.48867721127,5.0122746782,5.78567782994,5.31654791743,5.31643330823,6.86219541917,5.11409950122,5.26969807148,6.22817649986,5.59599434331,5.04649691006,8.51726308749,7.74644438896,6.36047146637,6.26102629814,5.00252543204,5.02261699386,5.52759640291,5.95726567976,5.65122627666,5.38391897283,5.2357689963,6.29560159836,5.38631858293,5.32712248372,5.03377635836,5.01499454414,6.24667362146,5.00749556937,5.08906501268,5.89651139304,5.11414570023,5.66754795268,5.49290775605,6.01997357488,5.0005757182,5.08971814776,5.26496851629,6.22930074336,5.30059046131,5.10477931387,5.76091202563,5.21994377271,6.48583314694,5.62861229243,5.18641671933,5.92159452862,5.00037988646,5.07576115639,5.84649809831,5.30336000909,5.5873754086,6.93702905576,5.09985979124,5.00799471284,5.00201062403,5.10998421637,5.00669936954,5.4863676172,5.76095607399,5.01640237443,5.69665000981,5.33872530095,5.21427666221,5.02934382809,5.02271167325,5.00149952419,5.53345240543,5.00000410096,5.0404971886,5.2365290203,8.18954572768,5.01224617643,5.06573153342,5.07761479912,5.02096011617,5.2999294958,5.00154039054,5.34937237761,5.0482821032,6.96933576481,5.30243284095,5.24014448849,5.06047069543,5.30706902077,5.00167346971,5.33133291133,6.71794114321,5.11461866706,5.29230114507,5.21302696594,5.0254846741,5.01355395916,6.12290890675,5.07720453915,5.02282694469,5.01315779485,5.73373568044,6.02647872346,6.24910030381,5.05433772571,5.09820783163,5.00074336489,5.55441628919,5.18336702291,5.0090194697,5.22866811688,5.40904712663,7.92867901496,6.33259272089,5.20256845629,5.29209873314,5.37324702275,5.00214663987,5.72588480981,5.01607694333,7.89194085497,5.56651398908,5.0228299548,5.48817828506,7.876897102,5.13818010279,5.61058224818,5.3172239619,6.39136992112,5.01370811787,5.02506106002,5.49581934891,5.13769593786,6.495
48844493,5.36836022876,5.20207116155,5.20978364036,5.81889314196,5.01181447295,7.20812760853,5.02394728416,5.81278988953,5.00153016661,5.00177767271,5.11118798548,5.0007402966,5.02280054287,5.03341758946,6.28060848763,5.02377637811,5.89602305564,5.61092446091,5.27649878517,5.68406005403,6.05602295961,5.38068063436,5.05924695009,5.23858897507,5.05209761333,5.06463295681,5.06301948811,5.06327128273,5.22909870866,5.51075923844,5.21891194219,5.08883096523,5.09895239096,5.66941984233,5.01681794776,5.00597357028,6.87556724012,5.13018807678,5.36623298782,5.66410515544,5.22302758816,5.12770982759,5.08566544477,5.55532066532,5.70621282514,5.05093746427,5.34108431674,5.00470684237,6.23171040566,5.70887956287,5.28900035873,5.02438488984,5.61523467551,5.06419435981,6.32495050302,6.53163282487,5.2174067149,5.03976523607,5.41343155571,5.29689448172,6.11928120444,5.26486548936,5.60696900246,5.058876487,5.8957845307,5.98635366819,5.25057476532], +"chi2" : [168.375838384,139.09834212,201.156634322,143.132559095,119.074399249,82.9303520596,131.517110499,194.661912619,113.347710915,160.326410097,160.501933108,111.839510105,100.020543022,170.704863677,165.718061354,172.894368073,135.92497862,109.641685149,194.843515436,145.529884812,163.768793244,125.31871065,105.466361115,166.089076484,149.132698173,136.215843468,186.713892437,182.717053676,211.591455483,207.217535191,165.96975335,183.094732596,146.75765029,108.721986017,197.344128895,101.237714676,192.49016013,172.580260794,190.24064548,85.4560124124,134.518509866,146.675642297,107.521262221,181.167344619,105.867204062,123.525532338,177.959056031,163.14723022,127.181584442,155.476663933,123.369955953,177.613114062,143.812176477,161.52391526,81.0219249374,103.211794832,92.5251349721,120.233534297,177.735357885,191.202023667,319.480798909,137.842412037,107.232977899,186.734067714,80.9446068221,164.722367204,215.917735701,131.561553414,154.81487103,148.761336347,157.700285603,140.22942277,203.696275705,161.432904615,113.078197098,148.68
2822598,203.535241133,204.82437125,171.190785987,131.701207406,139.149431001,197.162860613,115.330793317,105.820822598,181.692471935,112.343922021,167.218574421,158.312894048,122.963138423,115.727799625,119.604496978,104.434332358,224.00849947,93.2287997707,186.405425918,199.248709395,170.506963964,108.433026681,112.482137368,161.740684075,148.571609623,219.321718484,168.691684332,149.10041198,139.045316202,110.267970295,118.119859602,168.163810233,158.547766837,78.270251526,110.363873445,160.731944767,216.113767284,147.461430349,136.012610591,110.070189764,185.182522898,112.415170046,111.468029255,176.468967763,164.678383217,112.807590029,171.080281616,131.076215077,141.029980223,95.0468765512,214.388647666,198.078399036,214.320019622,173.768324222,172.331241426,189.93409887,112.767913271,138.153630702,125.098721966,164.622560448,169.396884667,74.1534308213,97.8425884554,228.520528,125.473966611,182.168220269,155.333760993,235.296730951,165.27400769,233.129483543,188.535973197,203.205911622,152.06542047,145.479634497,152.668310316,175.119630277,169.815112424,166.062269723,110.226234088,176.43719315,126.169207438,177.441937253,192.882653852,105.587930339,139.330788203,141.263042942,132.079880237,144.521558203,160.699558927,105.33401167,111.226938064,116.605674065,78.5979732203,135.224899649,114.473228584,102.980918321,155.980934504,147.085983824,106.510690166,110.417626439,151.942346738,197.729793823,150.651303905,102.730629379,214.966618558,193.811673499,76.580710265,109.641872498,129.530776075,219.507452831,180.882374279,155.959735628,142.168135676,167.926199407,198.164663771,106.606224841,190.166778116,151.253766018,197.322493072,153.020943526,133.253206756,141.493631165,109.866402632,121.168047967,207.065375901,98.497249573,206.795174544,128.856450868,110.983630783,147.44845091,154.020235556,99.5305249169,96.6400992062,155.866267766,159.936496539,151.058147774,139.530317236,204.748403099,184.553266249,124.945929359,174.951654466,147.408567237,159.438705762,196.1
37147857,140.4991383,154.946192143,118.273485753,114.181678234,77.68795617,176.65854003,165.630112418,165.506762356,91.898435627,184.705619359,180.048393452,123.120466252,175.549934966,219.127410757,156.434460102,133.472138548,123.067964809,146.636530913,117.264685257,95.8474897052,122.493398404,129.218871273,128.246169483,112.365301055,174.284309539,165.030770354,125.541131345,159.151155022,162.000183964,154.346850649,141.751254663,116.817303654,159.077127009,148.617453758,162.432250448,102.228729028,51.9282014258,170.365148929,167.684628266,168.956954592,152.034391929,159.081932304,118.460176395,181.50639698,148.027864585,149.273073171,80.5265102836,120.089959754,217.901820405,160.276562506,114.160325988,183.951404207,137.952571471,119.114463093,117.846550773,181.095633983,158.470923157,171.472641091,109.070110785,58.225679908,148.14961346,170.998972932,160.763215316,123.439947358,137.054222465,220.304198694,228.831758469,188.801516838,172.5284002,158.490585009,147.559421867,136.276561333,157.814342018,161.138377499,147.868274744,153.081047396,112.763733441,125.883487138,84.6286856151,155.210016702,185.278109043,219.930361253,144.440502463,184.782530753,155.320147583,196.696114173,195.616986145,94.4112355767,168.812602367,127.694101229,142.128287813,170.597506836,124.137131165,108.479645763,188.8858969,185.49072511,172.348849633,122.896229839,111.52467989,154.640507203,121.052500949,117.317148827,146.893661893,90.1874300188,118.588798363,87.7569231877,104.247335291,106.983030475,163.374926336,128.478285522,156.97804643,122.243317219,225.969543059,193.204356451,173.705247331,92.3884230772,143.014936893,86.4155249468,87.8921680341,120.238545469,154.498546823,139.441309318,91.9081357689,172.944085041,159.158979946,157.782662205,132.701855802,168.327492095,190.496311523,107.782933798,120.89086819,182.30478273,152.526386357,140.309790964,185.809417885,127.365336352,181.999975985,139.19018381,188.312500742,124.436998929,205.14087336,175.626601333,96.2936849386,192.25989
4832,154.106302893,264.078183855,137.721304876,235.579195067,106.486275127,168.849188106,85.4126546055,182.67890815,182.58684056,169.92955288,150.200018397,132.806912369,185.602399457,168.266183606,107.418816373,104.865003894,126.264550433,157.128410001,158.711875786,168.428486454,142.092748903,176.510736764,154.680547929,180.787512217,184.80087458,117.911843452,209.026816226,163.153380709,88.6794389655,179.195485753,106.172367925,149.282174842,106.348891217,178.615111553,120.510514917,171.483072329,122.003647873,96.1004324752,89.3103285546,123.03135274,185.687501802,98.3098197515,136.038717303,181.723736312,161.639962575,163.64753172,160.349973434,146.003554887,146.50065102,139.423166341,122.03112913,107.033712932,135.3630556,142.78841725,156.06961572,152.194270973,161.767669835,88.3160228968,189.638363748,151.681515321,185.096951603,114.736802743,164.746609399,255.99364408,209.553731184,118.690276675,132.410827131,197.925669135,196.971921001,152.241635146,110.872953387,191.878159575,143.286365246,184.259486263,86.9611836067,149.535068923,177.34298882,201.611783468,142.302129502,203.014482673,159.115028994,164.94837894,151.073137654,163.889565496,201.146950379,184.705781603,99.1398901341,127.254293051,171.173341028,249.758311803,138.188038977,166.699602479,179.231294128,174.911313153,137.45456733,245.422991673,233.705655817,72.9310055291,172.664402369,149.640783649,227.752627927,122.859337082,147.704789297,188.596676893,119.588319929,200.152585638,181.547096939,191.778706436,203.520522959,237.741063055,183.14285084,156.725561932,145.493629145,76.4676096338,230.429379023,161.30672709,144.237519169,161.031411854,93.8719672039,142.79767329,146.901088821,148.664319793,104.04701203,162.844762068,143.541163527,191.748097587,96.9335544823,117.837441462,147.411840719,151.911057566,187.590166929,124.583170851,151.214207828,152.212352141,167.133230147,183.043532687], +"exponential" : 
[4.76565351397,23.4655890267,17.3302258117,7.21118392567,5.9181490791,5.71390134069,5.56483268257,8.78729044905,7.26944377923,48.5883683649,10.9661845144,14.6221404452,4.82459639037,13.3533531323,10.2546878496,7.79889057751,6.27811889271,9.69994167989,12.1986172312,24.8354506496,14.7308224472,17.1509874497,38.3149834944,16.4741413313,12.4123547662,18.7896070283,6.3332705595,18.0318686146,4.32872242882,5.93187861649,17.6387111148,32.2745451353,36.4415565403,17.926380522,9.34505858209,5.01577344195,5.54445137017,5.49364774262,10.2540115866,7.48909773161,4.70058039754,11.1113135533,13.6712225662,15.7031710166,10.1508216129,9.28016129234,7.60070969187,24.4253311571,51.0988867824,10.7036565013,23.5305701439,11.7550128288,12.1447601002,27.31325906,7.31186363021,26.4347966261,8.82368012554,9.37666181956,20.7412525057,15.5299409246,10.8512278711,18.95600428,12.9399581783,4.44789753128,9.47561125239,15.1943242803,4.37445388485,22.5485741568,22.8230498174,5.58846026208,14.9191232297,18.1907127062,31.0199868016,20.6222912606,9.12043642709,10.2014990344,6.91287056081,39.2512716251,5.11336295138,28.4262580975,7.29694449457,11.3808862804,4.99505302663,5.28562370614,18.7837556966,13.9110964499,15.4022246939,4.27524805785,8.25599621669,14.171720933,8.21713457857,34.264789167,19.0545049652,4.50249372328,11.7743970736,9.6067398659,12.0099801016,5.03204436029,4.4874470565,7.99922931579,5.85186251953,29.5479616418,44.9368806599,7.6292814463,11.6949977219,6.86980804924,13.6841692246,5.49741543172,21.3828108547,8.27969015378,7.91614962023,5.43616799477,4.97069134614,10.9557645661,8.2588648758,22.3932696178,6.81497645443,39.5224008876,20.7213968355,13.8858549736,23.4937673497,9.17665426071,13.9538113302,15.4934059739,4.65391183928,4.37836784555,18.4806411672,18.9121901519,9.59167244007,20.9525145801,23.6654210173,16.0912082221,10.0074784789,4.29561120099,12.8309267266,5.05997438373,21.7115643899,11.3621321718,12.6062671239,9.39692487022,15.3333162097,8.62174373036,12.2655760352,5.00675199
963,5.17896820755,4.39963583331,5.8512803409,7.27054061036,8.00096344954,4.47687998348,5.27974173415,44.5033747046,7.74474293455,10.5274602241,39.1515140309,22.8540184112,4.3718134844,6.92013180752,24.451588232,18.6506710233,5.49508250078,21.6397811563,13.7583867205,5.19207988814,5.55095600358,15.6187383443,11.4401553485,8.06647257131,13.8036312389,15.8319748285,15.4888099945,6.11709712486,17.4956739452,22.2989036415,27.2727843469,11.5068674969,7.7882747886,9.53405284264,11.0192313163,5.8878524442,4.57143792572,20.1326313101,18.5992222917,12.9196072316,24.71098014,6.02270613403,9.8356712592,4.84174463272,21.2696511928,7.37711828581,8.36068892446,16.4713324242,18.7840042393,17.2813349902,25.7720536302,5.43540560933,5.78000806222,13.5066457714,11.5576313876,5.29317813977,25.0446684024,33.402260053,18.3050668152,11.8128131863,60.7021111486,27.1040433384,14.2447422032,27.2509245217,10.3730606664,23.1988507251,11.3306464793,25.7255217192,8.25880350944,8.20107028867,7.799296789,18.259459841,12.8825202553,5.91471274111,11.7191107138,6.41237618769,5.38287711973,22.7896093456,6.49279584485,8.46051713946,30.6133536124,11.2195489229,8.50504033166,30.922558382,4.53919384568,12.3260545147,7.93039245412,11.2257804605,17.0083491797,7.20943391389,9.46998209906,4.56768156949,44.893822123,12.3158540653,6.96255539596,10.9850648867,10.7428279238,20.3910681533,50.5771413045,12.2354124463,4.75458059518,13.9627591714,9.8006861507,9.39772953828,6.23305631655,4.75871994421,33.4843940155,21.5109430141,5.9402195441,10.0177147606,29.8195350494,6.52197746535,9.60378207187,12.4957470886,10.8170353983,6.93338806804,10.0673292047,22.6193968444,23.5194414491,11.3815405772,16.132849759,12.0235626778,17.9364054621,17.937534609,8.12072285071,4.91838473127,4.66018762429,4.43417046174,5.72619870769,14.8161069919,11.2784136963,17.18459995,14.4624071882,5.91950174455,12.7004307922,8.45895670238,8.41825146134,17.0365083384,9.88626182811,24.2278778998,4.52967042896,6.83915440385,8.04533928118,9.27367748368,
18.175239439,32.1642986476,15.0637096603,34.9023651482,14.8000901592,16.6847070261,6.09207172765,4.75875714383,6.4048092141,81.7590010514,15.8881533637,10.0069904469,13.2068952854,15.1581329167,8.00728994215,5.66140117738,41.9307035342,33.6589547182,10.6561088118,19.3645773924,5.76223350168,11.6408513404,4.78225256911,10.5110332871,9.92121285185,19.0190573444,30.6774930037,6.84045847353,7.34109745824,8.30475216361,20.6212112342,20.3998910662,11.1219916054,5.98179683569,5.2450399299,14.9712626337,4.71623346052,21.577582762,5.8691799681,5.550817106,7.00644046109,6.17774414431,19.0988837995,9.19996199654,4.85123727627,16.8043359024,25.3147814713,7.80222437653,5.59732538619,12.4513061851,13.0417291581,6.70352534044,4.61212667914,10.1865923981,4.22305639874,10.4598134922,14.0602850701,9.42025273679,20.7550484127,12.5803655114,7.77504216361,12.0548495148,22.7628753503,7.28494174766,9.36516137265,4.75247233676,21.6911156845,22.6444211689,27.3024890511,14.7928279867,22.2007847519,7.0304353674,6.19514145039,24.5382727126,16.3925226638,12.1265150182,5.4061289164,12.038606093,22.4931905902,4.85911246633,6.75017540759,6.71313999114,16.5342351276,19.5767978409,10.0952179732,10.1964196863,11.4198683999,9.3556214161,5.86147872843,32.0052568287,15.8982707247,24.9262807546,4.4945128058,53.7522068525,11.5404851386,37.980159132,7.55784404973,6.79198905554,16.366848673,6.31021159471,39.2661231306,15.4344070825,9.63075255419,6.16679334589,20.9267482783,5.69957756461,27.0672135061,6.99334006272,30.8170124886,5.63915553631,6.01222743821,17.9882128125,26.0202976191,8.34680519404,14.7452990768,12.4987342899,5.1247035208,5.18590610667,9.69743307819,5.25000170102,5.94550588136,48.1947371896,8.73557082861,10.6185927582,17.504327595,28.6533826974,7.92284303079,10.8598924284,14.870238338,5.08656820136,14.711494763,7.97045220493,6.94242906309,6.63485684494,15.5885041792,15.8982463501,8.80208889347,37.7206846135,27.8732039219,12.3772981505,14.1851187038,16.7062905064,7.29748556564,5.93806127783,8.
98440737057,16.0057380087,5.17478843575,15.8052456829,8.1273215991,5.99470466871,16.1404176145,10.7687051394,8.17257184607,10.0675233254,4.86839928082,27.6029233408,7.99970565941,19.138409992,15.7394775639,9.72918754409,37.5837463601,16.9654907448,17.8985733749,16.6005926609,13.9184190592,5.3158304426,7.11090402433,6.1073211212,29.7454562636,4.35062283486,4.9861955333,5.63336984783,5.05159703229,20.3757893916,12.6157599354,19.0843445478,16.0852339394,4.69916800953,28.6381910825,7.78612312239,7.25505189197,6.37816647021,22.0226891938,6.38556305972,19.1764993847,23.5977793207,14.2979913344,14.0568868303,6.40516472269,28.6865244105,16.3742550435,13.4108068463,24.5136902451,9.54933347948,17.2320045286,4.35365875126,9.0082822626,18.5598788039,7.76124321877,57.4690676097,10.8766907228,13.2776170349,5.8720594691,6.86006479237,20.1614660595,4.52784758526,7.2386749503,10.1454596066,11.9637359479,18.3889440273,6.61042761757,12.6621847016], +"gamma" : [23.223571167,25.1568471977,24.7367226307,27.2281300106,22.2209481255,31.6728345352,23.0048315536,31.6171924228,26.3538826822,22.1778160556,22.8069040071,32.0919910693,25.8395470289,26.8102494176,21.2884795096,25.5051261605,20.5085542736,28.4339718417,23.2406280636,26.556996634,23.9163136937,22.5727579694,24.7159481126,31.2380388433,27.3624849626,25.533391544,32.6402336287,22.3641831604,31.1124939162,23.0924368142,31.853418096,20.6731474886,41.4240255071,21.7902874944,27.3122945077,38.8769831794,23.5867967775,24.2007922531,24.3991501482,26.3346790731,24.3017246803,32.4112049241,27.5879321005,22.0308349033,22.2971987366,22.6470463235,23.5663754836,21.0992802565,20.534640209,22.2816591277,20.6086421837,23.738746246,26.1037965412,39.974907988,21.073810426,23.3753710841,27.0974653435,35.4857882654,26.7723138508,33.9187945228,29.0467942769,32.3988813512,27.6219549074,28.7008700354,23.103450571,27.1308857429,28.6306256745,22.7145135803,30.9748253516,25.3424261478,21.665608845,29.0565984109,29.4115824455,24.0441765382,26.6770217691,25.7
031133351,25.9402612902,22.1959898773,23.7840847763,24.235929577,21.7032516647,21.0185983986,20.3626229481,22.9573983911,22.026328136,26.6703070396,30.4135650276,21.8883598502,25.6440901322,21.7853316661,30.1074366173,33.8663684217,29.0816216452,27.6738705124,26.9346977191,24.2524245005,27.1671944104,25.3923639884,27.6935012577,21.2748487144,23.4627217848,26.3672971148,36.4640595222,29.2861787349,22.3301958946,24.8289262493,28.8156259499,23.1403757029,21.3657421969,28.8591751146,28.5930274033,37.4299235553,31.0472051187,21.6207624092,27.6879519701,25.1561318523,23.4983274897,27.2788343876,28.6556937949,24.3918117204,26.8210228906,32.7114822185,27.6467578698,30.1105552314,20.9824878976,25.2098701457,24.3575288736,23.471116207,24.416183716,33.5983757115,23.5982284406,24.552706216,23.1275766004,23.9763468995,28.1157839985,28.6558478534,38.526266459,29.0857307264,22.4602575367,25.594306962,21.9658157183,28.9861857145,25.5566808224,27.4687372745,24.269538634,27.4539230574,24.9315740058,21.0165577868,20.9832188766,24.1774458571,29.1850355854,22.0261983551,22.1042094308,23.6375042671,23.2016304435,25.9355892383,22.0183154016,31.0939297148,30.3542042651,21.0398591525,34.2592142217,28.4088968777,22.6799715295,21.6548176031,22.1498098933,23.3273575648,31.3115003171,32.8748839199,23.1727141553,28.6585646871,28.1509286152,20.4473859962,26.5121998219,26.7998967044,28.5885960508,29.6211193874,23.6363891366,23.7653588963,26.0506879922,33.0489122701,34.3660646347,25.3370304151,24.2630603175,21.5636661732,21.4749262196,29.8749218179,35.5221681127,25.6043499603,23.0071918295,21.9265404463,22.2249036338,21.2760028469,25.3541305916,25.3618911644,25.8259228725,24.5249415615,24.6431839709,29.8766637035,27.3170354336,26.7217354544,30.8915972475,28.4199397246,28.3813199978,25.6504765962,26.4174230035,20.7949145003,36.1620496768,21.1646351673,26.8317122512,28.1311256753,22.4394783031,24.9139168463,37.8557162177,26.3022124112,24.1707809354,25.4916883669,33.1705507706,27.6212202619,23.0902106
078,26.4859387838,23.1555877269,21.3252416213,27.4880425949,37.8285306263,23.3959082364,24.8436764255,26.1350737407,27.0492995776,23.4377139535,25.8385915188,30.9081654252,21.2958512686,27.9845437989,29.8216772549,25.114090585,26.0737220126,29.5111051568,24.1092214996,23.7232602785,31.7564679115,21.2274985495,26.8916360957,23.2220457507,27.1047612332,23.1105931613,24.783342353,22.6791405219,24.7035738422,25.4114784919,20.3800988067,21.2676142497,43.5096312664,24.0540796996,24.3946769183,21.3783647871,30.2020223242,22.5437006438,27.5122183086,25.1924366503,26.0731942123,23.2551039088,24.1611095847,22.4978881891,25.5478497444,22.4307630131,31.3282921747,21.566934283,26.2013999123,21.9051672478,24.9348808588,33.3068799592,35.0961669494,36.5973660513,31.8688078442,28.048940055,24.219625495,31.0943755885,22.8012742514,25.7923449761,23.1957776265,22.371815164,20.8775012647,37.7976054016,27.9042775265,22.7579291716,29.3946624075,23.5213756618,25.8370278106,21.2965468029,28.3070588987,26.0472194507,23.179370588,26.6950966467,22.1631235097,29.6496187213,20.9254497424,24.8315891975,25.9518934869,20.8559297924,21.3037873627,29.6626319195,24.8881851409,33.9171225351,23.6495993745,40.9856324604,35.0954065583,26.335451151,21.5632005306,25.627476554,26.7797973822,25.0324101849,21.374852156,24.6835883493,28.387553373,23.8619571206,26.2700340422,24.9829865798,23.6209806512,22.4876755653,24.9914864733,30.09931483,22.2178273252,28.6346072988,22.064585711,29.6771214971,22.6499838322,23.0954408797,22.9031660303,29.2348514025,26.9909622556,24.8432206694,20.9216617936,27.4629050706,30.2319082563,27.9094877348,22.10594868,31.6887784462,27.8615722151,24.2246811872,23.5024589722,23.3403039403,23.2639905957,24.5614055973,33.518176484,30.5732211827,30.692950806,21.0219392825,31.876234425,21.962957713,28.8457179507,28.102299152,32.9641803281,27.8176680684,23.0211853963,21.3718562778,24.8752118982,29.4963511334,22.2482543059,20.6324979372,25.9872668112,32.5626305395,22.034559871,29.5303012671,33
.7390672533,22.7180886367,21.6685371354,25.1526687437,24.5190900203,25.7029542286,29.7871283353,23.7715381442,24.1865779513,21.2785792542,27.1458724315,24.8088937941,21.443713108,33.5570945159,24.6818316034,23.427281297,21.8075034301,28.9499667494,36.3022195012,23.5908248278,23.9946851783,22.3074658533,24.7938803369,21.4739417266,21.5758408714,22.6278742882,25.3676684869,23.9379140052,34.5507215378,23.5174426745,26.8644593393,23.7632454795,25.7878232106,30.8830802116,25.631308765,30.1536961627,23.293464861,22.180712461,23.8559099174,24.0602827131,26.6767218378,23.7678795598,22.2132015562,20.7901311059,30.8438582368,21.5985920191,23.0737501623,28.3333189133,23.4730251789,41.2670286745,33.9782574392,28.0527638622,34.793918906,22.9568788335,22.0384376471,26.5027716792,27.7563421582,21.3921773552,23.7564751882,27.5736423824,28.1274350919,20.6274356422,27.1369639322,23.502931645,21.1346268598,23.6431128467,22.5021769697,31.6229384473,32.64368605,22.6835708273,22.7151104607,22.3860886208,21.4509832665,25.3471166787,22.988678139,25.1018609056,31.9027392214,22.1091523297,22.183273379,22.2562762077,25.0969584839,23.0786196976,20.2909210614,27.2636853294,30.5098113801,22.1955456371,21.9037663084,22.3355616248,22.9682594904,22.417262288,24.9116645066,28.0715247846,22.9341376458,24.6694168598,30.5562242865,20.4510086613,31.2314145176,24.3117864705,20.2087088096,26.9562020703,21.4758058174,28.2289558734,27.6675824309,21.74520683,24.8501850595,23.4695924275,23.3127511599,22.1516677016,33.7527574862,35.1484479493,25.2698541626,24.2774616762,21.1114352529,27.4281605439,22.2587664971,24.1498855089,27.5001987762,22.3981980524,30.2864523808,23.6991883976,23.9097859175,30.3570177347,24.5554042438,23.4498121043,23.6346157375,28.9289573859,29.5217654322,30.0448809391,23.7053635251,25.2325508236,25.782710954,37.55940803,36.7663457599,22.636234531,31.1216807635,23.0884655406,23.6150723448], +"norm" : 
[-3.02353829742,0.394716690288,-4.59719357528,-4.77865152174,7.82890286251,4.96702916486,-1.53546061628,-0.591269235849,1.84511283806,4.23217368193,3.03594678792,-8.48110554556,-0.625041832839,-0.855435605323,4.76458417583,2.43214670296,-12.0081865483,-3.8592126857,6.34512654762,-4.19284867918,-4.69870722761,0.384925052061,14.2447195972,-7.10613762178,-4.8854365152,-1.37939362163,-0.486932190437,0.618860340752,-1.99529861112,4.71904896807,-5.56771992548,-6.15576769427,-13.8511582698,-11.3038039428,-6.30378699216,0.80072646514,-8.32967245847,-1.4008643767,-7.31756224027,-0.335586421962,-13.7970940369,-2.99771477668,-9.70997763937,-6.8536795613,-8.53515125485,-0.568251264929,-0.110079445313,-5.76943267395,-0.343571749027,4.74871107249,-1.65061655585,-0.766629450089,-2.05930800536,3.02405795394,4.63597306893,-6.59630778873,-9.74553219142,-1.88907700671,1.79181572508,-5.30262163862,2.31290041265,-2.05015951044,-1.74995322048,1.35107797175,2.2648251612,-6.77934426147,-2.11746660346,-13.521169394,-5.2623442073,-8.09150988291,-8.66304853842,3.37311345047,1.61820752649,1.45000926405,3.00771721247,-4.5154369568,-5.1113711253,-6.60584304007,-5.6310674633,-0.885522270824,-1.7434194954,-7.78859733156,2.08353467945,0.168048028271,3.05368473002,7.12437605673,-6.98759123855,2.25295549368,-2.65788800606,2.56207076021,-1.05894659782,8.84730718845,-2.57464102291,8.01848681208,-1.85194923843,1.97626577931,-1.40945123155,-5.74265774246,0.924848689262,-1.236617136,-9.82828646908,-4.81270094037,-2.16332069579,-6.64503101117,-4.4128632283,-2.18131923051,3.4769503007,2.90464238744,-4.94743843102,5.90850047098,-4.64367413062,0.28500935648,2.64984379487,-9.84635302601,-7.11243489023,-4.01413462206,-0.897565683674,-2.9670055395,1.34579168233,-10.2449240975,-13.263986234,-7.83416108133,-0.231964489627,1.51055085739,-3.37284602283,-2.69571094061,-1.46171389249,-5.03272562667,-4.08532204114,-2.08503518413,-8.12072642097,-11.0041995675,6.17368102305,2.94504150965,0.289700715472,0.775772051604,4.5
3359862464,-4.20276785098,-3.5067513994,0.493957449563,-6.11995519946,4.60282921316,0.539823928982,-5.26718837544,-1.06510242931,-3.95862624442,-3.3614648762,-2.08570717816,1.40160374371,1.17756178334,-5.7858825122,1.59042917238,-3.52136537989,-10.3889512288,0.134930425744,-9.81869922865,-3.83743760741,3.22956266628,4.0997718246,-3.23849558098,-4.08116065876,-2.58373502131,-11.2239380805,8.34353925626,-5.88483736797,5.20083434413,-2.55278679782,4.13693497423,7.60392130926,1.7321651912,9.1232979508,-5.39700205249,1.6368439079,-6.3436536719,-8.06925452784,-4.35315465608,-6.59620848542,-6.19413344317,0.175776522814,-4.78902358567,-4.83727435503,-3.86320776356,-6.6327845063,6.77554192903,4.04904993041,4.35012365869,-6.87189063252,-5.17354627605,-3.97850376168,-3.44717949972,-5.67148535962,-5.64252339255,2.19387536337,-0.665533936345,1.60597169449,2.55491320946,-7.1045130663,-9.06708019399,4.48303920998,-0.738623955583,3.63740551779,-4.84181723434,-0.453189159375,-4.88692736379,-7.84317036663,-6.12509985915,-15.2220474521,-2.76492901674,-5.75960501527,-2.66304625965,5.28649852319,1.04755922318,-4.46889628488,4.19989941142,-2.67861070119,5.15020902979,-6.23426225664,1.01641065023,4.31786128238,-3.27745278081,-4.22844189936,0.341833406178,-6.80801962174,-11.1225226863,1.12714078249,3.11436192151,3.53712300384,-1.54531455268,-3.75054328375,-0.910214917602,-6.4740656525,-10.7074697736,-7.26127869785,5.18301392898,-4.88103693178,-14.1014721454,-7.31164814152,-0.813138692209,-1.99521315468,-1.6737340958,-8.83762052508,-2.15140025937,2.70244660698,-5.21218375252,3.20089622947,-7.41461127834,0.146067941673,-3.18111834259,1.2090890813,-3.65830278349,4.97036113093,-7.3837097202,-2.96232990981,-6.35593825351,0.104259984528,-8.05705533331,-3.29433456058,-4.90823425028,-8.30210313825,0.322873967494,-7.35120453766,2.02111348919,-2.7836775386,8.051950056,-6.43552215192,-6.88968116176,-3.33608674763,0.416689109388,-4.00166366694,0.249402075769,-0.00203023292896,-2.75787401873,-14.789670
28,-1.19596579501,-1.61737466126,-3.48602083031,-8.47137011855,-6.4259000639,-2.93748263051,-4.46779999922,-2.5770648199,-3.75372303743,-1.77651311819,-6.48878157978,2.45436751248,-7.7559258241,-15.061513524,3.70625095076,-6.33567762337,-0.0820837076559,-4.18515082187,-0.262555949149,-8.1508951908,0.855390693155,-1.69969393614,-3.12761997021,4.74863069817,4.75149867008,-3.93326660895,2.32994771008,6.73616799549,-9.05123071939,-3.89120762373,-3.72910333316,-0.099687676645,-1.05502663271,4.61649102124,-13.322942962,-6.57489305267,-4.39482081973,3.23592248818,2.61974208913,-2.57075070314,0.0290122126464,-0.557740962406,-4.17394245755,-0.206221836183,-3.94122471798,8.64373145741,5.04802339384,-2.52717139012,1.50214183457,8.46425939096,-2.68485898067,-6.65244711693,-0.362513828504,4.5150654038,-9.0470118044,-2.72062972428,-5.58207012124,-1.4819290705,-9.4785927829,-7.87446777899,11.0699954406,-5.4465369927,-5.75826317939,1.18140480513,-7.78822079245,1.07339962024,3.10695557199,1.34136245958,-6.04767741227,-6.5406228662,5.56144696298,-1.52441284001,3.92334272602,1.18516467207,-4.69637293416,-2.27550496464,-7.67962893925,-2.85253127211,-7.7904363355,3.52299568457,1.17119049993,4.29841677382,2.82465387956,-4.17222969081,-6.39801429802,-5.47419114987,4.13187022913,0.28639324377,-1.42150687256,3.07021175117,-7.67503858995,-3.31685290809,4.5321259055,-10.0542050469,-7.13310344722,4.20786394536,-2.78379766076,-14.2454776712,-7.16974008783,5.99766813224,0.370353848923,-1.24337036513,-4.71086582829,-4.37748108651,-1.46798694119,-8.54114096064,8.86592376961,0.822806086967,-2.95240382374,-6.58467195062,-6.87906803192,9.06151344632,-1.63034658689,7.09297280707,-9.90765499104,-5.87181704475,0.764682465358,-1.46969305238,17.637640204,-3.27562799331,2.27068657624,-3.82403266088,-1.3444913171,-5.48806947812,4.67824731159,-2.75519428854,0.214689252962,2.70785610386,0.666819763777,-0.218669032817,-2.05057659525,5.07876585809,0.830527663017,0.282436759154,-1.02605947301,-5.27526878573,-4.8
2615047204,13.8843655942,2.79766270973,-6.87669917457,-7.58371051255,-7.52188172365,-6.49377587563,-8.83316180496,0.257008661964,-9.9361090467,-5.66394415239,-4.5728131344,-5.05938026749,-2.15799333495,-0.829360562456,-0.640318594311,4.15384676329,4.44153080578,2.25906753932,-9.6459030339,-9.75857495975,-0.513535414066,-0.276043993836,-3.99051680212,0.147020287369,-3.42972324343,-13.1421850032,-1.66720607508,0.44982380649,7.33962922803,8.35218900042,-3.22691613555,1.81151054021,-1.35492262579,1.13537956908,-7.31117369855,-9.49751725143,0.725770963637,0.0041163332643,-11.7311519285,0.525158919358,-6.55244569022,-3.09848142724,0.040276914528,-5.01572705582,-3.80566297238,0.820122617887,-7.28308282164,4.95975027147,-10.806338365,-6.55816563997,1.29169631766,-9.89732917709,0.288529881728,-2.91634045873,-5.427419481,-1.46784342632,-3.59118041897,0.16856486352,0.859129643227,0.835530146448,-1.59211517284,-3.51167435685,-5.63458194153,-1.09832443652,-4.60104367235,-0.00953959329388,-6.5846753721,-2.41325067253,-11.6984540307,5.0399718168,5.56203246712,0.632465602303,-3.33465416452,2.31142023809,-1.58098466977,-11.3616944834,-6.81395508265,-1.59966585423,-1.35636778919,-4.39560170157,-5.20140251982,1.72986900313,-5.11273531463,2.68144657666,1.75009188373,-2.28357514089,9.50337256355,0.847487335035,5.44704840591,4.32125141507], +"norm_std" : 
[-0.761837213353,-0.331616898194,-1.75131542992,0.628894110773,0.282501864129,-1.33813943,-0.50060684963,0.121645029892,1.70832347381,-0.970999448321,-0.619332343444,-0.726708131791,1.22165541672,0.503699288341,-1.3878740774,0.204851419543,0.603705215945,0.545680308693,0.235477019441,0.111834993822,-1.2515037504,-2.94934349814,0.634634160585,0.124157016111,1.29762248959,-1.68693341116,1.08953904655,2.06088173968,-0.241235326269,-0.94787218032,0.676294028923,-0.653356162094,-0.652295297944,0.528827604205,0.357793249335,0.188649359732,0.869416879035,-0.0506674481438,-0.71636457461,-0.103258720839,-1.1410365794,-0.500776900872,-0.389301370382,-0.473850530407,0.128664303795,0.153694305281,0.444790058081,0.128531666655,0.252529866032,-0.940638662695,1.00214544816,-0.52541498431,-0.887400935623,1.83131360327,-0.923029332098,0.700537686638,-0.892151197664,2.30074000291,-0.817765299371,0.513759631538,0.623586943483,1.48920592699,1.94047867052,0.543237129288,0.506190912339,1.66201449055,-1.18920250015,0.0935974490328,-0.539163905131,-1.43739560422,0.187937386025,-0.450454457295,-0.516878231615,-0.0956356677115,0.316423804579,0.603334657292,-1.49459146388,-0.110894079325,0.241289403967,-0.582645109052,-0.241112652347,0.236360537321,0.124720725203,1.04632597952,-0.27309185588,-0.534834020277,-0.306563304509,-0.162242664723,-1.08323219585,0.708401493453,1.52074304043,0.290343183233,-0.683066329711,-0.950312866297,0.400709935824,-0.12607168361,0.398204888062,0.141638473355,-0.264141421911,-0.452212074311,0.758201972721,-0.515583498457,-0.591202321979,0.896745784086,-0.971437523747,1.84080991349,0.153881232452,-0.274083943452,-1.78492568996,0.981006686402,-0.873717139787,-1.01563442014,-0.411243537311,1.46562116753,-1.00621906211,-0.902147762382,0.752769142961,-0.490508526506,-0.524672210141,-0.699195861143,0.352360939272,0.0681025983371,-0.930341707001,0.845399560277,0.0164723816491,0.844962955458,1.85083394768,0.0220742408712,-1.36917902216,0.887203523342,0.0143311821492,-0.074
1547051151,-0.048564787848,1.23502145314,-0.433294923904,1.39103545609,0.820210741477,-0.247423465317,0.30227074638,0.543980361346,-0.942368503754,-1.26638281245,0.937249545091,-0.72010224475,-1.5939515375,-0.375497816009,-0.958703834468,0.794336400065,-1.60510783562,0.543710253458,0.925166364208,-1.469628604,-0.399592346308,1.41734264438,-0.897608667966,1.84480501591,1.2531682095,-1.49093241721,-0.0277339245574,1.37523596087,-0.0252081701471,-0.667880179075,-2.86801752898,0.210688543428,1.28715530785,-0.574305988486,0.49532664657,0.396049589985,0.58879818975,-1.28175713014,2.02992261305,-0.501944516275,-0.159284565628,-1.49621629567,0.0114477138521,0.419445985251,2.05121388048,-0.368765332511,-1.68925467803,0.147681161421,-0.180998391963,0.158059054263,-0.396615421768,-0.400236629563,-0.824895666289,-0.244440445893,1.21945742825,-0.43363049174,0.861183873108,-0.334503693494,0.159559959242,-0.984164476346,0.754084973823,-0.284391662165,0.32479752957,-0.885424601706,-1.28089348334,0.196109935055,0.954644156116,-0.800971331814,0.0158514729825,1.08755329253,-0.631242819687,-0.0226893248728,0.685879242202,0.519179207928,0.182701891972,0.204647380607,-0.265986356197,-0.000227288704174,1.23945231553,-0.819715255694,-0.260388906514,0.519140256693,0.143091644531,-0.116677746539,1.49674411145,-1.48427437532,-1.67118275603,0.917173408575,-0.758014151397,2.06479240297,-0.850778395978,0.499450712758,-0.0792663655031,-1.40329263703,1.57894791325,0.000369028987627,0.900884914363,-0.454869220081,-0.864546645457,1.12911990353,0.0578744128611,-0.433738666312,0.0926976373756,-1.39782014504,1.45782265006,-0.176756915665,-0.2542403002,-1.26343749602,0.452262741199,-0.840117409241,-0.502678070859,0.513392586902,1.64165300475,0.580790036214,-1.70734026937,-0.178355430855,-0.828459954458,1.28631168451,-0.406452361793,0.156632047142,0.0521066804267,0.955813177407,0.743191500529,-0.486323084325,1.92046727351,-0.652749022684,-0.173303776916,-0.360410082419,-0.380413976662,-1.29813980546,0.52
7919007521,-0.0931002762505,0.401184680615,-0.102583380148,0.030869097738,2.61610050962,-0.785577944872,-0.506998120503,-2.01820571555,-0.676853137903,2.66674367704,1.45145614673,0.634628855315,-0.502826863797,0.512931658764,1.75677937083,-0.974310800693,0.680397048216,0.955798725606,1.50153547771,-0.756265648025,0.473504604182,1.71374344767,-1.14769922048,0.00290322049513,-1.10057035996,-0.297531781547,0.502409078192,-0.000987418981448,-0.674560277944,0.297958279225,1.46557313878,-0.303628593773,-0.994479884587,0.189889991266,-1.68402957184,-0.45838074161,0.543405908301,-1.18726425743,-0.412641692684,1.17712534516,-0.313704165315,1.57903161901,0.375388235873,-1.56813881881,-0.900886519308,0.652345518687,0.871600313907,0.268216169978,0.947681219661,0.14726758787,-1.77245546326,0.59241961089,0.0903254744846,0.651121453984,-0.0811946962298,0.801897602972,0.139845227106,-0.501002761518,-0.128302559175,0.414605966484,0.604577785973,2.13409475324,0.941187837385,-0.931456795706,-0.124667539256,0.200696290937,0.180256285555,-0.320370096868,-1.59612803134,-1.28169898363,1.50258574677,0.653538002103,-0.319536626289,0.955094010533,0.261995955396,0.160792900918,-0.571680642407,0.351660058596,1.11498006066,1.18326825729,1.06094106427,0.510712630416,-0.938783998005,-0.546496141191,0.590029971202,1.48218523572,0.10211810416,0.265438049312,0.00319307433395,-2.59501150029,-1.55556933212,1.10299595783,0.554736503855,-1.28901163638,0.385241647645,-1.71729172626,-1.01835312911,0.051635317273,0.503298709971,-0.543186230699,-0.50667841739,0.729652833474,0.434273362739,-1.13367360599,1.42395333777,0.266351536554,-0.854264392719,-0.550596561249,-0.619109858975,1.03893338917,-0.910610824816,0.529952566873,-0.00847143614797,-1.12903824884,0.569854190963,-0.863391621561,-1.35614426983,-0.0571515568786,-0.108621121585,1.65238409216,-1.3509280784,-0.546096737434,0.991400184464,2.20099738608,0.42789979109,0.290468282765,0.611953095869,-0.512450984176,-0.724230691032,1.69288189574,-0.29933911968
2,1.57172719445,0.461444067234,-0.673829701094,-1.14103626018,-1.22891797584,-1.1592824639,-0.320829018255,1.08834758337,-0.906203145379,-0.464152061905,-0.513378372516,1.61783767507,-0.8166506056,0.244719605323,-1.31109422683,0.388406495077,1.59237370969,0.870399036779,0.335249325288,0.648959907332,-0.183151791075,0.500241269555,1.36882639497,0.895091842247,0.647293371999,-0.567878708259,-0.579517446608,-0.751448572574,1.07551917611,-0.62114235961,1.87855571664,1.26023993075,0.310050972989,0.106402292088,0.248012997113,-1.39383959205,-0.669436307001,-0.566791473817,-0.381778902753,-0.946546906825,-1.06510299994,-1.33182618118,-0.986453191144,-0.378391147408,0.764711974788,0.0603594164541,0.618509998953,-0.484921020219,-0.280530239708,0.406962904079,1.02518778661,0.254751681263,0.0875239905036,0.0706983543389,-0.573152602817,1.22892597332,-0.962201893069,1.52555675514,0.827282588614,0.912470470299,-0.127292342582,0.63431664124,-1.53089842788,-1.29070149281,-0.526228340585,-1.13223396004,-0.499797126597,-0.728463087382,-0.58314416965,0.329290656707,-0.826860798028,-0.536867983053,-0.562980133545,0.918404799512,-0.0793993782114,-0.278624682616,-0.130459538605,-1.39699761449,-0.244713889273,0.830253910578,0.240821201544,-0.915697123133,-2.22527996377,-0.663067012309,-0.321194763746,0.498388164634,0.380338976442,-1.06703532454,0.255452172355,2.11128718522,-0.634189962037,1.36875576787,-0.970649489259,0.654245334263,-1.17189521913,-0.00315987197527,-0.745604825465,1.5982908861,-0.913399998036,2.40291208791,-0.589360262208,0.107657442325,-0.139297516223,-1.15992572508,0.61896478197,1.37389046967], +"uniform" : 
[-3.59839229265,-0.787420866956,-3.5100696398,1.05790324976,-0.230903615149,4.15207192837,-1.79431229095,-2.998371695,2.59418003613,2.45822372823,-0.300780527014,-1.5440133437,1.03085204599,-1.33692765277,-1.55197146978,-4.37816704914,3.94779479863,2.31065093191,-0.440683981222,-1.96326821894,5.04421054276,-3.44594638824,-2.83054477818,-1.6236768478,1.75958683029,-4.97829427413,-2.02599936458,3.81586650452,4.14944963831,-0.328355693341,4.54399231001,-1.27349182032,2.85494056638,2.45884513286,-3.3643537016,-3.57188252267,-4.32707322248,-2.37177517336,3.56068851518,0.119520274315,1.29265060598,-3.58842998189,0.52994745173,-0.102735337404,2.93302724708,-3.32383764717,-4.52349324316,5.6903989608,4.0317269289,3.93390379664,-3.40137326696,2.69104991428,-2.98250385941,3.8846379483,-2.11914074841,-4.98646084014,4.21750944,-1.06696695511,5.00022225537,4.68580272782,2.08077838339,-1.35056535356,-4.09326392621,4.77148734389,-1.79613326611,3.36166219876,1.97170111153,0.727574936502,0.68027976209,2.36902880224,0.897632745568,-0.763412491614,1.72623264227,-0.827064632306,-2.83652093527,5.64953669712,2.68971786958,-1.18205663166,-0.344494805562,5.44770199554,4.51313488474,5.38206368193,4.67548461281,5.10142325643,-4.31719582063,-3.127370037,-1.6095224473,-1.44068466905,5.66023913117,3.60040656771,-3.07654748576,0.458102068179,-0.551562848876,3.78651437772,-4.92632166928,3.31554428844,-3.36957420934,1.6563152954,-3.03247429529,-3.21157203441,-0.668010778646,4.64806904697,-1.96203367327,2.92991697712,5.20546444871,5.4755986674,-3.26643862341,1.00904804739,-1.38404492635,1.56886790407,-4.83507644794,-4.63978031186,-0.292279349164,3.70125898863,-3.84443474416,-1.81635777648,-4.37096994121,0.089855747927,1.74869246768,0.187874051836,-1.37529674976,5.71445378441,-4.96707144174,2.686861882,-3.90049396626,5.52808290134,0.740864315283,-3.26547314714,2.15348217858,5.92399306052,-0.854832791063,-3.47257016591,-2.87412881909,-3.76985869718,1.36761862379,-1.68678261085,-2.89630097104,-0.929840
779973,-3.38223139682,5.02139643759,0.393033359659,1.89913830879,-1.95337363171,0.284603845395,-2.54550799157,2.90754600872,3.26452565515,0.664851859842,5.22836966452,-3.12171454282,0.747220391654,-1.28728477314,-1.72465241416,2.70059485678,-4.42330132702,5.81005308398,1.41414051844,5.12047630634,-2.49389009369,-2.33704682111,2.56687618688,4.69747943044,-3.63090016862,-2.31052511812,0.552483770624,-2.0253543378,2.38465437442,-2.71844542221,3.83039023329,0.938275444647,1.01648378177,-0.42917332798,4.28010412964,-3.32742641349,-0.440716667874,-2.40448686931,-0.246555162662,0.659266241444,-0.287809663098,-0.686876928854,-3.95270204805,-2.0405528058,-0.342340493866,-4.55156615101,-3.09850829709,-4.24778926946,4.80837113021,-0.44942422772,1.66526629731,4.57513402387,-2.72062149494,5.42151718962,-4.91083564779,4.77867162605,-2.68427837797,-3.00469697588,-2.41322409665,-1.78936739627,5.82592265601,5.75430742511,-3.20248821799,5.33317110333,1.72000612083,5.44653943222,3.76336197682,1.37463539515,-3.01203241571,5.45167449708,3.40932103817,-1.41118426128,-0.0544041019386,5.65560368173,3.37156992784,3.64419990715,-4.71834198552,-3.06291474113,-2.18338173262,-3.07441774963,4.92150696878,3.70242052679,-4.62453404907,2.00025582183,1.1847409028,5.9863883503,4.47056407999,4.95700117754,2.42567878004,4.77528809582,0.680045377888,1.54131800483,2.89614818461,-2.38105833007,-3.68325895189,1.01175699125,0.891222274663,4.44019334706,1.53202392114,-4.95704275681,-4.14541407995,5.52889247305,5.73509039282,5.02788902743,-1.57648063218,5.86060424389,-2.66186046465,0.947789157408,-0.519740375536,-2.13072057124,2.5156177093,-4.43861568681,4.21397140772,0.792109170664,-3.57609095564,0.318813602131,1.24168733432,-4.22539976521,-3.2370090488,0.548860128953,-0.255366322879,-0.80322247363,-3.39224393994,3.52890459442,-1.68115547661,-3.34447769861,-0.484131096616,4.20937927047,2.91364541337,4.56458545417,3.34132723483,1.76011676887,4.37402454703,0.360587392547,2.9233822874,5.39277965286,-1.271955983
49,1.04756383977,0.960287592935,4.82594646368,4.12973216875,4.16659344016,5.66771136856,4.09362341646,3.28797249498,-1.48824277814,-3.9741267896,-1.97084497164,-3.49784815372,4.55448908951,5.43122663082,4.72830290781,-3.34149330673,-4.48948839353,5.8265001609,3.08830498124,-4.50046772119,-1.4642523958,4.1012680044,0.117837324534,0.837131669144,5.256711969,4.65728905082,-1.36848771009,1.15146185283,4.05484904019,2.0945777433,-1.00311450868,4.76303596549,2.87296884988,-4.30570756739,4.92260881202,0.0637437645136,5.38344231564,0.0658147583681,2.83477947221,3.30380809368,-2.74945514833,3.72055910234,1.36072626962,0.403464583255,-4.98381812182,-3.77250293519,3.14082993582,-4.68195034493,-0.0976805179128,0.134796113902,1.35236343608,4.96147533206,-0.399974966116,-3.27601905549,-2.97610059269,3.1263208559,2.66939221721,2.24961522125,1.65021207792,-3.98310188066,-0.0263843882871,3.9472267178,-0.00898597331621,-1.54644834917,4.09818211997,1.66756935339,5.3053331897,4.14722271607,-1.86146383011,-2.09954141131,-0.701524297999,-4.98207487503,3.09449077835,2.04818753112,4.42704244947,4.19638827566,-0.785520444274,-0.452876786051,4.06380991982,3.23603435829,3.89702616958,1.40465665461,4.42167516599,1.73192134264,-0.582029345021,0.384287794188,-4.6925700598,4.05966440995,5.43192889639,5.72287280657,-4.74145966821,-1.42815023676,-0.861088011717,5.40888478602,5.27407562104,1.26224750024,-0.963643520477,5.77090590409,1.97617226337,-0.353796316498,-2.92301043754,-1.16585138372,-3.79977685596,5.63503718713,2.08091590981,2.41933749501,3.06063798095,-1.51733715426,2.82806126728,-4.49359948938,0.880272999875,4.00558481304,-0.653950330237,-1.18947647519,-4.41250273958,5.26573179527,-3.8697395004,0.897618613657,-4.95304263549,5.72147382286,-0.128183990781,2.1674797209,1.77182364398,0.839887139464,-3.46638031829,0.317940784978,-3.70923155832,-4.16864598235,-2.59500306501,5.00491335008,-0.771453793754,-4.10597651276,5.52298346103,-4.51741487112,-3.40754479796,-3.70568757719,-3.56798642247,1.8
5033458402,-4.06523189866,-0.171312621295,-0.60569156554,2.10029181103,3.30802873879,-4.65738060314,3.22279030624,2.96439127467,-1.99400360552,2.43254952255,2.72522842659,1.21751018881,4.61967393661,2.67716272172,-0.275930937041,3.69559840043,2.63380386307,-3.65683902426,-4.96319456426,0.288804317631,4.20129093782,0.0201024191633,4.42535092664,5.30089359427,1.42783093852,-3.90733592779,-4.00443405778,-3.06592124453,-2.97947799023,5.30644483091,-3.80504797748,0.161236792916,3.4893373935,1.45987824596,-0.715656860176,3.38919772778,-2.81680042324,3.68847074524,-4.09473921597,-2.46346070328,-0.52630794415,5.10880508202,-2.79774977144,2.17616990737,0.680291565024,-2.53602769803,-2.53605928656,-4.64235333979,4.99746570821,3.17463920114,3.67260920857,1.68263955579,-0.341640337226,5.60382309625,5.32862378322,-0.371694363325,3.82023019484,3.71611599539,-3.04091649203,-4.27929165888,4.33992317747,3.06447538216,5.852670248,-0.47265056095,3.11540808711,-2.12606920103,2.89694049458,0.694301505115,4.91021155609,-2.95711298319,-3.03798153336,-1.70001746744,3.5497129652,2.07464194578,0.343181988602,-3.76308258707,-1.32819126809,5.54782630984,-4.32038390688,-0.401366439506,-0.993137843146,-0.772698509678,4.57142306351,2.2145435534,2.65482758728,-1.3509043032,5.30651662306], diff --git a/tests/test_sets/nested_test_json_data_file.json b/tests/test_sets/nested_test_json_data_file.json new file mode 100644 index 000000000000..b5a51222a493 --- /dev/null +++ b/tests/test_sets/nested_test_json_data_file.json @@ -0,0 +1,116 @@ +{ + "meta": { + "foo": "bar" + }, + "data": [ + { + "y": 2, + "x": "i", + "z": 0.15645925198429417 + }, + { + "y": 91, + "x": "v", + "z": -0.8831566548506162 + }, + { + "y": 27, + "x": "f", + "z": 0.04949008795216637 + }, + { + "y": 88, + "x": "m", + "z": 0.707453140660691 + }, + { + "y": 19, + "x": "d", + "z": -0.4463904406306873 + }, + { + "y": 31, + "x": "p", + "z": -0.7428112675728786 + }, + { + "y": 88, + "x": "k", + "z": -0.8056338715441489 + }, + { + "y": 
88, + "x": "c", + "z": 2.1503562085625214 + }, + { + "y": 77, + "x": "q", + "z": 2.0929237387630266 + }, + { + "y": 77, + "x": "q", + "z": -1.661959979179892 + }, + { + "y": 83, + "x": "z", + "z": -0.9832286584134281 + }, + { + "y": 74, + "x": "b", + "z": 0.8762444209783803 + }, + { + "y": 11, + "x": "n", + "z": 0.5528041402836482 + }, + { + "y": 10, + "x": "j", + "z": -1.546452548907501 + }, + { + "y": 82, + "x": "o", + "z": -1.2056933522181066 + }, + { + "y": 30, + "x": "l", + "z": 1.1095471540125805 + }, + { + "y": 34, + "x": "d", + "z": -0.4419384341857751 + }, + { + "y": 75, + "x": "w", + "z": 0.6823297850310874 + }, + { + "y": 4, + "x": "z", + "z": -0.2659061388574901 + }, + { + "y": 56, + "x": "a", + "z": -0.038264620773638694 + } + ], + "other": { + "baz": [ + 1, + 2, + 3, + 4, + 5 + ] + } +} \ No newline at end of file diff --git a/tests/test_sets/test_json_data_file.json b/tests/test_sets/test_json_data_file.json new file mode 100644 index 000000000000..06edb084cc1f --- /dev/null +++ b/tests/test_sets/test_json_data_file.json @@ -0,0 +1,102 @@ +[ + { + "y": 2, + "x": "i", + "z": 0.15645925198429417 + }, + { + "y": 91, + "x": "v", + "z": -0.8831566548506162 + }, + { + "y": 27, + "x": "f", + "z": 0.04949008795216637 + }, + { + "y": 88, + "x": "m", + "z": 0.707453140660691 + }, + { + "y": 19, + "x": "d", + "z": -0.4463904406306873 + }, + { + "y": 31, + "x": "p", + "z": -0.7428112675728786 + }, + { + "y": 88, + "x": "k", + "z": -0.8056338715441489 + }, + { + "y": 88, + "x": "c", + "z": 2.1503562085625214 + }, + { + "y": 77, + "x": "q", + "z": 2.0929237387630266 + }, + { + "y": 77, + "x": "q", + "z": -1.661959979179892 + }, + { + "y": 83, + "x": "z", + "z": -0.9832286584134281 + }, + { + "y": 74, + "x": "b", + "z": 0.8762444209783803 + }, + { + "y": 11, + "x": "n", + "z": 0.5528041402836482 + }, + { + "y": 10, + "x": "j", + "z": -1.546452548907501 + }, + { + "y": 82, + "x": "o", + "z": -1.2056933522181066 + }, + { + "y": 30, + "x": "l", + "z": 
1.1095471540125805 + }, + { + "y": 34, + "x": "d", + "z": -0.4419384341857751 + }, + { + "y": 75, + "x": "w", + "z": 0.6823297850310874 + }, + { + "y": 4, + "x": "z", + "z": -0.2659061388574901 + }, + { + "y": 56, + "x": "a", + "z": -0.038264620773638694 + } +] \ No newline at end of file diff --git a/tests/test_util.py b/tests/test_util.py deleted file mode 100644 index 5215ed9f3a1e..000000000000 --- a/tests/test_util.py +++ /dev/null @@ -1,143 +0,0 @@ -import json -import datetime -import numpy as np -import unittest - -import great_expectations as ge - -class TestUtilMethods(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(TestUtilMethods, self).__init__(*args, **kwargs) - self.D = ge.read_csv('./tests/test_sets/distributional_expectations_data_base.csv') - - with open('./tests/test_sets/test_partitions.json', 'r') as file: - self.test_partitions = json.loads(file.read()) - - def test_DotDict(self): - D = ge.util.DotDict({ - 'x' : [1,2,4], - 'y' : [1,2,5], - 'z' : ['hello', 'jello', 'mello'], - }) - self.assertEqual(D.x[0],D.y[0]) - self.assertNotEqual(D.x[0],D.z[0]) - - def test_continuous_partition_data_error(self): - with self.assertRaises(ValueError): - test_partition = ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins=-1) - self.assertFalse(ge.dataset.util.is_valid_continuous_partition_object(test_partition)) - test_partition = ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], n_bins=-1) - self.assertFalse(ge.dataset.util.is_valid_continuous_partition_object(test_partition)) - - def test_partition_data_norm_0_1(self): - test_partition = ge.dataset.util.continuous_partition_data(self.D.norm_0_1) - for key, val in self.test_partitions['norm_0_1_auto'].items(): - self.assertEqual(len(val), len(test_partition[key])) - self.assertTrue(np.allclose(test_partition[key], val)) - - - def test_partition_data_bimodal(self): - test_partition = ge.dataset.util.continuous_partition_data(self.D.bimodal) - for key, val in 
self.test_partitions['bimodal_auto'].items(): - self.assertEqual(len(val), len(test_partition[key])) - self.assertTrue(np.allclose(test_partition[key], val)) - - - def test_kde_partition_data_norm_0_1(self): - test_partition = ge.dataset.util.kde_partition_data(self.D.norm_0_1) - for key, val in self.test_partitions['norm_0_1_kde'].items(): - self.assertEqual(len(val), len(test_partition[key])) - self.assertTrue(np.allclose(test_partition[key], val)) - - - def test_kde_partition_data_bimodal(self): - test_partition = ge.dataset.util.kde_partition_data(self.D.bimodal) - for key, val in self.test_partitions['bimodal_kde'].items(): - self.assertEqual(len(val), len(test_partition[key])) - self.assertTrue(np.allclose(test_partition[key], val)) - - - def test_categorical_data_fixed(self): - test_partition = ge.dataset.util.categorical_partition_data(self.D.categorical_fixed) - for k in self.test_partitions['categorical_fixed']['values']: - # Iterate over each categorical value and check that the weights equal those computed originally. 
- self.assertEqual( - self.test_partitions['categorical_fixed']['weights'][self.test_partitions['categorical_fixed']['values'].index(k)], - test_partition['weights'][test_partition['values'].index(k)]) - - def test_categorical_data_na(self): - df = ge.dataset.PandasDataSet({ - 'my_column': ["A", "B", "A", "B", None] - }) - partition = ge.dataset.util.categorical_partition_data(df['my_column']) - self.assertTrue(ge.dataset.util.is_valid_categorical_partition_object(partition)) - self.assertTrue(len(partition['values']) == 2) - - def test_is_valid_partition_object_simple(self): - self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1']))) - self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['bimodal']))) - self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins='auto'))) - self.assertTrue(ge.dataset.util.is_valid_continuous_partition_object(ge.dataset.util.continuous_partition_data(self.D['norm_0_1'], bins='uniform', n_bins=10))) - - def test_generated_partition_objects(self): - for partition_name, partition_object in self.test_partitions.items(): - result = ge.dataset.util.is_valid_partition_object(partition_object) - if not result: - print("Partition object " + partition_name + " is invalid.") - self.assertTrue(result) - - def test_is_valid_partition_object_fails_length(self): - self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': [0,1], 'weights': [0,1,2]})) - - def test_is_valid_partition_object_fails_weights(self): - self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': [0,1,2], 'weights': [0.5,0.6]})) - - def test_is_valid_partition_object_fails_structure(self): - self.assertFalse(ge.dataset.util.is_valid_partition_object({'weights': [0.5,0.5]})) - self.assertFalse(ge.dataset.util.is_valid_partition_object({'bins': 
[0,1,2]})) - - def test_recursively_convert_to_json_serializable(self): - D = ge.dataset.PandasDataSet({ - 'x' : [1,2,3,4,5,6,7,8,9,10], - }) - D.expect_column_values_to_be_in_set("x", set([1,2,3,4,5,6,7,8,9]), mostly=.8) - - part = ge.dataset.util.partition_data(D.x) - D.expect_column_kl_divergence_to_be_less_than("x", part, .6) - - #Dumping this JSON object verifies that everything is serializable - json.dumps(D.get_expectations_config(), indent=2) - - - x = { - 'w': [ - "aaaa", "bbbb", 1.3, 5, 6, 7 - ], - 'x': np.array([1, 2, 3]), - 'y': { - 'alpha' : None, - 'beta' : np.nan, - 'delta': np.inf, - 'gamma' : -np.inf - }, - 'z': set([1,2,3,4,5]), - 'zz': (1,2,3), - 'zzz': [ - datetime.datetime(2017,1,1), - datetime.date(2017,5,1), - ] - } - x = ge.dataset.util.recursively_convert_to_json_serializable(x) - self.assertEqual(type(x['x']), list) - - try: - x = unicode("abcdefg") - x = ge.dataset.util.recursively_convert_to_json_serializable(x) - self.assertEqual(type(x), unicode) - except NameError: - pass - - - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 000000000000..74b7a509cd99 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,213 @@ +from __future__ import division + +import pandas as pd +import numpy as np + +from sqlalchemy import create_engine + +from great_expectations.dataset import PandasDataset, SqlAlchemyDataset + +## Taken from the following stackoverflow: https://stackoverflow.com/questions/23549419/assert-that-two-dictionaries-are-almost-equal +def assertDeepAlmostEqual(test_case, expected, actual, *args, **kwargs): + """ + Assert that two complex structures have almost equal contents. + + Compares lists, dicts and tuples recursively. Checks numeric values + using test_case's :py:meth:`unittest.TestCase.assertAlmostEqual` and + checks all other values with :py:meth:`unittest.TestCase.assertEqual`. 
+ Accepts additional positional and keyword arguments and pass those + intact to assertAlmostEqual() (that's how you specify comparison + precision). + + :param test_case: TestCase object on which we can call all of the basic + 'assert' methods. + :type test_case: :py:class:`unittest.TestCase` object + """ + is_root = not '__trace' in kwargs + trace = kwargs.pop('__trace', 'ROOT') + try: + # if isinstance(expected, (int, float, long, complex)): + if isinstance(expected, (int, float, complex)): + test_case.assertAlmostEqual(expected, actual, *args, **kwargs) + elif isinstance(expected, (list, tuple, np.ndarray)): + test_case.assertEqual(len(expected), len(actual)) + for index in range(len(expected)): + v1, v2 = expected[index], actual[index] + assertDeepAlmostEqual(test_case, v1, v2, + __trace=repr(index), *args, **kwargs) + elif isinstance(expected, dict): + test_case.assertEqual(set(expected), set(actual)) + for key in expected: + assertDeepAlmostEqual(test_case, expected[key], actual[key], + __trace=repr(key), *args, **kwargs) + else: + test_case.assertEqual(expected, actual) + except AssertionError as exc: + exc.__dict__.setdefault('traces', []).append(trace) + if is_root: + trace = ' -> '.join(reversed(exc.traces)) + exc = AssertionError("%s\nTRACE: %s" % (str(exc), trace)) + raise exc + + +def get_dataset(dataset_type, data): + """For Pandas, data should be either a DataFrame or a dictionary that can be instantiated as a DataFrame + For SQL, data should have the following shape: + { + 'table': + 'table': SqlAlchemy Table object + named_column: [list of values] + } + + """ + if dataset_type == 'PandasDataset': + return PandasDataset(data) + elif dataset_type == 'SqlAlchemyDataset': + # Create a new database + + engine = create_engine('sqlite://') + + # Add the data to the database as a new table + df = pd.DataFrame(data) + df.to_sql(name='test_data', con=engine, index=False) + + # Build a SqlAlchemyDataset using that database + return 
SqlAlchemyDataset('test_data', engine=engine) + else: + raise ValueError("Unknown dataset_type " + str(dataset_type)) + + +def candidate_test_is_on_temporary_notimplemented_list(context, expectation_type): + if context == "SqlAlchemyDataset": + return expectation_type in [ + #"expect_column_to_exist", + #"expect_table_row_count_to_be_between", + #"expect_table_row_count_to_equal", + #"expect_table_columns_to_match_ordered_list", + "expect_column_values_to_be_unique", + # "expect_column_values_to_not_be_null", + # "expect_column_values_to_be_null", + "expect_column_values_to_be_of_type", + "expect_column_values_to_be_in_type_list", + # "expect_column_values_to_be_in_set", + "expect_column_values_to_not_be_in_set", + # "expect_column_values_to_be_between", + "expect_column_values_to_be_increasing", + "expect_column_values_to_be_decreasing", + "expect_column_value_lengths_to_be_between", + "expect_column_value_lengths_to_equal", + "expect_column_values_to_match_regex", + "expect_column_values_to_not_match_regex", + "expect_column_values_to_match_regex_list", + "expect_column_values_to_match_strftime_format", + "expect_column_values_to_be_dateutil_parseable", + "expect_column_values_to_be_json_parseable", + "expect_column_values_to_match_json_schema", + #"expect_column_mean_to_be_between", + "expect_column_median_to_be_between", + "expect_column_stdev_to_be_between", + #"expect_column_unique_value_count_to_be_between", + #"expect_column_proportion_of_unique_values_to_be_between", + "expect_column_most_common_value_to_be_in_set", + #"expect_column_sum_to_be_between", + #"expect_column_min_to_be_between", + #"expect_column_max_to_be_between", + "expect_column_chisquare_test_p_value_to_be_greater_than", + "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", + "expect_column_kl_divergence_to_be_less_than", + "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than" + ] + return False + + +def evaluate_json_test(dataset, expectation_type, 
test): + """ + This method will evaluate the result of a test build using the Great Expectations json test format. + + :param dataset: (Dataset) A great expectations Dataset + :param expectation_type: (string) the name of the expectation to be run using the test input + :param test: (dict) a dictionary containing information for the test to be run. The dictionary must include: + - title: (string) the name of the test + - exact_match_out: (boolean) If true, match the 'out' dictionary exactly against the result of the expectation + - in: (dict or list) a dictionary of keyword arguments to use to evaluate the expectation or a list of positional arguments + - out: (dict) the dictionary keys against which to make assertions. Unless exact_match_out is true, keys must\ + come from the following list: + - success + - observed_value + - unexpected_index_list + - unexpected_list + - details + - traceback_substring (if present, the string value will be expected as a substring of the exception_traceback) + :return: None. asserts correctness of results. 
+ """ + + dataset.set_default_expectation_argument('result_format', 'COMPLETE') + + if 'title' not in test: + raise ValueError("Invalid test configuration detected: 'title' is required.") + + if 'exact_match_out' not in test: + raise ValueError("Invalid test configuration detected: 'exact_match_out' is required.") + + if 'in' not in test: + raise ValueError("Invalid test configuration detected: 'in' is required.") + + if 'out' not in test: + raise ValueError("Invalid test configuration detected: 'out' is required.") + + # Pass the test if we are in a test condition that is a known exception + + # Known condition: SqlAlchemy does not support parse_strings_as_datetimes + if 'parse_strings_as_datetimes' in test['in'] and isinstance(dataset, SqlAlchemyDataset): + return + + # Known condition: SqlAlchemy does not support allow_cross_type_comparisons + if 'allow_cross_type_comparisons' in test['in'] and isinstance(dataset, SqlAlchemyDataset): + return + + try: + # Support tests with positional arguments + if isinstance(test['in'], list): + result = getattr(dataset, expectation_type)(*test['in']) + # As well as keyword arguments + else: + result = getattr(dataset, expectation_type)(**test['in']) + + except NotImplementedError: + #Note: This method of checking does not look for false negatives: tests that are incorrectly on the notimplemented_list + assert candidate_test_is_on_temporary_notimplemented_list(dataset.__class__.__name__, expectation_type), "Error: this test was supposed to return NotImplementedError" + return + + # Check results + if test['exact_match_out'] is True: + assert test['out'] == result + + else: + for key, value in test['out'].items(): + # Apply our great expectations-specific test logic + + if key == 'success': + assert result['success'] == value + + elif key == 'observed_value': + # assert np.allclose(result['result']['observed_value'], value) + assert value == result['result']['observed_value'] + + elif key == 'unexpected_index_list': + if 
isinstance(dataset, SqlAlchemyDataset): + pass + else: + assert result['result']['unexpected_index_list'] == value + + elif key == 'unexpected_list': + assert result['result']['unexpected_list'] == value, "expected " + str(value) + " but got " + str(result['result']['unexpected_list']) + + elif key == 'details': + assert result['result']['details'] == value + + elif key == 'traceback_substring': + assert result['exception_info']['raised_exception'] + assert value in result['exception_info']['exception_traceback'], "expected to find " + value + " in " + result['exception_info']['exception_traceback'] + + else: + raise ValueError("Invalid test specification: unknown key " + key + " in 'out'") \ No newline at end of file diff --git a/tests/util.py b/tests/util.py deleted file mode 100644 index 460fc6f0fe05..000000000000 --- a/tests/util.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np - - -## Taken from the following stackoverflow: https://stackoverflow.com/questions/23549419/assert-that-two-dictionaries-are-almost-equal -def assertDeepAlmostEqual(test_case, expected, actual, *args, **kwargs): - """ - Assert that two complex structures have almost equal contents. - - Compares lists, dicts and tuples recursively. Checks numeric values - using test_case's :py:meth:`unittest.TestCase.assertAlmostEqual` and - checks all other values with :py:meth:`unittest.TestCase.assertEqual`. - Accepts additional positional and keyword arguments and pass those - intact to assertAlmostEqual() (that's how you specify comparison - precision). - - :param test_case: TestCase object on which we can call all of the basic - 'assert' methods. 
- :type test_case: :py:class:`unittest.TestCase` object - """ - is_root = not '__trace' in kwargs - trace = kwargs.pop('__trace', 'ROOT') - try: - # if isinstance(expected, (int, float, long, complex)): - if isinstance(expected, (int, float, complex)): - test_case.assertAlmostEqual(expected, actual, *args, **kwargs) - elif isinstance(expected, (list, tuple, np.ndarray)): - test_case.assertEqual(len(expected), len(actual)) - for index in range(len(expected)): - v1, v2 = expected[index], actual[index] - assertDeepAlmostEqual(test_case, v1, v2, - __trace=repr(index), *args, **kwargs) - elif isinstance(expected, dict): - test_case.assertEqual(set(expected), set(actual)) - for key in expected: - assertDeepAlmostEqual(test_case, expected[key], actual[key], - __trace=repr(key), *args, **kwargs) - else: - test_case.assertEqual(expected, actual) - except AssertionError as exc: - exc.__dict__.setdefault('traces', []).append(trace) - if is_root: - trace = ' -> '.join(reversed(exc.traces)) - exc = AssertionError("%s\nTRACE: %s" % (str(exc), trace)) - raise exc