diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index d204edc2f37f..20b973eca22e 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,16 +1,17 @@ -name: Mark stale issues and pull requests +name: stalebot on: schedule: - - cron: "45 * * * *" + - cron: "0 * * * *" + issues: + types: [opened, reopened] jobs: stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v1 +# - uses: actions/stale@v1.1.0 + - uses: Dolibarr/stale@69ff45d with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: 'Is this still relevant? If so, what is blocking it? Is there anything you can do to help move it forward?\n\nThis issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' diff --git a/.travis.yml b/.travis.yml index af430722f7a7..f72d878b9698 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,7 +64,7 @@ jobs: - stage: deploy python: 3.6 script: - - git tag --annotate 0.9.5 -m 0.9.5 || echo "tag exists, skipping" + - git tag --annotate 0.9.6 -m 0.9.6 || echo "tag exists, skipping" deploy: provider: pypi edge: true diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html new file mode 100644 index 000000000000..2e3db67c2c9c --- /dev/null +++ b/docs/_templates/footer.html @@ -0,0 +1,2 @@ +{% extends "!footer.html" %} +{%- if gitstamp %} {% set last_updated = gitstamp %}{%- endif %} diff --git a/docs/changelog/changelog.rst b/docs/changelog/changelog.rst index 37a24731dc07..62654412a047 100644 --- a/docs/changelog/changelog.rst +++ b/docs/changelog/changelog.rst @@ -1,8 +1,13 @@ .. _changelog: -develop +0.9.6 ----------------- - +* validate result dict when instantiating an ExpectationValidationResult (`#1133 `_) +* DataDocs: Expectation Suite name on Validation Result pages now link to Expectation Suite page +* `great_expectations init`: cli now asks user if csv has header when adding a Spark Datasource with csv file +* validate result dict when instantiating an ExpectationValidationResult (`#1133 `_) +* Improve support for using GCP Storage Bucket as a Data Docs Site backend (thanks @hahmed) +* fix notebook renderer handling for expectations with no column kwarg and table not in their name (`#1194 `_) 0.9.5 ----------------- @@ -642,4 +647,3 @@ to top-level names. * New output formats are available for all expectations * Significant improvements to test suite and compatibility -*Last updated*: |lastupdate| diff --git a/docs/command_line.rst b/docs/command_line.rst new file mode 100644 index 000000000000..b497e19664e0 --- /dev/null +++ b/docs/command_line.rst @@ -0,0 +1,345 @@ +.. _command_line: + +################################### +The Great Expectations Command Line +################################### + +.. toctree:: + :maxdepth: 2 + +After reading this guide, you will know: + +* How to create a Great Expectations project +* How to add new datasources +* How to add and edit expectation suites +* How to build and open Data Docs + +The Great Expectations command line is organized using a ** ** syntax. +This guide is organized by nouns (datasource, suite, docs) then verbs (new, list, edit, etc). + +Basics +====== + +There are a few commands that are critical to your everyday usage of Great Expectations. 
+This is a list of the most common commands you'll use in order of how much you'll probably use them: + +* ``great_expectations suite edit`` +* ``great_expectations suite new`` +* ``great_expectations suite list`` +* ``great_expectations docs build`` +* ``great_expectations datasource list`` +* ``great_expectations datasource new`` +* ``great_expectations datasource profile`` +* ``great_expectations init`` + +You can get a list of Great Expectations commands available to you by typing ``great_expectations --help``. +Each noun command and each verb sub-command has a description, and should help you find the thing you need. + +.. note:: + + All Great Expectations commands have help text. As with most *nix utilities, you can try adding ``--help`` to the end. + For example, by running ``great_expectations suite new --help`` you'll see help output for that specific command. + +.. code-block:: bash + + $ great_expectations --help + Usage: great_expectations [OPTIONS] COMMAND [ARGS]... + + Welcome to the great_expectations CLI! + + Most commands follow this format: great_expectations + The nouns are: datasource, docs, project, suite + Most nouns accept the following verbs: new, list, edit + + In addition, the CLI supports the following special commands: + + - great_expectations init : same as `project new` + - great_expectations datasource profile : profile a datasource + - great_expectations docs build : compile documentation from expectations + + Options: + --version Show the version and exit. + -v, --verbose Set great_expectations to use verbose output. + --help Show this message and exit. + + Commands: + datasource datasource operations + docs data docs operations + init initialize a new Great Expectations project + project project operations + suite expectation suite operations + + +great_expectations init +============================== + +To add Great Expectations to your project run the ``great_expectations init`` command in your project directory. +This will run you through a very short interactive experience to +connect to your data, show you some sample expectations, and open Data Docs. + +.. note:: + + You can install the Great Expectations python package by typing ``pip install great_expectations``, if you don't have it already. + +.. code-block:: bash + + $ great_expectations init + ... + +After this command has completed, you will have the entire Great Expectations directory structure with all the code you need to get started protecting your pipelines and data. + +great_expectations docs +============================== + +``great_expectations docs build`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``great_expectations docs build`` command builds your Data Docs site. +You'll use this any time you want to view your expectations and validations in a web browser. + +.. code-block:: bash + + $ great_expectations docs build + Building Data Docs... + The following Data Docs sites were built: + - local_site: + file:///Users/dickens/my_pipeline/great_expectations/uncommitted/data_docs/local_site/index.html + +great_expectations suite +============================== + +All command line operations for working with expectation suites are here. + +``great_expectations suite list`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Running ``great_expectations suite list`` gives a list of available expectation suites in your project: + +.. 
code-block:: bash + + $ great_expectations suite list + 3 expectation suites found: + customer_requests.warning + customer_requests.critical + churn_model_input + +``great_expectations suite new`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Create a new expectation suite. +Just as writing SQL queries is far better with access to data, so are writing expectations. +These are best written interactively against some data. + +To this end, this command interactively helps you choose some data, creates the new suite, adds sample expectations to it, and opens up Data Docs. + +.. important:: + + The sample suites generated **are not meant to be production suites** - they are examples only. + + Great Expectations will choose a couple of columns and generate expectations about them to demonstrate some examples of assertions you can make about your data. + +.. code-block:: bash + + $ great_expectations suite new + Enter the path (relative or absolute) of a data file + : data/npi.csv + + Name the new expectation suite [npi.warning]: + + Great Expectations will choose a couple of columns and generate expectations about them + to demonstrate some examples of assertions you can make about your data. + + Press Enter to continue + : + + Generating example Expectation Suite... + Building Data Docs... + The following Data Docs sites were built: + - local_site: + file:///Users/dickens/Desktop/great_expectations/uncommitted/data_docs/local_site/index.html + A new Expectation suite 'npi.warning' was added to your project + +To edit this suite you can click the **How to edit** button in Data Docs, or run the command: ``great_expectations suite edit npi.warning``. +This will generate a jupyter notebook and allow you to add, remove or adjust any expectations in the sample suite. + +.. important:: + + Great Expectations generates working jupyter notebooks when you make new suites and edit existing ones. + This saves you tons of time by avoiding all the necessary boilerplate. + + Because these notebooks can be generated at any time from the expectation suites (stored as JSON) you should **consider the notebooks to be entirely disposable artifacts**. + + They are put in your ``great_expectations/uncommitted`` directory and you can delete them at any time. + + Because they can expose actual data, we strongly suggest leaving them in the ``uncommitted`` directory to avoid potential data leaks into source control. + + +``great_expectations suite new --suite `` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you already know the name of the suite you want to create you can skip one of the interactive prompts and specify the suite name directly. + + +.. code-block:: bash + + $ great_expectations suite new --suite npi.warning + Enter the path (relative or absolute) of a data file + : data/npi.csv + ... (same as above) + + +``great_expectations suite new --empty`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you prefer to skip the example expectations and start writing expectations in a new empty suite directly in a jupyter notebook, add the ``--empty`` flag. + +.. code-block:: bash + + $ great_expectations suite new --empty + Enter the path (relative or absolute) of a data file + : data/npi.csv + + Name the new expectation suite [npi.warning]: npi.warning + A new Expectation suite 'npi.warning' was added to your project + Because you requested an empty suite, we\'ll open a notebook for you now to edit it! + If you wish to avoid this you can add the `--no-jupyter` flag. 
+
+    [I 14:55:15.992 NotebookApp] Serving notebooks from local directory: /Users/dickens/Desktop/great_expectations/uncommitted
+    ... (jupyter opens)
+
+
+``great_expectations suite new --empty --no-jupyter``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you prefer to disable Great Expectations from automatically opening the generated jupyter notebook, add the ``--no-jupyter`` flag.
+
+.. code-block:: bash
+
+    $ great_expectations suite new --empty --no-jupyter
+
+    Enter the path (relative or absolute) of a data file
+    : data/npi.csv
+
+    Name the new expectation suite [npi.warning]: npi.warning
+    A new Expectation suite 'npi.warning' was added to your project
+    To continue editing this suite, run jupyter notebook /Users/taylor/Desktop/great_expectations/uncommitted/npi.warning.ipynb
+
+You can then run jupyter.
+
+
+``great_expectations suite edit``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Edit an existing expectation suite.
+Just as writing SQL queries is far better with access to data, so is authoring expectations.
+These are best authored interactively against some data.
+This is best done in a jupyter notebook.
+
+.. note::
+    BatchKwargs define what data to use during editing.
+
+    - When suites are created through the CLI, the original batch_kwargs are stored in a piece of metadata called a citation.
+    - The edit command uses the most recent batch_kwargs as a way to know what data should be used for the interactive editing experience.
+    - It is often desirable to edit the suite on a different chunk of data.
+    - To do this you can edit the batch_kwargs in the generated notebook.
+
+To this end, this command interactively helps you choose some data, generates a working jupyter notebook, and opens up that notebook in jupyter.
+
+.. code-block:: bash
+
+    $ great_expectations suite edit npi.warning
+    [I 15:22:18.809 NotebookApp] Serving notebooks from local directory: /Users/dickens/Desktop/great_expectations/uncommitted
+    ... (jupyter runs)
+
+.. important::
+
+    Great Expectations generates working jupyter notebooks when you make new suites and edit existing ones.
+    This saves you tons of time by avoiding all the necessary boilerplate.
+
+    Because these notebooks can be generated at any time from the expectation suites (stored as JSON) you should **consider the notebooks to be entirely disposable artifacts**.
+
+    They are put in your ``great_expectations/uncommitted`` directory and you can delete them at any time.
+
+    Because they can expose actual data, we strongly suggest leaving them in the ``uncommitted`` directory to avoid potential data leaks into source control.
+
+
+``great_expectations suite edit --no-jupyter``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you prefer to disable Great Expectations from automatically opening the generated jupyter notebook, add the ``--no-jupyter`` flag.
+
+.. code-block:: bash
+
+    $ great_expectations suite edit npi.warning --no-jupyter
+    To continue editing this suite, run jupyter notebook /Users/dickens/Desktop/great_expectations/uncommitted/npi.warning.ipynb
+
+You can then run jupyter.
+
+
+
+great_expectations datasource
+==============================
+
+All command line operations for working with :ref:`datasources ` are here.
+A datasource is a connection to data and a processing engine.
+Examples of a datasource are: +- csv files processed in pandas or Spark +- a relational database such as Postgres, Redshift or BigQuery + +``great_expectations datasource list`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This command displays a list of your datasources and their types. +These can be found in your ``great_expectations/great_expectations.yml`` config file. + +.. code-block:: bash + + $ great_expectations datasource list + [{'name': 'files_datasource', 'class_name': 'PandasDatasource'}] + + +``great_expectations datasource new`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This interactive command helps you connect to your data. + +.. code-block:: bash + + $ great_expectations datasource list + What data would you like Great Expectations to connect to? + 1. Files on a filesystem (for processing with Pandas or Spark) + 2. Relational database (SQL) + : 1 + + What are you processing your files with? + 1. Pandas + 2. PySpark + : 1 + + Enter the path (relative or absolute) of the root directory where the data files are stored. + : data + + Give your new data source a short name. + [data__dir]: npi_drops + A new datasource 'npi_drops' was added to your project. + +If you are using a database you will be guided through a series of prompts that collects and verifies connection details and credentials. + + +``great_expectations datasource profile`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For details on profiling, see this :ref:`reference document` + +.. caution:: Profiling is a beta feature and is not guaranteed to be stable. YMMV + + +Miscellaneous +====================== + +* ``great_expectations project check`` checks your ``great_expectations/great_expectations.yml`` for validity. This is handy for occasional Great Expectations version migrations. + +Acknowledgements +====================== + +This article was heavily inspired by the phenomenal Rails Command Line Guide https://guides.rubyonrails.org/command_line.html. diff --git a/docs/community.rst b/docs/community.rst index 0046ac6a6707..0f4c74bbe7c9 100644 --- a/docs/community.rst +++ b/docs/community.rst @@ -36,4 +36,4 @@ Contribute code or documentation We welcome contributions to Great Expectations. Please start with our :ref:`contributing` guide and don't be shy with questions! -*last updated*: |lastupdate| + diff --git a/docs/conf.py b/docs/conf.py index 9998d001f312..dadb02d5f77a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,7 +40,7 @@ # 'sphinx.ext.mathjax' 'sphinx.ext.napoleon', 'sphinxcontrib.contentui', - 'sphinxcontrib.lastupdate', + 'sphinx_gitstamp', 'sphinx.ext.autosectionlabel' ] @@ -58,8 +58,9 @@ # General information about the project. project = u'great_expectations' -copyright = u'2019, The Great Expectations Team' +copyright = u'2020, The Great Expectations Team. 
' author = u'The Great Expectations Team' +gitstamp_fmt = "%d %b %Y" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/features.rst b/docs/features.rst index 4678154099ad..c6365c8e7869 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -23,4 +23,4 @@ and our `blog `__ for more information on how /features/batch_kwargs_generator /features/ge_on_teams -*last updated*: |lastupdate| + diff --git a/docs/features/batch_kwargs_generator.rst b/docs/features/batch_kwargs_generator.rst index b4f925dd3ade..ae891ddd7b11 100644 --- a/docs/features/batch_kwargs_generator.rst +++ b/docs/features/batch_kwargs_generator.rst @@ -31,5 +31,3 @@ assembled by the generator, "batch_markers" that provide more detailed metadata workflows, and optionally "batch_parameters" that include information such as an asset or partition name. See more detailed documentation on the :ref:`generator_module`. - -*last updated*: |lastupdate| diff --git a/docs/features/custom_expectations.rst b/docs/features/custom_expectations.rst index 1a9826ae083e..8a5bb19bbd76 100644 --- a/docs/features/custom_expectations.rst +++ b/docs/features/custom_expectations.rst @@ -22,4 +22,4 @@ even profiling workflows that make Great Expectations stand out. See the guide o for more information on building expectations and updating DataContext configurations to automatically load batches of data with custom Data Assets. -*last updated*: |lastupdate| + diff --git a/docs/features/data_docs.rst b/docs/features/data_docs.rst index e984dc9222ec..6e2848b33b9b 100644 --- a/docs/features/data_docs.rst +++ b/docs/features/data_docs.rst @@ -65,4 +65,4 @@ of these pages or create your own, see :ref:`customizing_data_docs`. See the :ref:`data_docs_reference` for more information. -*last updated*: |lastupdate| + diff --git a/docs/features/datasource.rst b/docs/features/datasource.rst index 6017930f6ef4..54c78e1d2a7d 100644 --- a/docs/features/datasource.rst +++ b/docs/features/datasource.rst @@ -17,4 +17,4 @@ See :ref:`datasource_reference` for more detail about configuring and using data See datasource module docs :ref:`datasource_module` for more detail about available datasources. -*last updated*: |lastupdate| + diff --git a/docs/features/expectations.rst b/docs/features/expectations.rst index 516c83c77512..a2a7eae34f3c 100644 --- a/docs/features/expectations.rst +++ b/docs/features/expectations.rst @@ -155,4 +155,4 @@ else relevant for understanding it: } ) -*last updated*: |lastupdate| + diff --git a/docs/features/profiling.rst b/docs/features/profiling.rst index f380679d2766..21898937ebc5 100644 --- a/docs/features/profiling.rst +++ b/docs/features/profiling.rst @@ -31,4 +31,4 @@ consuming. As a rule of thumb, we recommend starting with small batches of data. See the :ref:`profiling_reference` for more information. -*last updated*: |lastupdate| + diff --git a/docs/features/validation.rst b/docs/features/validation.rst index 33b48e734ec3..d430a34707ff 100644 --- a/docs/features/validation.rst +++ b/docs/features/validation.rst @@ -151,4 +151,4 @@ Useful deployment patterns include: For certain deployment patterns, it may be useful to parameterize expectations, and supply evaluation parameters at \ validation time. See :ref:`evaluation_parameters` for more information. 
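As a minimal sketch of this pattern (assuming the ``$PARAMETER`` syntax described in :ref:`evaluation_parameters`; the parameter name and file path here are invented for illustration):

.. code-block:: python

    import great_expectations as ge

    df = ge.read_csv("data/npi.csv")

    # Leave the threshold as a named parameter rather than a hard-coded value.
    df.expect_table_row_count_to_be_between(
        min_value={"$PARAMETER": "minimum_row_count"}
    )

    # Supply the concrete value when the batch is validated.
    results = df.validate(evaluation_parameters={"minimum_row_count": 1000})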
-*last updated*: |lastupdate| + diff --git a/docs/features/validation_operators_and_actions.rst b/docs/features/validation_operators_and_actions.rst index da4b54b0a43e..5d0279517565 100644 --- a/docs/features/validation_operators_and_actions.rst +++ b/docs/features/validation_operators_and_actions.rst @@ -127,4 +127,4 @@ The only requirement from an action is for it to have a take_action method. GE comes with a list of actions that we consider useful and you can reuse in your pipelines. Most of them take in validation results and do something with them. -*last updated*: |lastupdate| + diff --git a/docs/getting_started.rst b/docs/getting_started.rst index d4d910928f37..86637c67af76 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -22,6 +22,3 @@ Once Great Expectations is installed, follow this tutorial for a quick start. /getting_started/cli_init /getting_started/typical_workflow - - -*last updated*: |lastupdate| diff --git a/docs/getting_started/cli_init.rst b/docs/getting_started/cli_init.rst index 28087d1996d7..bed7be453c45 100644 --- a/docs/getting_started/cli_init.rst +++ b/docs/getting_started/cli_init.rst @@ -3,7 +3,7 @@ Run ``great_expectations init`` =============================================== -The command line interface (CLI) provides the easiest way to start using Great Expectations. +The :ref:`command line interface (CLI) ` provides the easiest way to start using Great Expectations. The `init` command will walk you through setting up a new project and connecting to your data. @@ -29,5 +29,3 @@ Once you have decided which data you will use, you are ready to start. Run this After you complete the `init` command, read this article to get a more complete picture of how data teams use Great Expectations: :ref:`typical_workflow`. - -*last updated*: |lastupdate| diff --git a/docs/getting_started/typical_workflow.rst b/docs/getting_started/typical_workflow.rst index 4c8cf41e19eb..15fb47860688 100644 --- a/docs/getting_started/typical_workflow.rst +++ b/docs/getting_started/typical_workflow.rst @@ -14,7 +14,7 @@ Once the setup is complete, the workflow looks like a loop over the following st The article focuses on the "What" and the "Why" of each step in this workflow, and touches on the "How" only briefly. The exact details of configuring and executing these steps are intentionally left out - they can be found in the tutorials and reference linked from each section. -If you have not installed Great Expectations and executed the CLI init command, as described in this :ref:`tutorial`, we recommend you do so before reading the rest of the article. This will make a lot of concepts mentioned below more familiar to you. +If you have not installed Great Expectations and executed the :ref:`command line interface (CLI) ` init command, as described in this :ref:`tutorial `, we recommend you do so before reading the rest of the article. This will make a lot of concepts mentioned below more familiar to you. Setting up a project @@ -24,7 +24,7 @@ To use Great Expectations in a new data project, a :ref:`Data Context` command ``init`` does the initialization. Run this command in the terminal in the root of your project's repo: .. code-block:: bash @@ -86,7 +86,7 @@ You can have multiple Datasources in a project (Data Context). For example, this All the Datasources that your project uses are configured in the project's configuration file ``great_expectations/great_expectations.yml``: -.. code-block:: +.. 
code-block:: yaml datasources: @@ -104,7 +104,7 @@ All the Datasources that your project uses are configured in the project's confi -The easiest way to add a datasource to the project is to use the CLI convenience command: +The easiest way to add a datasource to the project is to use the :ref:`CLI ` convenience command: .. code-block:: bash @@ -120,7 +120,7 @@ This is beyond the scope of this article. After a team member adds a new Datasource to the Data Context, they commit the updated configuration file into the version control in order to make the change available to the rest of the team. -Because ``great_expectations/great_expectations.yml`` is committed into version control, the CLI command **does not store the credentials in this file**. +Because ``great_expectations/great_expectations.yml`` is committed into version control, the :ref:`CLI ` command **does not store the credentials in this file**. Instead it saves them in a separate file: ``uncommitted/config_variables.yml`` which is not committed into version control. This means that that when another team member checks out the updated configuration file with the newly added Datasource, they must add their own credentials to their ``uncommitted/config_variables.yml`` or in environment variables. @@ -182,7 +182,7 @@ Create ******************************************** -While you could hand-author an Expectation Suite by writing a JSON file, just like with other features it is easier to let CLI save you time and typos. +While you could hand-author an Expectation Suite by writing a JSON file, just like with other features it is easier to let :ref:`CLI ` save you time and typos. Run this command in the root directory of your project (where the init command created the ``great_expectations`` subdirectory: @@ -226,7 +226,7 @@ The screenshot below shows the Python method and the Data Docs view for the same .. image:: ../images/exp_html_python_side_by_side.png -The Great Expectations CLI command ``suite edit`` generates a Jupyter notebook to edit a suite. +The Great Expectations :ref:`CLI ` command ``suite edit`` generates a Jupyter notebook to edit a suite. This command saves you time by generating boilerplate that loads a batch of data and builds a cell for every expectation in the suite. This makes editing suites a breeze. @@ -238,8 +238,8 @@ For example, to edit a suite called ``movieratings.ratings`` you would run: These generated Jupyter notebooks can be discarded and should not be kept in source control since they are auto-generated at will, and may contain snippets of actual data. -To make this easier still, the Data Docs page for each Expectation Suite has the CLI command syntax for you. -Simply press the "How to Edit This Suite" button, and copy/paste the CLI command into your terminal. +To make this easier still, the Data Docs page for each Expectation Suite has the :ref:`CLI ` command syntax for you. +Simply press the "How to Edit This Suite" button, and copy/paste the :ref:`CLI ` command into your terminal. .. image:: ../images/edit_e_s_popup.png diff --git a/docs/index.rst b/docs/index.rst index 0f6226d8cec0..0d19c70286a2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ communication between teams. 
Head over to the :ref:`intro` to learn more, or jum intro getting_started expectation_glossary + command_line tutorials features reference @@ -31,5 +32,3 @@ Index * :ref:`genindex` * :ref:`modindex` - -*last updated*: |lastupdate| diff --git a/docs/intro.rst b/docs/intro.rst index 62a178d197d2..da887b376744 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -121,4 +121,4 @@ If you'd like to contribute to Great Expectations, please head to the :ref:`comm If you'd like hands-on assistance setting up Great Expectations, establishing a healthy practice of data testing, or adding functionality to Great Expectations, please see options for consulting help `here `__. -*last updated*: |lastupdate| + diff --git a/docs/module_docs.rst b/docs/module_docs.rst index dc41b024b4a4..c7a8913ffd57 100644 --- a/docs/module_docs.rst +++ b/docs/module_docs.rst @@ -17,4 +17,4 @@ Module docs /module_docs/validation_operators_module /module_docs/great_expectations_module -*last updated*: |lastupdate| + diff --git a/docs/module_docs/data_context_module.rst b/docs/module_docs/data_context_module.rst index 7ca464b8ca2a..9d4a59d0b479 100644 --- a/docs/module_docs/data_context_module.rst +++ b/docs/module_docs/data_context_module.rst @@ -25,4 +25,4 @@ DataContext :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/dataset_module.rst b/docs/module_docs/dataset_module.rst index 5db2daaa7b49..d12db0966d5e 100644 --- a/docs/module_docs/dataset_module.rst +++ b/docs/module_docs/dataset_module.rst @@ -81,4 +81,4 @@ util :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/datasource_module.rst b/docs/module_docs/datasource_module.rst index ad5a85df19bc..a0d7c3c738ec 100644 --- a/docs/module_docs/datasource_module.rst +++ b/docs/module_docs/datasource_module.rst @@ -42,4 +42,4 @@ SparkDFDatasource :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/generator_module.rst b/docs/module_docs/generator_module.rst index 0258044328ef..384ec3f3a0ab 100644 --- a/docs/module_docs/generator_module.rst +++ b/docs/module_docs/generator_module.rst @@ -70,4 +70,4 @@ DatabricksTableBatchKwargsGenerator :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/profile_module.rst b/docs/module_docs/profile_module.rst index e6a0c43f52db..10a6d4382fc3 100644 --- a/docs/module_docs/profile_module.rst +++ b/docs/module_docs/profile_module.rst @@ -15,4 +15,4 @@ Profile Module :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/render_module.rst b/docs/module_docs/render_module.rst index 225647c730ac..ce3b98aae033 100644 --- a/docs/module_docs/render_module.rst +++ b/docs/module_docs/render_module.rst @@ -117,4 +117,4 @@ View Module :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/store_module.rst b/docs/module_docs/store_module.rst index a12ebe9dc90a..958b930a2d6a 100644 --- a/docs/module_docs/store_module.rst +++ b/docs/module_docs/store_module.rst @@ -31,4 +31,4 @@ Store Module :undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/module_docs/validation_operators_module.rst b/docs/module_docs/validation_operators_module.rst index 5e28bfa7e260..dbef1f951216 100644 --- a/docs/module_docs/validation_operators_module.rst +++ b/docs/module_docs/validation_operators_module.rst @@ -36,4 +36,4 @@ WarningAndFailureExpectationSuitesValidationOperator 
:undoc-members: :show-inheritance: -*last updated*: |lastupdate| + diff --git a/docs/reference.rst b/docs/reference.rst index 7f8378e7ffe1..cc70b628a598 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -89,4 +89,4 @@ Supporting Resources /reference/supporting_resources -*last updated*: |lastupdate| + diff --git a/docs/reference/batch_identification.rst b/docs/reference/batch_identification.rst index 17bbd0b10ab4..e59eb31218f8 100644 --- a/docs/reference/batch_identification.rst +++ b/docs/reference/batch_identification.rst @@ -32,4 +32,4 @@ Batch Id Batch Fingerprint ****************** -*last updated*: |lastupdate| + diff --git a/docs/reference/batch_kwargs.rst b/docs/reference/batch_kwargs.rst index b49ed6691b07..c520a8aa6114 100644 --- a/docs/reference/batch_kwargs.rst +++ b/docs/reference/batch_kwargs.rst @@ -10,4 +10,4 @@ Batch Kwargs represent the information required by a :ref:`Datasource` to fetch The `partition_id` provides a single string that can be used to represent a data asset inside the namespace defined by a given datasource/generator/generator_asset triple. -*last updated*: |lastupdate| + diff --git a/docs/reference/contributing.rst b/docs/reference/contributing.rst index e413eb4ccbeb..1937e5fbad66 100644 --- a/docs/reference/contributing.rst +++ b/docs/reference/contributing.rst @@ -10,4 +10,4 @@ ecosystem including plugins and examples using GE. For contributing directly to great expectations, the contributors' guide is located `here `__. -*last updated*: |lastupdate| + diff --git a/docs/reference/creating_expectations.rst b/docs/reference/creating_expectations.rst index 5d8d4fa55b1d..0ce8a1f9582a 100644 --- a/docs/reference/creating_expectations.rst +++ b/docs/reference/creating_expectations.rst @@ -30,4 +30,4 @@ This is how you always know what to expect from your data. For more detail on how to control expectation output, please see :ref:`standard_arguments` and :ref:`result_format`. -*last updated*: |lastupdate| + diff --git a/docs/reference/custom_expectations.rst b/docs/reference/custom_expectations.rst index 504e2ecf3bb2..6dc8579ded56 100644 --- a/docs/reference/custom_expectations.rst +++ b/docs/reference/custom_expectations.rst @@ -433,4 +433,4 @@ structure below. "unexpected_list": [2,2,2,2,2,2,2,2] } -*last updated*: |lastupdate| + diff --git a/docs/reference/data_asset_features.rst b/docs/reference/data_asset_features.rst index 0541d5b390a1..c7a591f8523c 100644 --- a/docs/reference/data_asset_features.rst +++ b/docs/reference/data_asset_features.rst @@ -82,4 +82,4 @@ Dynamically adjusting interactive evaluation } } -*last updated*: |lastupdate| + diff --git a/docs/reference/data_context_reference.rst b/docs/reference/data_context_reference.rst index f6735ca20825..a4382bfbe58b 100644 --- a/docs/reference/data_context_reference.rst +++ b/docs/reference/data_context_reference.rst @@ -23,7 +23,7 @@ Datasources Datasources tell Great Expectations where your data lives and how to get it. -Using the CLI command ``great_expectations datasource new`` is the easiest way to +Using the :ref:`CLI ` command ``great_expectations datasource new`` is the easiest way to add a new datasource. The `datasources` section declares which :ref:`datasource` objects should be available in the DataContext. 
@@ -364,5 +364,3 @@ new directory or use this template: store_backend: class_name: TupleFilesystemStoreBackend base_directory: uncommitted/data_docs/local_site/ - -*last updated*: |lastupdate| diff --git a/docs/reference/data_docs_reference.rst b/docs/reference/data_docs_reference.rst index 508bba8e9f49..63c7ae8b8a6c 100644 --- a/docs/reference/data_docs_reference.rst +++ b/docs/reference/data_docs_reference.rst @@ -176,7 +176,7 @@ Building Data Docs Using the CLI =============== -The great_expectations CLI can build comprehensive Data Docs from expectation +The :ref:`Great Expectations CLI ` can build comprehensive Data Docs from expectation suites available to the configured context and validations available in the ``great_expectations/uncommitted`` directory. @@ -473,5 +473,3 @@ Dependencies * Vega-Embed 4.0.0 Data Docs is implemented in the :py:mod:`great_expectations.render` module. - -*last updated*: |lastupdate| diff --git a/docs/reference/distributional_expectations.rst b/docs/reference/distributional_expectations.rst index 477d8eb34465..3205db614d42 100644 --- a/docs/reference/distributional_expectations.rst +++ b/docs/reference/distributional_expectations.rst @@ -121,4 +121,4 @@ Distributional Expectations Alternatives -------------------------------------------------------------------------------- The core partition density object used in current expectations focuses on a particular (partition-based) method of "compressing" the data into a testable form, however it may be desireable to use alternative nonparametric approaches (e.g. Fourier transform/wavelets) to describe expected data. -*last updated*: |lastupdate| + diff --git a/docs/reference/evaluation_parameters.rst b/docs/reference/evaluation_parameters.rst index 408d5e90e70c..14d97069f9aa 100644 --- a/docs/reference/evaluation_parameters.rst +++ b/docs/reference/evaluation_parameters.rst @@ -57,4 +57,4 @@ Replace names in ``<>`` with the desired name. For example: urn:great_expectations:validations:dickens_data:expect_column_proportion_of_unique_values_to_be_between.result.observed_value:column=Title -*last updated*: |lastupdate| + diff --git a/docs/reference/extending_great_expectations.rst b/docs/reference/extending_great_expectations.rst index ef4d20e5fd30..4a30394b8df6 100644 --- a/docs/reference/extending_great_expectations.rst +++ b/docs/reference/extending_great_expectations.rst @@ -24,4 +24,4 @@ your dataset see consistent documentation no matter which backend is implementin `@DocInherit` overrides your function's __get__ method with one that will replace the local docstring with the docstring from its parent. It is defined in `Dataset.util`. -*last updated*: |lastupdate| + diff --git a/docs/reference/implemented_expectations.rst b/docs/reference/implemented_expectations.rst index 77fba4c34b86..c5a866499a3b 100644 --- a/docs/reference/implemented_expectations.rst +++ b/docs/reference/implemented_expectations.rst @@ -100,4 +100,4 @@ out the missing implementations! 
|`expect_multicolumn_values_to_be_unique` | Y | N | N | +------------------------------------------------------------------------------+------------+---------+-----------+ -*last updated*: |lastupdate| + diff --git a/docs/reference/improving_library_documentation.rst b/docs/reference/improving_library_documentation.rst index 96d72d3a001b..d606438c7ff1 100644 --- a/docs/reference/improving_library_documentation.rst +++ b/docs/reference/improving_library_documentation.rst @@ -32,12 +32,12 @@ Within the table of contents, each section has specific role to play. * *Community* helps expand the Great Expectations community by explaining how to get in touch to ask questions, make contributions, etc. * *Core concepts* are always phrased as nouns. These docs provide more examples of usage, and deeper explanations for why Great Expectations is set up the way it is. * *reference* are always phrased as verbs: "Creating custom Expectations", "Deploying Great Expectations in Spark", etc. They help users accomplish specific goals that go beyond the generic Getting Started tutorials. -* *Changelog and roadmap* +* *Changelog and roadmap* * *Module docs* **CLI** -The CLI has some conventions of its own. +The :ref:`CLI ` has some conventions of its own. * The CLI never writes to disk without asking first. * Questions are always phrased as conversational sentences. @@ -48,12 +48,10 @@ The CLI has some conventions of its own. * Within those constraints, shorter is better. When in doubt, shorten. * Clickable links (usually to documentation) are blue. * Copyable bash commands are green. -* All top-level bash commands must be verbs: "build documentation", not "documentation" +* All top-level bash commands must be nouns: "docs build", not "build docs" Resources =========== * We follow the `Sphinx guide for sections `__. - -*last updated*: |lastupdate| diff --git a/docs/reference/integrations/bigquery.rst b/docs/reference/integrations/bigquery.rst index 51c1adae6239..c27a2459af48 100644 --- a/docs/reference/integrations/bigquery.rst +++ b/docs/reference/integrations/bigquery.rst @@ -18,7 +18,7 @@ To add a BigQuery datasource do this: "bigquery://project-name" -5. Paste in this connection string and finish out the cli prompts. +5. Paste in this connection string and finish out the interactive prompts. 6. Should you need to modify your connection string you can manually edit the ``great_expectations/uncommitted/config_variables.yml`` file. @@ -58,7 +58,3 @@ Follow the `Google Cloud library guide `__ flag. $ git clone https://github.com/great-expectations/great_expectations.git $ pip install -e great_expectations/ -*last updated*: |lastupdate| + diff --git a/docs/reference/usage_statistics.rst b/docs/reference/usage_statistics.rst index 333893631f36..f088065e1297 100644 --- a/docs/reference/usage_statistics.rst +++ b/docs/reference/usage_statistics.rst @@ -7,6 +7,8 @@ Usage Statistics We use CDN fetch rates to get a sense of total community usage of Great Expectations. Specifically, we host images and style sheets on a public CDN and count the number of unique IPs from which resources are fetched. -Other than standard web request data, we don’t collect any data data that could be used to identify individual users. You can suppress the images by changing `static_images_dir` in `great_expectations/render/view/templates/top_navbar.j2`. Please reach out on Slack if you have any questions or comments. +Other than standard web request data, we don’t collect any data data that could be used to identify individual users. 
You can suppress the images by changing ``static_images_dir`` in ``great_expectations/render/view/templates/top_navbar.j2``. + +Please reach out `on Slack `__ if you have any questions or comments. + -*last updated*: |lastupdate| diff --git a/docs/reference/validation_operators.rst b/docs/reference/validation_operators.rst index 9535d5e4605e..98f078535e99 100644 --- a/docs/reference/validation_operators.rst +++ b/docs/reference/validation_operators.rst @@ -12,4 +12,4 @@ Validation Operators /reference/validation_operators/warning_and_failure_expectation_suites_validation_operator /reference/validation_operators/actions -*last updated*: |lastupdate| + diff --git a/docs/reference/validation_operators/actions.rst b/docs/reference/validation_operators/actions.rst index eef3ec9015bc..d841b4d36ed1 100644 --- a/docs/reference/validation_operators/actions.rst +++ b/docs/reference/validation_operators/actions.rst @@ -96,4 +96,4 @@ Dependencies When configured inside action_list of an operator, StoreValidationResultAction action has to be configured before this action, since the building of data docs fetches validation results from the store. -*last updated*: |lastupdate| + diff --git a/docs/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.rst b/docs/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.rst index c05c30ca82f4..d536b59f50c6 100644 --- a/docs/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.rst +++ b/docs/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.rst @@ -99,4 +99,4 @@ The value of "success" is True if no critical expectation suites ("failure") fai } } -*last updated*: |lastupdate| + diff --git a/docs/requirements.txt b/docs/requirements.txt index 09b133d07d0f..dfc1d8216bde 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,4 @@ sphinx>=2.1 sybil>=1.2.1 sphinx_rtd_theme>=0.4.3 sphinxcontrib-contentui>=0.2.4 -sphinxcontrib-lastupdate>=1.1 \ No newline at end of file +sphinx-gitstamp>=0.3.1 \ No newline at end of file diff --git a/docs/roadmap_changelog.rst b/docs/roadmap_changelog.rst index 37d72c7a1c1f..91d7b64b7439 100644 --- a/docs/roadmap_changelog.rst +++ b/docs/roadmap_changelog.rst @@ -23,4 +23,4 @@ Changelog /changelog/changelog -*last updated*: |lastupdate| + diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 96313e5749f3..1ec39f2e3b29 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -15,4 +15,4 @@ This is a collection of tutorials that walk you through a variety of useful Grea /tutorials/publishing_data_docs_to_s3 /tutorials/saving_metrics -*last updated*: |lastupdate| + diff --git a/docs/tutorials/create_expectations.rst b/docs/tutorials/create_expectations.rst index 407e0b3bd6fc..04fda89a6828 100644 --- a/docs/tutorials/create_expectations.rst +++ b/docs/tutorials/create_expectations.rst @@ -30,10 +30,9 @@ We will describe the Create, Review and Edit steps in brief: Create an Expectation Suite ---------------------------------------- +Expectation Suites are saved as JSON files, so you *could* create a new suite by writing a file directly. However the preferred way is to let the CLI save you time and typos. If you cannot use the :ref:`CLI ` in your environment (e.g., in a Databricks cluster), you can create and edit an Expectation Suite in a notebook. Jump to this section for details: :ref:`Jupyter Notebook for Creating and Editing Expectation Suites`. 
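For orientation, a condensed sketch of that notebook workflow, as described in the section referenced above (the datasource name, suite name, and file path are reused from the examples in this guide):

.. code-block:: python

    import great_expectations as ge

    context = ge.data_context.DataContext()
    context.create_expectation_suite("npi.warning")

    # Load a batch of data against the new suite and add expectations interactively.
    batch = context.get_batch(
        {"datasource": "files_datasource", "path": "data/npi.csv"},
        "npi.warning",
    )
    batch.expect_table_row_count_to_be_between(min_value=1)
    batch.save_expectation_suite()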
-
-Expectation Suites are saved as JSON files, so you *could* create a new suite by writing a file directly. However the preferred way is to let the CLI save you time and typos. If you cannot use the CLI in your environment (e.g., in a Databricks cluster), you can create and edit an Expectation Suite in a notebook. Jump to this section for details: :ref:`Jupyter Notebook for Creating and Editing Expectation Suites`.
-
-To continue with the CLI, run this command in the root directory of your project (where the init command created the ``great_expectations`` subdirectory:
+To continue with the :ref:`CLI `, run this command in the root directory of your project (where the init command created the ``great_expectations`` subdirectory):

 .. code-block:: bash

@@ -72,7 +71,7 @@ Take a look at the screenshot below. It shows the HTML view and the Python metho

 .. image:: ../images/exp_html_python_side_by_side .png

-The CLI provides a command that, given an Expectation Suite, generates a Jupyter notebook to edit it. It takes care of generating a cell for every expectation in the suite and of getting a sample batch of data. The HTML page for each Expectation Suite has the CLI command syntax in order to make it easier for users.
+The :ref:`CLI ` provides a command that, given an Expectation Suite, generates a Jupyter notebook to edit it. It takes care of generating a cell for every expectation in the suite and of getting a sample batch of data. The HTML page for each Expectation Suite has the CLI command syntax in order to make it easier for users.

 .. image:: ../images/edit_e_s_popup.png

@@ -87,10 +86,10 @@ To understand this auto-generated notebook in more depth, jump to this section:
 Jupyter Notebook for Creating and Editing Expectation Suites
 ------------------------------------------------------------
-If you used the CLI `suite new` command to create an Expectation Suite and then the `suite edit` command to edit it, then the CLI generated a notebook in the ``great_expectations/uncommitted/`` folder for you. There is no need to check this notebook in to version control. Next time you decide to
-edit this Expectation Suite, use the CLI again to generate a new notebook that reflects the expectations in the suite at that time.
+If you used the :ref:`CLI ` `suite new` command to create an Expectation Suite and then the `suite edit` command to edit it, then the CLI generated a notebook in the ``great_expectations/uncommitted/`` folder for you. There is no need to check this notebook in to version control. Next time you decide to
+edit this Expectation Suite, use the :ref:`CLI ` again to generate a new notebook that reflects the expectations in the suite at that time.

-If you do not use the CLI, create a new notebook in the``great_expectations/notebooks/`` folder in your project.
+If you do not use the :ref:`CLI `, create a new notebook in the ``great_expectations/notebooks/`` folder in your project.

 1.
Setup @@ -301,7 +300,3 @@ To view the expectation suite you just created as HTML, rebuild the data docs an # and open the site in the browser context.build_data_docs() context.open_data_docs(validation_result_identifier) - - - -*last updated*: |lastupdate| diff --git a/docs/tutorials/publishing_data_docs_to_s3.rst b/docs/tutorials/publishing_data_docs_to_s3.rst index ce9371239cf2..5292c5540b38 100644 --- a/docs/tutorials/publishing_data_docs_to_s3.rst +++ b/docs/tutorials/publishing_data_docs_to_s3.rst @@ -106,4 +106,4 @@ For more information on static site hosting in AWS, see the following: - `AWS Static Site Access Permissions `_ - `AWS Website configuration `_ -*last updated*: |lastupdate| + diff --git a/docs/tutorials/saving_metrics.rst b/docs/tutorials/saving_metrics.rst index a90f9d121ac1..c881926b77d6 100644 --- a/docs/tutorials/saving_metrics.rst +++ b/docs/tutorials/saving_metrics.rst @@ -77,4 +77,4 @@ Now, when your operator is executed, the requested metrics will be available in context.run_validation_operator('action_list_operator', (batch_kwargs, expectation_suite_name)) -*Last updated*: |lastupdate| + diff --git a/docs/tutorials/validate_data.rst b/docs/tutorials/validate_data.rst index 8ece6a3d1f27..047cd74b9d7a 100644 --- a/docs/tutorials/validate_data.rst +++ b/docs/tutorials/validate_data.rst @@ -259,7 +259,7 @@ Save Validation Results The DataContext object provides a configurable ``validations_store`` where GE can store validation_result objects for subsequent evaluation and review. By default, the DataContext stores results in the ``great_expectations/uncommitted/validations`` directory. To specify a different directory or use a remote store such -as ``s3`` or ``gcs``, edit stores section of the DataContext configuration object: +as ``s3`` or ``gcs``, edit the stores section of the DataContext configuration object: .. code-block:: bash @@ -271,8 +271,7 @@ as ``s3`` or ``gcs``, edit stores section of the DataContext configuration objec bucket: my_bucket prefix: my_prefix -Removing the store_validation_result action from the action_list_operator configuration will disable automatically storing validation_result -objects. +Removing the store_validation_result action from the ``action_list_operator`` configuration will disable automatically storing ``validation_result`` objects. Send a Slack Notification ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -347,4 +346,4 @@ Now you you know how to validate a Batch of data. What is next? This is a collection of tutorials that walk you through a variety of useful Great Expectations workflows: :ref:`tutorials`. 
-*last updated*: |lastupdate| + diff --git a/great_expectations/cli/datasource.py b/great_expectations/cli/datasource.py index 4738f8d713a6..5ce997b74173 100644 --- a/great_expectations/cli/datasource.py +++ b/great_expectations/cli/datasource.py @@ -659,7 +659,6 @@ def _add_spark_datasource(context, passthrough_generator_only=True, prompt_for_d } ) - context.add_datasource(name=datasource_name, class_name='SparkDFDatasource', **configuration) return datasource_name @@ -1045,11 +1044,28 @@ def _get_batch_kwargs_from_generator_or_from_file_path(context, datasource_name, if reader_method is not None: batch_kwargs["reader_method"] = reader_method + if isinstance(datasource, SparkDFDatasource) and reader_method == "csv": + header_row = click.confirm( + "\nDoes this file contain a header row?", + default=True + ) + batch_kwargs["reader_options"] = { + "header": header_row + } batch = datasource.get_batch(batch_kwargs=batch_kwargs) break else: # TODO: read the file and confirm with user that we read it correctly (headers, columns, etc.) try: + batch_kwargs["reader_method"] = reader_method + if isinstance(datasource, SparkDFDatasource) and reader_method == "csv": + header_row = click.confirm( + "\nDoes this file contain a header row?", + default=True + ) + batch_kwargs["reader_options"] = { + "header": header_row + } batch = datasource.get_batch(batch_kwargs=batch_kwargs) break except Exception as e: @@ -1059,7 +1075,7 @@ def _get_batch_kwargs_from_generator_or_from_file_path(context, datasource_name, - Error: {0:s}""" cli_message(file_load_error_message.format(str(e))) if not click.confirm( - "Try again?", + "\nTry again?", default=True ): cli_message(""" diff --git a/great_expectations/core/__init__.py b/great_expectations/core/__init__.py index 0b1a2a4140f9..1288dc2c0d7d 100644 --- a/great_expectations/core/__init__.py +++ b/great_expectations/core/__init__.py @@ -17,7 +17,7 @@ from great_expectations.types import DictDot from great_expectations.exceptions import InvalidExpectationConfigurationError, InvalidExpectationKwargsError, \ - UnavailableMetricError, ParserError + UnavailableMetricError, ParserError, InvalidCacheValueError logger = logging.getLogger(__name__) @@ -623,6 +623,8 @@ def make_expectation_suite(self, data, **kwargs): class ExpectationValidationResult(object): def __init__(self, success=None, expectation_config=None, result=None, meta=None, exception_info=None): + if result and not self.validate_result_dict(result): + raise InvalidCacheValueError(result) self.success = success self.expectation_config = expectation_config # TODO: re-add @@ -676,6 +678,20 @@ def __repr__(self): def __str__(self): return json.dumps(self.to_json_dict(), indent=2) + def validate_result_dict(self, result): + if result.get("unexpected_count") and result["unexpected_count"] < 0: + return False + if result.get("unexpected_percent") and (result["unexpected_percent"] < 0 or result["unexpected_percent"] > 100): + return False + if result.get("missing_percent") and (result["missing_percent"] < 0 or result["missing_percent"] > 100): + return False + if result.get("unexpected_percent_nonmissing") and ( + result["unexpected_percent_nonmissing"] < 0 or result["unexpected_percent_nonmissing"] > 100): + return False + if result.get("missing_count") and result["missing_count"] < 0: + return False + return True + def to_json_dict(self): myself = expectationValidationResultSchema.dump(self).data # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed diff --git 
a/great_expectations/data_context/data_context.py b/great_expectations/data_context/data_context.py index 1dc7a20372a9..9b846e8831be 100644 --- a/great_expectations/data_context/data_context.py +++ b/great_expectations/data_context/data_context.py @@ -481,7 +481,7 @@ def get_batch(self, batch_kwargs, expectation_suite_name, data_asset_type=None, if not isinstance(expectation_suite_name, (ExpectationSuite, ExpectationSuiteIdentifier, string_types)): raise ge_exceptions.DataContextError( - "expectation_suite_name must be an ExepctationSuite, " + "expectation_suite_name must be an ExpectationSuite, " "ExpectationSuiteIdentifier or string." ) diff --git a/great_expectations/data_context/store/tuple_store_backend.py b/great_expectations/data_context/store/tuple_store_backend.py index 16390eca15cc..4f9890a17e84 100644 --- a/great_expectations/data_context/store/tuple_store_backend.py +++ b/great_expectations/data_context/store/tuple_store_backend.py @@ -258,8 +258,7 @@ def list_keys(self, prefix=()): continue elif self.filepath_suffix and not filepath.endswith(self.filepath_suffix): continue - else: - key = self._convert_filepath_to_key(filepath) + key = self._convert_filepath_to_key(filepath) if key: key_list.append(key) @@ -363,9 +362,9 @@ def list_keys(self): self.prefix, ) if self.filepath_prefix and not s3_object_key.startswith(self.filepath_prefix): - # There can be other keys located in the same bucket; they are *not* our keys continue - + elif self.filepath_suffix and not s3_object_key.endswith(self.filepath_suffix): + continue key = self._convert_filepath_to_key(s3_object_key) if key: key_list.append(key) @@ -467,14 +466,20 @@ def list_keys(self): gcs_object_name, self.prefix, ) - + if self.filepath_prefix and not gcs_object_key.startswith(self.filepath_prefix): + continue + elif self.filepath_suffix and not gcs_object_key.endswith(self.filepath_suffix): + continue key = self._convert_filepath_to_key(gcs_object_key) if key: key_list.append(key) return key_list + def get_url_for_key(self, key, protocol=None): + path = self._convert_key_to_filepath(key) + return "https://storage.googleapis.com/" + self.bucket + "/" + path + def _has_key(self, key): all_keys = self.list_keys() return key in all_keys - diff --git a/great_expectations/exceptions.py b/great_expectations/exceptions.py index f4d145ff4d36..270c4969f7c0 100644 --- a/great_expectations/exceptions.py +++ b/great_expectations/exceptions.py @@ -1,4 +1,5 @@ from marshmallow import ValidationError +import json class GreatExpectationsError(Exception): @@ -107,6 +108,18 @@ class GreatExpectationsTypeError(TypeError): pass +class InvalidCacheValueError(GreatExpectationsError): + def __init__(self, result_dict): + template = """\ +Error: Invalid result values were found when trying to instantiate an ExpectationValidationResult. +- Invalid result values are likely caused by inconsistent cache values. +- Great Expectations enables caching by default. +- Please ensure that caching behavior is consistent between the underlying Dataset (e.g. Spark) and Great Expectations. 
+Result: {} +""" + self.message = template.format(json.dumps(result_dict, indent=2)) + + class ConfigNotFoundError(DataContextError): """The great_expectations dir could not be found.""" def __init__(self): diff --git a/great_expectations/render/renderer/notebook_renderer.py b/great_expectations/render/renderer/notebook_renderer.py index 0a7c71dd5071..b0c2b094f2c0 100755 --- a/great_expectations/render/renderer/notebook_renderer.py +++ b/great_expectations/render/renderer/notebook_renderer.py @@ -23,14 +23,15 @@ def _get_expectations_by_column(cls, expectations): # TODO probably replace this with Suite logic at some point expectations_by_column = {"table_expectations": []} for exp in expectations: - if "_table_" in exp["expectation_type"]: - expectations_by_column["table_expectations"].append(exp) - else: + if "column" in exp["kwargs"]: col = exp["kwargs"]["column"] if col not in expectations_by_column.keys(): expectations_by_column[col] = [] expectations_by_column[col].append(exp) + else: + expectations_by_column["table_expectations"].append(exp) + return expectations_by_column @classmethod diff --git a/great_expectations/render/renderer/page_renderer.py b/great_expectations/render/renderer/page_renderer.py index 1c56ae85b366..2996fd400822 100644 --- a/great_expectations/render/renderer/page_renderer.py +++ b/great_expectations/render/renderer/page_renderer.py @@ -1,4 +1,5 @@ import logging +import os from marshmallow import ValidationError from six import string_types @@ -142,6 +143,9 @@ def render(self, validation_results): def _render_validation_header(cls, validation_results): success = validation_results.success expectation_suite_name = validation_results.meta['expectation_suite_name'] + expectation_suite_path_components = ['..' for _ in range(len(expectation_suite_name.split('.')) + 2)] \ + + ["expectations"] + expectation_suite_name.split(".") + expectation_suite_path = os.path.join(*expectation_suite_path_components) + ".html" if success: success = ' Succeeded' else: @@ -175,6 +179,12 @@ def _render_validation_header(cls, validation_results): }, "status_title": { "classes": ["h6"] + }, + "expectation_suite_name": { + "tag": "a", + "attributes": { + "href": expectation_suite_path + } } }, "classes": ["mb-0", "mt-1"] diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 83e4e330eb72..6297a180f7a9 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -17,6 +17,7 @@ def test_cli_command_entrance(caplog): runner = CliRunner(mix_stderr=False) result = runner.invoke(cli, catch_exceptions=False) assert result.exit_code == 0 + print(result.output) assert ( result.output == """Usage: cli [OPTIONS] COMMAND [ARGS]... 
diff --git a/tests/cli/test_suite.py b/tests/cli/test_suite.py index 33a615e75b2f..b2089ea95df4 100644 --- a/tests/cli/test_suite.py +++ b/tests/cli/test_suite.py @@ -211,7 +211,7 @@ def test_suite_new_empty_suite_creates_empty_suite( citations = suite.get_citations() citations[0].pop("citation_date") assert citations[0] == { - "batch_kwargs": {"datasource": "mydatasource", "path": csv}, + "batch_kwargs": {"datasource": "mydatasource", "path": csv, 'reader_method': 'read_csv'}, "batch_markers": None, "batch_parameters": None, "comment": "New suite added via CLI", @@ -278,11 +278,14 @@ def test_suite_new_empty_suite_creates_empty_suite_with_no_jupyter( citations = suite.get_citations() citations[0].pop("citation_date") assert citations[0] == { - "batch_kwargs": {"datasource": "mydatasource", "path": csv}, - "batch_markers": None, - "batch_parameters": None, - "comment": "New suite added via CLI", - } + 'batch_kwargs': { + 'datasource': 'mydatasource', + 'path': csv, + 'reader_method': 'read_csv' + }, + 'batch_markers': None, + 'batch_parameters': None, + 'comment': 'New suite added via CLI'} assert mock_subprocess.call_count == 0 assert mock_webbroser.call_count == 0 diff --git a/tests/render/test_page_renderer.py b/tests/render/test_page_renderer.py index 98625f82673b..0f21823e1062 100644 --- a/tests/render/test_page_renderer.py +++ b/tests/render/test_page_renderer.py @@ -161,20 +161,27 @@ def test_ProfilingResultsPageRenderer(titanic_profiled_evrs_1): def test_ValidationResultsPageRenderer_render_validation_header(titanic_profiled_evrs_1): validation_header = ValidationResultsPageRenderer._render_validation_header(titanic_profiled_evrs_1).to_json_dict() + expected_validation_header = { - 'content_block_type': 'header', 'styling': {'classes': ['col-12', 'p-0'], 'header': { - 'classes': ['alert', 'alert-secondary']}}, 'header': {'content_block_type': 'string_template', - 'string_template': {'template': 'Overview', - 'tag': 'h5', - 'styling': {'classes': ['m-0']}}}, + 'content_block_type': 'header', + 'styling': { + 'classes': ['col-12', 'p-0'], 'header': { + 'classes': ['alert', 'alert-secondary']}}, 'header': {'content_block_type': 'string_template', + 'string_template': {'template': 'Overview', + 'tag': 'h5', + 'styling': { + 'classes': ['m-0']}}}, 'subheader': {'content_block_type': 'string_template', 'string_template': { 'template': '${suite_title} ${expectation_suite_name}\n${status_title} ${success}', 'params': {'suite_title': 'Expectation Suite:', 'status_title': 'Status:', 'expectation_suite_name': 'default', 'success': ' Failed'}, 'styling': {'params': {'suite_title': {'classes': ['h6']}, - 'status_title': {'classes': ['h6']}}, + 'status_title': {'classes': ['h6']}, + 'expectation_suite_name': {'tag': 'a', 'attributes': { + 'href': '../../../expectations/default.html'}}}, 'classes': ['mb-0', 'mt-1']}}}} + # print(validation_header) assert validation_header == expected_validation_header diff --git a/tests/test_great_expectations.py b/tests/test_great_expectations.py index 08cc1cfa3f17..7dbf0ae5771d 100644 --- a/tests/test_great_expectations.py +++ b/tests/test_great_expectations.py @@ -17,6 +17,7 @@ expectationSuiteSchema, expectationSuiteValidationResultSchema, ) +from great_expectations.exceptions import InvalidCacheValueError from great_expectations.data_asset.data_asset import ( ValidationStatistics, _calc_validation_statistics, @@ -246,6 +247,55 @@ def test_validate(): assert expected_results == validation_results 
+@mock.patch('great_expectations.core.ExpectationValidationResult.validate_result_dict', return_value=False) +@pytest.mark.xfail(condition=PY2, reason="legacy python") +def test_validate_with_invalid_result_catch_exceptions_false(validate_result_dict): + + with open(file_relative_path(__file__, "./test_sets/titanic_expectations.json")) as f: + my_expectation_suite = expectationSuiteSchema.loads(f.read()).data + + with mock.patch("uuid.uuid1") as uuid: + uuid.return_value = "1234" + my_df = ge.read_csv( + file_relative_path(__file__, "./test_sets/Titanic.csv"), + expectation_suite=my_expectation_suite + ) + my_df.set_default_expectation_argument("result_format", "COMPLETE") + + with pytest.raises(InvalidCacheValueError): + my_df.validate(catch_exceptions=False) + + +@mock.patch('great_expectations.core.ExpectationValidationResult.validate_result_dict', return_value=False) +@pytest.mark.xfail(condition=PY2, reason="legacy python") +def test_validate_with_invalid_result(validate_result_dict): + + with open(file_relative_path(__file__, "./test_sets/titanic_expectations.json")) as f: + my_expectation_suite = expectationSuiteSchema.loads(f.read()).data + + with mock.patch("uuid.uuid1") as uuid: + uuid.return_value = "1234" + my_df = ge.read_csv( + file_relative_path(__file__, "./test_sets/Titanic.csv"), + expectation_suite=my_expectation_suite + ) + my_df.set_default_expectation_argument("result_format", "COMPLETE") + + with mock.patch("datetime.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(1955, 11, 5) + results = my_df.validate() # catch_exceptions=True is default + + with open(file_relative_path(__file__, './test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json')) as f: + expected_results = expectationSuiteValidationResultSchema.loads(f.read()).data + + del results.meta["great_expectations.__version__"] + + for result in results.results: + result.exception_info.pop("exception_traceback") + + assert expected_results == results + + def test_validate_catch_non_existent_expectation(): df = ge.dataset.PandasDataset({ "x": [1, 2, 3, 4, 5] diff --git a/tests/test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json b/tests/test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json new file mode 100644 index 000000000000..598ab52043ae --- /dev/null +++ b/tests/test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json @@ -0,0 +1,192 @@ +{ + "meta": { + "expectation_suite_name": "titanic", + "run_id": "19551105T000000.000000Z", + "batch_kwargs": { + "ge_batch_id": "1234" + }, + "batch_markers": {}, + "batch_parameters": {} + }, + "results": [ + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Name" + }, + "meta": {} + } + }, + { + "meta": {}, + "success": false, + "result": {}, + "exception_info": { + "raised_exception": true, + "exception_message": "InvalidCacheValueError: {'element_count': 1313, 'missing_count': 0, 'missing_percent': 0.0, 'unexpected_count': 3, 'unexpected_percent': 0.2284843869002285, 'unexpected_percent_nonmissing': 0.2284843869002285, 'partial_unexpected_list': ['Downton (?Douton), Mr William James', 'Jacobsohn Mr Samuel', 'Seman Master Betros'], 'partial_unexpected_index_list': [394, 456, 1195], 'partial_unexpected_counts': [{'value': 'Downton (?Douton), Mr William James', 'count': 1}, 
{'value': 'Jacobsohn Mr Samuel', 'count': 1}, {'value': 'Seman Master Betros', 'count': 1}], 'unexpected_list': ['Downton (?Douton), Mr William James', 'Jacobsohn Mr Samuel', 'Seman Master Betros'], 'unexpected_index_list': [394, 456, 1195]}" + }, + "expectation_config": { + "expectation_type": "expect_column_values_to_match_regex", + "kwargs": { + "regex": "[A-Z][a-z]+(?: \\([A-Z][a-z]+\\))?, ", + "column": "Name", + "mostly": 0.95 + }, + "meta": {} + } + }, + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "PClass" + }, + "meta": {} + } + }, + { + "meta": {}, + "success": false, + "result": {}, + "exception_info": { + "raised_exception": true, + "exception_message": "InvalidCacheValueError: {'element_count': 1313, 'missing_count': 0, 'missing_percent': 0.0, 'unexpected_count': 1, 'unexpected_percent': 0.07616146230007616, 'unexpected_percent_nonmissing': 0.07616146230007616, 'partial_unexpected_list': ['*'], 'partial_unexpected_index_list': [456], 'partial_unexpected_counts': [{'value': '*', 'count': 1}], 'unexpected_list': ['*'], 'unexpected_index_list': [456]}" + }, + "expectation_config": { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "PClass", + "value_set": [ + "1st", + "2nd", + "3rd" + ] + }, + "meta": {} + } + }, + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Age" + }, + "meta": {} + } + }, + { + "meta": {}, + "success": false, + "result": {}, + "exception_info": { + "raised_exception": true, + "exception_message": "InvalidCacheValueError: {'observed_value': 30.397989417989418, 'element_count': 1313, 'missing_count': 557, 'missing_percent': 42.421934501142424}" + }, + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 40, + "min_value": 20 + }, + "meta": {} + } + }, + { + "meta": {}, + "success": false, + "result": {}, + "exception_info": { + "raised_exception": true, + "exception_message": "InvalidCacheValueError: {'element_count': 1313, 'missing_count': 557, 'missing_percent': 42.421934501142424, 'unexpected_count': 0, 'unexpected_percent': 0.0, 'unexpected_percent_nonmissing': 0.0, 'partial_unexpected_list': [], 'partial_unexpected_index_list': [], 'partial_unexpected_counts': [], 'unexpected_list': [], 'unexpected_index_list': []}" + }, + "expectation_config": { + "expectation_type": "expect_column_values_to_be_between", + "kwargs": { + "column": "Age", + "max_value": 80, + "min_value": 0 + }, + "meta": {} + } + }, + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Sex" + }, + "meta": {} + } + }, + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + "expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "Survived" + }, + "meta": {} + } + }, + { + "meta": {}, + "success": true, + "result": {}, + "exception_info": { + "raised_exception": false, + "exception_message": null + }, + 
"expectation_config": { + "expectation_type": "expect_column_to_exist", + "kwargs": { + "column": "SexCode" + }, + "meta": {} + } + } + ], + "success": false, + "statistics": { + "evaluated_expectations": 10, + "successful_expectations": 6, + "unsuccessful_expectations": 4, + "success_percent": 60.0 + }, + "evaluation_parameters": {} +} \ No newline at end of file