Skip to content

Commit

Permalink
Rerun examples
Browse files Browse the repository at this point in the history
  • Loading branch information
Oxid15 committed Sep 29, 2022
1 parent 151c13f commit 6c93aa2
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 285 deletions.
44 changes: 22 additions & 22 deletions cascade/docs/source/examples/data_validation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
{
"data": {
"text/plain": [
"'0.7.0'"
"'0.7.2'"
]
},
"execution_count": 2,
Expand Down Expand Up @@ -528,8 +528,8 @@
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mDataValidationException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\cascade\\cascade\\docs\\source\\examples\\data_validation.ipynb Cell 33\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/cascade/cascade/docs/source/examples/data_validation.ipynb#X44sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m cde\u001b[39m.\u001b[39;49mPredicateValidator(digits_ds, \u001b[39mlambda\u001b[39;49;00m x: x[\u001b[39m1\u001b[39;49m] \u001b[39m!=\u001b[39;49m \u001b[39m0\u001b[39;49m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:86\u001b[0m, in \u001b[0;36mPredicateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 84\u001b[0m bad_counts \u001b[39m=\u001b[39m [\u001b[39mlen\u001b[39m(bad_items[i]) \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_func))]\n\u001b[0;32m 85\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39many\u001b[39m(bad_counts):\n\u001b[1;32m---> 86\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise(bad_items)\n\u001b[0;32m 87\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 88\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mOK!\u001b[39m\u001b[39m'\u001b[39m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:102\u001b[0m, in \u001b[0;36mPredicateValidator._raise\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 100\u001b[0m failed_checks \u001b[39m=\u001b[39m [i \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(bad_counts)) \u001b[39mif\u001b[39;00m bad_counts[i]]\n\u001b[0;32m 101\u001b[0m failed_items \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin([\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mi\u001b[39m}\u001b[39;00m\u001b[39m: \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prettify_items(items[i])\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m items])\n\u001b[1;32m--> 102\u001b[0m \u001b[39mraise\u001b[39;00m DataValidationException(\n\u001b[0;32m 103\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mChecks in positions \u001b[39m\u001b[39m{\u001b[39;00mfailed_checks\u001b[39m}\u001b[39;00m\u001b[39m failed\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m \\\n\u001b[0;32m 104\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mItems failed by check:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m \\\n\u001b[0;32m 105\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mfailed_items\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 106\u001b[0m )\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:88\u001b[0m, in \u001b[0;36mPredicateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 86\u001b[0m bad_counts \u001b[39m=\u001b[39m [\u001b[39mlen\u001b[39m(bad_items[i]) \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_func))]\n\u001b[0;32m 87\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39many\u001b[39m(bad_counts):\n\u001b[1;32m---> 88\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise(bad_items)\n\u001b[0;32m 89\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 90\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mOK!\u001b[39m\u001b[39m'\u001b[39m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:104\u001b[0m, in \u001b[0;36mPredicateValidator._raise\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 102\u001b[0m failed_checks \u001b[39m=\u001b[39m [i \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(bad_counts)) \u001b[39mif\u001b[39;00m bad_counts[i]]\n\u001b[0;32m 103\u001b[0m failed_items \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin([\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mi\u001b[39m}\u001b[39;00m\u001b[39m: \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prettify_items(items[i])\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m items])\n\u001b[1;32m--> 104\u001b[0m \u001b[39mraise\u001b[39;00m DataValidationException(\n\u001b[0;32m 105\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mChecks in positions \u001b[39m\u001b[39m{\u001b[39;00mfailed_checks\u001b[39m}\u001b[39;00m\u001b[39m failed\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 106\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mItems failed by check:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 107\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mfailed_items\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 108\u001b[0m )\n",
"\u001b[1;31mDataValidationException\u001b[0m: Checks in positions [0] failed\nItems failed by check:\n0: 0, 10, 20, 30, 36 ... 1739, 1745, 1746, 1768, 1793"
]
}
Expand All @@ -547,7 +547,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand All @@ -558,7 +558,7 @@
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mDataValidationException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\cascade\\cascade\\docs\\source\\examples\\data_validation.ipynb Cell 35\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/cascade/cascade/docs/source/examples/data_validation.ipynb#X46sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m cde\u001b[39m.\u001b[39;49mAggregateValidator(digits_ds, \u001b[39mlambda\u001b[39;49;00m ds: \u001b[39mlen\u001b[39;49m(ds) \u001b[39m<\u001b[39;49m \u001b[39m1000\u001b[39;49m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:55\u001b[0m, in \u001b[0;36mAggregateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 52\u001b[0m bad_results\u001b[39m.\u001b[39mappend(i)\n\u001b[0;32m 54\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(bad_results):\n\u001b[1;32m---> 55\u001b[0m \u001b[39mraise\u001b[39;00m DataValidationException(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mChecks in positions \u001b[39m\u001b[39m{\u001b[39;00mbad_results\u001b[39m}\u001b[39;00m\u001b[39m failed\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 56\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 57\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mOK!\u001b[39m\u001b[39m'\u001b[39m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:57\u001b[0m, in \u001b[0;36mAggregateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 54\u001b[0m bad_results\u001b[39m.\u001b[39mappend(i)\n\u001b[0;32m 56\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(bad_results):\n\u001b[1;32m---> 57\u001b[0m \u001b[39mraise\u001b[39;00m DataValidationException(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mChecks in positions \u001b[39m\u001b[39m{\u001b[39;00mbad_results\u001b[39m}\u001b[39;00m\u001b[39m failed\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 58\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 59\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mOK!\u001b[39m\u001b[39m'\u001b[39m)\n",
"\u001b[1;31mDataValidationException\u001b[0m: Checks in positions [0] failed"
]
}
Expand All @@ -583,7 +583,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand All @@ -595,7 +595,7 @@
" 'obj_type': \"<class 'list'>\"}]"
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -607,7 +607,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -653,7 +653,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -664,7 +664,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -682,7 +682,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand All @@ -705,7 +705,7 @@
"[150 rows x 4 columns]"
]
},
"execution_count": 20,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -727,7 +727,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -743,7 +743,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -757,7 +757,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"metadata": {},
"outputs": [
{
Expand All @@ -773,7 +773,7 @@
"cascade.utils.pa_schema_validator.PaSchemaValidator"
]
},
"execution_count": 23,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -791,7 +791,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -800,7 +800,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand All @@ -816,7 +816,7 @@
"cascade.utils.pa_schema_validator.PaSchemaValidator"
]
},
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -834,7 +834,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -843,7 +843,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"outputs": [
{
Expand All @@ -870,7 +870,7 @@
"\u001b[1;31mDataValidationException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\cascade\\cascade\\docs\\source\\examples\\data_validation.ipynb Cell 56\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/cascade/cascade/docs/source/examples/data_validation.ipynb#Y106sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m cdu\u001b[39m.\u001b[39;49mPaSchemaValidator(iris_ds, \u001b[39m'\u001b[39;49m\u001b[39m./iris_schema.yml\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\utils\\pa_schema_validator.py:23\u001b[0m, in \u001b[0;36mPaSchemaValidator.__init__\u001b[1;34m(self, dataset, schema, *args, **kwargs)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, dataset, schema, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 13\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 14\u001b[0m \u001b[39m Parameters\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[39m ----------\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[39m For more details on schemas see pandera's documentation.\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 23\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(dataset, \u001b[39m*\u001b[39margs, func\u001b[39m=\u001b[39m\u001b[39mlambda\u001b[39;00m x: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate(x, schema), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:51\u001b[0m, in \u001b[0;36mAggregateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 49\u001b[0m bad_results \u001b[39m=\u001b[39m []\n\u001b[0;32m 50\u001b[0m \u001b[39mfor\u001b[39;00m i, func \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_func):\n\u001b[1;32m---> 51\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m func(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_dataset):\n\u001b[0;32m 52\u001b[0m bad_results\u001b[39m.\u001b[39mappend(i)\n\u001b[0;32m 54\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(bad_results):\n",
"File \u001b[1;32mc:\\cascade\\cascade\\meta\\validator.py:53\u001b[0m, in \u001b[0;36mAggregateValidator.__init__\u001b[1;34m(self, dataset, func, **kwargs)\u001b[0m\n\u001b[0;32m 51\u001b[0m bad_results \u001b[39m=\u001b[39m []\n\u001b[0;32m 52\u001b[0m \u001b[39mfor\u001b[39;00m i, func \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_func):\n\u001b[1;32m---> 53\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m func(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_dataset):\n\u001b[0;32m 54\u001b[0m bad_results\u001b[39m.\u001b[39mappend(i)\n\u001b[0;32m 56\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(bad_results):\n",
"File \u001b[1;32mc:\\cascade\\cascade\\utils\\pa_schema_validator.py:23\u001b[0m, in \u001b[0;36mPaSchemaValidator.__init__.<locals>.<lambda>\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, dataset, schema, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 13\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 14\u001b[0m \u001b[39m Parameters\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[39m ----------\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[39m For more details on schemas see pandera's documentation.\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 23\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(dataset, \u001b[39m*\u001b[39margs, func\u001b[39m=\u001b[39m\u001b[39mlambda\u001b[39;00m x: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_validate(x, schema), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\cascade\\cascade\\utils\\pa_schema_validator.py:32\u001b[0m, in \u001b[0;36mPaSchemaValidator._validate\u001b[1;34m(ds, schema)\u001b[0m\n\u001b[0;32m 30\u001b[0m schema\u001b[39m.\u001b[39mvalidate(ds\u001b[39m.\u001b[39m_table)\n\u001b[0;32m 31\u001b[0m \u001b[39mexcept\u001b[39;00m SchemaError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m---> 32\u001b[0m \u001b[39mraise\u001b[39;00m DataValidationException(e)\n\u001b[0;32m 33\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 34\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mTrue\u001b[39;00m\n",
"\u001b[1;31mDataValidationException\u001b[0m: <Schema Column(name=sepal length (cm), type=DataType(float64))> failed element-wise validator 0:\n<Check greater_than: greater_than(0)>\nfailure cases:\n index failure_case\n0 0 -5.1\n1 1 -4.9\n2 2 -4.7\n3 3 -4.6\n4 4 -5.0\n.. ... ...\n145 145 -6.7\n146 146 -6.3\n147 147 -6.5\n148 148 -6.2\n149 149 -5.9\n\n[150 rows x 2 columns]"
Expand Down
Loading

0 comments on commit 6c93aa2

Please sign in to comment.