diff --git a/.github/workflows/ci_cd_pipeline.yml b/.github/workflows/ci_cd_pipeline.yml
index e602449..c2e6304 100644
--- a/.github/workflows/ci_cd_pipeline.yml
+++ b/.github/workflows/ci_cd_pipeline.yml
@@ -78,7 +78,7 @@ jobs:
- name: Run tests
run: |
source venv/bin/activate
- pytest tests/test_etl.py
+ pytest tests/test_data_unittest.py
continue-on-error: true
- name: Clean up Docker containers
diff --git a/Dockerfile b/Dockerfile
index 361dedd..859f76c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,4 +19,4 @@ COPY . .
ENV DB_PATH="/opt/airflow/sqlite_db/etl.db"
# Command to run when the container starts
-CMD ["pytest", "tests/test_etl.py"]
+CMD ["pytest", "tests/test_data_unittest.py"]
diff --git a/README.md b/README.md
index 8ca583e..c1e23eb 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Ensure Order_Date is in the correct format (YYYY-MM-DD).
Handle missing or invalid values for Quantity (e.g., replace negative values with zero).
**Load**: Data is loaded into the Orders table in SQLite
-## Test Plan
+## Test Plan for Data Quality Testing (unit test)
| Test Case ID | Test Case Description | Steps to Execute | Expected Result | Risk Level | Test Data |
|--------------|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------|----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -36,9 +36,9 @@ Handle missing or invalid values for Quantity (e.g., replace negative values wit
## To run a specific test case:
**Run by exact function name:**
```sh
-pytest -s tests/test_etl.py::test_invalid_product_id
+pytest -s tests/test_data_unittest.py::test_invalid_product_id
```
-This will run only the test_invalid_product_id test case in tests/test_etl.py.
+This runs only the `test_invalid_product_id` test case in `tests/test_data_unittest.py`.
## Running the Project Locally
@@ -75,10 +75,22 @@ python tests/load_data.py
**6.Run the test**
```sh
-pytest tests/test_etl.py
+pytest tests/test_data_unittest.py
```
# Example fail result after run test
![date result](https://github.com/Thanasornsawan/Practice_ETL_QA_analyst/blob/main/photos/date_range.png?raw=true)
-![map result](https://github.com/Thanasornsawan/Practice_ETL_QA_analyst/blob/main/photos/id_mapping.png?raw=true)
\ No newline at end of file
+![map result](https://github.com/Thanasornsawan/Practice_ETL_QA_analyst/blob/main/photos/id_mapping.png?raw=true)
+
+## Data Completeness Testing
+
+- **Objective:** Ensure that all expected data is loaded into the target system without any loss.
+- **Test Case:** Compare the row counts of the source Excel sheets (Orders, Products) with the row counts of the corresponding target tables in SQLite to verify completeness.
+
+**Run the test**
+```sh
+pytest tests/test_load_correct.py
+```
+![load result](https://github.com/Thanasornsawan/Practice_ETL_QA_analyst/blob/main/photos/test_load.png?raw=true)
+
diff --git a/photos/test_load.png b/photos/test_load.png
new file mode 100644
index 0000000..c2b2aa5
Binary files /dev/null and b/photos/test_load.png differ
diff --git a/tests/test_etl.py b/tests/test_data_unittest.py
similarity index 100%
rename from tests/test_etl.py
rename to tests/test_data_unittest.py
diff --git a/tests/test_load_correct.py b/tests/test_load_correct.py
new file mode 100644
index 0000000..f727e92
--- /dev/null
+++ b/tests/test_load_correct.py
@@ -0,0 +1,46 @@
+import pandas as pd
+import sqlite3
+import os
+
+def test_row_count():
+ # Path to SQLite database
+ DB_PATH = 'sql/sqlite_db/etl.db'
+ print(f"Database path: {DB_PATH}")
+
+ # Establish a connection
+ conn = sqlite3.connect(DB_PATH)
+
+ # Path to Excel file
+ EXCEL_FILE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '../orders_test_data.xlsx'))
+ print(f"Excel file path: {EXCEL_FILE_PATH}")
+
+ # Read source data from Excel file for both sheets
+ source_orders_df = pd.read_excel(EXCEL_FILE_PATH, sheet_name="Orders")
+ source_products_df = pd.read_excel(EXCEL_FILE_PATH, sheet_name="Products")
+
+ # Drop empty rows in both sheets
+ source_orders_df = source_orders_df.dropna(how='all')
+ source_products_df = source_products_df.dropna(how='all')
+
+ print(f"Source Orders Rows: {len(source_orders_df)}")
+ print(f"Source Products Rows: {len(source_products_df)}")
+
+ # Read target data from the database for both tables
+ target_orders_df = pd.read_sql_query("SELECT * FROM Orders", conn)
+ target_products_df = pd.read_sql_query("SELECT * FROM Products", conn)
+
+ print(f"Target Orders Rows: {len(target_orders_df)}")
+ print(f"Target Products Rows: {len(target_products_df)}")
+
+ # Validate row count for Orders
+ assert len(source_orders_df) == len(target_orders_df), (
+ f"Row count mismatch for Orders: Source ({len(source_orders_df)}) vs Target ({len(target_orders_df)})"
+ )
+
+ # Validate row count for Products
+ assert len(source_products_df) == len(target_products_df), (
+ f"Row count mismatch for Products: Source ({len(source_products_df)}) vs Target ({len(target_products_df)})"
+ )
+
+ print("Row count validation passed for both Orders and Products.")
+ conn.close()