From 23e1d9fd0294804f484543ff7d6c4be9793e8ac5 Mon Sep 17 00:00:00 2001 From: thanasornsawan Date: Thu, 19 Dec 2024 11:51:47 +0200 Subject: [PATCH] add code and update readme --- .github/workflows/ci_cd_pipeline.yml | 54 +++++++ .gitignore | 4 + Dockerfile | 22 +++ README.md | 63 ++++++++- docker-compose.yml | 16 +++ orders_test_data.xlsx | Bin 0 -> 9750 bytes requirements.txt | 5 + sql/Dockerfile | 7 + sql/sqlite_db/db_queries.py | 102 ++++++++++++++ sql/sqlite_db/setup_db.py | 39 ++++++ tests/load_data.py | 68 +++++++++ tests/test_etl.py | 201 +++++++++++++++++++++++++++ 12 files changed, 574 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/ci_cd_pipeline.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 orders_test_data.xlsx create mode 100644 requirements.txt create mode 100644 sql/Dockerfile create mode 100644 sql/sqlite_db/db_queries.py create mode 100644 sql/sqlite_db/setup_db.py create mode 100644 tests/load_data.py create mode 100644 tests/test_etl.py diff --git a/.github/workflows/ci_cd_pipeline.yml b/.github/workflows/ci_cd_pipeline.yml new file mode 100644 index 0000000..5e547ba --- /dev/null +++ b/.github/workflows/ci_cd_pipeline.yml @@ -0,0 +1,54 @@ +name: ETL CI/CD Pipeline + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout the code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install Docker and Docker Compose + run: | + sudo apt-get update + sudo apt-get install -y docker.io + sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose + + - name: Build and start Docker containers with docker-compose + run: | + docker-compose -f docker-compose.yml up -d + 
sleep 10 # Wait for the DB to start properly (adjust if needed) + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run database setup + run: python sql/sqlite_db/setup_db.py + + - name: Load data into database + run: python tests/load_data.py + + - name: Run tests + run: | + pytest tests/test_etl.py + continue-on-error: true + + - name: Clean up Docker containers + run: | + docker-compose -f docker-compose.yml down \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cd215b4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +etl.db +.venv +.pytest_cache \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..361dedd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +# Dockerfile + +# Use a base Python image +FROM python:3.9-slim + +# Set the working directory +WORKDIR /app + +# Copy the requirements file into the container +COPY requirements.txt . + +# Install the dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the project files into the container +COPY . . 
+ +# Set environment variables (if necessary) +ENV DB_PATH="/opt/airflow/sqlite_db/etl.db" + +# Command to run when the container starts +CMD ["pytest", "tests/test_etl.py"] diff --git a/README.md b/README.md index 0083a36..68ce4f5 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,59 @@ Handle missing or invalid values for Quantity (e.g., replace negative values wit ## Test Plan -| **Test Case ID** | **Test Case Description** | **Steps to Execute** | **Expected Result** | **Business Rule Compliance** | **Risk Level** | **Test Data** | -|------------------|-----------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------|------------------------------------------------------------------|----------------------------------|-----------------------------------------------------------| -| TC_01 | **Validate Customer_ID Uniqueness** | - Insert two orders with the same Customer_ID.
- Check if the system raises an error or rejects the second order. | **Failure**: The system should reject the second order with the same Customer_ID. | Duplicate Customer_ID violates uniqueness in the orders table. | **Critical** – Affects data integrity. | Customer_ID: 1234 (used for two orders)
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2 | -| TC_02 | **Validate Correct Date Format** | - Insert an order with an invalid date format (e.g., `12/01/2024` for `Order_Date`).
- Attempt to save the order. | **Failure**: The system should reject the order due to incorrect date format. | The `Order_Date` must follow a standardized format. | **High** – Incorrect data can cause parsing issues and errors in reporting. | Customer_ID: 1234
Order_Date: "12/01/2024" (invalid format)
Product_ID: 567
Quantity: 2 | -| TC_03 | **Validate Missing Customer_Name** | - Insert an order with a missing `Customer_Name` value.
- Attempt to save the order. | **Failure**: The system should reject the order due to missing customer name. | The `Customer_Name` field is mandatory for all orders. | **High** – Missing customer information affects order processing and analysis. | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2 (Customer_Name: NULL) | -| TC_04 | **Validate Negative Quantity** | - Insert an order with a negative `Quantity` value.
- Attempt to save the order. | **Failure**: The system should reject the order due to invalid quantity. | `Quantity` must always be a positive number. | **High** – Negative quantity violates business logic and can affect financial calculations. | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: -5 | -| TC_05 | **Validate Missing Order Date** | - Insert an order with a missing `Order_Date` value.
- Attempt to save the order. | **Failure**: The system should reject the order due to missing order date. | `Order_Date` cannot be missing. | **Critical** – Missing order dates make the data unusable for time-based analysis. | Customer_ID: 1234
Customer_Name: "John Doe"
Product_ID: 567
Quantity: 2 (Order_Date: NULL) | \ No newline at end of file +| Test Case ID | Test Case Description | Steps to Execute | Expected Result | Risk Level | Test Data | +|--------------|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------|----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------| +| TC_001 | **Validate Customer ID Uniqueness** | - Execute `validate_customer_id_unique` query.
- Fetch the results into a DataFrame.
- Check for any duplicate `Customer_ID`s. | **Failure**: The DataFrame should be empty, indicating no duplicates. | **Critical** – Affects data integrity | Customer_ID: 1234 (used for two orders)
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2 | +| TC_002 | **Validate Correct Date Format** | - Execute `validate_order_date_format` query.
- Fetch the results into a DataFrame.
- Validate if the `Order_Date` is in the correct format (`dd/mm/yyyy`). | **Failure**: The DataFrame should have no invalid date formats. | **High** – Affects date parsing and reporting | Customer_ID: 1234
Order_Date: "12/01/2024" (invalid format)
Product_ID: 567
Quantity: 2 | +| TC_003 | **Validate Missing Customer Name** | - Execute `get_orders_with_missing_customer_name` query.
- Fetch the results into a DataFrame.
- Check for any missing `Customer_Name` values. | **Failure**: There should be no missing customer names. | **High** – Affects order processing | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2 (Customer_Name: NULL) | +| TC_004 | **Validate Negative Quantity Orders** | - Execute `get_orders_with_negative_quantity` query.
- Fetch the results into a DataFrame.
- Check for negative `Quantity` values. | **Failure**: The DataFrame should have no rows with negative quantities. | **High** – Affects business logic and financial calculations | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: -5 | +| TC_005 | **Validate Order Date Range (December 2024 only)** | - Execute the query to fetch all `Order_ID` and `Order_Date` from the `Orders` table.
- Check each order's date format and ensure it's within the range `2024-12-01` to `2024-12-31`.
- Identify invalid or out-of-range dates. | **Failure**: Orders with `Order_Date` outside the range `2024-12-01` to `2024-12-31` should be flagged.
**Failure**: Orders with invalid date formats should be flagged. | **High** – Invalid or out-of-range dates can affect reporting and processing. | Customer_ID: 1234
Order_Date: "01/12/2024"
Product_ID: 567
Quantity: 10 (Valid date)
Customer_ID: 5678
Order_Date: "01/11/2024" (Out of range)
Customer_ID: 91011
Order_Date: "InvalidDate" (Invalid format) | | +| TC_006 | **Validate Invalid Email Format** | - Execute `get_invalid_email_customers` query.
- Fetch the results into a DataFrame.
- Check for invalid email formats. | **Failure**: The DataFrame should have no rows with invalid emails. | **High** – Affects customer communication | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2
Customer_Email: "invalid_email" | +| TC_007 | **Ensure Unique Product_ID in Order** | - Execute `get_orders_with_duplicate_product_id` query.
- Fetch the results into a DataFrame.
- Check for duplicate `Product_ID`s in orders. | **Failure**: The DataFrame should be empty, indicating no duplicates. | **Critical** – Affects data integrity | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567 (duplicate)
Quantity: 2 | +| TC_008 | **Ensure Product_Name Cannot Be NULL** | - Execute `get_orders_with_null_product_name` query.
- Fetch the results into a DataFrame.
- Check for any `NULL` values in `Product_Name`. | **Failure**: The DataFrame should have no rows with NULL `Product_Name`. | **High** – Affects order completeness | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 567
Quantity: 2
Product_Name: NULL | +| TC_009 | **Validate Referential Integrity Between Orders and Products** | - Execute `get_invalid_product_references` query.
- Fetch the results into a DataFrame.
- Check for any `Product_ID` references that do not exist in Products. | **Failure**: The DataFrame should have no rows indicating invalid `Product_ID` references. | **Critical** – Affects data integrity | Customer_ID: 1234
Order_Date: "2024-12-01"
Product_ID: 999 (non-existing)
Quantity: 2 | + +## To run a specific test case: +**Run by exact function name:** +```sh +pytest -s tests/test_etl.py::test_invalid_product_id +``` +This will run only the test_invalid_product_id test case in tests/test_etl.py. + +## Running the Project Locally + +**1.Create and Activate a Virtual Environment** +```sh +python3 -m venv venv +``` + +Activate the virtual environment: +```sh +source venv/bin/activate +``` + +**2.Install Project Dependencies** +```sh +pip install -r requirements.txt +``` + +**3.Docker step** +```sh +docker-compose down +docker-compose up -d +``` + +**4.Set Up the Database** +```sh +python sql/sqlite_db/setup_db.py +``` + +**5.Load Data into the Database** +```sh +python tests/load_data.py +``` + +**6.Run the test** +```sh +pytest tests/test_etl.py +``` \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3e6ab56 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +version: '3.8' + +services: + sqlite_db: + build: ./sql # Path where your Dockerfile is located + container_name: sqlite_db + volumes: + - ./sql/sqlite_db:/opt/sqlite_db # Map the local folder to the container's folder + ports: + - "8081:8080" # Adjust if needed + networks: + - sqlite_network + +networks: + sqlite_network: + driver: bridge diff --git a/orders_test_data.xlsx b/orders_test_data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c15865891c6d9fa159009346019e8f211d092935 GIT binary patch literal 9750 zcmeHtbySq!x<4JlpoD~yUvQA_E@coQwLc+qrLb`<=sg87wNH4CP zEZGe0^c>BsO~8M<;9zyKvV`<$SvRudVr~M;Z~2eed&ROKA{`!dLaS2%kAQCTC{juHbw96^LvfzdbdexhP7B9 z7nAYi_Ex9=vEiznUWo7hu1Vv`GZDdAPTbYOV!$GGT7+OGHuUkUXVC2RM>`L&;#`}{ zlYsN<4XAOb3%h4eM}10pZtOpVkRS2W_z>$Kq3;{d=44{#DT*kZAh&vWaRs9I@kTS2 z!x_e<#ZQH?`OoekrrMQiJBM}Jr-<5+Ddd}&D?l#oeg#17+DIcw$4$>NQ&bAiuJ&^N zgN3OA#tz<+`%IPv+S9q+MSqm(D9B)9Vd|TCqa!0BVWJ`-Dg3*t^$ zG+>0Dhb^sDs#QaK9ieeUcyk!Ct5eHtBPZH38sD=1{V-Dm0!i$)$$vH)1S{*6KR;c8 
z(>JYk7`r;Vx;qzDLd3*Q1!oe@?7oHW_GTlFd#VoITnpo8U+tdkO}#pv8y_EEUY~Mv zu5hkAVy~ML*)cBB*0y%^(5w@IxjpY?%ipRqa|SOLsXLZdYfdJLc5hpra)EQEtJO7o zbaL}VSy#$neve0X7150#uA&x_-62GRMd=Uz)BjyX*7(HH?kl$T9Tx&yY6xo1<$EjMU_Jv_0 z=SA{e@pYIU1t%2?iOuA_t==i;`iS!$!y>Mtl*}EwiTRbA@Evop!JU<~@pFmV8VOeD zR{Dv%`^(y+@*4F?sE1CHr6{Z8x4PPxnH*6uv8fu{I`_HI7MLTfiY-AaA!(y->n!q| zc{z!1=dfd{>mYx4{d9`zb(QD=kmC60471i1njIRyVSbo@*rYtR)`2+bS!#>l=o;?v zAQ^=@R^=ZKvVPY&H$XUxl_!38;5$8im(N=Efn)_fyb_6+5++$ub4)q`^DQjDo}S{f ze5G@0&mkmsTrBiC+lE$KR%&d{|HDmMP052ff3-n@dE4gvL40Tac|?cB+Pioh>{meiHc~$W>6;*ruru+C8OT0hK{7kD`|P0kk_p*ygpflui@j)=%{{G zyB(U#NU5bNT$$x$sEV8|$iF;E1*VkalMNA|Zxg1Z0E@bGAJLt%%Y~>@g?7m6yP#oo zWt|p2G48gFXUYdp!|GufyBz@vqS#yTI3X(B9ZeRST^J<8Gf7s2>M8q~&i+wHcZn<8y9(d$9_O z@opLJpT|pFbIleaW!IYC4@vPxY@E++4^W2Q+OfC7NYgp`ThW3$c!6fJB^-|o zDtXgVINa|J4#p%0lbLppwZ3C--OmsIAezFFTcm7a1Az#-!oLoUzc5LMP74Y-d>rZapCvth8bYi4-(}CkzSvO(bumA_H6keFVvNU|rYD1uL$Y zLxKd=;F?1f8Y`cm!-6CZe~DC$RHQ();FF3gc@CQy;88)xp$ZF#(E4Nvl)_p=OJnAM2*jJsTl49yNUR>z`RMy6 zuke-(LqjgcJLueccY7_?h;(%k^LuuH2fEqItInc|vu))3r z;3~IDI<^pC$_zyw@{Xe(Qb-2k^4Mo!O91Yf ziz|YH;&g`;Qh?+$CqcSKd z4ry2+11Q16orSFgcyHdQ0!oj2FszU@0?%(PsL>r($OdZi>}6p;2eg=rKLZuTeH>QE z1zPghXJcyumdwS~Kvi+G!wUI8H=ez0fQvo~3`Y+Du<&{gYKdbX=_>$+@kHfd8v$4> zywpKGake9UMZk|dM7h{zfTtE-8ld5cjP$n$)nSRGja;C44XM$}@{#@$);tIM+#m}I zE%@(P)-D=&uzx9afh9_nMK5vqE+diPv_U{ogduPPQG7p%WSB6;%~(Y~c&9>9M4;0| z3bA)TiE5bevzxDqeE25|Elv2U2Fa*k~FWeqkx|q3OE* zOQ}ov9|PH@LOeHqX8pB@A5uTFY6g4Q|Lw)OP6hh>yA!$h1?&FX6G=_^wjFQV(^76W z6OLk3@-~-v<^f(>eY5~+-d&xz^*h#}RoKOJ|4Qm-EKR;-eb=8^e>daS>2Ob7^|4yn z`OcZn6w-+|MSUeYDGUzOokq`ry>X`cVvV*8sjs{pvyTEd)}03Az&!g)Xz~E|R{j>K zT6gCs9*p{y^Y)d5S0dlNID^+&|3KRt!C_f{;T0GEf3q8Z0+p}XAz-RyR6FQvXrfj zC{(N6RAqBpV$wkjsQt&@@BeITqRUJBO77*c*joiy;Ap*=pAaAYU(2dZkDML~*joQw z<}iOOxE2gKfXca8(ASnlPEW33Iyw%(g!Ei%8VG0R9@~6t-gg>^7yhP|xW_BMIzcc> zTv^&&@-~mf5e}XPteiPpIlqW+-luqR~B_uCo#}4p-s*}LZS|06S~z5)M$(J4V{9|V@qVV6T2G@*ruVt=kySAloqba#vcMr zu}A>m+%@$~ed8O?4P~2ixmwA;lo}+eh!?d53>CsDRnM&IZ4Cl*hr`NBF$;v~kAf)L 
z`pG6Fz*x-)C>9SNgKOKduYO{NwtAb?9+Y>&*-XD&(Co4f(4roHpe?Dcw1pnHb3B%y783_YIbXsrp~ z#b?+JcN;az6NK4zOqZAlBFB5U7zeN8pkeL{e`|0op#a*(8jSMco!O4WcZ}T)`IfG9 zJhz-%X&yIH8^T6D@)=U_feIQV80!B|sIMC!w2sm6db0in~=mP9zLn$VMOz7wO<1o~I>RFW2Si^8aqlIkLH z+Ks6qR{Qnh5-BeWed(BqKIZW*#RA{00dZ4sm}!90r26)lG+uX1TkCUaBO{As8sCl&w|MU&zxwA+oTwyrT0W0VJFl% zCViO=Kv#UPuUg=Od}%drkrB6xFRX^~w^l>=r;o8WHL@~dyZXLrmG!8>tS6~&oqC<2 z&et@I)-Y5l7RIYmX{fH3u5%T9OKliK;Way}C3UrOllf#!Yovp+z~#rqRUg)sT}Os~ z(Ti+Y{&JSPb7xihk7Sj^j)<##!Ag!tR$yK4mbhT@zkQ9^`F)xH5y zEjMWJ?{g88I6sUzB3=@1QhMtAPCSI_wv}YKfjU>zTvWqEQeCT9=qGOKDq((OpN>Eu z!obuB)`;R;x7ZUHg8h7MG0ZO0?L0C@@KtT(c#nScN&EWxO{vw) zjJ1V5YJlC*l95x8cq=sXBsO6}itH?Mw+3Z(K5L`njT@u159-^hWrnnSBHH-E);F7D zC=mWdQ8j~%EF7Gu!+IuD8Fg^l%#;wNCpUKjBJ#y$2n3!s)o`u{yfTl_q%(icHrmia z4(FJyIDvF7hRuqz7{G7qD=N345LGDnVawUUKa&^PI6E=I*CrV-nO{^b@x6L4uC8vf z`#p?z&xIIA`Az;~9k#~jIZTZd_#BQp^o^y^RDFR@`g&sNC}gUNnCP>G76;m?SpG=L z1knyzC-TE0uBqt};{X^Lw#^zOcYp%F^%Xpjy;RQ&G{BcsG19f}J5Xx}{-Q&4kF=mxpx}9%5b^no)|U-u;XIe(ps;|L5U}=ublfo9fva87kY` zT|P6p?C%FhDq6tUiQalNoO$=TGQ-I9h+u1RmVJfR6pzOYDxQ}W_&U6QGJ5ZDKN9xd z6A~kGB73_LA{oo` zQL)l}qb9dHcu% zOuB%6jN0k7_NFz-)2E4w5@&5a^%@Ee5N>4t! 
zCkMl85gHA9UZv=kRw(?cTMVyZ%T&6!$wJsmq&Q{>8Y1qgV3{*#ycj}k%PMj zx@=cWr&TPwG+ld}ct50-WGWq6c*xx2+x)!^Hh4~#uU9OQUtuO1pAPz@$GPjS(`$9e zma^YIzl(ml>{WY|o_!FJEq1wan>SpU94{Zvp#5&+0RO#l_Rf|@S35TRtiu|{e&rh* zTxDSnL4sjKXy__?5tQ>NHA)0V_;Y`RsW2|Vg@{t0`%aDE#ifcqPk6gkLl$lzrIt&y znn>1F68KG@x6MCOlSI?X;KV&r+pV!(FzW_mu|CIUOp97aRePn}7g!*NAJW5dQZ?xC z2cNNO{fLR!yjU;k%;3*>vPJhwdEXI_*hF|Suk40ODCOBcD7PeGilds9PP}X7ux4Bl zE%D)A?>NmsTn=`R6rD2`SR&-}a$Y7IQfXR8P-JaW1)nE6J|StRuZE|8D_sgeiSQ43xR_7=?yXH?){G@n z)9{pJ`Xqw)9ex|w1C4VdA)54*yLkFb>CIz}i5MYo2EYxeLAYvx zTS!_`!}%G@H?iFp!l03LtFonD5o$Ka9Y(7I)PvKf?F|jMXIA5eF`pvUYxz2;YJ?ch z_}E(cYB@a>M{MWHZxqzf0Ae0h#rM)w9v4NOpj@7_I|@?dEGS4wFR_1j&TjsB&Mxy; zeH$B#t7m@^Q`TPWq`vvO`wv|5&{Kj48X}DX=;JYA=tJK@caA|5joX|1p5l|T#D%n` zYsY&XZB>!A0bJZ#sgtYG!Dw7rT6Ll zNV+TlKusDvA{^5#8TCWYG#F+=^rX5?5c|rr8EMmchGKG;WW-IHo^(>PWvBxW$nY(( zRHkA1o;Gb-hxCT5i$S+!Fl*%esaeXfZTZva!D=esisX2z?^cCJR15)T5hvm-;|t7b z7964xd)(azQr5RS+A1l?d&)#+W)ii8P>GAewJC3hwX+9zInsX_8k^ee*48a~yr>kw zX3ZAua|RLRXELYU+zqNa$?6(>2zo#R@aL0tnOu?mrX7hlaL9MLCz)$e57jPg-}tu= zX7BvDCx4b**LUVBGpvbtYTm?t>#cmBkpK8-A*3{lKp>u=z4Yl#>YM^2eHrn%vxDyt zUTz2K7*<)ec@d5+chll&H{2ShrWnC3A|iD>6s?V?D;nr*#~TO{e~AgQDObN6lCobi z^G4*yEaqbktHRLy%0Jf=yv1ZtwtCseu!~GF!!3#z_q_EPh8C55P+|A35Lr5z0>mf~ zqIktlZ~97IbMwe#aH8u{8i_LVT`wQ=&| zU#~bC{)BiY#obHf+@KtoDS`D-Ugecy zJNJurF7zV#Tg~BINc^d`|4yPG;(4*(LhrjsqK&^R+3LMZ{=xyY1WgR}n4Jw0lskL`B4rnlD`?jVnTuNFbVQPg0eFTAhYg5*%bz{cMZ zV=XRLUQeL!{YF&MaZxyf(u9oD&U7%1R;(*V(xyDpJPPBGwReBA?yiD+Oin+FTMCZu z#`-o@e5JL5(;p5={jF5Dbpq&QY}ju+2cFT1P5ZYpC)a03cnCkQxrpSLSf6c@sS7V| z@*pTMm&V(o0y9V9xC|3b5u^H_M5B~6-B*_iScYEL+jGG& zz_f%m9y6BDaNe`1D7bZO7(xYNak*lxf>}pMr3GFT;!_o(SS@`n42p1v>R{~We8hU@ z^@>=g6r^9Y)?-+^EB)w3sY)SG@|+q?6>x`oQ+UE>h?Uge>@IO5sKf_7@SCj@J);Tj z1iwEEX1rYD7vJ|GEe4L$N*{2mN3*JmcnBjrm=2j>{6IV zrNaz7eU!4dqd5tqnj`5nwX$!dW9~qJBPyHYKwxvdR6OUYK@8 zB_xwYH%SGf@|#y`qKWl`@jCO=@uPFbp5%eqI^)mS9WrY#^3izW}@+->qI`ZdbHC*JhKi}N7W&J3ie+9grFJG3Q zKdANg72vPM=&u0R^R~;h`UkO*T><=ElKm^f^@x0#-~FJ~3zvU!PvF0@yIXQeF-ICz0k?oa>J1($oGRO6s5d?ym!{Th!0G$EHC- 
Z`UhT2K?WV;5*={yalG*O0e@Wn`!6kFkbeLG literal 0 HcmV?d00001 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ef47ed9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# For setting up the environment +apache-airflow==2.5.0 # If using Airflow for orchestration +pandas==1.5.3 # For handling data manipulation +pytest==7.2.2 # For running tests +openpyxl==3.0.10 # For reading and writing Excel files (e.g., orders_test_data.xlsx) \ No newline at end of file diff --git a/sql/Dockerfile b/sql/Dockerfile new file mode 100644 index 0000000..f87ca23 --- /dev/null +++ b/sql/Dockerfile @@ -0,0 +1,7 @@ +FROM nouchka/sqlite3:latest + +# Set working directory to where the database will reside +WORKDIR /opt/sqlite_db + +# Initialize or create the SQLite database +RUN sqlite3 /opt/sqlite_db/etl.db "CREATE TABLE IF NOT EXISTS Orders (Order_ID INTEGER PRIMARY KEY AUTOINCREMENT, Product_Name TEXT, Quantity INTEGER);" diff --git a/sql/sqlite_db/db_queries.py b/sql/sqlite_db/db_queries.py new file mode 100644 index 0000000..f6b4372 --- /dev/null +++ b/sql/sqlite_db/db_queries.py @@ -0,0 +1,102 @@ +# Query to Validate Customer_ID Uniqueness +def validate_customer_id_unique(): + return """ + SELECT Customer_ID, Order_Date, COUNT(*) AS Order_Count + FROM Orders + GROUP BY Customer_ID, Order_Date + HAVING COUNT(*) > 1 + """ + +# Query to Validate Correct Date Format +def validate_order_date_format(): + return """ + SELECT Order_ID, Order_Date + FROM Orders + WHERE Order_Date IS NULL + OR NOT (Order_Date GLOB '????-??-??' 
+ AND LENGTH(Order_Date) = 10 + AND CAST(substr(Order_Date, 1, 4) AS INTEGER) > 0 + AND substr(Order_Date, 6, 2) BETWEEN '01' AND '12' + AND CASE + WHEN substr(Order_Date, 6, 2) IN ('01', '03', '05', '07', '08', '10', '12') THEN substr(Order_Date, 9, 2) BETWEEN '01' AND '31' + WHEN substr(Order_Date, 6, 2) IN ('04', '06', '09', '11') THEN substr(Order_Date, 9, 2) BETWEEN '01' AND '30' + WHEN substr(Order_Date, 6, 2) = '02' THEN ( + CASE + WHEN (CAST(substr(Order_Date, 1, 4) AS INTEGER) % 4 = 0 + AND CAST(substr(Order_Date, 1, 4) AS INTEGER) % 100 != 0) + OR CAST(substr(Order_Date, 1, 4) AS INTEGER) % 400 = 0 THEN substr(Order_Date, 9, 2) BETWEEN '01' AND '29' + ELSE substr(Order_Date, 9, 2) BETWEEN '01' AND '28' + END + ) + ELSE 0 + END = 1 + ); + """ + +# Query to find orders with negative quantities +def get_orders_with_negative_quantity(): + return """ + SELECT Order_ID, Customer_ID, Product_ID, Quantity + FROM Orders + WHERE Quantity < 0 + """ + +# Query to find orders with missing Customer_Name +def get_orders_with_missing_customer_name(): + return """ + SELECT Order_ID, Customer_ID, Customer_Name, Product_ID, Quantity + FROM Orders + WHERE Customer_Name IS NULL + """ + +# Query to ensure unique Product_ID (no duplicates allowed in Orders) +def get_orders_with_duplicate_product_id(): + return """ + SELECT Product_ID, COUNT(*) + FROM Orders + GROUP BY Product_ID + HAVING COUNT(*) > 1 + """ + +# Query to ensure Product_Name cannot be NULL in Products +def get_orders_with_null_product_name(): + return """ + SELECT * + FROM Products + WHERE Product_Name IS NULL + """ + +# Query to get email customer in Orders +def get_invalid_email_customers(): + """ + Query to find customers with invalid email format. + Returns rows where the email does not match the expected pattern. 
+ """ + query = """ + SELECT * + FROM Orders + WHERE Email NOT LIKE '%_@__%.__%'; + """ + return query + +def get_orders_with_invalid_date_range(): + """ + Query to find orders where the Order_Date is outside the range '2024-01-01' to '2024-12-31'. + """ + query = """ + SELECT * + FROM Orders + WHERE Order_Date < '2024-01-01' OR Order_Date > '2024-12-31'; + """ + return query + +def get_invalid_product_references(): + """ + Returns the SQL query to check for invalid Product_ID references in the Orders table. + """ + return """ + SELECT o.Order_ID, o.Product_ID + FROM Orders o + LEFT JOIN Products p ON o.Product_ID = p.Product_ID + WHERE p.Product_ID IS NULL; + """ \ No newline at end of file diff --git a/sql/sqlite_db/setup_db.py b/sql/sqlite_db/setup_db.py new file mode 100644 index 0000000..6224754 --- /dev/null +++ b/sql/sqlite_db/setup_db.py @@ -0,0 +1,39 @@ +import sqlite3 + +# Path to SQLite database +DB_PATH = 'sql/sqlite_db/etl.db' + +# Establish a connection +conn = sqlite3.connect(DB_PATH) +cursor = conn.cursor() + +# Drop tables if they exist to ensure schema updates +cursor.execute('DROP TABLE IF EXISTS Orders;') +cursor.execute('DROP TABLE IF EXISTS Products;') + +# Create the Orders table with the updated schema (including Email column) +cursor.execute(''' + CREATE TABLE Orders ( + Order_ID INTEGER PRIMARY KEY, + Customer_ID INTEGER, + Customer_Name TEXT, + Order_Date TEXT, + Product_ID INTEGER, + Quantity INTEGER, + Email TEXT + ); +''') + +# Create the Products table +cursor.execute(''' + CREATE TABLE Products ( + Product_ID INTEGER PRIMARY KEY, + Product_Name TEXT + ); +''') + +# Commit changes and close the connection +conn.commit() +conn.close() + +print("Database and tables set up successfully.") diff --git a/tests/load_data.py b/tests/load_data.py new file mode 100644 index 0000000..ddb214b --- /dev/null +++ b/tests/load_data.py @@ -0,0 +1,68 @@ +import sqlite3 +import os +from openpyxl import load_workbook +from datetime import datetime +# Path to the SQLite database 
+DB_PATH = 'sql/sqlite_db/etl.db' +# Path to the Excel file (dynamically resolve the absolute path) +EXCEL_FILE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '../orders_test_data.xlsx')) + +def load_data_to_db(): + # Load the workbook and the 'Products' and 'Orders' sheets + wb = load_workbook(EXCEL_FILE_PATH) + + # Access the 'Products' and 'Orders' sheets + products_sheet = wb['Products'] + orders_sheet = wb['Orders'] + + # Establish a database connection + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Insert data into Products table + for row in products_sheet.iter_rows(min_row=2, values_only=True): + cursor.execute(''' + INSERT OR IGNORE INTO Products (Product_ID, Product_Name) + VALUES (?, ?) + ''', (row[0], row[1])) + + # Insert data into Orders table + for row in orders_sheet.iter_rows(min_row=2, values_only=True): + # Check if the row is empty (all fields are empty or None) + if all(cell is None or cell == '' for cell in row): + continue # Skip the row if it's empty + + customer_id = row[0] + customer_name = row[1] + order_date = row[2] # Order_Date is assumed to be in the third column (index 2) + product_id = row[3] + quantity = row[4] + email = row[5] + + # Ensure that 'Order_Date' stays as a string, not a date object + if isinstance(order_date, str): + # If the order_date is in string format (like '12/01/2024'), keep it as is + order_date = order_date.strip() # Remove leading/trailing whitespace and newlines + elif isinstance(order_date, datetime): + # If the order_date is a datetime object, convert it to string + order_date = order_date.strftime('%d/%m/%Y') if order_date else None + else: + order_date = None # Set to None if the date format is invalid + + # Skip inserting rows where required data (such as order_date or customer_id) is invalid + if not customer_id or not order_date: + continue # Skip this row if customer_id or order_date is missing or invalid + + cursor.execute(''' + INSERT INTO Orders (Customer_ID, 
Customer_Name, Order_Date, Product_ID, Quantity, Email) + VALUES (?, ?, ?, ?, ?, ?) + ''', (customer_id, customer_name, order_date, product_id, quantity, email)) + + # Commit the changes and close the connection + conn.commit() + conn.close() + + print("Data loaded successfully from Excel to database.") + +if __name__ == '__main__': + load_data_to_db() diff --git a/tests/test_etl.py b/tests/test_etl.py new file mode 100644 index 0000000..1f235ea --- /dev/null +++ b/tests/test_etl.py @@ -0,0 +1,201 @@ +import pandas as pd +import sqlite3 +import pytest +import sys +import os +from datetime import datetime + +# Add the root directory of the project to the Python path +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from sql.sqlite_db.db_queries import ( + validate_customer_id_unique, + validate_order_date_format, + get_orders_with_negative_quantity, + get_orders_with_missing_customer_name, + get_orders_with_duplicate_product_id, + get_orders_with_null_product_name, + get_invalid_email_customers, + get_orders_with_invalid_date_range, + get_invalid_product_references +) + +# Fixture to set up and tear down the SQLite connection +@pytest.fixture(scope="module") +def db_connection(): + # Ensure the database file exists + db_path = os.path.join(os.path.dirname(__file__), "../sql/sqlite_db/etl.db") + assert os.path.exists(db_path), f"Database file not found at {db_path}" + + conn = sqlite3.connect(db_path) + yield conn + conn.close() + +# Test case 1: Validate customer id unique +def test_customer_id_unique(db_connection): + query = validate_customer_id_unique() + df = pd.read_sql(query, db_connection) + + # If df is not empty, print the rows that have duplicates + if not df.empty: + print("\nDuplicate Customer_IDs found:") + print(df) + + # Assert that there are no duplicate orders for the same Customer_ID and Order_Date + assert df.empty, "Duplicate orders exist:\n" + df.to_string(index=False) + +def is_valid_date(date_str): + """Check 
if a date string is valid (dd/mm/yyyy).""" + try: + # Try parsing the date + datetime.strptime(date_str, '%d/%m/%Y') + return True + except ValueError: + return False + +# Test case 2: Validate date format dd/mm/yyyy format +def test_order_date_format(db_connection): + # Run the SQL query to fetch orders with invalid date formats + query = validate_order_date_format() # Your validation SQL query + df = pd.read_sql(query, db_connection) # Fetch the result into a DataFrame + + # Strip any unwanted characters like newlines + df['Order_Date'] = df['Order_Date'].str.replace(r'\n', '').str.strip() + + # Validate if the date is in the correct format and valid + invalid_dates = df[~df['Order_Date'].apply(is_valid_date)] + + # Print out any rows with invalid date formats + if not invalid_dates.empty: + print("Orders with invalid date format:", invalid_dates) + + # Assert that there are no invalid dates remaining + assert invalid_dates.empty, f"There are orders with invalid date formats: {invalid_dates}" + +# Test case 3: Validate Missing Customer Name +def test_missing_customer_name(db_connection): + query = get_orders_with_missing_customer_name() + df = pd.read_sql(query, db_connection) + + missing_customer_name = df['Customer_Name'].isnull().sum() # Count NaN/None values + print(f"Number of missing Customer_Name values: {missing_customer_name}") + + # Assert that there are no missing customer names (fail if there are any) + assert missing_customer_name == 0, f"There are orders with missing Customer_Name: {missing_customer_name}" + +# Test case 4: Validate Negative Quantity Orders +def test_negative_quantity(db_connection): + query = get_orders_with_negative_quantity() + df = pd.read_sql(query, db_connection) + + # Log for debugging + print("DataFrame loaded from the database:") + print(df) + print(f"Negative quantities found: {df[df['Quantity'] < 0]}") + + # Assert that there are NO negative quantities + negative_quantity_count = (df['Quantity'] < 0).sum() # Count 
negative quantities + assert negative_quantity_count == 0, f"Orders with negative quantity found: {negative_quantity_count}" + +# Test case 5: Verify order date range should be within month December only +def test_order_date_range(db_connection): + """ + Validate that all Order_Date values are within the range '2024-12-01' to '2024-12-31'. + Invalid dates should also be flagged separately. + """ + # Query all rows from the Orders table + cursor = db_connection.cursor() + cursor.execute("SELECT Order_ID, Order_Date FROM Orders") + rows = cursor.fetchall() + + invalid_dates = [] + out_of_range_dates = [] + + # Process each row + for row in rows: + order_id = row[0] + order_date = row[1] + + # Validate the date format + try: + # Parse the date assuming the format is 'DD/MM/YYYY' + parsed_date = datetime.strptime(order_date, '%d/%m/%Y') + print(f"Parsed Date: {parsed_date}") # Debugging output + + # Check if the date is out of the valid range (December 2024) + if not (datetime(2024, 12, 1) <= parsed_date <= datetime(2024, 12, 31)): + out_of_range_dates.append((order_id, order_date)) + except ValueError: + # If the date is invalid, add it to the invalid dates list + invalid_dates.append((order_id, order_date)) + + # Log invalid dates + if invalid_dates: + print("\nOrders with invalid date formats:") + for order_id, invalid_date in invalid_dates: + print(f"Order_ID: {order_id}, Invalid Date: {invalid_date}") + + # Log out-of-range dates + if out_of_range_dates: + print("\nOrders with out-of-range dates:") + for order_id, out_of_range_date in out_of_range_dates: + print(f"Order_ID: {order_id}, Out-of-Range Date: {out_of_range_date}") + + # Collect all errors and fail at the end + errors = [] + # Collect all errors and fail at the end + if invalid_dates: + errors.append(f"Invalid date formats: {invalid_dates}") + if out_of_range_dates: + errors.append(f"Out-of-range dates: {out_of_range_dates}") + + # Combine errors into a single line for better test summary display + 
error_message = " | ".join(errors) + assert not errors, error_message + +# Test case 6: Test invalid email format +def test_invalid_email_format(db_connection): + """ + Test case to validate that all email addresses in the Orders table are in a valid format. + """ + query = get_invalid_email_customers() + df = pd.read_sql(query, db_connection) + + # Log for debugging + print("\nRows with invalid email format:") + print(df) + + # Assert that there are no rows with invalid email formats + assert df.empty, f"Invalid email addresses found:\n{df.to_string(index=False)}" + +# Test case 7: Ensure Unique Product_ID (no duplicates allowed) +def test_unique_product_id_in_order(db_connection): + query = get_orders_with_duplicate_product_id() + df = pd.read_sql(query, db_connection) + + assert df.empty, "There are duplicate Product_IDs in the Orders table" + +# Test case 8: Ensure Product_Name Cannot Be NULL +def test_product_name_not_null(db_connection): + query = get_orders_with_null_product_name() + df = pd.read_sql(query, db_connection) + + assert df.empty, "There are Products with NULL Product_Name" + +# Test case 9: Ensure Product_ID in Orders References a Valid Product_ID in Products +def test_referential_integrity(db_connection): + """ + Test case to validate referential integrity between Orders and Products tables. + Expected Behavior + If all Product_IDs in Orders have matching entries in Products, the query should return no rows. + If any Product_ID in Orders does not have a match in Products, the query should return those Order_IDs and their invalid Product_IDs. + """ + query = get_invalid_product_references() + df = pd.read_sql(query, db_connection) + + # Log for debugging + print("\nRows with invalid Product_ID references:") + print(df.to_string(index=False) if not df.empty else "No issues found.") + + # Assert that there are no rows with invalid Product_ID references + assert df.empty, f"Referential integrity issues found:\n{df.to_string(index=False)}"