Skip to content

Commit

Permalink
Add Issues column as a first column
Browse files: browse the repository at this point in the history
  • Loading branch information
rjambrecic committed Jul 25, 2024
1 parent 9b08c6f commit 94cfa3d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 23 deletions.
3 changes: 2 additions & 1 deletion google_sheets/data_processing/processing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -116,7 +116,8 @@ def process_data_f(


def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901
df["Issues"] = ""
df.insert(0, "Issues", "")

headline_columns = [col for col in df.columns if "Headline" in col]
description_columns = [col for col in df.columns if "Description" in col]

Expand Down
64 changes: 42 additions & 22 deletions tests/data_processing/test_processing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -167,33 +167,53 @@ def test_process_data_f(


@pytest.mark.parametrize(
"issues_column",
("df", "issues_column"),
[
[
"Duplicate headlines found.\nDuplicate descriptions found.\n",
"Duplicate descriptions found.\n",
"",
"Minimum 3 headlines are required, found 2.\nMinimum 2 descriptions are required, found 1.\nHeadline length should be less than 30 characters, found 31 in column Headline 2.\n",
],
None,
(
pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H1", "H2", "H2", ("H" * 31)],
"Headline 3": ["H3", "H3", "H3", ""],
"Description 1": ["D1", "D1", "D2", "D3"],
"Description 2": ["D1", "D1", "D3", ""],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
),
[
"Duplicate headlines found.\nDuplicate descriptions found.\n",
"Duplicate descriptions found.\n",
"",
"Minimum 3 headlines are required, found 2.\nMinimum 2 descriptions are required, found 1.\nHeadline length should be less than 30 characters, found 31 in column Headline 2.\n",
],
),
(
pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H2", "H2", "H2", "H2"],
"Headline 3": ["H3", "H3", "H3", "H3"],
"Description 1": ["D1", "D1", "D1", "D1"],
"Description 2": ["D2", "D2", "D2", "D2"],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
),
None,
),
],
)
def test_validate_output_data(issues_column: Optional[List[str]]) -> None:
df = pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H1", "H2", "H2", ("H" * 31)],
"Headline 3": ["H3", "H3", "H3", ""],
"Description 1": ["D1", "D1", "D2", "D3"],
"Description 2": ["D1", "D1", "D3", ""],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
)
result = validate_output_data(df, "ad")
def test_validate_output_data(
df: pd.DataFrame, issues_column: Optional[List[str]]
) -> None:
expected = df.copy()
result = validate_output_data(df, "ad")

if issues_column:
expected.insert(0, "Issues", "")
expected["Issues"] = issues_column

assert result.equals(expected)

0 comments on commit 94cfa3d

Please sign in to comment.