Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for different time formats #14

Merged
merged 1 commit into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/actions/collect_data/src/cicd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
get_pipeline_row_from_github_info,
get_job_rows_from_github_info,
get_data_pipeline_datetime_from_datetime,
get_datetime_from_github_datetime,
parse_timestamp,
)
import pydantic_models
from test_parser import parse_file
Expand Down Expand Up @@ -94,7 +94,7 @@ def get_github_job_id_to_test_reports(workflow_outputs_dir, workflow_run_id: int
def alter_time(timestamp):
# Workaround for the fact that we don't have milliseconds in the timestamp
# Add a random number of milliseconds to the timestamp to make it unique
original_timestamp = get_datetime_from_github_datetime(timestamp)
original_timestamp = parse_timestamp(timestamp)
altered_time = original_timestamp + timedelta(milliseconds=random.randint(0, 999))
altered_time_str = altered_time.isoformat(sep=" ", timespec="milliseconds")
return altered_time_str
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime
from .parser import Parser
from . import junit_xml_utils
from utils import parse_timestamp


class PythonPytestParser(Parser):
Expand All @@ -29,7 +30,7 @@ def get_tests(filepath):
report_root_tree = junit_xml_utils.get_xml_file_root_element_tree(filepath)
report_root = report_root_tree.getroot()
testsuite = report_root[0]
default_timestamp = datetime.strptime(testsuite.attrib["timestamp"], "%Y-%m-%dT%H:%M:%S.%f")
default_timestamp = parse_timestamp(testsuite.attrib["timestamp"])
get_pydantic_test = partial(get_pydantic_test_from_pytest_testcase_, default_timestamp=default_timestamp)
tests = []
for testcase in testsuite:
Expand Down Expand Up @@ -60,9 +61,9 @@ def get_pydantic_test_from_pytest_testcase_(testcase, default_timestamp=datetime
pass

# Error at the beginning of a test can prevent pytest from recording timestamps at all
if not (skipped or error):
test_start_ts = datetime.strptime(properties["start_timestamp"], "%Y-%m-%dT%H:%M:%S")
test_end_ts = datetime.strptime(properties["end_timestamp"], "%Y-%m-%dT%H:%M:%S")
if not (skipped or error) and "start_timestamp" in properties and "end_timestamp" in properties:
test_start_ts = parse_timestamp(properties["start_timestamp"])
test_end_ts = parse_timestamp(properties["end_timestamp"])
else:
test_start_ts = default_timestamp
test_end_ts = default_timestamp
Expand Down
38 changes: 32 additions & 6 deletions .github/actions/collect_data/src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,34 @@ class InfraErrorV1(enum.Enum):
GENERIC_SET_UP_FAILURE = enum.auto()


def get_datetime_from_github_datetime(github_datetime):
return datetime.strptime(github_datetime, "%Y-%m-%dT%H:%M:%SZ")
def parse_timestamp(timestamp):
    """
    Parse a timestamp string into a datetime object.
    Supports multiple formats with and without timezone and fractional seconds.

    Supported formats:
    - "2024-12-23T02:56:37.036690+00:00"
    - "2024-12-23T02:56:37.036690"
    - "2024-12-23T02:56:37+00:00"
    - "2024-12-23T02:56:37"

    On Python 3.7+ the ``%z`` directive also accepts a literal "Z" suffix
    (e.g. "2024-12-23T02:56:37Z") and treats it as UTC.

    The result is timezone-aware only when the input carries an offset;
    otherwise it is naive. Comparing or subtracting a naive and an aware
    datetime raises TypeError, so callers should only combine values whose
    inputs share the same shape.

    :param timestamp: Timestamp string to parse.
    :return: Parsed datetime object or None if parsing fails (including when
        ``timestamp`` is not a string, e.g. None).
    """
    formats = (
        "%Y-%m-%dT%H:%M:%S.%f%z",  # With microseconds and timezone
        "%Y-%m-%dT%H:%M:%S.%f",  # With microseconds, no timezone
        "%Y-%m-%dT%H:%M:%S%z",  # No microseconds, with timezone
        "%Y-%m-%dT%H:%M:%S",  # No microseconds, no timezone
    )

    for fmt in formats:
        try:
            return datetime.strptime(timestamp, fmt)
        except ValueError:
            continue  # Try the next format
        except TypeError:
            # A non-string input can never match any format; honour the
            # documented "None on failure" contract instead of crashing.
            return None

    return None  # Return None if no format matches


def get_data_pipeline_datetime_from_datetime(requested_datetime):
Expand All @@ -29,11 +55,11 @@ def get_pipeline_row_from_github_info(github_runner_environment, github_pipeline
repository_url = github_pipeline_json["repository"]["html_url"]

jobs = github_jobs_json["jobs"]
jobs_start_times = list(map(lambda job_: get_datetime_from_github_datetime(job_["started_at"]), jobs))
jobs_start_times = list(map(lambda job_: parse_timestamp(job_["started_at"]), jobs))
# We filter out jobs that started before because that means they're from a previous attempt for that pipeline
eligible_jobs_start_times = list(
filter(
lambda job_start_time_: job_start_time_ >= get_datetime_from_github_datetime(pipeline_submission_ts),
lambda job_start_time_: job_start_time_ >= parse_timestamp(pipeline_submission_ts),
jobs_start_times,
)
)
Expand Down Expand Up @@ -143,8 +169,8 @@ def get_job_row_from_github_job(github_job):

job_start_ts = github_job["started_at"]

job_submission_ts_dt = get_datetime_from_github_datetime(job_submission_ts)
job_start_ts_dt = get_datetime_from_github_datetime(job_start_ts)
job_submission_ts_dt = parse_timestamp(job_submission_ts)
job_start_ts_dt = parse_timestamp(job_start_ts)

if job_submission_ts_dt > job_start_ts_dt:
logger.warning(
Expand Down
Loading