Skip to content

Commit

Permalink
preprocessing changes
Browse files Browse the repository at this point in the history
  • Loading branch information
j-beastman committed Jun 24, 2024
1 parent 15b2d83 commit c774575
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
2 changes: 2 additions & 0 deletions {{ cookiecutter.repo_name }}/conf/base/parameters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ deploy_forecast:
datetime_partition_column: as_of_datetime
multiseries_id_columns: [title]
use_time_series: True
feature_derivation_window_start: -21
feature_derivation_window_end: 0
forecast_window_start: 3 # Forecast Window start must be a multiple of the time step
forecast_window_end: 6
# TODO: What other configurations should we add? Should we add them now and leave them empty?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def ensure_deployment_settings(

user_id = deployment.owners["preview"][0]["id"] # type: ignore

client.patch(f"deployments/{deployment_id}/settings",
json={
"automaticActuals": True
})
# client.patch(f"deployments/{deployment_id}/settings",
# json={
# "automaticActuals": {"enabled": True}
# })

# set up retraining
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,23 @@ def create_or_update_modeling_dataset(modeling_dataset_name: str,
new_data["likeDiff"] = 0
new_data["commentDiff"] = 0

def _round_to_nearest_half_hour(time):
    """Round a timestamp to the nearest half-hour boundary.

    Only the minute component decides the direction of rounding:
    minutes 0-14 round down to the top of the hour, minutes 15-44 snap
    to the half hour, and minutes 45-59 round up to the top of the next
    hour. Seconds and microseconds are zeroed in every case.

    Args:
        time: A ``pd.Timestamp`` (or missing value such as ``pd.NaT``).

    Returns:
        The rounded timestamp, or ``time`` unchanged when it is missing.
    """
    # Missing values have no usable minute component (NaT.minute is NaN),
    # which would fall through to the "round up" branch and fail while
    # building the Timedelta. Pass them through untouched instead.
    if pd.isna(time):
        return time

    minutes = time.minute
    if minutes < 15:
        return time.replace(minute=0, second=0, microsecond=0)
    if minutes < 45:
        return time.replace(minute=30, second=0, microsecond=0)

    # Adding the remaining minutes rolls over hour/day/month boundaries
    # correctly; replace() then clears the sub-hour components.
    next_hour = time + pd.Timedelta(minutes=60 - minutes)
    return next_hour.replace(minute=0, second=0, microsecond=0)

modeling_dataset_id = _check_if_dataset_exists(modeling_dataset_name)

# TODO: Should this be idempotent? (use hash?)
if modeling_dataset_id is None:
new_data["as_of_datetime"] = pd.to_datetime(new_data['as_of_datetime'], errors='coerce')
new_data["as_of_datetime"] = new_data["as_of_datetime"].apply(_round_to_nearest_half_hour)
dataset: Dataset = Dataset.create_from_in_memory_data(
data_frame=new_data, use_cases=use_cases
)
Expand Down

0 comments on commit c774575

Please sign in to comment.