diff --git a/YoutubeForecastMaker/conf/base/parameters.yml b/YoutubeForecastMaker/conf/base/parameters.yml index 92b1eb6..aca3420 100644 --- a/YoutubeForecastMaker/conf/base/parameters.yml +++ b/YoutubeForecastMaker/conf/base/parameters.yml @@ -41,12 +41,34 @@ deploy_forecast: multiseries_id_columns: [title] use_time_series: True # Forecast/Feature Derivation Window start/ends must be a multiple of the time step - feature_derivation_window_start: -21 + feature_derivation_window_start: -72 feature_derivation_window_end: 0 forecast_window_start: 3 forecast_window_end: 36 # TODO: what other configurations should we add? Should we add and leave empty? # - Known in advance? + default_to_known_in_advance: true + feature_settings_config: + - feature_name: video_id + known_in_advance: true + - feature_name: publishedAt + known_in_advance: true + - feature_name: channelId + known_in_advance: true + - feature_name: description + known_in_advance: true + - feature_name: categoryId + known_in_advance: true + - feature_name: channelTitle + known_in_advance: true + - feature_name: tags + known_in_advance: true + - feature_name: duration + known_in_advance: true + - feature_name: madeForKids + known_in_advance: true + - feature_name: association_id + known_in_advance: true advanced_options_config: seed: 42 registered_model: @@ -66,19 +88,54 @@ deploy_forecast: runLeakageRemovedFeatureList: True trigger: type: accuracy_decline + schedule: + minute: + - 0 + hour: + - 0 + dayOfMonth: + - "*" + month: + - "*" + dayOfWeek: + - "*" statusDeclinesToWarning: true, statusDeclinesToFailing: false statusStillInDecline: false + projectOptions: + cvMethod: RandomCV + validationType: CV + reps: null + validationPct: null + holdoutPct: null + metric: RMSE featureListStrategy: informative_features projectOptionsStrategy: same_as_champion modelSelectionStrategy: autopilot_recommended action: model_replacement batch_prediction_job_definition: batch_prediction_job: - intake_settings: + num_concurrent: 3 + intake_settings: type: dataset - dataset: - enabled: True + enabled: true + schedule: + minute: + - 0 + hour: + - 0 + - 3 + - 6 + - 9 + - 12 + - 15 + - 18 + dayOfMonth: + - "*" + month: + - "*" + dayOfWeek: + - "*" name: Batch Prediction Job for Retraining diff --git a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/nodes.py b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/nodes.py index da528bd..14a5af5 100644 --- a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/nodes.py +++ b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/nodes.py @@ -13,7 +13,7 @@ from datarobot.models.use_cases.utils import UseCaseLike from datarobot import Dataset -from datarobotx.idp.batch_predictions import update_or_create_batch_prediction_job +from datarobotx.idp.batch_predictions import get_update_or_create_batch_prediction_job if TYPE_CHECKING: import tempfile @@ -176,9 +176,10 @@ def setup_batch_prediction_job_definition( """ dr.Client(token=token, endpoint=endpoint) # type: ignore - batch_prediction_job["intake_settings"]["dataset"] = dr.Dataset.get(dataset_id=dataset_id) + batch_prediction_job["intake_settings"]["datasetId"] = dataset_id + batch_prediction_job["deploymentId"] = deployment_id - update_or_create_batch_prediction_job(endpoint=endpoint, + get_update_or_create_batch_prediction_job(endpoint=endpoint, token=token, deployment_id=deployment_id, batch_prediction_job=batch_prediction_job, diff --git a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/pipeline.py b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/pipeline.py index a95582d..1a73cdd 100644 --- a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/pipeline.py +++ b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_forecast/pipeline.py @@ -19,7 +19,7 @@ get_or_create_registered_leaderboard_model_version, ) from datarobotx.idp.use_cases import get_or_create_use_case -from datarobotx.idp.retraining_policies import update_or_create_retraining_policy +from datarobotx.idp.retraining_policies import get_update_or_create_retraining_policy from .nodes import (ensure_deployment_settings, put_forecast_distance_into_registered_model_name, @@ -58,6 +58,7 @@ def create_pipeline(**kwargs) -> Pipeline: "dataset_id": "preprocessed_timeseries_data_id", "analyze_and_model_config": "params:project.analyze_and_model_config", "datetime_partitioning_config": "params:project.datetime_partitioning_config", + # "feature_settings_config": "params:project.feature_settings_config", "advanced_options_config": "params:project.advanced_options_config", "use_case": "use_case_id", }, @@ -139,20 +140,22 @@ def create_pipeline(**kwargs) -> Pipeline: "dataset_id": "preprocessed_timeseries_data_id", "enabled": "params:batch_prediction_job_definition.enabled", "name": "params:batch_prediction_job_definition.name", - "batch_prediction_job": "params:batch_prediction_job_definition.batch_prediction_job" + "batch_prediction_job": "params:batch_prediction_job_definition.batch_prediction_job", + "schedule": "params:batch_prediction_job_definition.schedule" }, outputs=None ), - node( + node( name="set_up_retraining_job", - func=update_or_create_retraining_policy, + func=lambda endpoint, token, deployment_id, name, dataset_id, retraining_settings: get_update_or_create_retraining_policy( + endpoint, token, deployment_id, name, dataset_id, **retraining_settings), inputs={ "endpoint": "params:credentials.datarobot.endpoint", "token": "params:credentials.datarobot.api_token", "deployment_id": "deployment_id", "name": "params:retraining_policy.name", "dataset_id": "preprocessed_timeseries_data_id", - "kwargs": "params:retraining_policy.kwargs" + "retraining_settings": "params:retraining_policy.retraining_settings" }, outputs=None ) diff --git a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_streamlit_app/nodes.py b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_streamlit_app/nodes.py index 93ddcce..1e100e8 100644 --- a/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_streamlit_app/nodes.py +++ b/YoutubeForecastMaker/src/YoutubeForecastMaker/pipelines/deploy_streamlit_app/nodes.py @@ -136,7 +136,7 @@ def log_outputs( base_url = urljoin(endpoint, "/") project_url = base_url + "projects/{project_id}/models/{model_id}/" - deployment_url = base_url + "console/{deployment_id}/overview" + deployment_url = base_url + "deployments/{deployment_id}/overview" application_url = base_url + "custom_applications/{application_id}/" logger = logging.getLogger(__name__)