diff --git a/data-processing-lib/spark/src/data_processing_spark/runtime/spark/transform_runtime.py b/data-processing-lib/spark/src/data_processing_spark/runtime/spark/transform_runtime.py index 1f3d671a5..7b968b1e9 100644 --- a/data-processing-lib/spark/src/data_processing_spark/runtime/spark/transform_runtime.py +++ b/data-processing-lib/spark/src/data_processing_spark/runtime/spark/transform_runtime.py @@ -34,9 +34,7 @@ def get_transform_config( """ Get the dictionary of configuration that will be provided to the transform's initializer. This is the opportunity for this runtime to create a new set of configuration based on the - config/params provided to this instance's initializer. This may include the addition - of new configuration data such as ray shared memory, new actors, etc, that might be needed and - expected by the transform in its initializer and/or transform() methods. + config/params provided to this instance's initializer. :param partition - the partition assigned to this worker, needed by transforms like doc_id :param data_access_factory - data access factory class being used by the RayOrchestrator. :param statistics - reference to statistics actor @@ -57,7 +55,8 @@ def get_bcast_params(self, data_access_factory: DataAccessFactoryBase) -> dict[s def compute_execution_stats(self, stats: TransformStatistics) -> None: """ Update/augment the given statistics object with runtime-specific additions/modifications. + This method does not return a value; the job execution statistics are generally reported + as metadata by the Spark Orchestrator. :param stats: output of statistics as aggregated across all calls to all transforms. - :return: job execution statistics. These are generally reported as metadata by the Ray Orchestrator. """ pass