diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py index dcc1ee85..c5be3f8f 100644 --- a/llm_on_ray/inference/api_server_openai.py +++ b/llm_on_ray/inference/api_server_openai.py @@ -72,14 +72,21 @@ def router_application(deployments, model_list, max_ongoing_requests, max_num_se def openai_serve_run( - deployments, model_list, host, route_prefix, port, max_ongoing_requests, max_num_seqs + deployments, + model_list, + host, + route_prefix, + application_name, + port, + max_ongoing_requests, + max_num_seqs, ): router_app = router_application(deployments, model_list, max_ongoing_requests, max_num_seqs) serve.start(http_options={"host": host, "port": port}) serve.run( router_app, - name="router", + name=application_name, route_prefix=route_prefix, ).options( stream=True, diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py index 6f5e7da7..a800444f 100644 --- a/llm_on_ray/inference/serve.py +++ b/llm_on_ray/inference/serve.py @@ -158,6 +158,12 @@ def main(argv=None): type=str, help="The openai_route_prefix must start with a forward slash ('/')", ) + parser.add_argument( + "--openai_application_name", + default="router", + type=str, + help="If not specified, the application name will be 'router'.", + ) # Print help if no arguments were provided if len(sys.argv) == 1: @@ -191,6 +197,7 @@ def main(argv=None): model_list, host, args.openai_route_prefix, + args.openai_application_name, args.port, args.max_ongoing_requests, args.max_num_seqs,