diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml index 5cd98294..80d761dc 100644 --- a/charts/model-engine/templates/service_template_config_map.yaml +++ b/charts/model-engine/templates/service_template_config_map.yaml @@ -938,6 +938,7 @@ data: host: "${RESOURCE_NAME}.${NAMESPACE}.svc.cluster.local" port: number: 80 + ${MCP_TIMEOUT} {{- end }} {{- if .Values.destinationrule.enabled }} destination-rule.yaml: |- diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 03c99cd2..736161ef 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,6 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str + MCP_TIMEOUT: str # "" (Default) is 30 seconds class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1361,6 +1362,23 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": + # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue + # MCP servers use passthrough forwarder and have routes containing /mcp + is_mcp_server = False + if ( + isinstance(flavor, RunnableImageLike) + and flavor.forwarder_type == "passthrough" + ): + all_routes = [] + if flavor.predict_route: + all_routes.append(flavor.predict_route) + if flavor.routes: + all_routes.extend(flavor.routes) + if flavor.extra_routes: + all_routes.extend(flavor.extra_routes) + is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + timeout = "timeout: 300s" if is_mcp_server else "" + return VirtualServiceArguments( # Base resource arguments RESOURCE_NAME=k8s_resource_group_name, @@ -1373,6 +1391,7 @@ def get_endpoint_resource_arguments_from_request( OWNER=owner, GIT_TAG=GIT_TAG, DNS_HOST_DOMAIN=infra_config().dns_host_domain, + MCP_TIMEOUT=timeout, ) elif endpoint_resource_name == "destination-rule": return DestinationRuleArguments( diff --git a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py index 6c28f499..12c02673 100644 --- a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py +++ b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py @@ -33,6 +33,9 @@ from model_engine_server.domain.services import ModelEndpointService from model_engine_server.domain.use_cases.model_endpoint_use_cases import MODEL_BUNDLE_CHANGED_KEY from model_engine_server.infra.gateways import ModelEndpointInfraGateway +from model_engine_server.infra.gateways.resources.k8s_endpoint_resource_delegate import ( + K8SEndpointResourceDelegate, +) from model_engine_server.infra.repositories import ModelEndpointCacheRepository from model_engine_server.infra.repositories.model_endpoint_record_repository import ( ModelEndpointRecordRepository,